"""rawdog plugin that writes a three-column "headlines" HTML page.

The plugin registers two rawdog hooks:

* ``config_option`` -- understands ``headlinefile`` (output path) and
  ``headlines`` (maximum number of headlines to show).
* ``output_write_files`` -- renders the newest articles into the HTML
  template below and writes the result to the configured file.
"""
from __future__ import division

import calendar
import datetime
import itertools
import os
import random
import re
import time
from StringIO import StringIO

import rawdoglib.plugins
from rawdoglib.rawdog import (
    DayWriter, write_ascii, format_time, fill_template, safe_ftime,
    encode_references, get_system_encoding,
)

# Page skeleton; the single %s receives the rendered columns.
title_list = u"""
<!DOCTYPE html>
<html lang="en">
<head prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb# join-marrow: http://ogp.me/ns/fb/join-marrow#">
<meta property="og:type" content="join-marrow:headlines" />
<meta property="og:title" content="Current Headlines" />
<meta property="og:site_name" content="Catholic News" />
<meta property="og:url" content="http://planet.joinmarrow.com/headlines" />
<meta property="og:image" content="http://planet.joinmarrow.com/planet.joinmarrow.png" />
<meta property="og:image:width" content="512" />
<meta property="og:image:height" content="512" />
<meta property="og:description" content="Headlines from a collection of Catholic blogs. Visit us to take the pulse of the Catholic internet" />
<meta property="fb:app_id" content="897925460261572" />
<base target="_blank" />
<meta http-equiv="refresh" content="900"> <!-- refresh every 15 minutes -->
<link rel="apple-touch-icon" sizes="57x57" href="/apple-touch-icon-57x57.png">
<link rel="apple-touch-icon" sizes="60x60" href="/apple-touch-icon-60x60.png">
<link rel="apple-touch-icon" sizes="72x72" href="/apple-touch-icon-72x72.png">
<link rel="apple-touch-icon" sizes="76x76" href="/apple-touch-icon-76x76.png">
<link rel="apple-touch-icon" sizes="114x114" href="/apple-touch-icon-114x114.png">
<link rel="apple-touch-icon" sizes="120x120" href="/apple-touch-icon-120x120.png">
<link rel="apple-touch-icon" sizes="144x144" href="/apple-touch-icon-144x144.png">
<link rel="apple-touch-icon" sizes="152x152" href="/apple-touch-icon-152x152.png">
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon-180x180.png">
<link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
<link rel="icon" type="image/png" href="/favicon-194x194.png" sizes="194x194">
<link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
<link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
<link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
<link rel="manifest" href="/manifest.json">
<link rel="mask-icon" href="/safari-pinned-tab.svg" color="#5bbad5">
<meta name="msapplication-TileColor" content="#888888">
<meta name="msapplication-TileImage" content="/mstile-144x144.png">
<meta name="theme-color" content="#ffffff">
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Catholic Feeds - Headlines</title>
<link rel="stylesheet" href="headline-style.css">
<!-- Piwik -->
<script type="text/javascript">
  var _paq = _paq || [];
  _paq.push(['trackPageView']);
  _paq.push(['enableLinkTracking']);
  (function() {
    var u="//piwik.elangley.org/";
    _paq.push(['setTrackerUrl', u+'piwik.php']);
    _paq.push(['setSiteId', 1]);
    var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
    g.type='text/javascript'; g.async=true; g.defer=true; g.src=u+'piwik.js'; s.parentNode.insertBefore(g,s);
  })();
</script>
<noscript><p><img src="//piwik.elangley.org/piwik.php?idsite=1" style="border:0;" alt="" /></p></noscript>
<!-- End Piwik Code -->
</head>
<body>
<div id="fb-root"></div>
<script>(function(d, s, id) {
  var js, fjs = d.getElementsByTagName(s)[0];
  if (d.getElementById(id)) return;
  js = d.createElement(s); js.id = id;
  js.src = "//connect.facebook.net/en_US/sdk.js#xfbml=1&version=v2.5&appId=897925460261572";
  fjs.parentNode.insertBefore(js, fjs);
}(document, 'script', 'facebook-jssdk'));</script>
<h1>Headlines</h1>
<main>
%s
</main>
</body>
</html>
"""

# Wrapper for one column of headlines.
colTemplate = u"""
<div class="col">
%s
</div>
"""

# CSS class name generated for every feed-supplied category.
categoryClassTemplate = u"category-%s"

# Markup for one headline; filled from the per-article dict built in
# HeadlineOutput.output_write_files.
item = u"""
<article data-v="%(score)s" data-date="%(date)s" class="%(categories)s">
    <a href="%(url)s" rel="nofollow">
        <h2>%(title)s</h2>
        <span class="source">%(author)s %(feed)s</span>
    </a>
</article>
"""

# Number of columns the headlines are dealt into.
_COLUMN_COUNT = 3

# Collapses runs of dashes produced by slugerate().
_DASH_RUN = re.compile(u'-+')


def safe_strftime(obj, format):
    """Call the strftime method on an object, and convert the result to
    ASCII-encoded HTML."""
    u = unicode(obj.strftime(format), get_system_encoding())
    return encode_references(u)


def slugerate(str):
    """Turn a category name into a lowercase, dash-separated slug.

    Spaces and underscores become dashes, any character that is neither
    alphanumeric nor a dash is dropped, and runs of dashes are collapsed
    into one.

    The parameter keeps its original name (which shadows the builtin) so
    that existing keyword callers are not broken.
    """
    kept = []
    for c in str:
        if c in (u' ', u'_'):
            kept.append(u'-')
        elif c.isalnum() or c == u'-':
            kept.append(c)
    return _DASH_RUN.sub(u'-', u''.join(kept).lower())


def _escape_attr(value):
    """Escape a string for use inside a double-quoted HTML attribute."""
    return (value.replace(u'&', u'&amp;')
                 .replace(u'<', u'&lt;')
                 .replace(u'>', u'&gt;')
                 .replace(u'"', u'&quot;'))


class HeadlineOutput:
    """Plugin state plus the two hook implementations."""

    def __init__(self):
        self.filename = "output/headlines.html"  # where the page is written
        self.headlines = 120                     # maximum headlines rendered
        self.titles = []

    def config_option(self, config, name, value):
        """Handle the ``headlinefile`` and ``headlines`` config options.

        Returns False when the option was consumed here and True when it
        is not one of ours.

        Raises ValueError if ``headlines`` is not an integer.
        """
        if name == "headlinefile":
            self.filename = value
            return False
        if name == "headlines":
            # The value is later used as a slice bound, so it must be an
            # int; a string straight from the config file would raise
            # TypeError at render time.
            self.headlines = int(value)
            return False
        return True

    def output_write_files(self, rawdog, config, articles, article_dates):
        """Render the newest articles to ``self.filename``.

        ``articles`` is iterated as a collection of article objects and
        ``article_dates`` is used as a mapping from article to a numeric
        timestamp.  Always returns True.
        """
        # Newest first.  Sort ascending then reverse (rather than
        # reverse=True) so that ties keep the order they always had.
        newest_first = sorted(articles, key=article_dates.get)
        newest_first.reverse()

        article_info = []
        title_words = {}
        authors = {}
        for article in newest_first:
            ei = article.entry_info
            title = ei.get(u'title')
            link = ei.get(u'link')
            if not title or not link:
                # A headline needs both; skipping avoids one malformed
                # entry aborting the whole page with a KeyError.
                continue

            author_name = ei.get(u'author') or u""
            # "\u2014" is an em dash, written as an escape so the source
            # stays pure ASCII and needs no encoding declaration.  Emit
            # nothing when there is no author instead of a dangling dash.
            if author_name:
                author = u"%s \u2014" % author_name
            else:
                author = u""
            feed_title = rawdog.feeds[article.feed].feed_info[u'title']

            # Count how often each longer word appears across titles;
            # words of three characters or fewer are pinned to zero.
            for word in title.split():
                if len(word) <= 3:
                    title_words[word] = 0
                else:
                    title_words[word] = title_words.get(word, 0) + 1
            authors[author] = 1

            # Tags may lack a term; skip those rather than crash.
            terms = [tag.get(u'term') for tag in (ei.get(u'tags') or [])]
            categories = u' '.join(sorted(
                categoryClassTemplate % slugerate(term)
                for term in terms if term))

            # NOTE(review): title, author and feed name are interpolated
            # into the page as-is.  Whether they are plain text or
            # already-sanitised HTML depends on the feed parser's
            # *_detail type, so they are not escaped here -- confirm.
            article_info.append(dict(
                url=_escape_attr(link),
                title=title,
                author=author,
                feed=feed_title,
                date=datetime.datetime.fromtimestamp(
                    article_dates[article]).isoformat(),
                article=article,
                categories=categories,
            ))

        reftime = time.time()
        article_info = article_info[:self.headlines]

        def score_of(datum):
            """Score a headline: author term + word counts + age."""
            score = authors.get(datum[u'author'], 0)
            for word in datum[u'title'].split():
                score += title_words.get(word, 0)
            score += (reftime - article_dates[datum[u'article']]) / 10000
            return score

        # The deterministic score is what the template shows (data-v);
        # the ordering multiplies it by a random factor in [0.5, 1.5).
        for datum in article_info:
            datum[u'score'] = score_of(datum)
        article_info.sort(
            key=lambda datum: datum[u'score'] * (random.random() + 0.5))

        # Deal the headlines round-robin into the columns.
        columns = [[] for _ in range(_COLUMN_COUNT)]
        for idx, datum in enumerate(article_info):
            columns[idx % _COLUMN_COUNT].append(item % datum)

        output = title_list % u"\n".join(
            colTemplate % u"\n".join(column) for column in columns)

        # Binary mode: we write already-encoded UTF-8 bytes.
        with open(self.filename, "wb") as f:
            print(u"Writing headlines to: %s" % self.filename)
            f.write(output.encode('utf-8'))
        return True


p = HeadlineOutput()
rawdoglib.plugins.attach_hook(u"config_option", p.config_option)
rawdoglib.plugins.attach_hook(u"output_write_files", p.output_write_files)