a88b5484 |
from __future__ import division
import os, time, datetime, calendar
import rawdoglib.plugins
|
d9cc423c |
import random
|
a88b5484 |
from rawdoglib.rawdog import DayWriter, write_ascii, format_time, fill_template, safe_ftime, encode_references, get_system_encoding
from StringIO import StringIO
|
d9cc423c |
# HTML skeleton of the generated headlines page.  It holds exactly one "%s"
# placeholder (inside <main>), which output_write_files fills with the rendered
# columns via the % operator -- so any literal percent sign added to this
# markup must be written as "%%".
# NOTE(review): the bare "|" and "<hash> |" lines inside this literal look like
# annotation residue rather than template content -- confirm before shipping.
title_list = u"""
|
a88b5484 |
<!DOCTYPE html>
<html lang="en">
|
564b85e6 |
<head prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb# join-marrow: http://ogp.me/ns/fb/join-marrow#">
<meta property="og:type" content="join-marrow:headlines" />
<meta property="og:title" content="Current Headlines" />
<meta property="og:site_name" content="Catholic News" />
<meta property="og:url" content="http://planet.joinmarrow.com/headlines" />
<meta property="og:image" content="http://planet.joinmarrow.com/planet.joinmarrow.png" />
<meta property="og:image:width" content="512" />
<meta property="og:image:height" content="512" />
<meta property="og:description" content="Headlines from a collection of Catholic blogs. Visit us to take the pulse of the Catholic internet" />
<meta property="fb:app_id" content="897925460261572" />
<base target="_blank" />
<meta http-equiv="refresh" content="900"> <!-- refresh every 15 minutes -->
<link rel="apple-touch-icon" sizes="57x57" href="/apple-touch-icon-57x57.png">
<link rel="apple-touch-icon" sizes="60x60" href="/apple-touch-icon-60x60.png">
<link rel="apple-touch-icon" sizes="72x72" href="/apple-touch-icon-72x72.png">
<link rel="apple-touch-icon" sizes="76x76" href="/apple-touch-icon-76x76.png">
<link rel="apple-touch-icon" sizes="114x114" href="/apple-touch-icon-114x114.png">
<link rel="apple-touch-icon" sizes="120x120" href="/apple-touch-icon-120x120.png">
<link rel="apple-touch-icon" sizes="144x144" href="/apple-touch-icon-144x144.png">
<link rel="apple-touch-icon" sizes="152x152" href="/apple-touch-icon-152x152.png">
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon-180x180.png">
<link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
<link rel="icon" type="image/png" href="/favicon-194x194.png" sizes="194x194">
<link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
<link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
<link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
<link rel="manifest" href="/manifest.json">
|
d9cc423c |
<link rel="mask-icon" href="/safari-pinned-tab.svg" color="#5bbad5">
<meta name="msapplication-TileColor" content="#888888">
|
564b85e6 |
<meta name="msapplication-TileImage" content="/mstile-144x144.png">
<meta name="theme-color" content="#ffffff">
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Catholic Feeds - Headlines</title>
<link rel="stylesheet" href="headline-style.css">
<!-- Piwik -->
<script type="text/javascript">
var _paq = _paq || [];
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="//piwik.elangley.org/";
_paq.push(['setTrackerUrl', u+'piwik.php']);
_paq.push(['setSiteId', 1]);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.type='text/javascript'; g.async=true; g.defer=true; g.src=u+'piwik.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<noscript><p><img src="//piwik.elangley.org/piwik.php?idsite=1" style="border:0;" alt="" /></p></noscript>
<!-- End Piwik Code -->
</head>
<body>
<div id="fb-root"></div>
<script>(function(d, s, id) {
var js, fjs = d.getElementsByTagName(s)[0];
if (d.getElementById(id)) return;
js = d.createElement(s); js.id = id;
js.src = "//connect.facebook.net/en_US/sdk.js#xfbml=1&version=v2.5&appId=897925460261572";
fjs.parentNode.insertBefore(js, fjs);
}(document, 'script', 'facebook-jssdk'));</script>
<h1>Headlines</h1>
<main>
%s
</main>
</body>
</html>
|
a88b5484 |
"""
|
d9cc423c |
# Wrapper markup for one column of headlines.  The single "%s" placeholder is
# filled by output_write_files with that column's article markup joined by
# newlines.
# NOTE(review): the "|" / "<hash> |" lines inside this literal look like
# annotation residue rather than template content -- confirm.
colTemplate = u"""
|
a88b5484 |
<div class="col">
%s
</div>
"""
|
d9cc423c |
# CSS class name pattern for one tag; "%s" receives the slugerate()d tag term.
categoryClassTemplate = u"category-%s"
# Markup for a single headline.  It is filled with a mapping that must provide
# the keys score, date, categories, url, title, author and feed (extra keys in
# the mapping are ignored by the % operator).
# NOTE(review): the "|" / "<hash> |" lines inside this literal look like
# annotation residue rather than template content -- confirm.
item = u"""
<article data-v="%(score)s" data-date="%(date)s" class="%(categories)s">
|
564b85e6 |
<a href="%(url)s" rel="nofollow">
<h2>%(title)s</h2>
<span class="source">%(author)s %(feed)s</span>
|
a88b5484 |
</a>
</article>
"""
def safe_strftime(obj, format):
    """Format *obj* with its ``strftime`` method and return the result
    after decoding and reference-encoding it.

    The raw ``strftime`` output is decoded to unicode using the encoding
    reported by ``get_system_encoding()`` and then handed to
    ``encode_references()`` (per the original docstring, this yields
    ASCII-encoded HTML).
    """
    formatted = obj.strftime(format)
    decoded = unicode(formatted, get_system_encoding())
    return encode_references(decoded)
|
564b85e6 |
import itertools
|
a88b5484 |
|
d9cc423c |
import re
def slugerate(str):
    """Turn *str* into a lowercase, dash-separated slug.

    Alphanumeric characters and dashes are kept, spaces and underscores
    are turned into dashes, every other character is dropped, and runs of
    dashes are collapsed into a single dash.  Leading and trailing dashes
    are left in place.

    A false value (``None`` or an empty string) yields ``u''`` instead of
    raising, so callers can pass an optional value straight through.
    """
    # The parameter keeps its original name (which shadows the builtin) so
    # that keyword callers keep working.
    if not str:
        return u''
    separators = (u' ', u'_')
    kept = [u'-' if c in separators else c
            for c in str
            if c.isalnum() or c == u'-' or c in separators]
    # Lowercase first, then squeeze repeated dashes produced by adjacent
    # separators.
    return re.sub(u'-+', u'-', u''.join(kept).lower())
|
a88b5484 |
class HeadlineOutput:
    """Plugin object that renders recent article titles into an HTML page.

    The page is built from the module-level templates ``title_list``,
    ``colTemplate`` and ``item`` and written to ``self.filename``.
    """

    def __init__(self):
        # Path of the generated page; overridden by the "headlinefile" option.
        self.filename = "output/headlines.html"
        # Maximum number of headlines rendered; overridden by "headlines".
        self.headlines = 120
        # Not read by any method of this class; kept in case other code
        # accesses the attribute.
        self.titles = []

    def config_option(self, config, name, value):
        """Handle the "headlinefile" and "headlines" configuration options.

        Returns False when the option was recognised and stored, True
        otherwise (presumably so rawdog keeps looking for another handler
        -- confirm against the rawdog plugin documentation).

        Raises ValueError if the "headlines" value is not an integer.
        """
        if name == "headlinefile":
            self.filename = value
            return False
        if name == "headlines":
            # The limit is used as a slice bound, so it has to be an integer;
            # int() accepts both a textual and an already numeric value.
            self.headlines = int(value)
            return False
        return True

    def output_write_files(self, rawdog, config, articles, article_dates):
        """Render the headlines page and write it to ``self.filename``.

        ``articles`` are ordered newest first using ``article_dates`` (which
        maps each article to a timestamp), cut down to ``self.headlines``
        entries, scored, shuffled slightly by a random factor and dealt
        round-robin into three columns.  Always returns True.
        """
        # Ascending sort followed by an in-place reverse keeps the original
        # ordering of articles that share a timestamp.
        newest_first = sorted(articles, key=article_dates.get)
        newest_first.reverse()

        article_info = []
        title_words = {}
        authors = {}
        for article in newest_first:
            ei = article.entry_info
            # Entries lacking a title or link must not abort the whole page.
            article_title = ei.get(u'title') or u""
            article_link = ei.get(u'link') or u""
            # \u2014 is an em dash; written as an escape so the source stays
            # ASCII-only.
            article_author = u"%s \u2014" % ei.get(u'author', u"")
            feed_title = rawdog.feeds[article.feed].feed_info[u'title']

            # Tally title words; words of three characters or fewer are
            # pinned to zero so they never contribute to a score.
            for word in article_title.split():
                if len(word) <= 3:
                    title_words[word] = 0
                else:
                    title_words[word] = title_words.get(word, 0) + 1
            authors[article_author] = 1

            # One CSS class per tag; tags without a term are skipped because
            # there is nothing to build a class name from.
            categories = u' '.join(sorted(
                categoryClassTemplate % slugerate(tag.get(u'term'))
                for tag in ei.get(u'tags', [])
                if tag.get(u'term')))

            article_info.append(dict(
                url=article_link,
                title=article_title,
                author=article_author,
                feed=feed_title,
                date=datetime.datetime.fromtimestamp(
                    article_dates[article]).isoformat(),
                article=article,
                categories=categories))

        reftime = time.time()
        article_info = article_info[:self.headlines]

        def sort_key(x):
            # Score = author weight + summed word tallies + age (in units of
            # 10000 seconds).  The plain score is stored on the record for
            # the template; the returned key is jittered by a random factor
            # in [0.5, 1.5) so the order varies between runs.
            score = authors.get(x[u'author'], 0)
            score += sum(title_words.get(word, 0)
                         for word in x[u'title'].split())
            score += (reftime - article_dates[x[u'article']]) / 10000
            x[u'score'] = score
            return score * (random.random() + 0.5)

        # NOTE(review): url/title/author/feed are interpolated into the HTML
        # without escaping; whether they are already HTML-safe depends on
        # what rawdog/feedparser supply -- confirm and escape if they are not.
        items = [[], [], []]
        for idx, article_datum in enumerate(sorted(article_info, key=sort_key)):
            items[idx % 3].append(item % article_datum)

        output = title_list % u"\n".join(
            colTemplate % u"\n".join(column) for column in items)

        with open(self.filename, u"w") as f:
            # Single-argument parenthesised form behaves the same under the
            # Python 2 print statement.
            print(u"Writing headlines to: %s" % self.filename)
            f.write(output.encode('utf-8'))
        return True
|
a88b5484 |
# Single module-level plugin instance; its bound methods are the hook
# callbacks registered at the bottom of this module.
p = HeadlineOutput()
|
d9cc423c |
# Register the instance's methods under the hook names they are named after.
# NOTE(review): presumably rawdog then calls them for option parsing and for
# output generation respectively -- see the rawdog plugin documentation.
rawdoglib.plugins.attach_hook(u"config_option", p.config_option)
rawdoglib.plugins.attach_hook(u"output_write_files", p.output_write_files)
|