from __future__ import division
import os, time, datetime, calendar
import rawdoglib.plugins
import random
from rawdoglib.rawdog import DayWriter, write_ascii, format_time, fill_template, safe_ftime, encode_references, get_system_encoding
from StringIO import StringIO
# Page template for the headlines file. Its single %s placeholder receives
# the rendered columns, joined by newlines, in output_write_files.
title_list = u"""
Catholic Feeds - Headlines
Headlines
%s
"""
# Wrapper for one column. Its %s placeholder receives that column's rendered
# items, joined by newlines.
colTemplate = u"""
%s
"""
# Formats one category token from a slugified tag term (used together with
# slugerate when the per-article "categories" string is built).
categoryClassTemplate = u"category-%s"
# Template for a single headline. It is filled by key from the per-article
# dict; only the "title", "author" and "feed" keys are referenced here.
item = u"""
%(title)s
%(author)s %(feed)s
"""
def safe_strftime(obj, format):
    """Format ``obj`` with ``strftime`` and return the result passed
    through ``encode_references``.

    The string returned by ``obj.strftime(format)`` is first decoded to
    unicode using the encoding reported by ``get_system_encoding()``.
    """
    formatted = obj.strftime(format)
    decoded = unicode(formatted, get_system_encoding())
    return encode_references(decoded)
import itertools
import re
def slugerate(str):
    """Turn a string into a lowercase, dash-separated slug.

    Spaces and underscores become dashes, alphanumeric characters and
    dashes are kept, and every other character is dropped.  The result is
    lowercased and runs of dashes are collapsed into a single dash.
    Leading or trailing dashes are not stripped.
    """
    separators = {u' ', u'_'}
    pieces = []
    for ch in str:
        if ch in separators:
            pieces.append(u'-')
        elif ch.isalnum() or ch == u'-':
            pieces.append(ch)
        # Anything else (punctuation, other whitespace, ...) is discarded.
    slug = u''.join(pieces).lower()
    return re.sub(u'-+', u'-', slug)
class HeadlineOutput:
    """Plugin object that writes a three-column page of article headlines.

    Two hooks are exposed as methods: ``config_option`` consumes the
    ``headlinefile`` and ``headlines`` options, and ``output_write_files``
    renders the headlines into ``self.filename``.
    """

    def __init__(self):
        # Path of the generated page; replaced by the "headlinefile" option.
        self.filename = "output/headlines.html"
        # Maximum number of headlines written; replaced by "headlines".
        self.headlines = 120
        # Not read anywhere in this class; kept so existing users of the
        # attribute keep working.
        self.titles = []

    def config_option(self, config, name, value):
        """Consume this plugin's configuration options.

        Parameters:
            config: not used here.
            name: option name.
            value: option value; for "headlines" it must be convertible
                with ``int()``.

        Returns:
            False when ``name`` was "headlinefile" or "headlines" (the
            option was handled here), True for any other option.
            NOTE(review): presumably rawdog treats False as "stop
            processing this option" -- confirm against the plugin docs.

        Raises:
            ValueError: if the "headlines" value is not an integer.
        """
        if name == "headlinefile":
            self.filename = value
            return False
        if name == "headlines":
            # The limit is used as a slice bound in output_write_files, so
            # it has to be an int; a textual value would raise TypeError
            # there.
            self.headlines = int(value)
            return False
        return True

    def output_write_files(self, rawdog, config, articles, article_dates):
        """Render the newest articles as headlines and write the page.

        Parameters:
            rawdog: object whose ``feeds[article.feed].feed_info`` provides
                the feed title for each article.
            config: not used here.
            articles: iterable of article objects exposing ``entry_info``
                and ``feed``; each must be a key of ``article_dates``.
            article_dates: mapping from article to a timestamp accepted by
                ``datetime.datetime.fromtimestamp``.

        The newest ``self.headlines`` articles are kept, ordered ascending
        by a randomised score, dealt round-robin into three columns and
        written UTF-8 encoded to ``self.filename``.

        Returns:
            True, always.
        """
        # Newest first.  reversed(sorted(...)) is kept (rather than
        # reverse=True) so that articles with equal dates keep the same
        # relative order as before.
        newest_first = list(reversed(sorted(articles, key=article_dates.get)))

        article_info = []
        title_words = {}
        authors = {}
        for article in newest_first:
            ei = article.entry_info
            # Entries without a title or link must not abort the whole page.
            article_title = ei.get(u'title') or u""
            article_link = ei.get(u'link') or u""
            article_author = u"%s —" % ei.get(u'author', u"")
            feed_title = rawdog.feeds[article.feed].feed_info[u'title']

            # Count occurrences of title words across all articles; words
            # of three characters or fewer never contribute to the score.
            for word in article_title.split():
                if len(word) <= 3:
                    title_words[word] = 0
                else:
                    title_words[word] = title_words.get(word, 0) + 1
            # NOTE(review): every author maps to 1, so the author term in
            # the score is a constant -- confirm whether a count was meant.
            authors[article_author] = 1

            # Tags may lack a term; skip those instead of crashing in
            # slugerate.
            terms = [tag.get(u'term') for tag in ei.get(u'tags', [])]
            categories = u' '.join(sorted(
                categoryClassTemplate % slugerate(term)
                for term in terms if term)).strip()

            article_info.append(dict(
                url=article_link,
                title=article_title,
                author=article_author,
                feed=feed_title,
                date=datetime.datetime.fromtimestamp(
                    article_dates[article]).isoformat(),
                article=article,
                categories=categories))

        reftime = time.time()
        article_info = article_info[:self.headlines]

        def sort_key(info):
            """Score one article dict, record it under 'score', and return
            the score scaled by a random factor in [0.5, 1.5)."""
            score = authors.get(info[u'author'], 0)
            for word in info[u'title'].split():
                score += title_words.get(word, 0)
            # Age in seconds, scaled down so it does not swamp word counts.
            score += (reftime - article_dates[info[u'article']]) / 10000
            info[u'score'] = score
            return score * (random.random() + 0.5)

        article_info = sorted(article_info, key=sort_key)

        # Deal the headlines round-robin into three columns.
        columns = [[], [], []]
        for idx, info in enumerate(article_info):
            columns[idx % 3].append(item % info)

        output = title_list % u"\n".join(
            colTemplate % u"\n".join(column) for column in columns)

        with open(self.filename, u"w") as f:
            print(u"Writing headlines to: %s" % self.filename)
            f.write(output.encode('utf-8'))
        return True
# Create the single plugin instance and attach its two hook methods, in the
# same order as before: config_option first, then output_write_files.
p = HeadlineOutput()
for _hook_name, _handler in (
        (u"config_option", p.config_option),
        (u"output_write_files", p.output_write_files)):
    rawdoglib.plugins.attach_hook(_hook_name, _handler)