from __future__ import division import os, time, datetime, calendar import rawdoglib.plugins import random from rawdoglib.rawdog import DayWriter, write_ascii, format_time, fill_template, safe_ftime, encode_references, get_system_encoding from StringIO import StringIO title_list = u""" Catholic Feeds - Headlines

Headlines

%s
""" colTemplate = u"""
%s
""" categoryClassTemplate = u"category-%s" item = u"""

%(title)s

%(author)s %(feed)s
""" def safe_strftime(obj, format): """Call the strftime method on an object, and convert the result to ASCII-encoded HTML.""" u = unicode(obj.strftime(format), get_system_encoding()) return encode_references(u) import itertools import re def slugerate(str): str = [u'-' if c in {u' ', u'_'} else c for c in str if c.isalnum() or c in {u' ', u'-', u'_'}] str = u''.join(str).lower() str = re.sub(u'-+', u'-', str) return str class HeadlineOutput: def __init__(self): self.filename = "output/headlines.html" self.headlines = 120 self.titles = [] def config_option(self, config, name, value): if name == "headlinefile": self.filename = value return False elif name == "headlines": self.headlines = value return False else: return True def output_write_files(self, rawdog, config, articles, article_dates): output = title_list items = [[], [], []] titles = [] articles = list(reversed(sorted(articles, key=article_dates.get))) article_info = [] title_words = {} authors = {} for article in articles: ei = article.entry_info articleTitle = ei[u'title'] articleLink = ei[u'link'] articleAuthor = u"%s —" % ei.get(u'author', u"") feedTitle = rawdog.feeds[article.feed].feed_info[u'title'] for word in articleTitle.split(): if len(word) <= 3: title_words[word] = 0 else: title_words[word] = title_words.get(word, 0) + 1 authors[articleAuthor] = 1 categories = u' '.join(sorted([categoryClassTemplate % slugerate(y.get(u'term')) for y in ei.get(u'tags', [])])).strip() #ha! #if categories: print categories.encode('utf-8') article_info.append(dict(url=articleLink,title=articleTitle,author=articleAuthor,feed=feedTitle,date=datetime.datetime.fromtimestamp(article_dates[article]).isoformat(), article=article, categories=categories)) reftime = time.time() article_info = article_info[:self.headlines] def sort_key(x): words = x[u'title'].split() score = 0 score += authors.get(x[u'author'],0) for word in words: score += title_words.get(word,0) score += (reftime - article_dates[x[u'article']]) / 10000 x[u'score'] = score return score * (random.random() + 0.5) article_info = sorted(article_info, key=sort_key) buckets = {} #for itm in article_info: # buckets.setdefault(sort_key(itm),[]).append(itm) #article_info = [] #keys = itertools.cycle(sorted(buckets.keys())) #while buckets != {}: # key = keys.next() # cur_list = buckets.get(key) # if cur_list is None: continue # article_info.append(cur_list.pop(0)) # if cur_list == []: # del buckets[key] print map(sort_key, article_info) for idx, article_datum in enumerate(article_info[:self.headlines]): items[idx % 3].append(item % article_datum) while len(items) > 3: print u"This should be empty:", items.pop() output %= u"\n".join(colTemplate % u"\n".join(rows for rows in column) for column in items) with open(self.filename, u"w") as f: print u"Writing headlines to: %s" % self.filename f.write(output.encode('utf-8')) return True p = HeadlineOutput() rawdoglib.plugins.attach_hook(u"config_option", p.config_option) rawdoglib.plugins.attach_hook(u"output_write_files", p.output_write_files)