# Source: git.fiddlerwoaroof.com
# (the web view's "Raw Blame History" navigation text was captured with the file)
from __future__ import division
import os, time, datetime, calendar
import rawdoglib.plugins
import random
from rawdoglib.rawdog import DayWriter, write_ascii, format_time, fill_template, safe_ftime, encode_references, get_system_encoding
from StringIO import StringIO

# Full HTML skeleton of the headlines page.  It is filled with the
# %-operator, and the single "%s" inside <main> receives the rendered
# columns.  Because of that, any literal percent sign added to this template
# must be written as "%%".
title_list = u"""
<!DOCTYPE html>
<html lang="en">
  <head prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb# join-marrow: http://ogp.me/ns/fb/join-marrow#">
    <meta property="og:type" content="join-marrow:headlines" />
    <meta property="og:title" content="Current Headlines" />
    <meta property="og:site_name" content="Catholic News" />
    <meta property="og:url" content="http://planet.joinmarrow.com/headlines" />
    <meta property="og:image" content="http://planet.joinmarrow.com/planet.joinmarrow.png" />
    <meta property="og:image:width"  content="512" />
    <meta property="og:image:height" content="512" />
    <meta property="og:description" content="Headlines from a collection of Catholic blogs. Visit us to take the pulse of the Catholic internet" />
    <meta property="fb:app_id" content="897925460261572" />
    <base target="_blank" />
    <meta http-equiv="refresh" content="900"> <!-- refresh every 15 minutes -->

    <link rel="apple-touch-icon" sizes="57x57" href="/apple-touch-icon-57x57.png">
    <link rel="apple-touch-icon" sizes="60x60" href="/apple-touch-icon-60x60.png">
    <link rel="apple-touch-icon" sizes="72x72" href="/apple-touch-icon-72x72.png">
    <link rel="apple-touch-icon" sizes="76x76" href="/apple-touch-icon-76x76.png">
    <link rel="apple-touch-icon" sizes="114x114" href="/apple-touch-icon-114x114.png">
    <link rel="apple-touch-icon" sizes="120x120" href="/apple-touch-icon-120x120.png">
    <link rel="apple-touch-icon" sizes="144x144" href="/apple-touch-icon-144x144.png">
    <link rel="apple-touch-icon" sizes="152x152" href="/apple-touch-icon-152x152.png">
    <link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon-180x180.png">
    <link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
    <link rel="icon" type="image/png" href="/favicon-194x194.png" sizes="194x194">
    <link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
    <link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
    <link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
    <link rel="manifest" href="/manifest.json">
    <link rel="mask-icon" href="/safari-pinned-tab.svg" color="#5bbad5">
    <meta name="msapplication-TileColor" content="#888888">
    <meta name="msapplication-TileImage" content="/mstile-144x144.png">
    <meta name="theme-color" content="#ffffff">

    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">

    <title>Catholic Feeds - Headlines</title>
    <link rel="stylesheet" href="headline-style.css">
    <!-- Piwik -->
    <script type="text/javascript">
      var _paq = _paq || [];
      _paq.push(['trackPageView']);
      _paq.push(['enableLinkTracking']);
      (function() {
       var u="//piwik.elangley.org/";
       _paq.push(['setTrackerUrl', u+'piwik.php']);
       _paq.push(['setSiteId', 1]);
       var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
       g.type='text/javascript'; g.async=true; g.defer=true; g.src=u+'piwik.js'; s.parentNode.insertBefore(g,s);
       })();
     </script>
     <noscript><p><img src="//piwik.elangley.org/piwik.php?idsite=1" style="border:0;" alt="" /></p></noscript>
     <!-- End Piwik Code -->

   </head>
   <body>
     <div id="fb-root"></div>
     <script>(function(d, s, id) {
       var js, fjs = d.getElementsByTagName(s)[0];
       if (d.getElementById(id)) return;
       js = d.createElement(s); js.id = id;
       js.src = "//connect.facebook.net/en_US/sdk.js#xfbml=1&version=v2.5&appId=897925460261572";
       fjs.parentNode.insertBefore(js, fjs);
       }(document, 'script', 'facebook-jssdk'));</script>

     <h1>Headlines</h1>
     <main>
     %s
     </main>
   </body>
 </html>
"""

# Wrapper for one page column; "%s" receives that column's article markup,
# already joined with newlines.
colTemplate = u"""
<div class="col">
%s
</div>
"""

# CSS class name for a single feed tag; "%s" receives the slugified tag term.
categoryClassTemplate = u"category-%s"

# Markup for one headline.  It is filled from a dict, so every %(name)s key
# used here (score, date, categories, url, title, author, feed) must be
# present in the per-article dict built by HeadlineOutput.output_write_files.
item = u"""
<article data-v="%(score)s" data-date="%(date)s" class="%(categories)s">
<a href="%(url)s" rel="nofollow">
<h2>%(title)s</h2>
<span class="source">%(author)s %(feed)s</span>
</a>
</article>
"""

def safe_strftime(obj, format):
   """Format *obj* with strftime and return it as ASCII-encoded HTML.

   The strftime result is decoded using the system encoding reported by
   get_system_encoding() and then handed to encode_references().
   """
   formatted = obj.strftime(format)
   decoded = unicode(formatted, get_system_encoding())
   return encode_references(decoded)
import itertools

import re
def slugerate(str):
  """Return a lowercase, hyphen-separated slug built from *str*.

  Alphanumeric characters and hyphens are kept, spaces and underscores
  become hyphens, and every other character is dropped.  Runs of hyphens
  are then collapsed to a single one.  Leading or trailing hyphens are not
  stripped.
  """
  to_hyphen = {u' ', u'_'}
  allowed_punct = {u' ', u'-', u'_'}
  pieces = []
  for ch in str:
    if not (ch.isalnum() or ch in allowed_punct):
      continue  # drop punctuation and other symbols entirely
    pieces.append(u'-' if ch in to_hyphen else ch)
  slug = u''.join(pieces).lower()
  return re.sub(u'-+', u'-', slug)


class HeadlineOutput:
  """State and hook implementations for the headlines-page plugin.

  Attributes:
    filename: path the rendered headlines page is written to.
    headlines: maximum number of articles placed on the page (an int).
    titles: initialised to an empty list; not used by any method here.
  """

  def __init__(self):
    self.filename = "output/headlines.html"
    self.headlines = 120
    self.titles = []

  def config_option(self, config, name, value):
    """Handle the "headlinefile" and "headlines" configuration options.

    Args:
      config: the configuration object passed by the hook (unused).
      name: option name.
      value: option value; "headlines" must be parseable by int().

    Returns:
      False when the option was consumed here, True for any other option
      (presumably the hook protocol's way to let other handlers see it --
      confirm against the rawdog plugin documentation).

    Raises:
      ValueError: if the "headlines" value is not an integer.
    """
    if name == "headlinefile":
      self.filename = value
      return False
    if name == "headlines":
      # Stored as an int because it is later used as a slice bound; a raw
      # config string there would raise TypeError.
      self.headlines = int(value)
      return False
    return True

  def output_write_files(self, rawdog, config, articles, article_dates):
    """Render the newest articles into a three-column page and write it.

    Args:
      rawdog: object whose ``feeds`` mapping gives each feed's ``feed_info``.
      config: the configuration object passed by the hook (unused).
      articles: iterable of articles exposing ``entry_info`` and ``feed``.
      article_dates: mapping from article to its timestamp in seconds.

    Returns:
      True, always.
    """
    # Newest first.  reversed(sorted(...)) is kept on purpose: it also
    # reverses the relative order of articles sharing a timestamp.
    articles = list(reversed(sorted(articles, key=article_dates.get)))

    article_info = []
    title_words = {}  # title word -> occurrences over all articles
    authors = {}      # formatted author string -> 1 for every one seen
    for article in articles:
      ei = article.entry_info
      articleTitle = ei[u'title']
      articleLink = ei[u'link']
      articleAuthor = u"%s &mdash;" % ei.get(u'author', u"")
      feedTitle = rawdog.feeds[article.feed].feed_info[u'title']

      for word in articleTitle.split():
        if len(word) <= 3:
          # Short words never contribute to the score.
          title_words[word] = 0
        else:
          title_words[word] = title_words.get(word, 0) + 1

      authors[articleAuthor] = 1

      # A tag without a term cannot be slugified (iterating None raises
      # TypeError), so such tags are skipped.
      terms = [tag.get(u'term') for tag in ei.get(u'tags', [])]
      categories = u' '.join(sorted(
        categoryClassTemplate % slugerate(term)
        for term in terms if term is not None)).strip()

      # NOTE(review): title, link, author and feed title are interpolated
      # into HTML without escaping; presumably they are already sanitised
      # upstream -- confirm before trusting arbitrary feeds.
      article_info.append(dict(
        url=articleLink,
        title=articleTitle,
        author=articleAuthor,
        feed=feedTitle,
        date=datetime.datetime.fromtimestamp(article_dates[article]).isoformat(),
        article=article,
        categories=categories))

    reftime = time.time()
    # Word counts above cover every article; only the newest ones are shown.
    article_info = article_info[:self.headlines]

    def sort_key(x):
      """Store the article's score in x and return it with random jitter."""
      score = authors.get(x[u'author'], 0)
      for word in x[u'title'].split():
        score += title_words.get(word, 0)
      # Age in seconds, scaled down; older articles score higher.
      score += (reftime - article_dates[x[u'article']]) / 10000
      x[u'score'] = score  # read by the item template's data-v attribute
      # Multiply by a factor in [0.5, 1.5) so the order varies per run.
      return score * (random.random() + 0.5)

    article_info = sorted(article_info, key=sort_key)

    # Deal the headlines round-robin into three columns.
    items = [[], [], []]
    for idx, article_datum in enumerate(article_info):
      items[idx % 3].append(item % article_datum)

    output = title_list % u"\n".join(
      colTemplate % u"\n".join(column) for column in items)
    # Binary mode: what is written is the already UTF-8 encoded byte string.
    with open(self.filename, "wb") as f:
      print(u"Writing headlines to: %s" % self.filename)
      f.write(output.encode('utf-8'))
    return True



# Create the single plugin instance at import time and register its bound
# methods for the two hooks it implements; the hook names match the method
# names.
p = HeadlineOutput()
rawdoglib.plugins.attach_hook(u"config_option", p.config_option)
rawdoglib.plugins.attach_hook(u"output_write_files", p.output_write_files)