git.fiddlerwoaroof.com
plugins/98-titles.py
a88b5484
 from __future__ import division
 import os, time, datetime, calendar
 import rawdoglib.plugins
d9cc423c
 import random
a88b5484
 from rawdoglib.rawdog import DayWriter, write_ascii, format_time, fill_template, safe_ftime, encode_references, get_system_encoding
 from StringIO import StringIO
 
d9cc423c
 # Skeleton of the generated headlines page. It contains exactly one "%s"
 # placeholder (inside <main>), which HeadlineOutput.output_write_files fills
 # with the rendered columns via the "%" operator.
 title_list = u"""
a88b5484
 <!DOCTYPE html>
 <html lang="en">
564b85e6
   <head prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb# join-marrow: http://ogp.me/ns/fb/join-marrow#">
     <meta property="og:type" content="join-marrow:headlines" />
     <meta property="og:title" content="Current Headlines" />
     <meta property="og:site_name" content="Catholic News" />
     <meta property="og:url" content="http://planet.joinmarrow.com/headlines" />
     <meta property="og:image" content="http://planet.joinmarrow.com/planet.joinmarrow.png" />
     <meta property="og:image:width"  content="512" />
     <meta property="og:image:height" content="512" />
     <meta property="og:description" content="Headlines from a collection of Catholic blogs. Visit us to take the pulse of the Catholic internet" />
     <meta property="fb:app_id" content="897925460261572" />
     <base target="_blank" />
     <meta http-equiv="refresh" content="900"> <!-- refresh every 15 minutes -->
 
     <link rel="apple-touch-icon" sizes="57x57" href="/apple-touch-icon-57x57.png">
     <link rel="apple-touch-icon" sizes="60x60" href="/apple-touch-icon-60x60.png">
     <link rel="apple-touch-icon" sizes="72x72" href="/apple-touch-icon-72x72.png">
     <link rel="apple-touch-icon" sizes="76x76" href="/apple-touch-icon-76x76.png">
     <link rel="apple-touch-icon" sizes="114x114" href="/apple-touch-icon-114x114.png">
     <link rel="apple-touch-icon" sizes="120x120" href="/apple-touch-icon-120x120.png">
     <link rel="apple-touch-icon" sizes="144x144" href="/apple-touch-icon-144x144.png">
     <link rel="apple-touch-icon" sizes="152x152" href="/apple-touch-icon-152x152.png">
     <link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon-180x180.png">
     <link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
     <link rel="icon" type="image/png" href="/favicon-194x194.png" sizes="194x194">
     <link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
     <link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
     <link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
     <link rel="manifest" href="/manifest.json">
d9cc423c
     <link rel="mask-icon" href="/safari-pinned-tab.svg" color="#5bbad5">
     <meta name="msapplication-TileColor" content="#888888">
564b85e6
     <meta name="msapplication-TileImage" content="/mstile-144x144.png">
     <meta name="theme-color" content="#ffffff">
 
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1">
 
     <title>Catholic Feeds - Headlines</title>
     <link rel="stylesheet" href="headline-style.css">
     <!-- Piwik -->
     <script type="text/javascript">
       var _paq = _paq || [];
       _paq.push(['trackPageView']);
       _paq.push(['enableLinkTracking']);
       (function() {
        var u="//piwik.elangley.org/";
        _paq.push(['setTrackerUrl', u+'piwik.php']);
        _paq.push(['setSiteId', 1]);
        var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
        g.type='text/javascript'; g.async=true; g.defer=true; g.src=u+'piwik.js'; s.parentNode.insertBefore(g,s);
        })();
      </script>
      <noscript><p><img src="//piwik.elangley.org/piwik.php?idsite=1" style="border:0;" alt="" /></p></noscript>
      <!-- End Piwik Code -->
 
    </head>
    <body>
      <div id="fb-root"></div>
      <script>(function(d, s, id) {
        var js, fjs = d.getElementsByTagName(s)[0];
        if (d.getElementById(id)) return;
        js = d.createElement(s); js.id = id;
        js.src = "//connect.facebook.net/en_US/sdk.js#xfbml=1&version=v2.5&appId=897925460261572";
        fjs.parentNode.insertBefore(js, fjs);
        }(document, 'script', 'facebook-jssdk'));</script>
 
      <h1>Headlines</h1>
      <main>
      %s
      </main>
    </body>
  </html>
a88b5484
 """
 
d9cc423c
 # Wrapper for a single column of headlines; "%s" receives that column's
 # article snippets joined with newlines.
 colTemplate = u"""
a88b5484
 <div class="col">
 %s
 </div>
 """
 
d9cc423c
 # CSS class name pattern for one tag; "%s" receives the slugified tag term.
 categoryClassTemplate = u"category-%s"
 
 # Markup for one headline. Filled with a dict providing the keys score, date,
 # categories, url, title, author and feed.
 item = u"""
 <article data-v="%(score)s" data-date="%(date)s" class="%(categories)s">
564b85e6
 <a href="%(url)s" rel="nofollow">
 <h2>%(title)s</h2>
 <span class="source">%(author)s %(feed)s</span>
a88b5484
 </a>
 </article>
 """
 
 def safe_strftime(obj, format):
    """Format ``obj`` with its ``strftime`` method and encode the result.

    The value returned by ``obj.strftime(format)`` is decoded to unicode
    using the encoding reported by ``get_system_encoding()`` and is then
    passed through ``encode_references``; that helper's return value is
    handed back unchanged.
    """
    formatted = obj.strftime(format)
    decoded = unicode(formatted, get_system_encoding())
    return encode_references(decoded)
564b85e6
 import itertools
a88b5484
 
d9cc423c
 import re
 def slugerate(str):
   """Turn a piece of text into a lowercase, hyphen-separated slug.

   Characters that are neither alphanumeric nor one of space, hyphen or
   underscore are dropped; spaces and underscores become hyphens; the result
   is lowercased and every run of hyphens is collapsed into a single one.

   ``None`` or an empty string yields an empty slug instead of raising, so
   optional values (such as a tag without a term) can be passed directly.
   """
   # The parameter keeps its historical name for existing callers, but the
   # body works on a local alias so the ``str`` builtin is not reused as a
   # scratch variable.
   text = str
   if not text:
     return u''
   separators = (u' ', u'_')
   kept = []
   for c in text:
     if c in separators:
       kept.append(u'-')
     elif c.isalnum() or c == u'-':
       kept.append(c)
   return re.sub(u'-+', u'-', u''.join(kept).lower())
 
 
a88b5484
 class HeadlineOutput:
   """Plugin object that renders recent article titles into one HTML page.

   Two configuration options are understood: ``headlinefile`` (path of the
   generated page) and ``headlines`` (maximum number of entries on it).
   """

   def __init__(self):
     # Where the page is written unless "headlinefile" overrides it.
     self.filename = "output/headlines.html"
     # Maximum number of headlines unless "headlines" overrides it.
     self.headlines = 120
     # Not read by any method of this class; kept as an existing attribute.
     self.titles = []

   def config_option(self, config, name, value):
     """Consume the options owned by this plugin.

     Returns False when ``name`` was one of this plugin's options and has
     been stored, True for any other option.

     Raises ValueError when the ``headlines`` value is not an integer.
     """
     if name == "headlinefile":
       self.filename = value
       return False
     if name == "headlines":
       # The count is used as a slice bound below, so it has to be an
       # integer even when the configuration supplies it as text.
       self.headlines = int(value)
       return False
     return True

   def output_write_files(self, rawdog, config, articles, article_dates):
     """Build the headline page and write it to ``self.filename``.

     rawdog        -- object whose ``feeds`` mapping yields, per
                      ``article.feed``, an object with a ``feed_info`` dict
     config        -- unused here
     articles      -- iterable of articles exposing ``entry_info`` and ``feed``
     article_dates -- mapping from article to a numeric timestamp

     The newest ``self.headlines`` articles are scored, ordered by that score
     multiplied by a random factor in [0.5, 1.5), dealt round-robin into
     three columns and written out UTF-8 encoded. Always returns True.

     Raises KeyError when an entry has no ``title`` or ``link``.
     """
     # Ascending sort followed by a reversal (rather than reverse=True) keeps
     # the original relative order of articles that share a date.
     newest_first = sorted(articles, key=article_dates.get)[::-1]

     title_words = {}
     authors = {}
     article_info = []
     for article in newest_first:
       ei = article.entry_info
       articleTitle = ei[u'title']
       articleLink = ei[u'link']
       articleAuthor = u"%s &mdash;" % ei.get(u'author', u"")
       feedTitle = rawdog.feeds[article.feed].feed_info[u'title']

       # Word statistics span every article, not only the ones displayed.
       # Any occurrence of a word of three characters or fewer resets its
       # count to zero; longer words are counted.
       for word in articleTitle.split():
         if len(word) <= 3:
           title_words[word] = 0
         else:
           title_words[word] = title_words.get(word, 0) + 1

       authors[articleAuthor] = 1

       # A tag may lack a term; such tags contribute no CSS class.
       terms = [tag.get(u'term') for tag in ei.get(u'tags', [])]
       classes = sorted(categoryClassTemplate % slugerate(term)
                        for term in terms if term is not None)
       categories = u' '.join(classes).strip()

       # NOTE(review): title, link, author and feed title are interpolated
       # into the markup as-is; confirm upstream sanitises them.
       article_info.append(dict(
         url=articleLink,
         title=articleTitle,
         author=articleAuthor,
         feed=feedTitle,
         date=datetime.datetime.fromtimestamp(article_dates[article]).isoformat(),
         article=article,
         categories=categories))

     reftime = time.time()
     article_info = article_info[:self.headlines]

     def score_of(info):
       # Author weight + summed title-word counts + age in units of 10000 s.
       word_total = sum(title_words.get(word, 0) for word in info[u'title'].split())
       age = (reftime - article_dates[info[u'article']]) / 10000
       return authors.get(info[u'author'], 0) + word_total + age

     # The plain score is stored for the template (data-v attribute); the
     # ordering itself uses the score scaled by a random factor.
     for info in article_info:
       info[u'score'] = score_of(info)
     article_info.sort(key=lambda info: info[u'score'] * (random.random() + 0.5))

     # Deal the entries round-robin into three columns.
     columns = [[], [], []]
     for idx, article_datum in enumerate(article_info):
       columns[idx % 3].append(item % article_datum)

     body = u"\n".join(colTemplate % u"\n".join(column) for column in columns)
     output = title_list % body
     with open(self.filename, u"w") as f:
       print(u"Writing headlines to: %s" % self.filename)
       f.write(output.encode('utf-8'))
     return True
a88b5484
 
 
 
 # Single plugin instance whose bound methods are registered as hooks.
 p = HeadlineOutput()
d9cc423c
 # Register the instance's methods for the "config_option" and
 # "output_write_files" hooks.
 rawdoglib.plugins.attach_hook(u"config_option", p.config_option)
 rawdoglib.plugins.attach_hook(u"output_write_files", p.output_write_files)