git.fiddlerwoaroof.com
htmlize.py
fb9faba2
 # -*- coding: utf-8 -*-
 import sys
 import cgi
 import json
 import dateutil.parser
 import re
 import textblob
 
 def get_date(date):
     start_date = dateutil.parser.parse('Wed Oct 22 01:18')
     time_parser = re.compile(ur'(\d+) (hr|min|sec|hour)')
     try:
         return dateutil.parser.parse(date)
     except (TypeError,ValueError):
         if date.startswith('Yesterday'):
             date = date.partition(' ')[2]
             date = 'August 24 %s' % date
             return dateutil.parser.parse(date)
         elif date.startswith('Just now'):
             return start_date
         else:
             match = time_parser.match(date)
             if match:
                 num,unit = match.groups()
                 if unit == 'hr' or unit == 'hour':
                     return start_date + dateutil.relativedelta.relativedelta(hours=-int(num))
                 elif unit == 'min':
                     return start_date + dateutil.relativedelta.relativedelta(minutes=-int(num))
                 elif unit == 'sec':
                     return start_date + dateutil.relativedelta.relativedelta(seconds=-int(num))
                 else:
                     print >>sys.stderr, date
                     raise
             else:
                 print >>sys.stderr, date
                 raise
 
 # Marker that separates one post from the next inside the TNET dump.
 delimiter = u'--##--%%--##--'
 # open() is the documented way to open files; the file() constructor is
 # meant for type checks only.  The dump is decoded once, here at the I/O
 # boundary, and split into one unicode chunk per post.
 with open('TNET') as f:
     txt = f.read().decode('utf-8').split(delimiter)
 
 # Opening part of the generated page: Open Graph metadata, the jQuery
 # "jump to post #" widget, the stylesheet, and the start of the <ol> that
 # the rendered posts are placed into.
 # The posts are printed UTF-8 encoded by this script, so the page declares
 # that charset explicitly instead of leaving the browser to guess.
 beginning = u'''\
 <html prefix="og: http://ogp.me/ns#">
 <head>
     <meta charset="utf-8" />
     <title>TNET</title>
     <meta property="og:title" content="tNET" />
     <meta property="og:url" content="http://srv2.elangley.org/~edwlan/TNET.html" />
     <meta property="og:type" content="website" />
     <meta property="og:image" content="http://srv2.elangley.org/~edwlan/TNET_256.png" />
     <meta property="og:description" content="This is an archive of tNET in its full glory." />
     <script src="http://code.jquery.com/jquery-1.11.0.min.js"></script>
     <script src="http://code.jquery.com/jquery-migrate-1.2.1.min.js"></script>
     <script type="text/javascript">
        $(document).ready(function() {
            $('#submit').click(function() {
                var targ = $('#goto');
                var scrollTo = '#post_'+targ.val();
                console.log(scrollTo);
                var dest = $(scrollTo);
                if (dest.length) {
                    $('html, body').animate({
                        'scrollTop': dest.offset().top
                    })
                }
            })
            $('#goto').keyup(function(ev) {
                if (ev.which == 13) {
                    $('#submit').click();
                }
            });
        });
     </script>
     <style type="text/css">
         ol {
             padding-left:5em;
         }
         .even {
             background: hsl(0,0%,90%);
         }
         .name {
             min-width: 20em;
             background: hsl(255,50%,90%);
             margin-right: 0.5em;
             width: 100%;
         }
         #container {
             max-width: 900px;
             margin: auto auto;
         }
         .entry + .entry {
             margin-top: 0.5em;
         }
         .entry:hover {
             outline: thin black solid;
         }
 
         div#nav {
             background:olive;
             color:white;
             position: fixed;
             left: 1em;
             top: 1em;
             border-radius:1em;
             padding:1em;
             width:5em;
         }
 
         input#goto {
             width: 100%;
             background: white;
             border: thin solid black;
         }
 
         input#submit {
             background: gray;
             border: thin solid black;
             margin: auto auto;
             position: relative;
             width: 50%;
             left: 25%;
         }
         .text {
             color: white;
         }
     </style>
 </head>
 <body>
 <div id="nav">
     Post #:
     <input type="text" id="goto"></input>
     <input type="button" value="Go" id="submit"></input>
 </div>
 <div id="container">
 <ol>'''
 # Closing part of the page: ends the list, the container and the document.
 end = u'''\
 </ol>
 </div>
 </body>
 </html>'''
 
 # Accumulators filled while the posts are processed:
 #   data -- one dict per post (also what --json dumps),
 #   out  -- the matching rendered <li> fragments for the HTML page.
 data = []
 out = []

 # Running 1-based post number; it becomes the post_<n> anchor id.
 counter = 0

 # Separator between the fields of a post's trailing metadata line
 # (a middle dot with a space on either side).
 splitter = u' \xb7 '

 # One rendered post.  Filled with %-formatting from a dict that provides
 # index, classes, likes, name, date, polarity, subjectivity and text.
 div_template = u'''\
 <li id="post_%(index)d" class="entry %(classes)s" data-likes="%(likes)d" data-name="%(name)s" data-date="%(date)s" style="background-color: hsl(%(polarity)d, %(subjectivity)d%%, 50%%);">
     <div class="name">%(name)s at %(date)s with %(likes)d likes</div>
     <p class="text">%(text)s</p>
 </li>'''
 
 def get_name(line):
     """Pull the poster's name off the front of a post's first line.

     The name is the first whitespace-separated token, any following tokens
     that end in '.' or are shorter than four characters, and the first token
     after those.  A handful of hard-coded posters are then special-cased.

     :param line: first line of a post, starting with the poster's name.
     :returns: ``(name, name_length)``.  ``name_length`` is the length of the
         name as it was assembled from the line, measured *before* the final
         alias substitution, so it can differ from ``len(name)``.
     """
     tokens = line.split()

     # Count how many tokens after the first look like initials or short
     # particles; the name runs through the first token that does not.
     stop = 1
     for token in tokens[1:]:
         if not (token.endswith('.') or len(token) < 4):
             break
         stop += 1
     parts = tokens[:stop + 1]

     # Per-poster corrections keyed on the second token.
     if len(parts) > 2:
         second = parts[1]
         if second in ('HF', 'Lng'):
             parts = parts[:2]
         elif second == 'Haaf':
             parts = tokens[:3]

     name = ' '.join(parts)
     # These two posters need one more token than the rule above collects.
     if name in ('Christopher Michael', 'Jody Haaf'):
         name = ' '.join(tokens[:3])

     # Measured before aliasing: this is the length of the text on the line.
     name_length = len(name)

     aliases = {
         'Peregrine Bonaventure': 'Scott Weinberg',
         'JA Escalante': 'Jehoshaphat Escalante',
     }
     return aliases.get(name, name), name_length
 
 # Turn every delimiter-separated chunk into one post record (appended to
 # ``data``) and one rendered <li> fragment (appended to ``out``).
 for k in txt:
     # HTML-escape each line of the chunk and drop the empty ones.
     lines = [cgi.escape(line) for line in k.strip().split('\n')]
     lines = [line for line in lines if line]
     # A blank chunk leaves no lines at all; skip it rather than fail on
     # lines[-1].  Chunks whose last line lacks 'ike' (presumably the
     # "Like" marker of the metadata line -- confirm) are skipped as well.
     if not lines or 'ike' not in lines[-1]:
         continue

     # Metadata line: first field is the timestamp, last one the like count.
     items = lines[-1].split(splitter)
     try:
         date = get_date(items[0])
     except UnicodeEncodeError:
         # Report the offending post and stop processing the remaining ones.
         print >> sys.stderr, '**problem:', items, items[0]
         print >> sys.stderr, lines
         break
     likes = int(items[-1]) if items[-1].isdigit() else 0

     counter += 1
     name, name_length = get_name(lines[0])
     class_ = u'odd' if counter % 2 == 1 else u'even'

     # Body text: everything except the metadata line, with the leading
     # name (name_length characters) cut off the front.
     lines = lines[:-1]
     j = '\n'.join(lines)[name_length:].lstrip()
     blob = textblob.TextBlob(j)
     data.append(dict(
             name = name,
             name_length = name_length,
             date = date,
             likes = likes,
             text = j,
             index = counter,
             classes = class_,
             # Sentiment polarity is rescaled from [-1, 1] to 0..255 and
             # subjectivity from [0, 1] to 25..100; the template uses them
             # as the hue and saturation of the post's background colour.
             polarity = int(round(((blob.sentiment.polarity + 1)/2)*255)),
             subjectivity = int(round(blob.sentiment.subjectivity * 75 + 25))
     ))

     # HTML-only copy of the record: ISO date string, <br /> line breaks.
     ntemplate = data[-1].copy()
     ntemplate['date'] = ntemplate['date'].isoformat()
     ntemplate['text'] = u'<br />'.join(ntemplate['text'].split('\n'))
     # cgi.escape() above leaves double quotes alone, but the name is put
     # inside a double-quoted attribute (data-name="..."), so escape them.
     ntemplate['name'] = ntemplate['name'].replace(u'"', u'&quot;')
     out.append(div_template % ntemplate)
 
 import argparse
 a = argparse.ArgumentParser()
 a.add_argument('--json', '-j', action='store_true', default=False)
 a.add_argument('--fwc', '-w', action='store_true', default=False)
 args = a.parse_args()
 if args.json:
     import json
     import datetime
     class DateTimeEncoder(json.JSONEncoder):
         def default(self, obj):
             if isinstance(obj, datetime.datetime):
                 return obj.isoformat()
             return json.JSONEncoder.default(self, obj)
     print json.dumps(data, cls=DateTimeEncoder, indent=2)
 elif args.fwc:
     for x in data:
         txt = x['text'].split('\n')[:-1]
         txt = u'\n'.join(txt)
         txt = txt[len(x['name'])+1:]
         txt = u' '.join(filter(None,(y for y in txt.split() if y.isalpha())))
         print txt.encode('utf-8'),
 else:
     print beginning
     for x in out:
         print x.encode('utf-8')
     print end