# -*- coding: utf-8 -*-
"""Render the scraped tNET thread dump as HTML, JSON or a plain word list.

The input file ``TNET`` (UTF-8, read from the current directory) holds one
post per chunk, chunks being separated by ``delimiter``.  The first line of
a chunk starts with the poster's name; the last line is a footer whose
fields are separated by ``splitter``: the first field is the post date and
the last field, when numeric, is the like count.

Usage:
    script            -> HTML page on stdout
    script --json     -> the parsed posts as JSON
    script --fwc      -> only the alphabetic words of every post

The code runs unchanged on Python 2.7 and Python 3.
"""
import argparse
import datetime
import io
import json
import re
import sys

import dateutil.parser
import dateutil.relativedelta

try:  # Python 3.2+
    from html import escape as _escape_html
except ImportError:  # Python 2, where cgi.escape is the equivalent
    from cgi import escape as _escape_html


# Separator between posts in the TNET dump.
delimiter = u'--##--%%--##--'

# Separator between the footer fields (space, U+00B7 middle dot, space).
splitter = u' \xb7 '

# Reference instant that 'Just now' and relative ages ('3 hrs') are measured
# from.  The string carries no year, so dateutil fills in the current one.
_SCRAPE_TIME = dateutil.parser.parse('Wed Oct 22 01:18')

# Relative age at the start of a footer date, e.g. '3 hrs' or '15 mins'.
_RELATIVE_AGE = re.compile(r'(\d+) (hr|min|sec|hour)')

# Regex unit -> relativedelta keyword.
_AGE_UNITS = {
    'hr': 'hours',
    'hour': 'hours',
    'min': 'minutes',
    'sec': 'seconds',
}

# Posters whose displayed name is replaced by another one.
_ALIASES = {
    'Peregrine Bonaventure': 'Scott Weinberg',
    'JA Escalante': 'Jehoshaphat Escalante',
}

beginning = u'''\
<html prefix="og: http://ogp.me/ns#">
<head>
<title>TNET</title>
<meta property="og:title" content="tNET" />
<meta property="og:url" content="http://srv2.elangley.org/~edwlan/TNET.html" />
<meta property="og:type" content="website" />
<meta property="og:image" content="http://srv2.elangley.org/~edwlan/TNET_256.png" />
<meta property="og:description" content="This is an archive of tNET in its full glory." />
<script src="http://code.jquery.com/jquery-1.11.0.min.js"></script>
<script src="http://code.jquery.com/jquery-migrate-1.2.1.min.js"></script>
<script type="text/javascript">
$(document).ready(function() {
  $('#submit').click(function() {
    var targ = $('#goto');
    var scrollTo = '#post_'+targ.val();
    console.log(scrollTo);
    $('html, body').animate({ 'scrollTop': $(scrollTo).offset().top })
  })
  $('#goto').keyup(function(ev) {
    if (ev.which == 13) { $('#submit').click(); }
  });
});
</script>
<style type="text/css">
ol { padding-left:5em; }
.even { background: hsl(0,0%,90%); }
.name { min-width: 20em; background: hsl(255,50%,90%); margin-right: 0.5em; width: 100%; }
#container { max-width: 900px; margin: auto auto; }
.entry + .entry { margin-top: 0.5em; }
.entry:hover { outline: thin black solid; }
div#nav { background:olive; color:white; position: fixed; left: 1em; top: 1em; border-radius:1em; padding:1em; width:5em; }
input#goto { width: 100%; background: white; border: thin solid black; }
input#submit { background: gray; border: thin solid black; margin: auto auto; position: relative; width: 50%; left: 25%; }
.text { color: white; }
</style>
</head>
<body>
<div id="nav">
Post #: <input type="text" id="goto"></input>
<input type="button" value="Go" id="submit"></input>
</div>
<div id="container">
<ol>'''

end = u'''\
</ol>
</div>
</body>
</html>'''

# One <li> per post; filled with %-formatting from a post dict.
div_template = u'''\
<li id="post_%(index)d" class="entry %(classes)s" data-likes="%(likes)d" data-name="%(name)s" data-date="%(date)s" style="background-color: hsl(%(polarity)d, %(subjectivity)d%%, 50%%);">
<div class="name">%(name)s at %(date)s with %(likes)d likes</div>
<p class="text">%(text)s</p>
</li>'''


def _emit(text, stream=None):
    """Write *text* to *stream* (default stdout) as UTF-8 bytes.

    Python 3 text streams expose the byte layer as ``.buffer``; Python 2
    streams accept bytes directly.
    """
    stream = sys.stdout if stream is None else stream
    getattr(stream, 'buffer', stream).write(text.encode('utf-8'))


def _warn(*parts):
    """Write the space-separated *parts* and a newline to stderr."""
    _emit(u' '.join(u'%s' % (part,) for part in parts) + u'\n', sys.stderr)


def get_date(date):
    """Convert a footer date string into a ``datetime``.

    Anything dateutil can parse is returned as parsed.  Otherwise three
    relative forms are understood:

    * ``'Yesterday ...'`` - the rest of the string is parsed as a time on
      August 24.  NOTE(review): this day is hard-coded and differs from the
      reference instant used below (October 22) - confirm it is intended.
    * ``'Just now'`` - the reference instant itself.
    * ``'<n> hr|hour|min|sec...'`` - the reference instant minus that age.

    Args:
        date: the first field of a post footer.

    Returns:
        A naive ``datetime.datetime``.

    Raises:
        ValueError / TypeError: re-raised from dateutil when none of the
            forms above matches; the offending string is written to stderr
            first.
    """
    try:
        return dateutil.parser.parse(date)
    except (TypeError, ValueError):
        if date.startswith('Yesterday'):
            return dateutil.parser.parse(
                'August 24 %s' % date.partition(' ')[2])
        if date.startswith('Just now'):
            return _SCRAPE_TIME
        match = _RELATIVE_AGE.match(date)
        if match is None:
            _warn(date)
            raise  # re-raise the original parse error
        amount, unit = match.groups()
        age = {_AGE_UNITS[unit]: -int(amount)}
        return _SCRAPE_TIME + dateutil.relativedelta.relativedelta(**age)


def get_name(line):
    """Split the poster's name off the first line of a post.

    The name is the first word plus the following run of short words
    (fewer than four characters, or ending in ``'.'``) plus one more word,
    with a few hard-coded corrections for specific posters.

    Args:
        line: first line of a post.

    Returns:
        ``(name, name_length)``.  ``name_length`` is the length of the name
        as it appears in *line*; it is measured before the alias
        substitution so callers can slice the name off the text.
    """
    words = line.split()
    name_end = 1
    while name_end < len(words) and (
            words[name_end].endswith('.') or len(words[name_end]) < 4):
        name_end += 1
    name_words = words[:name_end + 1]

    # 'HF' and 'Lng' are short enough to extend the scan by one word too
    # many; for them the name is just the first two words.
    if len(name_words) > 2 and name_words[1] in ('HF', 'Lng'):
        name_words = name_words[:2]
    name = ' '.join(name_words)

    # Two posters whose names are three words long although the second
    # word is long enough to stop the scan.
    if name in ('Christopher Michael', 'Jody Haaf'):
        name = ' '.join(words[:3])

    name_length = len(name)
    return _ALIASES.get(name, name), name_length


def _sentiment_colour(text):
    """Map the TextBlob sentiment of *text* to ``(hue, saturation)``.

    Polarity (-1..1) is scaled to a hue of 0..255 and subjectivity (0..1)
    to a saturation of 25..100 percent.
    """
    # Imported lazily so that the helpers above stay importable, and
    # --help keeps working, on machines without TextBlob.
    import textblob

    sentiment = textblob.TextBlob(text).sentiment
    hue = int(round(((sentiment.polarity + 1) / 2) * 255))
    saturation = int(round(sentiment.subjectivity * 75 + 25))
    return hue, saturation


def _read_posts(raw_text):
    """Parse the whole TNET dump into a list of post dicts.

    Every dict has the keys ``name``, ``name_length``, ``date``, ``likes``,
    ``text``, ``index``, ``classes``, ``polarity`` and ``subjectivity``.
    The text is HTML-escaped (``&``, ``<``, ``>``).
    """
    posts = []
    for chunk in raw_text.split(delimiter):
        lines = [
            _escape_html(line, quote=False)
            for line in chunk.strip().split('\n')
            if line
        ]
        # An empty chunk (e.g. after a trailing delimiter) used to crash on
        # lines[-1].  NOTE(review): the nesting of this loop was
        # reconstructed from a whitespace-collapsed source; chunks whose
        # last line does not contain 'ike' are skipped - confirm.
        if not lines or 'ike' not in lines[-1]:
            continue

        footer = lines[-1].split(splitter)
        try:
            date = get_date(footer[0])
        except UnicodeEncodeError:
            # Stop parsing but still output what was collected so far.
            _warn('**problem:', footer, footer[0])
            _warn(lines)
            break

        likes = int(footer[-1]) if footer[-1].isdigit() else 0
        index = len(posts) + 1
        name, name_length = get_name(lines[0])
        # Drop the footer line, then the poster's name at the very start.
        text = '\n'.join(lines[:-1])[name_length:].lstrip()
        hue, saturation = _sentiment_colour(text)
        posts.append(dict(
            name=name,
            name_length=name_length,
            date=date,
            likes=likes,
            text=text,
            index=index,
            classes=u'odd' if index % 2 == 1 else u'even',
            polarity=hue,
            subjectivity=saturation,
        ))
    return posts


def _render_post(post):
    """Return the ``<li>`` element for one post dict."""
    fields = dict(post)
    fields['date'] = post['date'].isoformat()
    fields['text'] = post['text'].replace(u'\n', u'<br />')
    # The text was escaped without quote handling, but the name also lands
    # inside a double-quoted attribute (data-name="...").
    fields['name'] = post['name'].replace(u'"', u'&quot;')
    return div_template % fields


def _alpha_words(text):
    """Return the purely alphabetic words of *text*, space-separated."""
    return u' '.join(word for word in text.split() if word.isalpha())


class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that serialises ``datetime`` objects as ISO 8601."""

    def default(self, obj):
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        return json.JSONEncoder.default(self, obj)


def _parse_args(argv=None):
    """Parse the command line (``sys.argv[1:]`` when *argv* is None)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--json', '-j', action='store_true', default=False,
                        help='print the parsed posts as JSON')
    parser.add_argument('--fwc', '-w', action='store_true', default=False,
                        help='print only the alphabetic words of every post')
    return parser.parse_args(argv)


def main(argv=None):
    """Read ``TNET`` and print it in the format selected on the command line."""
    args = _parse_args(argv)
    with io.open('TNET', encoding='utf-8') as source:
        posts = _read_posts(source.read())

    if args.json:
        _emit(json.dumps(posts, cls=DateTimeEncoder, indent=2) + '\n')
    elif args.fwc:
        # post['text'] already has the poster's name and the footer line
        # removed; stripping them a second time here used to eat real words
        # and dropped single-line posts entirely.
        _emit(u' '.join(_alpha_words(post['text']) for post in posts) + u'\n')
    else:
        _emit(beginning + u'\n')
        for post in posts:
            _emit(_render_post(post) + u'\n')
        _emit(end + u'\n')


if __name__ == '__main__':
    main()