# GitList

# Raw Blame History
# -*- coding: utf-8 -*-
import sys
import cgi
import json
import dateutil.parser
import re
import textblob

# Relative timestamps such as "5 hrs" or "12 min", anchored at the start of
# the string (the pattern is applied with ``match``).
_RELATIVE_TIME = re.compile(r'(\d+) (hr|min|sec|hour)')

# Unit captured by _RELATIVE_TIME -> relativedelta keyword it adjusts.
_RELATIVE_UNITS = {
    'hr': 'hours',
    'hour': 'hours',
    'min': 'minutes',
    'sec': 'seconds',
}


def get_date(date):
    """Convert a timestamp string taken from the dump into a datetime.

    The string is first handed to ``dateutil.parser.parse``.  If that raises
    ``TypeError`` or ``ValueError`` the following relative forms are tried,
    in this order:

    * ``'Yesterday ...'`` -- everything after the first space is appended to
      the literal ``'August 24'`` and parsed again.
    * ``'Just now'`` -- the fixed reference date is returned.
    * ``'<N> hr|hour|min|sec...'`` -- the reference date minus N of that unit.

    Anything else is written to stderr and the parser's original exception is
    re-raised.

    NOTE(review): the reference date is 'Wed Oct 22 01:18' while "Yesterday"
    is pinned to 'August 24'; these do not agree with each other -- confirm
    which capture date is correct before relying on either.
    NOTE(review): neither literal carries a year; presumably dateutil fills
    the missing fields from the current date, which would make results depend
    on when the script runs -- confirm.
    """
    # Imported explicitly: the file only imports dateutil.parser, so
    # dateutil.relativedelta was reachable only as an import side effect.
    import dateutil.relativedelta

    try:
        return dateutil.parser.parse(date)
    except (TypeError, ValueError):
        if date.startswith('Yesterday'):
            time_of_day = date.partition(' ')[2]
            return dateutil.parser.parse('August 24 %s' % time_of_day)

        # Reference "now" that relative timestamps are measured from.
        start_date = dateutil.parser.parse('Wed Oct 22 01:18')
        if date.startswith('Just now'):
            return start_date

        match = _RELATIVE_TIME.match(date)
        if match:
            num, unit = match.groups()
            offset = {_RELATIVE_UNITS[unit]: -int(num)}
            return start_date + dateutil.relativedelta.relativedelta(**offset)

        # Unrecognised format: show the offending value, then re-raise the
        # exception from the initial parse attempt (bare ``raise`` is valid
        # here because we are still inside the ``except`` block).
        print >>sys.stderr, date
        raise

# Marker string that separates one post from the next inside the dump file.
delimiter = u'--##--%%--##--'

# Read the whole 'TNET' dump, decode it as UTF-8 and cut it into per-post
# chunks in a single step.
with open('TNET') as f:
    txt = f.read().decode('utf-8').split(delimiter)

# HTML emitted before the generated list items in the default (HTML) output
# mode.  It contains the <head> (title, Open Graph metadata, jQuery includes,
# the script behind the "Post #" jump box, and the page CSS) and opens the
# <body>, the fixed navigation box, the container <div> and the <ol> that the
# per-post <li> fragments are printed into.
beginning = u'''\
<html prefix="og: http://ogp.me/ns#">
<head>
    <title>TNET</title>
    <meta property="og:title" content="tNET" />
    <meta property="og:url" content="http://srv2.elangley.org/~edwlan/TNET.html" />
    <meta property="og:type" content="website" />
    <meta property="og:image" content="http://srv2.elangley.org/~edwlan/TNET_256.png" />
    <meta property="og:description" content="This is an archive of tNET in its full glory." />
    <script src="http://code.jquery.com/jquery-1.11.0.min.js"></script>
    <script src="http://code.jquery.com/jquery-migrate-1.2.1.min.js"></script>
    <script type="text/javascript">
       $(document).ready(function() {
           $('#submit').click(function() {
               var targ = $('#goto');
               var scrollTo = '#post_'+targ.val();
               console.log(scrollTo);
               $('html, body').animate({
                   'scrollTop': $(scrollTo).offset().top
               })
           })
           $('#goto').keyup(function(ev) {
               if (ev.which == 13) {
                   $('#submit').click();
               }
           });
       });
    </script>
    <style type="text/css">
        ol {
            padding-left:5em;
        }
        .even {
            background: hsl(0,0%,90%);
        }
        .name {
            min-width: 20em;
            background: hsl(255,50%,90%);
            margin-right: 0.5em;
            width: 100%;
        }
        #container {
            max-width: 900px;
            margin: auto auto;
        }
        .entry + .entry {
            margin-top: 0.5em;
        }
        .entry:hover {
            outline: thin black solid;
        }

        div#nav {
            background:olive;
            color:white;
            position: fixed;
            left: 1em;
            top: 1em;
            border-radius:1em;
            padding:1em;
            width:5em;
        }

        input#goto {
            width: 100%;
            background: white;
            border: thin solid black;
        }

        input#submit {
            background: gray;
            border: thin solid black;
            margin: auto auto;
            position: relative;
            width: 50%;
            left: 25%;
        }
        .text {
            color: white;
        }
    </style>
</head>
<body>
<div id="nav">
    Post #:
    <input type="text" id="goto"></input>
    <input type="button" value="Go" id="submit"></input>
</div>
<div id="container">
<ol>'''
# HTML emitted after the last list item in the default (HTML) output mode;
# it closes the <ol>, the container <div>, <body> and <html> opened by
# ``beginning``.
end = u'''\
</ol>
</div>
</body>
</html>'''

# Rendered <li> fragments, one per accepted post, in the order encountered.
out = []

# %-format template for a single post.  It is filled from a dict providing
# the keys index, classes, likes, name, date, polarity, subjectivity and text.
# polarity is used as the HSL hue and subjectivity as the HSL saturation of
# the item's background colour.
div_template = u'''\
<li id="post_%(index)d" class="entry %(classes)s" data-likes="%(likes)d" data-name="%(name)s" data-date="%(date)s" style="background-color: hsl(%(polarity)d, %(subjectivity)d%%, 50%%);">
    <div class="name">%(name)s at %(date)s with %(likes)d likes</div>
    <p class="text">%(text)s</p>
</li>'''

# Running count of accepted posts; its value becomes each post's 1-based index.
counter = 0

# Separator between the fields of a post's footer line: a middle dot (U+00B7)
# with a space on either side.
splitter = u' \xb7 '

# Structured per-post records (dicts); this is what the --json and --fwc
# output modes read.
data = []

def get_name(line):
    line = line.split()
    name_end = 1
    while name_end < len(line) and (line[name_end].endswith('.') or len(line[name_end]) < 4):
        name_end += 1
    name = line[:name_end+1]
    if len(name) > 2 and name[1] == 'HF': name = name[:2]
    elif len(name) > 2 and name[1] == 'Lng': name = name[:2]
    elif len(name) > 2 and name[1] == 'Haaf': name = line[:3]
    name = ' '.join(name)
    if name == 'Christopher Michael': name = ' '.join(line[:3])
    elif name == 'Jody Haaf': name = ' '.join(line[:3])
    name_length = len(name)
    if name == 'Peregrine Bonaventure': name = 'Scott Weinberg'
    elif name == 'JA Escalante': name = 'Jehoshaphat Escalante'
    return name, name_length

# Convert each delimiter-separated chunk of the dump into a record appended
# to ``data`` and a rendered <li> fragment appended to ``out``.
for k in txt:
    # HTML-escape every line with cgi.escape and discard empty lines.
    lines = [cgi.escape(line) for line in k.strip().split('\n') if line]

    # An empty or whitespace-only chunk (for instance the piece that follows
    # a trailing delimiter) has no lines at all; skip it rather than letting
    # lines[-1] raise IndexError.
    if not lines:
        continue

    # Only chunks whose last line contains "ike" are treated as posts; that
    # line is the footer carrying the timestamp and, optionally, a like count.
    if 'ike' not in lines[-1]:
        continue

    items = lines[-1].split(splitter)
    try:
        date = get_date(items[0])
    except UnicodeEncodeError:
        # Report the offending footer and stop processing further chunks.
        print >> sys.stderr, '**problem:', items, items[0]
        print >> sys.stderr, lines
        break

    # The like count, when present, is the purely numeric last footer field.
    likes = int(items[-1]) if items[-1].isdigit() else 0

    counter += 1
    name, name_length = get_name(lines[0])
    class_ = u'odd' if counter % 2 == 1 else u'even'

    # Drop the footer, then cut the poster's name off the front of the text.
    lines = lines[:-1]
    j = '\n'.join(lines)[name_length:].lstrip()
    blob = textblob.TextBlob(j)
    data.append(dict(
        name=name,
        name_length=name_length,
        date=date,
        likes=likes,
        text=j,
        index=counter,
        classes=class_,
        # Rescaled for use as an HSL hue: (polarity + 1) / 2 * 255.
        # Assumes polarity lies in [-1, 1] -- TODO confirm against textblob.
        polarity=int(round(((blob.sentiment.polarity + 1) / 2) * 255)),
        # Rescaled for use as an HSL saturation: subjectivity * 75 + 25.
        subjectivity=int(round(blob.sentiment.subjectivity * 75 + 25)),
    ))

    # The HTML rendering needs a string date and <br /> line breaks; work on
    # a copy so the record kept in ``data`` retains the datetime and raw text.
    ntemplate = data[-1].copy()
    ntemplate['date'] = ntemplate['date'].isoformat()
    ntemplate['text'] = u'<br />'.join(ntemplate['text'].split('\n'))
    out.append(div_template % ntemplate)

import argparse
a = argparse.ArgumentParser()
a.add_argument('--json', '-j', action='store_true', default=False)
a.add_argument('--fwc', '-w', action='store_true', default=False)
args = a.parse_args()
if args.json:
    import json
    import datetime
    class DateTimeEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, datetime.datetime):
                return obj.isoformat()
            return json.JSONEncoder.default(self, obj)
    print json.dumps(data, cls=DateTimeEncoder, indent=2)
elif args.fwc:
    for x in data:
        txt = x['text'].split('\n')[:-1]
        txt = u'\n'.join(txt)
        txt = txt[len(x['name'])+1:]
        txt = u' '.join(filter(None,(y for y in txt.split() if y.isalpha())))
        print txt.encode('utf-8'),
else:
    print beginning
    for x in out:
        print x.encode('utf-8')
    print end