fb9faba2 |
# -*- coding: utf-8 -*-
import sys
import cgi
import json
import dateutil.parser
import re
import textblob
# Recognises relative timestamps such as "3 hrs", "12 mins" or "40 secs".
# Only the leading "<number> <unit>" prefix is examined, so plural forms and
# any trailing text are accepted as well.
_RELATIVE_TIME = re.compile(r'(\d+) (hr|min|sec|hour)')

# Maps every unit the pattern above can capture to the relativedelta keyword
# that has to be adjusted for it.
_UNIT_TO_FIELD = {
    'hr': 'hours',
    'hour': 'hours',
    'min': 'minutes',
    'sec': 'seconds',
}


def get_date(date):
    """Convert a post's timestamp text into a datetime.

    The text is first handed to ``dateutil.parser.parse``.  If that raises
    ``TypeError`` or ``ValueError`` the following fallbacks are tried, in
    order:

    * ``"Yesterday <time>"`` -- parsed as ``"August 24 <time>"``.
    * ``"Just now"`` -- the anchor date ``'Wed Oct 22 01:18'``.
    * ``"<n> hr|hour|min|sec..."`` -- the anchor date minus that amount.

    :param date: timestamp text taken from a post's metadata line.
    :returns: the ``datetime`` produced by dateutil for the text.
    :raises: the original parsing error is re-raised (after echoing the
        offending text to stderr) when no fallback applies.
    """
    # Imported explicitly: the file only imports ``dateutil.parser``, so
    # ``dateutil.relativedelta`` was reachable only if the parser module
    # happened to load it as a side effect.
    from dateutil.relativedelta import relativedelta

    try:
        return dateutil.parser.parse(date)
    except (TypeError, ValueError):
        # Everything below stays inside the handler so that the bare
        # ``raise`` at the end re-raises the original parsing error.

        # The anchor carries no year; presumably dateutil fills missing
        # fields from the current date -- TODO confirm the intended year.
        start_date = dateutil.parser.parse('Wed Oct 22 01:18')

        if date.startswith('Yesterday'):
            # NOTE(review): "Yesterday" is pinned to August 24 while the
            # anchor is October 22 -- confirm this is intentional.
            return dateutil.parser.parse(
                'August 24 %s' % date.partition(' ')[2])

        if date.startswith('Just now'):
            return start_date

        match = _RELATIVE_TIME.match(date)
        if match:
            num, unit = match.groups()
            # The pattern only captures keys of _UNIT_TO_FIELD, so this
            # lookup cannot fail.
            offset = {_UNIT_TO_FIELD[unit]: -int(num)}
            return start_date + relativedelta(**offset)

        # Unrecognised format: show the text, then propagate the error.
        print >>sys.stderr, date
        raise
# Marker string that separates one post from the next in the archive file.
delimiter = u'--##--%%--##--'

# Read the whole archive, decode it from UTF-8 and cut it into one chunk of
# text per delimiter-separated section.
with open('TNET') as f:
    txt = f.read().decode('utf-8').split(delimiter)
# Opening part of the generated HTML page, emitted once before the rendered
# posts.  It carries the Open Graph <meta> tags, the jQuery includes, a small
# script that scrolls to the element '#post_<n>' for the number typed into the
# '#goto' box (on clicking '#submit' or pressing Enter), the stylesheet, the
# fixed navigation box, and finally opens the '#container' <div> and the <ol>
# that the post entries are appended to.
beginning = u'''\
<html prefix="og: http://ogp.me/ns#">
<head>
<title>TNET</title>
<meta property="og:title" content="tNET" />
<meta property="og:url" content="http://srv2.elangley.org/~edwlan/TNET.html" />
<meta property="og:type" content="website" />
<meta property="og:image" content="http://srv2.elangley.org/~edwlan/TNET_256.png" />
<meta property="og:description" content="This is an archive of tNET in its full glory." />
<script src="http://code.jquery.com/jquery-1.11.0.min.js"></script>
<script src="http://code.jquery.com/jquery-migrate-1.2.1.min.js"></script>
<script type="text/javascript">
$(document).ready(function() {
$('#submit').click(function() {
var targ = $('#goto');
var scrollTo = '#post_'+targ.val();
console.log(scrollTo);
$('html, body').animate({
'scrollTop': $(scrollTo).offset().top
})
})
$('#goto').keyup(function(ev) {
if (ev.which == 13) {
$('#submit').click();
}
});
});
</script>
<style type="text/css">
ol {
padding-left:5em;
}
.even {
background: hsl(0,0%,90%);
}
.name {
min-width: 20em;
background: hsl(255,50%,90%);
margin-right: 0.5em;
width: 100%;
}
#container {
max-width: 900px;
margin: auto auto;
}
.entry + .entry {
margin-top: 0.5em;
}
.entry:hover {
outline: thin black solid;
}
div#nav {
background:olive;
color:white;
position: fixed;
left: 1em;
top: 1em;
border-radius:1em;
padding:1em;
width:5em;
}
input#goto {
width: 100%;
background: white;
border: thin solid black;
}
input#submit {
background: gray;
border: thin solid black;
margin: auto auto;
position: relative;
width: 50%;
left: 25%;
}
.text {
color: white;
}
</style>
</head>
<body>
<div id="nav">
Post #:
<input type="text" id="goto"></input>
<input type="button" value="Go" id="submit"></input>
</div>
<div id="container">
<ol>'''
# Closing part of the page: ends the <ol>, the '#container' <div>, the body
# and the document opened by `beginning`.
end = u'''\
</ol>
</div>
</body>
</html>'''
# Separator between the fields of a post's trailing metadata line: a middle
# dot (U+00B7) with one space on either side.
splitter = u' \xb7 '

# Number of posts accepted so far; it doubles as the 1-based post index.
counter = 0

# `out` collects the rendered <li> fragments and `data` the per-post records
# they are rendered from.
out, data = [], []

# %-style template for a single post.  It expects a mapping with the keys
# index, classes, likes, name, date, polarity, subjectivity and text.
div_template = u'''\
<li id="post_%(index)d" class="entry %(classes)s" data-likes="%(likes)d" data-name="%(name)s" data-date="%(date)s" style="background-color: hsl(%(polarity)d, %(subjectivity)d%%, 50%%);">
<div class="name">%(name)s at %(date)s with %(likes)d likes</div>
<p class="text">%(text)s</p>
</li>'''
# Names that are replaced by another name in the output.
_NAME_ALIASES = {
    'Peregrine Bonaventure': 'Scott Weinberg',
    'JA Escalante': 'Jehoshaphat Escalante',
}

# Second words that are short enough to be skipped by the scan in get_name
# but actually end a two-word name.
_SHORT_SURNAMES = ('HF', 'Lng')

# Two-word prefixes that are really the start of a three-word name.
_THREE_WORD_PREFIXES = ('Christopher Michael', 'Jody Haaf')


def get_name(line):
    """Split the poster's name off the front of a post's first line.

    The name is the first word plus the following words up to and including
    the first one that neither ends in ``'.'`` nor is shorter than four
    characters, so initials and short particles stay part of the name.  A
    few known names are special-cased, and two names are replaced by an
    alias.

    :param line: first line of a post, starting with the poster's name.
    :returns: ``(name, name_length)``.  ``name`` is the name with its words
        joined by single spaces, after alias substitution.  ``name_length``
        is the offset in ``line`` just past the name as it appears there
        (before alias substitution), so ``line[name_length:]`` is the rest
        of the line.
    """
    tokens = line.split()

    # Scan past initials ("J.") and words shorter than four characters; the
    # word the scan stops on is the last word of the name.
    name_end = 1
    while name_end < len(tokens) and (
            tokens[name_end].endswith('.') or len(tokens[name_end]) < 4):
        name_end += 1
    count = min(name_end + 1, len(tokens))

    # A short surname would otherwise drag the words after it into the name.
    if count > 2 and tokens[1] in _SHORT_SURNAMES:
        count = 2

    # The scan stops after two words for these, but a third word belongs to
    # the name.
    if ' '.join(tokens[:count]) in _THREE_WORD_PREFIXES:
        count = min(3, len(tokens))

    name_tokens = tokens[:count]
    name = ' '.join(name_tokens)

    # Measure the name in the original line rather than in the re-joined
    # string: leading or repeated whitespace makes the two differ, and
    # callers use this value to slice the name off the raw text.  Only
    # whitespace separates consecutive tokens, so each search hits the token
    # itself.
    name_length = 0
    for token in name_tokens:
        name_length = line.index(token, name_length) + len(token)

    return _NAME_ALIASES.get(name, name), name_length
# Turn every chunk of the archive into a record in `data` and a rendered
# <li> fragment in `out`.
for k in txt:
    k = k.strip()
    # HTML-escape each line of the chunk, then drop the empty ones.
    # NOTE(review): cgi.escape leaves double quotes alone by default, yet the
    # name is also placed inside a quoted attribute by div_template.
    lines = [cgi.escape(raw) for raw in k.split('\n')]
    lines = [escaped for escaped in lines if escaped]

    # An empty chunk (for example when the archive starts or ends with the
    # delimiter) has no last line to inspect; indexing it used to raise
    # IndexError.
    if not lines:
        continue
    # Only chunks whose last line contains 'ike' are treated as posts;
    # presumably this matches the "Like" text of the metadata line.
    if 'ike' not in lines[-1]:
        continue

    # The last line holds the metadata fields: the date comes first and the
    # like count, when there is one, last.
    items = lines[-1].split(splitter)
    try:
        date = get_date(items[0])
    except UnicodeEncodeError:
        # Report the offending chunk and stop processing altogether.
        print >> sys.stderr, '**problem:', items, items[0]
        print >> sys.stderr, lines
        break

    likes = int(items[-1]) if items[-1].isdigit() else 0

    counter += 1
    name, name_length = get_name(lines[0])
    class_ = u'odd' if counter % 2 == 1 else u'even'

    # The post text is everything except the metadata line, with the
    # poster's name cut off the front.
    lines = lines[:-1]
    j = '\n'.join(lines)[name_length:].lstrip()

    blob = textblob.TextBlob(j)
    sentiment = blob.sentiment
    data.append(dict(
        name=name,
        name_length=name_length,
        date=date,
        likes=likes,
        text=j,
        index=counter,
        classes=class_,
        # Polarity is rescaled from [-1, 1] to 0..255; it is used as the hue
        # of the entry's background colour.
        polarity=int(round(((sentiment.polarity + 1) / 2) * 255)),
        # Subjectivity is rescaled to 25..100; it is used as the saturation
        # percentage.  (Assumes textblob reports it in [0, 1].)
        subjectivity=int(round(sentiment.subjectivity * 75 + 25)),
    ))

    # Render from a copy so that `data` keeps the datetime object and the
    # text with its plain newlines.
    ntemplate = data[-1].copy()
    ntemplate['date'] = ntemplate['date'].isoformat()
    ntemplate['text'] = u'<br />'.join(ntemplate['text'].split('\n'))
    out.append(div_template % ntemplate)
# Command-line handling and output.  Exactly one of three outputs is written
# to stdout: JSON (--json / -j), a plain word stream (--fwc / -w), or, by
# default, the HTML page.
import argparse
a = argparse.ArgumentParser()
a.add_argument('--json', '-j', action='store_true', default=False)
a.add_argument('--fwc', '-w', action='store_true', default=False)
args = a.parse_args()
if args.json:
    import json
    import datetime
    # JSON encoder that writes datetime objects as ISO 8601 strings and
    # defers everything else to the stock encoder.
    class DateTimeEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, datetime.datetime):
                return obj.isoformat()
            return json.JSONEncoder.default(self, obj)
    # Dump all collected post records, indented by two spaces.
    print json.dumps(data, cls=DateTimeEncoder, indent=2)
elif args.fwc:
    for x in data:
        # Drop the last line of the text, then the first len(name)+1
        # characters.
        # NOTE(review): x['text'] appears to have had its metadata line and
        # name prefix removed already when `data` was built, so this may be
        # trimming real content a second time -- confirm.
        txt = x['text'].split('\n')[:-1]
        txt = u'\n'.join(txt)
        txt = txt[len(x['name'])+1:]
        # Keep only the purely alphabetic words, separated by single spaces.
        txt = u' '.join(filter(None,(y for y in txt.split() if y.isalpha())))
        # The trailing comma keeps the output of all posts on one line.
        print txt.encode('utf-8'),
else:
    # Default: the page header, every rendered entry, then the footer.
    print beginning
    for x in out:
        print x.encode('utf-8')
    print end
|