# url_handler.py — from git.fiddlerwoaroof.com, commit 2588d43a
 import re
 import time
 import collections
 import urllib.request
 import urllib.parse
 import feedparser
 import json
 
 def urlopen(url):
 	headers = {'User-Agent': "the/edgent"}
 	req = urllib.request.Request(url, headers=headers)
 	return urllib.request.urlopen(req)
 
 class URLHandler(object):
 	registry = collections.OrderedDict()
 
 	@classmethod
 	def register(cls, pattern):
 		def _inner(ncls):
 			cls.registry[re.compile(pattern)] = ncls
 			return ncls
 		return _inner
 
 	@classmethod
 	def handle(cls, url, **args):
 		print(args)
 		for x in reversed(cls.registry):
 			if x.match(url):
 				return cls.registry[x](url).run(url, **args)
 
 	def __init__(self, url):
 		self.url = url
 
 	def run(self, url, **args):
 		data = self.get_data(url, **args)
 		return self.postprocess(data)
 
 	def get_data(self, url, **args): return urlopen(url)
 	def postprocess(self, result): return result
 
 @URLHandler.register('.')
 class BasicHandler(URLHandler):
 	def get_data(self, url, **args):
 		return feedparser.parse(self.url, **args)
 
 @URLHandler.register(r'^http[s]?://(www\.)?reddit.com/r/[^/]*/$')
 class RedditJSONHandler(URLHandler):
 	def get_data(self, url, **args):
 		result = urlopen('%s.json' % url)
 		result = result.read().decode(result.headers.get_content_charset())
 		return json.loads(result)
 	def postprocess(self, data):
 		result = feedparser.FeedParserDict()
 		desc = urllib.parse.urljoin(self.url, 'about.json')
 		desc = urlopen(desc)
 		desc = json.loads(desc.read().decode(desc.headers.get_content_charset()))['data']
 
 		result['feed'] = feedparser.FeedParserDict()
 		result.feed['title'] = desc['title']
 		result.feed['link'] = 'http://reddit.com/%s' % desc['url']
 		result['entries'] = []
 		result.etag = None
 		result.modified = None
 		result.status = 200
 
 		for x in data['data']['children']:
 			result.entries.append(feedparser.FeedParserDict())
 			dat = x['data']
 			result.entries[-1]['title'] = dat['title']
 			result.entries[-1]['link'] = dat['url']
 			result.entries[-1]['published_parsed'] = time.gmtime(dat['created_utc'])
 			result.entries[-1]['id'] = dat['id']
 
 		return result