2588d43a |
import url_handler
import feedparser
import hashlib
import json
#import redis
class Feed(object):
    """A syndicated feed and its headlines, with Redis persistence.

    Redis layout, all derived from ``get_feed_key(url)``:

    * ``<feed_key>`` -- existence marker (the string ``'exists'``).
    * ``<feed_key>:<field>`` -- one key per name in ``_stored_fields``.
    * ``<feed_key>:entries`` -- set of md5 hex digests of entry ids.
    * ``<feed_key>:entry:<digest>`` -- key handed to each entry's own
      ``to_redis`` / ``Headline.from_redis``.
    """

    # Scalar attributes persisted under ``<feed_key>:<name>``.
    _stored_fields = ('title', 'link', 'url', 'etag', 'modified', 'status')

    @property
    def url(self):
        """The feed URL; assigning it also refreshes ``urlhash``."""
        return self._url

    @url.setter
    def url(self, url):
        self.urlhash = hashlib.md5(url.encode('utf-8')).hexdigest()
        self._url = url

    def __init__(self, title, link, url, etag=None, modified=None, status=0):
        """Create a feed with no entries.

        ``status`` is coerced with ``int()``; ``etag`` and ``modified`` are
        stored as given.
        """
        # Assigning ``url`` goes through the property setter, which also
        # computes ``urlhash``.
        self.title, self.link, self.url = title, link, url
        print('__init__', 'etag is', etag, 'modified is', modified)
        self.etag = etag
        self.modified = modified
        self.status = int(status)
        self.entries = []

    def add_entry(self, entry):
        """Append ``entry`` and make it the only one flagged ``sep = True``
        among the previous last entry and itself."""
        if self.entries:
            self.entries[-1].sep = False
        self.entries.append(entry)
        self.entries[-1].sep = True

    def to_redis(self, redis):
        """Write the feed's fields and all of its entries to ``redis``.

        Fields whose value is ``None`` are deleted rather than written, so a
        later ``from_redis`` reads them back as ``None`` instead of handing
        ``None`` to ``redis.set``.
        """
        feed_key = self.get_feed_key(self.url)
        object_prefix = '%s:%%s' % feed_key
        print(object_prefix)
        redis.set(feed_key, 'exists')
        for name in self._stored_fields:
            field_key = object_prefix % name
            value = getattr(self, name)
            if value is None:
                redis.delete(field_key)
            else:
                redis.set(field_key, value)
        self.put_entries(redis, object_prefix)

    def put_entries(self, redis, object_prefix):
        """Store every entry under ``object_prefix`` and register its id
        digest in the ``entries`` set. Entries are written last-to-first."""
        for entry in reversed(self.entries):
            eid = hashlib.md5(entry.id.encode('utf-8')).hexdigest()
            entry_key = object_prefix % ('entry:%s' % eid)
            redis.sadd(object_prefix % 'entries', eid)
            entry.to_redis(redis, entry_key)

    @classmethod
    def get_feed_key(cls, url):
        """Return the Redis key for ``url``: ``'feed:'`` + md5 hex digest."""
        return 'feed:%s' % hashlib.md5(url.encode('utf-8')).hexdigest()

    @classmethod
    def from_redis(cls, redis, url):
        """Load the feed stored for ``url``.

        Returns ``None`` when the existence marker is absent. Otherwise
        returns a ``Feed`` whose entries are sorted by ``date`` with only the
        last one flagged ``sep = True``.
        """
        feed_key = cls.get_feed_key(url)
        if not redis.get(feed_key):
            return None
        object_prefix = '%s:%%s' % feed_key

        def as_text(raw):
            # Clients may return bytes or (with response decoding on) str.
            return raw.decode('utf-8') if isinstance(raw, bytes) else raw

        def fetch(name):
            # A missing key yields None instead of raising on ``.decode``.
            return as_text(redis.get(object_prefix % name))

        status = fetch('status')
        # etag/modified/status must be passed on: callers use them to make
        # conditional requests against the remote feed.
        self = cls(
            fetch('title'),
            fetch('link'),
            fetch('url') or url,
            etag=fetch('etag'),
            modified=fetch('modified'),
            status=0 if status is None else status,
        )

        for raw_eid in redis.smembers(object_prefix % 'entries'):
            entry_key = object_prefix % ('entry:%s' % as_text(raw_eid))
            hl = Headline.from_redis(redis, entry_key)
            if hl is None:
                # Set iteration order is arbitrary, so stopping at the first
                # missing entry would drop an arbitrary subset; skip instead.
                continue
            hl.sep = False
            self.entries.append(hl)

        if self.entries:
            # ``or []`` keeps entries without a date sortable (they go first).
            self.entries.sort(key=lambda entry: entry.date or [])
            self.entries[-1].sep = True
        return self

    @classmethod
    def pull_feed(cls, url, etag=None, modified=None):
        """Fetch ``url`` through ``url_handler.URLHandler.handle`` and build
        a ``Feed`` from the result."""
        print('etag is', etag, 'modified is', modified)
        feed = url_handler.URLHandler.handle(url, etag=etag, modified=modified)
        return cls.from_parsed_feed(feed, url)

    @classmethod
    def from_parsed_feed(cls, data, url):
        """Build a ``Feed`` from a parsed result.

        ``data`` must expose ``feed.title``, ``feed.link``, ``status`` and
        ``entries`` (each with ``title``, ``link``, ``published_parsed`` and
        ``id``); ``etag`` / ``modified`` are optional and replaced by
        placeholder strings when absent.
        NOTE(review): presumably a feedparser result -- confirm against
        url_handler.
        """
        etag = getattr(data, 'etag', 'No Etag')
        modified = getattr(data, 'modified', 'No Last Modified')
        print('parsed_feed', 'etag is', etag, 'modified is', modified)
        self = cls(data.feed.title, data.feed.link, url,
                   etag=etag, modified=modified, status=data.status)
        for entry in data.entries:
            hl = Headline(entry.title, entry.link,
                          date=entry.published_parsed, id=entry.id)
            self.add_entry(hl)
        return self

    @classmethod
    def get_feed(cls, url, redis=None):
        """Return the feed for ``url``, using ``redis`` as a cache if given.

        With a cached copy, the remote is re-requested with the cached
        etag/modified; a status of 304 returns the cached copy unchanged,
        anything else is stored and the merged result reloaded. Without a
        cached copy the feed is fetched and, when ``redis`` is provided,
        stored.
        """
        res = None
        update = False
        newfeed = None
        if redis is not None:
            res = cls.from_redis(redis, url)
            if res is not None:
                newfeed = url_handler.URLHandler.handle(
                    url, etag=res.etag, modified=res.modified)
                update = newfeed.status != 304
                print('newfeed.status is', newfeed.status, 'update is', update)
        print('res is', res, 'update is', update, 'url is', url)

        if update:
            updates = cls.from_parsed_feed(newfeed, url)
            print('putting updates!')
            # to_redis() already writes the entries; no separate
            # put_entries() call is needed.
            updates.to_redis(redis)
            res = cls.from_redis(redis, url)
        elif res is None:
            data = url_handler.URLHandler.handle(url)
            res = cls.from_parsed_feed(data, url)
            # ``redis`` defaults to None; only persist when a client exists.
            if redis is not None:
                res.to_redis(redis)
        return res
class Headline(object):
    """A single feed entry (title, url, optional image, id and date).

    ``date`` is kept as a plain list (or ``None``) so it can be JSON-encoded
    for storage and compared when sorting.
    """

    # Attributes written to / read from Redis, in this order.
    serialized_attributes = ['title', 'url', 'img', 'id', 'date']
    # Per-attribute encoders applied before writing to Redis ...
    trans_map = dict(date=json.dumps)
    # ... and the matching decoders applied after reading. Keyed by attribute
    # name, like ``trans_map``, so that lookups by name actually hit.
    rtrans_map = dict(date=json.loads)

    @property
    def url(self):
        """The entry URL; assigning it also refreshes ``urlhash``."""
        return self._url

    @url.setter
    def url(self, url):
        self.urlhash = hashlib.md5(url.encode('utf-8')).hexdigest()
        self._url = url

    def __init__(self, title, url, sep=False, img=None, id=None, date=None):
        """Create a headline.

        ``date`` may be any iterable (it is copied into a list) or ``None``.
        """
        self.title = title
        self.url = url  # property setter also computes urlhash
        self.img = img
        self.id = id
        # Guard the default: list(None) would raise TypeError.
        self.date = None if date is None else list(date)
        self.sep = sep

    def __repr__(self):
        return '<%s>' % ', '.join(
            str(getattr(self, x)) for x in self.serialized_attributes)

    def to_redis(self, redis, entry_key):
        """Store the headline under ``entry_key``.

        ``entry_key`` itself is set to ``'exists'``; each serialized
        attribute goes to ``<entry_key>:<name>``. Attributes that are
        ``None`` are deleted instead of written, so ``from_redis`` reads
        them back as ``None``.
        """
        redis.set(entry_key, 'exists')
        object_prefix = '%s:%%s' % entry_key
        for name in self.serialized_attributes:
            attr_key = object_prefix % name
            value = getattr(self, name)
            if value is None:
                redis.delete(attr_key)
                continue
            encode = self.trans_map.get(name)
            redis.set(attr_key, encode(value) if encode else value)

    @classmethod
    def from_redis(cls, redis, entry_key):
        """Load the headline stored under ``entry_key``.

        Returns ``None`` when the existence marker is absent. Missing
        attribute keys are loaded as ``None``.
        """
        if redis.get(entry_key) is None:
            return None
        object_prefix = '%s:%%s' % entry_key
        args = {}
        for name in cls.serialized_attributes:
            raw = redis.get(object_prefix % name)
            if raw is None:
                args[name] = None
                continue
            # Clients may return bytes or (with response decoding on) str.
            text = raw.decode('utf-8') if isinstance(raw, bytes) else raw
            decode = cls.rtrans_map.get(name)
            args[name] = decode(text) if decode else text
        return cls(**args)

    @classmethod
    def from_rss(cls, entry):
        """Build a headline from an entry exposing ``title``, ``link``,
        ``id`` and ``published_parsed``."""
        return cls(entry.title, entry.link, id=entry.id,
                   date=entry.published_parsed)

    def to_json(self):
        """Return a JSON array string ``[title, url, sep, img]``."""
        return json.dumps([self.title, self.url, self.sep, self.img])

    @classmethod
    def from_json(cls, enc):
        """Rebuild a headline from ``to_json`` output.

        A doubly-encoded payload (a JSON string that itself contains the
        JSON array) is also accepted.
        """
        decoded = json.loads(enc)
        if isinstance(decoded, str):
            decoded = json.loads(decoded)
        return cls(*decoded)
class Feeds(object):
    """Collection of the feeds obtained for a list of URLs."""

    def __init__(self, urls, redis=None):
        """Resolve each URL via ``Feed.get_feed`` and keep the truthy
        results, in input order, on ``self.feeds``."""
        loaded = []
        for url in urls:
            feed = Feed.get_feed(url, redis)
            if feed:
                loaded.append(feed)
        self.feeds = loaded
        print(self.feeds)
if __name__ == '__main__':
    # Script entry point: read the list of feed URLs from blogs.json and
    # load every feed through a default Redis connection.
    import json
    import redis

    print('getting feeds . . .', end=' ')
    with open('blogs.json') as blog_file:
        blog_urls = json.loads(blog_file.read())
    feeds = Feeds(blog_urls, redis.Redis())
    print('done.')
|