# -*- coding: utf-8 -*- # # rawdog plugin to generate RSS, OPML and FOAF output # Copyright 2008 Jonathan Riddell # Copyright 2009 Adam Sampson # # rawdog_rss is free software; you can redistribute and/or modify it # under the terms of that license as published by the Free Software # Foundation; either version 2 of the License, or (at your option) # any later version. # # rawdog_rss is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with rawdog_rss; see the file COPYING. If not, write to the Free # Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, # MA 02110-1301, USA, or see http://www.gnu.org/. # # --- # # This plugin supports the following configuration options: # # outputxml RSS output filename # outputfoaf FOAF output filename # outputopml OPML output filename # xmltitle Feed title (e.g. "Planet Foo") # xmllink Feed link (e.g. "http://planet-foo.example.com/") # xmllanguage Feed language (e.g. "en") # xmlurl URL of the generated RSS (e.g. "http://planet-foo.example.com/rss20.xml") # xmldescription Feed description (e.g. "People who work on foo") # xmlownername Feed owner's name # xmlowneremail Feed owner's email address # xmlmaxarticles Maximum number of articles to include in the feed # (defaults to maxarticles if not specified) # # If you're using rawdog to produce a planet page, you'll probably want to have # "sortbyfeeddate true" in your config file too. import os, time, cgi import rawdoglib.plugins, rawdoglib.rawdog import libxml2 import HTMLParser from rawdoglib.rawdog import detail_to_html, string_to_html from time import gmtime, strftime def rfc822_date(tm): """Format a GMT timestamp as returned by time.gmtime() in RFC822 format. (This is insensitive to the current locale.)""" days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] months = [ "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", ] return "%s, %02d %s %04d %02d:%02d:%02d GMT" % \ (days[tm[6]], tm[2], months[tm[1] - 1], tm[0], tm[3], tm[4], tm[5]) class RSS_Feed: def __init__(self): self.options = { "outputxml": "rss20.xml", "outputfoaf": "foafroll.xml", "outputopml": "opml.xml", "xmltitle": "Planet KDE", "xmllink": "http://planetKDE.org/", "xmllanguage": "en", "xmlurl": "http://planetKDE.org/rss20.xml", "xmldescription": "Planet KDE - http://planetKDE.org/", "xmlownername": "Jonathan Riddell", "xmlowneremail": "", "xmlmaxarticles": "", } def config_option(self, config, name, value): if name in self.options: self.options[name] = value return False else: return True def feed_name(self, feed, config): """Return the label used for a feed. If it has a "name" define, use that; otherwise, use the feed title.""" if "define_name" in feed.args: return feed.args["define_name"] else: return feed.get_html_name(config) def article_to_xml(self, xml_article, rawdog, config, article, doc): entry_info = article.entry_info id = entry_info.get("id", self.options["xmlurl"] + "#id" + article.hash) guid = xml_article.newChild(None, 'guid', string_to_html(id, config)) guid.setProp('isPermaLink', 'false') h = HTMLParser.HTMLParser() title = self.feed_name(rawdog.feeds[article.feed], config) s = detail_to_html(entry_info.get("title_detail"), True, config) if s is not None: title += ": " + h.unescape(s) xml_article.newChild(None, 'title', title.encode('utf-8')) if article.date is not None: date = rfc822_date(gmtime(article.date)) xml_article.newChild(None, 'pubDate', date) s = entry_info.get("link") if s is not None and s != "": xml_article.newChild(None, 'link', string_to_html(h.unescape(s.encode('utf-8')), config)) for key in ["content", "summary_detail"]: s = detail_to_html(entry_info.get(key), False, config) if s is not None: cdata = doc.newCDataBlock(s, len(s)) xml_article.newChild(None, 'description', "").addChild(cdata) break return True def write_rss(self, rawdog, config, articles): doc = libxml2.newDoc("1.0") rss = doc.newChild(None, 'rss', None) rss.setProp('version', "2.0") rss.setProp('xmlns:dc', "http://purl.org/dc/elements/1.1/") rss.setProp('xmlns:atom', 'http://www.w3.org/2005/Atom') channel = rss.newChild(None, 'channel', None) channel.newChild(None, 'title', self.options["xmltitle"]) channel.newChild(None, 'link', self.options["xmllink"]) channel.newChild(None, 'language', self.options["xmllanguage"]) channel.newChild(None, 'description', self.options["xmldescription"]) atom_link = channel.newChild(None, 'atom:link', None) atom_link.setProp('href', self.options["xmlurl"]) atom_link.setProp('rel', 'self') atom_link.setProp('type', 'application/rss+xml') try: maxarticles = int(self.options["xmlmaxarticles"]) except ValueError: maxarticles = len(articles) for article in articles[:maxarticles]: xml_article = channel.newChild(None, 'item', None) self.article_to_xml(xml_article, rawdog, config, article, doc) print "Writing RSS to . . .", self.options["outputxml"], doc.saveFormatFile(self.options["outputxml"], 1) print "done" doc.freeDoc() def write_foaf(self, rawdog, config): doc = libxml2.newDoc("1.0") xml = doc.newChild(None, 'rdf:RDF', None) xml.setProp('xmlns:rdf', "http://www.w3.org/1999/02/22-rdf-syntax-ns#") xml.setProp('xmlns:rdfs', "http://www.w3.org/2000/01/rdf-schema#") xml.setProp('xmlns:foaf', "http://xmlns.com/foaf/0.1/") xml.setProp('xmlns:rss', "http://purl.org/rss/1.0/") xml.setProp('xmlns:dc', "http://purl.org/dc/elements/1.1/") group = xml.newChild(None, 'foaf:Group', None) group.newChild(None, 'foaf:name', self.options["xmltitle"]) group.newChild(None, 'foaf:homepage', self.options["xmllink"]) seeAlso = group.newChild(None, 'rdfs:seeAlso', None) seeAlso.setProp('rdf:resource', '') for url in sorted(rawdog.feeds.keys()): member = group.newChild(None, 'foaf:member', None) agent = member.newChild(None, 'foaf:Agent', None) agent.newChild(None, 'foaf:name', self.feed_name(rawdog.feeds[url], config)) weblog = agent.newChild(None, 'foaf:weblog', None) document = weblog.newChild(None, 'foaf:Document', None) document.setProp('rdf:about', url) seealso = document.newChild(None, 'rdfs:seeAlso', None) channel = seealso.newChild(None, 'rss:channel', None) channel.setProp('rdf:about', '') print "saving FOAF to: %s . . ." % self.options["outputfoaf"], doc.saveFormatFile(self.options["outputfoaf"], 1) print "done" doc.freeDoc() def write_opml(self, rawdog, config): doc = libxml2.newDoc("1.0") xml = doc.newChild(None, 'opml', None) xml.setProp('version', "1.1") head = xml.newChild(None, 'head', None) head.newChild(None, 'title', self.options["xmltitle"]) now = rfc822_date(gmtime()) head.newChild(None, 'dateCreated', now) head.newChild(None, 'dateModified', now) head.newChild(None, 'ownerName', self.options["xmlownername"]) head.newChild(None, 'ownerEmail', self.options["xmlowneremail"]) body = xml.newChild(None, 'body', None) for url in sorted(rawdog.feeds.keys()): outline = body.newChild(None, 'outline', None) outline.setProp('text', self.feed_name(rawdog.feeds[url], config)) outline.setProp('xmlUrl', url) print "saving OPML to: %s . . ." % self.options["outputopml"], doc.saveFormatFile(self.options["outputopml"], 1) print("done") doc.freeDoc() def output_write(self, rawdog, config, articles): self.write_rss(rawdog, config, articles) self.write_foaf(rawdog, config) self.write_opml(rawdog, config) return True rss_feed = RSS_Feed() rawdoglib.plugins.attach_hook("config_option", rss_feed.config_option) rawdoglib.plugins.attach_hook("output_write", rss_feed.output_write)