02cd881d |
# -*- coding: utf-8 -*-
#
# rawdog plugin to generate RSS, OPML and FOAF output
# Copyright 2008 Jonathan Riddell
# Copyright 2009 Adam Sampson <ats@offog.org>
#
# rawdog_rss is free software; you can redistribute and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rawdog_rss is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rawdog_rss; see the file COPYING. If not, write to the Free
# Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA, or see http://www.gnu.org/.
#
# ---
#
# This plugin supports the following configuration options:
#
# outputxml RSS output filename
# outputfoaf FOAF output filename
# outputopml OPML output filename
# xmltitle Feed title (e.g. "Planet Foo")
# xmllink Feed link (e.g. "http://planet-foo.example.com/")
# xmllanguage Feed language (e.g. "en")
# xmlurl URL of the generated RSS (e.g. "http://planet-foo.example.com/rss20.xml")
# xmldescription Feed description (e.g. "People who work on foo")
# xmlownername Feed owner's name
# xmlowneremail Feed owner's email address
# xmlmaxarticles Maximum number of articles to include in the feed
# (defaults to maxarticles if not specified)
#
# If you're using rawdog to produce a planet page, you'll probably want to have
# "sortbyfeeddate true" in your config file too.
import os, time, cgi
import rawdoglib.plugins, rawdoglib.rawdog
import libxml2
import HTMLParser
from rawdoglib.rawdog import detail_to_html, string_to_html
from time import gmtime, strftime
def rfc822_date(tm):
    """Render a GMT time tuple (as produced by time.gmtime()) as an RFC822
    date string such as "Thu, 01 Jan 1970 00:00:00 GMT".

    Day and month names come from fixed English tables rather than
    strftime(), so the result does not depend on the current locale.
    """
    weekday_names = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    month_names = (
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    )
    # Only the first seven fields are used: year, month (1-12), day of
    # month, hour, minute, second and weekday (0 is Monday).
    year, month, day, hour, minute, second, weekday = [
        tm[index] for index in range(7)
    ]
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        weekday_names[weekday],
        day,
        month_names[month - 1],
        year,
        hour,
        minute,
        second,
    )
class RSS_Feed:
    """Plugin state and hook methods that write RSS 2.0, FOAF and OPML files.

    One instance holds the plugin's option values. config_option() and
    output_write() are the methods intended to be attached as rawdog hooks.
    """

    def __init__(self):
        # Built-in defaults for every option this plugin recognises;
        # config_option() overwrites entries by name.
        self.options = {
            "outputxml": "rss20.xml",
            "outputfoaf": "foafroll.xml",
            "outputopml": "opml.xml",
            "xmltitle": "Planet KDE",
            "xmllink": "http://planetKDE.org/",
            "xmllanguage": "en",
            "xmlurl": "http://planetKDE.org/rss20.xml",
            "xmldescription": "Planet KDE - http://planetKDE.org/",
            "xmlownername": "Jonathan Riddell",
            "xmlowneremail": "",
            "xmlmaxarticles": "",
        }

    def config_option(self, config, name, value):
        """Store value under name if name is one of this plugin's options.

        Returns False when the option was recognised and stored, True when
        the name is not one of ours. (Presumably rawdog treats False as
        "handled, stop processing" -- confirm against the hook contract.)
        """
        if name not in self.options:
            return True
        self.options[name] = value
        return False

    def feed_name(self, feed, config):
        """Return the label used for a feed. If it has a "name" define, use
        that; otherwise, use the feed title."""
        if "define_name" in feed.args:
            return feed.args["define_name"]
        return feed.get_html_name(config)

    def article_to_xml(self, xml_article, rawdog, config, article, doc):
        """Fill the <item> node xml_article with the fields of one article.

        Adds guid, title, pubDate (when the article has a date), link (when
        non-empty) and description (from the first of "content" /
        "summary_detail" that converts to HTML). doc is the libxml2 document
        that owns xml_article; it is needed to create the CDATA block.
        Always returns True.
        """
        entry_info = article.entry_info

        # Fall back to a synthetic identifier built from the feed URL and
        # the article hash when the entry carries no id of its own.
        guid_text = entry_info.get(
            "id", self.options["xmlurl"] + "#id" + article.hash)
        guid = xml_article.newChild(
            None, 'guid', string_to_html(guid_text, config))
        guid.setProp('isPermaLink', 'false')

        unescaper = HTMLParser.HTMLParser()

        title = self.feed_name(rawdog.feeds[article.feed], config)
        entry_title = detail_to_html(
            entry_info.get("title_detail"), True, config)
        if entry_title is not None:
            # unescape() turns HTML entities into characters, including
            # "&amp;" into a bare "&". libxml2's newChild() parses its
            # content for entity references, so a bare "&" would truncate
            # the title or emit an undefined entity. Re-escape the XML
            # metacharacters before handing the text over.
            title += ": " + cgi.escape(unescaper.unescape(entry_title))
        xml_article.newChild(None, 'title', title.encode('utf-8'))

        if article.date is not None:
            xml_article.newChild(
                None, 'pubDate', rfc822_date(gmtime(article.date)))

        link = entry_info.get("link")
        if link is not None and link != "":
            link_html = string_to_html(
                unescaper.unescape(link.encode('utf-8')), config)
            xml_article.newChild(None, 'link', link_html)

        # Prefer the full content; fall back to the summary.
        for key in ("content", "summary_detail"):
            body = detail_to_html(entry_info.get(key), False, config)
            if body is not None:
                cdata = doc.newCDataBlock(body, len(body))
                xml_article.newChild(None, 'description', "").addChild(cdata)
                break

        return True

    def _save_doc(self, doc, filename, banner):
        """Write doc to filename, reporting progress on standard output.

        banner is written first without a newline, followed by " done" or,
        when saveFormatFile() returns a negative number, " failed".
        """
        # Local import: the file's import block does not bring in sys.
        import sys

        sys.stdout.write(banner)
        written = doc.saveFormatFile(filename, 1)
        if isinstance(written, int) and written < 0:
            sys.stdout.write(" failed\n")
        else:
            sys.stdout.write(" done\n")

    def write_rss(self, rawdog, config, articles):
        """Write the RSS 2.0 feed for articles to the "outputxml" file.

        At most "xmlmaxarticles" articles are included; when that option is
        empty or not a number, every article passed in is written.
        """
        doc = libxml2.newDoc("1.0")
        # The document is freed explicitly, so make sure that also happens
        # when building or saving it raises.
        try:
            rss = doc.newChild(None, 'rss', None)
            rss.setProp('version', "2.0")
            rss.setProp('xmlns:dc', "http://purl.org/dc/elements/1.1/")
            rss.setProp('xmlns:atom', 'http://www.w3.org/2005/Atom')

            channel = rss.newChild(None, 'channel', None)
            channel.newChild(None, 'title', self.options["xmltitle"])
            channel.newChild(None, 'link', self.options["xmllink"])
            channel.newChild(None, 'language', self.options["xmllanguage"])
            channel.newChild(
                None, 'description', self.options["xmldescription"])

            atom_link = channel.newChild(None, 'atom:link', None)
            atom_link.setProp('href', self.options["xmlurl"])
            atom_link.setProp('rel', 'self')
            atom_link.setProp('type', 'application/rss+xml')

            try:
                limit = int(self.options["xmlmaxarticles"])
            except ValueError:
                # Unset (the default is "") or non-numeric: no extra limit.
                limit = len(articles)

            for article in articles[:limit]:
                xml_article = channel.newChild(None, 'item', None)
                self.article_to_xml(xml_article, rawdog, config, article, doc)

            self._save_doc(
                doc,
                self.options["outputxml"],
                "Writing RSS to . . . %s" % self.options["outputxml"],
            )
        finally:
            doc.freeDoc()

    def write_foaf(self, rawdog, config):
        """Write a FOAF group listing every feed to the "outputfoaf" file.

        Feeds are emitted in sorted URL order, one foaf:member per feed.
        """
        doc = libxml2.newDoc("1.0")
        try:
            xml = doc.newChild(None, 'rdf:RDF', None)
            xml.setProp(
                'xmlns:rdf', "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
            xml.setProp('xmlns:rdfs', "http://www.w3.org/2000/01/rdf-schema#")
            xml.setProp('xmlns:foaf', "http://xmlns.com/foaf/0.1/")
            xml.setProp('xmlns:rss', "http://purl.org/rss/1.0/")
            xml.setProp('xmlns:dc', "http://purl.org/dc/elements/1.1/")

            group = xml.newChild(None, 'foaf:Group', None)
            group.newChild(None, 'foaf:name', self.options["xmltitle"])
            group.newChild(None, 'foaf:homepage', self.options["xmllink"])

            group_see_also = group.newChild(None, 'rdfs:seeAlso', None)
            group_see_also.setProp('rdf:resource', '')

            for url in sorted(rawdog.feeds.keys()):
                member = group.newChild(None, 'foaf:member', None)
                agent = member.newChild(None, 'foaf:Agent', None)
                agent.newChild(
                    None, 'foaf:name',
                    self.feed_name(rawdog.feeds[url], config))
                weblog = agent.newChild(None, 'foaf:weblog', None)
                document = weblog.newChild(None, 'foaf:Document', None)
                document.setProp('rdf:about', url)
                feed_see_also = document.newChild(None, 'rdfs:seeAlso', None)
                channel = feed_see_also.newChild(None, 'rss:channel', None)
                channel.setProp('rdf:about', '')

            self._save_doc(
                doc,
                self.options["outputfoaf"],
                "saving FOAF to: %s . . ." % self.options["outputfoaf"],
            )
        finally:
            doc.freeDoc()

    def write_opml(self, rawdog, config):
        """Write an OPML 1.1 outline of every feed to the "outputopml" file.

        Feeds are emitted in sorted URL order; the creation and modification
        dates are both set to the current time.
        """
        doc = libxml2.newDoc("1.0")
        try:
            xml = doc.newChild(None, 'opml', None)
            xml.setProp('version', "1.1")

            head = xml.newChild(None, 'head', None)
            head.newChild(None, 'title', self.options["xmltitle"])
            now = rfc822_date(gmtime())
            head.newChild(None, 'dateCreated', now)
            head.newChild(None, 'dateModified', now)
            head.newChild(None, 'ownerName', self.options["xmlownername"])
            head.newChild(None, 'ownerEmail', self.options["xmlowneremail"])

            body = xml.newChild(None, 'body', None)
            for url in sorted(rawdog.feeds.keys()):
                outline = body.newChild(None, 'outline', None)
                outline.setProp(
                    'text', self.feed_name(rawdog.feeds[url], config))
                outline.setProp('xmlUrl', url)

            self._save_doc(
                doc,
                self.options["outputopml"],
                "saving OPML to: %s . . ." % self.options["outputopml"],
            )
        finally:
            doc.freeDoc()

    def output_write(self, rawdog, config, articles):
        """Write the RSS, FOAF and OPML files, in that order.

        Always returns True.
        """
        self.write_rss(rawdog, config, articles)
        self.write_foaf(rawdog, config)
        self.write_opml(rawdog, config)
        return True
# Create the plugin's single instance and register its hook methods with
# rawdog: "config_option" first, then "output_write".
rss_feed = RSS_Feed()
for _hook_name in ("config_option", "output_write"):
    rawdoglib.plugins.attach_hook(_hook_name, getattr(rss_feed, _hook_name))
|