cd914c16 |
(declaim (optimize (speed 0) (safety 3) (debug 3)))
(in-package :alimenta.rss)
|
93a579ec |
(defclass rss-image ()
((url :initarg :url :initform nil)
(title :initarg :title :initform nil)
(link :initarg :link :initform nil)
(width :initarg :width :initform nil)
(height :initarg :height :initform nil)
(description :initarg :description :initform nil)))
(defclass rss-category ()
|
2815e4e1 |
((category :initarg :category :accessor category :initform nil)
(domain :initarg :domain :accessor domain :initform nil)))
|
93a579ec |
(define-data-class rss-feed (doc "channel") (feed)
language copyright webmaster
generator docs cloud ttl rating
(image "image"
:value (apply #'make-image
(mapcar (lambda (x) (when (> (length x) 0)
(plump:text (elt x 0))))
(let ((plump:*tag-dispatchers* plump:*xml-tags*))
($1 (inline doc) "channel > image"
|
c5ce4bfd |
(combine "url" "title" "link" "width" "height"
"description"))))))
|
93a579ec |
(categories "category" :value (get-categories doc "channel > category"))
(text-input "textInput")
|
5dc30073 |
(managing-editor "managingEditor")
|
93a579ec |
(skip-days "skipDays")
|
5dc30073 |
(skip-hours "skipHours")
(publication-date "publicationDate" :transform get-date)
|
93a579ec |
(last-build-date "lastBuildDate" :transform get-date))
(define-data-class rss-item (doc "") (item)
(categories "category" :value (get-categories doc "> category"))
|
2815e4e1 |
source comments enclosure description)
|
cd914c16 |
|
379b0b37 |
;; TODO: finish the stuff necessary for rss->atom
(defmethod id ((object rss-feed))
(feed-link object))
|
58a15352 |
(defmethod print-object ((self rss-image) stream)
(print-unreadable-object (self stream :type t :identity t)
(format stream "~a" (slot-value self 'url))))
(defmethod print-object ((self rss-category) stream)
(print-unreadable-object (self stream :type t :identity t)
(format stream "~a~@[ ~a~]"
(slot-value self 'category)
(slot-value self 'domain))))
|
a16882ba |
(defmacro check ((test &body body))
`(let ((val (progn ,@body)))
(when (,test val)
val)))
(defun all-alpha (str)
(check-type str string)
(loop for char across str
always (alpha-char-p char)))
(defun extract-date-timezone (date-str)
(declare (optimize (debug 3)))
(let ((tz-inited nil))
(flet ((init-tz ()
|
6e681005 |
(handler-case (unless (or tz-inited
(local-time:find-timezone-by-location-name
"America/Los_Angeles"))
(local-time:reread-timezone-repository))
(error ()
(local-time:reread-timezone-repository)
(setf tz-inited t)))))
|
a16882ba |
(macrolet ((ensure-tz-inited (&body body)
`(progn (init-tz)
,@body)))
(let* ((last-space (position #\space date-str :from-end t))
(tz-name (check (all-alpha (subseq date-str (1+ last-space)))))
(timestamp-raw (if tz-name
|
6e681005 |
(subseq date-str 0 last-space)
date-str)))
|
a16882ba |
(values (if tz-name
|
6e681005 |
(ensure-tz-inited
(local-time:find-timezone-by-location-name
|
a16882ba |
(string-upcase tz-name)))
|
6e681005 |
local-time:+utc-zone+)
|
a16882ba |
timestamp-raw))))))
|
58a15352 |
(defun get-date (str)
|
a4392760 |
(declare (optimize (debug 3)))
|
58a15352 |
(handler-case
|
dcb293c2 |
(local-time:parse-timestring str)
|
58a15352 |
(local-time::invalid-timestring (c) (declare (ignore c))
|
a16882ba |
(multiple-value-bind (local-time:*default-timezone* timestamp-raw) (extract-date-timezone str)
(multiple-value-bind (res groups) (cl-ppcre:scan-to-strings "(.*)\s*([+-][0-9]{2,4})\s?$" timestamp-raw)
(let ((ts (if res (elt groups 0) timestamp-raw))
(tz-offset (if res (elt groups 1) "0000")))
(let* ((timestamp (string-trim " " ts))
|
dcb293c2 |
;; Handle numeric timzones like -0430 or +0320
|
a16882ba |
(hour-offset (parse-integer tz-offset :end 3))
(minute-offset (if (> (length tz-offset) 3)
|
dcb293c2 |
(* (signum hour-offset)
(parse-integer tz-offset :start 3))
0)))
|
a16882ba |
(loop
(restart-case (return
|
dcb293c2 |
(fw.lu:let-each (:be *)
|
a16882ba |
(chronicity:parse timestamp)
(local-time:timestamp- * minute-offset :minute)
|
5dc30073 |
(local-time:timestamp- * hour-offset :hour)))
|
a16882ba |
(pop-token () (setf timestamp
(subseq timestamp
0
(position #\space timestamp
:from-end t)))))))))))))
(defun pop-token ()
(when-let ((restart (find-restart 'pop-token)))
(invoke-restart restart)))
|
58a15352 |
(defmethod primary-value ((self rss-image))
(slot-value self 'url))
(defun make-image (url title &optional link width height description)
(let ((link (or link url)))
(make-instance 'rss-image
:url url
:title title
:link link
:width width
:height height
:description description)))
(defmethod primary-value ((self rss-category))
(slot-value self 'category))
(defun make-category (category &optional domain)
(make-instance 'rss-category :category category :domain domain))
(defun get-categories (doc tag)
($ (inline doc) tag
(combine (text) (attr "domain"))
(map-apply #'make-category)))
|
a3645f2a |
(defmethod alimenta::get-items (xml-dom (feed-type (eql :rss)))
|
cd914c16 |
($ (inline xml-dom) "channel > item"))
|
a4392760 |
(defmethod generate-xml ((item item) (feed-type (eql :rss)) &key partial)
|
cd914c16 |
(prog1 partial
|
93a579ec |
(let ((item-root (make-element ($1 (inline partial) "channel") "item")))
(flet ((make-element (tag) (make-element item-root tag)))
|
311bcb8e |
(with-slots (title id date link content) item
($ (inline (make-element "title")) (text title)
(inline (make-element "link")) (text link)
(inline (make-element "pubDate")) (text date)
|
5dc30073 |
(inline (make-element "description")) (text content))
|
311bcb8e |
(plump-dom:set-attribute
($ (inline (make-element "guid")) (text id) (node))
"isPermaLink"
"false"))))))
|
cd914c16 |
|
a4392760 |
(defmethod generate-xml ((feed feed) (feed-type (eql :rss)) &rest r)
|
cd914c16 |
(declare (ignore r))
(let* ((xml-root (plump:make-root))
(feed-root (plump:make-element xml-root "rss"))
(channel (plump-dom:make-element feed-root "channel")))
($ (inline feed-root)
(attr "version" "2.0")
(attr "xmlns:content" "http://purl.org/rss/1.0/modules/content/")
(attr "xmlns:atom" "http://www.w3.org/2005/Atom"))
(with-slots (title link feed-link description) feed
($ (inline (make-element channel "title"))
(text title))
($ (inline (make-element channel "link"))
(text link))
(awhen description
($ (inline (make-element channel "description"))
(text it)))
($ (inline (make-element channel "atom:link"))
(attr "rel" "self")
(attr "type" "application/rss+xml")
(attr "href" link)))
xml-root))
|
2815e4e1 |
(defmethod content-el ((entity rss-item))
(fw.lu:let-each (:be *)
(doc entity)
($1 (inline *)
(combine "> content::encoded"
"> description"))
(or (elt (car *) 0)
(elt (cadr *) 0))))
|
cd914c16 |
(defmethod make-item (xml-dom (type (eql :rss)))
|
93a579ec |
(let* ((*lquery-master-document* xml-dom)
(item-title ($1 "> title" (text)))
(item-link ($1 "> link" (text)))
(item-date (awhen ($1 "> pubDate" (text)) (get-date it)))
(item-guid ($1 "> guid" (text)))
(item-description ($1 "> description" (text)))
(item-content-encoded ($1 "> content::encoded" (text)))
|
8a464908 |
(content (aif (or item-content-encoded item-description)
(with-output-to-string (s)
(serialize (parse (or item-content-encoded item-description)) s))))
|
cd914c16 |
(*tag-dispatchers* *html-tags*))
|
93a579ec |
(make-instance 'rss-item
|
5dc30073 |
:content content
|
cd914c16 |
:date item-date
:doc xml-dom
:id item-guid
:link item-link
:title item-title)))
|
93a579ec |
(deftest get-date ()
(should be local-time:timestamp=
(local-time:parse-timestring "2016-01-09T23:00:00.000000-0100")
(get-date "Fri, 09 Jan 2016 23:00:00-0100"))
(should be local-time:timestamp=
(local-time:parse-timestring "2016-01-09T23:00:00.000000-0100")
(get-date "Fri, 09 Jan 2016 23:00:00 -0100"))
(should be local-time:timestamp=
(local-time:parse-timestring "2016-01-09T23:00:00.000000-0100")
|
5dc30073 |
(get-date "Fri, 09 Jan 2016 22:00:00 -0200"))
|
93a579ec |
(should be local-time:timestamp=
(local-time:parse-timestring "2016-01-09T23:00:00.000000-0100")
|
5dc30073 |
(get-date "Fri, 09 Jan 2016 21:30:00 -0230")))
|
8a464908 |
|
a4392760 |
(defmethod alimenta::-to-feed (xml-dom (type (eql :rss)) &key feed-link)
|
c5ce4bfd |
;; TODO: store feed-link
|
8a464908 |
(flet ((get-channel-element (el)
($ (inline xml-dom) el (text) (node))))
|
93a579ec |
(let* ((*lquery-master-document* xml-dom)
(doc-title (get-channel-element "channel > title"))
(doc-link (get-channel-element "channel > link"))
(doc-description (get-channel-element "channel > description"))
(doc-feed-link (or feed-link
|
a16882ba |
($ "channel > atom::link[rel=self]" (attr "href") (node)))))
|
8a464908 |
(make-instance 'rss-feed
|
c5ce4bfd |
:title doc-title
:link doc-link
:description doc-description
:feed-link doc-feed-link))))
|