git.fiddlerwoaroof.com
rss.lisp
cd914c16
 (declaim (optimize (speed 0) (safety 3) (debug 3)))
 (in-package :alimenta.rss)
93a579ec
 
 (defclass rss-image ()
   ((url :initarg :url :initform nil)
    (title :initarg :title :initform nil)
    (link :initarg :link :initform nil)
    (width :initarg :width :initform nil)
    (height :initarg :height :initform nil)
    (description :initarg :description :initform nil)))
 
 (defclass rss-category ()
2815e4e1
   ((category :initarg :category :accessor category :initform nil)
    (domain :initarg :domain :accessor domain :initform nil)))
93a579ec
 
 (define-data-class rss-feed (doc "channel") (feed)
   language copyright webmaster
   generator docs cloud ttl rating
   (image "image"
          :value (apply #'make-image
                        (mapcar (lambda (x) (when (> (length x) 0)
                                              (plump:text (elt x 0))))
                                (let ((plump:*tag-dispatchers* plump:*xml-tags*))
                                  ($1 (inline doc) "channel > image"
c5ce4bfd
                                    (combine "url" "title" "link" "width" "height"
                                             "description"))))))
93a579ec
 
   (categories "category" :value (get-categories doc  "channel > category"))
   (text-input "textInput")
5dc30073
   (managing-editor "managingEditor")
93a579ec
   (skip-days "skipDays")
5dc30073
   (skip-hours "skipHours")
   (publication-date "publicationDate" :transform get-date)
93a579ec
   (last-build-date "lastBuildDate" :transform get-date))
 
 (define-data-class rss-item (doc "") (item)
   (categories "category" :value (get-categories doc "> category"))
2815e4e1
   source comments enclosure description)
cd914c16
 
379b0b37
 ;; TODO: finish the stuff necessary for rss->atom
 (defmethod id ((object rss-feed))
   (feed-link object))
 
58a15352
 (defmethod print-object ((self rss-image) stream)
   (print-unreadable-object (self stream :type t :identity t)
     (format stream "~a" (slot-value self 'url))))
 
 (defmethod print-object ((self rss-category) stream)
   (print-unreadable-object (self stream :type t :identity t)
     (format stream "~a~@[ ~a~]"
             (slot-value self 'category)
             (slot-value self 'domain))))
 
a16882ba
 (defmacro check ((test &body body))
   `(let ((val (progn ,@body)))
      (when (,test val)
        val)))
 
 (defun all-alpha (str)
   (check-type str string)
   (loop for char across str
         always (alpha-char-p char)))
 
 (defun extract-date-timezone (date-str)
   (declare (optimize (debug 3)))
   (let ((tz-inited nil))
     (flet ((init-tz ()
6e681005
              (handler-case (unless (or tz-inited
                                        (local-time:find-timezone-by-location-name
                                         "America/Los_Angeles"))
                              (local-time:reread-timezone-repository))
                (error ()
                  (local-time:reread-timezone-repository)
                  (setf tz-inited t)))))
a16882ba
 
       (macrolet ((ensure-tz-inited (&body body)
                    `(progn (init-tz)
                            ,@body)))
         (let* ((last-space (position #\space date-str :from-end t))
                (tz-name (check (all-alpha (subseq date-str (1+ last-space)))))
                (timestamp-raw (if tz-name
6e681005
                                   (subseq date-str 0 last-space)
                                   date-str)))
a16882ba
           (values (if tz-name
6e681005
                       (ensure-tz-inited
                        (local-time:find-timezone-by-location-name
a16882ba
                         (string-upcase tz-name)))
6e681005
                       local-time:+utc-zone+)
a16882ba
                   timestamp-raw))))))
 
58a15352
 (defun get-date (str)
a4392760
   (declare (optimize (debug 3)))
58a15352
   (handler-case
dcb293c2
       (local-time:parse-timestring str)
58a15352
     (local-time::invalid-timestring (c) (declare (ignore c))
a16882ba
       (multiple-value-bind (local-time:*default-timezone* timestamp-raw) (extract-date-timezone str)
         (multiple-value-bind (res groups) (cl-ppcre:scan-to-strings "(.*)\s*([+-][0-9]{2,4})\s?$" timestamp-raw)
           (let ((ts (if res (elt groups 0) timestamp-raw))
                 (tz-offset (if res (elt groups 1) "0000")))
             (let* ((timestamp (string-trim " " ts))
dcb293c2
                    ;; Handle numeric timzones like -0430 or +0320
a16882ba
                    (hour-offset (parse-integer tz-offset :end 3))
                    (minute-offset (if (> (length tz-offset) 3)
dcb293c2
                                       (* (signum hour-offset)
                                          (parse-integer tz-offset :start 3))
                                       0)))
a16882ba
 
               (loop
                 (restart-case (return
dcb293c2
                                 (fw.lu:let-each (:be *)
a16882ba
                                   (chronicity:parse timestamp)
                                   (local-time:timestamp- * minute-offset :minute)
5dc30073
                                   (local-time:timestamp- * hour-offset   :hour)))
a16882ba
                   (pop-token () (setf timestamp
                                       (subseq timestamp
                                               0
                                               (position #\space timestamp
                                                         :from-end t)))))))))))))
 
 (defun pop-token ()
   (when-let ((restart (find-restart 'pop-token)))
     (invoke-restart restart)))
58a15352
 
 (defmethod primary-value ((self rss-image))
   (slot-value self 'url))
 
 (defun make-image (url title &optional link width height description)
   (let ((link (or link url)))
     (make-instance 'rss-image
                    :url url
                    :title title
                    :link link
                    :width width
                    :height height
                    :description description)))
 
 (defmethod primary-value ((self rss-category))
   (slot-value self 'category))
 
 (defun make-category (category &optional domain)
  (make-instance 'rss-category :category category :domain domain))
 
 (defun get-categories (doc tag)
   ($ (inline doc) tag
      (combine (text) (attr "domain"))
      (map-apply #'make-category)))
 
a3645f2a
 (defmethod alimenta::get-items (xml-dom (feed-type (eql :rss)))
cd914c16
   ($ (inline xml-dom) "channel > item"))
 
a4392760
 (defmethod generate-xml ((item item) (feed-type (eql :rss)) &key partial)
cd914c16
   (prog1 partial
93a579ec
     (let ((item-root (make-element ($1 (inline partial) "channel") "item")))
       (flet ((make-element (tag) (make-element item-root tag)))
311bcb8e
 	(with-slots (title id date link content) item
 	  ($ (inline (make-element "title")) (text title)
 	     (inline (make-element "link")) (text link)
 	     (inline (make-element "pubDate")) (text date)
5dc30073
 	     (inline (make-element "description")) (text content))
311bcb8e
 	  (plump-dom:set-attribute
 	   ($ (inline (make-element "guid")) (text id) (node))
 	   "isPermaLink"
 	   "false"))))))
cd914c16
 
a4392760
 (defmethod generate-xml ((feed feed) (feed-type (eql :rss)) &rest r)
cd914c16
   (declare (ignore r))
   (let* ((xml-root (plump:make-root))
          (feed-root (plump:make-element xml-root "rss"))
          (channel (plump-dom:make-element feed-root "channel")))
     ($ (inline feed-root)
        (attr "version" "2.0")
        (attr "xmlns:content" "http://purl.org/rss/1.0/modules/content/")
        (attr "xmlns:atom" "http://www.w3.org/2005/Atom"))
     (with-slots (title link feed-link description) feed
       ($ (inline (make-element channel "title"))
          (text title))
       ($ (inline (make-element channel "link"))
          (text link))
       (awhen description
         ($ (inline (make-element channel "description"))
            (text it)))
       ($ (inline (make-element channel "atom:link"))
          (attr "rel" "self")
          (attr "type" "application/rss+xml")
          (attr "href" link)))
     xml-root))
 
2815e4e1
 (defmethod content-el ((entity rss-item))
   (fw.lu:let-each (:be *)
     (doc entity)
     ($1 (inline *)
 	(combine "> content::encoded"
 		 "> description"))
     (or (elt (car *) 0)
 	(elt (cadr *) 0))))
 
cd914c16
 (defmethod make-item (xml-dom (type (eql :rss)))
93a579ec
   (let* ((*lquery-master-document* xml-dom)
          (item-title ($1 "> title" (text)))
          (item-link ($1 "> link" (text)))
          (item-date (awhen ($1 "> pubDate" (text)) (get-date it)))
          (item-guid ($1 "> guid" (text)))
          (item-description ($1 "> description" (text)))
          (item-content-encoded ($1 "> content::encoded" (text)))
8a464908
          (content (aif (or item-content-encoded item-description)
                     (with-output-to-string (s)
                       (serialize (parse (or item-content-encoded item-description)) s))))
cd914c16
          (*tag-dispatchers* *html-tags*))
93a579ec
     (make-instance 'rss-item
5dc30073
                    :content content
cd914c16
                    :date item-date
                    :doc xml-dom
                    :id item-guid
                    :link item-link
                    :title item-title)))
 
93a579ec
 (deftest get-date ()
   (should be local-time:timestamp=
           (local-time:parse-timestring "2016-01-09T23:00:00.000000-0100")
           (get-date "Fri, 09 Jan 2016 23:00:00-0100"))
   (should be local-time:timestamp=
           (local-time:parse-timestring "2016-01-09T23:00:00.000000-0100")
           (get-date "Fri, 09 Jan 2016 23:00:00 -0100"))
   (should be local-time:timestamp=
           (local-time:parse-timestring "2016-01-09T23:00:00.000000-0100")
5dc30073
           (get-date "Fri, 09 Jan 2016 22:00:00 -0200"))
93a579ec
   (should be local-time:timestamp=
           (local-time:parse-timestring "2016-01-09T23:00:00.000000-0100")
5dc30073
           (get-date "Fri, 09 Jan 2016 21:30:00 -0230")))
8a464908
 
a4392760
 (defmethod alimenta::-to-feed (xml-dom (type (eql :rss)) &key feed-link)
c5ce4bfd
   ;; TODO: store feed-link
8a464908
   (flet ((get-channel-element (el)
            ($ (inline xml-dom) el (text) (node))))
93a579ec
     (let* ((*lquery-master-document* xml-dom)
            (doc-title (get-channel-element "channel > title"))
            (doc-link (get-channel-element "channel > link"))
            (doc-description (get-channel-element "channel > description"))
            (doc-feed-link (or feed-link
a16882ba
                               ($ "channel > atom::link[rel=self]" (attr "href") (node)))))
8a464908
       (make-instance 'rss-feed
c5ce4bfd
                      :title doc-title
                      :link doc-link
                      :description doc-description
                      :feed-link doc-feed-link))))