git.fiddlerwoaroof.com
rss.lisp
67a3d329
 (in-package :cl-user)
17e50f7b
 (declaim (optimize (safety 3) (speed 0) (debug 3)))
67a3d329
 
 (load "tables.lisp")
 
 (defpackage :whitespace.rss
   (:use #:cl #:alexandria #:postmodern #:lquery #:cl-syntax #:cl-annot.syntax #:cl-annot.class
17e50f7b
         #:whitespace.tables #:iterate #:whitespace.utils)
67a3d329
   (:import-from anaphora it))
 
bf62f0f4
 
89bed873
 (in-package :whitespace.rss)
 (cl-annot.syntax:enable-annot-syntax)
 
17e50f7b
 (lquery:define-lquery-list-function tag-name (nodes &rest tags)
   "Manipulate elements on the basis of their tag-name.
    With no arguments, return their names else return
    the corresponding tags."
   (if (null tags)
     (map 'vector #'plump:tag-name nodes)
     (apply #'vector
            (loop for node across nodes
                  if (find (plump:tag-name node) tags :test #'string=)
                  collect node))))
89bed873
 
67a3d329
 @export
 (defmacro get-elements (feed &optional (filter nil))
   (let ((feed-sym (gensym))
         (filter-lis `(lambda (x) (and (plump-dom:element-p x) ,@(loop for x in filter
                                                                       collect `(funcall ,x x))))))
     `(let ((,feed-sym ,feed))
        (remove-if-not ,filter-lis (plump:children ,feed-sym)))))
 
 @export
 (defmacro get-elements-by-tagname (feed tagname)
   `(get-elements ,feed ((lambda (x) (string= ,tagname (plump:tag-name x))))))
 
 @export
 (defmacro extract-text (selector &optional (default ""))
   `(or (lquery:$ ,selector (text) (node)) ,default))
 
 (defmacro defserializer ((specializes) &body slots)
   (with-gensyms (obj o-t p-t)
     `(defmethod serialize ((,obj ,specializes) &optional (,o-t #'identity) (,p-t #'%default-pair-transform))
       (transform-result (,o-t ,p-t)
         (slots-to-pairs ,obj ,slots)))))
 
 @export
 (defmacro xml-text-bind (syms &body body)
   "Bind the symbols passed in the second arg to the text of the matching
    elements in the document lquery has been initialized with and then run the
    body in the resulting lexical scope.  This assumes that lquery:initialize
    has already been passed the proper xml document"
   `(let* ,(loop for sym in syms
            collect `(,sym (or (lquery:$ ,(symbol-name sym) (text) (node)) "")))
      ,@body))
 
 (defmacro make-instance-from-symbols (class &rest initargs)
   `(make-instance ,class ,@(iterate (for (to from) in (ensure-mapping initargs))
                                     (appending (list (make-keyword (symbol-name to)) from)))))
 
89bed873
 @export-class
 (defclass rss-feed ()
   ((feed :accessor rss-feed-feed :initarg :feed)
    (channel :accessor rss-feed-channel :initarg :channel)
    (title :accessor rss-feed-title :initarg :title)
    (link :accessor rss-feed-link :initarg :link)
    (description :accessor rss-feed-description :initarg :description)
67a3d329
    (items :accessor rss-feed-items :initarg :items)
    (fetch-url :accessor fetch-url :initarg :fetch-url)))
89bed873
 
 @export-class
 (defclass rss-item ()
   ((item :accessor rss-item-item  :initarg :item)
    (title :accessor rss-item-title :initarg :title)
    (link :accessor rss-item-link :initarg :link)
    (description-raw :accessor rss-item-description-raw :initarg :description-raw)
    (description :accessor rss-item-description :initarg :description)
    (category :accessor rss-item-category :initarg :category)
    (comments :accessor rss-item-comments :initarg :comments)
    (enclosure :accessor rss-item-enclosure :initarg :enclosure)
    (guid :accessor rss-item-guid :initarg :guid)
    (pub-date :accessor rss-item-pub-date :initarg :pub-date)
    (source :accessor rss-item-source  :initarg :source)))
 
67a3d329
 @export
 (defun make-rss-feed (feed)
   (lquery:initialize feed)
   (let* ((channel (lquery:$ "channel" (node)))
          (fetch-url (lquery:$ "channel" (children) (tag-name "atom:link") (filter "[rel=self]") (attr :href) (node)))
          (link (lquery:$ "channel > link" (text) (node)))
          (link (if (string= link "") (lquery:$ "channel" (children) (tag-name "atom:link") (attr :href) (node)) link))
          (items (lquery:$ "item")))
     (xml-text-bind (title description)
       (make-instance-from-symbols 'rss-feed
                                   feed title link description channel fetch-url
                                   (items (iterate (for it in-sequence items)
                                                   (collecting (make-rss-item it))))))))
 
 @export
 (defgeneric serialize (cls &optional output-transform pair-transform))
89bed873
 
3ef36f73
 
67a3d329
 (defmethod serialize ((obj sequence) &optional (o-t #'identity) (p-t #'%default-pair-transform))
   (iterate (for item in-sequence obj)
            (collect (serialize item o-t p-t))))
3ef36f73
 
67a3d329
 ; These are the interface I'm planning to remove as duplicate
 (defserializer (rss-feed)
   title link description fetch-url
   (items (iterate (for item in items)
                   (collect item))))
3ef36f73
 
67a3d329
 (defserializer (rss-item)
17e50f7b
   title link (description description-raw :bind-from description-raw) guid pub-date source)
3ef36f73
 
67a3d329
 ; this is the interface to be used
 (defserializer (rss_feed_store)
   title link description fetch-url)
 
 (defserializer (rss_item_store)
   title link description fetch-url)
 
 (defmethod jonathan:%to-json ((obj rss-feed))
   (jonathan:%to-json (serialize obj #'alexandria:alist-hash-table #'%json-pair-transform)))
 
 (defmethod jonathan:%to-json ((obj rss-item))
   (jonathan:%to-json (serialize obj #'alexandria:alist-hash-table #'%json-pair-transform)))
 
 (defmacro get-id-for-object ((table key-column &optional (id-column :id)) key &body body)
   "Anaphoric macro: binds id to the id it retrieves!"
   (once-only (id-column key)
     `(let ((id (anaphora:awhen (postmodern:query (:select ,id-column :from ',table :where (:= ',key-column ,key)))
                  (caar it))))
        ,@body)))
 
 (defgeneric get-dao-for (obj &optional link)
   ; NOTE: this won't make dao objects for the _items_ when called on the feed!
   ; also NOTE: this _prefers_ the passed object
   (:method ((obj rss-feed) &optional linked-objects)
    (declare (ignore linked-objects))
    (with-slots (title link description fetch-url) obj
      (get-id-for-object (rss_feed_store link) link
        (make-instance-from-symbols 'rss_feed_store id title link description fetch-url (fetch-defaults t))) ))
 
   (:method ((obj rss-item) &optional feed)
    (with-slots (title link description-raw guid pub-date source) obj
      (get-id-for-object (rss_item_store guid) guid
        (make-instance-from-symbols 'rss_item_store id title link (description description-raw)
                                    guid pub-date source feed (fetch-defaults t))))))
3ef36f73
 
17e50f7b
 (define-condition blarg () ((text :initarg text)))
3ef36f73
 @export
67a3d329
 (defun get-feed-from-dao (rss-feed)
   (let ((feed-dao (get-dao-for rss-feed)))
     (list feed-dao
17e50f7b
           (error 'blarg :text (format t "~a~%" rss-feed))
67a3d329
           (with-slots (items) rss-feed
             (iterate (for item in items)
                      (collect (get-dao-for item (slot-value feed-dao 'id))))))))
3ef36f73
 
 
67a3d329
 @export
 (defun upsert-feed (rss-feed)
   (postmodern:ensure-transaction
     (destructuring-bind (feed items) (get-feed-from-dao rss-feed)
       (postmodern:upsert-dao feed)
       (mapcar #'postmodern:upsert-dao items))))
3ef36f73
 
67a3d329
 ; TODO: get rid of eval
3ef36f73
 @export
 (defun store-feed-dao (serialized-rss-feed &optional link)
   (declare (ignore link))
   (let* ((items nil)
67a3d329
          (rss_feed (apply #'postmodern:make-dao
                           (cons 'rss_feed_store
                                 (iterate (for (k . v) in-sequence serialized-rss-feed)
                                          (if (eql k :items)
                                            (setf items v)
                                            (appending (list k v))))))))
3ef36f73
     (iterate (for item in items)
67a3d329
              (store-item-dao (serialize item)
                              (slot-value rss_feed 'id)))
3ef36f73
     rss_feed))
 
 @export
67a3d329
 (defun store-item-dao (serialized-rss-item link)
3ef36f73
  (eval `(postmodern:make-dao
           'rss_item_store
           :feed ,link
67a3d329
           ,@(iterate (for (k . v) in-sequence serialized-rss-item)
3ef36f73
                      (appending (list k v))))))
 
67a3d329
 (defun get-and-possibly-store-feed (rss-feed)
   "Given an rss-feed, return the db's feed-id, persisting it if it doesn't already exist."
   (postmodern:ensure-transaction
     (anaphora:aif (postmodern:select-dao 'rss_feed_store (:= 'link (rss-feed-link rss-feed)))
       (car anaphora:it) ;; The postmodern query returns a nested list
       (store-feed-dao (serialize rss-feed)))))
 
 @export
 (defun store-feed (doc)
   (postmodern:with-transaction ()
     (let ((rss-feed (make-rss-feed doc)))
       (values rss-feed
               (get-and-possibly-store-feed rss-feed)))))
 
 @export ; TODO: this should eventually take a username/userobject rather than ids . . .
 (defun subscribe-to-feed (uid feedid)
   (postmodern:query
     (:insert-into 'subscriptions :set 'uid uid 'feedid feedid)))
 
3ef36f73
 #|
 (:documentation
   "Store a serialized rss object into rhe database: the basic idea here is
    that the quasi-quoted expression generates a form that would insert the
    item and then we eval it.")
 |#
89bed873
 
 @export
 (defun deserialize-item (item)
   (let ((result (make-instance 'rss-item)))
     (copy-slots (title link (description description-raw) comments enclosure guid pub-date source)
                 item
                 result)
     result))
 
 @export
 (defun deserialize-items (feed-id)
   (let ((items (postmodern:query-dao 'rss_item_store
                                      (:select :* :from 'rss_item_store :where (:= :feed feed-id)))))
     (loop for item in items collect (deserialize-item item))))
 
 @export
 (defun deserialize-feed (feed)
   (let ((result (make-instance 'rss-feed)))
67a3d329
     (copy-slots (title link description fetch-url) feed result)
89bed873
     (setf (rss-feed-items result) (deserialize-items (rfs-id feed)))
     result))
 
 @export
 (defun deserialize (&optional user-info)
67a3d329
   (default-when #() (not (null user-info))
89bed873
     (let ((feeds
             (postmodern:query-dao 'rss_feed_store
67a3d329
                                   (:select 'rss_feed_store.*
                                    :from 'rss_feed_store
                                    :inner-join  'subscriptions :on (:= 'rss_feed_store.id  'subscriptions.feedid)
89bed873
                                    :inner-join  'reader_user :on (:= 'reader_user.id  'subscriptions.uid)
67a3d329
                                    :where (:= 'reader_user.foreign_id (user-foreign-id user-info))))))
89bed873
       (apply #'vector (loop for feed in feeds collect (deserialize-feed feed))))))
 
 
 @export
 (defun normalize-html (html)
   (let ((plump-parser:*tag-dispatchers* plump:*html-tags*))
     (with-output-to-string (ss)
       (map 'vector
            (lambda (x) (plump:serialize (plump:parse (plump:text x)) ss))
            html)
       ss)))
 
 @export
 (defun make-rss-item (item)
   (lquery:initialize item)
   (flet ((dehtml (h) (plump:text (plump:parse h)))
          (get-category-names (it) ;;; TODO: simplify this---Ask Shinmera on IRC
            (if (not (equalp #() it))
              (map 'vector
                   (lambda (x) (plump:text (elt (plump:children x) 0)))
                   it)
              #())))
     (let* ((content-encoded (lquery:$ (children) (tag-name "content:encoded")))
 
            (description-element (default-when content-encoded (emptyp content-encoded)
                                   (lquery:$ (children "description"))))
 
            (description-raw (normalize-html
                               (default-when description-element (emptyp description-element)
                                 (extract-text "description"))))
 
            (description-munged (dehtml description-raw))
            (category (get-category-names (lquery:$ "category"))))
            ;(enclosure) --- TODO: implement comment / enclosure handling
3ef36f73
 
89bed873
       (xml-text-bind (title link guid pub-date source comments)
67a3d329
         (make-instance-from-symbols 'rss-item
                                     item title link description-raw (description description-munged)
                                     category guid pub-date source comments)))))
89bed873
       ;(setf (rss-item-enclosure result) enclosure)      -- TODO: comment/enclosure . . .
 
67a3d329
 
 ; \o/
 ;  | Arrr
 ; / \