git.fiddlerwoaroof.com
feed-archive.lisp
25a661c3
 (in-package :alimenta.feed-archive)
 
 (defvar *feeds*)
 (defvar *feed-base*)
 
 (defparameter +dirname-format+
   '((:year 4) #\- (:month 2) #\- (:day 2) #\/ (:hour 2) #\- (:min 2) #\/))
 
e3a39cd6
 (defmacro lambda* ((&rest args) &body body)
   (let ((rest-arg (gensym "REST")))
     `(lambda (,@args &rest ,rest-arg)
        (declare (ignore ,rest-arg))
        ,@body)))
 
c1d0bd12
 (defun safe-pull-feed (feed-url &aux (pop-times 0))
f92f94ff
   "Handles date parsing errors in the feed: chronicity won't parse
    certain date formats, this catches the error and modifies the
    format to something chronicity can handle."
e3a39cd6
   (handler-bind ((warning #'muffle-warning)
                  (error (lambda* (c)
                           (when (find-restart 'alimenta:pop-token c)
                             (cond
                               ((< pop-times 50)
                                (incf pop-times)
                                (format t
                                        "~&Processing error, trying to pop a token (popped ~d times)~%"
                                        pop-times)
                                (alimenta:pop-token))
                               (t
                                (continue)))))))
     (prog1-bind (feed (alimenta.pull-feed:pull-feed feed-url))
       ;; Why am I decf-ing here?
       (alimenta:transform feed
c9158189
                           (lambda (entity)
                             (typecase entity
                               (alimenta:item
b4966c2f
                                (let ((v (alimenta:content entity)))
                                  (when v
                                    (setf (alimenta:content entity)
c9158189
                                          (html-sanitizer:sanitize v))))))))
e3a39cd6
       (decf pop-times))))
f7e44666
 
36fc9de2
 (defmacro with-progress-message ((stream before after &optional (error-msg " ERROR~%~4t~a~%")) &body body)
   (once-only (before after stream)
     `(handler-bind ((error (op (format ,stream ,error-msg _))))
        (format ,stream "~&~a . . ." ,before)
        (multiple-value-prog1 (progn
114a2b3d
                                ,@body)
          (format ,stream " ~a~%" ,after)))))
0010b953
 
6a6d9288
 (defun skip-feed (c)
   (when-let ((restart (find-restart 'skip-feed c)))
f7e44666
     (invoke-restart restart)))
25a661c3
 
2f3de0b8
 (defun save-feed (feed output-file &key (if-exists :supersede))
   (with-output-to-file (s output-file :if-exists if-exists)
     (plump:serialize (alimenta:doc feed) s)))
 
114a2b3d
 (defun log-pull (feed-puller feed-url stream)
   (let ((before-message (concatenate 'string "Trying to pull: " feed-url)))
     (with-progress-message (stream before-message "Success")
       (funcall feed-puller feed-url))))
 
 (defun normalize-feed (feed-url feed)
   (alimenta:filter-feed (coerce-feed-link feed-url feed)
                         (complement #'older-than-a-month)
                         :key 'alimenta:date))
 
 (defun log-serialization (feed-url stream feed path)
d28d3987
   (declare (ignorable feed-url stream feed path))
6a6d9288
   (with-progress-message (stream "Serializing XML" (format nil "done with ~a at ~a" feed-url path))
114a2b3d
     (save-feed feed (merge-pathnames "feed.xml" path))))
 
 (defun feed-relative-pathname (path &optional (feed-base *feed-base*))
   (uiop:enough-pathname path feed-base))
 
d28d3987
 (defun feed-index (index-stream pull-time references)
   (yason:with-output (index-stream :indent t)
     (yason:encode-object
9ef705d4
      (make-instance 'feed-index
                     :pull-time pull-time
                     :references (remove-if 'null references)))))
 
d28d3987
 
8eac37f7
 (defvar *error-client* nil)
 (defgeneric record-error (client url)
   (:method ((client null) feed-url)))
 
d28d3987
 (defun pull-and-store-feed (feed-url stream-provider &optional (feed-puller #'safe-pull-feed))
9ef705d4
   (declare (optimize (debug 3)))
36fc9de2
   (flet ((log-pull (stream)
114a2b3d
            (declare (inline) (dynamic-extent stream))
            (log-pull feed-puller feed-url stream))
          (log-serialization (stream feed path)
            (declare (inline) (dynamic-extent stream))
d28d3987
            (log-serialization feed-url stream feed
bf75e5ab
                               (stream-provider:absolute-path stream-provider path))))
9ef705d4
     (handler-bind ((cl+ssl:ssl-error-verify
e3a39cd6
                      (lambda (c)
                        (declare (ignore c))
                        (format *error-output* "~&SSL Error while pulling ~a~%"
                                feed-url))))
8eac37f7
       (restart-case
b4966c2f
           (let* ((feed (with-retry ("Pull feed again.")
                          (normalize-feed feed-url (log-pull t)))))
             (trivia:match (store feed stream-provider)
               ((list title path)
                (log-serialization t feed path)
                (make-feed-reference (alimenta:feed-link feed)
                                     :title title
                                     :path (feed-relative-pathname
bf75e5ab
                                            (stream-provider:absolute-path stream-provider path))))))
8eac37f7
         (skip-feed ()
           :report (lambda (s)
                     (format s "Stop processing for ~a" feed-url))
           (record-error *error-client* feed-url))))))
79f9884d
 
114a2b3d
 (defun archive-feeds (pull-time pull-directory index-stream)
d28d3987
   (prog1-bind (references (mapcar (op (pull-and-store-feed _ pull-directory))
                                   *feeds*))
     (feed-index index-stream pull-time references)))
114a2b3d
 
9ef705d4
 (defpackage :alimenta.feed-archive/tests
   (:use :cl :should-test)
   (:export ))
 (in-package :alimenta.feed-archive/tests)
 
 (eval-when (:compile-toplevel :load-toplevel :execute)
   (import 'alimenta.feed-archive::feed-index))
 
 (defun hash-table= (ht1 ht2 &key (key-test 'equal) (value-test 'equal))
   (let ((ht1-keys (alexandria:hash-table-keys ht1))
         (ht2-keys (alexandria:hash-table-keys ht2)))
     (and (= (length ht1-keys)
             (length ht2-keys))
          (every key-test ht1-keys ht2-keys)
          (every value-test
                 (alexandria:hash-table-values ht1)
                 (alexandria:hash-table-values ht2)))))
 
 (deftest feed-index ()
e3a39cd6
   (should be hash-table=
           (yason:parse
            (with-output-to-string (s)
              (feed-index s (local-time:encode-timestamp 0 0 0 0 1 1 1) '()))
            :object-as :hash-table :json-arrays-as-vectors nil)
           (alexandria:alist-hash-table
            '(("pull-time" . "0001-01-01T00:00:00.000000-08:00")
              ("feeds" . ())))))