25a661c3 |
(in-package :alimenta.feed-archive)
(defvar *feeds*)
(defvar *feed-base*)
(defparameter +dirname-format+
'((:year 4) #\- (:month 2) #\- (:day 2) #\/ (:hour 2) #\- (:min 2) #\/))
|
e3a39cd6 |
(defmacro lambda* ((&rest args) &body body)
(let ((rest-arg (gensym "REST")))
`(lambda (,@args &rest ,rest-arg)
(declare (ignore ,rest-arg))
,@body)))
|
c1d0bd12 |
(defun safe-pull-feed (feed-url &aux (pop-times 0))
|
f92f94ff |
"Handles date parsing errors in the feed: chronicity won't parse
certain date formats, this catches the error and modifies the
format to something chronicity can handle."
|
e3a39cd6 |
(handler-bind ((warning #'muffle-warning)
(error (lambda* (c)
(when (find-restart 'alimenta:pop-token c)
(cond
((< pop-times 50)
(incf pop-times)
(format t
"~&Processing error, trying to pop a token (popped ~d times)~%"
pop-times)
(alimenta:pop-token))
(t
(continue)))))))
(prog1-bind (feed (alimenta.pull-feed:pull-feed feed-url))
;; Why am I decf-ing here?
(alimenta:transform feed
|
c9158189 |
(lambda (entity)
(typecase entity
(alimenta:item
|
b4966c2f |
(let ((v (alimenta:content entity)))
(when v
(setf (alimenta:content entity)
|
c9158189 |
(html-sanitizer:sanitize v))))))))
|
e3a39cd6 |
(decf pop-times))))
|
f7e44666 |
|
36fc9de2 |
(defmacro with-progress-message ((stream before after &optional (error-msg " ERROR~%~4t~a~%")) &body body)
(once-only (before after stream)
`(handler-bind ((error (op (format ,stream ,error-msg _))))
(format ,stream "~&~a . . ." ,before)
(multiple-value-prog1 (progn
|
114a2b3d |
,@body)
(format ,stream " ~a~%" ,after)))))
|
0010b953 |
|
6a6d9288 |
(defun skip-feed (c)
(when-let ((restart (find-restart 'skip-feed c)))
|
f7e44666 |
(invoke-restart restart)))
|
25a661c3 |
|
2f3de0b8 |
(defun save-feed (feed output-file &key (if-exists :supersede))
(with-output-to-file (s output-file :if-exists if-exists)
(plump:serialize (alimenta:doc feed) s)))
|
114a2b3d |
(defun log-pull (feed-puller feed-url stream)
(let ((before-message (concatenate 'string "Trying to pull: " feed-url)))
(with-progress-message (stream before-message "Success")
(funcall feed-puller feed-url))))
(defun normalize-feed (feed-url feed)
(alimenta:filter-feed (coerce-feed-link feed-url feed)
(complement #'older-than-a-month)
:key 'alimenta:date))
(defun log-serialization (feed-url stream feed path)
|
d28d3987 |
(declare (ignorable feed-url stream feed path))
|
6a6d9288 |
(with-progress-message (stream "Serializing XML" (format nil "done with ~a at ~a" feed-url path))
|
114a2b3d |
(save-feed feed (merge-pathnames "feed.xml" path))))
(defun feed-relative-pathname (path &optional (feed-base *feed-base*))
(uiop:enough-pathname path feed-base))
|
d28d3987 |
(defun feed-index (index-stream pull-time references)
(yason:with-output (index-stream :indent t)
(yason:encode-object
|
9ef705d4 |
(make-instance 'feed-index
:pull-time pull-time
:references (remove-if 'null references)))))
|
d28d3987 |
|
8eac37f7 |
(defvar *error-client* nil)
(defgeneric record-error (client url)
(:method ((client null) feed-url)))
|
d28d3987 |
(defun pull-and-store-feed (feed-url stream-provider &optional (feed-puller #'safe-pull-feed))
|
9ef705d4 |
(declare (optimize (debug 3)))
|
36fc9de2 |
(flet ((log-pull (stream)
|
114a2b3d |
(declare (inline) (dynamic-extent stream))
(log-pull feed-puller feed-url stream))
(log-serialization (stream feed path)
(declare (inline) (dynamic-extent stream))
|
d28d3987 |
(log-serialization feed-url stream feed
|
bf75e5ab |
(stream-provider:absolute-path stream-provider path))))
|
9ef705d4 |
(handler-bind ((cl+ssl:ssl-error-verify
|
e3a39cd6 |
(lambda (c)
(declare (ignore c))
(format *error-output* "~&SSL Error while pulling ~a~%"
feed-url))))
|
8eac37f7 |
(restart-case
|
b4966c2f |
(let* ((feed (with-retry ("Pull feed again.")
(normalize-feed feed-url (log-pull t)))))
(trivia:match (store feed stream-provider)
((list title path)
(log-serialization t feed path)
(make-feed-reference (alimenta:feed-link feed)
:title title
:path (feed-relative-pathname
|
bf75e5ab |
(stream-provider:absolute-path stream-provider path))))))
|
8eac37f7 |
(skip-feed ()
:report (lambda (s)
(format s "Stop processing for ~a" feed-url))
(record-error *error-client* feed-url))))))
|
79f9884d |
|
114a2b3d |
(defun archive-feeds (pull-time pull-directory index-stream)
|
d28d3987 |
(prog1-bind (references (mapcar (op (pull-and-store-feed _ pull-directory))
*feeds*))
(feed-index index-stream pull-time references)))
|
114a2b3d |
|
9ef705d4 |
(defpackage :alimenta.feed-archive/tests
(:use :cl :should-test)
(:export ))
(in-package :alimenta.feed-archive/tests)
(eval-when (:compile-toplevel :load-toplevel :execute)
(import 'alimenta.feed-archive::feed-index))
(defun hash-table= (ht1 ht2 &key (key-test 'equal) (value-test 'equal))
(let ((ht1-keys (alexandria:hash-table-keys ht1))
(ht2-keys (alexandria:hash-table-keys ht2)))
(and (= (length ht1-keys)
(length ht2-keys))
(every key-test ht1-keys ht2-keys)
(every value-test
(alexandria:hash-table-values ht1)
(alexandria:hash-table-values ht2)))))
(deftest feed-index ()
|
e3a39cd6 |
(should be hash-table=
(yason:parse
(with-output-to-string (s)
(feed-index s (local-time:encode-timestamp 0 0 0 0 1 1 1) '()))
:object-as :hash-table :json-arrays-as-vectors nil)
(alexandria:alist-hash-table
'(("pull-time" . "0001-01-01T00:00:00.000000-08:00")
("feeds" . ())))))
|