(in-package :alimenta.feed-archive) (defvar *feeds*) (defvar *feed-base*) (defparameter +dirname-format+ '((:year 4) #\- (:month 2) #\- (:day 2) #\/ (:hour 2) #\- (:min 2) #\/)) (defmacro lambda* ((&rest args) &body body) (let ((rest-arg (gensym "REST"))) `(lambda (,@args &rest ,rest-arg) (declare (ignore ,rest-arg)) ,@body))) (defun safe-pull-feed (feed-url &aux (pop-times 0)) "Handles date parsing errors in the feed: chronicity won't parse certain date formats, this catches the error and modifies the format to something chronicity can handle." (handler-bind ((warning #'muffle-warning) (error (lambda* (c) (when (find-restart 'alimenta:pop-token c) (cond ((< pop-times 50) (incf pop-times) (format t "~&Processing error, trying to pop a token (popped ~d times)~%" pop-times) (alimenta:pop-token)) (t (continue))))))) (prog1-bind (feed (alimenta.pull-feed:pull-feed feed-url)) ;; Why am I decf-ing here? (alimenta:transform feed (lambda (entity) (typecase entity (alimenta:item (let ((v (alimenta:content entity))) (when v (setf (alimenta:content entity) (html-sanitizer:sanitize v)))))))) (decf pop-times)))) (defmacro with-progress-message ((stream before after &optional (error-msg " ERROR~%~4t~a~%")) &body body) (once-only (before after stream) `(handler-bind ((error (op (format ,stream ,error-msg _)))) (format ,stream "~&~a . . ." ,before) (multiple-value-prog1 (progn ,@body) (format ,stream " ~a~%" ,after))))) (defun skip-feed (c) (when-let ((restart (find-restart 'skip-feed c))) (invoke-restart restart))) (defun save-feed (feed output-file &key (if-exists :supersede)) (with-output-to-file (s output-file :if-exists if-exists) (plump:serialize (alimenta:doc feed) s))) (defun log-pull (feed-puller feed-url stream) (let ((before-message (concatenate 'string "Trying to pull: " feed-url))) (with-progress-message (stream before-message "Success") (funcall feed-puller feed-url)))) (defun normalize-feed (feed-url feed) (alimenta:filter-feed (coerce-feed-link feed-url feed) (complement #'older-than-a-month) :key 'alimenta:date)) (defun log-serialization (feed-url stream feed path) (declare (ignorable feed-url stream feed path)) (with-progress-message (stream "Serializing XML" (format nil "done with ~a at ~a" feed-url path)) (save-feed feed (merge-pathnames "feed.xml" path)))) (defun feed-relative-pathname (path &optional (feed-base *feed-base*)) (uiop:enough-pathname path feed-base)) (defun feed-index (index-stream pull-time references) (yason:with-output (index-stream :indent t) (yason:encode-object (make-instance 'feed-index :pull-time pull-time :references (remove-if 'null references))))) (defvar *error-client* nil) (defgeneric record-error (client url) (:method ((client null) feed-url))) (defun pull-and-store-feed (feed-url stream-provider &optional (feed-puller #'safe-pull-feed)) (declare (optimize (debug 3))) (flet ((log-pull (stream) (declare (inline) (dynamic-extent stream)) (log-pull feed-puller feed-url stream)) (log-serialization (stream feed path) (declare (inline) (dynamic-extent stream)) (log-serialization feed-url stream feed (stream-provider:absolute-path stream-provider path)))) (handler-bind ((cl+ssl:ssl-error-verify (lambda (c) (declare (ignore c)) (format *error-output* "~&SSL Error while pulling ~a~%" feed-url)))) (restart-case (let* ((feed (with-retry ("Pull feed again.") (normalize-feed feed-url (log-pull t))))) (trivia:match (store feed stream-provider) ((list title path) (log-serialization t feed path) (make-feed-reference (alimenta:feed-link feed) :title title :path (feed-relative-pathname (stream-provider:absolute-path stream-provider path)))))) (skip-feed () :report (lambda (s) (format s "Stop processing for ~a" feed-url)) (record-error *error-client* feed-url)))))) (defun archive-feeds (pull-time pull-directory index-stream) (prog1-bind (references (mapcar (op (pull-and-store-feed _ pull-directory)) *feeds*)) (feed-index index-stream pull-time references))) (defpackage :alimenta.feed-archive/tests (:use :cl :should-test) (:export )) (in-package :alimenta.feed-archive/tests) (eval-when (:compile-toplevel :load-toplevel :execute) (import 'alimenta.feed-archive::feed-index)) (defun hash-table= (ht1 ht2 &key (key-test 'equal) (value-test 'equal)) (let ((ht1-keys (alexandria:hash-table-keys ht1)) (ht2-keys (alexandria:hash-table-keys ht2))) (and (= (length ht1-keys) (length ht2-keys)) (every key-test ht1-keys ht2-keys) (every value-test (alexandria:hash-table-values ht1) (alexandria:hash-table-values ht2))))) (deftest feed-index () (should be hash-table= (yason:parse (with-output-to-string (s) (feed-index s (local-time:encode-timestamp 0 0 0 0 1 1 1) '())) :object-as :hash-table :json-arrays-as-vectors nil) (alexandria:alist-hash-table '(("pull-time" . "0001-01-01T00:00:00.000000-08:00") ("feeds" . ())))))