git.fiddlerwoaroof.com
Raw Blame History
(defpackage :alimenta.feed-archive
  (:use :cl :alexandria :serapeum :fw.lu :alimenta.feed-archive.tools)
  (:shadowing-import-from :alimenta.feed-archive.tools :->)
  (:export #:init-feeds #:archive-feeds #:command-line-main))

(in-package :alimenta.feed-archive)


(defvar *feeds*)
(defvar *feed-base*)

(defparameter +dirname-format+
  '((:year 4) #\- (:month 2) #\- (:day 2) #\/ (:hour 2) #\- (:min 2) #\/))

(defclass feed-index ()
  ((%pull-time :initarg :pull-time :reader pull-time)
   ;; Why this slot? Won't the references duplicate this?
   (%feed-urls :initarg :feed-urls :reader feed-urls)
   (%feed-references :initarg :references :reader references)))

(defclass feed-reference ()
  ((%url :initarg :url :reader url)
   (%title :initarg :title :reader title :initform nil)
   (%path :initarg :path :reader path :initform nil)))

(defun make-feed-index (pull-time feeds paths)
  (make-instance 'feed-index
		 :pull-time pull-time
		 :feed-urls feeds
		 :references (mapcar (destructuring-lambda (url (title path))
				       (make-feed-reference url :title title :path path))
				     feeds
				     paths)))

(defun make-feed-reference (url &rest feed-data)
  (apply #'make-instance 'feed-reference
	 :url url
	 feed-data))

(defmethod yason:encode-slots progn ((object feed-reference))
  (let ((title (title object))
	(path (path object)))
    (yason:encode-object-element "url" (url object))
    (when title
      (yason:encode-object-element "title" title))
    (when path
      (yason:encode-object-element "path" path))))

(defmethod yason:encode-slots progn ((object feed-index))
  (with-accessors ((pull-time pull-time) (feeds feed-urls) (references references)) object
    (yason:encode-object-elements "pull-time" (local-time:format-timestring nil pull-time)
				  "feed-urls" feeds)
    (yason:with-object-element ("feeds")
      (yason:with-array ()
	(mapcar 'yason:encode-object references)))))

(defun get-store-directory-name (timestamp)
  (flet ((make-dirname (timestamp)
	   (-> (local-time:format-timestring nil (local-time:timestamp-minimize-part timestamp :sec)
					     :format +dirname-format+)
	       (merge-pathnames *feed-base*))))
    (-> (prog1-let ((result (make-dirname timestamp)))
	  (ensure-directories-exist result))
	(car))))

(defun test-feed-list ()
  (values '("http://feeds.feedburner.com/GamasutraFeatureArticles/"
	    "https://www.codinghorror.com/blog/index.xml"
	    "https://sancrucensis.wordpress.com/feed/")
	  #p"/tmp/feed-archive/"))

(defun init-feeds (&key feed-list archive-root)
  (ubiquitous:restore 'alimenta.feed-archiver)
  (let ((default-root (or archive-root
			  (merge-pathnames ".feed-archive/"
					   (truename "~/")))))
    (values (ubiquitous:defaulted-value feed-list :feeds)
	    (ubiquitous:defaulted-value default-root :archive :root))))

(defun add-feed (feed)
  (init-feeds)
  (pushnew feed
	   (ubiquitous:value :feeds)
	   :test #'equalp))

(defun safe-pull-feed (feed-url &aux (pop-times 0))
  "Handles date parsing errors in the feed: chronicity won't parse
   certain date formats, this catches the error and modifies the
   format to something chronicity can handle."
  (flet ((pop-50-tokens (c)
	   (declare (ignore c))
	   (when (find-restart 'alimenta:pop-token) 
	     (if (< pop-times 50)
		 (progn (incf pop-times)
			(format t "~&Processing error, trying to pop a token (popped ~d times)~%"
				pop-times)
			(alimenta:pop-token))
		 (continue)))))
    (handler-bind ((warning #'muffle-warning)
		   (error #'pop-50-tokens))
      (prog1 (alimenta.pull-feed:pull-feed feed-url)
	;; Why am I decf-ing here?
	(decf pop-times)))))

(defun log-pull (stream feed-url)
  (format stream "~&Trying to pull: ~a... " feed-url)
  (handler-bind ((error (lambda (c) (format stream "... Error ~a~%" c))))
      (prog1 (safe-pull-feed feed-url)
		  (format stream "... Success~%"))))

(defun skip-feed ()
  (when-let ((restart (find-restart 'skip-feed)))
    (invoke-restart restart)))


(defun save-feed (feed output-file &key (if-exists :supersede))
  (with-output-to-file (s output-file :if-exists if-exists)
    (plump:serialize (alimenta:doc feed) s)))

(defun pull-and-store-feeds (feeds pull-directory)
  (mapcar (lambda (feed-url)
	    (with-simple-restart (skip-feed "Skip ~a" feed-url)
	      (let* ((feed (with-retry ("Pull feed again.")
			     (log-pull t feed-url)))
		     (result (store (coerce-feed-link feed-url feed)
				    pull-directory)))
		(prog1 result
		  (format t "Serializing XML...")
		  (save-feed feed
			     (merge-pathnames "feed.xml"
					      (cadr result)))))))
	  feeds))

(defun feed-index (index-stream pull-time paths)
  (yason:with-output (index-stream :indent t)
    (yason:encode-object
     (make-feed-index pull-time *feeds*
		      (mapcar (destructuring-lambda ((title path))
				(list title (uiop:enough-pathname path *feed-base*)))
			      paths)))))

(defun archive-feeds ()
  (let* ((pull-time (local-time:now))
	 (pull-directory (get-store-directory-name pull-time)) 
	 (paths (pull-and-store-feeds *feeds* pull-directory))
	 (index-path (merge-pathnames "index.json" pull-directory)))
    (with-open-file (index index-path :direction :output)
      (feed-index index pull-time paths))))

;; This is an ungodly mess, we need to avoid funneling everything through fix-pathname-or-skip
(defun command-line-main (&optional (feed-list-initializer #'init-feeds))
  (labels ((feed-type-unsupported (c &key (restart 'skip-feed))
	     (format t "~&Feed type unsupported: ~a for feed ~a~%"
		     (alimenta:feed-type c)
		     (alimenta:feed-link c))
	     (funcall restart))
	   (fix-pathname-or-skip (c &key (restart 'skip-feed) (wrapped-condition nil wc-p))
	     (typecase (or wrapped-condition c)
	       (alimenta:feed-type-unsupported (feed-type-unsupported c))
	       (otherwise
		(if (find-restart 'fix-pathname)
		    (fix-pathname)
		    (progn (unless (eq restart 'continue)
			     (format t "~&Skipping a feed... ~s~%"
				     (if wc-p
					 (alimenta.feed-archive.encoders:the-feed c)
					 "Unknown")))
			   (funcall restart)))))))

    (handler-bind ((alimenta.feed-archive.encoders:feed-error
		    (lambda (c)
		      (fix-pathname-or-skip c :wrapped-condition (alimenta.feed-archive.encoders:the-condition c))))
		   (alimenta:feed-type-unsupported #'feed-type-unsupported)
		   (error (lambda (c)
			    (fix-pathname-or-skip c :restart 'continue))))
      (multiple-value-bind (*feeds* *feed-base*) (funcall feed-list-initializer)
	(alimenta.pull-feed:with-user-agent ("Feed Archiver v0.1b")
	  (archive-feeds))))))