git.fiddlerwoaroof.com
Browse code

Redesign feed index generation.

I've switched from using a series of lists to represent the feed index
to a custom class with appropriately defined yason methods for JSON
serialization.

fiddlerwoaroof authored on 05/04/2017 00:46:43
Showing 3 changed files
... ...
@@ -1,7 +1,8 @@
1 1
 (defpackage :alimenta.feed-archive.encoders
2 2
   (:use :cl :alexandria :serapeum :fw.lu :alimenta.feed-archive.tools)
3 3
   (:shadowing-import-from :alimenta.feed-archive.tools :->)
4
-  (:export :skip-item :the-condition :the-feed :feed-error))
4
+  (:export :skip-item :the-condition :the-feed :feed-error
5
+	   :unwrap-feed-errors))
5 6
 
6 7
 (in-package :alimenta.feed-archive.encoders)
7 8
 
... ...
@@ -22,6 +23,10 @@
22 23
 	 :feed feed
23 24
 	 :condition condition))
24 25
 
26
+(defmacro unwrap-feed-errors (() &body body)
27
+  `(handler-bind ((feed-error (op (error (the-condition _)))))
28
+     ,@body))
29
+
25 30
 (defun %encode-feed-as-json (feed item-storage-info root-dir &optional stream)
26 31
   (with-accessors ((description alimenta:description)
27 32
 		   (feed-link alimenta:feed-link)
... ...
@@ -44,23 +49,11 @@
44 49
 	      )))))))
45 50
 
46 51
 (defmethod store ((items sequence) (directory pathname))
47
-  (map 'list (lambda (item) (store item directory))
48
-       (stable-sort
49
-	(sort (remove-if #'older-than-a-week items :key #'alimenta:date)
50
-	      #'string-lessp
51
-	      :key #'alimenta:title)
52
-	#'local-time:timestamp>
53
-	:key #'alimenta:date)))
54
-
55
-(defun sort-and-filter-items (feed)
56
-  (setf (alimenta:items feed)
57
-	(stable-sort
58
-	 (sort (remove-if #'older-than-a-week (alimenta:items feed)
59
-			  :key #'alimenta:date)
60
-	       #'string-lessp
61
-	       :key #'alimenta:title)
62
-	 #'local-time:timestamp>
63
-	 :key #'alimenta:date)))
52
+  (map 'list (op (store _ directory))
53
+       (stable-sort (sort items #'string-lessp
54
+			  :key #'alimenta:title)
55
+		    #'local-time:timestamp>
56
+		    :key #'alimenta:date)))
64 57
 
65 58
 (defmethod store ((feed alimenta:feed) (directory pathname))
66 59
   (flet ((get-feed-store-name (feed directory)
... ...
@@ -75,14 +68,15 @@
75 68
 		     (title alimenta:title)) feed
76 69
       ; We wrap all errors with our own condition
77 70
       (handler-bind ((error (lambda (c) (error 'feed-error :feed feed :condition c))))
78
-	(prog1-let ((feed-title title)
79
-		    (feed-store (get-feed-store-name feed directory)))
80
-	  (ensure-directories-exist feed-store)
81
-	  (with-open-file (index (merge-pathnames "index.json" feed-store) :direction :output)
82
-	    (%encode-feed-as-json feed
83
-				  (store items feed-store)
84
-				  feed-store
85
-				  index)))))))
71
+	(values (prog1-let ((feed-title title)
72
+			    (feed-store (get-feed-store-name feed directory)))
73
+		  (ensure-directories-exist feed-store)
74
+		  (with-open-file (index (merge-pathnames "index.json" feed-store) :direction :output)
75
+		    (%encode-feed-as-json feed
76
+					  (store items feed-store)
77
+					  feed-store
78
+					  index)))
79
+		feed-link)))))
86 80
 
87 81
 (defmethod store ((feed alimenta:feed) (stream stream))
88 82
   (handler-bind ((error (lambda (c)
... ...
@@ -15,7 +15,6 @@
15 15
 (defclass feed-index ()
16 16
   ((%pull-time :initarg :pull-time :reader pull-time)
17 17
    ;; Why this slot? Won't the references duplicate this?
18
-   (%feed-urls :initarg :feed-urls :reader feed-urls)
19 18
    (%feed-references :initarg :references :reader references)))
20 19
 
21 20
 (defclass feed-reference ()
... ...
@@ -23,19 +22,15 @@
23 22
    (%title :initarg :title :reader title :initform nil)
24 23
    (%path :initarg :path :reader path :initform nil)))
25 24
 
26
-(defun make-feed-index (pull-time feeds paths)
25
+(defun make-feed-index (pull-time references)
27 26
   (make-instance 'feed-index
28 27
 		 :pull-time pull-time
29
-		 :feed-urls feeds
30
-		 :references (mapcar (destructuring-lambda (url (title path))
31
-				       (make-feed-reference url :title title :path path))
32
-				     feeds
33
-				     paths)))
28
+		 :references (copy-seq references)))
34 29
 
35
-(defun make-feed-reference (url &rest feed-data)
30
+(defun make-feed-reference (url &rest feed-data &key title path)
31
+  (declare (ignore title path))
36 32
   (apply #'make-instance 'feed-reference
37
-	 :url url
38
-	 feed-data))
33
+	 :url url feed-data))
39 34
 
40 35
 (defmethod yason:encode-slots progn ((object feed-reference))
41 36
   (let ((title (title object))
... ...
@@ -47,9 +42,8 @@
47 42
       (yason:encode-object-element "path" path))))
48 43
 
49 44
 (defmethod yason:encode-slots progn ((object feed-index))
50
-  (with-accessors ((pull-time pull-time) (feeds feed-urls) (references references)) object
51
-    (yason:encode-object-elements "pull-time" (local-time:format-timestring nil pull-time)
52
-				  "feed-urls" feeds)
45
+  (with-accessors ((pull-time pull-time) (references references)) object
46
+    (yason:encode-object-element "pull-time" (local-time:format-timestring nil pull-time))
53 47
     (yason:with-object-element ("feeds")
54 48
       (yason:with-array ()
55 49
 	(mapcar 'yason:encode-object references)))))
... ...
@@ -102,50 +96,59 @@
102 96
 	;; Why am I decf-ing here?
103 97
 	(decf pop-times)))))
104 98
 
105
-(defun log-pull (stream feed-url)
106
-  (format stream "~&Trying to pull: ~a... " feed-url)
107
-  (handler-bind ((error (lambda (c) (format stream "... Error ~a~%" c))))
108
-      (prog1 (safe-pull-feed feed-url)
109
-		  (format stream "... Success~%"))))
99
+(defmacro with-progress-message ((stream before after &optional (error-msg " ERROR~%~4t~a~%")) &body body)
100
+  (once-only (before after stream)
101
+    `(handler-bind ((error (op (format ,stream ,error-msg _))))
102
+       (format ,stream "~&~a . . ." ,before)
103
+       (multiple-value-prog1 (progn
104
+			       ,@body)
105
+	 (format ,stream " ~a~%" ,after)))))
110 106
 
111 107
 (defun skip-feed ()
112 108
   (when-let ((restart (find-restart 'skip-feed)))
113 109
     (invoke-restart restart)))
114 110
 
115
-
116 111
 (defun save-feed (feed output-file &key (if-exists :supersede))
117 112
   (with-output-to-file (s output-file :if-exists if-exists)
118 113
     (plump:serialize (alimenta:doc feed) s)))
119 114
 
120 115
 (defun pull-and-store-feeds (feeds pull-directory)
121
-  (mapcar (lambda (feed-url)
122
-	    (with-simple-restart (skip-feed "Skip ~a" feed-url)
123
-	      (let* ((feed (with-retry ("Pull feed again.")
124
-			     (log-pull t feed-url)))
125
-		     (result (store (coerce-feed-link feed-url feed)
126
-				    pull-directory)))
127
-		(prog1 result
128
-		  (format t "Serializing XML...")
129
-		  (save-feed feed
130
-			     (merge-pathnames "feed.xml"
131
-					      (cadr result)))))))
116
+  (mapcar (op (pull-and-store-feed _ pull-directory))
132 117
 	  feeds))
133 118
 
134
-(defun feed-index (index-stream pull-time paths)
119
+(defun pull-and-store-feed (feed-url pull-directory)
120
+  (flet ((log-pull (stream)
121
+	   (let ((before-message (format nil "Trying to pull: ~a" feed-url)))
122
+	     (with-progress-message (stream before-message "Success")
123
+	       (prog1 (safe-pull-feed feed-url)))))
124
+	 (log-serialization (stream feed path)
125
+	   (with-progress-message (stream "Serializing XML" (format nil "done with ~a" feed-url))
126
+	     (save-feed feed (merge-pathnames "feed.xml" path)))))
127
+
128
+    (with-simple-restart (skip-feed "Stop processing for ~a" feed-url)
129
+      (let* ((feed (with-retry ("Pull feed again.")
130
+		     (alimenta:filter-feed (coerce-feed-link feed-url
131
+							     (log-pull t))
132
+					   (complement #'older-than-a-month)
133
+					   :key 'alimenta:date))))
134
+	(multiple-value-bind (result url) (store feed pull-directory)
135
+	  (destructuring-bind (title path) result
136
+	    (log-serialization t feed path)
137
+	    (make-feed-reference url :title title
138
+				 :path (uiop:enough-pathname path *feed-base*))))))))
139
+
140
+(defun feed-index (index-stream pull-time references)
135 141
   (yason:with-output (index-stream :indent t)
136 142
     (yason:encode-object
137
-     (make-feed-index pull-time *feeds*
138
-		      (mapcar (destructuring-lambda ((title path))
139
-				(list title (uiop:enough-pathname path *feed-base*)))
140
-			      paths)))))
143
+     (make-feed-index pull-time (remove-if 'null references)))))
141 144
 
142 145
 (defun archive-feeds ()
143 146
   (let* ((pull-time (local-time:now))
144 147
 	 (pull-directory (get-store-directory-name pull-time)) 
145
-	 (paths (pull-and-store-feeds *feeds* pull-directory))
148
+	 (references (pull-and-store-feeds *feeds* pull-directory))
146 149
 	 (index-path (merge-pathnames "index.json" pull-directory)))
147 150
     (with-open-file (index index-path :direction :output)
148
-      (feed-index index pull-time paths))))
151
+      (feed-index index pull-time references))))
149 152
 
150 153
 ;; This is an ungodly mess, we need to avoid funneling everything through fix-pathname-or-skip
151 154
 (defun command-line-main (&optional (feed-list-initializer #'init-feeds))
... ...
@@ -167,12 +170,19 @@
167 170
 					 "Unknown")))
168 171
 			   (funcall restart)))))))
169 172
 
170
-    (handler-bind ((alimenta.feed-archive.encoders:feed-error
171
-		    (lambda (c)
172
-		      (fix-pathname-or-skip c :wrapped-condition (alimenta.feed-archive.encoders:the-condition c))))
173
-		   (alimenta:feed-type-unsupported #'feed-type-unsupported)
174
-		   (error (lambda (c)
175
-			    (fix-pathname-or-skip c :restart 'continue))))
176
-      (multiple-value-bind (*feeds* *feed-base*) (funcall feed-list-initializer)
177
-	(alimenta.pull-feed:with-user-agent ("Feed Archiver v0.1b")
178
-	  (archive-feeds))))))
173
+    (let ((error-count 0))
174
+      (handler-bind ((alimenta.feed-archive.encoders:feed-error
175
+		      (op (fix-pathname-or-skip _1 :wrapped-condition (alimenta.feed-archive.encoders:the-condition _1))))
176
+		     (alimenta:feed-type-unsupported #'feed-type-unsupported)
177
+		     ((or usocket:timeout-error
178
+			  usocket:ns-error) (op (alimenta.pull-feed:skip-feed _)))
179
+		     (error
180
+		      (op
181
+			(format t "~&Error signaled, ~a (count ~d)" _1 error-count)
182
+			(incf error-count)
183
+			(unless (< error-count 15)
184
+			  (format t " continuing~%")
185
+			  (fix-pathname-or-skip _1 :restart 'continue)))))
186
+	(multiple-value-bind (*feeds* *feed-base*) (funcall feed-list-initializer)
187
+	  (alimenta.pull-feed:with-user-agent ("Feed Archiver v0.1b")
188
+	    (archive-feeds)))))))
... ...
@@ -2,7 +2,8 @@
2 2
   (:use :cl :alexandria :serapeum :fw.lu)
3 3
   (:shadow :->)
4 4
   (:export :fix-pathname :sha256-string :get-id :older-than-a-week :-> :get-feed-store-name
5
-	   :store :get-item-store-name :restart-once :coerce-feed-link :with-retry))
5
+	   :store :get-item-store-name :restart-once :coerce-feed-link :with-retry
6
+	   :older-than-a-month))
6 7
 
7 8
 (in-package :alimenta.feed-archive.tools)
8 9
 
... ...
@@ -52,6 +53,11 @@
52 53
 	       (sha256-string (alimenta:id item))
53 54
 	       #+nil ".json"))
54 55
 
56
+(defun older-than-a-month (date)
57
+  (let ((month-ago (local-time:timestamp- (local-time:now)
58
+					  31 :day)))
59
+    (local-time:timestamp< date month-ago)))
60
+
55 61
 (defun older-than-a-week (date)
56 62
   (let ((week-ago (local-time:timestamp- (local-time:now)
57 63
                                          7 :day)))
... ...
@@ -74,7 +80,8 @@ next time, it re-raises the exception."
74 80
 
75 81
 (defun coerce-feed-link (link feed)
76 82
   (prog1 feed
77
-    (setf (alimenta:feed-link feed) link)))
83
+    (unless (alimenta:feed-link feed)
84
+      (setf (alimenta:feed-link feed) link))))
78 85
 
79 86
 (defmacro with-retry ((&optional (message "retry the operation")) &body body)
80 87
   `(loop