Browse code
Redesign feed index generation.
I've switched from using a series of lists to represent the feed index
to a custom class with appropriately defined yason methods for JSON
serialization.
Showing 3 changed files
... | ... |
@@ -1,7 +1,8 @@ |
1 | 1 |
(defpackage :alimenta.feed-archive.encoders |
2 | 2 |
(:use :cl :alexandria :serapeum :fw.lu :alimenta.feed-archive.tools) |
3 | 3 |
(:shadowing-import-from :alimenta.feed-archive.tools :->) |
4 |
- (:export :skip-item :the-condition :the-feed :feed-error)) |
|
4 |
+ (:export :skip-item :the-condition :the-feed :feed-error |
|
5 |
+ :unwrap-feed-errors)) |
|
5 | 6 |
|
6 | 7 |
(in-package :alimenta.feed-archive.encoders) |
7 | 8 |
|
... | ... |
@@ -22,6 +23,10 @@ |
22 | 23 |
:feed feed |
23 | 24 |
:condition condition)) |
24 | 25 |
|
26 |
+(defmacro unwrap-feed-errors (() &body body) |
|
27 |
+ `(handler-bind ((feed-error (op (error (the-condition _))))) |
|
28 |
+ ,@body)) |
|
29 |
+ |
|
25 | 30 |
(defun %encode-feed-as-json (feed item-storage-info root-dir &optional stream) |
26 | 31 |
(with-accessors ((description alimenta:description) |
27 | 32 |
(feed-link alimenta:feed-link) |
... | ... |
@@ -44,23 +49,11 @@ |
44 | 49 |
))))))) |
45 | 50 |
|
46 | 51 |
(defmethod store ((items sequence) (directory pathname)) |
47 |
- (map 'list (lambda (item) (store item directory)) |
|
48 |
- (stable-sort |
|
49 |
- (sort (remove-if #'older-than-a-week items :key #'alimenta:date) |
|
50 |
- #'string-lessp |
|
51 |
- :key #'alimenta:title) |
|
52 |
- #'local-time:timestamp> |
|
53 |
- :key #'alimenta:date))) |
|
54 |
- |
|
55 |
-(defun sort-and-filter-items (feed) |
|
56 |
- (setf (alimenta:items feed) |
|
57 |
- (stable-sort |
|
58 |
- (sort (remove-if #'older-than-a-week (alimenta:items feed) |
|
59 |
- :key #'alimenta:date) |
|
60 |
- #'string-lessp |
|
61 |
- :key #'alimenta:title) |
|
62 |
- #'local-time:timestamp> |
|
63 |
- :key #'alimenta:date))) |
|
52 |
+ (map 'list (op (store _ directory)) |
|
53 |
+ (stable-sort (sort items #'string-lessp |
|
54 |
+ :key #'alimenta:title) |
|
55 |
+ #'local-time:timestamp> |
|
56 |
+ :key #'alimenta:date))) |
|
64 | 57 |
|
65 | 58 |
(defmethod store ((feed alimenta:feed) (directory pathname)) |
66 | 59 |
(flet ((get-feed-store-name (feed directory) |
... | ... |
@@ -75,14 +68,15 @@ |
75 | 68 |
(title alimenta:title)) feed |
76 | 69 |
; We wrap all errors with our own condition |
77 | 70 |
(handler-bind ((error (lambda (c) (error 'feed-error :feed feed :condition c)))) |
78 |
- (prog1-let ((feed-title title) |
|
79 |
- (feed-store (get-feed-store-name feed directory))) |
|
80 |
- (ensure-directories-exist feed-store) |
|
81 |
- (with-open-file (index (merge-pathnames "index.json" feed-store) :direction :output) |
|
82 |
- (%encode-feed-as-json feed |
|
83 |
- (store items feed-store) |
|
84 |
- feed-store |
|
85 |
- index))))))) |
|
71 |
+ (values (prog1-let ((feed-title title) |
|
72 |
+ (feed-store (get-feed-store-name feed directory))) |
|
73 |
+ (ensure-directories-exist feed-store) |
|
74 |
+ (with-open-file (index (merge-pathnames "index.json" feed-store) :direction :output) |
|
75 |
+ (%encode-feed-as-json feed |
|
76 |
+ (store items feed-store) |
|
77 |
+ feed-store |
|
78 |
+ index))) |
|
79 |
+ feed-link))))) |
|
86 | 80 |
|
87 | 81 |
(defmethod store ((feed alimenta:feed) (stream stream)) |
88 | 82 |
(handler-bind ((error (lambda (c) |
... | ... |
@@ -15,7 +15,6 @@ |
15 | 15 |
(defclass feed-index () |
16 | 16 |
((%pull-time :initarg :pull-time :reader pull-time) |
17 | 17 |
;; Why this slot? Won't the references duplicate this? |
18 |
- (%feed-urls :initarg :feed-urls :reader feed-urls) |
|
19 | 18 |
(%feed-references :initarg :references :reader references))) |
20 | 19 |
|
21 | 20 |
(defclass feed-reference () |
... | ... |
@@ -23,19 +22,15 @@ |
23 | 22 |
(%title :initarg :title :reader title :initform nil) |
24 | 23 |
(%path :initarg :path :reader path :initform nil))) |
25 | 24 |
|
26 |
-(defun make-feed-index (pull-time feeds paths) |
|
25 |
+(defun make-feed-index (pull-time references) |
|
27 | 26 |
(make-instance 'feed-index |
28 | 27 |
:pull-time pull-time |
29 |
- :feed-urls feeds |
|
30 |
- :references (mapcar (destructuring-lambda (url (title path)) |
|
31 |
- (make-feed-reference url :title title :path path)) |
|
32 |
- feeds |
|
33 |
- paths))) |
|
28 |
+ :references (copy-seq references))) |
|
34 | 29 |
|
35 |
-(defun make-feed-reference (url &rest feed-data) |
|
30 |
+(defun make-feed-reference (url &rest feed-data &key title path) |
|
31 |
+ (declare (ignore title path)) |
|
36 | 32 |
(apply #'make-instance 'feed-reference |
37 |
- :url url |
|
38 |
- feed-data)) |
|
33 |
+ :url url feed-data)) |
|
39 | 34 |
|
40 | 35 |
(defmethod yason:encode-slots progn ((object feed-reference)) |
41 | 36 |
(let ((title (title object)) |
... | ... |
@@ -47,9 +42,8 @@ |
47 | 42 |
(yason:encode-object-element "path" path)))) |
48 | 43 |
|
49 | 44 |
(defmethod yason:encode-slots progn ((object feed-index)) |
50 |
- (with-accessors ((pull-time pull-time) (feeds feed-urls) (references references)) object |
|
51 |
- (yason:encode-object-elements "pull-time" (local-time:format-timestring nil pull-time) |
|
52 |
- "feed-urls" feeds) |
|
45 |
+ (with-accessors ((pull-time pull-time) (references references)) object |
|
46 |
+ (yason:encode-object-element "pull-time" (local-time:format-timestring nil pull-time)) |
|
53 | 47 |
(yason:with-object-element ("feeds") |
54 | 48 |
(yason:with-array () |
55 | 49 |
(mapcar 'yason:encode-object references))))) |
... | ... |
@@ -102,50 +96,59 @@ |
102 | 96 |
;; Why am I decf-ing here? |
103 | 97 |
(decf pop-times))))) |
104 | 98 |
|
105 |
-(defun log-pull (stream feed-url) |
|
106 |
- (format stream "~&Trying to pull: ~a... " feed-url) |
|
107 |
- (handler-bind ((error (lambda (c) (format stream "... Error ~a~%" c)))) |
|
108 |
- (prog1 (safe-pull-feed feed-url) |
|
109 |
- (format stream "... Success~%")))) |
|
99 |
+(defmacro with-progress-message ((stream before after &optional (error-msg " ERROR~%~4t~a~%")) &body body) |
|
100 |
+ (once-only (before after stream) |
|
101 |
+ `(handler-bind ((error (op (format ,stream ,error-msg _)))) |
|
102 |
+ (format ,stream "~&~a . . ." ,before) |
|
103 |
+ (multiple-value-prog1 (progn |
|
104 |
+ ,@body) |
|
105 |
+ (format ,stream " ~a~%" ,after))))) |
|
110 | 106 |
|
111 | 107 |
(defun skip-feed () |
112 | 108 |
(when-let ((restart (find-restart 'skip-feed))) |
113 | 109 |
(invoke-restart restart))) |
114 | 110 |
|
115 |
- |
|
116 | 111 |
(defun save-feed (feed output-file &key (if-exists :supersede)) |
117 | 112 |
(with-output-to-file (s output-file :if-exists if-exists) |
118 | 113 |
(plump:serialize (alimenta:doc feed) s))) |
119 | 114 |
|
120 | 115 |
(defun pull-and-store-feeds (feeds pull-directory) |
121 |
- (mapcar (lambda (feed-url) |
|
122 |
- (with-simple-restart (skip-feed "Skip ~a" feed-url) |
|
123 |
- (let* ((feed (with-retry ("Pull feed again.") |
|
124 |
- (log-pull t feed-url))) |
|
125 |
- (result (store (coerce-feed-link feed-url feed) |
|
126 |
- pull-directory))) |
|
127 |
- (prog1 result |
|
128 |
- (format t "Serializing XML...") |
|
129 |
- (save-feed feed |
|
130 |
- (merge-pathnames "feed.xml" |
|
131 |
- (cadr result))))))) |
|
116 |
+ (mapcar (op (pull-and-store-feed _ pull-directory)) |
|
132 | 117 |
feeds)) |
133 | 118 |
|
134 |
-(defun feed-index (index-stream pull-time paths) |
|
119 |
+(defun pull-and-store-feed (feed-url pull-directory) |
|
120 |
+ (flet ((log-pull (stream) |
|
121 |
+ (let ((before-message (format nil "Trying to pull: ~a" feed-url))) |
|
122 |
+ (with-progress-message (stream before-message "Success") |
|
123 |
+ (prog1 (safe-pull-feed feed-url))))) |
|
124 |
+ (log-serialization (stream feed path) |
|
125 |
+ (with-progress-message (stream "Serializing XML" (format nil "done with ~a" feed-url)) |
|
126 |
+ (save-feed feed (merge-pathnames "feed.xml" path))))) |
|
127 |
+ |
|
128 |
+ (with-simple-restart (skip-feed "Stop processing for ~a" feed-url) |
|
129 |
+ (let* ((feed (with-retry ("Pull feed again.") |
|
130 |
+ (alimenta:filter-feed (coerce-feed-link feed-url |
|
131 |
+ (log-pull t)) |
|
132 |
+ (complement #'older-than-a-month) |
|
133 |
+ :key 'alimenta:date)))) |
|
134 |
+ (multiple-value-bind (result url) (store feed pull-directory) |
|
135 |
+ (destructuring-bind (title path) result |
|
136 |
+ (log-serialization t feed path) |
|
137 |
+ (make-feed-reference url :title title |
|
138 |
+ :path (uiop:enough-pathname path *feed-base*)))))))) |
|
139 |
+ |
|
140 |
+(defun feed-index (index-stream pull-time references) |
|
135 | 141 |
(yason:with-output (index-stream :indent t) |
136 | 142 |
(yason:encode-object |
137 |
- (make-feed-index pull-time *feeds* |
|
138 |
- (mapcar (destructuring-lambda ((title path)) |
|
139 |
- (list title (uiop:enough-pathname path *feed-base*))) |
|
140 |
- paths))))) |
|
143 |
+ (make-feed-index pull-time (remove-if 'null references))))) |
|
141 | 144 |
|
142 | 145 |
(defun archive-feeds () |
143 | 146 |
(let* ((pull-time (local-time:now)) |
144 | 147 |
(pull-directory (get-store-directory-name pull-time)) |
145 |
- (paths (pull-and-store-feeds *feeds* pull-directory)) |
|
148 |
+ (references (pull-and-store-feeds *feeds* pull-directory)) |
|
146 | 149 |
(index-path (merge-pathnames "index.json" pull-directory))) |
147 | 150 |
(with-open-file (index index-path :direction :output) |
148 |
- (feed-index index pull-time paths)))) |
|
151 |
+ (feed-index index pull-time references)))) |
|
149 | 152 |
|
150 | 153 |
;; This is an ungodly mess, we need to avoid funneling everything through fix-pathname-or-skip |
151 | 154 |
(defun command-line-main (&optional (feed-list-initializer #'init-feeds)) |
... | ... |
@@ -167,12 +170,19 @@ |
167 | 170 |
"Unknown"))) |
168 | 171 |
(funcall restart))))))) |
169 | 172 |
|
170 |
- (handler-bind ((alimenta.feed-archive.encoders:feed-error |
|
171 |
- (lambda (c) |
|
172 |
- (fix-pathname-or-skip c :wrapped-condition (alimenta.feed-archive.encoders:the-condition c)))) |
|
173 |
- (alimenta:feed-type-unsupported #'feed-type-unsupported) |
|
174 |
- (error (lambda (c) |
|
175 |
- (fix-pathname-or-skip c :restart 'continue)))) |
|
176 |
- (multiple-value-bind (*feeds* *feed-base*) (funcall feed-list-initializer) |
|
177 |
- (alimenta.pull-feed:with-user-agent ("Feed Archiver v0.1b") |
|
178 |
- (archive-feeds)))))) |
|
173 |
+ (let ((error-count 0)) |
|
174 |
+ (handler-bind ((alimenta.feed-archive.encoders:feed-error |
|
175 |
+ (op (fix-pathname-or-skip _1 :wrapped-condition (alimenta.feed-archive.encoders:the-condition _1)))) |
|
176 |
+ (alimenta:feed-type-unsupported #'feed-type-unsupported) |
|
177 |
+ ((or usocket:timeout-error |
|
178 |
+ usocket:ns-error) (op (alimenta.pull-feed:skip-feed _))) |
|
179 |
+ (error |
|
180 |
+ (op |
|
181 |
+ (format t "~&Error signaled, ~a (count ~d)" _1 error-count) |
|
182 |
+ (incf error-count) |
|
183 |
+ (unless (< error-count 15) |
|
184 |
+ (format t " continuing~%") |
|
185 |
+ (fix-pathname-or-skip _1 :restart 'continue))))) |
|
186 |
+ (multiple-value-bind (*feeds* *feed-base*) (funcall feed-list-initializer) |
|
187 |
+ (alimenta.pull-feed:with-user-agent ("Feed Archiver v0.1b") |
|
188 |
+ (archive-feeds))))))) |
... | ... |
@@ -2,7 +2,8 @@ |
2 | 2 |
(:use :cl :alexandria :serapeum :fw.lu) |
3 | 3 |
(:shadow :->) |
4 | 4 |
(:export :fix-pathname :sha256-string :get-id :older-than-a-week :-> :get-feed-store-name |
5 |
- :store :get-item-store-name :restart-once :coerce-feed-link :with-retry)) |
|
5 |
+ :store :get-item-store-name :restart-once :coerce-feed-link :with-retry |
|
6 |
+ :older-than-a-month)) |
|
6 | 7 |
|
7 | 8 |
(in-package :alimenta.feed-archive.tools) |
8 | 9 |
|
... | ... |
@@ -52,6 +53,11 @@ |
52 | 53 |
(sha256-string (alimenta:id item)) |
53 | 54 |
#+nil ".json")) |
54 | 55 |
|
56 |
+(defun older-than-a-month (date) |
|
57 |
+ (let ((month-ago (local-time:timestamp- (local-time:now) |
|
58 |
+ 31 :day))) |
|
59 |
+ (local-time:timestamp< date month-ago))) |
|
60 |
+ |
|
55 | 61 |
(defun older-than-a-week (date) |
56 | 62 |
(let ((week-ago (local-time:timestamp- (local-time:now) |
57 | 63 |
7 :day))) |
... | ... |
@@ -74,7 +80,8 @@ next time, it re-raises the exception." |
74 | 80 |
|
75 | 81 |
(defun coerce-feed-link (link feed) |
76 | 82 |
(prog1 feed |
77 |
- (setf (alimenta:feed-link feed) link))) |
|
83 |
+ (unless (alimenta:feed-link feed) |
|
84 |
+ (setf (alimenta:feed-link feed) link)))) |
|
78 | 85 |
|
79 | 86 |
(defmacro with-retry ((&optional (message "retry the operation")) &body body) |
80 | 87 |
`(loop |