Browse code
Refactoring...
fiddlerwoaroof authored on 26/11/2016 21:34:07
Showing 6 changed files
Showing 6 changed files
19 | 20 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,84 @@ |
1 |
+(defpackage :alimenta.feed-archive.encoders |
|
2 |
+ (:use :cl :alexandria :serapeum :fw.lu :alimenta.feed-archive.tools) |
|
3 |
+ (:shadowing-import-from :alimenta.feed-archive.tools :->) |
|
4 |
+ (:export :skip-item)) |
|
5 |
+ |
|
6 |
+(in-package :alimenta.feed-archive.encoders) |
|
7 |
+ |
|
8 |
+(defun fix-path (path) |
|
9 |
+ ;; Work around some issue with pathnames |
|
10 |
+ (setf path (merge-pathnames path (make-pathname :type :unspecific)))) |
|
11 |
+ |
|
12 |
+(defun %encode-item (root-dir item) |
|
13 |
+ (destructuring-bind (title path) item |
|
14 |
+ (format t "~&Encoding ~a~%" title) |
|
15 |
+ (restart-once (fix-pathname () (fix-path path)) |
|
16 |
+ (let ((pathname (uiop:enough-pathname path root-dir))) |
|
17 |
+ (yason:with-object () |
|
18 |
+ (yason:encode-object-element "title" title) |
|
19 |
+ (yason:encode-object-element "path" pathname)))))) |
|
20 |
+ |
|
21 |
+(defun skip-item () |
|
22 |
+ (when-let ((restart (find-restart 'skip-item))) |
|
23 |
+ (invoke-restart restart))) |
|
24 |
+ |
|
25 |
+(defun %encode-feed-as-json (feed item-storage-info root-dir &optional stream) |
|
26 |
+ (with-accessors ((description alimenta:description) |
|
27 |
+ (feed-link alimenta:feed-link) |
|
28 |
+ (items alimenta:items) |
|
29 |
+ (link alimenta:link) |
|
30 |
+ (source-type alimenta:source-type) |
|
31 |
+ (title alimenta:title)) feed) |
|
32 |
+ (yason:with-output (stream :indent t) |
|
33 |
+ (yason:with-object () |
|
34 |
+ (yason:encode-object-element "metadata" feed) |
|
35 |
+ (yason:with-object-element ("items") |
|
36 |
+ (yason:with-array () |
|
37 |
+ (dolist (item item-storage-info) |
|
38 |
+ (with-simple-restart (skip-item "Skip item ~s" (car item)) |
|
39 |
+ (%encode-item root-dir item)))))))) |
|
40 |
+ |
|
41 |
+(defmethod store ((item alimenta:item) (directory pathname)) |
|
42 |
+ (flet ((get-item-store-name (item directory) |
|
43 |
+ (let ((id (get-id item))) |
|
44 |
+ (merge-pathnames (make-pathname :name id) directory)))) |
|
45 |
+ |
|
46 |
+ (prog1-let ((item-title (alimenta:title item)) |
|
47 |
+ (fn (get-item-store-name item directory))) |
|
48 |
+ (with-open-file (item-f fn :direction :output) |
|
49 |
+ (yason:encode item item-f))))) |
|
50 |
+ |
|
51 |
+(define-condition feed-error (error) |
|
52 |
+ ((%feed :initarg :feed :initform (error "We need a feed")) |
|
53 |
+ (%condition :initarg :condition :initform (error "feed-error must wrap a condition")))) |
|
54 |
+ |
|
55 |
+(defmethod store ((feed alimenta:feed) (directory pathname)) |
|
56 |
+ (flet ((get-feed-store-name (feed directory) |
|
57 |
+ (merge-pathnames (get-id feed) |
|
58 |
+ directory))) |
|
59 |
+ |
|
60 |
+ (with-accessors ((description alimenta:description) |
|
61 |
+ (feed-link alimenta:feed-link) |
|
62 |
+ (items alimenta:items) |
|
63 |
+ (link alimenta:link) |
|
64 |
+ (source-type alimenta:source-type) |
|
65 |
+ (title alimenta:title)) feed |
|
66 |
+ (handler-bind ((error (lambda (c) (error 'feed-error :feed feed :condition c)))) |
|
67 |
+ (prog1-let ((feed-title title) |
|
68 |
+ (feed-store (get-feed-store-name feed directory))) |
|
69 |
+ (ensure-directories-exist feed-store) |
|
70 |
+ (with-open-file (index (merge-pathnames "index.json" feed-store) :direction :output) |
|
71 |
+ (%encode-feed-as-json feed |
|
72 |
+ (store items feed-store) |
|
73 |
+ feed-store |
|
74 |
+ index))))))) |
|
75 |
+ |
|
76 |
+(defmethod store ((items sequence) (directory pathname)) |
|
77 |
+ (map 'list (lambda (item) (store item directory)) |
|
78 |
+ (stable-sort |
|
79 |
+ (sort (remove-if #'older-than-a-week items :key #'alimenta:date) |
|
80 |
+ #'string-lessp |
|
81 |
+ :key #'alimenta:title) |
|
82 |
+ #'local-time:timestamp> |
|
83 |
+ :key #'alimenta:date))) |
|
84 |
+ |
... | ... |
@@ -12,100 +12,13 @@ |
12 | 12 |
|
13 | 13 |
(defun get-store-directory-name (timestamp) |
14 | 14 |
(flet ((make-dirname (timestamp) |
15 |
- (merge-pathnames (local-time:format-timestring nil (local-time:timestamp-minimize-part timestamp :sec) |
|
16 |
- :format +dirname-format+) |
|
17 |
- *feed-base*))) |
|
15 |
+ (-> (local-time:format-timestring nil (local-time:timestamp-minimize-part timestamp :sec) |
|
16 |
+ :format +dirname-format+) |
|
17 |
+ (merge-pathnames *feed-base*)))) |
|
18 | 18 |
(-> (prog1-let ((result (make-dirname timestamp))) |
19 | 19 |
(ensure-directories-exist result)) |
20 | 20 |
(car)))) |
21 | 21 |
|
22 |
-(defun %encode-item (root-dir item) |
|
23 |
- (let ((restarted nil)) |
|
24 |
- (destructuring-bind (title path) item |
|
25 |
- (tagbody start |
|
26 |
- (format t "~&Restarted: ~a" restarted) |
|
27 |
- (when restarted |
|
28 |
- (format t " ~a~%"(namestring path))) |
|
29 |
- (restart-case |
|
30 |
- (progn (format t "~&encoding . . .~%") |
|
31 |
- (let ((pathname (uiop:enough-pathname path root-dir))) |
|
32 |
- (yason:with-object () |
|
33 |
- (yason:encode-object-element "title" title) |
|
34 |
- (yason:encode-object-element "path" pathname)))) |
|
35 |
- (fix-pathname () |
|
36 |
- (setf path |
|
37 |
- (merge-pathnames path |
|
38 |
- (make-pathname :type :unspecific))) |
|
39 |
- (unless restarted |
|
40 |
- (setf restarted t) |
|
41 |
- (go start)))))))) |
|
42 |
- |
|
43 |
-(defun encode-feed-as-json (feed item-storage-info root-dir &optional stream) |
|
44 |
- (with-accessors ((description alimenta:description) |
|
45 |
- (feed-link alimenta:feed-link) |
|
46 |
- (items alimenta:items) |
|
47 |
- (link alimenta:link) |
|
48 |
- (source-type alimenta:source-type) |
|
49 |
- (title alimenta:title)) feed) |
|
50 |
- (yason:with-output (stream :indent t) |
|
51 |
- (yason:with-object () |
|
52 |
- (yason:encode-object-element "metadata" feed) |
|
53 |
- (yason:with-object-element ("items") |
|
54 |
- (yason:with-array () |
|
55 |
- (dolist (item item-storage-info) |
|
56 |
- (with-simple-restart (continue "Skip item ~s" (car item)) |
|
57 |
- (%encode-item root-dir item)))))))) |
|
58 |
- |
|
59 |
-(defmethod store ((item alimenta:item) directory) |
|
60 |
- (prog1-let ((item-title (alimenta:title item)) |
|
61 |
- (fn (get-item-store-name item directory))) |
|
62 |
- (with-open-file (item-f fn :direction :output) |
|
63 |
- (yason:encode item item-f)))) |
|
64 |
- |
|
65 |
-(defmethod store ((feed alimenta:feed) directory) |
|
66 |
- (with-accessors ((description alimenta:description) |
|
67 |
- (feed-link alimenta:feed-link) |
|
68 |
- (items alimenta:items) |
|
69 |
- (link alimenta:link) |
|
70 |
- (source-type alimenta:source-type) |
|
71 |
- (title alimenta:title)) feed |
|
72 |
- (prog1-let ((feed-title title) |
|
73 |
- (feed-store (get-feed-store-name feed directory))) |
|
74 |
- (ensure-directories-exist feed-store) |
|
75 |
- (with-open-file (index (merge-pathnames "index.json" feed-store) :direction :output) |
|
76 |
- (encode-feed-as-json feed |
|
77 |
- (store items feed-store) |
|
78 |
- feed-store |
|
79 |
- index))))) |
|
80 |
- |
|
81 |
-(defmethod store ((items sequence) directory) |
|
82 |
- (map 'list (lambda (item) (store item directory)) |
|
83 |
- (stable-sort |
|
84 |
- (sort (remove-if #'older-than-a-week items :key #'alimenta:date) |
|
85 |
- #'string-lessp |
|
86 |
- :key #'alimenta:title) |
|
87 |
- #'local-time:timestamp> |
|
88 |
- :key #'alimenta:date))) |
|
89 |
- |
|
90 |
-(defmethod yason:encode ((item alimenta:item) &optional stream) |
|
91 |
- (with-accessors ((author alimenta::author) |
|
92 |
- (content alimenta:content) |
|
93 |
- (date alimenta:date) |
|
94 |
- (id alimenta:id) |
|
95 |
- (link alimenta:link) |
|
96 |
- (title alimenta:title)) item |
|
97 |
- (let* ((date (local-time:format-timestring nil date))) |
|
98 |
- (yason:with-output (stream :indent t) |
|
99 |
- (yason:with-object () |
|
100 |
- (yason:encode-object-element "title" title) |
|
101 |
- (yason:encode-object-element "date" date) |
|
102 |
- (yason:encode-object-element "author" title) |
|
103 |
- (yason:encode-object-element "id" (princ-to-string id)) |
|
104 |
- (yason:encode-object-element "link" link) |
|
105 |
- (yason:encode-object-element "content" content))))) |
|
106 |
- item) |
|
107 |
- |
|
108 |
- |
|
109 | 22 |
(defun test-feed-list () |
110 | 23 |
(values '("http://feeds.feedburner.com/GamasutraFeatureArticles/" |
111 | 24 |
"http://feeds.feedburner.com/GamasutraNews/" |
... | ... |
@@ -115,72 +28,79 @@ |
115 | 28 |
(defun init-feeds (&key feed-list archive-root) |
116 | 29 |
(ubiquitous:restore 'alimenta.feed-archiver) |
117 | 30 |
(let ((default-root (or archive-root |
118 |
- (merge-pathnames ".feed-archive/" |
|
119 |
- (truename "~/"))))) |
|
31 |
+ (merge-pathnames ".feed-archive/" |
|
32 |
+ (truename "~/"))))) |
|
120 | 33 |
(values (ubiquitous:defaulted-value feed-list :feeds) |
121 |
- (ubiquitous:defaulted-value default-root :archive :root)))) |
|
34 |
+ (ubiquitous:defaulted-value default-root :archive :root)))) |
|
122 | 35 |
|
123 | 36 |
(defun add-feed (feed) |
124 | 37 |
(init-feeds) |
125 | 38 |
(pushnew feed |
126 |
- (ubiquitous:value :feeds) |
|
127 |
- :test #'equalp)) |
|
39 |
+ (ubiquitous:value :feeds) |
|
40 |
+ :test #'equalp)) |
|
128 | 41 |
|
129 | 42 |
(defun safe-pull-feed (feed-url) |
43 |
+ "Handles date parsing errors in the feed: chronicity won't parse certain date formats, this catches the error |
|
44 |
+and modifies the format to something chronicity can handle." |
|
130 | 45 |
(let ((pop-times 0)) |
131 |
- (handler-bind |
|
132 |
- ((condition |
|
133 |
- (lambda (c) c |
|
134 |
- (when (find-restart 'alimenta.rss::pop-token) |
|
135 |
- (if (< pop-times 50) |
|
136 |
- (progn (incf pop-times) |
|
137 |
- (format t "~&Processing error, trying to pop a token (popped ~d times)~%" |
|
138 |
- pop-times) |
|
139 |
- (alimenta.rss::pop-token)) |
|
140 |
- (progn |
|
141 |
- (break) |
|
142 |
- (continue))))))) |
|
143 |
- (prog1 (alimenta.pull-feed:pull-feed feed-url) |
|
144 |
- (decf pop-times))))) |
|
46 |
+ (flet ((pop-50-tokens (c) |
|
47 |
+ (declare (ignore c)) |
|
48 |
+ (when (find-restart 'alimenta.rss::pop-token) |
|
49 |
+ (if (< pop-times 50) |
|
50 |
+ (progn (incf pop-times) |
|
51 |
+ (format t "~&Processing error, trying to pop a token (popped ~d times)~%" |
|
52 |
+ pop-times) |
|
53 |
+ (alimenta.rss::pop-token)) |
|
54 |
+ (continue))))) |
|
55 |
+ (handler-bind ((error #'pop-50-tokens)) |
|
56 |
+ (prog1 (alimenta.pull-feed:pull-feed feed-url) |
|
57 |
+ ;; Why am I decf-ing here? |
|
58 |
+ (decf pop-times)))))) |
|
59 |
+ |
|
60 |
+(defun skip-feed () |
|
61 |
+ (when-let ((restart (find-restart 'skip-feed))) |
|
62 |
+ (invoke-restart restart))) |
|
145 | 63 |
|
146 | 64 |
(defun archive-feeds () |
147 | 65 |
(let ((pull-time (local-time:now))) |
148 |
- (alimenta.pull-feed::with-user-agent ("Feed Archiver v0.1b") |
|
149 |
- (let* ((pull-directory (get-store-directory-name pull-time)) |
|
150 |
- (paths (loop for feed-url in *feeds* collect |
|
151 |
- (with-simple-restart (continue "Skip ~a" feed-url) |
|
152 |
- (let ((feed (safe-pull-feed feed-url))) |
|
153 |
- (setf (alimenta:feed-link feed) |
|
154 |
- feed-url) |
|
155 |
- (store feed pull-directory)))))) |
|
156 |
- (with-open-file (index (merge-pathnames "index.json" pull-directory) :direction :output) |
|
157 |
- (yason:with-output (index :indent t) |
|
158 |
- (yason:with-object () |
|
159 |
- (yason:encode-object-element "pull-time" (local-time:format-timestring nil pull-time)) |
|
160 |
- (yason:encode-object-element "feed-urls" *feeds*) |
|
161 |
- (yason:with-object-element ("feeds") |
|
162 |
- (yason:with-array () |
|
163 |
- (mapcar (lambda (url feed-data) |
|
164 |
- (yason:with-object () |
|
165 |
- (yason:encode-object-element "url" url) |
|
166 |
- (when feed-data |
|
167 |
- (destructuring-bind (title path) feed-data |
|
168 |
- (yason:encode-object-element "title" title) |
|
169 |
- (yason:encode-object-element "path" |
|
170 |
- (princ-to-string |
|
171 |
- (uiop:enough-pathname path *feed-base*))))))) |
|
172 |
- *feeds* |
|
173 |
- paths)))))))))) |
|
66 |
+ (let* ((pull-directory (get-store-directory-name pull-time)) |
|
67 |
+ (paths (loop for feed-url in *feeds* collect |
|
68 |
+ (with-simple-restart (skip-feed "Skip ~a" feed-url) |
|
69 |
+ (let ((feed (safe-pull-feed feed-url))) |
|
70 |
+ (setf (alimenta:feed-link feed) |
|
71 |
+ feed-url) |
|
72 |
+ (store feed pull-directory)))))) |
|
73 |
+ (with-open-file (index (merge-pathnames "index.json" pull-directory) :direction :output) |
|
74 |
+ (yason:with-output (index :indent t) |
|
75 |
+ (yason:with-object () |
|
76 |
+ (yason:encode-object-element "pull-time" (local-time:format-timestring nil pull-time)) |
|
77 |
+ (yason:encode-object-element "feed-urls" *feeds*) |
|
78 |
+ (yason:with-object-element ("feeds") |
|
79 |
+ (yason:with-array () |
|
80 |
+ (mapcar (lambda (url feed-data) |
|
81 |
+ (yason:with-object () |
|
82 |
+ (yason:encode-object-element "url" url) |
|
83 |
+ (when feed-data |
|
84 |
+ (destructuring-bind (title path) feed-data |
|
85 |
+ (yason:encode-object-element "title" title) |
|
86 |
+ (yason:encode-object-element "path" |
|
87 |
+ (princ-to-string |
|
88 |
+ (uiop:enough-pathname path *feed-base*))))))) |
|
89 |
+ *feeds* |
|
90 |
+ paths))))))))) |
|
174 | 91 |
|
175 | 92 |
|
176 | 93 |
(defun command-line-main (&optional (feed-list-initializer #'init-feeds)) |
177 |
- (handler-bind ((error (lambda (c) |
|
178 |
- c |
|
179 |
- (format t "~&CONDITION RECEIVED: ~S~%RESTARTS: ~s~%" c (compute-restarts c)) |
|
180 |
- (if (find-restart 'fix-pathname) |
|
181 |
- (fix-pathname) |
|
182 |
- (progn (format t "~&Skip a feed...~%") |
|
183 |
- (continue)))))) |
|
184 |
- (multiple-value-bind (*feeds* *feed-base*) (funcall feed-list-initializer) |
|
185 |
- (archive-feeds)))) |
|
94 |
+ (flet ((fix-pathname-or-continue (c) |
|
95 |
+ (declare (ignorable c)) |
|
96 |
+ (format t "~&Received condition ~s~%" c) |
|
97 |
+ (if (find-restart 'fix-pathname) |
|
98 |
+ (progn (fix-pathname) |
|
99 |
+ (format t "~&Fixing pathname...~%")) |
|
100 |
+ (progn (format t "~&Skipping a feed...~%") |
|
101 |
+ (continue))))) |
|
102 |
+ (handler-bind ((error (lambda (c) (fix-pathname-or-continue c)))) |
|
103 |
+ (multiple-value-bind (*feeds* *feed-base*) (funcall feed-list-initializer) |
|
104 |
+ (alimenta.pull-feed::with-user-agent ("Feed Archiver v0.1b") |
|
105 |
+ (archive-feeds)))))) |
|
186 | 106 |
|
... | ... |
@@ -2,7 +2,7 @@ |
2 | 2 |
(:use :cl :alexandria :serapeum :fw.lu) |
3 | 3 |
(:shadow :->) |
4 | 4 |
(:export :fix-pathname :sha256-string :get-id :older-than-a-week :-> :get-feed-store-name |
5 |
- :store :get-item-store-name)) |
|
5 |
+ :store :get-item-store-name :restart-once)) |
|
6 | 6 |
|
7 | 7 |
(in-package :alimenta.feed-archive.tools) |
8 | 8 |
|
... | ... |
@@ -52,16 +52,22 @@ |
52 | 52 |
(sha256-string (alimenta:id item)) |
53 | 53 |
".json")) |
54 | 54 |
|
55 |
-(defun get-item-store-name (item directory) |
|
56 |
- (let ((id (get-id item))) |
|
57 |
- (merge-pathnames (make-pathname :name id) directory))) |
|
58 |
- |
|
59 |
-(defun get-feed-store-name (feed directory) |
|
60 |
- (merge-pathnames (get-id feed) |
|
61 |
- directory)) |
|
62 |
- |
|
63 | 55 |
(defun older-than-a-week (date) |
64 | 56 |
(let ((week-ago (local-time:timestamp- (local-time:now) |
65 | 57 |
7 :day))) |
66 | 58 |
(local-time:timestamp< date week-ago))) |
67 | 59 |
|
60 |
+(defmacro restart-once ((restart-name (&rest restart-args) &body handler) &body body) |
|
61 |
+ "Defines a restart that, the first time it's executed, runs a chunk of code and then, |
|
62 |
+next time, it re-raises the exception." |
|
63 |
+ (with-gensyms (start restarted) |
|
64 |
+ `(let ((,restarted nil)) |
|
65 |
+ (tagbody ,start |
|
66 |
+ (restart-case |
|
67 |
+ (progn ,@body) |
|
68 |
+ (,restart-name ,restart-args |
|
69 |
+ ,@handler |
|
70 |
+ (unless ,restarted |
|
71 |
+ (setf ,restarted t) |
|
72 |
+ (go ,start)))))))) |
|
73 |
+ |
... | ... |
@@ -28,3 +28,21 @@ |
28 | 28 |
(yason:with-output (stream :indent t) |
29 | 29 |
(yason:encode-object feed))) |
30 | 30 |
|
31 |
+(defmethod yason:encode ((item alimenta:item) &optional stream) |
|
32 |
+ (with-accessors ((author alimenta::author) |
|
33 |
+ (content alimenta:content) |
|
34 |
+ (date alimenta:date) |
|
35 |
+ (id alimenta:id) |
|
36 |
+ (link alimenta:link) |
|
37 |
+ (title alimenta:title)) item |
|
38 |
+ (let* ((date (local-time:format-timestring nil date))) |
|
39 |
+ (yason:with-output (stream :indent t) |
|
40 |
+ (yason:with-object () |
|
41 |
+ (yason:encode-object-element "title" title) |
|
42 |
+ (yason:encode-object-element "date" date) |
|
43 |
+ (yason:encode-object-element "author" title) |
|
44 |
+ (yason:encode-object-element "id" (princ-to-string id)) |
|
45 |
+ (yason:encode-object-element "link" link) |
|
46 |
+ (yason:encode-object-element "content" content))))) |
|
47 |
+ item) |
|
48 |
+ |