Browse code
Refactor feed-archive and ignore junk
fiddlerwoaroof authored on 10/03/2017 20:27:29
Showing 3 changed files
Showing 3 changed files
... | ... |
@@ -5,12 +5,55 @@ |
5 | 5 |
|
6 | 6 |
(in-package :alimenta.feed-archive) |
7 | 7 |
|
8 |
+ |
|
8 | 9 |
(defvar *feeds*) |
9 | 10 |
(defvar *feed-base*) |
10 | 11 |
|
11 | 12 |
(defparameter +dirname-format+ |
12 | 13 |
'((:year 4) #\- (:month 2) #\- (:day 2) #\/ (:hour 2) #\- (:min 2) #\/)) |
13 | 14 |
|
15 |
+(defclass feed-index () |
|
16 |
+ ((%pull-time :initarg :pull-time :reader pull-time) |
|
17 |
+ ;; Why this slot? Won't the references duplicate this? |
|
18 |
+ (%feed-urls :initarg :feed-urls :reader feed-urls) |
|
19 |
+ (%feed-references :initarg :references :reader references))) |
|
20 |
+ |
|
21 |
+(defclass feed-reference () |
|
22 |
+ ((%url :initarg :url :reader url) |
|
23 |
+ (%title :initarg :title :reader title :initform nil) |
|
24 |
+ (%path :initarg :path :reader path :initform nil))) |
|
25 |
+ |
|
26 |
+(defun make-feed-index (pull-time feeds paths) |
|
27 |
+ (make-instance 'feed-index |
|
28 |
+ :pull-time pull-time |
|
29 |
+ :feed-urls feeds |
|
30 |
+ :references (mapcar (destructuring-lambda (url (title path)) |
|
31 |
+ (make-feed-reference url :title title :path path)) |
|
32 |
+ feeds |
|
33 |
+ paths))) |
|
34 |
+ |
|
35 |
+(defun make-feed-reference (url &rest feed-data) |
|
36 |
+ (apply #'make-instance 'feed-reference |
|
37 |
+ :url url |
|
38 |
+ feed-data)) |
|
39 |
+ |
|
40 |
+(defmethod yason:encode-slots progn ((object feed-reference)) |
|
41 |
+ (let ((title (title object)) |
|
42 |
+ (path (path object))) |
|
43 |
+ (yason:encode-object-element "url" (url object)) |
|
44 |
+ (when title |
|
45 |
+ (yason:encode-object-element "title" title)) |
|
46 |
+ (when path |
|
47 |
+ (yason:encode-object-element "path" path)))) |
|
48 |
+ |
|
49 |
+(defmethod yason:encode-slots progn ((object feed-index)) |
|
50 |
+ (with-accessors ((pull-time pull-time) (feeds feed-urls) (references references)) object |
|
51 |
+ (yason:encode-object-elements "pull-time" (local-time:format-timestring nil pull-time) |
|
52 |
+ "feed-urls" feeds) |
|
53 |
+ (yason:with-object-element ("feeds") |
|
54 |
+ (yason:with-array () |
|
55 |
+ (mapcar 'yason:encode-object references))))) |
|
56 |
+ |
|
14 | 57 |
(defun get-store-directory-name (timestamp) |
15 | 58 |
(flet ((make-dirname (timestamp) |
16 | 59 |
(-> (local-time:format-timestring nil (local-time:timestamp-minimize-part timestamp :sec) |
... | ... |
@@ -55,77 +98,29 @@ |
55 | 98 |
(continue))))) |
56 | 99 |
(handler-bind ((warning #'muffle-warning) |
57 | 100 |
(error #'pop-50-tokens)) |
58 |
- (format t "~&Trying to pull: ~a... " feed-url) |
|
59 | 101 |
(prog1 (alimenta.pull-feed:pull-feed feed-url) |
60 | 102 |
;; Why am I decf-ing here? |
61 |
- (format t "... Success~%") |
|
62 | 103 |
(decf pop-times))))) |
63 | 104 |
|
105 |
+(defun log-pull (stream feed-url) |
|
106 |
+ (format stream "~&Trying to pull: ~a... " feed-url) |
|
107 |
+ (handler-bind ((error (lambda (c) (format stream "... Error ~a~%" c)))) |
|
108 |
+ (prog1 (safe-pull-feed feed-url) |
|
109 |
+ (format stream "... Success~%")))) |
|
110 |
+ |
|
64 | 111 |
(defun skip-feed () |
65 | 112 |
(when-let ((restart (find-restart 'skip-feed))) |
66 | 113 |
(invoke-restart restart))) |
67 | 114 |
|
68 |
-(defun pull-and-store-feeds (feeds pull-directory) |
|
69 |
- (loop for feed-url in feeds |
|
70 |
- collect |
|
71 |
- (with-simple-restart (skip-feed "Skip ~a" feed-url) |
|
72 |
- (let ((feed (safe-pull-feed feed-url))) |
|
73 |
- (setf (alimenta:feed-link feed) |
|
74 |
- feed-url) |
|
75 |
- (store feed pull-directory))))) |
|
76 | 115 |
|
77 |
-(defun archive-feeds () |
|
78 |
- (let* ((pull-time (local-time:now)) |
|
79 |
- (pull-directory (get-store-directory-name pull-time)) |
|
80 |
- (paths (pull-and-store-feeds *feeds* pull-directory))) |
|
81 |
- (with-open-file (index (merge-pathnames "index.json" pull-directory) :direction :output) |
|
82 |
- (feed-index index pull-time paths)))) |
|
83 |
- |
|
84 |
-(defclass feed-reference () |
|
85 |
- ((%url :initarg :url :reader url) |
|
86 |
- (%title :initarg :title :reader title :initform nil) |
|
87 |
- (%path :initarg :path :reader path :initform nil))) |
|
88 |
- |
|
89 |
-(defun make-feed-reference (url &rest feed-data) |
|
90 |
- (apply #'make-instance 'feed-reference |
|
91 |
- :url url |
|
92 |
- feed-data)) |
|
93 |
- |
|
94 |
-(defmethod yason:encode-slots progn ((object feed-reference)) |
|
95 |
- (let ((title (title object)) |
|
96 |
- (path (path object))) |
|
97 |
- (yason:encode-object-element "url" (url object)) |
|
98 |
- (when title |
|
99 |
- (yason:encode-object-element "title" title)) |
|
100 |
- (when path |
|
101 |
- (yason:encode-object-element "path" path)))) |
|
102 |
- |
|
103 |
-(defun interleave (list1 list2) |
|
104 |
- (mapcan #'list list1 list2)) |
|
105 |
- |
|
106 |
-(defclass feed-index () |
|
107 |
- ((%pull-time :initarg :pull-time :reader pull-time) |
|
108 |
- ;; Why this slot? Won't the references duplicate this? |
|
109 |
- (%feed-urls :initarg :feed-urls :reader feed-urls) |
|
110 |
- (%feed-references :initarg :references :reader references))) |
|
111 |
- |
|
112 |
-(defun make-feed-index (pull-time feeds paths) |
|
113 |
- (make-instance 'feed-index |
|
114 |
- :pull-time pull-time |
|
115 |
- :feed-urls feeds |
|
116 |
- :references (mapcar (destructuring-lambda (url (title path)) |
|
117 |
- (make-feed-reference url :title title :path path)) |
|
118 |
- feeds |
|
119 |
- paths))) |
|
120 |
- |
|
121 |
-(defmethod yason:encode-slots progn ((object feed-index)) |
|
122 |
- (with-accessors ((pull-time pull-time) (feeds feed-urls) (references references)) object |
|
123 |
- (yason:encode-object-elements |
|
124 |
- "pull-time" (local-time:format-timestring nil pull-time) |
|
125 |
- "feed-urls" feeds) |
|
126 |
- (yason:with-object-element ("feeds") |
|
127 |
- (yason:with-array () |
|
128 |
- (mapcar 'yason:encode-object references))))) |
|
116 |
+(defun pull-and-store-feeds (feeds pull-directory) |
|
117 |
+ (mapcar (lambda (feed-url) |
|
118 |
+ (with-simple-restart (skip-feed "Skip ~a" feed-url) |
|
119 |
+ (let ((feed (with-retry ("Pull feed again.") |
|
120 |
+ (log-pull t feed-url)))) |
|
121 |
+ (store (coerce-feed-link feed-url feed) |
|
122 |
+ pull-directory)))) |
|
123 |
+ feeds)) |
|
129 | 124 |
|
130 | 125 |
(defun feed-index (index-stream pull-time paths) |
131 | 126 |
(yason:with-output (index-stream :indent t) |
... | ... |
@@ -135,6 +130,13 @@ |
135 | 130 |
(list title (uiop:enough-pathname path *feed-base*))) |
136 | 131 |
paths))))) |
137 | 132 |
|
133 |
+(defun archive-feeds () |
|
134 |
+ (let* ((pull-time (local-time:now)) |
|
135 |
+ (pull-directory (get-store-directory-name pull-time)) |
|
136 |
+ (paths (pull-and-store-feeds *feeds* pull-directory)) |
|
137 |
+ (index-path (merge-pathnames "index.json" pull-directory))) |
|
138 |
+ (with-open-file (index index-path :direction :output) |
|
139 |
+ (feed-index index pull-time paths)))) |
|
138 | 140 |
|
139 | 141 |
;; This is an ungodly mess, we need to avoid funneling everything through fix-pathname-or-skip |
140 | 142 |
(defun command-line-main (&optional (feed-list-initializer #'init-feeds)) |
... | ... |
@@ -145,7 +147,7 @@ |
145 | 147 |
(funcall restart)) |
146 | 148 |
(fix-pathname-or-skip (c &key (restart 'skip-feed) (wrapped-condition nil wc-p)) |
147 | 149 |
(typecase (or wrapped-condition c) |
148 |
- (alimenta:feed-type-unsupported (feed-type-unsupported)) |
|
150 |
+ (alimenta:feed-type-unsupported (feed-type-unsupported c)) |
|
149 | 151 |
(otherwise |
150 | 152 |
(if (find-restart 'fix-pathname) |
151 | 153 |
(fix-pathname) |
... | ... |
@@ -2,7 +2,7 @@ |
2 | 2 |
(:use :cl :alexandria :serapeum :fw.lu) |
3 | 3 |
(:shadow :->) |
4 | 4 |
(:export :fix-pathname :sha256-string :get-id :older-than-a-week :-> :get-feed-store-name |
5 |
- :store :get-item-store-name :restart-once)) |
|
5 |
+ :store :get-item-store-name :restart-once :coerce-feed-link :with-retry)) |
|
6 | 6 |
|
7 | 7 |
(in-package :alimenta.feed-archive.tools) |
8 | 8 |
|
... | ... |
@@ -71,3 +71,14 @@ next time, it re-raises the exception." |
71 | 71 |
(setf ,restarted t) |
72 | 72 |
(go ,start)))))))) |
73 | 73 |
|
74 |
+ |
|
75 |
+(defun coerce-feed-link (link feed) |
|
76 |
+ (prog1 feed |
|
77 |
+ (setf (alimenta:feed-link feed) link))) |
|
78 |
+ |
|
79 |
+(defmacro with-retry ((&optional (message "retry the operation")) &body body) |
|
80 |
+ `(loop |
|
81 |
+ (restart-case (return (progn ,@body)) |
|
82 |
+ (retry () |
|
83 |
+ :report (lambda (s) |
|
84 |
+ (format s "~@<~a~@:>" ,message)))))) |