git.fiddlerwoaroof.com
Browse code

Refactor feed-archive and ignore junk

fiddlerwoaroof authored on 10/03/2017 20:27:29
Showing 3 changed files
... ...
@@ -2,3 +2,4 @@
2 2
 .[#]*
3 3
 [#]*#
4 4
 feed-archiver
5
+*.fasl
... ...
@@ -5,12 +5,55 @@
5 5
 
6 6
 (in-package :alimenta.feed-archive)
7 7
 
8
+
8 9
 (defvar *feeds*)
9 10
 (defvar *feed-base*)
10 11
 
11 12
 (defparameter +dirname-format+
12 13
   '((:year 4) #\- (:month 2) #\- (:day 2) #\/ (:hour 2) #\- (:min 2) #\/))
13 14
 
15
+(defclass feed-index ()
16
+  ((%pull-time :initarg :pull-time :reader pull-time)
17
+   ;; Why this slot? Won't the references duplicate this?
18
+   (%feed-urls :initarg :feed-urls :reader feed-urls)
19
+   (%feed-references :initarg :references :reader references)))
20
+
21
+(defclass feed-reference ()
22
+  ((%url :initarg :url :reader url)
23
+   (%title :initarg :title :reader title :initform nil)
24
+   (%path :initarg :path :reader path :initform nil)))
25
+
26
+(defun make-feed-index (pull-time feeds paths)
27
+  (make-instance 'feed-index
28
+		 :pull-time pull-time
29
+		 :feed-urls feeds
30
+		 :references (mapcar (destructuring-lambda (url (title path))
31
+				       (make-feed-reference url :title title :path path))
32
+				     feeds
33
+				     paths)))
34
+
35
+(defun make-feed-reference (url &rest feed-data)
36
+  (apply #'make-instance 'feed-reference
37
+	 :url url
38
+	 feed-data))
39
+
40
+(defmethod yason:encode-slots progn ((object feed-reference))
41
+  (let ((title (title object))
42
+	(path (path object)))
43
+    (yason:encode-object-element "url" (url object))
44
+    (when title
45
+      (yason:encode-object-element "title" title))
46
+    (when path
47
+      (yason:encode-object-element "path" path))))
48
+
49
+(defmethod yason:encode-slots progn ((object feed-index))
50
+  (with-accessors ((pull-time pull-time) (feeds feed-urls) (references references)) object
51
+    (yason:encode-object-elements "pull-time" (local-time:format-timestring nil pull-time)
52
+				  "feed-urls" feeds)
53
+    (yason:with-object-element ("feeds")
54
+      (yason:with-array ()
55
+	(mapcar 'yason:encode-object references)))))
56
+
14 57
 (defun get-store-directory-name (timestamp)
15 58
   (flet ((make-dirname (timestamp)
16 59
 	   (-> (local-time:format-timestring nil (local-time:timestamp-minimize-part timestamp :sec)
... ...
@@ -55,77 +98,29 @@
55 98
 		 (continue)))))
56 99
     (handler-bind ((warning #'muffle-warning)
57 100
 		   (error #'pop-50-tokens))
58
-      (format t "~&Trying to pull: ~a... " feed-url)
59 101
       (prog1 (alimenta.pull-feed:pull-feed feed-url)
60 102
 	;; Why am I decf-ing here?
61
-	(format t "... Success~%")
62 103
 	(decf pop-times)))))
63 104
 
105
+(defun log-pull (stream feed-url)
106
+  (format stream "~&Trying to pull: ~a... " feed-url)
107
+  (handler-bind ((error (lambda (c) (format stream "... Error ~a~%" c))))
108
+      (prog1 (safe-pull-feed feed-url)
109
+		  (format stream "... Success~%"))))
110
+
64 111
 (defun skip-feed ()
65 112
   (when-let ((restart (find-restart 'skip-feed)))
66 113
     (invoke-restart restart)))
67 114
 
68
-(defun pull-and-store-feeds (feeds pull-directory)
69
-  (loop for feed-url in feeds
70
-     collect
71
-       (with-simple-restart (skip-feed "Skip ~a" feed-url)
72
-	 (let ((feed (safe-pull-feed feed-url)))
73
-	   (setf (alimenta:feed-link feed)
74
-		 feed-url)
75
-	   (store feed pull-directory)))))
76 115
 
77
-(defun archive-feeds ()
78
-  (let* ((pull-time (local-time:now))
79
-	 (pull-directory (get-store-directory-name pull-time)) 
80
-	 (paths (pull-and-store-feeds *feeds* pull-directory)))
81
-    (with-open-file (index (merge-pathnames "index.json" pull-directory) :direction :output)
82
-      (feed-index index pull-time paths))))
83
-
84
-(defclass feed-reference ()
85
-  ((%url :initarg :url :reader url)
86
-   (%title :initarg :title :reader title :initform nil)
87
-   (%path :initarg :path :reader path :initform nil)))
88
-
89
-(defun make-feed-reference (url &rest feed-data)
90
-  (apply #'make-instance 'feed-reference
91
-	 :url url
92
-	 feed-data))
93
-
94
-(defmethod yason:encode-slots progn ((object feed-reference))
95
-  (let ((title (title object))
96
-	(path (path object)))
97
-    (yason:encode-object-element "url" (url object))
98
-    (when title
99
-      (yason:encode-object-element "title" title))
100
-    (when path
101
-      (yason:encode-object-element "path" path))))
102
-
103
-(defun interleave (list1 list2)
104
-  (mapcan #'list list1 list2))
105
-
106
-(defclass feed-index ()
107
-  ((%pull-time :initarg :pull-time :reader pull-time)
108
-   ;; Why this slot? Won't the references duplicate this?
109
-   (%feed-urls :initarg :feed-urls :reader feed-urls)
110
-   (%feed-references :initarg :references :reader references)))
111
-
112
-(defun make-feed-index (pull-time feeds paths)
113
-  (make-instance 'feed-index
114
-		 :pull-time pull-time
115
-		 :feed-urls feeds
116
-		 :references (mapcar (destructuring-lambda (url (title path))
117
-				       (make-feed-reference url :title title :path path))
118
-				     feeds
119
-				     paths)))
120
-
121
-(defmethod yason:encode-slots progn ((object feed-index))
122
-  (with-accessors ((pull-time pull-time) (feeds feed-urls) (references references)) object
123
-    (yason:encode-object-elements
124
-     "pull-time" (local-time:format-timestring nil pull-time)
125
-     "feed-urls" feeds)
126
-    (yason:with-object-element ("feeds")
127
-      (yason:with-array ()
128
-	(mapcar 'yason:encode-object references)))))
116
+(defun pull-and-store-feeds (feeds pull-directory)
117
+  (mapcar (lambda (feed-url)
118
+	    (with-simple-restart (skip-feed "Skip ~a" feed-url)
119
+	      (let ((feed (with-retry ("Pull feed again.")
120
+			    (log-pull t feed-url))))
121
+		(store (coerce-feed-link feed-url feed)
122
+		       pull-directory))))
123
+	  feeds))
129 124
 
130 125
 (defun feed-index (index-stream pull-time paths)
131 126
   (yason:with-output (index-stream :indent t)
... ...
@@ -135,6 +130,13 @@
135 130
 				(list title (uiop:enough-pathname path *feed-base*)))
136 131
 			      paths)))))
137 132
 
133
+(defun archive-feeds ()
134
+  (let* ((pull-time (local-time:now))
135
+	 (pull-directory (get-store-directory-name pull-time)) 
136
+	 (paths (pull-and-store-feeds *feeds* pull-directory))
137
+	 (index-path (merge-pathnames "index.json" pull-directory)))
138
+    (with-open-file (index index-path :direction :output)
139
+      (feed-index index pull-time paths))))
138 140
 
139 141
 ;; This is an ungodly mess, we need to avoid funneling everything through fix-pathname-or-skip
140 142
 (defun command-line-main (&optional (feed-list-initializer #'init-feeds))
... ...
@@ -145,7 +147,7 @@
145 147
 	     (funcall restart))
146 148
 	   (fix-pathname-or-skip (c &key (restart 'skip-feed) (wrapped-condition nil wc-p))
147 149
 	     (typecase (or wrapped-condition c)
148
-	       (alimenta:feed-type-unsupported (feed-type-unsupported))
150
+	       (alimenta:feed-type-unsupported (feed-type-unsupported c))
149 151
 	       (otherwise
150 152
 		(if (find-restart 'fix-pathname)
151 153
 		    (fix-pathname)
... ...
@@ -2,7 +2,7 @@
2 2
   (:use :cl :alexandria :serapeum :fw.lu)
3 3
   (:shadow :->)
4 4
   (:export :fix-pathname :sha256-string :get-id :older-than-a-week :-> :get-feed-store-name
5
-	   :store :get-item-store-name :restart-once))
5
+	   :store :get-item-store-name :restart-once :coerce-feed-link :with-retry))
6 6
 
7 7
 (in-package :alimenta.feed-archive.tools)
8 8
 
... ...
@@ -71,3 +71,14 @@ next time, it re-raises the exception."
71 71
 		(setf ,restarted t)
72 72
 		(go ,start))))))))
73 73
 
74
+
75
+(defun coerce-feed-link (link feed)
76
+  (prog1 feed
77
+    (setf (alimenta:feed-link feed) link)))
78
+
79
+(defmacro with-retry ((&optional (message "retry the operation")) &body body)
80
+  `(loop
81
+      (restart-case (return (progn ,@body))
82
+	(retry ()
83
+	  :report (lambda (s)
84
+		    (format s "~@<~a~@:>" ,message))))))