Browse code
Sanitize the RSS feed content a bit.
fiddlerwoaroof authored on 16/08/2017 07:28:38
Showing 6 changed files
Showing 6 changed files
- Makefile
- alimenta-feed-archive.asd
- archive-root/run.js
- encoders.lisp
- feed-archive.lisp
- yason-encoders.lisp
... | ... |
@@ -30,17 +30,17 @@ root = new Vue({ |
30 | 30 |
|
31 | 31 |
feed_item_counts: {}, |
32 | 32 |
|
33 |
- likes: [], |
|
34 |
- show_likes: true, |
|
33 |
+ likes: [], |
|
34 |
+ show_likes: true, |
|
35 | 35 |
}, |
36 | 36 |
|
37 | 37 |
computed: { |
38 |
- feed_visible() { |
|
39 |
- return (!this.show_likes) && |
|
40 |
- this.current_feed.metadata.title !== null && |
|
41 |
- this.current_feed.items.length > 0; |
|
42 |
- }, |
|
43 |
- |
|
38 |
+ feed_visible() { |
|
39 |
+ return (!this.show_likes) && |
|
40 |
+ this.current_feed.metadata.title !== null && |
|
41 |
+ this.current_feed.items.length > 0; |
|
42 |
+ }, |
|
43 |
+ |
|
44 | 44 |
item_content() { |
45 | 45 |
let result = null; |
46 | 46 |
if (this.current_item.content !== null) { |
... | ... |
@@ -54,18 +54,18 @@ root = new Vue({ |
54 | 54 |
}, |
55 | 55 |
|
56 | 56 |
methods: { |
57 |
- list_likes() { |
|
58 |
- this.likes = [] |
|
59 |
- oboe('events.json') |
|
60 |
- .node('*', (ev) => { |
|
61 |
- let {event} = ev; |
|
62 |
- if (event === 'like-item') { |
|
63 |
- this.likes.push(ev); |
|
64 |
- } |
|
65 |
- }); |
|
66 |
- this.show_likes = !this.show_likes; |
|
67 |
- }, |
|
68 |
- |
|
57 |
+ list_likes() { |
|
58 |
+ this.likes = [] |
|
59 |
+ oboe('events.json') |
|
60 |
+ .node('*', (ev) => { |
|
61 |
+ let {event} = ev; |
|
62 |
+ if (event === 'like-item') { |
|
63 |
+ this.likes.push(ev); |
|
64 |
+ } |
|
65 |
+ }); |
|
66 |
+ this.show_likes = !this.show_likes; |
|
67 |
+ }, |
|
68 |
+ |
|
69 | 69 |
toggleCollapse() { |
70 | 70 |
this.collapsed = !this.collapsed; |
71 | 71 |
}, |
... | ... |
@@ -79,7 +79,7 @@ root = new Vue({ |
79 | 79 |
|
80 | 80 |
get_remote_feed: function (path) { |
81 | 81 |
var promise = new Promise((resolve, reject) => { |
82 |
- window.fetch(path+'index.json').then((resp) => resp.json()) |
|
82 |
+ window.fetch(path).then((resp) => resp.json()) |
|
83 | 83 |
.then((data) => { |
84 | 84 |
var result = Object.assign({}, data); |
85 | 85 |
result.fetch_url = data['fetch-url']; |
... | ... |
@@ -97,43 +97,43 @@ root = new Vue({ |
97 | 97 |
get_feed: function (path) { |
98 | 98 |
this.get_remote_feed(path) |
99 | 99 |
.then((result) => Vue.set(this, 'current_feed', result)) |
100 |
- .then(() => this.show_likes = false); |
|
101 |
- }, |
|
100 |
+ .then(() => this.show_likes = false); |
|
101 |
+ }, |
|
102 | 102 |
|
103 | 103 |
like(item, feed) { |
104 |
- fetch('https://<URL>/hub/feed_archive', { |
|
105 |
- method: 'POST', |
|
106 |
- body: JSON.stringify({ |
|
107 |
- 'event': 'like-item', |
|
108 |
- 'item': item.link, |
|
109 |
- 'title': item.title, |
|
110 |
- 'author': item.author, |
|
111 |
- 'feed-title': feed.metadata.title, |
|
112 |
- 'feed-link': feed.metadata.link, |
|
113 |
- }), |
|
114 |
- }); |
|
104 |
+ fetch('https://<URL>/hub/feed_archive', { |
|
105 |
+ method: 'POST', |
|
106 |
+ body: JSON.stringify({ |
|
107 |
+ 'event': 'like-item', |
|
108 |
+ 'item': item.link, |
|
109 |
+ 'title': item.title, |
|
110 |
+ 'author': item.author, |
|
111 |
+ 'feed-title': feed.metadata.title, |
|
112 |
+ 'feed-link': feed.metadata.link, |
|
113 |
+ }), |
|
114 |
+ }); |
|
115 | 115 |
}, |
116 | 116 |
|
117 | 117 |
get_item(path) { |
118 |
- window.fetch(this.current_feed.base_path + path).then((resp) => resp.json()) |
|
119 |
- .then((data) => { |
|
120 |
- fetch('https://<URL>/hub/feed_archive', { |
|
121 |
- method: 'POST', |
|
122 |
- body: JSON.stringify({ |
|
123 |
- 'event': 'read-item', |
|
124 |
- 'item': data.link, |
|
125 |
- 'title': data.title, |
|
126 |
- 'author': data.author, |
|
127 |
- 'feed-title': this.current_feed.metadata.title, |
|
128 |
- 'feed-link': this.current_feed.metadata.link, |
|
129 |
- }), |
|
130 |
- }); |
|
131 |
- window.history.pushState({ |
|
132 |
- 'current_feed': root.current_feed, |
|
133 |
- 'current_item': data |
|
134 |
- }, "", window.location.pathname); |
|
135 |
- Object.assign(this.current_item, data); |
|
136 |
- }); |
|
118 |
+ window.fetch(this.current_feed.base_path + path).then((resp) => resp.json()) |
|
119 |
+ .then((data) => { |
|
120 |
+ fetch('https://<URL>/hub/feed_archive', { |
|
121 |
+ method: 'POST', |
|
122 |
+ body: JSON.stringify({ |
|
123 |
+ 'event': 'read-item', |
|
124 |
+ 'item': data.link, |
|
125 |
+ 'title': data.title, |
|
126 |
+ 'author': data.author, |
|
127 |
+ 'feed-title': this.current_feed.metadata.title, |
|
128 |
+ 'feed-link': this.current_feed.metadata.link, |
|
129 |
+ }), |
|
130 |
+ }); |
|
131 |
+ window.history.pushState({ |
|
132 |
+ 'current_feed': root.current_feed, |
|
133 |
+ 'current_item': data |
|
134 |
+ }, "", window.location.pathname); |
|
135 |
+ Object.assign(this.current_item, data); |
|
136 |
+ }); |
|
137 | 137 |
}, |
138 | 138 |
|
139 | 139 |
has_items(feed) { |
... | ... |
@@ -144,14 +144,14 @@ root = new Vue({ |
144 | 144 |
}, |
145 | 145 |
|
146 | 146 |
ready() { |
147 |
- oboe('events.json') |
|
148 |
- .node('*', (ev) => { |
|
149 |
- let {event} = ev; |
|
150 |
- if (event === 'like-item') { |
|
151 |
- this.likes.push(ev); |
|
152 |
- } |
|
153 |
- }); |
|
154 |
- |
|
147 |
+ oboe('events.json') |
|
148 |
+ .node('*', (ev) => { |
|
149 |
+ let {event} = ev; |
|
150 |
+ if (event === 'like-item') { |
|
151 |
+ this.likes.push(ev); |
|
152 |
+ } |
|
153 |
+ }); |
|
154 |
+ |
|
155 | 155 |
window.fetch(baseUrl+'/index.json').then((resp) => resp.json()) |
156 | 156 |
.then(function (data) { |
157 | 157 |
root.pull_time = data['pull-time']; |
... | ... |
@@ -161,7 +161,7 @@ root = new Vue({ |
161 | 161 |
}) |
162 | 162 |
.then((data) => { |
163 | 163 |
root.feeds.forEach((feed) => { |
164 |
- // console.log(feed); |
|
164 |
+ // console.log(feed); |
|
165 | 165 |
this.get_remote_feed(feed.path) |
166 | 166 |
.then((feed_index) => Vue.set(this.feed_item_counts, feed.path, feed_index.items.length)); |
167 | 167 |
}); |
... | ... |
@@ -111,7 +111,7 @@ |
111 | 111 |
|
112 | 112 |
(defmethod store ((item alimenta:item) (stream stream)) |
113 | 113 |
(yason:with-output (stream :indent t) |
114 |
- (yason:encode-object item)) |
|
114 |
+ (yason:encode-slots item)) |
|
115 | 115 |
(list (alimenta:title item) |
116 | 116 |
stream)) |
117 | 117 |
|
... | ... |
@@ -139,7 +139,6 @@ |
139 | 139 |
(defmethod stream-provider:get-nested-provider ((provider stream-provider:stream-provider) (streamable alimenta:feed)) |
140 | 140 |
(with (items-root (uiop:merge-pathnames* (uiop:pathname-directory-pathname (stream-provider:stream-key provider streamable)) |
141 | 141 |
(stream-provider:root provider))) |
142 |
- (format t "~&items-root: ~a @#%@#$^#$&^&%$~%" items-root) (terpri) |
|
143 | 142 |
(ensure-gethash items-root |
144 | 143 |
(item-providers provider) |
145 | 144 |
(make-instance 'stream-provider:file-provider :root items-root)))) |
... | ... |
@@ -153,6 +152,16 @@ |
153 | 152 |
(stream-provider:with-storage-stream (s item stream-provider) |
154 | 153 |
(store item s))) |
155 | 154 |
|
155 |
+(defmethod store :around ((item alimenta:item) (dest stream-provider:stream-provider)) |
|
156 |
+ (with-simple-restart (skip-item "Skip item ~s" (car item)) |
|
157 |
+ (call-next-method))) |
|
158 |
+ |
|
159 |
+(defun map-coalesce (fun &rest seqs) |
|
160 |
+ (apply #'mappend |
|
161 |
+ (compose #'unsplice |
|
162 |
+ fun) |
|
163 |
+ seqs)) |
|
164 |
+ |
|
156 | 165 |
(defmethod store ((feed alimenta:feed) (stream-provider stream-provider:stream-provider)) |
157 | 166 |
(stream-provider:with-storage-stream (s feed stream-provider) |
158 | 167 |
(with-accessors ((description alimenta:description) |
... | ... |
@@ -162,15 +171,12 @@ |
162 | 171 |
(source-type alimenta:source-type) |
163 | 172 |
(title alimenta:title)) feed |
164 | 173 |
(let* ((item-provider (stream-provider:get-nested-provider stream-provider feed)) |
165 |
- (item-storage-info (map 'list (op (store _ item-provider)) |
|
166 |
- items))) |
|
174 |
+ (item-storage-info (map-coalesce (op (store _ item-provider)) |
|
175 |
+ items))) |
|
167 | 176 |
(yason:with-output (s :indent t) |
168 |
- (yason:with-object () |
|
169 |
- (yason:encode-object-element "metadata" feed) |
|
170 |
- (yason:with-object-element ("items") |
|
171 |
- (yason:with-array () |
|
172 |
- (dolist (item item-storage-info) |
|
173 |
- (destructuring-bind (title path) item |
|
174 |
- (yason:with-object () |
|
175 |
- (yason:encode-object-elements "title" title |
|
176 |
- "path" path)))))))))))) |
|
177 |
+ (with-collection (item "items" item-storage-info "metadata" feed) |
|
178 |
+ (destructuring-bind (title path) item |
|
179 |
+ (yason:with-object () |
|
180 |
+ (yason:encode-object-elements |
|
181 |
+ "title" title |
|
182 |
+ "path" path))))))))) |
... | ... |
@@ -17,6 +17,7 @@ |
17 | 17 |
|
18 | 18 |
(defun test-feed-list () |
19 | 19 |
(values '("http://feeds.feedburner.com/GamasutraFeatureArticles/" |
20 |
+ "http://edwardfeser.blogspot.com/feeds/posts/default" |
|
20 | 21 |
"https://www.codinghorror.com/blog/index.xml" |
21 | 22 |
"https://sancrucensis.wordpress.com/feed/") |
22 | 23 |
#p"/tmp/feed-archive/")) |
... | ... |
@@ -50,8 +51,14 @@ |
50 | 51 |
(continue))))) |
51 | 52 |
(handler-bind ((warning #'muffle-warning) |
52 | 53 |
(error #'pop-50-tokens)) |
53 |
- (prog1 (alimenta.pull-feed:pull-feed feed-url) |
|
54 |
+ (prog1-bind (feed (alimenta.pull-feed:pull-feed feed-url)) |
|
54 | 55 |
;; Why am I decf-ing here? |
56 |
+ (alimenta:transform feed |
|
57 |
+ (fw.lu:glambda (entity) |
|
58 |
+ (:method (entity)) |
|
59 |
+ (:method ((entity alimenta:item)) |
|
60 |
+ (setf (alimenta:content entity) |
|
61 |
+ (html-sanitizer:sanitize (alimenta:content entity)))))) |
|
55 | 62 |
(decf pop-times))))) |
56 | 63 |
|
57 | 64 |
(defmacro with-progress-message ((stream before after &optional (error-msg " ERROR~%~4t~a~%")) &body body) |
... | ... |
@@ -70,10 +77,6 @@ |
70 | 77 |
(with-output-to-file (s output-file :if-exists if-exists) |
71 | 78 |
(plump:serialize (alimenta:doc feed) s))) |
72 | 79 |
|
73 |
-(defun pull-and-store-feeds (feeds pull-directory) |
|
74 |
- (mapcar (op (pull-and-store-feed _ pull-directory)) |
|
75 |
- feeds)) |
|
76 |
- |
|
77 | 80 |
(defun log-pull (feed-puller feed-url stream) |
78 | 81 |
(let ((before-message (concatenate 'string "Trying to pull: " feed-url))) |
79 | 82 |
(with-progress-message (stream before-message "Success") |
... | ... |
@@ -85,44 +88,57 @@ |
85 | 88 |
:key 'alimenta:date)) |
86 | 89 |
|
87 | 90 |
(defun log-serialization (feed-url stream feed path) |
91 |
+ (declare (ignorable feed-url stream feed path)) |
|
88 | 92 |
(with-progress-message (stream "Serializing XML" (format nil "done with ~a" feed-url)) |
89 | 93 |
(save-feed feed (merge-pathnames "feed.xml" path)))) |
90 | 94 |
|
91 | 95 |
(defun feed-relative-pathname (path &optional (feed-base *feed-base*)) |
92 | 96 |
(uiop:enough-pathname path feed-base)) |
93 | 97 |
|
94 |
-(defun pull-and-store-feed (feed-url pull-directory &optional (feed-puller #'safe-pull-feed)) |
|
98 |
+(defun feed-index (index-stream pull-time references) |
|
99 |
+ (yason:with-output (index-stream :indent t) |
|
100 |
+ (yason:encode-object |
|
101 |
+ (make-feed-index pull-time (remove-if 'null references))))) |
|
102 |
+ |
|
103 |
+(defun pull-and-store-feed (feed-url stream-provider &optional (feed-puller #'safe-pull-feed)) |
|
95 | 104 |
(flet ((log-pull (stream) |
96 | 105 |
(declare (inline) (dynamic-extent stream)) |
97 | 106 |
(log-pull feed-puller feed-url stream)) |
98 | 107 |
(log-serialization (stream feed path) |
99 | 108 |
(declare (inline) (dynamic-extent stream)) |
100 |
- (log-serialization feed-url stream feed path))) |
|
109 |
+ (log-serialization feed-url stream feed |
|
110 |
+ (merge-pathnames path |
|
111 |
+ (stream-provider:root stream-provider))))) |
|
101 | 112 |
|
102 | 113 |
(with-simple-restart (skip-feed "Stop processing for ~a" feed-url) |
103 | 114 |
(let* ((feed (with-retry ("Pull feed again.") |
104 | 115 |
(normalize-feed feed-url (log-pull t))))) |
105 |
- (trivia:multiple-value-match (store feed pull-directory) |
|
106 |
- (((list title path) url) |
|
116 |
+ (trivia:match (store feed stream-provider) |
|
117 |
+ ((list title path) |
|
107 | 118 |
(log-serialization t feed path) |
108 |
- (make-feed-reference url :title title :path (feed-relative-pathname path)))))))) |
|
109 |
- |
|
110 |
-(defun feed-index (index-stream pull-time references) |
|
111 |
- (yason:with-output (index-stream :indent t) |
|
112 |
- (yason:encode-object |
|
113 |
- (make-feed-index pull-time (remove-if 'null references))))) |
|
119 |
+ (make-feed-reference (alimenta:feed-link feed) |
|
120 |
+ :title title |
|
121 |
+ :path (feed-relative-pathname |
|
122 |
+ (uiop:pathname-directory-pathname |
|
123 |
+ (merge-pathnames path |
|
124 |
+ (stream-provider:root stream-provider))))))))))) |
|
114 | 125 |
|
115 | 126 |
(defun archive-feeds (pull-time pull-directory index-stream) |
116 |
- (let ((references (pull-and-store-feeds *feeds* pull-directory))) |
|
117 |
- (feed-index index-stream pull-time references) |
|
118 |
- references)) |
|
127 |
+ (prog1-bind (references (mapcar (op (pull-and-store-feed _ pull-directory)) |
|
128 |
+ *feeds*)) |
|
129 |
+ (feed-index index-stream pull-time references))) |
|
119 | 130 |
|
120 | 131 |
(defun archive-feeds-nondeterm () |
121 | 132 |
(let* ((pull-time (local-time:now)) |
122 | 133 |
(pull-directory (get-store-directory-name pull-time)) |
123 |
- (index-path (merge-pathnames "index.json" pull-directory))) |
|
134 |
+ (index-path (merge-pathnames "index.json" pull-directory)) |
|
135 |
+ (feed-stream-provider (make-instance 'alimenta.feed-archive.encoders:feed-stream-provider |
|
136 |
+ :if-exists :error |
|
137 |
+ :root pull-directory))) |
|
124 | 138 |
(with-open-file (index index-path :direction :output) |
125 |
- (archive-feeds pull-time pull-directory index)) |
|
139 |
+ (archive-feeds pull-time |
|
140 |
+ feed-stream-provider |
|
141 |
+ index)) |
|
126 | 142 |
(format t "~&!! pull-directory ~a~%" (uiop:enough-pathname pull-directory *feed-base*)))) |
127 | 143 |
|
128 | 144 |
;; This is an ungodly mess, we need to avoid funneling everything through fix-pathname-or-skip |
... | ... |
@@ -1,23 +1,25 @@ |
1 |
+(in-package :alimenta.feed-archive.encoders) |
|
2 |
+ |
|
1 | 3 |
(defmethod yason:encode ((object pathname) &optional stream) |
2 | 4 |
(yason:encode (princ-to-string (uiop:native-namestring object)) |
3 |
- stream) |
|
5 |
+ stream) |
|
4 | 6 |
object) |
5 | 7 |
|
6 | 8 |
(defmethod yason:encode ((object puri:uri) &optional stream) |
7 | 9 |
(yason:encode (puri:render-uri object nil) |
8 |
- stream) |
|
10 |
+ stream) |
|
9 | 11 |
object) |
10 | 12 |
|
11 | 13 |
(defmethod yason:encode-slots progn ((feed alimenta:feed)) |
12 | 14 |
(with-accessors ((description alimenta:description) |
13 |
- (feed-link alimenta:feed-link) |
|
14 |
- (items alimenta:items) |
|
15 |
- (link alimenta:link) |
|
16 |
- (source-type alimenta:source-type) |
|
17 |
- (title alimenta:title)) feed |
|
15 |
+ (feed-link alimenta:feed-link) |
|
16 |
+ (items alimenta:items) |
|
17 |
+ (link alimenta:link) |
|
18 |
+ (source-type alimenta:source-type) |
|
19 |
+ (title alimenta:title)) feed |
|
18 | 20 |
(yason:encode-object-element "title" title) |
19 | 21 |
(yason:encode-object-element "fetch-url" |
20 |
- (puri:render-uri feed-link nil)) |
|
22 |
+ (puri:render-uri feed-link nil)) |
|
21 | 23 |
(yason:encode-object-element "link" link) |
22 | 24 |
;;(yason:encode-object-element "source-type" source-type) |
23 | 25 |
(yason:encode-object-element "description" description)) |
... | ... |
@@ -30,11 +32,11 @@ |
30 | 32 |
|
31 | 33 |
(defmethod yason:encode-slots progn ((item alimenta:item)) |
32 | 34 |
(with-accessors ((author alimenta:author) |
33 |
- (content alimenta:content) |
|
34 |
- (date alimenta:date) |
|
35 |
- (id alimenta:id) |
|
36 |
- (link alimenta:link) |
|
37 |
- (title alimenta:title)) item |
|
35 |
+ (content alimenta:content) |
|
36 |
+ (date alimenta:date) |
|
37 |
+ (id alimenta:id) |
|
38 |
+ (link alimenta:link) |
|
39 |
+ (title alimenta:title)) item |
|
38 | 40 |
(let* ((date (local-time:format-timestring nil date))) |
39 | 41 |
(yason:with-object () |
40 | 42 |
(yason:encode-object-element "title" title) |
... | ... |
@@ -49,3 +51,20 @@ |
49 | 51 |
(yason:encode-slots item)) |
50 | 52 |
item) |
51 | 53 |
|
54 |
+ |
|
55 |
+(defun encode-collection-object (other-pairs collection-key collection-value element-encoder) |
|
56 |
+ (yason:with-object () |
|
57 |
+ (loop for (key value) on other-pairs by #'cddr |
|
58 |
+ do |
|
59 |
+ (yason:encode-object-element key value)) |
|
60 |
+ (yason:with-object-element (collection-key) |
|
61 |
+ (yason:with-array () |
|
62 |
+ (dolist (item collection-value) |
|
63 |
+ (funcall element-encoder item)))))) |
|
64 |
+ |
|
65 |
+(defmacro with-collection ((item-sym key collection &rest other-pairs) &body encoder) |
|
66 |
+ (once-only (key collection) |
|
67 |
+ `(encode-collection-object (list ,@other-pairs) |
|
68 |
+ ,key ,collection |
|
69 |
+ (lambda (,item-sym) |
|
70 |
+ ,@encoder)))) |