git.fiddlerwoaroof.com
Browse code

Sanitize the RSS feed content a bit.

fiddlerwoaroof authored on 16/08/2017 07:28:38
Showing 6 changed files
... ...
@@ -1,4 +1,4 @@
1
-all: ccl
1
+all: sbcl
2 2
 
3 3
 sbcl:
4 4
 	sbcl --no-userinit --disable-debugger --load deploy.lisp
... ...
@@ -13,7 +13,8 @@
13 13
                #:ubiquitous
14 14
                #:uiop
15 15
                #:vector-update-stream
16
-               #:yason)
16
+               #:yason
17
+               #:html-sanitizer)
17 18
   :serial t
18 19
   :components ((:file "package")
19 20
                (:file "tools")
... ...
@@ -30,17 +30,17 @@ root = new Vue({
30 30
 
31 31
         feed_item_counts: {},
32 32
 
33
-	likes: [],
34
-	show_likes: true,
33
+        likes: [],
34
+        show_likes: true,
35 35
     },
36 36
 
37 37
     computed: {
38
-	feed_visible() {
39
-	    return (!this.show_likes) &&
40
-		this.current_feed.metadata.title !== null &&
41
-		this.current_feed.items.length > 0;
42
-	},
43
-	
38
+        feed_visible() {
39
+            return (!this.show_likes) &&
40
+                this.current_feed.metadata.title !== null &&
41
+                this.current_feed.items.length > 0;
42
+        },
43
+        
44 44
         item_content() {
45 45
             let result = null;
46 46
             if (this.current_item.content !== null) {
... ...
@@ -54,18 +54,18 @@ root = new Vue({
54 54
     },
55 55
 
56 56
     methods: {
57
-	list_likes() {
58
-	    this.likes = []
59
-	    oboe('events.json')
60
-		.node('*', (ev) => {
61
-		    let {event} = ev;
62
-		    if (event === 'like-item') {
63
-			this.likes.push(ev);
64
-		    }
65
-		});
66
-	    this.show_likes = !this.show_likes;
67
-	},
68
-	
57
+        list_likes() {
58
+            this.likes = []
59
+            oboe('events.json')
60
+                .node('*', (ev) => {
61
+                    let {event} = ev;
62
+                    if (event === 'like-item') {
63
+                        this.likes.push(ev);
64
+                    }
65
+                });
66
+            this.show_likes = !this.show_likes;
67
+        },
68
+        
69 69
         toggleCollapse() {
70 70
             this.collapsed = !this.collapsed;
71 71
         },
... ...
@@ -79,7 +79,7 @@ root = new Vue({
79 79
 
80 80
         get_remote_feed: function (path) {
81 81
             var promise = new Promise((resolve, reject) => {
82
-                window.fetch(path+'index.json').then((resp) => resp.json())
82
+                window.fetch(path).then((resp) => resp.json())
83 83
                     .then((data) => {
84 84
                         var result = Object.assign({}, data);
85 85
                         result.fetch_url = data['fetch-url'];
... ...
@@ -97,43 +97,43 @@ root = new Vue({
97 97
         get_feed: function (path) {
98 98
             this.get_remote_feed(path)
99 99
                 .then((result) => Vue.set(this, 'current_feed', result))
100
-		.then(() => this.show_likes = false);
101
-	},
100
+                .then(() => this.show_likes = false);
101
+        },
102 102
 
103 103
         like(item, feed) {
104
-          fetch('https://<URL>/hub/feed_archive', {
105
-            method: 'POST',
106
-            body: JSON.stringify({
107
-              'event': 'like-item',
108
-              'item': item.link,
109
-              'title': item.title,
110
-              'author': item.author,
111
-              'feed-title': feed.metadata.title,
112
-              'feed-link': feed.metadata.link,
113
-            }),
114
-          });
104
+            fetch('https://<URL>/hub/feed_archive', {
105
+                method: 'POST',
106
+                body: JSON.stringify({
107
+                    'event': 'like-item',
108
+                    'item': item.link,
109
+                    'title': item.title,
110
+                    'author': item.author,
111
+                    'feed-title': feed.metadata.title,
112
+                    'feed-link': feed.metadata.link,
113
+                }),
114
+            });
115 115
         },
116 116
 
117 117
         get_item(path) {
118
-          window.fetch(this.current_feed.base_path + path).then((resp) => resp.json())
119
-          .then((data) => {
120
-            fetch('https://<URL>/hub/feed_archive', {
121
-              method: 'POST',
122
-              body: JSON.stringify({
123
-                'event': 'read-item',
124
-                'item': data.link,
125
-                'title': data.title,
126
-                'author': data.author,
127
-                'feed-title': this.current_feed.metadata.title,
128
-                'feed-link': this.current_feed.metadata.link,
129
-              }),
130
-            });
131
-            window.history.pushState({
132
-              'current_feed': root.current_feed,
133
-              'current_item': data
134
-            }, "", window.location.pathname);
135
-            Object.assign(this.current_item, data);
136
-          });
118
+            window.fetch(this.current_feed.base_path + path).then((resp) => resp.json())
119
+                .then((data) => {
120
+                    fetch('https://<URL>/hub/feed_archive', {
121
+                        method: 'POST',
122
+                        body: JSON.stringify({
123
+                            'event': 'read-item',
124
+                            'item': data.link,
125
+                            'title': data.title,
126
+                            'author': data.author,
127
+                            'feed-title': this.current_feed.metadata.title,
128
+                            'feed-link': this.current_feed.metadata.link,
129
+                        }),
130
+                    });
131
+                    window.history.pushState({
132
+                        'current_feed': root.current_feed,
133
+                        'current_item': data
134
+                    }, "", window.location.pathname);
135
+                    Object.assign(this.current_item, data);
136
+                });
137 137
         },
138 138
 
139 139
         has_items(feed) {
... ...
@@ -144,14 +144,14 @@ root = new Vue({
144 144
     },
145 145
 
146 146
     ready() {
147
-	oboe('events.json')
148
-	    .node('*', (ev) => {
149
-		let {event} = ev;
150
-		if (event === 'like-item') {
151
-		    this.likes.push(ev);
152
-		}
153
-	    });
154
-	
147
+        oboe('events.json')
148
+            .node('*', (ev) => {
149
+                let {event} = ev;
150
+                if (event === 'like-item') {
151
+                    this.likes.push(ev);
152
+                }
153
+            });
154
+        
155 155
         window.fetch(baseUrl+'/index.json').then((resp) => resp.json())
156 156
             .then(function (data) {
157 157
                 root.pull_time = data['pull-time'];
... ...
@@ -161,7 +161,7 @@ root = new Vue({
161 161
             })
162 162
             .then((data) => {
163 163
                 root.feeds.forEach((feed) => {
164
-		    // console.log(feed);
164
+                    // console.log(feed);
165 165
                     this.get_remote_feed(feed.path)
166 166
                         .then((feed_index) => Vue.set(this.feed_item_counts, feed.path, feed_index.items.length));
167 167
                 });
... ...
@@ -111,7 +111,7 @@
111 111
 
112 112
 (defmethod store ((item alimenta:item) (stream stream))
113 113
   (yason:with-output (stream :indent t)
114
-    (yason:encode-object item))
114
+    (yason:encode-slots item))
115 115
   (list (alimenta:title item)
116 116
         stream))
117 117
 
... ...
@@ -139,7 +139,6 @@
139 139
 (defmethod stream-provider:get-nested-provider ((provider stream-provider:stream-provider) (streamable alimenta:feed))
140 140
   (with (items-root (uiop:merge-pathnames* (uiop:pathname-directory-pathname (stream-provider:stream-key provider streamable))
141 141
                                            (stream-provider:root provider)))
142
-    (format t "~&items-root: ~a   @#%@#$^#$&^&%$~%" items-root) (terpri)
143 142
     (ensure-gethash items-root
144 143
                     (item-providers provider)
145 144
                     (make-instance 'stream-provider:file-provider :root items-root))))
... ...
@@ -153,6 +152,16 @@
153 152
   (stream-provider:with-storage-stream (s item stream-provider)
154 153
     (store item s)))
155 154
 
155
+(defmethod store :around ((item alimenta:item) (dest stream-provider:stream-provider))
156
+  (with-simple-restart (skip-item "Skip item ~s" (car item))
157
+    (call-next-method)))
158
+
159
+(defun map-coalesce (fun &rest seqs)
160
+  (apply #'mappend
161
+         (compose #'unsplice
162
+                  fun)
163
+         seqs))
164
+
156 165
 (defmethod store ((feed alimenta:feed) (stream-provider stream-provider:stream-provider))
157 166
   (stream-provider:with-storage-stream (s feed stream-provider)
158 167
     (with-accessors ((description alimenta:description)
... ...
@@ -162,15 +171,12 @@
162 171
                      (source-type alimenta:source-type)
163 172
                      (title alimenta:title)) feed
164 173
       (let* ((item-provider (stream-provider:get-nested-provider stream-provider feed))
165
-             (item-storage-info (map 'list (op (store _ item-provider))
166
-                                                        items)))
174
+             (item-storage-info (map-coalesce (op (store _ item-provider))
175
+                                              items)))
167 176
         (yason:with-output (s :indent t)
168
-          (yason:with-object ()
169
-            (yason:encode-object-element "metadata" feed)
170
-            (yason:with-object-element ("items")
171
-              (yason:with-array ()
172
-                (dolist (item item-storage-info)
173
-                  (destructuring-bind (title path) item
174
-                    (yason:with-object ()
175
-                      (yason:encode-object-elements "title" title
176
-                                                    "path" path))))))))))))
177
+          (with-collection (item "items" item-storage-info "metadata" feed)
178
+            (destructuring-bind (title path) item
179
+              (yason:with-object ()
180
+                (yason:encode-object-elements
181
+                 "title" title
182
+                 "path" path)))))))))
... ...
@@ -17,6 +17,7 @@
17 17
 
18 18
 (defun test-feed-list ()
19 19
   (values '("http://feeds.feedburner.com/GamasutraFeatureArticles/"
20
+            "http://edwardfeser.blogspot.com/feeds/posts/default"
20 21
             "https://www.codinghorror.com/blog/index.xml"
21 22
             "https://sancrucensis.wordpress.com/feed/")
22 23
           #p"/tmp/feed-archive/"))
... ...
@@ -50,8 +51,14 @@
50 51
                  (continue)))))
51 52
     (handler-bind ((warning #'muffle-warning)
52 53
                    (error #'pop-50-tokens))
53
-      (prog1 (alimenta.pull-feed:pull-feed feed-url)
54
+      (prog1-bind (feed (alimenta.pull-feed:pull-feed feed-url))
54 55
         ;; Why am I decf-ing here?
56
+        (alimenta:transform feed
57
+                            (fw.lu:glambda (entity)
58
+                              (:method (entity))
59
+                              (:method ((entity alimenta:item))
60
+                                (setf (alimenta:content entity)
61
+                                      (html-sanitizer:sanitize (alimenta:content entity))))))
55 62
         (decf pop-times)))))
56 63
 
57 64
 (defmacro with-progress-message ((stream before after &optional (error-msg " ERROR~%~4t~a~%")) &body body)
... ...
@@ -70,10 +77,6 @@
70 77
   (with-output-to-file (s output-file :if-exists if-exists)
71 78
     (plump:serialize (alimenta:doc feed) s)))
72 79
 
73
-(defun pull-and-store-feeds (feeds pull-directory)
74
-  (mapcar (op (pull-and-store-feed _ pull-directory))
75
-          feeds))
76
-
77 80
 (defun log-pull (feed-puller feed-url stream)
78 81
   (let ((before-message (concatenate 'string "Trying to pull: " feed-url)))
79 82
     (with-progress-message (stream before-message "Success")
... ...
@@ -85,44 +88,57 @@
85 88
                         :key 'alimenta:date))
86 89
 
87 90
 (defun log-serialization (feed-url stream feed path)
91
+  (declare (ignorable feed-url stream feed path))
88 92
   (with-progress-message (stream "Serializing XML" (format nil "done with ~a" feed-url))
89 93
     (save-feed feed (merge-pathnames "feed.xml" path))))
90 94
 
91 95
 (defun feed-relative-pathname (path &optional (feed-base *feed-base*))
92 96
   (uiop:enough-pathname path feed-base))
93 97
 
94
-(defun pull-and-store-feed (feed-url pull-directory &optional (feed-puller #'safe-pull-feed))
98
+(defun feed-index (index-stream pull-time references)
99
+  (yason:with-output (index-stream :indent t)
100
+    (yason:encode-object
101
+     (make-feed-index pull-time (remove-if 'null references)))))
102
+
103
+(defun pull-and-store-feed (feed-url stream-provider &optional (feed-puller #'safe-pull-feed))
95 104
   (flet ((log-pull (stream)
96 105
            (declare (inline) (dynamic-extent stream))
97 106
            (log-pull feed-puller feed-url stream))
98 107
          (log-serialization (stream feed path)
99 108
            (declare (inline) (dynamic-extent stream))
100
-           (log-serialization feed-url stream feed path)))
109
+           (log-serialization feed-url stream feed
110
+                              (merge-pathnames path
111
+                                               (stream-provider:root stream-provider)))))
101 112
 
102 113
     (with-simple-restart (skip-feed "Stop processing for ~a" feed-url)
103 114
       (let* ((feed (with-retry ("Pull feed again.")
104 115
                      (normalize-feed feed-url (log-pull t)))))
105
-        (trivia:multiple-value-match (store feed pull-directory)
106
-          (((list title path) url)
116
+        (trivia:match (store feed stream-provider)
117
+          ((list title path)
107 118
            (log-serialization t feed path)
108
-           (make-feed-reference url :title title :path (feed-relative-pathname path))))))))
109
-
110
-(defun feed-index (index-stream pull-time references)
111
-  (yason:with-output (index-stream :indent t)
112
-    (yason:encode-object
113
-     (make-feed-index pull-time (remove-if 'null references)))))
119
+           (make-feed-reference (alimenta:feed-link feed)
120
+                                :title title
121
+                                :path (feed-relative-pathname
122
+                                       (uiop:pathname-directory-pathname
123
+                                        (merge-pathnames path
124
+                                                         (stream-provider:root stream-provider)))))))))))
114 125
 
115 126
 (defun archive-feeds (pull-time pull-directory index-stream)
116
-  (let ((references (pull-and-store-feeds *feeds* pull-directory)))
117
-    (feed-index index-stream pull-time references)
118
-    references))
127
+  (prog1-bind (references (mapcar (op (pull-and-store-feed _ pull-directory))
128
+                                  *feeds*))
129
+    (feed-index index-stream pull-time references)))
119 130
 
120 131
 (defun archive-feeds-nondeterm ()
121 132
   (let* ((pull-time (local-time:now))
122 133
 	 (pull-directory (get-store-directory-name pull-time)) 
123
-	 (index-path (merge-pathnames "index.json" pull-directory)))
134
+	 (index-path (merge-pathnames "index.json" pull-directory))
135
+         (feed-stream-provider (make-instance 'alimenta.feed-archive.encoders:feed-stream-provider
136
+                                              :if-exists :error
137
+                                              :root pull-directory)))
124 138
     (with-open-file (index index-path :direction :output)
125
-      (archive-feeds pull-time pull-directory index))
139
+      (archive-feeds pull-time
140
+                     feed-stream-provider
141
+                     index))
126 142
     (format t "~&!! pull-directory ~a~%" (uiop:enough-pathname pull-directory *feed-base*))))
127 143
 
128 144
 ;; This is an ungodly mess, we need to avoid funneling everything through fix-pathname-or-skip
... ...
@@ -1,23 +1,25 @@
1
+(in-package :alimenta.feed-archive.encoders)
2
+
1 3
 (defmethod yason:encode ((object pathname) &optional stream)
2 4
   (yason:encode (princ-to-string (uiop:native-namestring object))
3
-		       stream)
5
+                       stream)
4 6
   object)
5 7
 
6 8
 (defmethod yason:encode ((object puri:uri) &optional stream)
7 9
   (yason:encode (puri:render-uri object nil)
8
-		stream)
10
+                stream)
9 11
   object)
10 12
 
11 13
 (defmethod yason:encode-slots progn ((feed alimenta:feed))
12 14
   (with-accessors ((description alimenta:description)
13
-		   (feed-link alimenta:feed-link)
14
-		   (items alimenta:items)
15
-		   (link alimenta:link)
16
-		   (source-type alimenta:source-type)
17
-		   (title alimenta:title)) feed
15
+                   (feed-link alimenta:feed-link)
16
+                   (items alimenta:items)
17
+                   (link alimenta:link)
18
+                   (source-type alimenta:source-type)
19
+                   (title alimenta:title)) feed
18 20
     (yason:encode-object-element "title" title)
19 21
     (yason:encode-object-element "fetch-url"
20
-				 (puri:render-uri feed-link nil))
22
+                                 (puri:render-uri feed-link nil))
21 23
     (yason:encode-object-element "link" link)
22 24
     ;;(yason:encode-object-element "source-type" source-type)
23 25
     (yason:encode-object-element "description" description))
... ...
@@ -30,11 +32,11 @@
30 32
 
31 33
 (defmethod yason:encode-slots progn ((item alimenta:item))
32 34
   (with-accessors ((author alimenta:author)
33
-		   (content alimenta:content)
34
-		   (date alimenta:date)
35
-		   (id alimenta:id)
36
-		   (link alimenta:link)
37
-		   (title alimenta:title)) item
35
+                   (content alimenta:content)
36
+                   (date alimenta:date)
37
+                   (id alimenta:id)
38
+                   (link alimenta:link)
39
+                   (title alimenta:title)) item
38 40
     (let* ((date (local-time:format-timestring nil date)))
39 41
       (yason:with-object ()
40 42
 	(yason:encode-object-element "title" title)
... ...
@@ -49,3 +51,20 @@
49 51
     (yason:encode-slots item))
50 52
   item)
51 53
 
54
+
55
+(defun encode-collection-object (other-pairs collection-key collection-value element-encoder)
56
+  (yason:with-object ()
57
+    (loop for (key value) on other-pairs by #'cddr
58
+       do
59
+         (yason:encode-object-element key value))
60
+    (yason:with-object-element (collection-key)
61
+      (yason:with-array ()
62
+        (dolist (item collection-value)
63
+          (funcall element-encoder item))))))
64
+
65
+(defmacro with-collection ((item-sym key collection &rest other-pairs) &body encoder)
66
+  (once-only (key collection)
67
+    `(encode-collection-object (list ,@other-pairs)
68
+                               ,key ,collection
69
+                               (lambda (,item-sym)
70
+                                 ,@encoder))))