git.fiddlerwoaroof.com
Browse code

Initial Functionality

fiddlerwoaroof authored on 23/01/2016 01:30:28
Showing 6 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1 @@
1
+This is the stub README.txt for the "alimenta" project.
0 2
new file mode 100644
... ...
@@ -0,0 +1,20 @@
1
+;;;; alimenta.asd
2
+
3
+(asdf:defsystem #:alimenta
4
+  :description "A little library to discover, fetch, parse and generate RSS feeds"
5
+  :author "Fiddlerwoaroof <fiddlerwoaroof@howit.is>"
6
+  :license "MIT"
7
+  :depends-on (#:plump
8
+               #:lquery
9
+               #:should-test
10
+               #:alexandria
11
+               #:anaphora
12
+               #:drakma)
13
+  :serial t
14
+  :components ((:file "package")
15
+               (:file "alimenta")  
16
+               (:file "fetching")
17
+               (:file "discovery")))
18
+
19
+
20
+;; vim: set ft=lisp:
0 21
new file mode 100644
... ...
@@ -0,0 +1,258 @@
1
+;;;; alimenta.lisp
2
+(declaim (optimize (speed 0) (safety 3) (debug 3)))
3
+
4
+
5
+(in-package #:alimenta)
6
+
7
+;;; "alimenta" goes here. Hacks and glory await!
8
+(defun detect-feed-type (xml-dom)
9
+  (let ((root-node-name (make-keyword (string-upcase
10
+                                        ($ (inline xml-dom) (children)
11
+                                           (map #'tag-name) (node))))))
12
+    (setf type
13
+          (case root-node-name
14
+            ((:feed) :atom)
15
+            (t root-node-name)))))
16
+
17
+
18
+(defgeneric push-item (feed item)
19
+  (:documentation "Adds an item to the feed"))
20
+
21
+(defgeneric make-item (xml-dom type))
22
+
23
+(defgeneric parse-feed (feed))
24
+
25
+(defgeneric %to-feed (doc type &key feed-link))
26
+
27
+(defgeneric get-items (feed doc type)
28
+  (:documentation
29
+    "Get items for a feed. Specialize a symbol on type, to get items from a new sort of feed"))
30
+
31
+(defgeneric %generate-xml (feed feed-type &key partial))
32
+
33
+(defun generate-xml (feed &key (feed-type :rss))
34
+  (%generate-xml feed feed-type))
35
+
36
+(defmethod %generate-xml :around (feed feed-type &key partial)
37
+  (call-next-method feed feed-type :partial partial))
38
+
39
+
40
+(defun to-feed (doc &key type feed-link)
41
+  "Makes an instance of feed from the given document.  Specialize %to-feed with
42
+   an equal-specializer on type with an appropriate symbol to implement a new
43
+   sort of feed."
44
+  (unless type
45
+    (setf type (detect-feed-type doc)))
46
+  (%to-feed doc type :feed-link feed-link))
47
+
48
+(defun get-items (feed xml-dom &key type)
49
+  (with-slots (items) feed
50
+    (loop for item across ($ (inline xml-dom) "channel > item")
51
+          do (push (make-item xml-dom type) items)
52
+          finally (return items)))) 
53
+
54
+(defmethod %to-feed :around (xml-dom doc-type &key feed-link)
55
+  "This wraps the particular methods so that _they_ don't have to implement item fetching.
56
+   NIL passed to the type activates auto-detection"
57
+  (aprog1 (call-next-method xml-dom doc-type :feed-link feed-link)
58
+    (with-slots (doc source-type) it
59
+      (setf doc xml-dom
60
+            source-type feed-link))
61
+    (get-items it xml-dom :type doc-type)))
62
+
63
+(defgeneric feed-to-rss (feed))
64
+(defgeneric feed-to-atom (feed))
65
+(defgeneric feed-to-json (feed))
66
+(defgeneric feed-to-html5 (feed)
67
+  (:documentation
68
+    "take a feed object, produce an html5 output.  Simple format:
69
+     <!DOCTYPE html>
70
+     <html lang=\"en\">
71
+     <head>
72
+       <meta charset=\"UTF-8\">
73
+       <title>Feed Title</title>
74
+     </head>
75
+     <body>
76
+       <main>
77
+         <article id=\"id\">
78
+           <h1>Title</h1>
79
+           <h2>Author</h2>
80
+           <span class=\"date\">Date</span>
81
+           <p>Content</p>
82
+         </article>
83
+       </main>
84
+     </body>
85
+     </html>"))
86
+
87
+(defclass feed ()
88
+  ((title :initarg :title :initform nil)
89
+   (link :initarg :link :initform nil)
90
+   (items :initarg :items :initform nil)
91
+   (description :initarg :description :initform nil)
92
+   (feed-link :initarg :feed-link :initform nil)
93
+   (doc :initarg :doc :initform nil)
94
+   (source-type :initarg :source-type :initform nil)))
95
+
96
+(defun make-feed (&key title link items feed-link description)
97
+  (make-instance 'feed :title title :link link :items items :feed-link feed-link :description description))
98
+
99
+(let ((n 0))
100
+  (defun next-id ()
101
+    (incf n)))
102
+
103
+(defun add-item-to-feed (feed &key title (next-id #'next-id) date link content)
104
+  (alet (make-instance 'item :title title :date date :link link :content content)
105
+    (with-slots (id) it
106
+      (setf id (funcall next-id it)))
107
+    (push-item feed it)
108
+    (values feed it)))
109
+
110
+(defmethod %generate-xml :around ((feed feed) feed-type &rest r)
111
+  (declare (ignore r))
112
+  (let ((result (call-next-method feed feed-type)))
113
+    (with-slots (items) feed
114
+      (loop for item in items
115
+            do (%generate-xml item feed-type :partial result)))
116
+    result))
117
+
118
+(defmethod shorten-link (link)
119
+  (let ((link (cl-ppcre:regex-replace "^https?:" link "")))
120
+    (subseq link 0 (min 30 (length link)))))
121
+
122
+(defmethod print-object ((object feed) stream)
123
+  (print-unreadable-object (object stream :type t :identity t)
124
+    (with-slots (title link) object
125
+      (format stream "title: ~s link: ~s"
126
+              (aif title (shorten-link it) "<untitled>")
127
+              (aif link (shorten-link it) "<no link>")))))
128
+
129
+(defmethod %generate-xml ((feed feed) (feed-type (eql :rss)) &rest r)
130
+  (declare (ignore r))
131
+  (let* ((xml-root (plump:make-root))
132
+         (feed-root (plump:make-element xml-root "rss"))
133
+         (channel (plump-dom:make-element feed-root "channel")))
134
+    ($ (inline feed-root)
135
+       (attr "version" "2.0")
136
+       (attr "xmlns:content" "http://purl.org/rss/1.0/modules/content/")
137
+       (attr "xmlns:atom" "http://www.w3.org/2005/Atom"))
138
+    (with-slots (title link feed-link description) feed
139
+      ($ (inline (make-element channel "title"))
140
+         (text title))
141
+      ($ (inline (make-element channel "link"))
142
+         (text link))
143
+      (awhen description
144
+        ($ (inline (make-element channel "description"))
145
+         (text it)))
146
+      ($ (inline (make-element channel "atom:link"))
147
+         (attr "rel" "self")
148
+         (attr "type" "application/rss+xml")
149
+         (attr "href" link)))
150
+    xml-root))
151
+
152
+(defclass item ()
153
+  ((title :initarg :title :initform nil)
154
+   (id :initarg :id :initform nil)
155
+   (date :initarg :date :initform nil)
156
+   (link :initarg :link :initform nil)
157
+   (content :initarg :content :initform nil)
158
+   (doc :initarg :doc :initform nil)))
159
+
160
+(defmethod %generate-xml ((item item) (feed-type (eql :rss)) &key partial)
161
+  (prog1 partial
162
+    (let ((item-root (make-element ($ (inline partial) "channel" (node)) "item")))
163
+      (with-slots (title id date link content) item
164
+        ($ (inline (make-element item-root "title")) (text title)) 
165
+        ($ (inline (make-element item-root "link")) (text link)) 
166
+        (plump-dom:set-attribute
167
+          ($ (inline (make-element item-root "guid")) (text id) (node))
168
+          "isPermaLink"
169
+          "false") 
170
+        ($ (inline (make-element item-root "pubDate")) (text date)) 
171
+        ($ (inline (make-element item-root "description")) (text content))))))
172
+
173
+(defmethod print-object ((object item) stream)
174
+  (print-unreadable-object (object stream :type t :identity t)
175
+    (with-slots (title link date) object
176
+      (format stream "title: ~s link: ~s date:~s"
177
+              (aif title (shorten-link it) "<untitled>")
178
+              (aif link (shorten-link it) "<no link>")
179
+              (aif date (shorten-link it) "<no date>")))))
180
+
181
+;{{{ RSS feed handling
182
+(defmethod make-item (xml-dom (type (eql :rss)))
183
+  (let* ((item-title ($ "> title" (text) (node)))
184
+         (item-link ($ "> link" (text) (node)))
185
+         (item-date ($ "> pubDate" (text) (node)))
186
+         (item-guid ($ "> guid" (text) (node)))
187
+         (item-description ($ "> description" (text) (node)))
188
+         (item-content-encoded ($ "> content::encoded" (text) (node)))
189
+         (content (with-output-to-string (s)
190
+                    (serialize (parse (or item-content-encoded item-description)) s)))
191
+         (*tag-dispatchers* *html-tags*))
192
+    (make-instance 'item
193
+                   :content content   
194
+                   :date item-date
195
+                   :doc xml-dom
196
+                   :id item-guid
197
+                   :link item-link
198
+                   :title item-title)))
199
+
200
+(defmethod %to-feed (xml-dom (type (eql :rss)) &key feed-link)
201
+  ; TODO: store feed-link
202
+  (lquery:initialize xml-dom)
203
+  (let ((doc-title ($ "channel > title" (text) (node)))
204
+        (doc-link ($ "channel > link" (text) (node)))
205
+        (doc-feed-link (or feed-link
206
+                           ($ "feed > atom::link[rel=self]" (first) (attr "href") (node)))))
207
+    (make-instance 'feed :title doc-title :link doc-link :feed-link doc-feed-link)))
208
+;}}} 
209
+
210
+; {{{ ATOM feed handling
211
+(defmethod make-item (xml-dom (type (eql :atom)))
212
+  (let* ((item-title ($ "> title" (text) (node)))
213
+         (item-link ($ "> link[rel=alternate]" (attr "href") (first) (node)))
214
+         (item-date (or ($ "> updated" (text) (node))
215
+                        ($ "> published" (text) (node)))) ;; Which should be default?
216
+         (item-guid ($ "> id" (text) (node)))
217
+         (item-description ($ "> summary" (text) (node)))
218
+         (item-content ($ "> content" (text) (node)))
219
+         (*tag-dispatchers* *html-tags*) 
220
+         (content (with-output-to-string (s)
221
+                    (serialize (parse (or item-content item-description)) s))))
222
+    (make-instance 'item
223
+                   :content content   
224
+                   :date item-date 
225
+                   :id item-guid 
226
+                   :link item-link 
227
+                   :title item-title)))
228
+
229
+(defmethod %to-feed (xml-dom (type (eql :atom)) &key feed-link)
230
+  (declare (ignore type) (ignorable feed-link))
231
+  ; TODO: store feed-link
232
+  (lquery:initialize xml-dom)
233
+  (let ((doc-title ($ "feed > title" (text) (node)))
234
+        (doc-link ($ "feed > link[rel=alternate]" (first) (attr "href") (node)))
235
+        (doc-feed-link (or feed-link
236
+                           ($ "feed > link[rel=self]" (first) (attr "href") (node)))))
237
+    (make-instance 'feed :title doc-title :link doc-link :feed-link doc-feed-link)))
238
+;}}}
239
+  
240
+(defun rdf-to-feed (xml-dom))
241
+(defun json-to-feed (json-object))
242
+(defun html5-to-feed (html-dom))
243
+
244
+(defmethod push-item ((feed feed) (item item))
245
+  (push item (slot-value feed 'items)))
246
+
247
+(deftest push-item ()
248
+  (let ((feed (make-instance 'feed))
249
+        (item (make-instance 'item)))
250
+    (with-slots (items) feed
251
+      ;(should signal error (push-item feed 2))
252
+      (should be eql item
253
+              (progn
254
+                (push-item feed item)
255
+                (car items))))))
256
+
257
+
258
+;; vim: set foldmethod=marker:
0 259
new file mode 100644
... ...
@@ -0,0 +1,16 @@
1
+(in-package :alimenta.discover)
2
+
3
+(define-lquery-function get-alternates (node)
4
+  ($ (inline node) "link[rel=alternate]"
5
+     (combine (attr "type") (attr "href"))))
6
+
7
+(defun transform-nodes (feed-info-nodes)
8
+  (map 'list (lambda (x) (cons (make-keyword (string-upcase (car x))) (cadr x)))
9
+       feed-info-nodes))
10
+
11
+(defgeneric discover-feed (doc))
12
+(defmethod discover-feed ((doc string))
13
+       (transform-nodes ($ (initialize doc) (get-alternates) (node))))
14
+(defmethod discover-feed ((doc plump:node))
15
+       (transform-nodes ($ (inline doc) (get-alternates) (node))))
16
+
0 17
new file mode 100644
... ...
@@ -0,0 +1,40 @@
1
+(declaim (optimize (speed 0) (safety 3) (debug 3)))
2
+(in-package :alimenta.pull-feed)
3
+
4
+(defun fetch-doc-from-url (url)
5
+  (let ((plump:*tag-dispatchers* plump:*xml-tags*)
6
+        (drakma:*text-content-types* (concatenate 'list
7
+                                                  '(("application" . "atom+xml") ("application" . "rss+xml"))
8
+                                                  drakma:*text-content-types*)))
9
+    (plump:parse (drakma:http-request url))))
10
+
11
+(define-condition fetch-error () ())
12
+(define-condition feed-ambiguous (fetch-error) ((choices :initarg :choices :initform nil)))
13
+(define-condition no-feed (fetch-error) ((url :initarg :url :initform nil)))
14
+
15
+(defun feed-ambiguous (choices)
16
+  (error 'feed-ambiguous
17
+         :choices choices))
18
+
19
+(defun no-feed (url)
20
+  (cerror "Skip this feed" 'no-feed :url url))
21
+
22
+(defun fetch-feed-from-url (url &key type)
23
+  (let* ((feeds (alimenta.discover:discover-feed (drakma:http-request url)))
24
+         (feeds (if type (remove-if-not (lambda (x) (eql type (car x))) feeds) feeds)))
25
+    (format t "~a << type" type)
26
+    (if (not feeds) (no-feed url)
27
+      (fetch-doc-from-url
28
+        (cdar 
29
+          (restart-case
30
+            (if (cdr feeds) (feed-ambiguous feeds) feeds)
31
+            (take-first-feed nil
32
+                             :report (lambda (s) (format s "Take the first feed"))
33
+                             feeds)
34
+            (take-nth-feed (n)
35
+                           :report (lambda (s) (format s "Take the nth feed"))
36
+                           (list (elt feeds n)))
37
+            (select-feed (selector)
38
+                         :report (lambda (s) (format s "Provide a function to select the right feed"))
39
+                         (find-if selector feeds))))))))
40
+
0 41
new file mode 100644
... ...
@@ -0,0 +1,20 @@
1
+;;;; package.lisp
2
+
3
+(defpackage #:alimenta
4
+  (:use #:cl #:should-test #:lquery #:plump #:alexandria #:anaphora)
5
+  (:export #:to-feed #:generate-xml
6
+           #:feed #:title #:link #:items #:feed-link #:doc #:source-type #:id #:date #:content
7
+           #:item))
8
+
9
+(defpackage #:alimenta.discover
10
+  (:use #:cl #:alimenta #:alexandria #:anaphora #:lquery)
11
+  (:export #:discover-feed))
12
+
13
+(defpackage #:alimenta.pull-feed 
14
+  (:use #:cl #:alimenta #:alexandria #:anaphora #:lquery)
15
+  (:export #:pull-feed)) 
16
+
17
+(defmethod asdf:perform ((o asdf:test-op) (s (eql (asdf:find-system :alimenta))))
18
+  (asdf:load-system :alimenta)
19
+  (st:test :package :alimenta)
20
+  t)