Browse code
Initial Functionality
fiddlerwoaroof authored on 23/01/2016 01:30:28
Showing 6 changed files
Showing 6 changed files
0 | 2 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,20 @@ |
1 |
+;;;; alimenta.asd |
|
2 |
+ |
|
3 |
+(asdf:defsystem #:alimenta |
|
4 |
+ :description "A little library to discover, fetch, parse and generate RSS feeds" |
|
5 |
+ :author "Fiddlerwoaroof <fiddlerwoaroof@howit.is>" |
|
6 |
+ :license "MIT" |
|
7 |
+ :depends-on (#:plump |
|
8 |
+ #:lquery |
|
9 |
+ #:should-test |
|
10 |
+ #:alexandria |
|
11 |
+ #:anaphora |
|
12 |
+ #:drakma) |
|
13 |
+ :serial t |
|
14 |
+ :components ((:file "package") |
|
15 |
+ (:file "alimenta") |
|
16 |
+ (:file "fetching") |
|
17 |
+ (:file "discovery"))) |
|
18 |
+ |
|
19 |
+ |
|
20 |
+;; vim: set ft=lisp: |
0 | 21 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,258 @@ |
1 |
+;;;; alimenta.lisp |
|
2 |
+(declaim (optimize (speed 0) (safety 3) (debug 3))) |
|
3 |
+ |
|
4 |
+ |
|
5 |
+(in-package #:alimenta) |
|
6 |
+ |
|
7 |
+;;; "alimenta" goes here. Hacks and glory await! |
|
8 |
+(defun detect-feed-type (xml-dom) |
|
9 |
+ (let ((root-node-name (make-keyword (string-upcase |
|
10 |
+ ($ (inline xml-dom) (children) |
|
11 |
+ (map #'tag-name) (node)))))) |
|
12 |
+ (setf type |
|
13 |
+ (case root-node-name |
|
14 |
+ ((:feed) :atom) |
|
15 |
+ (t root-node-name))))) |
|
16 |
+ |
|
17 |
+ |
|
18 |
+(defgeneric push-item (feed item) |
|
19 |
+ (:documentation "Adds an item to the feed")) |
|
20 |
+ |
|
21 |
+(defgeneric make-item (xml-dom type)) |
|
22 |
+ |
|
23 |
+(defgeneric parse-feed (feed)) |
|
24 |
+ |
|
25 |
+(defgeneric %to-feed (doc type &key feed-link)) |
|
26 |
+ |
|
27 |
+(defgeneric get-items (feed doc type) |
|
28 |
+ (:documentation |
|
29 |
+ "Get items for a feed. Specialize a symbol on type, to get items from a new sort of feed")) |
|
30 |
+ |
|
31 |
+(defgeneric %generate-xml (feed feed-type &key partial)) |
|
32 |
+ |
|
33 |
+(defun generate-xml (feed &key (feed-type :rss)) |
|
34 |
+ (%generate-xml feed feed-type)) |
|
35 |
+ |
|
36 |
+(defmethod %generate-xml :around (feed feed-type &key partial) |
|
37 |
+ (call-next-method feed feed-type :partial partial)) |
|
38 |
+ |
|
39 |
+ |
|
40 |
+(defun to-feed (doc &key type feed-link) |
|
41 |
+ "Makes an instance of feed from the given document. Specialize %to-feed with |
|
42 |
+ an equal-specializer on type with an appropriate symbol to implement a new |
|
43 |
+ sort of feed." |
|
44 |
+ (unless type |
|
45 |
+ (setf type (detect-feed-type doc))) |
|
46 |
+ (%to-feed doc type :feed-link feed-link)) |
|
47 |
+ |
|
48 |
+(defun get-items (feed xml-dom &key type) |
|
49 |
+ (with-slots (items) feed |
|
50 |
+ (loop for item across ($ (inline xml-dom) "channel > item") |
|
51 |
+ do (push (make-item xml-dom type) items) |
|
52 |
+ finally (return items)))) |
|
53 |
+ |
|
54 |
+(defmethod %to-feed :around (xml-dom doc-type &key feed-link) |
|
55 |
+ "This wraps the particular methods so that _they_ don't have to implement item fetching. |
|
56 |
+ NIL passed to the type activates auto-detection" |
|
57 |
+ (aprog1 (call-next-method xml-dom doc-type :feed-link feed-link) |
|
58 |
+ (with-slots (doc source-type) it |
|
59 |
+ (setf doc xml-dom |
|
60 |
+ source-type feed-link)) |
|
61 |
+ (get-items it xml-dom :type doc-type))) |
|
62 |
+ |
|
63 |
+(defgeneric feed-to-rss (feed)) |
|
64 |
+(defgeneric feed-to-atom (feed)) |
|
65 |
+(defgeneric feed-to-json (feed)) |
|
66 |
+(defgeneric feed-to-html5 (feed) |
|
67 |
+ (:documentation |
|
68 |
+ "take a feed object, produce an html5 output. Simple format: |
|
69 |
+ <!DOCTYPE html> |
|
70 |
+ <html lang=\"en\"> |
|
71 |
+ <head> |
|
72 |
+ <meta charset=\"UTF-8\"> |
|
73 |
+ <title>Feed Title</title> |
|
74 |
+ </head> |
|
75 |
+ <body> |
|
76 |
+ <main> |
|
77 |
+ <article id=\"id\"> |
|
78 |
+ <h1>Title</h1> |
|
79 |
+ <h2>Author</h2> |
|
80 |
+ <span class=\"date\">Date</span> |
|
81 |
+ <p>Content</p> |
|
82 |
+ </article> |
|
83 |
+ </main> |
|
84 |
+ </body> |
|
85 |
+ </html>")) |
|
86 |
+ |
|
87 |
+(defclass feed () |
|
88 |
+ ((title :initarg :title :initform nil) |
|
89 |
+ (link :initarg :link :initform nil) |
|
90 |
+ (items :initarg :items :initform nil) |
|
91 |
+ (description :initarg :description :initform nil) |
|
92 |
+ (feed-link :initarg :feed-link :initform nil) |
|
93 |
+ (doc :initarg :doc :initform nil) |
|
94 |
+ (source-type :initarg :source-type :initform nil))) |
|
95 |
+ |
|
96 |
+(defun make-feed (&key title link items feed-link description) |
|
97 |
+ (make-instance 'feed :title title :link link :items items :feed-link feed-link :description description)) |
|
98 |
+ |
|
99 |
+(let ((n 0)) |
|
100 |
+ (defun next-id () |
|
101 |
+ (incf n))) |
|
102 |
+ |
|
103 |
+(defun add-item-to-feed (feed &key title (next-id #'next-id) date link content) |
|
104 |
+ (alet (make-instance 'item :title title :date date :link link :content content) |
|
105 |
+ (with-slots (id) it |
|
106 |
+ (setf id (funcall next-id it))) |
|
107 |
+ (push-item feed it) |
|
108 |
+ (values feed it))) |
|
109 |
+ |
|
110 |
+(defmethod %generate-xml :around ((feed feed) feed-type &rest r) |
|
111 |
+ (declare (ignore r)) |
|
112 |
+ (let ((result (call-next-method feed feed-type))) |
|
113 |
+ (with-slots (items) feed |
|
114 |
+ (loop for item in items |
|
115 |
+ do (%generate-xml item feed-type :partial result))) |
|
116 |
+ result)) |
|
117 |
+ |
|
118 |
+(defmethod shorten-link (link) |
|
119 |
+ (let ((link (cl-ppcre:regex-replace "^https?:" link ""))) |
|
120 |
+ (subseq link 0 (min 30 (length link))))) |
|
121 |
+ |
|
122 |
+(defmethod print-object ((object feed) stream) |
|
123 |
+ (print-unreadable-object (object stream :type t :identity t) |
|
124 |
+ (with-slots (title link) object |
|
125 |
+ (format stream "title: ~s link: ~s" |
|
126 |
+ (aif title (shorten-link it) "<untitled>") |
|
127 |
+ (aif link (shorten-link it) "<no link>"))))) |
|
128 |
+ |
|
129 |
+(defmethod %generate-xml ((feed feed) (feed-type (eql :rss)) &rest r) |
|
130 |
+ (declare (ignore r)) |
|
131 |
+ (let* ((xml-root (plump:make-root)) |
|
132 |
+ (feed-root (plump:make-element xml-root "rss")) |
|
133 |
+ (channel (plump-dom:make-element feed-root "channel"))) |
|
134 |
+ ($ (inline feed-root) |
|
135 |
+ (attr "version" "2.0") |
|
136 |
+ (attr "xmlns:content" "http://purl.org/rss/1.0/modules/content/") |
|
137 |
+ (attr "xmlns:atom" "http://www.w3.org/2005/Atom")) |
|
138 |
+ (with-slots (title link feed-link description) feed |
|
139 |
+ ($ (inline (make-element channel "title")) |
|
140 |
+ (text title)) |
|
141 |
+ ($ (inline (make-element channel "link")) |
|
142 |
+ (text link)) |
|
143 |
+ (awhen description |
|
144 |
+ ($ (inline (make-element channel "description")) |
|
145 |
+ (text it))) |
|
146 |
+ ($ (inline (make-element channel "atom:link")) |
|
147 |
+ (attr "rel" "self") |
|
148 |
+ (attr "type" "application/rss+xml") |
|
149 |
+ (attr "href" link))) |
|
150 |
+ xml-root)) |
|
151 |
+ |
|
152 |
+(defclass item () |
|
153 |
+ ((title :initarg :title :initform nil) |
|
154 |
+ (id :initarg :id :initform nil) |
|
155 |
+ (date :initarg :date :initform nil) |
|
156 |
+ (link :initarg :link :initform nil) |
|
157 |
+ (content :initarg :content :initform nil) |
|
158 |
+ (doc :initarg :doc :initform nil))) |
|
159 |
+ |
|
160 |
+(defmethod %generate-xml ((item item) (feed-type (eql :rss)) &key partial) |
|
161 |
+ (prog1 partial |
|
162 |
+ (let ((item-root (make-element ($ (inline partial) "channel" (node)) "item"))) |
|
163 |
+ (with-slots (title id date link content) item |
|
164 |
+ ($ (inline (make-element item-root "title")) (text title)) |
|
165 |
+ ($ (inline (make-element item-root "link")) (text link)) |
|
166 |
+ (plump-dom:set-attribute |
|
167 |
+ ($ (inline (make-element item-root "guid")) (text id) (node)) |
|
168 |
+ "isPermaLink" |
|
169 |
+ "false") |
|
170 |
+ ($ (inline (make-element item-root "pubDate")) (text date)) |
|
171 |
+ ($ (inline (make-element item-root "description")) (text content)))))) |
|
172 |
+ |
|
173 |
+(defmethod print-object ((object item) stream) |
|
174 |
+ (print-unreadable-object (object stream :type t :identity t) |
|
175 |
+ (with-slots (title link date) object |
|
176 |
+ (format stream "title: ~s link: ~s date:~s" |
|
177 |
+ (aif title (shorten-link it) "<untitled>") |
|
178 |
+ (aif link (shorten-link it) "<no link>") |
|
179 |
+ (aif date (shorten-link it) "<no date>"))))) |
|
180 |
+ |
|
181 |
+;{{{ RSS feed handling |
|
182 |
+(defmethod make-item (xml-dom (type (eql :rss))) |
|
183 |
+ (let* ((item-title ($ "> title" (text) (node))) |
|
184 |
+ (item-link ($ "> link" (text) (node))) |
|
185 |
+ (item-date ($ "> pubDate" (text) (node))) |
|
186 |
+ (item-guid ($ "> guid" (text) (node))) |
|
187 |
+ (item-description ($ "> description" (text) (node))) |
|
188 |
+ (item-content-encoded ($ "> content::encoded" (text) (node))) |
|
189 |
+ (content (with-output-to-string (s) |
|
190 |
+ (serialize (parse (or item-content-encoded item-description)) s))) |
|
191 |
+ (*tag-dispatchers* *html-tags*)) |
|
192 |
+ (make-instance 'item |
|
193 |
+ :content content |
|
194 |
+ :date item-date |
|
195 |
+ :doc xml-dom |
|
196 |
+ :id item-guid |
|
197 |
+ :link item-link |
|
198 |
+ :title item-title))) |
|
199 |
+ |
|
200 |
+(defmethod %to-feed (xml-dom (type (eql :rss)) &key feed-link) |
|
201 |
+ ; TODO: store feed-link |
|
202 |
+ (lquery:initialize xml-dom) |
|
203 |
+ (let ((doc-title ($ "channel > title" (text) (node))) |
|
204 |
+ (doc-link ($ "channel > link" (text) (node))) |
|
205 |
+ (doc-feed-link (or feed-link |
|
206 |
+ ($ "feed > atom::link[rel=self]" (first) (attr "href") (node))))) |
|
207 |
+ (make-instance 'feed :title doc-title :link doc-link :feed-link doc-feed-link))) |
|
208 |
+;}}} |
|
209 |
+ |
|
210 |
+; {{{ ATOM feed handling |
|
211 |
+(defmethod make-item (xml-dom (type (eql :atom))) |
|
212 |
+ (let* ((item-title ($ "> title" (text) (node))) |
|
213 |
+ (item-link ($ "> link[rel=alternate]" (attr "href") (first) (node))) |
|
214 |
+ (item-date (or ($ "> updated" (text) (node)) |
|
215 |
+ ($ "> published" (text) (node)))) ;; Which should be default? |
|
216 |
+ (item-guid ($ "> id" (text) (node))) |
|
217 |
+ (item-description ($ "> summary" (text) (node))) |
|
218 |
+ (item-content ($ "> content" (text) (node))) |
|
219 |
+ (*tag-dispatchers* *html-tags*) |
|
220 |
+ (content (with-output-to-string (s) |
|
221 |
+ (serialize (parse (or item-content item-description)) s)))) |
|
222 |
+ (make-instance 'item |
|
223 |
+ :content content |
|
224 |
+ :date item-date |
|
225 |
+ :id item-guid |
|
226 |
+ :link item-link |
|
227 |
+ :title item-title))) |
|
228 |
+ |
|
229 |
+(defmethod %to-feed (xml-dom (type (eql :atom)) &key feed-link) |
|
230 |
+ (declare (ignore type) (ignorable feed-link)) |
|
231 |
+ ; TODO: store feed-link |
|
232 |
+ (lquery:initialize xml-dom) |
|
233 |
+ (let ((doc-title ($ "feed > title" (text) (node))) |
|
234 |
+ (doc-link ($ "feed > link[rel=alternate]" (first) (attr "href") (node))) |
|
235 |
+ (doc-feed-link (or feed-link |
|
236 |
+ ($ "feed > link[rel=self]" (first) (attr "href") (node))))) |
|
237 |
+ (make-instance 'feed :title doc-title :link doc-link :feed-link doc-feed-link))) |
|
238 |
+;}}} |
|
239 |
+ |
|
240 |
+(defun rdf-to-feed (xml-dom)) |
|
241 |
+(defun json-to-feed (json-object)) |
|
242 |
+(defun html5-to-feed (html-dom)) |
|
243 |
+ |
|
244 |
+(defmethod push-item ((feed feed) (item item)) |
|
245 |
+ (push item (slot-value feed 'items))) |
|
246 |
+ |
|
247 |
+(deftest push-item () |
|
248 |
+ (let ((feed (make-instance 'feed)) |
|
249 |
+ (item (make-instance 'item))) |
|
250 |
+ (with-slots (items) feed |
|
251 |
+ ;(should signal error (push-item feed 2)) |
|
252 |
+ (should be eql item |
|
253 |
+ (progn |
|
254 |
+ (push-item feed item) |
|
255 |
+ (car items)))))) |
|
256 |
+ |
|
257 |
+ |
|
258 |
+;; vim: set foldmethod=marker: |
0 | 259 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,16 @@ |
1 |
+(in-package :alimenta.discover) |
|
2 |
+ |
|
3 |
+(define-lquery-function get-alternates (node) |
|
4 |
+ ($ (inline node) "link[rel=alternate]" |
|
5 |
+ (combine (attr "type") (attr "href")))) |
|
6 |
+ |
|
7 |
+(defun transform-nodes (feed-info-nodes) |
|
8 |
+ (map 'list (lambda (x) (cons (make-keyword (string-upcase (car x))) (cadr x))) |
|
9 |
+ feed-info-nodes)) |
|
10 |
+ |
|
11 |
+(defgeneric discover-feed (doc)) |
|
12 |
+(defmethod discover-feed ((doc string)) |
|
13 |
+ (transform-nodes ($ (initialize doc) (get-alternates) (node)))) |
|
14 |
+(defmethod discover-feed ((doc plump:node)) |
|
15 |
+ (transform-nodes ($ (inline doc) (get-alternates) (node)))) |
|
16 |
+ |
0 | 17 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,40 @@ |
1 |
+(declaim (optimize (speed 0) (safety 3) (debug 3))) |
|
2 |
+(in-package :alimenta.pull-feed) |
|
3 |
+ |
|
4 |
+(defun fetch-doc-from-url (url) |
|
5 |
+ (let ((plump:*tag-dispatchers* plump:*xml-tags*) |
|
6 |
+ (drakma:*text-content-types* (concatenate 'list |
|
7 |
+ '(("application" . "atom+xml") ("application" . "rss+xml")) |
|
8 |
+ drakma:*text-content-types*))) |
|
9 |
+ (plump:parse (drakma:http-request url)))) |
|
10 |
+ |
|
11 |
+(define-condition fetch-error () ()) |
|
12 |
+(define-condition feed-ambiguous (fetch-error) ((choices :initarg :choices :initform nil))) |
|
13 |
+(define-condition no-feed (fetch-error) ((url :initarg :url :initform nil))) |
|
14 |
+ |
|
15 |
+(defun feed-ambiguous (choices) |
|
16 |
+ (error 'feed-ambiguous |
|
17 |
+ :choices choices)) |
|
18 |
+ |
|
19 |
+(defun no-feed (url) |
|
20 |
+ (cerror "Skip this feed" 'no-feed :url url)) |
|
21 |
+ |
|
22 |
+(defun fetch-feed-from-url (url &key type) |
|
23 |
+ (let* ((feeds (alimenta.discover:discover-feed (drakma:http-request url))) |
|
24 |
+ (feeds (if type (remove-if-not (lambda (x) (eql type (car x))) feeds) feeds))) |
|
25 |
+ (format t "~a << type" type) |
|
26 |
+ (if (not feeds) (no-feed url) |
|
27 |
+ (fetch-doc-from-url |
|
28 |
+ (cdar |
|
29 |
+ (restart-case |
|
30 |
+ (if (cdr feeds) (feed-ambiguous feeds) feeds) |
|
31 |
+ (take-first-feed nil |
|
32 |
+ :report (lambda (s) (format s "Take the first feed")) |
|
33 |
+ feeds) |
|
34 |
+ (take-nth-feed (n) |
|
35 |
+ :report (lambda (s) (format s "Take the nth feed")) |
|
36 |
+ (list (elt feeds n))) |
|
37 |
+ (select-feed (selector) |
|
38 |
+ :report (lambda (s) (format s "Provide a function to select the right feed")) |
|
39 |
+ (find-if selector feeds)))))))) |
|
40 |
+ |
0 | 41 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,20 @@ |
1 |
+;;;; package.lisp |
|
2 |
+ |
|
3 |
+(defpackage #:alimenta |
|
4 |
+ (:use #:cl #:should-test #:lquery #:plump #:alexandria #:anaphora) |
|
5 |
+ (:export #:to-feed #:generate-xml |
|
6 |
+ #:feed #:title #:link #:items #:feed-link #:doc #:source-type #:id #:date #:content |
|
7 |
+ #:item)) |
|
8 |
+ |
|
9 |
+(defpackage #:alimenta.discover |
|
10 |
+ (:use #:cl #:alimenta #:alexandria #:anaphora #:lquery) |
|
11 |
+ (:export #:discover-feed)) |
|
12 |
+ |
|
13 |
+(defpackage #:alimenta.pull-feed |
|
14 |
+ (:use #:cl #:alimenta #:alexandria #:anaphora #:lquery) |
|
15 |
+ (:export #:pull-feed)) |
|
16 |
+ |
|
17 |
+(defmethod asdf:perform ((o asdf:test-op) (s (eql (asdf:find-system :alimenta)))) |
|
18 |
+ (asdf:load-system :alimenta) |
|
19 |
+ (st:test :package :alimenta) |
|
20 |
+ t) |