git.fiddlerwoaroof.com
Browse code

Better date handling and better support of RSS

- also: add xml-dom to atom items

fiddlerwoaroof authored on 12/02/2016 06:15:16
Showing 3 changed files
... ...
@@ -9,6 +9,7 @@
9 9
                #:should-test
10 10
                #:alexandria
11 11
                #:anaphora
12
+               #:chronicity
12 13
                #:drakma)
13 14
   :serial t
14 15
   :components ((:file "package")
... ...
@@ -64,6 +64,7 @@
64 64
          (content (with-output-to-string (s)
65 65
                     (awhen (or item-content item-description) (serialize  (parse it) s)))))
66 66
     (make-instance 'atom-item
67
+		   :doc xml-dom
67 68
                    :content content
68 69
                    :date (local-time:parse-timestring item-date)
69 70
                    :id item-guid
... ...
@@ -2,7 +2,24 @@
2 2
 
3 3
 (in-package :alimenta.rss)
4 4
 
5
-(defclass rss-feed (feed) ())
5
+(defclass rss-feed (feed)
6
+  ((language :initarg :language :initform nil)
7
+   (copyright :initarg :copyright :initform nil)
8
+   (managing-editor :initarg :managing-editor :initform nil)
9
+   (webmaster :initarg :webmaster :initform nil)
10
+   (publication-date :initarg :publication-date :initform nil)
11
+   (last-build-date :initarg :last-build-date :initform nil)
12
+   (categories :initarg :categories :initform nil)
13
+   (generator :initarg :generator :initform nil)
14
+   (docs :initarg :docs :initform nil)
15
+   (cloud :initarg :cloud :initform nil)
16
+   (ttl :initarg :ttl :initform nil)
17
+   (image :initarg :image :initform nil)
18
+   (rating :initarg :rating :initform nil)
19
+   (text-input :initarg :text-input :initform nil)
20
+   (skip-hours :initarg :skip-hours :initform nil)
21
+   (skip-days :initarg :skip-days :initform nil)))
22
+
6 23
 (defclass rss-item (feed) ())
7 24
 
8 25
 (defmethod %get-items (xml-dom (feed-type (eql :rss)))
... ...
@@ -45,14 +62,15 @@
45 62
     xml-root))
46 63
 
47 64
 (defmethod make-item (xml-dom (type (eql :rss)))
48
-  (let* ((item-title ($ "> title" (text) (node)))
49
-         (item-link ($ "> link" (text) (node)))
50
-         (item-date ($ "> pubDate" (text) (node)))
51
-         (item-guid ($ "> guid" (text) (node)))
52
-         (item-description ($ "> description" (text) (node)))
53
-         (item-content-encoded ($ "> content::encoded" (text) (node)))
54
-         (content (with-output-to-string (s)
55
-                    (serialize (parse (or item-content-encoded item-description)) s)))
65
+  (let* ((item-title ($ "item > title" (text) (node)))
66
+         (item-link ($ "item > link" (text) (node)))
67
+         (item-date (get-date ($ "item > pubDate" (text) (node))))
68
+         (item-guid ($ "item > guid" (text) (node)))
69
+         (item-description ($ "item > description" (text) (node)))
70
+         (item-content-encoded ($ "item > content::encoded" (text) (node)))
71
+         (content (aif (or item-content-encoded item-description)
72
+                    (with-output-to-string (s)
73
+                      (serialize (parse (or item-content-encoded item-description)) s))))
56 74
          (*tag-dispatchers* *html-tags*))
57 75
     (make-instance 'item
58 76
                    :content content   
... ...
@@ -62,12 +80,62 @@
62 80
                    :link item-link
63 81
                    :title item-title)))
64 82
 
83
+(defun get-date (str)
84
+  (handler-case
85
+    (local-time:parse-timestring str)
86
+    (local-time::invalid-timestring (c) (declare (ignore c))
87
+      (multiple-value-bind (res groups) (cl-ppcre:scan-to-strings "(.*)\s*([+-][0-9]{2,4})\s?$" str)
88
+        (let ((local-time:*default-timezone* local-time:+utc-zone+))
89
+          (let ((timestamp (string-trim " " (if res (elt groups 0) str)))
90
+                (offset (if res (parse-integer (elt groups 1)) 0)))
91
+            (local-time:timestamp- (chronicity:parse timestamp) offset :hour)))))))
92
+
65 93
 (defmethod %to-feed (xml-dom (type (eql :rss)) &key feed-link)
66 94
   ; TODO: store feed-link
67 95
   (lquery:initialize xml-dom)
68
-  (let ((doc-title ($ "channel > title" (text) (node)))
69
-        (doc-link ($ "channel > link" (text) (node)))
70
-        (doc-feed-link (or feed-link
71
-                           ($ "feed > atom::link[rel=self]" (first) (attr "href") (node)))))
72
-    (make-instance 'rss-feed :title doc-title :link doc-link :feed-link doc-feed-link)))
96
+  (flet ((get-channel-element (el)
97
+           ($ (inline xml-dom) el (text) (node))))
98
+    (let ((doc-title (get-channel-element "channel > title"))
99
+          (doc-link (get-channel-element "channel > link"))
100
+
101
+          (doc-language (get-channel-element "channel > language"))
102
+          (doc-copyright (get-channel-element "channel > copyright"))
103
+          (doc-managing-editor (get-channel-element "channel > managingEditor"))
104
+          (doc-webmaster (get-channel-element "channel > webMaster"))
105
+          (doc-publication-date (awhen (get-channel-element "channel > pubDate") (get-date it)))
106
+          (doc-last-build-date (awhen (get-channel-element "channel > lastBuildDate") (get-date it)))
107
+          (doc-categories (get-channel-element "channel > category"))
108
+          (doc-generator (get-channel-element "channel > generator"))
109
+          (doc-docs (get-channel-element "channel > docs"))
110
+          (doc-cloud (get-channel-element "channel > cloud"))
111
+          (doc-ttl (get-channel-element "channel > ttl"))
112
+          (doc-image (get-channel-element "channel > image"))
113
+          (doc-rating (get-channel-element "channel > rating"))
114
+          (doc-text-input (get-channel-element "channel > textInput"))
115
+          (doc-skip-hours (get-channel-element "channel > skipHours"))
116
+          (doc-skip-days (get-channel-element "channel > skipDays"))
117
+
118
+          (doc-feed-link (or feed-link
119
+                             ($ "feed > atom::link[rel=self]" (attr "href") (node)))))
120
+      (make-instance 'rss-feed
121
+        :title doc-title 
122
+        :link doc-link 
123
+        :feed-link doc-feed-link
124
+        
125
+        :language doc-language
126
+        :copyright doc-copyright
127
+        :managing-editor doc-managing-editor
128
+        :webmaster doc-webmaster
129
+        :publication-date doc-publication-date
130
+        :last-build-date doc-last-build-date
131
+        :categories doc-categories
132
+        :generator doc-generator
133
+        :docs doc-docs
134
+        :cloud doc-cloud
135
+        :ttl doc-ttl
136
+        :image doc-image
137
+        :rating doc-rating
138
+        :text-input doc-text-input
139
+        :skip-hours doc-skip-hours
140
+        :skip-days doc-skip-days))))
73 141