git.fiddlerwoaroof.com
Browse code

Reworking data model---support more of RSS standard

fiddlerwoaroof authored on 14/02/2016 08:45:58
Showing 7 changed files
... ...
@@ -10,6 +10,7 @@
10 10
                #:alexandria
11 11
                #:anaphora
12 12
                #:chronicity
13
+               #:fwoar.lisputils
13 14
                #:drakma)
14 15
   :serial t
15 16
   :components ((:file "package")
... ...
@@ -22,6 +22,12 @@
22 22
    (content :initarg :content :initform nil)
23 23
    (doc :initarg :doc :initform nil)))
24 24
 
25
+(defclass complex-value () ())
26
+
27
+(defgeneric primary-value (self)
28
+  (:documentation "Primarily for COMPLEX-VALUES: this should take one and return a useful primary value")
29
+  (:method ((self t)) self))
30
+
25 31
 (define-condition duplicate-link-type (error)
26 32
   ((old :reader duplicate-link-type-old :initarg :old)
27 33
    (new :reader duplicate-link-type-new :initarg :new))
28 34
new file mode 100644
... ...
@@ -0,0 +1,48 @@
1
+(in-package #:data-class)
2
+
3
+(eval-when (:compile-toplevel :load-toplevel :execute)
4
+  (defun get-channel-element (xml-dom el)
5
+    ($ (inline xml-dom) el (text) (node))))
6
+
7
+(defun element-name-from-symbol (sym)
8
+  (let* ((base (string-downcase sym))
9
+         (split (split-sequence:split-sequence #\~ base))
10
+         (capit (cons (car split) (map 'list #'nstring-capitalize (cdr split)))))
11
+    (apply #'concatenate 'string capit)))
12
+
13
+(defmacro ensure-slot (sym &body or-else)
14
+  `(handler-case
15
+     ,sym
16
+     (unbound-slot (c)
17
+       (declare (ignore c))
18
+       ,@or-else)))
19
+
20
+(defmacro lazy-load-slot (class-name doc-slot root-el name tag-name &key transform value)
21
+  `(defmethod ,name :before ((self ,class-name))
22
+     (with-slots (,name ,doc-slot) self
23
+       (ensure-slot ,name
24
+         (alet ,(or value
25
+                    `(get-channel-element ,doc-slot
26
+                                          ,(format nil "~a > ~a" root-el tag-name)))
27
+           ,(if transform
28
+              `(setf ,name (when it (,transform it)))
29
+              `(setf ,name it)))))))
30
+
31
+(defgeneric all-slots (self format))
32
+
33
+(defmacro define-data-class (name (doc-slot root-el) (&rest superclasses) &body slots)
34
+  `(progn
35
+     (defclass ,name ,superclasses
36
+       ,(loop for (slot) in (fw.lu:ensure-mapping slots)
37
+              collect `(,slot :initarg ,(make-keyword slot) :accessor ,slot)))
38
+     ,@(loop for (slot tag-name . rest) in (fw.lu:ensure-mapping slots)
39
+             collect `(lazy-load-slot ,name ,doc-slot ,root-el ,slot ,tag-name ,@rest))
40
+     (defmethod all-slots ((self ,name) format)
41
+       (pairlis (list ,@(mapcar (fw.lu::alambda (make-keyword (car it)))
42
+                                (fw.lu:ensure-mapping slots)))
43
+                (list ,@(loop for (slot) in (fw.lu:ensure-mapping slots)
44
+                              collect `(,slot self)))))))
45
+
46
+
47
+
48
+
... ...
@@ -1,13 +1,21 @@
1 1
 ;;;; package.lisp
2 2
 
3
+(defpackage #:data-class
4
+  (:use #:cl #:should-test #:lquery #:plump #:alexandria #:anaphora)
5
+  (:export #:define-data-class))
6
+
3 7
 (defpackage #:alimenta
4 8
   (:use #:cl #:should-test #:lquery #:plump #:alexandria #:anaphora)
5 9
   (:export #:to-feed #:generate-xml
6 10
            #:feed #:title #:link #:items #:feed-link #:doc #:source-type #:id #:date #:content
7
-           #:item #:description #:%generate-xml #:%to-feed #:%get-items #:make-item))
11
+           #:item #:description #:%generate-xml #:%to-feed #:%get-items #:make-item #:complex-value
12
+           #:primary-value))
8 13
 
9 14
 (defpackage #:alimenta.rss
10
-  (:use #:cl #:should-test #:lquery #:plump #:alexandria #:anaphora #:alimenta))
15
+  (:use #:cl #:should-test #:lquery #:plump #:alexandria #:anaphora #:alimenta)
16
+  (:export #:language #:copyright #:managing-editor #:webmaster #:publication-date #:last-build-date
17
+           #:categories #:generator #:docs #:cloud #:ttl #:image #:rating #:text-input #:skip-hours
18
+           #:skip-days #:rss-feed #:rss-item))
11 19
 
12 20
 (defpackage #:alimenta.atom
13 21
   (:use #:cl #:should-test #:lquery #:plump #:alexandria #:anaphora #:alimenta))
... ...
@@ -18,7 +26,7 @@
18 26
 
19 27
 (defpackage #:alimenta.pull-feed 
20 28
   (:use #:cl #:alimenta #:alexandria #:anaphora #:lquery)
21
-  (:export #:pull-feed)) 
29
+  (:export #:pull-feed #:fetch-doc-from-url)) 
22 30
 
23 31
 (defmethod asdf:perform ((o asdf:test-op) (s (eql (asdf:find-system :alimenta))))
24 32
   (asdf:load-system :alimenta)
... ...
@@ -1,42 +1,111 @@
1 1
 (declaim (optimize (speed 0) (safety 3) (debug 3)))
2 2
 
3 3
 (in-package :alimenta.rss)
4
+(defun get-date (str)
5
+  (handler-case
6
+    (local-time:parse-timestring str)
7
+    (local-time::invalid-timestring (c) (declare (ignore c))
8
+      (multiple-value-bind (res groups) (cl-ppcre:scan-to-strings "(.*)\s*([+-][0-9]{2,4})\s?$" str)
9
+        (let ((local-time:*default-timezone* local-time:+utc-zone+))
10
+          (let* ((timestamp (string-trim " " (if res (elt groups 0) str)))
11
+                 (hour-offset (if res (parse-integer (elt groups 1) :end 3) 0)) 
12
+                 (minute-offset (if (and res (> (length (elt groups 1)) 3))
13
+                                  (* (signum hour-offset) (parse-integer (elt groups 1) :start 3))
14
+                                  0)))
15
+                  
16
+            (local-time:timestamp- (local-time:timestamp- (chronicity:parse timestamp) minute-offset :minute)
17
+                                   hour-offset :hour)))))))
18
+
19
+
20
+(defclass rss-image ()
21
+  ((url :initarg :url :initform nil)
22
+   (title :initarg :title :initform nil)
23
+   (link :initarg :link :initform nil)
24
+   (width :initarg :width :initform nil)
25
+   (height :initarg :height :initform nil)
26
+   (description :initarg :description :initform nil)))
27
+
28
+(defmethod print-object ((self rss-image) stream)
29
+  (print-unreadable-object (self stream :type t :identity t)
30
+    (format stream "~a" (slot-value self 'url))))
31
+
32
+(defmethod primary-value ((self rss-image))
33
+  (slot-value self 'url))
34
+
35
+(defun make-image (url title &optional link width height description)
36
+  (let ((link (or link url)))
37
+    (make-instance 'rss-image
38
+                   :url url
39
+                   :title title
40
+                   :link link
41
+                   :width width
42
+                   :height height
43
+                   :description description)))
44
+
45
+(defclass rss-category ()
46
+  ((category :initarg :category :initform nil)
47
+   (domain :initarg :domain :initform nil)))
48
+
49
+(defmethod print-object ((self rss-category) stream)
50
+  (print-unreadable-object (self stream :type t :identity t)
51
+    (format stream "~a~@[ ~a~]"
52
+            (slot-value self 'category)
53
+            (slot-value self 'domain))))
4 54
 
5
-(defclass rss-feed (feed)
6
-  ((language :initarg :language :initform nil)
7
-   (copyright :initarg :copyright :initform nil)
8
-   (managing-editor :initarg :managing-editor :initform nil)
9
-   (webmaster :initarg :webmaster :initform nil)
10
-   (publication-date :initarg :publication-date :initform nil)
11
-   (last-build-date :initarg :last-build-date :initform nil)
12
-   (categories :initarg :categories :initform nil)
13
-   (generator :initarg :generator :initform nil)
14
-   (docs :initarg :docs :initform nil)
15
-   (cloud :initarg :cloud :initform nil)
16
-   (ttl :initarg :ttl :initform nil)
17
-   (image :initarg :image :initform nil)
18
-   (rating :initarg :rating :initform nil)
19
-   (text-input :initarg :text-input :initform nil)
20
-   (skip-hours :initarg :skip-hours :initform nil)
21
-   (skip-days :initarg :skip-days :initform nil)))
22
-
23
-(defclass rss-item (feed) ())
55
+(defmethod primary-value ((self rss-category))
56
+  (slot-value self 'category))
57
+
58
+(defun make-category (category &optional domain)
59
+ (make-instance 'rss-category :category category :domain domain))
60
+
61
+(defun get-categories (doc tag)
62
+  ($ (inline doc) tag
63
+     (combine (text) (attr "domain"))
64
+     (map-apply #'make-category)))
65
+
66
+(define-data-class rss-feed (doc "channel") (feed)
67
+  language copyright webmaster
68
+  generator docs cloud ttl rating
69
+  (image "image"
70
+         :value (apply #'make-image
71
+                       (mapcar (lambda (x) (when (> (length x) 0)
72
+                                             (plump:text (elt x 0))))
73
+                               (let ((plump:*tag-dispatchers* plump:*xml-tags*))
74
+                                 ($1 (inline doc) "channel > image"
75
+                                     (combine "url" "title" "link" "width" "height"
76
+                                              "description")))))) 
77
+
78
+  (categories "category" :value (get-categories doc  "channel > category"))
79
+  (text-input "textInput")
80
+  (managing-editor "managingEditor") 
81
+  (skip-days "skipDays")
82
+  (skip-hours "skipHours") 
83
+  (publication-date "publicationDate" :transform get-date) 
84
+  (last-build-date "lastBuildDate" :transform get-date))
85
+
86
+(define-data-class rss-item (doc "") (item)
87
+  (categories "category" :value (get-categories doc "> category"))
88
+  source comments enclosure )
24 89
 
25 90
 (defmethod %get-items (xml-dom (feed-type (eql :rss)))
26 91
   ($ (inline xml-dom) "channel > item"))
27 92
 
28 93
 (defmethod %generate-xml ((item item) (feed-type (eql :rss)) &key partial)
29 94
   (prog1 partial
30
-    (let ((item-root (make-element ($ (inline partial) "channel" (node)) "item")))
31
-      (with-slots (title id date link content) item
32
-        ($ (inline (make-element item-root "title")) (text title)) 
33
-        ($ (inline (make-element item-root "link")) (text link)) 
34
-        (plump-dom:set-attribute
35
-          ($ (inline (make-element item-root "guid")) (text id) (node))
36
-          "isPermaLink"
37
-          "false") 
38
-        ($ (inline (make-element item-root "pubDate")) (text date)) 
39
-        ($ (inline (make-element item-root "description")) (text content))))))
95
+    (let ((item-root (make-element ($1 (inline partial) "channel") "item")))
96
+      (flet ((make-element (tag) (make-element item-root tag)))
97
+        (with-slots (title id date link content) item
98
+          ($ (inline (make-element "title")) (text title)
99
+            (inline (make-element "link")) (text link)
100
+            (inline (make-element "pubDate")) (text date)
101
+            (inline (make-element "description")) (text content))    
102
+          (plump-dom:set-attribute
103
+            ($ (inline (make-element "guid")) (text id) (node))
104
+            "isPermaLink"
105
+            "false") 
106
+          ))
107
+
108
+      )))
40 109
 
41 110
 (defmethod %generate-xml ((feed feed) (feed-type (eql :rss)) &rest r)
42 111
   (declare (ignore r))
... ...
@@ -62,17 +131,18 @@
62 131
     xml-root))
63 132
 
64 133
 (defmethod make-item (xml-dom (type (eql :rss)))
65
-  (let* ((item-title ($ "item > title" (text) (node)))
66
-         (item-link ($ "item > link" (text) (node)))
67
-         (item-date (get-date ($ "item > pubDate" (text) (node))))
68
-         (item-guid ($ "item > guid" (text) (node)))
69
-         (item-description ($ "item > description" (text) (node)))
70
-         (item-content-encoded ($ "item > content::encoded" (text) (node)))
134
+  (let* ((*lquery-master-document* xml-dom)
135
+         (item-title ($1 "> title" (text)))
136
+         (item-link ($1 "> link" (text)))
137
+         (item-date (awhen ($1 "> pubDate" (text)) (get-date it)))
138
+         (item-guid ($1 "> guid" (text)))
139
+         (item-description ($1 "> description" (text)))
140
+         (item-content-encoded ($1 "> content::encoded" (text)))
71 141
          (content (aif (or item-content-encoded item-description)
72 142
                     (with-output-to-string (s)
73 143
                       (serialize (parse (or item-content-encoded item-description)) s))))
74 144
          (*tag-dispatchers* *html-tags*))
75
-    (make-instance 'item
145
+    (make-instance 'rss-item
76 146
                    :content content   
77 147
                    :date item-date
78 148
                    :doc xml-dom
... ...
@@ -80,62 +150,33 @@
80 150
                    :link item-link
81 151
                    :title item-title)))
82 152
 
83
-(defun get-date (str)
84
-  (handler-case
85
-    (local-time:parse-timestring str)
86
-    (local-time::invalid-timestring (c) (declare (ignore c))
87
-      (multiple-value-bind (res groups) (cl-ppcre:scan-to-strings "(.*)\s*([+-][0-9]{2,4})\s?$" str)
88
-        (let ((local-time:*default-timezone* local-time:+utc-zone+))
89
-          (let ((timestamp (string-trim " " (if res (elt groups 0) str)))
90
-                (offset (if res (parse-integer (elt groups 1)) 0)))
91
-            (local-time:timestamp- (chronicity:parse timestamp) offset :hour)))))))
153
+(deftest get-date ()
154
+  (should be local-time:timestamp=
155
+          (local-time:parse-timestring "2016-01-09T23:00:00.000000-0100")
156
+          (get-date "Fri, 09 Jan 2016 23:00:00-0100"))
157
+  (should be local-time:timestamp=
158
+          (local-time:parse-timestring "2016-01-09T23:00:00.000000-0100")
159
+          (get-date "Fri, 09 Jan 2016 23:00:00 -0100"))
160
+  (should be local-time:timestamp=
161
+          (local-time:parse-timestring "2016-01-09T23:00:00.000000-0100")
162
+          (get-date "Fri, 09 Jan 2016 22:00:00 -0200"))  
163
+  (should be local-time:timestamp=
164
+          (local-time:parse-timestring "2016-01-09T23:00:00.000000-0100")
165
+          (get-date "Fri, 09 Jan 2016 21:30:00 -0230"))) 
92 166
 
93 167
 (defmethod %to-feed (xml-dom (type (eql :rss)) &key feed-link)
94 168
   ; TODO: store feed-link
95
-  (lquery:initialize xml-dom)
96 169
   (flet ((get-channel-element (el)
97 170
            ($ (inline xml-dom) el (text) (node))))
98
-    (let ((doc-title (get-channel-element "channel > title"))
99
-          (doc-link (get-channel-element "channel > link"))
100
-
101
-          (doc-language (get-channel-element "channel > language"))
102
-          (doc-copyright (get-channel-element "channel > copyright"))
103
-          (doc-managing-editor (get-channel-element "channel > managingEditor"))
104
-          (doc-webmaster (get-channel-element "channel > webMaster"))
105
-          (doc-publication-date (awhen (get-channel-element "channel > pubDate") (get-date it)))
106
-          (doc-last-build-date (awhen (get-channel-element "channel > lastBuildDate") (get-date it)))
107
-          (doc-categories (get-channel-element "channel > category"))
108
-          (doc-generator (get-channel-element "channel > generator"))
109
-          (doc-docs (get-channel-element "channel > docs"))
110
-          (doc-cloud (get-channel-element "channel > cloud"))
111
-          (doc-ttl (get-channel-element "channel > ttl"))
112
-          (doc-image (get-channel-element "channel > image"))
113
-          (doc-rating (get-channel-element "channel > rating"))
114
-          (doc-text-input (get-channel-element "channel > textInput"))
115
-          (doc-skip-hours (get-channel-element "channel > skipHours"))
116
-          (doc-skip-days (get-channel-element "channel > skipDays"))
117
-
118
-          (doc-feed-link (or feed-link
119
-                             ($ "feed > atom::link[rel=self]" (attr "href") (node)))))
171
+    (let* ((*lquery-master-document* xml-dom)
172
+           (doc-title (get-channel-element "channel > title"))
173
+           (doc-link (get-channel-element "channel > link"))
174
+           (doc-description (get-channel-element "channel > description"))
175
+           (doc-feed-link (or feed-link
176
+                              ($ "feed > atom::link[rel=self]" (attr "href") (node)))))
120 177
       (make-instance 'rss-feed
121
-        :title doc-title 
122
-        :link doc-link 
123
-        :feed-link doc-feed-link
124
-        
125
-        :language doc-language
126
-        :copyright doc-copyright
127
-        :managing-editor doc-managing-editor
128
-        :webmaster doc-webmaster
129
-        :publication-date doc-publication-date
130
-        :last-build-date doc-last-build-date
131
-        :categories doc-categories
132
-        :generator doc-generator
133
-        :docs doc-docs
134
-        :cloud doc-cloud
135
-        :ttl doc-ttl
136
-        :image doc-image
137
-        :rating doc-rating
138
-        :text-input doc-text-input
139
-        :skip-hours doc-skip-hours
140
-        :skip-days doc-skip-days))))
178
+        :title doc-title
179
+        :link doc-link
180
+        :description doc-description
181
+        :feed-link doc-feed-link))))
141 182
 
... ...
@@ -9,10 +9,11 @@
9 9
     `(*
10 10
        :padding "0px"
11 11
        :margin "0px")
12
+
12 13
     `(body
13 14
        :box-sizing "border-box"
14 15
        :font-family sans-serif
15
-       )
16
+       :background "#888")
16 17
 
17 18
     `(header
18 19
        :border-bottom "thin solid black"
... ...
@@ -21,22 +22,33 @@
21 22
        :margin-bottom "2em"
22 23
        :padding "1em")
23 24
 
25
+    `(div.articles
26
+       :display "flex"
27
+       :flex-flow "row"
28
+       :flex-wrap "wrap-reverse"
29
+       :align-items "baseline"
30
+       :justify-content "space-around"
31
+       :align-content "space-between"
32
+       )
33
+
24 34
     `(article
25 35
        :padding "1em"
26 36
        :border "4px double #888"
27
-       :display "inline-block"
37
+       :vertical-align "middle"
28 38
        :width "30%"
29 39
        :overflow "hidden"
30 40
        :min-height "4em"
41
+       :background "#aaa"
31 42
        )
32 43
     ))
33 44
 
34 45
  (defmethod araneus:view ((name (eql 'root)) (item alimenta:item))
35
-  (with-slots ((title alimenta:title) (link alimenta:link)) item
46
+  (with-slots ((title alimenta:title) (link alimenta:link) (content alimenta:content)) item
36 47
     (spinneret:with-html 
37 48
       (:article
38 49
         (:div.title title)
39
-        (:a.link :href link link)))))
50
+        (:a.link :href link link)
51
+        (:div.content (:raw content))))))
40 52
 
41 53
 (defmethod araneus:view ((name (eql 'root)) (feed alimenta:feed))
42 54
   (with-slots ((title alimenta:title) (link alimenta:link)) feed
... ...
@@ -56,13 +68,14 @@
56 68
         (:body
57 69
           (:main
58 70
             (call-next-method)
59
-            (loop for item in items
60
-                  do (araneus:view 'root item))))))))
71
+            (:div.articles
72
+              (loop for item in items
73
+                    do (araneus:view 'root item)))))))))
61 74
 
62 75
 (araneus:define-controller root (params)
63
-  (let* ((url "https://reddit.com/r/programming.rss")
76
+  (let* ((url "http://thomism.wordpress.com/feed/atom")
64 77
          (feed (alimenta.pull-feed::fetch-doc-from-url url)))
65
-    (alimenta:to-feed feed :type :atom :feed-link url)))
78
+    (alimenta:to-feed feed :feed-link url)))
66 79
 
67 80
 (defvar *app* (make-instance 'ningle:<app>))
68 81