7c37909c |
(defpackage :cells-html-scraper
(:use :cl :alexandria :serapeum :fw.lu :cells))
(in-package :cells-html-scraper)
(lquery:define-lquery-macro progn (nodes &rest args)
`(lquery:$
(inline ,nodes)
,@args))
(lquery:define-lquery-function hn-score (item)
(lquery:$1 (inline item)
(next)
".score"
(text)))
(lquery:define-lquery-function hn-age (item)
(lquery:$1 (inline item)
(next)
".age"
(text)))
(lquery:define-lquery-function hn-comments (item)
(lquery:$1 (inline item)
(next)
".age"
(lquery-funcs:next)
(next)
(next)
(text)))
(defclass hn-item ()
((%url :initarg :url :reader url)
(%title :initarg :title :reader title)
(%score :initarg :score :reader score)
(%age :initarg :age :reader age)
(%comments :initarg :comments :reader comments)))
(defun make-hn-item (url title score age comments)
(make-instance 'hn-item
:url (puri:parse-uri url)
:title title
:score (when score (parse-integer score :junk-allowed t))
:age age
:comments (when comments (parse-integer comments :junk-allowed t))))
(defmodel hn-scraped ()
((%html :initarg :html
:accessor html
:initform (c-in ""))
(%doc :reader %doc :initform (c? (plump:parse (^html))))
(%hnmain :reader %hnmain
:initform (c? (lquery:$1
(inline (^%doc))
"#hnmain")))
(%body :reader %body
:initform (c? (lquery:$
(inline (^%hnmain))
".itemlist tr.athing")))
(%titles :reader titles
:initform (c? (lquery:$
(inline (^%body))
|
d23434c0 |
(combine (progn ".title a" (attr "href")
|
7c37909c |
(node))
|
d23434c0 |
(progn ".title a" (text)
|
7c37909c |
(node))
(hn-score)
(hn-age)
(hn-comments)))))
(%items :reader items :initform (c? (map 'vector
(op (apply 'make-hn-item _*))
(^titles))))))
(defmodel url-getter ()
((%url :initarg :url
:accessor url
:initform (c-in '()))
(%text :reader text
:initform (c? (let ((drakma:*text-content-types* (acons "application" "json" drakma:*text-content-types*)))
(drakma:http-request (^url)))))))
(defun get-links (url)
(restart-case (values (map 'list (compose (op (list* url _))
#'cdr)
(remove-if-not (op (string= _ "alternate"))
(lquery:$
(initialize (drakma:http-request url))
"link"
(combine (attr "rel") (attr "href") (attr "type")))
:key #'car))
"")
(continue nil
:report (lambda (stream) (format stream "skip url ~a" url))
(values nil url))))
|
d23434c0 |
#+lispworks
(progn
|
2fe8978c |
(defun request-new-items (interface page)
(setf (cells-html-scraper::html (hnr-scraper interface))
|
d23434c0 |
(drakma:http-request (format nil "https://news.ycombinator.com/~a" page))))
|
2fe8978c |
(defun open-item (interface item)
(capi:browser-pane-navigate (hnr-browser interface)
(puri:render-uri (url item)
nil)))
|
d23434c0 |
(capi:define-interface hn-reader ()
|
2fe8978c |
((scraper :reader hnr-scraper :initform (make-instance 'hn-scraped)))
(:panes
(pages capi:list-panel
:reader hnr-pages
:items (list "news" "newest" "ask" "show" "jobs")
:initial-constraints '(:visible-max-width (:string "newestest"))
:selection-callback 'request-new-items
:callback-type :interface-item)
(item-panel capi:list-panel
:reader hnr-item-panel
:print-function 'title
:selection-callback 'open-item
:callback-type :interface-item)
(browser capi:browser-pane
:reader hnr-browser
:url "https://fwoar.co"))
(:layouts
(right-side capi:column-layout
'(item-panel :divider browser)
:y-ratios '(1 nil 2)
:uniform-size-p nil)
(main-layout capi:row-layout
'(pages :divider right-side)
:visible-min-width '(:character 120)
:visible-min-height '(:character 40)
:x-ratios '(1 nil 2)
:uniform-size-p nil))
(:default-initargs
:layout 'main-layout
:title "HN Reader"))
(defmethod initialize-instance :after ((o hn-reader) &key)
(cells:defobserver
cells-html-scraper::%items ((self (eql (hnr-scraper o))) new-value)
(capi:apply-in-pane-process
#1=(hnr-item-panel o)
(lambda (pane)
(let ((cleaned-items (remove-if 'null new-value :key 'cells-html-scraper::title)))
(capi:remove-items pane (constantly t))
(capi:append-items pane
cleaned-items)))
#1#))
(setf (cells-html-scraper::html (hnr-scraper o))
(drakma:http-request "https://news.ycombinator.com/")))
(defun startup ()
(capi:display (make-instance 'hn-reader)))
|
d23434c0 |
)
|