;;;; Retrieved from git.fiddlerwoaroof.com
;;;; (repository web-viewer chrome "Raw / Blame / History" is not part of the source)
(defpackage :cells-html-scraper
  (:use :cl :alexandria :serapeum :fw.lu :cells))

(in-package :cells-html-scraper)

;; lQuery macro PROGN: runs the ARGS as a nested lQuery pipeline that starts
;; from NODES, i.e. it expands into a fresh (lquery:$ (inline NODES) ...) form.
;; It is used inside COMBINE to group several steps into one branch.
(lquery:define-lquery-macro progn (nodes &rest args)
  ;; The dotted tail splices ARGS in as the remaining pipeline steps.
  `(lquery:$ (inline ,nodes) . ,args))

;; lQuery function HN-SCORE: starting from ITEM, step to what follows it via
;; (next), select ".score" there and take the text of the match. LQUERY:$1 is
;; used, so a single value (not a vector) is produced.
;; NOTE(review): presumably ITEM is a "tr.athing" story row and the row after
;; it is the Hacker News subtext row -- confirm against the page markup.
(lquery:define-lquery-function hn-score (item)
  (lquery:$1 (inline item)
             (next)
             ".score"
             (text)))

;; lQuery function HN-AGE: same traversal as HN-SCORE, but selects ".age"
;; after stepping to what follows ITEM, and yields that element's text as a
;; single value (LQUERY:$1).
;; NOTE(review): presumably this is the "N hours ago" label -- confirm.
(lquery:define-lquery-function hn-age (item)
  (lquery:$1 (inline item)
             (next)
             ".age"
             (text)))

;; lQuery function HN-COMMENTS: from ITEM, step forward with (next), select
;; ".age", then step forward three more times and take the text found there
;; as a single value (LQUERY:$1).
;; NOTE(review): this relies purely on position -- it assumes the comments link
;; is the third sibling after the ".age" element; verify against the markup.
;; NOTE(review): the first of the three steps is written as
;; (lquery-funcs:next) while the others are (next); presumably these name the
;; same lQuery function -- confirm before normalising.
(lquery:define-lquery-function hn-comments (item)
  (lquery:$1 (inline item)
             (next)
             ".age"
             (lquery-funcs:next)
             (next)
             (next)
             (text)))

(defclass hn-item ()
  ;; All slots are read-only and filled from initargs; MAKE-HN-ITEM is the
  ;; constructor used by the scraper.
  ((%url
    :reader url
    :initarg :url)
   (%title
    :reader title
    :initarg :title)
   (%score
    :reader score
    :initarg :score)
   (%age
    :reader age
    :initarg :age)
   (%comments
    :reader comments
    :initarg :comments))
  (:documentation
   "One scraped listing entry: its URL, TITLE, SCORE, AGE and COMMENTS."))

(defun make-hn-item (url title score age comments)
  "Build an HN-ITEM from the raw strings scraped for one listing row.

URL is parsed with PURI:PARSE-URI.  SCORE and COMMENTS are converted with
PARSE-INTEGER using :JUNK-ALLOWED, so they become NIL when no integer can be
read.  TITLE and AGE are stored as given.

Any argument may be NIL when the row lacks that piece of markup.  A NIL URL is
stored as NIL rather than being handed to the URI parser, matching the
treatment of SCORE and COMMENTS."
  (flet ((leading-integer (text)
           ;; NIL in, NIL out; otherwise the integer PARSE-INTEGER can read.
           (when text
             (parse-integer text :junk-allowed t))))
    (make-instance 'hn-item
                   ;; Rows without a title link yield no href; do not let one
                   ;; such row abort construction of the whole item vector.
                   :url (when url (puri:parse-uri url))
                   :title title
                   :score (leading-integer score)
                   :age age
                   :comments (leading-integer comments))))

;; Cells model that turns a page of HTML into a vector of HN-ITEM objects.
;; HTML is the only input cell (C-IN); every other slot is a formula (C?)
;; derived from the slot before it.
(defmodel hn-scraped ()
  ;; Input: the raw page markup. Starts as the empty string; set it with
  ;; (setf (html scraper) ...).
  ((%html :initarg :html
          :accessor html
          :initform (c-in ""))
   ;; The markup parsed by PLUMP:PARSE.
   (%doc :reader %doc :initform (c? (plump:parse (^html))))
   ;; First match of "#hnmain" in the parsed document (LQUERY:$1 yields a
   ;; single value).
   (%hnmain :reader %hnmain
            :initform (c? (lquery:$1
                            (inline (^%doc))
                            "#hnmain")))
   ;; All ".itemlist tr.athing" matches below #hnmain.
   (%body :reader %body
          :initform (c? (lquery:$
                          (inline (^%hnmain))
                          ".itemlist tr.athing")))
   ;; For each row in %BODY, COMBINE collects five values in this order:
   ;; href of ".title a", text of ".title a", HN-SCORE, HN-AGE, HN-COMMENTS.
   ;; The order matches the parameter list of MAKE-HN-ITEM, which %ITEMS
   ;; applies to each entry. Despite the reader name TITLES, each entry holds
   ;; all five values, not just the title.
   (%titles :reader titles
            :initform (c? (lquery:$
                            (inline (^%body))
                            (combine (progn ".title a" (attr "href")
                                            (node))
                                     (progn ".title a" (text)
                                            (node))
                                     (hn-score)
                                     (hn-age)
                                     (hn-comments)))))
   ;; Vector of HN-ITEM, one per entry of TITLES.
   (%items :reader items :initform (c? (map 'vector
                                            (op (apply 'make-hn-item _*))
                                            (^titles))))))

;; Cells model that fetches the body of a URL.
;; URL is an input cell (initially NIL); TEXT is a formula over it that
;; performs the HTTP request with Drakma.
(defmodel url-getter ()
  ((%url :initarg :url
         :accessor url
         :initform (c-in '()))
   (%text :reader text
          ;; The default URL is NIL, which DRAKMA:HTTP-REQUEST cannot handle,
          ;; so TEXT is NIL until a URL has been supplied.
          :initform (c? (when (^url)
                          ;; Add ("application" . "json") to Drakma's text
                          ;; content types for the duration of this request.
                          (let ((drakma:*text-content-types*
                                  (acons "application" "json"
                                         drakma:*text-content-types*)))
                            (drakma:http-request (^url))))))))

(defun get-links (url)
  "Fetch URL and collect its <link> elements whose rel attribute is \"alternate\".

Returns two values.  Normally the first is a list with one entry per matching
link, of the form (URL href type), and the second is the empty string.  A
CONTINUE restart is established around the whole operation; invoking it
returns NIL and URL instead."
  (flet ((alternate-p (rel)
           (string= rel "alternate")))
    (restart-case
        (let* ((link-data (lquery:$
                            (initialize (drakma:http-request url))
                            "link"
                            (combine (attr "rel") (attr "href") (attr "type"))))
               ;; Each entry is (rel href type); filter on the rel element.
               (alternates (remove-if-not #'alternate-p link-data :key #'car)))
          (values (map 'list
                       ;; Replace the rel element with the page URL.
                       (lambda (link) (list* url (cdr link)))
                       alternates)
                  ""))
      (continue ()
        :report (lambda (stream) (format stream "skip url ~a" url))
        (values nil url)))))

#+lispworks
(progn
  (defun request-new-items (interface page)
    "Selection callback for the page list: download the Hacker News listing
named by PAGE and store its markup in INTERFACE's scraper, whose dependent
cells then recompute."
    (let* ((scraper (hnr-scraper interface))
           (page-url (format nil "https://news.ycombinator.com/~a" page))
           (markup (drakma:http-request page-url)))
      (setf (cells-html-scraper::html scraper) markup)))

  (defun open-item (interface item)
    "Selection callback for the item list: show ITEM's URL in INTERFACE's
browser pane.

Scraped hrefs may be relative to the Hacker News site, so the URL is merged
against the site root before rendering; absolute URLs pass through unchanged.
An ITEM with no URL is ignored."
    (let ((uri (url item)))
      (when uri
        (capi:browser-pane-navigate
         (hnr-browser interface)
         (puri:render-uri
          (puri:merge-uris uri
                           (puri:parse-uri "https://news.ycombinator.com/"))
          nil)))))
  
    
  
  
  ;; Main window: a list of pages on the left, and on the right a list of
  ;; scraped items above a browser pane.
  (capi:define-interface hn-reader ()
    ;; Each window owns its own HN-SCRAPED model instance.
    ((scraper :reader hnr-scraper :initform (make-instance 'hn-scraped)))
    (:panes
     ;; Page chooser. Selecting an entry calls
     ;; (request-new-items interface page-name).
     (pages capi:list-panel
            :reader hnr-pages
            :items (list "news" "newest" "ask" "show" "jobs")
            ;; Caps the panel width using a sample string.
            :initial-constraints '(:visible-max-width (:string "newestest"))
            :selection-callback 'request-new-items
            :callback-type :interface-item)
     ;; Item list. Entries are displayed via TITLE; selecting one calls
     ;; (open-item interface item). Its items are filled in by the observer
     ;; installed in the INITIALIZE-INSTANCE :AFTER method.
     (item-panel capi:list-panel
                 :reader hnr-item-panel
                 :print-function 'title
                 :selection-callback 'open-item
                 :callback-type :interface-item)
     ;; Embedded browser showing the selected item; starts on a fixed page.
     (browser capi:browser-pane
              :reader hnr-browser
              :url "https://fwoar.co"))
    (:layouts
     ;; Items above the browser, separated by a divider.
     ;; NOTE(review): the NIL in the ratio lists presumably corresponds to the
     ;; :divider entry -- confirm against the CAPI layout documentation.
     (right-side capi:column-layout
                 '(item-panel :divider browser)
                 :y-ratios '(1 nil 2)
                 :uniform-size-p nil)
     ;; Page list on the left of the right-hand column.
     (main-layout capi:row-layout
                  '(pages :divider right-side)
                  :visible-min-width '(:character 120)
                  :visible-min-height '(:character 40)
                  :x-ratios '(1 nil 2)
                  :uniform-size-p nil))
    (:default-initargs
     :layout 'main-layout
     :title "HN Reader"))

  (defmethod initialize-instance :after ((o hn-reader) &key)
    ;; Install an observer on the %ITEMS cell of this window's own scraper
    ;; (EQL-specialised on that instance). Whenever the items change, the item
    ;; panel's contents are replaced in the pane's own process.
    (cells:defobserver
     cells-html-scraper::%items ((self (eql (hnr-scraper o))) new-value)
     (let ((item-panel (hnr-item-panel o)))
       (capi:apply-in-pane-process
        item-panel
        (lambda (pane)
          ;; Drop entries that have no title before showing them.
          (let ((titled-items (remove-if 'null
                                         new-value
                                         :key 'cells-html-scraper::title)))
            (capi:remove-items pane (constantly t))
            (capi:append-items pane titled-items)))
        ;; Passed through as the PANE argument of the lambda above.
        item-panel)))

    ;; Initial load: fetch the front page so the observer populates the list.
    (let ((front-page (drakma:http-request "https://news.ycombinator.com/")))
      (setf (cells-html-scraper::html (hnr-scraper o)) front-page)))
  (defun startup ()
    "Create a new HN-READER window and display it."
    (let ((reader (make-instance 'hn-reader)))
      (capi:display reader)))
  )