git.fiddlerwoaroof.com
Raw Blame History
(ql:quickload '(:plump
                :lquery
                :serapeum
                :alexandria
                :flexi-streams
                :chipz
                :babel
                :net.didierverna.clon))

(in-package #:org.shirakumo.plump.parser)
(define-tag-dispatcher (script *tag-dispatchers* *html-tags*)
    (name)
  (string-equal name "script")
  (let* ((closing (consume))
         (attrs
           (if (char= closing #\ )
               (prog1 (read-attributes) (setf closing (consume)))
               (make-attribute-map))))
    (case closing
      (#\/ (advance) (make-element *root* "script" :attributes attrs))
      (#\>
       (let ((*root* (make-fulltext-element *root* "script" :attributes attrs)))
         (make-text-node *root*
                         (consume-until
                          (make-matcher
                           (or (is "</script>") (is "</SCRIPT>")))))
         (advance-n 9)
         *root*)))))

(in-package :cl-user)
(defpackage :ct-transform
  (:use :cl :lquery :serapeum :alexandria :net.didierverna.clon))
(in-package :ct-transform)

(defvar *version* "0.001")
(defsynopsis (:postfix "FILE")
    (group (:header "Generic options")
           (flag :short-name "v" :long-name "version"
                 :description "Show the program version")
           (flag :short-name "h" :long-name "help"
                 :description "Show this help")))


(defvar *txt* nil "The parsed HTML")
(defvar *fn* nil "The file to be pulled in")
(defvar *lookup-table* (make-hash-table :test 'equalp))

;; (uiop:directory-files "." (uiop:merge-pathnames* (make-pathname :type "bz2") uiop:*wild-file*))

(defun call-with-decompressed-text (fn cb &optional (encoding :iso-8859-1))
  (with-input-from-file (s fn :element-type '(unsigned-byte 8))
    (let* ((decompressing-stream (chipz:make-decompressing-stream 'chipz:bzip2 s))
           (flexi-stream (flexi-streams:make-flexi-stream decompressing-stream :external-format encoding)))
      (unwind-protect (funcall cb flexi-stream)
        (close flexi-stream)
        (close decompressing-stream)))))

(defun lookup-ref (p q a &rest r)
  (gethash (format nil "~aq.~da.~d~{~a~}" (string-upcase p) q a r)
           *lookup-table*))

(defun translate-book-ref (ref)
  (string-case ref
    ("" :st)
    ("CG" :scg)
    (t (make-keyword ref))))

(defun normalize-ref (ref)
  (destructuring-bind (book . ref) (split-sequence #\, ref)
    (if ref
        (setf ref (string-join ref ","))
        (setf ref book
              book ""))
    (values (string-join (split-sequence #\space ref
                                         :remove-empty-subseqs t))
            (translate-book-ref (remove-if-not #'upper-case-p
                                               (string-capitalize book))))))


(defun help ())
(defun show-version ()
  (format t "~&~a~%" *version*))

(declaim (ftype (function () nil) to-top))
(defun to-top ())
(defmacro mark-start (&body body)
  (with-gensyms (start)
    `(tagbody
        ,start
        (flet ((to-top () (go ,start)))
          ,@body))))

(defun transform-ct-main ()
  (make-context)
  (mark-start
   (restart-case
       (cond
         ((getopt :long-name "help") (help))
         ((getopt :long-name "version") (show-version))
         (t (let ((file (car (remainder)))
                  (ofile (cadr (remainder)))
                  (*package* (find-package 'ct-transform)))
              (lquery:initialize (call-with-decompressed-text file #'plump:parse))
              (map 'list
                   (op (destructuring-bind (ref el) _
                         (setf (gethash (multiple-value-list (normalize-ref ref))
                                        *lookup-table*)
                               (plump:text el))))
                   ($ "p[title]" (combine (attr :title) (node))))

              (let ((*print-case* :downcase))
                (alexandria:with-output-to-file (*standard-output* ofile)
                  (loop for (ref book) being the hash-keys in *lookup-table* using (hash-value text)
                        do (print `(ref ,book ,ref
                                        ,text)))))
              ;; (alexandria:with-input-from-file (s *fn* :external-format :iso-8859-1)
              ;;   (setf *txt* (plump:parse s)))

              ;; (uiop:directory-files "." (uiop:merge-pathnames* (make-pathname :type "bz2") uiop:*wild-file*))
              ;; (car *)
              ;; (plump:parse *)
              ;; (lquery:initialize *)
              ;; ($ "p[title]" (combine (attr :title)
              ;;          (text)))

              )))
     (retry () (to-top))
     (abort ()))))

(defun make-executable ()
  (dump "transform-ct" transform-ct-main
        :compression 8
        :purify t))