git.fiddlerwoaroof.com
transform-ct.lisp
1a481a69
 (ql:quickload '(:plump
a8cd04b5
                 :lquery
                 :serapeum
                 :alexandria
                 :flexi-streams
                 :chipz
                 :babel
                 :net.didierverna.clon))
1a481a69
 
 (in-package #:org.shirakumo.plump.parser)
 (define-tag-dispatcher (script *tag-dispatchers* *html-tags*)
     (name)
a8cd04b5
   (string-equal name "script")
1a481a69
   (let* ((closing (consume))
          (attrs
a8cd04b5
            (if (char= closing #\ )
                (prog1 (read-attributes) (setf closing (consume)))
                (make-attribute-map))))
1a481a69
     (case closing
       (#\/ (advance) (make-element *root* "script" :attributes attrs))
       (#\>
        (let ((*root* (make-fulltext-element *root* "script" :attributes attrs)))
          (make-text-node *root*
                          (consume-until
                           (make-matcher
                            (or (is "</script>") (is "</SCRIPT>")))))
          (advance-n 9)
          *root*)))))
 
 (in-package :cl-user)
 (defpackage :ct-transform
   (:use :cl :lquery :serapeum :alexandria :net.didierverna.clon))
 (in-package :ct-transform)
 
 (defvar *version* "0.001")
 (defsynopsis (:postfix "FILE")
a8cd04b5
     (group (:header "Generic options")
            (flag :short-name "v" :long-name "version"
                  :description "Show the program version")
            (flag :short-name "h" :long-name "help"
                  :description "Show this help")))
1a481a69
 
 
 (defvar *txt* nil "The parsed HTML")
 (defvar *fn* nil "The file to be pulled in")
 (defvar *lookup-table* (make-hash-table :test 'equalp))
 
 ;; (uiop:directory-files "." (uiop:merge-pathnames* (make-pathname :type "bz2") uiop:*wild-file*))
 
 (defun call-with-decompressed-text (fn cb &optional (encoding :iso-8859-1))
   (with-input-from-file (s fn :element-type '(unsigned-byte 8))
     (let* ((decompressing-stream (chipz:make-decompressing-stream 'chipz:bzip2 s))
a8cd04b5
            (flexi-stream (flexi-streams:make-flexi-stream decompressing-stream :external-format encoding)))
1a481a69
       (unwind-protect (funcall cb flexi-stream)
a8cd04b5
         (close flexi-stream)
         (close decompressing-stream)))))
1a481a69
 
 (defun lookup-ref (p q a &rest r)
   (gethash (format nil "~aq.~da.~d~{~a~}" (string-upcase p) q a r)
a8cd04b5
            *lookup-table*))
1a481a69
 
 (defun translate-book-ref (ref)
   (string-case ref
     ("" :st)
     ("CG" :scg)
     (t (make-keyword ref))))
 
 (defun normalize-ref (ref)
   (destructuring-bind (book . ref) (split-sequence #\, ref)
     (if ref
a8cd04b5
         (setf ref (string-join ref ","))
         (setf ref book
               book ""))
1a481a69
     (values (string-join (split-sequence #\space ref
a8cd04b5
                                          :remove-empty-subseqs t))
             (translate-book-ref (remove-if-not #'upper-case-p
                                                (string-capitalize book))))))
1a481a69
 
 
 (defun help ())
 (defun show-version ()
   (format t "~&~a~%" *version*))
 
 (declaim (ftype (function () nil) to-top))
 (defun to-top ())
 (defmacro mark-start (&body body)
   (with-gensyms (start)
     `(tagbody
a8cd04b5
         ,start
         (flet ((to-top () (go ,start)))
           ,@body))))
1a481a69
 
 (defun transform-ct-main ()
   (make-context)
   (mark-start
a8cd04b5
    (restart-case
        (cond
          ((getopt :long-name "help") (help))
          ((getopt :long-name "version") (show-version))
          (t (let ((file (car (remainder)))
                   (ofile (cadr (remainder)))
                   (*package* (find-package 'ct-transform)))
               (lquery:initialize (call-with-decompressed-text file #'plump:parse))
               (map 'list
                    (op (destructuring-bind (ref el) _
                          (setf (gethash (multiple-value-list (normalize-ref ref))
                                         *lookup-table*)
                                (plump:text el))))
                    ($ "p[title]" (combine (attr :title) (node))))
 
               (let ((*print-case* :downcase))
                 (alexandria:with-output-to-file (*standard-output* ofile)
                   (loop for (ref book) being the hash-keys in *lookup-table* using (hash-value text)
                         do (print `(ref ,book ,ref
                                         ,text)))))
               ;; (alexandria:with-input-from-file (s *fn* :external-format :iso-8859-1)
               ;;   (setf *txt* (plump:parse s)))
 
               ;; (uiop:directory-files "." (uiop:merge-pathnames* (make-pathname :type "bz2") uiop:*wild-file*))
               ;; (car *)
               ;; (plump:parse *)
               ;; (lquery:initialize *)
               ;; ($ "p[title]" (combine (attr :title)
               ;;          (text)))
 
               )))
      (retry () (to-top))
      (abort ()))))
1a481a69
 
 (defun make-executable ()
   (dump "transform-ct" transform-ct-main
         :compression 8
         :purify t))