Browse code
feat: eclector test, reddit-dataset-creator
Edward authored on 07/01/2021 10:03:05
Showing 2 changed files
Showing 2 changed files
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,103 @@ |
1 |
+(defpackage :fwoar.lisp-sandbox.eclector-test |
|
2 |
+ (:use :cl ) |
|
3 |
+ (:export )) |
|
4 |
+(in-package :fwoar.lisp-sandbox.eclector-test) |
|
5 |
+ |
|
6 |
+(defvar *verbose* nil) |
|
7 |
+ |
|
8 |
+(defclass my-reader () |
|
9 |
+ ()) |
|
10 |
+(defvar *eclector-client* |
|
11 |
+ (make-instance 'my-reader)) |
|
12 |
+ |
|
13 |
+(fw.lu:defclass+ fw-package () |
|
14 |
+ ((%name :initarg :name :reader fw-name) |
|
15 |
+ (%symbols :reader fw-symbols :initform (make-hash-table :test 'equal)))) |
|
16 |
+ |
|
17 |
+ |
|
18 |
+(defvar *fw-packages* |
|
19 |
+ (alexandria:alist-hash-table (list (cons "KEYWORD" (fw-package "KEYWORD"))) |
|
20 |
+ :test 'equal)) |
|
21 |
+ |
|
22 |
+(defmethod print-object ((o fw-package) s) |
|
23 |
+ (print-unreadable-object (o s :type t :identity t) |
|
24 |
+ (format s "~s with ~d symbols" |
|
25 |
+ (fw-name o) |
|
26 |
+ (hash-table-count (fw-symbols o))))) |
|
27 |
+ |
|
28 |
+(defgeneric my-intern (symbol package) |
|
29 |
+ (:method ((symbol string) (package package)) |
|
30 |
+ (intern symbol package)) |
|
31 |
+ (:method ((symbol string) (package fw-package)) |
|
32 |
+ (when *verbose* |
|
33 |
+ (:printv symbol package)) |
|
34 |
+ (if (eql package (ensure-package "KEYWORD")) |
|
35 |
+ (intern symbol (find-package "KEYWORD")) |
|
36 |
+ (fw.lu:if-let* ((s-p (find-package (fw-name package))) |
|
37 |
+ (s-s (and s-p (find-symbol symbol s-p))) |
|
38 |
+ (_ (not (eql s-p (symbol-package s-s))))) |
|
39 |
+ (my-intern symbol |
|
40 |
+ (ensure-package |
|
41 |
+ (package-name |
|
42 |
+ (symbol-package s-s)))) |
|
43 |
+ (alexandria:ensure-gethash symbol |
|
44 |
+ (fw-symbols package) |
|
45 |
+ (make-symbol symbol)))))) |
|
46 |
+ |
|
47 |
+(defgeneric my-find-symbol (symbol package) |
|
48 |
+ (:method ((symbol string) (package package)) |
|
49 |
+ (find-symbol symbol package)) |
|
50 |
+ (:method ((symbol string) (package fw-package)) |
|
51 |
+ (gethash symbol (fw-symbols package)))) |
|
52 |
+ |
|
53 |
+(defun my-find-package (package) |
|
54 |
+ (gethash package *fw-packages*)) |
|
55 |
+ |
|
56 |
+(defun ensure-package (package) |
|
57 |
+ (etypecase package |
|
58 |
+ (fw-package package) |
|
59 |
+ (package (ensure-package (package-name package))) |
|
60 |
+ (symbol (ensure-package (symbol-name package))) |
|
61 |
+ (string (alexandria:ensure-gethash package *fw-packages* |
|
62 |
+ (fw-package package))))) |
|
63 |
+ |
|
64 |
+(defmethod eclector.reader:interpret-symbol |
|
65 |
+ ((client my-reader) input-stream package-indicator symbol-name internp) |
|
66 |
+ (let ((package (case package-indicator |
|
67 |
+ (:current (ensure-package *package*)) |
|
68 |
+ (:keyword (ensure-package "KEYWORD")) |
|
69 |
+ (t (or (ensure-package package-indicator)))))) |
|
70 |
+ (if *verbose* |
|
71 |
+ (:printv package-indicator package symbol-name (my-intern symbol-name package)) |
|
72 |
+ (my-intern symbol-name package)))) |
|
73 |
+ |
|
74 |
+(defparameter *my-readtable* |
|
75 |
+ (let ((readtable (eclector.readtable:copy-readtable eclector.reader:*readtable*))) |
|
76 |
+ (eclector.readtable:set-dispatch-macro-character |
|
77 |
+ readtable #\# #\+ |
|
78 |
+ (lambda (stream char parameter) |
|
79 |
+ parameter |
|
80 |
+ char |
|
81 |
+ (:printv |
|
82 |
+ eclector.reader:*client* |
|
83 |
+ (let ((*verbose* t)) |
|
84 |
+ (cons 'when-feature |
|
85 |
+ (eclector.reader:read stream t |
|
86 |
+ nil t)))))) |
|
87 |
+ (eclector.readtable:set-dispatch-macro-character |
|
88 |
+ readtable #\# #\- |
|
89 |
+ (lambda (stream char parameter) |
|
90 |
+ parameter |
|
91 |
+ char |
|
92 |
+ (:printv |
|
93 |
+ eclector.reader:*client* |
|
94 |
+ (let ((*verbose* t)) |
|
95 |
+ (cons 'unless-feature |
|
96 |
+ (eclector.reader:read stream t |
|
97 |
+ nil t)))))) |
|
98 |
+ readtable)) |
|
99 |
+ |
|
100 |
+(defun my-read-from-string (string) |
|
101 |
+ (let ((eclector.reader:*client* *eclector-client*) |
|
102 |
+ (eclector.reader:*readtable* *my-readtable*)) |
|
103 |
+ (eclector.reader:read-from-string string))) |
0 | 104 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,125 @@ |
1 |
+(defpackage :fwoar.lisp-sandbox.reddit-dataset-creator |
|
2 |
+ (:use :cl ) |
|
3 |
+ (:export )) |
|
4 |
+(in-package :fwoar.lisp-sandbox.reddit-dataset-creator) |
|
5 |
+ |
|
6 |
+(defun get-data (subreddit) |
|
7 |
+ (alimenta:to-feed |
|
8 |
+ (fw.lu:closing |
|
9 |
+ (plump:parse |
|
10 |
+ (drakma:http-request (puri:merge-uris (format nil "~(~a~).rss?limit=100" subreddit) |
|
11 |
+ "https://www.reddit.com/r/") |
|
12 |
+ :want-stream t))))) |
|
13 |
+ |
|
14 |
+(defun get-data-json (subreddit) |
|
15 |
+ (fw.lu:closing |
|
16 |
+ (yason:parse |
|
17 |
+ (drakma:http-request (puri:merge-uris (format nil "~(~a~).json?limit=100" subreddit) |
|
18 |
+ "https://www.reddit.com/r/") |
|
19 |
+ :want-stream t)))) |
|
20 |
+ |
|
21 |
+(defun subreddit-json-image-urls (subreddit &optional (fetch 'get-data-json-cached)) |
|
22 |
+ (data-lens.transducers:transduce |
|
23 |
+ (data-lens:• |
|
24 |
+ (data-lens.transducers:mapping |
|
25 |
+ (data-lens:key "data")) |
|
26 |
+ #+nil |
|
27 |
+ (data-lens.transducers:mapping |
|
28 |
+ (lambda (it) |
|
29 |
+ (format t "~&~s~%" (gethash "url" it)) |
|
30 |
+ it)) |
|
31 |
+ (data-lens.transducers:mapping |
|
32 |
+ (data-lens:• (lambda (it) |
|
33 |
+ (fw.lu:dive '("source" "url") |
|
34 |
+ it)) |
|
35 |
+ (data-lens:applicable-when (data-lens:element 0) |
|
36 |
+ (complement #'null)) |
|
37 |
+ (data-lens:sorted '< :key (data-lens:key "width")) |
|
38 |
+ (lambda (it) |
|
39 |
+ (fw.lu:dive '("preview" "images") |
|
40 |
+ it)))) |
|
41 |
+ (data-lens.transducers:filtering 'identity)) |
|
42 |
+ 'data-lens.transducers:list-builder |
|
43 |
+ (fw.lu:dive '("data" "children") |
|
44 |
+ (funcall fetch subreddit)))) |
|
45 |
+ |
|
46 |
+(defun subreddit-image-urls (subreddit &optional (fetch 'get-data-cached)) |
|
47 |
+ (data-lens.transducers:transduce |
|
48 |
+ (data-lens:• |
|
49 |
+ (data-lens.transducers:mapping |
|
50 |
+ (data-lens:• |
|
51 |
+ (lambda (it) |
|
52 |
+ (lquery:$ (initialize it) "a" (attr "href"))) |
|
53 |
+ 'alimenta:content)) |
|
54 |
+ (data-lens.transducers:catting) |
|
55 |
+ (data-lens.transducers:filtering |
|
56 |
+ (data-lens:regex-match "[.](jpe?g|png)$"))) |
|
57 |
+ 'data-lens.transducers:list-builder |
|
58 |
+ (funcall fetch subreddit))) |
|
59 |
+ |
|
60 |
+(defvar *reset-cache* nil) |
|
61 |
+(defun get-data-cached (subreddit) |
|
62 |
+ (let ((cache (load-time-value (make-hash-table)))) |
|
63 |
+ (when *reset-cache* |
|
64 |
+ (clrhash cache) |
|
65 |
+ (setf *reset-cache* nil)) |
|
66 |
+ (alexandria:ensure-gethash subreddit |
|
67 |
+ cache |
|
68 |
+ (get-data subreddit)))) |
|
69 |
+ |
|
70 |
+(defun get-data-json-cached (subreddit) |
|
71 |
+ (let ((cache (load-time-value (make-hash-table)))) |
|
72 |
+ (when *reset-cache* |
|
73 |
+ (clrhash cache) |
|
74 |
+ (setf *reset-cache* nil)) |
|
75 |
+ (alexandria:ensure-gethash subreddit |
|
76 |
+ cache |
|
77 |
+ (get-data-json subreddit)))) |
|
78 |
+ |
|
79 |
+ |
|
80 |
+(defun dump-stream (input output &optional fn) |
|
81 |
+ (with-open-stream (echo (make-echo-stream input output)) |
|
82 |
+ (loop with buffer-length = 1000 |
|
83 |
+ with buffer = (make-array buffer-length :element-type 'character) |
|
84 |
+ for read-chars = (read-sequence buffer echo) |
|
85 |
+ do (when fn |
|
86 |
+ (funcall fn buffer buffer-length)) |
|
87 |
+ (unless (< read-chars buffer-length) |
|
88 |
+ (return nil))))) |
|
89 |
+ |
|
90 |
+(defun connect-streams (input output &key (background t) fn) |
|
91 |
+ "This reads from input and writes output until the end of input is found." |
|
92 |
+ (dump-stream input output fn)) |
|
93 |
+ |
|
94 |
+(defun store-image (base category url) |
|
95 |
+ (let* ((uri (puri:parse-uri url)) |
|
96 |
+ (fn (pathname-name |
|
97 |
+ (parse-namestring |
|
98 |
+ (puri:uri-path uri))))) |
|
99 |
+ (alexandria:with-output-to-file (s (ensure-directories-exist |
|
100 |
+ (merge-pathnames (make-pathname |
|
101 |
+ :directory (list :relative |
|
102 |
+ (string category)) |
|
103 |
+ :name fn) |
|
104 |
+ (parse-namestring base)))) |
|
105 |
+ (fw.lu:closing |
|
106 |
+ (connect-streams (drakma:http-request url :want-stream t) |
|
107 |
+ s |
|
108 |
+ :background nil))))) |
|
109 |
+ |
|
110 |
+(format *standard-output* "~&~s ~s ~s ~s ~s ~s~%" |
|
111 |
+ SB-EXT:*CORE-PATHNAME* |
|
112 |
+ SB-EXT:*RUNTIME-PATHNAME* |
|
113 |
+ asdf/user::*nil-pathname* |
|
114 |
+ *COMPILE-FILE-PATHNAME* |
|
115 |
+ *LOAD-PATHNAME* |
|
116 |
+ swank/sbcl::*buffer-name* |
|
117 |
+ ) |
|
118 |
+ |
|
119 |
+#+(or) |
|
120 |
+(asdf:initialize-source-registry |
|
121 |
+ (fwoar.git-systems:define-dir-deps () |
|
122 |
+ (:git"alimenta" "git@git.fiddlerwoaroof.com:u/edwlan/alimenta.git" "master") |
|
123 |
+ (:git"alimenta-feed-archive" "git@git.fiddlerwoaroof.com:u/edwlan/alimenta-feed-archive.git" "master") |
|
124 |
+ (:git"data-lens" "git@git.fiddlerwoaroof.com:data-lens.git" "master") |
|
125 |
+ (:git"collection-class" "git@git.fiddlerwoaroof.com:u/edwlan/collection-class.git" "master"))) |