git.fiddlerwoaroof.com
Browse code

feat: eclector test, reddit-dataset-creator

Edward authored on 07/01/2021 10:03:05
Showing 2 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,103 @@
1
+(defpackage :fwoar.lisp-sandbox.eclector-test
2
+  (:use :cl )
3
+  (:export ))
4
+(in-package :fwoar.lisp-sandbox.eclector-test)
5
+
6
+(defvar *verbose* nil)
7
+
8
+(defclass my-reader ()
9
+  ())
10
+(defvar *eclector-client*
11
+  (make-instance 'my-reader))
12
+
13
+(fw.lu:defclass+ fw-package ()
14
+  ((%name :initarg :name :reader fw-name)
15
+   (%symbols :reader fw-symbols :initform (make-hash-table :test 'equal))))
16
+
17
+
18
+(defvar *fw-packages*
19
+  (alexandria:alist-hash-table (list (cons "KEYWORD" (fw-package "KEYWORD")))
20
+                               :test 'equal))
21
+
22
+(defmethod print-object ((o fw-package) s)
23
+  (print-unreadable-object (o s :type t :identity t)
24
+    (format s "~s with ~d symbols"
25
+            (fw-name o)
26
+            (hash-table-count (fw-symbols o)))))
27
+
28
+(defgeneric my-intern (symbol package)
29
+  (:method ((symbol string) (package package))
30
+    (intern symbol package))
31
+  (:method ((symbol string) (package fw-package))
32
+    (when *verbose*
33
+      (:printv symbol package))
34
+    (if (eql package (ensure-package "KEYWORD"))
35
+        (intern symbol (find-package "KEYWORD"))
36
+        (fw.lu:if-let* ((s-p (find-package (fw-name package)))
37
+                        (s-s (and s-p (find-symbol symbol s-p)))
38
+                        (_ (not (eql s-p (symbol-package s-s)))))
39
+          (my-intern symbol
40
+                     (ensure-package
41
+                      (package-name
42
+                       (symbol-package s-s))))
43
+          (alexandria:ensure-gethash symbol
44
+                                     (fw-symbols package)
45
+                                     (make-symbol symbol))))))
46
+
47
+(defgeneric my-find-symbol (symbol package)
48
+  (:method ((symbol string) (package package))
49
+    (find-symbol symbol package))
50
+  (:method ((symbol string) (package fw-package))
51
+    (gethash symbol (fw-symbols package))))
52
+
53
+(defun my-find-package (package)
54
+  (gethash package *fw-packages*))
55
+
56
+(defun ensure-package (package)
57
+  (etypecase package
58
+    (fw-package package)
59
+    (package (ensure-package (package-name package)))
60
+    (symbol (ensure-package (symbol-name package)))
61
+    (string (alexandria:ensure-gethash package *fw-packages*
62
+                                       (fw-package package)))))
63
+
64
+(defmethod eclector.reader:interpret-symbol
65
+    ((client my-reader) input-stream package-indicator symbol-name internp)
66
+  (let ((package (case package-indicator
67
+                   (:current (ensure-package *package*))
68
+                   (:keyword (ensure-package "KEYWORD"))
69
+                   (t        (or (ensure-package package-indicator))))))
70
+    (if *verbose*
71
+        (:printv package-indicator package symbol-name (my-intern symbol-name package))
72
+        (my-intern symbol-name package))))
73
+
74
+(defparameter *my-readtable*
75
+  (let ((readtable (eclector.readtable:copy-readtable eclector.reader:*readtable*)))
76
+    (eclector.readtable:set-dispatch-macro-character
77
+     readtable #\# #\+
78
+     (lambda (stream char parameter)
79
+       parameter
80
+       char
81
+       (:printv
82
+        eclector.reader:*client*
83
+        (let ((*verbose* t))
84
+          (cons 'when-feature
85
+                (eclector.reader:read stream t
86
+                                      nil t))))))
87
+    (eclector.readtable:set-dispatch-macro-character
88
+     readtable #\# #\-
89
+     (lambda (stream char parameter)
90
+       parameter
91
+       char
92
+       (:printv
93
+        eclector.reader:*client*
94
+        (let ((*verbose* t))
95
+          (cons 'unless-feature
96
+                (eclector.reader:read stream t
97
+                                      nil t))))))
98
+    readtable))
99
+
100
+(defun my-read-from-string (string)
101
+  (let ((eclector.reader:*client* *eclector-client*)
102
+        (eclector.reader:*readtable* *my-readtable*))
103
+    (eclector.reader:read-from-string string)))
0 104
new file mode 100644
... ...
@@ -0,0 +1,125 @@
1
+(defpackage :fwoar.lisp-sandbox.reddit-dataset-creator
2
+  (:use :cl )
3
+  (:export ))
4
+(in-package :fwoar.lisp-sandbox.reddit-dataset-creator)
5
+
6
+(defun get-data (subreddit)
7
+  (alimenta:to-feed
8
+   (fw.lu:closing
9
+       (plump:parse
10
+        (drakma:http-request (puri:merge-uris (format nil "~(~a~).rss?limit=100" subreddit)
11
+                                              "https://www.reddit.com/r/")
12
+                             :want-stream t)))))
13
+
14
+(defun get-data-json (subreddit)
15
+  (fw.lu:closing
16
+      (yason:parse
17
+       (drakma:http-request (puri:merge-uris (format nil "~(~a~).json?limit=100" subreddit)
18
+                                             "https://www.reddit.com/r/")
19
+                            :want-stream t))))
20
+
21
+(defun subreddit-json-image-urls (subreddit &optional (fetch 'get-data-json-cached))
22
+  (data-lens.transducers:transduce
23
+   (data-lens:•
24
+    (data-lens.transducers:mapping
25
+     (data-lens:key "data"))
26
+    #+nil
27
+    (data-lens.transducers:mapping
28
+     (lambda (it)
29
+       (format t "~&~s~%" (gethash "url" it))
30
+       it))
31
+    (data-lens.transducers:mapping
32
+     (data-lens:• (lambda (it)
33
+                    (fw.lu:dive '("source" "url")
34
+                                it))
35
+                  (data-lens:applicable-when (data-lens:element 0)
36
+                                             (complement #'null))
37
+                  (data-lens:sorted '< :key (data-lens:key "width"))
38
+                  (lambda (it)
39
+                    (fw.lu:dive '("preview" "images")
40
+                                it))))
41
+    (data-lens.transducers:filtering 'identity))
42
+   'data-lens.transducers:list-builder
43
+   (fw.lu:dive '("data" "children")
44
+               (funcall fetch subreddit))))
45
+
46
+(defun subreddit-image-urls (subreddit &optional (fetch 'get-data-cached))
47
+  (data-lens.transducers:transduce
48
+   (data-lens:•
49
+    (data-lens.transducers:mapping
50
+     (data-lens:•
51
+      (lambda (it)
52
+        (lquery:$ (initialize it) "a" (attr "href")))
53
+      'alimenta:content))
54
+    (data-lens.transducers:catting)
55
+    (data-lens.transducers:filtering
56
+     (data-lens:regex-match "[.](jpe?g|png)$")))
57
+   'data-lens.transducers:list-builder
58
+   (funcall fetch subreddit)))
59
+
60
+(defvar *reset-cache* nil)
61
+(defun get-data-cached (subreddit)
62
+  (let ((cache (load-time-value (make-hash-table))))
63
+    (when *reset-cache*
64
+      (clrhash cache)
65
+      (setf *reset-cache* nil))
66
+    (alexandria:ensure-gethash subreddit
67
+                               cache
68
+                               (get-data subreddit))))
69
+
70
+(defun get-data-json-cached (subreddit)
71
+  (let ((cache (load-time-value (make-hash-table))))
72
+    (when *reset-cache*
73
+      (clrhash cache)
74
+      (setf *reset-cache* nil))
75
+    (alexandria:ensure-gethash subreddit
76
+                               cache
77
+                               (get-data-json subreddit))))
78
+
79
+
80
+(defun dump-stream (input output &optional fn)
81
+  (with-open-stream (echo (make-echo-stream input output))
82
+    (loop with buffer-length = 1000
83
+          with buffer = (make-array buffer-length :element-type 'character)
84
+          for read-chars = (read-sequence buffer echo)
85
+          do (when fn
86
+               (funcall fn buffer buffer-length))
87
+             (unless (< read-chars buffer-length)
88
+               (return nil)))))
89
+
90
+(defun connect-streams (input output &key (background t) fn)
91
+  "This reads from input and writes output until the end of input is found."
92
+  (dump-stream input output fn))
93
+
94
+(defun store-image (base category url)
95
+  (let* ((uri (puri:parse-uri url))
96
+         (fn (pathname-name
97
+              (parse-namestring
98
+               (puri:uri-path uri)))))
99
+    (alexandria:with-output-to-file (s (ensure-directories-exist
100
+                                        (merge-pathnames (make-pathname
101
+                                                          :directory (list :relative
102
+                                                                           (string category))
103
+                                                          :name fn)
104
+                                                         (parse-namestring base))))
105
+      (fw.lu:closing
106
+          (connect-streams (drakma:http-request url :want-stream t)
107
+                           s
108
+                           :background nil)))))
109
+
110
+(format *standard-output* "~&~s ~s ~s ~s ~s ~s~%"
111
+        SB-EXT:*CORE-PATHNAME*
112
+        SB-EXT:*RUNTIME-PATHNAME*
113
+        asdf/user::*nil-pathname*
114
+        *COMPILE-FILE-PATHNAME*
115
+        *LOAD-PATHNAME*
116
+        swank/sbcl::*buffer-name*
117
+        )
118
+
119
+#+(or)
120
+(asdf:initialize-source-registry
121
+ (fwoar.git-systems:define-dir-deps ()
122
+     (:git"alimenta"              "git@git.fiddlerwoaroof.com:u/edwlan/alimenta.git"              "master")
123
+   (:git"alimenta-feed-archive" "git@git.fiddlerwoaroof.com:u/edwlan/alimenta-feed-archive.git" "master")
124
+   (:git"data-lens"             "git@git.fiddlerwoaroof.com:data-lens.git"                      "master")
125
+   (:git"collection-class"      "git@git.fiddlerwoaroof.com:u/edwlan/collection-class.git"      "master")))