Browse code
feat: tabularize util, transduce derivation
fiddlerwoaroof authored on 12/12/2020 18:09:06
Showing 2 changed files
Showing 2 changed files
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,66 @@ |
1 |
+(defpackage :tabularize |
|
2 |
+ (:use :cl :alexandria :fw.lu :fwoar.string-utils :net.didierverna.clon) |
|
3 |
+ (:export )) |
|
4 |
+(in-package :tabularize) |
|
5 |
+ |
|
6 |
+(defun read-n-lines (lines stream) |
|
7 |
+ (loop for n below lines |
|
8 |
+ for line = (read-line stream nil) |
|
9 |
+ while line |
|
10 |
+ collect line)) |
|
11 |
+ |
|
12 |
+(defun maximize-cell (part-lens) |
|
13 |
+ (reduce (lambda (accum next) |
|
14 |
+ (map 'vector #'max accum next)) |
|
15 |
+ part-lens |
|
16 |
+ :initial-value (fill (make-array (length (car part-lens))) |
|
17 |
+ 0))) |
|
18 |
+ |
|
19 |
+(defun left-pad (str len) |
|
20 |
+ (concatenate 'string |
|
21 |
+ (make-string (- len (length str)) :initial-element #\space) |
|
22 |
+ str)) |
|
23 |
+ |
|
24 |
+(defun normalize-chunk (lines separator) |
|
25 |
+ (let* ((parts (mapcar (lambda (line) (split separator line)) |
|
26 |
+ lines)) |
|
27 |
+ (part-lens (mapcar (lambda (line) |
|
28 |
+ (map 'vector #'length line)) |
|
29 |
+ parts)) |
|
30 |
+ (pad-to (maximize-cell part-lens))) |
|
31 |
+ (map 'list (lambda (line) |
|
32 |
+ (map 'list (lambda (part len) (left-pad part (1+ len))) |
|
33 |
+ line |
|
34 |
+ pad-to)) |
|
35 |
+ parts))) |
|
36 |
+ |
|
37 |
+(defun tabularize (stream &key (separator #\tab) (chunk-length 25)) |
|
38 |
+ (loop for lines = (read-n-lines chunk-length stream) |
|
39 |
+ for normalized = (normalize-chunk lines separator) |
|
40 |
+ while lines |
|
41 |
+ do (format t "~&~{~{~a~}~%~}" normalized)) |
|
42 |
+ (values)) |
|
43 |
+ |
|
44 |
+(defsynopsis () |
|
45 |
+ (text :contents "A program for tabularizing data") |
|
46 |
+ (stropt :short-name "s" :long-name "separator" |
|
47 |
+ :description "Separator between fields: must be a single character" |
|
48 |
+ :default-value (format nil "~c" #\tab)) |
|
49 |
+ (lispobj :short-name "c" :long-name "chunk-length" |
|
50 |
+ :description "The number of lines to format as a unit" |
|
51 |
+ :typespec 'positive-integer |
|
52 |
+ :default-value 25) |
|
53 |
+ (flag :long-name "help" |
|
54 |
+ :short-name "h" |
|
55 |
+ :description "Print help")) |
|
56 |
+ |
|
57 |
+(defun main () |
|
58 |
+ (make-context) |
|
59 |
+ (cond ((getopt :long-name "help") (help)) |
|
60 |
+ (t (tabularize *standard-input* |
|
61 |
+ :separator (getopt :long-name "separator") |
|
62 |
+ :chunk-length (getopt :long-name "chunk-length"))))) |
|
63 |
+ |
|
64 |
+(defun make-executable () |
|
65 |
+ (dump "tabularize" main |
|
66 |
+ :compression 8)) |
0 | 67 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,266 @@ |
1 |
+(defun compose (function &rest more-functions) |
|
2 |
+ "Returns a function composed of FUNCTION and MORE-FUNCTIONS that applies its |
|
3 |
+arguments to to each in turn, starting from the rightmost of MORE-FUNCTIONS, |
|
4 |
+and then calling the next one with the primary value of the last." |
|
5 |
+ (declare (optimize (speed 3) (safety 1) (debug 1))) |
|
6 |
+ (reduce (lambda (f g) |
|
7 |
+ (let ((f (ensure-function f)) |
|
8 |
+ (g (ensure-function g))) |
|
9 |
+ (lambda (&rest arguments) |
|
10 |
+ (declare (dynamic-extent arguments)) |
|
11 |
+ (funcall f (apply g arguments))))) |
|
12 |
+ more-functions |
|
13 |
+ :initial-value function)) |
|
14 |
+ |
|
15 |
+(defun make-snoc () |
|
16 |
+ (vector nil nil)) |
|
17 |
+(defun add-to-snoc (acc a) |
|
18 |
+ (if (elt acc 1) |
|
19 |
+ (let* ((to-build (elt acc 1)) |
|
20 |
+ (updated (push a (cdr to-build)))) |
|
21 |
+ (setf (elt acc 1) (cdr to-build))) |
|
22 |
+ (let ((new (list a))) |
|
23 |
+ (setf (elt acc 0) new |
|
24 |
+ (elt acc 1) new))) |
|
25 |
+ acc) |
|
26 |
+(defun desnoc (acc) |
|
27 |
+ (elt acc 0)) |
|
28 |
+ |
|
29 |
+(defun 2* (it) |
|
30 |
+ (* 2 it)) |
|
31 |
+ |
|
32 |
+;; mapcar: conses up three lists, but keeps the steps separate |
|
33 |
+cl-user> (mapcar '1+ |
|
34 |
+ (mapcar '2* |
|
35 |
+ (mapcar 'parse-integer |
|
36 |
+ '("234" "345" "567" "213")))) |
|
37 |
+(469 691 1135 427) |
|
38 |
+ |
|
39 |
+;; reduce: conses up one list, but at the expense of mixing all the steps together |
|
40 |
+cl-user> (desnoc |
|
41 |
+ (reduce (lambda (acc a) |
|
42 |
+ (add-to-snoc acc |
|
43 |
+ (1+ (2* (parse-integer a))))) |
|
44 |
+ '("234" "345" "567" "213") |
|
45 |
+ :initial-value (make-snoc))) |
|
46 |
+(469 691 1135 427) |
|
47 |
+ |
|
48 |
+;; minor reshuffling to separate "building up the result" from the steps |
|
49 |
+cl-user> (flet ((it (rf) |
|
50 |
+ (lambda (acc a) |
|
51 |
+ (funcall rf |
|
52 |
+ acc (1+ (2* (parse-integer a))))))) |
|
53 |
+ (desnoc |
|
54 |
+ (reduce (it 'add-to-snoc) |
|
55 |
+ '("234" "345" "567" "213") |
|
56 |
+ :initial-value (make-snoc)))) |
|
57 |
+(469 691 1135 427) |
|
58 |
+ |
|
59 |
+;; extract one step into its own function |
|
60 |
+cl-user> (labels ((1+-each (rf) |
|
61 |
+ (lambda (acc next) |
|
62 |
+ (funcall rf acc (1+ next)))) |
|
63 |
+ (it (rf) |
|
64 |
+ (lambda (acc next) |
|
65 |
+ (funcall (1+-each rf) acc (2* (parse-integer next)))))) |
|
66 |
+ (desnoc |
|
67 |
+ (reduce (it 'add-to-snoc) |
|
68 |
+ '("234" "345" "567" "213") |
|
69 |
+ :initial-value (make-snoc)))) |
|
70 |
+(469 691 1135 427) |
|
71 |
+ |
|
72 |
+;; continue the pattern, still only two lists are built, instead of three |
|
73 |
+cl-user> (labels ((1+-each (rf) |
|
74 |
+ (lambda (acc next) |
|
75 |
+ (funcall rf acc (1+ next)))) |
|
76 |
+ (2*-each (rf) |
|
77 |
+ (lambda (acc next) |
|
78 |
+ (funcall rf acc (2* next)))) |
|
79 |
+ (parse-integer-each (rf) |
|
80 |
+ (lambda (acc next) |
|
81 |
+ (funcall rf acc (parse-integer next)))) |
|
82 |
+ (it (rf) |
|
83 |
+ (lambda (acc next) |
|
84 |
+ (funcall (parse-integer-each (2*-each (1+-each rf))) acc next)))) |
|
85 |
+ (desnoc |
|
86 |
+ (reduce (it 'add-to-snoc) |
|
87 |
+ '("234" "345" "567" "213") |
|
88 |
+ :initial-value (make-snoc)))) |
|
89 |
+(469 691 1135 427) |
|
90 |
+ |
|
91 |
+;; inline IT |
|
92 |
+cl-user> (labels ((1+-each (rf) |
|
93 |
+ (lambda (acc next) |
|
94 |
+ (funcall rf acc (1+ next)))) |
|
95 |
+ (2*-each (rf) |
|
96 |
+ (lambda (acc next) |
|
97 |
+ (funcall rf acc (2* next)))) |
|
98 |
+ (parse-integer-each (rf) |
|
99 |
+ (lambda (acc next) |
|
100 |
+ (funcall rf acc (parse-integer next))))) |
|
101 |
+ (desnoc |
|
102 |
+ (reduce (parse-integer-each (2*-each (1+-each 'add-to-snoc))) |
|
103 |
+ '("234" "345" "567" "213") |
|
104 |
+ :initial-value (make-snoc)))) |
|
105 |
+(469 691 1135 427) |
|
106 |
+ |
|
107 |
+;; Abstract the "mapping" pattern |
|
108 |
+cl-user> (labels ((mapping (function) |
|
109 |
+ (lambda (rf) |
|
110 |
+ (lambda (acc next) |
|
111 |
+ (funcall rf acc (funcall function next)))))) |
|
112 |
+ (desnoc |
|
113 |
+ (reduce (funcall (mapping #'parse-integer) |
|
114 |
+ (funcall (mapping #'2*) |
|
115 |
+ (funcall (mapping #'1+) |
|
116 |
+ 'add-to-snoc))) |
|
117 |
+ '("234" "345" "567" "213") |
|
118 |
+ :initial-value (make-snoc)))) |
|
119 |
+(469 691 1135 427) |
|
120 |
+ |
|
121 |
+;; re-express as composition of steps |
|
122 |
+cl-user> (labels ((mapping (function) |
|
123 |
+ (lambda (rf) |
|
124 |
+ (lambda (acc next) |
|
125 |
+ (funcall rf acc (funcall function next)))))) |
|
126 |
+ (desnoc |
|
127 |
+ (reduce (funcall (compose (mapping #'parse-integer) |
|
128 |
+ (mapping #'2*) |
|
129 |
+ (mapping #'1+)) |
|
130 |
+ 'add-to-snoc) |
|
131 |
+ '("234" "345" "567" "213") |
|
132 |
+ :initial-value (make-snoc)))) |
|
133 |
+(469 691 1135 427) |
|
134 |
+ |
|
135 |
+;; combine "add item to list" with "unwrap the result" |
|
136 |
+cl-user> (labels ((mapping (function) |
|
137 |
+ (lambda (rf) |
|
138 |
+ (lambda (acc next) |
|
139 |
+ (funcall rf acc (funcall function next))))) |
|
140 |
+ (builder (acc &optional (next nil next-p)) |
|
141 |
+ (if next-p |
|
142 |
+ (add-to-snoc acc next) |
|
143 |
+ (desnoc acc)))) |
|
144 |
+ (builder |
|
145 |
+ (reduce (funcall (compose (mapping #'parse-integer) |
|
146 |
+ (mapping #'2*) |
|
147 |
+ (mapping #'1+)) |
|
148 |
+ #'builder) |
|
149 |
+ '("234" "345" "567" "213") |
|
150 |
+ :initial-value (make-snoc)))) |
|
151 |
+(469 691 1135 427) |
|
152 |
+ |
|
153 |
+;; use the builder to get the initial value |
|
154 |
+cl-user> (labels ((mapping (function) |
|
155 |
+ (lambda (rf) |
|
156 |
+ (lambda (acc next) |
|
157 |
+ (funcall rf acc (funcall function next))))) |
|
158 |
+ (builder (&optional (acc nil acc-p) (next nil next-p)) |
|
159 |
+ (cond (next-p (add-to-snoc acc next)) |
|
160 |
+ (acc-p (desnoc acc)) |
|
161 |
+ (t (make-snoc))))) |
|
162 |
+ (builder |
|
163 |
+ (reduce (funcall (compose (mapping #'parse-integer) |
|
164 |
+ (mapping #'2*) |
|
165 |
+ (mapping #'1+)) |
|
166 |
+ #'builder) |
|
167 |
+ '("234" "345" "567" "213") |
|
168 |
+ :initial-value (builder)))) |
|
169 |
+(469 691 1135 427) |
|
170 |
+ |
|
171 |
+;; abstract a TRANSDUCE operation: now the transformation is built up step-wise, and separated from |
|
172 |
+;; the parts that build up the resulting data structure |
|
173 |
+cl-user> (labels ((mapping (function) |
|
174 |
+ (lambda (rf) |
|
175 |
+ (lambda (acc next) |
|
176 |
+ (funcall rf acc (funcall function next))))) |
|
177 |
+ (transduce (xf build seq) |
|
178 |
+ (funcall build |
|
179 |
+ (reduce (funcall xf build) seq :initial-value (funcall build))))) |
|
180 |
+ (transduce (compose (mapping #'parse-integer) |
|
181 |
+ (mapping #'2*) |
|
182 |
+ (mapping #'1+)) |
|
183 |
+ (lambda (&optional (acc nil acc-p) (next nil next-p)) |
|
184 |
+ (cond (next-p (add-to-snoc acc next)) |
|
185 |
+ (acc-p (desnoc acc)) |
|
186 |
+ (t (make-snoc)))) |
|
187 |
+ '("234" "345" "567" "213"))) |
|
188 |
+(469 691 1135 427) |
|
189 |
+ |
|
190 |
+;; We can trivially switch data structures now |
|
191 |
+cl-user> (labels ((mapping (function) |
|
192 |
+ (lambda (rf) |
|
193 |
+ (lambda (acc next) |
|
194 |
+ (funcall rf acc (funcall function next))))) |
|
195 |
+ (transduce (xf build seq) |
|
196 |
+ (funcall build |
|
197 |
+ (reduce (funcall xf build) seq :initial-value (funcall build))))) |
|
198 |
+ (transduce (compose (mapping #'parse-integer) |
|
199 |
+ (mapping #'2*) |
|
200 |
+ (mapping #'1+)) |
|
201 |
+ (lambda (&optional (acc nil acc-p) (next nil next-p)) |
|
202 |
+ (cond (next-p (vector-push-extend next acc) acc) |
|
203 |
+ (acc-p acc) |
|
204 |
+ (t (make-array 0 :fill-pointer t :adjustable t)))) |
|
205 |
+ '("234" "345" "567" "213"))) |
|
206 |
+#(469 691 1135 427) |
|
207 |
+ |
|
208 |
+(ql:quickload :data-lens) |
|
209 |
+ |
|
210 |
+;; We can trivially switch data structures now |
|
211 |
+cl-user> (labels ((mapping (function) |
|
212 |
+ (lambda (rf) |
|
213 |
+ (lambda (acc next) |
|
214 |
+ (funcall rf acc (funcall function next))))) |
|
215 |
+ (transduce (xf build seq) |
|
216 |
+ (funcall build |
|
217 |
+ (reduce (funcall xf build) seq :initial-value (funcall build))))) |
|
218 |
+ (let ((result (transduce (compose (mapping (data-lens:juxt #'identity #'identity)) |
|
219 |
+ (mapping (data-lens:over #'parse-integer)) |
|
220 |
+ (mapping (data-lens:transform-head #'2*)) |
|
221 |
+ (mapping (data-lens:transform-head #'1+))) |
|
222 |
+ (lambda (&optional (acc nil acc-p) (next nil next-p)) |
|
223 |
+ (cond (next-p (destructuring-bind (k v) next |
|
224 |
+ (setf (gethash k acc) v)) acc) |
|
225 |
+ (acc-p acc) |
|
226 |
+ (t (make-hash-table)))) |
|
227 |
+ '("234" "345" "567" "213")))) |
|
228 |
+ (values result |
|
229 |
+ (alexandria:hash-table-alist result)))) |
|
230 |
+#<HASH-TABLE :TEST EQL :COUNT 4 {10075E2E13}> |
|
231 |
+((427 . 213) (1135 . 567) (691 . 345) (469 . 234)) |
|
232 |
+ |
|
233 |
+;; We can trivially switch data structures now |
|
234 |
+cl-user> (labels ((mapping (function) |
|
235 |
+ (lambda (rf) |
|
236 |
+ (lambda (acc next) |
|
237 |
+ (funcall rf acc (funcall function next))))) |
|
238 |
+ (filtering (predicate) |
|
239 |
+ (lambda (rf) |
|
240 |
+ (lambda (acc next) |
|
241 |
+ (if (funcall predicate next) |
|
242 |
+ (funcall rf acc next) |
|
243 |
+ acc)))) |
|
244 |
+ (catting () |
|
245 |
+ (lambda (rf) |
|
246 |
+ (lambda (acc next) |
|
247 |
+ (reduce rf next :initial-value acc)))) |
|
248 |
+ (transduce (xf build seq) |
|
249 |
+ (funcall build |
|
250 |
+ (reduce (funcall xf build) seq :initial-value (funcall build))))) |
|
251 |
+ (let ((result (transduce (compose (catting) |
|
252 |
+ (mapping #'parse-integer) |
|
253 |
+ (filtering (complement #'evenp)) |
|
254 |
+ (mapping (data-lens:juxt #'identity #'identity)) |
|
255 |
+ (mapping (data-lens:transform-head #'2*)) |
|
256 |
+ (mapping (data-lens:transform-head #'1+))) |
|
257 |
+ (lambda (&optional (acc nil acc-p) (next nil next-p)) |
|
258 |
+ (cond (next-p (destructuring-bind (k v) next |
|
259 |
+ (setf (gethash k acc) v)) acc) |
|
260 |
+ (acc-p acc) |
|
261 |
+ (t (make-hash-table)))) |
|
262 |
+ '(("234" "345") ("567" "213"))))) |
|
263 |
+ (values result |
|
264 |
+ (alexandria:hash-table-alist result)))) |
|
265 |
+#<HASH-TABLE :TEST EQL :COUNT 3 {100AA69173}> |
|
266 |
+((427 . 213) (1135 . 567) (691 . 345)) |