git.fiddlerwoaroof.com
Browse code

Add flexible parsing framework

Ed Langley authored on 28/01/2019 11:02:34
Showing 2 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,50 @@
1
+(defpackage :access-log-reader
2
+  (:shadowing-import-from :multi-fun :rest)
3
+  (:shadowing-import-from :data-lens :pick :defun-ct)
4
+  (:use :cl :multi-fun :data-lens :alexandria :fw.lu)
5
+  (:export ))
6
+(in-package :access-log-reader)
7
+
8
+(defun make-timestamp (record)
9
+  (let ((local-time:*default-timezone* local-time:+utc-zone+))
10
+    (destructuring-bind (day month year time) record
11
+      (local-time:with-decoded-timestamp (:hour hour :minute minute :sec second :nsec nsec) time
12
+        (local-time:encode-timestamp nsec second minute hour day month year)))))
13
+
14
+(defun extract-query-params (s)
15
+  (serapeum:mapply 'cons
16
+                   (map 'list 
17
+                        (serapeum:op (coerce (fwoar.string-utils:split #\= _ :count 2) 'list))
18
+                        (fwoar.string-utils:split #\& s))))
19
+(defparameter +access-log-parser+
20
+  (list (delimited-field #\:)
21
+        (delimited-field #\space) 
22
+        (as (delimited-field #\space) (lambda (s) (subseq s 1 (1- (length s)))))
23
+        (as (delimited-field #\space) (lambda (s) (unless (equal s "-") (parse-integer s))))
24
+        (as (delimited-field #\space) (lambda (s) (unless (equal s "-") (parse-integer s))))
25
+        (as (delimited-field #\space) #'parse-integer)
26
+        (as (delimited-field #\space) (lambda (s) (unless (equal s "-") (parse-integer s))))
27
+        (as (subformat (v (ignore-char #\[)
28
+                          (as (delimited-field #\/) #'parse-integer)
29
+                          (as (delimited-field #\/) (serapeum:op (position _ local-time:+short-month-names+ :test 'equal)))
30
+                          (as (delimited-field #\:) #'parse-integer)
31
+                          (as (delimited-field #\]) (lambda (x) (local-time:parse-timestring (remove #\space x) :allow-missing-date-part t)))))
32
+            'make-timestamp)
33
+        (whitespace)
34
+        (as (delimited-field #\space) (lambda (s) (subseq s 1)))
35
+        (splat-result (as (delimited-field #\space)
36
+                          (alexandria:compose
37
+                           (data-lens:juxt 'quri:uri-path
38
+                                           (alexandria:compose 'extract-query-params 'quri:uri-query))
39
+                           'quri:uri)))
40
+        (delimited-field #\")
41
+        (delimited-field #\space)
42
+        (delimited-field #\space)
43
+        (delimited-field #\space)
44
+        (ignore-char #\")
45
+        (as (delimited-field #\") 'quri:uri)
46
+        (whitespace)
47
+        (rest)))
48
+
49
+(defun parse-log (f)
50
+  (parse-file +access-log-parser+ f))
0 51
new file mode 100644
... ...
@@ -0,0 +1,219 @@
1
+(defpackage :multi-fun
2
+  (:shadow :rest)
3
+  (:use :cl )
4
+  (:export
5
+   #:filter-functions
6
+   #:number-recognizer
7
+   #:delimited-field
8
+   #:ignore-char
9
+   #:whitespace
10
+   #:iso-8601-timestamp
11
+   #:parse-qs
12
+   #:rest
13
+   #:as
14
+   #:splat-result
15
+   #:defun
16
+   #:month-recognizer
17
+   #:take-field
18
+   #:parse-format
19
+   #:parse-file
20
+   #:subformat
21
+   #:treeify))
22
+(in-package :multi-fun)
23
+
24
+(defun filter-functions (funs list)
25
+  (loop
26
+     for fun in funs
27
+     for results = (loop
28
+                      for value in list
29
+                      for (result . num) = (multiple-value-list (funcall fun value))
30
+                      when result
31
+                      collect (list (car num) result))
32
+     when (and results (= (length results) (length list)))
33
+     collect fun))
34
+
35
+(defun number-recognizer (range)
36
+  (destructuring-bind (min max)
37
+      (etypecase range
38
+        (integer (list 0 range))
39
+        (list range))
40
+    (flet ((in-range-p (n)
41
+             (< min n (1+ max))))
42
+      (lambda (str)
43
+        (loop for digits
44
+           from (min (length str)
45
+                     (ceiling (log max 10)))
46
+           downto (if (<= min 0)
47
+                      1
48
+                      (max 1
49
+                           (floor (log min 10))))
50
+           for (result . rest) = (multiple-value-list (parse-integer str :end digits :junk-allowed t))
51
+           for relevant-result = (and result (in-range-p result))
52
+           until relevant-result
53
+           finally (when relevant-result
54
+                     (return (values-list
55
+                              (cons result rest)))))))))
56
+
57
+(defun delimited-field (delimiter)
58
+  (lambda (str)
59
+    (alexandria:if-let ((val (position delimiter str)))
60
+      (values (subseq str 0 val)
61
+              (min (length str)
62
+                   (1+ val)))
63
+      (values str
64
+              (length str)))))
65
+
66
+(defun ignore-char (char)
67
+  (lambda (str)
68
+    (when (char= (elt str 0) char)
69
+      (values 'drop 1))))
70
+
71
+(defun whitespace ()
72
+  (lambda (str)
73
+    (values 'drop
74
+            (position-if-not 'serapeum:whitespacep
75
+                             str))))
76
+
77
+(defun iso-8601-timestamp ()
78
+  (lambda (str)
79
+    (values (local-time:parse-timestring (subseq str 0 32))
80
+            32)))
81
+
82
+(defun parse-qs (str)
83
+  (serapeum:mapply 'cons
84
+                   (map 'list
85
+                        (serapeum:op (coerce (fwoar.string-utils:split #\= _ :count 2) 'list))
86
+                        (fwoar.string-utils:split #\& str))))
87
+
88
+(defun rest (&optional (as #'identity))
89
+  (lambda (str)
90
+    (values (funcall as str)
91
+            (length str))))
92
+
93
+(defun as (fun transform)
94
+  (flet ((do-transform (val) (funcall transform val)))
95
+    (lambda (str)
96
+      (fw.lu:transform-first-value (do-transform (funcall fun str))))))
97
+
98
+(defun splat-result (fun)
99
+  (flet ((splatize (val) (list* 'splat (alexandria:ensure-list val))))
100
+    (lambda (str)
101
+      (fw.lu:transform-first-value (splatize (funcall fun str))))))
102
+
103
+(defun month-recognizer (str)
104
+  (macrolet ((match-char (elt &body body)
105
+               (let ((cases (mapcar (serapeum:op
106
+                                      (if (listp _1)
107
+                                          `(,(elt (car _1) elt) (match-char ,(1+ elt) ,@_1))
108
+                                          `(,(elt _1 elt) (when (alexandria:starts-with-subseq ,_1 str)
109
+                                                            ,_1))))
110
+                                    body)))
111
+                 `(when (> (length str) ,elt)
112
+                    (case (elt str ,elt)
113
+                      ,@cases)))))
114
+    (alexandria:if-let ((value (match-char 0
115
+                                           ("April" "August")
116
+                                           "December"
117
+                                           "February"
118
+                                           ("January" ("July" "June"))
119
+                                           (("March" "May"))
120
+                                           "November"
121
+                                           "October"
122
+                                           "September")))
123
+      (values value (length value))
124
+      (values nil 0))))
125
+
126
+(defun take-field (fun strs)
127
+  (loop for str in strs
128
+     for (val chars-read) = (multiple-value-list (funcall fun str))
129
+     do (format t "~&VAL: ~s CHARS-READ: ~s" val chars-read)
130
+     collect (list val (subseq str chars-read))))
131
+
132
+(defun parse-format (funs str)
133
+  (loop
134
+     for line = str then (subseq line chars-read)
135
+     for fun in funs
136
+     for (val chars-read) = (multiple-value-list (funcall fun line))
137
+     for total-chars = chars-read then (+ total-chars chars-read)
138
+     if (and (consp val) (eql (car val) 'splat)) append (cdr val) into result
139
+     else when (not (eql val 'drop)) collect val into result
140
+     finally (return (values result
141
+                             total-chars))))
142
+
143
+(defmacro subformat ((v-sym &rest parsers) &body transform)
144
+  `(flet ((transform (,v-sym)
145
+            ,@(if transform
146
+                  transform
147
+                  (list v-sym))))
148
+     (lambda (str)
149
+       (fw.lu:transform-first-value (transform (parse-format (list ,@parsers) str))))))
150
+
151
+(defun parse-file (format file &optional (get-record #'read-line))
152
+  (let* ((records ())
153
+         (cur records))
154
+    (flet ((collect-record (record)
155
+             (let ((new-cdr (list record)))
156
+               (if cur
157
+                   (setf (cdr cur) new-cdr)
158
+                   (setf records new-cdr))
159
+               (setf cur new-cdr))))
160
+      (loop (let* ((line (funcall get-record file nil 'eof)))
161
+              (when (eql line 'eof)
162
+                (return records))
163
+              (with-simple-restart (skip-record "Skip line ~s" line)
164
+                (collect-record (coerce (parse-format format line)
165
+                                        'vector))))))))
166
+
167
+(defun treeify (strings)
168
+  (declare (optimize (speed 3))
169
+           (inline data-lens:over data-lens:transform-tail data-lens:applicable-when data-lens:of-min-length
170
+                   data-lens:on data-lens:over data-lens:slice data-lens:compress-runs
171
+                   data-lens:combine-matching-lists data-lens:juxt data-lens:element data-lens:sorted))
172
+  (let* ((strip-prefixes
173
+          (alexandria:compose
174
+           (data-lens:over
175
+            (data-lens:transform-tail 
176
+             (data-lens:over
177
+              (data-lens:transform-head (data-lens:slice 1)))))
178
+           (data-lens:compress-runs
179
+            :collector 'data-lens:combine-matching-lists)))
180
+         (extract-keys-and-sort
181
+          (alexandria:compose (data-lens:over
182
+                               (data-lens:juxt (alexandria:compose
183
+                                                (data-lens:element 0)
184
+                                                (data-lens:element 0))
185
+                                               'identity))
186
+                              (data-lens:sorted 'char<
187
+                                                :key (alexandria:compose
188
+                                                      (data-lens:element 0)
189
+                                                      (data-lens:element 0)))))
190
+         (recurse
191
+          (data-lens:over
192
+           (data-lens:transform-tail
193
+            (data-lens:applicable-when
194
+             (lambda (x)
195
+               (if (equal (caar x) "")
196
+                   (cons (cons nil (cdar x))
197
+                         (treeify (cdr x)))
198
+                   (treeify x)))
199
+             (data-lens:of-min-length 2)))))
200
+         (step (data-lens:on strip-prefixes extract-keys-and-sort)))
201
+    
202
+    (funcall (alexandria:compose recurse step) strings)))
203
+
204
+
205
+
206
+
207
+(defparameter +months+
208
+  '("January"
209
+    "February"
210
+    "March"
211
+    "April"
212
+    "May"
213
+    "June"
214
+    "July"
215
+    "August"
216
+    "September"
217
+    "October"
218
+    "November"
219
+    "December"))