git.fiddlerwoaroof.com
Browse code

Optimize parser a bit; specify correct deps

Ed Langley authored on 25/07/2018 01:31:22
Showing 3 changed files
... ...
@@ -10,7 +10,8 @@
10 10
                #:serapeum
11 11
                #:smug 
12 12
                :local-time
13
-               :uuid)
13
+               :uuid
14
+               :fwoar.lisputils)
14 15
   :in-order-to ((test-op (test-op :cl-edn/test)))
15 16
   :components ((:file "package")
16 17
                (:file "edn" :depends-on ("package" "synthesize"))
... ...
@@ -1,5 +1,14 @@
1 1
 (in-package :edn)
2 2
 
3
+(defun .0-or-more (parser)
4
+  (lambda (input)
5
+    (loop
6
+       for remaining-input = input then (cdr result)
7
+       for result = (first (funcall parser remaining-input))
8
+       while (and (car result) (> (length remaining-input) 0))
9
+       collect (car result) into matches
10
+       finally (return (list (cons matches remaining-input))))))
11
+
3 12
 (defun .satisfies (predicate &rest args)
4 13
   (.bind (.item)
5 14
          (lambda (x)
... ...
@@ -18,20 +27,16 @@
18 27
            (.identity (cons x xs)))
19 28
          (.identity ())))
20 29
 
21
-(defun .one-or-more (parser)
22
-  (.let* ((x parser)
23
-          (y (.zero-or-more parser)))
24
-    (.identity (cons x y))))
25
-
26 30
 (defun .elements ()
27
-  (.zero-or-more (.progn (.s)
28
-                         (.element))))
31
+  (.0-or-more (.progn (.s)
32
+                      (.element))))
29 33
 
30 34
 (defun .s ()
31
-  (.zero-or-more
32
-   (.or (.whitespace)
33
-        (.comment)
34
-        (.discarded-element))))
35
+  (.first
36
+   (.0-or-more
37
+    (.or (.whitespace)
38
+         (.comment)
39
+         (.discarded-element)))))
35 40
 
36 41
 (defun .whitespace ()
37 42
   (.one-of '(#\space
... ...
@@ -40,42 +45,40 @@
40 45
              #\newline
41 46
              #\,)))
42 47
 
48
+(defmacro read-if (s test)
49
+  `(when (funcall (lambda (_)
50
+                    ,test)
51
+                  (peek-char nil ,s))
52
+     (read-char s)))
53
+
54
+(defun parse-whitespace (s)
55
+  (read-if s (member _ '(#\space #\, #\tab #\return #\newline))))
56
+
43 57
 (defun .comment ()
44 58
   (.let* ((result (.prog2 (.char= #\;)
45
-                          (.zero-or-more (.and (.not (.or (.char= #\newline)
46
-                                                          (.char= #\nul)))
47
-                                               (.item)))
59
+                          (.first
60
+                           (.0-or-more
61
+                            (.and (.not (.or (.char= #\newline)
62
+                                             (.char= #\nul)))
63
+                                  (.item))))
48 64
                           (.or (.char= #\newline)
49 65
                                (.not (.item))))))
50 66
     (.identity (list :comment (coerce result 'string)))))
51 67
 
52 68
 (defun .discarded-element ()
53 69
   (.progn (.string= "#_")
54
-          (.s)
55 70
           (.element)))
56 71
 
57 72
 (defun .alt (&rest r)
58 73
   (reduce '.plus r))
59 74
 
60
-(defun .compound-element-start ()
61
-  (.or (.string= "#{")
62
-       (.char= #\{)
63
-       (.char= #\[)
64
-       (.char= #\()
65
-       (.char= #\{)))
66
-
67
-(defun .compound-element-finish (closing)
68
-  (lambda ()
69
-    (.prog1 (.first (.elements))
70
-            (.s)
71
-            (.char= closing))))
72
-
73 75
 (defun .map-element ()
74 76
   (.prog2 (.char= #\{)
75 77
           (.progn (.s)
76
-                  (.zero-or-more (.let* ((first (.prog1 (.element) (.s)))
77
-                                         (second (.prog1 (.element) (.s))))
78
-                                   (.identity (list :pair first second)))))
78
+                  (.first
79
+                   (.0-or-more (.let* ((first (.prog1 (.element) (.s)))
80
+                                          (second (.prog1 (.element) (.s))))
81
+                                    (.identity (list :pair first second))))))
79 82
           (.char= #\})))
80 83
 
81 84
 (defun .between (start-parser end-parser element-parser)
... ...
@@ -156,31 +159,45 @@
156 159
                   (name (.name)))
157 160
             (.identity (list :keyword ns name)))))
158 161
 
162
+(defun .juxt (a b)
163
+  (.let* ((first a)
164
+          (second b))
165
+    (.identity (list first second))))
166
+
159 167
 (defun .name ()
160
-  (.first (.plus (.let* ((first (.name-start-1))
161
-                         (rest (.zero-or-more (.name-constituent))))
162
-                   (.identity (format nil "~c~{~c~}" first rest)))
163
-                 (.let* ((first (.name-start-2))
164
-                         (second (.satisfies (complement #'digit-char-p)))
165
-                         (rest (.zero-or-more (.name-constituent))))
166
-                   (.identity (format nil "~c~c~{~c~}" first second rest))))))
168
+  (.let* ((prefix (.or (.let* ((first (.name-start-1)))
169
+                         (.identity (string first)))
170
+                       (.let* ((first (.juxt (.name-start-2)
171
+                                             (.satisfies (complement #'digit-char-p)))))
172
+                         (.identity (coerce first 'string)))))
173
+          (suffix (.0-or-more (.name-constituent))))
174
+    (.identity (concatenate 'string prefix suffix))))
175
+
176
+(defun name-start-1-p (c)
177
+  (member c
178
+          '(#\! #\* #\? #\_
179
+            #\$ #\% #\& #\=)))
180
+
181
+(defun name-start-2-p (c)
182
+  (member c '(#\. #\- #\+)))
167 183
 
168 184
 (defun .name-start-1 ()
169
-  (.satisfies (lambda (x)
170
-                (or (alpha-char-p x)
171
-                    (member x '(#\! #\* #\? #\_ #\$ #\% #\& #\=))))))
185
+  (.or (.satisfies 'alpha-char-p)
186
+       (.one-of '(#\! #\* #\? #\_ #\$ #\% #\& #\=))))
172 187
 
173 188
 (defun .name-start-2 ()
174
-  (.satisfies (lambda (x)
175
-                (or (alpha-char-p x)
176
-                    (member x '(#\. #\- #\+))))))
189
+  (.one-of '(#\. #\- #\+)))
190
+
191
+(defun name-constituent-p (c)
192
+  (or (alpha-char-p c)
193
+      (digit-char-p c)
194
+      (name-start-1-p c)
195
+      (name-start-2-p c)
196
+      (member c '(#\# #\:))))
177 197
 
178 198
 (defun .name-constituent ()
179
-  (.or (.name-start-1)
180
-       (.name-start-2)
181
-       (.satisfies (lambda (x)
182
-                     (or (digit-char-p x)
183
-                         (member x '(#\# #\:)))))))
199
+  (.satisfies 'name-constituent-p))
200
+
184 201
 (defun apply-sign (sign num)
185 202
   (if sign
186 203
       (ecase sign
... ...
@@ -205,7 +222,7 @@
205 222
 (defun .frac ()
206 223
   (.let* ((nums (.first
207 224
                  (.progn (.char= #\.)
208
-                         (.zero-or-more (.digit))))))
225
+                         (.0-or-more (.digit))))))
209 226
     (.identity
210 227
      (if nums
211 228
          (let ((num (parse-integer (coerce nums 'string))))
... ...
@@ -236,7 +253,7 @@
236 253
       (typecase signed
237 254
         (integer (if (member flag '(nil #\N))
238 255
                      (.identity signed)
239
-                     (.fail)))
256
+                     (.identity (coerce signed 'double-float))))
240 257
         (float (if (member flag '(nil #\M))
241 258
                    (.identity signed)
242 259
                    (.fail)))))))
... ...
@@ -266,7 +283,7 @@
266 283
 (defun .cardinal ()
267 284
   (.let* ((nums (.or (.first
268 285
                       (.let* ((first (.non-zero-digit))
269
-                              (rest (.zero-or-more
286
+                              (rest (.0-or-more
270 287
                                      (.digit))))
271 288
                         (.identity (list* first rest))))
272 289
                      (.let* ((c (.digit)))
... ...
@@ -305,27 +322,83 @@
305 322
         (.not (.char= #\\))
306 323
         (.item)))
307 324
 
308
-(defun .string-escape ()
309
-  (.let* ((esc (.or (.char= #\")
310
-                    (.char= #\b)
311
-                    (.char= #\t)
312
-                    (.char= #\n)
313
-                    (.char= #\f)
314
-                    (.char= #\r)
315
-                    (.char= #\\))))
316
-    (.identity (format nil "\\~c" esc))))
325
+(defun translate-escape (c)
326
+  (ecase c
327
+    ((#\" #\\) c)
328
+    (#\t #\tab)
329
+    (#\n #\newline)
330
+    (#\r #\return)
331
+    (#\b #\backspace)
332
+    (#\f #.(code-char 12))))
333
+
334
+(defun parse-string-ending-old (s)
335
+  (let ((pos 0)
336
+        (done nil))
337
+    (flet ((consume-char ()
338
+             (prog1 (elt s pos)
339
+               (setf done (= pos (length s)))
340
+               (incf pos))))
341
+      (let ((result (loop
342
+                       for char = (serapeum:case-let (next (consume-char))
343
+                                    (#\\ (translate-escape (consume-char)))
344
+                                    (#\" nil)
345
+                                    (t next))
346
+                       while char 
347
+                       when (= pos (length s)) do (return nil)
348
+                       collect char)))
349
+        (if result
350
+            (values (coerce result 'string) pos)
351
+            (values nil 0))))))
352
+
353
+(defun translate-escapes (s)
354
+  (let ((parts (coerce (fwoar.string-utils:split #\\ s) 'list)))
355
+    (serapeum:string-join (list* (car parts)
356
+                                 (mapcan (lambda (part)
357
+                                           (list (translate-escape (elt part 0))
358
+                                                 (subseq part 1)))
359
+                                         (cdr parts))))))
360
+
361
+(defun parse-string-ending (s)
362
+  (declare (optimize (speed 3))
363
+           (type simple-string s))
364
+  (loop
365
+     for possible-quote = (position #\" s) then (position #\" s
366
+                                                          :start (1+ possible-quote))
367
+
368
+     while possible-quote
369
+     when (not (char= #\\ (aref s (1- possible-quote)))) do
370
+       (return (values (translate-escapes (subseq s 0 possible-quote))
371
+                       (1+ possible-quote)))))
317 372
 
318 373
 (defun combine (list)
319 374
   (format nil "~{~a~}" list))
320 375
 
376
+(define-condition invalid-string-ending (error)
377
+  ())
378
+
321 379
 (defun .string ()
322 380
   (.let* ((string (.prog2 (.char= #\")
323
-                          (.zero-or-more (.or (.string-char)
324
-                                              (.progn (.char= #\\)
325
-                                                      (.string-escape))))
381
+                          (.first
382
+                           (.0-or-more (.or (.string-char)
383
+                                            (.let* ((escape-char (.progn (.char= #\\)
384
+                                                                         (.string-escape))))
385
+                                              (.identity (translate-escape escape-char))))))
326 386
                           (.char= #\"))))
327 387
     (.identity (list :string (combine string)))))
328 388
 
389
+(defun .string-ending ()
390
+  (lambda (input)
391
+    (multiple-value-bind (ending count) (parse-string-ending input)
392
+      (if (> count 0)
393
+          (list (cons ending
394
+                      (subseq input count)))
395
+          nil))))
396
+
397
+(defun .string.old ()
398
+  (.let* ((string (.progn (.char= #\")
399
+                          (.string-ending))))
400
+    (.identity (list :string string))))
401
+
329 402
 (defun read-edn (s)
330 403
   (car
331 404
    (smug:parse (.prog1 (.elements)
... ...
@@ -32,6 +32,9 @@
32 32
   (should be float-equal
33 33
           -0.01d0
34 34
           (edn:read-edn "-0.1e-1"))
35
+  (should be float-equal
36
+          0d0
37
+          (edn:read-edn "0M"))
35 38
   (should be float-equal
36 39
           -0.01d0
37 40
           (edn:read-edn "-0.1e-1M"))
... ...
@@ -62,3 +65,51 @@
62 65
               (:map (:pair edn-primitives:nil edn-primitives:true)
63 66
                     (:pair edn-primitives:false edn-primitives:nil)))))
64 67
           (edn:read-edn "(#{[{nil true,false nil}]})")))
68
+
69
+(deftest maps ()
70
+  (should be equal
71
+          '(:map (:pair 1 2))
72
+          (edn:read-edn "{1 2 }"))
73
+  (should be equal
74
+          '(:map (:pair 1 2))
75
+          (edn:read-edn "{ 1 2}"))
76
+  (should be equal
77
+          '(:map (:pair 1 2))
78
+          (edn:read-edn "{1      2}"))
79
+  (should be equal
80
+          '(:map (:pair 1 2))
81
+          (edn:read-edn "{ 1 2 }"))
82
+  (should be equal
83
+          '(:map (:pair 1 2))
84
+          (edn:read-edn "{     1      2     }")))
85
+
86
+(deftest translate-escape ()
87
+  (flet ((translates-to (in out)
88
+           (should be eql
89
+                   out
90
+                   (edn::translate-escape in))))
91
+    (translates-to #\" #\")
92
+    (translates-to #\\ #\\)
93
+    (translates-to #\b (code-char 8))
94
+    (translates-to #\f (code-char 12))
95
+    (translates-to #\n (code-char 10))
96
+    (translates-to #\r (code-char 13))
97
+    (translates-to #\t (code-char 9))))
98
+
99
+(deftest .string-ending ()
100
+  (should be equal
101
+          "foobar"
102
+          ""
103
+          (smug:parse (edn::.string-ending) "foobar\""))
104
+  (should be equal
105
+          "foobar"
106
+          "asdf"
107
+          (smug:parse (edn::.string-ending) "foobar\"asdf"))
108
+  (should be equal
109
+          "foobar\"qwer"
110
+          "asdf"
111
+          (smug:parse (edn::.string-ending) "foobar\\\"qwer\"asdf"))
112
+  (should be equal
113
+          (format nil "foobar~%qwer")
114
+          "asdf"
115
+          (smug:parse (edn::.string-ending) "foobar\\nqwer\"asdf")))