Browse code
Optimize parser a bit; specify correct deps
Ed Langley authored on 25/07/2018 01:31:22
Showing 3 changed files
Showing 3 changed files
... | ... |
@@ -1,5 +1,14 @@ |
1 | 1 |
(in-package :edn) |
2 | 2 |
|
3 |
+(defun .0-or-more (parser) |
|
4 |
+ (lambda (input) |
|
5 |
+ (loop |
|
6 |
+ for remaining-input = input then (cdr result) |
|
7 |
+ for result = (first (funcall parser remaining-input)) |
|
8 |
+ while (and (car result) (> (length remaining-input) 0)) |
|
9 |
+ collect (car result) into matches |
|
10 |
+ finally (return (list (cons matches remaining-input)))))) |
|
11 |
+ |
|
3 | 12 |
(defun .satisfies (predicate &rest args) |
4 | 13 |
(.bind (.item) |
5 | 14 |
(lambda (x) |
... | ... |
@@ -18,20 +27,16 @@ |
18 | 27 |
(.identity (cons x xs))) |
19 | 28 |
(.identity ()))) |
20 | 29 |
|
21 |
-(defun .one-or-more (parser) |
|
22 |
- (.let* ((x parser) |
|
23 |
- (y (.zero-or-more parser))) |
|
24 |
- (.identity (cons x y)))) |
|
25 |
- |
|
26 | 30 |
(defun .elements () |
27 |
- (.zero-or-more (.progn (.s) |
|
28 |
- (.element)))) |
|
31 |
+ (.0-or-more (.progn (.s) |
|
32 |
+ (.element)))) |
|
29 | 33 |
|
30 | 34 |
(defun .s () |
31 |
- (.zero-or-more |
|
32 |
- (.or (.whitespace) |
|
33 |
- (.comment) |
|
34 |
- (.discarded-element)))) |
|
35 |
+ (.first |
|
36 |
+ (.0-or-more |
|
37 |
+ (.or (.whitespace) |
|
38 |
+ (.comment) |
|
39 |
+ (.discarded-element))))) |
|
35 | 40 |
|
36 | 41 |
(defun .whitespace () |
37 | 42 |
(.one-of '(#\space |
... | ... |
@@ -40,42 +45,40 @@ |
40 | 45 |
#\newline |
41 | 46 |
#\,))) |
42 | 47 |
|
48 |
+(defmacro read-if (s test) |
|
49 |
+ `(when (funcall (lambda (_) |
|
50 |
+ ,test) |
|
51 |
+ (peek-char nil ,s)) |
|
52 |
+ (read-char s))) |
|
53 |
+ |
|
54 |
+(defun parse-whitespace (s) |
|
55 |
+ (read-if s (member _ '(#\space #\, #\tab #\return #\newline)))) |
|
56 |
+ |
|
43 | 57 |
(defun .comment () |
44 | 58 |
(.let* ((result (.prog2 (.char= #\;) |
45 |
- (.zero-or-more (.and (.not (.or (.char= #\newline) |
|
46 |
- (.char= #\nul))) |
|
47 |
- (.item))) |
|
59 |
+ (.first |
|
60 |
+ (.0-or-more |
|
61 |
+ (.and (.not (.or (.char= #\newline) |
|
62 |
+ (.char= #\nul))) |
|
63 |
+ (.item)))) |
|
48 | 64 |
(.or (.char= #\newline) |
49 | 65 |
(.not (.item)))))) |
50 | 66 |
(.identity (list :comment (coerce result 'string))))) |
51 | 67 |
|
52 | 68 |
(defun .discarded-element () |
53 | 69 |
(.progn (.string= "#_") |
54 |
- (.s) |
|
55 | 70 |
(.element))) |
56 | 71 |
|
57 | 72 |
(defun .alt (&rest r) |
58 | 73 |
(reduce '.plus r)) |
59 | 74 |
|
60 |
-(defun .compound-element-start () |
|
61 |
- (.or (.string= "#{") |
|
62 |
- (.char= #\{) |
|
63 |
- (.char= #\[) |
|
64 |
- (.char= #\() |
|
65 |
- (.char= #\{))) |
|
66 |
- |
|
67 |
-(defun .compound-element-finish (closing) |
|
68 |
- (lambda () |
|
69 |
- (.prog1 (.first (.elements)) |
|
70 |
- (.s) |
|
71 |
- (.char= closing)))) |
|
72 |
- |
|
73 | 75 |
(defun .map-element () |
74 | 76 |
(.prog2 (.char= #\{) |
75 | 77 |
(.progn (.s) |
76 |
- (.zero-or-more (.let* ((first (.prog1 (.element) (.s))) |
|
77 |
- (second (.prog1 (.element) (.s)))) |
|
78 |
- (.identity (list :pair first second))))) |
|
78 |
+ (.first |
|
79 |
+ (.0-or-more (.let* ((first (.prog1 (.element) (.s))) |
|
80 |
+ (second (.prog1 (.element) (.s)))) |
|
81 |
+ (.identity (list :pair first second)))))) |
|
79 | 82 |
(.char= #\}))) |
80 | 83 |
|
81 | 84 |
(defun .between (start-parser end-parser element-parser) |
... | ... |
@@ -156,31 +159,45 @@ |
156 | 159 |
(name (.name))) |
157 | 160 |
(.identity (list :keyword ns name))))) |
158 | 161 |
|
162 |
+(defun .juxt (a b) |
|
163 |
+ (.let* ((first a) |
|
164 |
+ (second b)) |
|
165 |
+ (.identity (list first second)))) |
|
166 |
+ |
|
159 | 167 |
(defun .name () |
160 |
- (.first (.plus (.let* ((first (.name-start-1)) |
|
161 |
- (rest (.zero-or-more (.name-constituent)))) |
|
162 |
- (.identity (format nil "~c~{~c~}" first rest))) |
|
163 |
- (.let* ((first (.name-start-2)) |
|
164 |
- (second (.satisfies (complement #'digit-char-p))) |
|
165 |
- (rest (.zero-or-more (.name-constituent)))) |
|
166 |
- (.identity (format nil "~c~c~{~c~}" first second rest)))))) |
|
168 |
+ (.let* ((prefix (.or (.let* ((first (.name-start-1))) |
|
169 |
+ (.identity (string first))) |
|
170 |
+ (.let* ((first (.juxt (.name-start-2) |
|
171 |
+ (.satisfies (complement #'digit-char-p))))) |
|
172 |
+ (.identity (coerce first 'string))))) |
|
173 |
+ (suffix (.0-or-more (.name-constituent)))) |
|
174 |
+ (.identity (concatenate 'string prefix suffix)))) |
|
175 |
+ |
|
176 |
+(defun name-start-1-p (c) |
|
177 |
+ (member c |
|
178 |
+ '(#\! #\* #\? #\_ |
|
179 |
+ #\$ #\% #\& #\=))) |
|
180 |
+ |
|
181 |
+(defun name-start-2-p (c) |
|
182 |
+ (member c '(#\. #\- #\+))) |
|
167 | 183 |
|
168 | 184 |
(defun .name-start-1 () |
169 |
- (.satisfies (lambda (x) |
|
170 |
- (or (alpha-char-p x) |
|
171 |
- (member x '(#\! #\* #\? #\_ #\$ #\% #\& #\=)))))) |
|
185 |
+ (.or (.satisfies 'alpha-char-p) |
|
186 |
+ (.one-of '(#\! #\* #\? #\_ #\$ #\% #\& #\=)))) |
|
172 | 187 |
|
173 | 188 |
(defun .name-start-2 () |
174 |
- (.satisfies (lambda (x) |
|
175 |
- (or (alpha-char-p x) |
|
176 |
- (member x '(#\. #\- #\+)))))) |
|
189 |
+ (.one-of '(#\. #\- #\+))) |
|
190 |
+ |
|
191 |
+(defun name-constituent-p (c) |
|
192 |
+ (or (alpha-char-p c) |
|
193 |
+ (digit-char-p c) |
|
194 |
+ (name-start-1-p c) |
|
195 |
+ (name-start-2-p c) |
|
196 |
+ (member c '(#\# #\:)))) |
|
177 | 197 |
|
178 | 198 |
(defun .name-constituent () |
179 |
- (.or (.name-start-1) |
|
180 |
- (.name-start-2) |
|
181 |
- (.satisfies (lambda (x) |
|
182 |
- (or (digit-char-p x) |
|
183 |
- (member x '(#\# #\:))))))) |
|
199 |
+ (.satisfies 'name-constituent-p)) |
|
200 |
+ |
|
184 | 201 |
(defun apply-sign (sign num) |
185 | 202 |
(if sign |
186 | 203 |
(ecase sign |
... | ... |
@@ -205,7 +222,7 @@ |
205 | 222 |
(defun .frac () |
206 | 223 |
(.let* ((nums (.first |
207 | 224 |
(.progn (.char= #\.) |
208 |
- (.zero-or-more (.digit)))))) |
|
225 |
+ (.0-or-more (.digit)))))) |
|
209 | 226 |
(.identity |
210 | 227 |
(if nums |
211 | 228 |
(let ((num (parse-integer (coerce nums 'string)))) |
... | ... |
@@ -236,7 +253,7 @@ |
236 | 253 |
(typecase signed |
237 | 254 |
(integer (if (member flag '(nil #\N)) |
238 | 255 |
(.identity signed) |
239 |
- (.fail))) |
|
256 |
+ (.identity (coerce signed 'double-float)))) |
|
240 | 257 |
(float (if (member flag '(nil #\M)) |
241 | 258 |
(.identity signed) |
242 | 259 |
(.fail))))))) |
... | ... |
@@ -266,7 +283,7 @@ |
266 | 283 |
(defun .cardinal () |
267 | 284 |
(.let* ((nums (.or (.first |
268 | 285 |
(.let* ((first (.non-zero-digit)) |
269 |
- (rest (.zero-or-more |
|
286 |
+ (rest (.0-or-more |
|
270 | 287 |
(.digit)))) |
271 | 288 |
(.identity (list* first rest)))) |
272 | 289 |
(.let* ((c (.digit))) |
... | ... |
@@ -305,27 +322,83 @@ |
305 | 322 |
(.not (.char= #\\)) |
306 | 323 |
(.item))) |
307 | 324 |
|
308 |
-(defun .string-escape () |
|
309 |
- (.let* ((esc (.or (.char= #\") |
|
310 |
- (.char= #\b) |
|
311 |
- (.char= #\t) |
|
312 |
- (.char= #\n) |
|
313 |
- (.char= #\f) |
|
314 |
- (.char= #\r) |
|
315 |
- (.char= #\\)))) |
|
316 |
- (.identity (format nil "\\~c" esc)))) |
|
325 |
+(defun translate-escape (c) |
|
326 |
+ (ecase c |
|
327 |
+ ((#\" #\\) c) |
|
328 |
+ (#\t #\tab) |
|
329 |
+ (#\n #\newline) |
|
330 |
+ (#\r #\return) |
|
331 |
+ (#\b #\backspace) |
|
332 |
+ (#\f #.(code-char 12)))) |
|
333 |
+ |
|
334 |
+(defun parse-string-ending-old (s) |
|
335 |
+ (let ((pos 0) |
|
336 |
+ (done nil)) |
|
337 |
+ (flet ((consume-char () |
|
338 |
+ (prog1 (elt s pos) |
|
339 |
+ (setf done (= pos (length s))) |
|
340 |
+ (incf pos)))) |
|
341 |
+ (let ((result (loop |
|
342 |
+ for char = (serapeum:case-let (next (consume-char)) |
|
343 |
+ (#\\ (translate-escape (consume-char))) |
|
344 |
+ (#\" nil) |
|
345 |
+ (t next)) |
|
346 |
+ while char |
|
347 |
+ when (= pos (length s)) do (return nil) |
|
348 |
+ collect char))) |
|
349 |
+ (if result |
|
350 |
+ (values (coerce result 'string) pos) |
|
351 |
+ (values nil 0)))))) |
|
352 |
+ |
|
353 |
+(defun translate-escapes (s) |
|
354 |
+ (let ((parts (coerce (fwoar.string-utils:split #\\ s) 'list))) |
|
355 |
+ (serapeum:string-join (list* (car parts) |
|
356 |
+ (mapcan (lambda (part) |
|
357 |
+ (list (translate-escape (elt part 0)) |
|
358 |
+ (subseq part 1))) |
|
359 |
+ (cdr parts)))))) |
|
360 |
+ |
|
361 |
+(defun parse-string-ending (s) |
|
362 |
+ (declare (optimize (speed 3)) |
|
363 |
+ (type simple-string s)) |
|
364 |
+ (loop |
|
365 |
+ for possible-quote = (position #\" s) then (position #\" s |
|
366 |
+ :start (1+ possible-quote)) |
|
367 |
+ |
|
368 |
+ while possible-quote |
|
369 |
+ when (not (char= #\\ (aref s (1- possible-quote)))) do |
|
370 |
+ (return (values (translate-escapes (subseq s 0 possible-quote)) |
|
371 |
+ (1+ possible-quote))))) |
|
317 | 372 |
|
318 | 373 |
(defun combine (list) |
319 | 374 |
(format nil "~{~a~}" list)) |
320 | 375 |
|
376 |
+(define-condition invalid-string-ending (error) |
|
377 |
+ ()) |
|
378 |
+ |
|
321 | 379 |
(defun .string () |
322 | 380 |
(.let* ((string (.prog2 (.char= #\") |
323 |
- (.zero-or-more (.or (.string-char) |
|
324 |
- (.progn (.char= #\\) |
|
325 |
- (.string-escape)))) |
|
381 |
+ (.first |
|
382 |
+ (.0-or-more (.or (.string-char) |
|
383 |
+ (.let* ((escape-char (.progn (.char= #\\) |
|
384 |
+ (.string-escape)))) |
|
385 |
+ (.identity (translate-escape escape-char)))))) |
|
326 | 386 |
(.char= #\")))) |
327 | 387 |
(.identity (list :string (combine string))))) |
328 | 388 |
|
389 |
+(defun .string-ending () |
|
390 |
+ (lambda (input) |
|
391 |
+ (multiple-value-bind (ending count) (parse-string-ending input) |
|
392 |
+ (if (> count 0) |
|
393 |
+ (list (cons ending |
|
394 |
+ (subseq input count))) |
|
395 |
+ nil)))) |
|
396 |
+ |
|
397 |
+(defun .string.old () |
|
398 |
+ (.let* ((string (.progn (.char= #\") |
|
399 |
+ (.string-ending)))) |
|
400 |
+ (.identity (list :string string)))) |
|
401 |
+ |
|
329 | 402 |
(defun read-edn (s) |
330 | 403 |
(car |
331 | 404 |
(smug:parse (.prog1 (.elements) |
... | ... |
@@ -32,6 +32,9 @@ |
32 | 32 |
(should be float-equal |
33 | 33 |
-0.01d0 |
34 | 34 |
(edn:read-edn "-0.1e-1")) |
35 |
+ (should be float-equal |
|
36 |
+ 0d0 |
|
37 |
+ (edn:read-edn "0M")) |
|
35 | 38 |
(should be float-equal |
36 | 39 |
-0.01d0 |
37 | 40 |
(edn:read-edn "-0.1e-1M")) |
... | ... |
@@ -62,3 +65,51 @@ |
62 | 65 |
(:map (:pair edn-primitives:nil edn-primitives:true) |
63 | 66 |
(:pair edn-primitives:false edn-primitives:nil))))) |
64 | 67 |
(edn:read-edn "(#{[{nil true,false nil}]})"))) |
68 |
+ |
|
69 |
+(deftest maps () |
|
70 |
+ (should be equal |
|
71 |
+ '(:map (:pair 1 2)) |
|
72 |
+ (edn:read-edn "{1 2 }")) |
|
73 |
+ (should be equal |
|
74 |
+ '(:map (:pair 1 2)) |
|
75 |
+ (edn:read-edn "{ 1 2}")) |
|
76 |
+ (should be equal |
|
77 |
+ '(:map (:pair 1 2)) |
|
78 |
+ (edn:read-edn "{1 2}")) |
|
79 |
+ (should be equal |
|
80 |
+ '(:map (:pair 1 2)) |
|
81 |
+ (edn:read-edn "{ 1 2 }")) |
|
82 |
+ (should be equal |
|
83 |
+ '(:map (:pair 1 2)) |
|
84 |
+ (edn:read-edn "{ 1 2 }"))) |
|
85 |
+ |
|
86 |
+(deftest translate-escape () |
|
87 |
+ (flet ((translates-to (in out) |
|
88 |
+ (should be eql |
|
89 |
+ out |
|
90 |
+ (edn::translate-escape in)))) |
|
91 |
+ (translates-to #\" #\") |
|
92 |
+ (translates-to #\\ #\\) |
|
93 |
+ (translates-to #\b (code-char 8)) |
|
94 |
+ (translates-to #\f (code-char 12)) |
|
95 |
+ (translates-to #\n (code-char 10)) |
|
96 |
+ (translates-to #\r (code-char 13)) |
|
97 |
+ (translates-to #\t (code-char 9)))) |
|
98 |
+ |
|
99 |
+(deftest .string-ending () |
|
100 |
+ (should be equal |
|
101 |
+ "foobar" |
|
102 |
+ "" |
|
103 |
+ (smug:parse (edn::.string-ending) "foobar\"")) |
|
104 |
+ (should be equal |
|
105 |
+ "foobar" |
|
106 |
+ "asdf" |
|
107 |
+ (smug:parse (edn::.string-ending) "foobar\"asdf")) |
|
108 |
+ (should be equal |
|
109 |
+ "foobar\"qwer" |
|
110 |
+ "asdf" |
|
111 |
+ (smug:parse (edn::.string-ending) "foobar\\\"qwer\"asdf")) |
|
112 |
+ (should be equal |
|
113 |
+ (format nil "foobar~%qwer") |
|
114 |
+ "asdf" |
|
115 |
+ (smug:parse (edn::.string-ending) "foobar\\nqwer\"asdf"))) |