git.fiddlerwoaroof.com
Browse code

Initialz commit: working lexer, partial parser

fiddlerwoaroof authored on 30/09/2016 09:29:26
Showing 6 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1 @@
1
+This is the stub README.txt for the "php_pre" project.
0 2
new file mode 100644
... ...
@@ -0,0 +1,111 @@
1
+(in-package :php.lexer)
2
+
3
+(eval-when (:compile-toplevel :execute :load-toplevel)
4
+  (ql:quickload :osicat)
5
+  (osicat-posix:setenv "CC" "clang") 
6
+  (ql:quickload :net.didierverna.clon))
7
+
8
+(net.didierverna.clon:defsynopsis (:postfix "PHP-FILE")
9
+  (text :contents "lex and dump the lexical structure of a php file")
10
+  (flag :short-name "t" :long-name "tokens-only"
11
+	:description "Show only tokens")
12
+  (flag :short-name "r" :long-name "show-remainder"
13
+	:description "Show leftovers after parsing")
14
+  (flag :short-name "h" :long-name "help"
15
+	:description "Show this help"))
16
+
17
+
18
+;; (defsynopsis (:postfix "TIMESHEETS ...")
19
+;;   (text :contents "A program for managing logs of hours worked")
20
+;;   (group (:header "Display options")
21
+;;          (flag :short-name "s" :long-name "status"
22
+;;                :description "Print a short summary of work status")
23
+;;          (flag :short-name "W"
24
+;;                :long-name "ignore-whitespace"
25
+;;                :description "Ignore whitespace errors in input")
26
+;;          (flag :short-name "i" :long-name "interactive"
27
+;;                :description "Run interactively"))
28
+;;   (group (:header "Sort options")
29
+;;          (flag :short-name "r"
30
+;;                :long-name "reverse"
31
+;;                :description "Reverse the sort direction")
32
+;;          (flag :short-name "c"
33
+;;                :long-name "client"
34
+;;                :description "Sort records by client") 
35
+;;          (flag :short-name "dr"
36
+;;                :long-name "daily-report"
37
+;;                :description "Print a daily report for the passed logs"))
38
+;;   (group (:header "Freshbooks")
39
+;;          (flag :long-name "post-hours"
40
+;;                :description "Post hours to freshbooks (requires manual setup of Freshbooks keys)"))
41
+;;   (group (:header "Self-test options")
42
+;;          (flag :long-name "dependencies"
43
+;;                :description "Graph the dependencies of this project")
44
+;;          (flag :long-name "run-tests"
45
+;;                :description "Run the tests")
46
+;;          (enum :long-name "output-style"
47
+;;                :description "The kind of output to produce"
48
+;;                :default-value :normal
49
+;;                :enum '(:xunit :normal)))
50
+;;   (group (:header "Reformat options")
51
+;;          (flag :long-name "reformat-file"
52
+;;                :short-name "f"
53
+;;                :description "Read the current timesheet file and dump, correcting any whitespace or formatting errors"))
54
+;;   (group (:header "Generic options")
55
+;;          (flag :short-name "v" :long-name "version"
56
+;;                :description "Show the program version")
57
+;;))
58
+
59
+;; (defun pprint-log-main ()
60
+;;   (net.didierverna.clon:make-context)
61
+;;   (tagbody
62
+;;     start
63
+;;     (restart-case
64
+;;       (cond
65
+;;         ((getopt :long-name "help") (help))
66
+;;         ((getopt :long-name "version") (show-version))
67
+;;         ((getopt :long-name "dependencies") (format t (tempores.package-grapher::graph-tempores-packages)))
68
+;;         ((getopt :long-name "post-hours") (let ((*print-pretty* nil))
69
+;;                                             (loop for item in (tempores.freshbooks::post-time-entries-main)
70
+;;                                                   do (format t "Posted an entry")
71
+;;                                                   do (plump:serialize item)
72
+;;                                                   finally (format t "Don't forget to archive time file."))))
73
+;;         ((getopt :long-name "run-tests") (tests-main (getopt :long-name "output-style")))
74
+;;         ((getopt :long-name "reformat-file") (reformat-main))
75
+;;         ((getopt :long-name "daily-report") (print-daily-report (remainder)))
76
+;;         (t (with-tempores-configuration ()
77
+;;              (pprint-log
78
+;;                (remainder)
79
+;;                :client (getopt :long-name "client")
80
+;;                :interactive (getopt :long-name "interactive")
81
+;;                :ignore-whitespace (getopt :long-name "ignore-whitespace")
82
+;;                :status (getopt :long-name "status")
83
+;;                :reverse (getopt :long-name "reverse")))))
84
+;;       (retry () (go start))
85
+;;       (abort ()))))
86
+
87
+(defun main ()
88
+  (net.didierverna.clon:make-context)
89
+  (cond
90
+    ((net.didierverna.clon:getopt :long-name "help") (net.didierverna.clon:help))
91
+    (t (let* ((*print-pretty* t)
92
+	      (*print-right-margin* 80)
93
+	      (tokens-only (net.didierverna.clon:getopt :long-name "tokens-only"))
94
+	      (show-remainder (net.didierverna.clon:getopt :long-name "show-remainder")))
95
+	 (multiple-value-bind (result leftovers)
96
+	     (parse (.input-file)
97
+		    (slurp-file (car (net.didierverna.clon:remainder))))
98
+	   (when tokens-only
99
+	     (setf result (remove-if-not (lambda (x) (eq x :token))
100
+					 result :key #'car))) 
101
+	   
102
+	   (print result)
103
+	   (when show-remainder
104
+	     (fresh-line)
105
+	     (terpri)
106
+	     (print leftovers)))))))
107
+
108
+(defun make-executable ()
109
+  (net.didierverna.clon:dump "php-lex" main
110
+        :compression 8
111
+        :purify t))
0 112
new file mode 100644
... ...
@@ -0,0 +1,5 @@
1
+;;;; package.lisp
2
+
3
+(defpackage #:php_pre
4
+  (:use #:cl #:smug))
5
+
0 6
new file mode 100644
... ...
@@ -0,0 +1,314 @@
1
+(defpackage :php.parser
2
+  (:use :cl :smug :serapeum :alexandria))
3
+
4
+(in-package :php.parser)
5
+
6
+'(:token (<token-type> *value))
7
+
8
+(defmacro if-token (token &body body)
9
+  (once-only (token)
10
+    `(if (and (consp ,token)
11
+	      (eq (car ,token)
12
+		  :token))
13
+	 (progn ,@body)
14
+	 (error (format nil "~s is not a token" ,token)))))
15
+
16
+(defun get-token-type (token)
17
+  (if-token token
18
+    (cadr token)))
19
+
20
+(defun get-token-args (token)
21
+  (if-token token
22
+    (cddr token)))
23
+
24
+(defmethod input-empty-p ((input null))
25
+  t)
26
+
27
+(defmethod input-empty-p ((input cons))
28
+  nil)
29
+
30
+(defmethod input-first ((input cons))
31
+  (car input))
32
+
33
+(defmethod input-rest ((input cons))
34
+  (cdr input))
35
+
36
+(defun .match-lexical-element (keyword)
37
+  (.bind (.item)
38
+	 (lambda (element)
39
+	   (if (eq keyword
40
+		   (car element))
41
+	       (.identity element)
42
+	       (.fail)))))
43
+
44
+(defun .match-token (&optional type)
45
+  (.let* ((matched-token (.match-lexical-element :token)))
46
+    (if type
47
+	(if (eq type (get-token-type matched-token))
48
+	    (.identity matched-token)
49
+	    (.fail))
50
+	(.identity matched-token))))
51
+
52
+(defun .match-keyword (&optional name)
53
+  (.let* ((tok (.match-token :keyword)))
54
+    (if name
55
+	(if (equal name (get-token-args tok))
56
+	    (.identity tok)
57
+	    (.fail))
58
+	(.identity tok))))
59
+
60
+(defun .match-whitespace ()
61
+  (.match-lexical-element :whitespace))
62
+
63
+(defun .match-operator (&optional op)
64
+  (.let* ((matched-op (.or (.match-token :operator)
65
+			   (.match-token :punctuator)
66
+			   (.match-token :paired-punctuator))))
67
+    (if (or (not op) 
68
+	    (equal (get-token-args matched-op)
69
+		   op))
70
+	(.identity matched-op)
71
+	(.fail))))
72
+
73
+(defun .match-punctuator (&optional sym)
74
+  (.or (.match-unpaired-punctuator sym)
75
+       (.match-paired-punctuator sym)))
76
+
77
+(defun .match-unpaired-punctuator (&optional sym)
78
+  (.let* ((punct (.match-token :punctuator)))
79
+    (if (or (not sym)
80
+	    (equal (get-token-args punct)
81
+		   sym))
82
+	(.identity punct)
83
+	(.fail))))
84
+
85
+(defun .match-paired-punctuator (&optional sym)
86
+  (.let* ((punct (.match-token :paired-punctuator)))
87
+    (if (or (not sym)
88
+	    (equal (get-token-args punct)
89
+		   sym))
90
+	(.identity punct)
91
+	(.fail))))
92
+
93
+(defun .expression ()
94
+  (.item))
95
+
96
+(defun .parenthesized-expression ()
97
+  (.prog2 (.match-paired-punctuator "(")
98
+	  (.expression)
99
+	  (.match-paired-punctuator ")")))
100
+
101
+(defun .match-variable ()
102
+  (.match-token :variable))
103
+
104
+(defun qualified-name-arguments (qualified-name-token)
105
+  (apply #'values (coerce (get-token-args qualified-name-token)
106
+			  'list)))
107
+
108
+(defun qualified-name-namespace (qualified-name-token)
109
+  (nth-value 0 (qualified-name-arguments qualified-name-token)))
110
+
111
+(defun qualified-name-name (qualified-name-token)
112
+  (nth-value 1 (qualified-name-arguments qualified-name-token)))
113
+
114
+(defun .match-qualified-name (&key (namespace nil namespace-p) name)
115
+  (.let* ((matched-name (.match-token :qualified-name)))
116
+    (if (or namespace-p name)
117
+	(multiple-value-bind (uw-namespace uw-name) (qualified-name-arguments matched-name)
118
+	  (cond ((and namespace-p (not (equal namespace uw-namespace)))
119
+		 (.fail))
120
+		((and name (not (equal name uw-name)))
121
+		 (.fail))
122
+		(t (.identity matched-name))))
123
+	(.identity matched-name))))
124
+
125
+(defun .match-name (&optional name)
126
+  (.match-qualified-name :namespace nil :name name))
127
+
128
+(defun .match-literal ()
129
+  (.match-token :literal))
130
+
131
+
132
+
133
+;;; Expressions
134
+
135
+(defun .function-definition-header ()
136
+  (.progn (.match-keyword "function")))
137
+
138
+(defun .primary-expression ()
139
+  (php.lexer::tag-result :expression
140
+    (.or (.match-variable)
141
+	 (.match-qualified-name)
142
+	 (.match-literal)
143
+	 (.constant-expression)
144
+	 ;(.intrinsic)
145
+	 ;(.anonymous-function-creation-expression)
146
+	 ;(.parenthesized-expression)
147
+	 )))
148
+
149
+(defun .scalar-type ()
150
+  (.or (.match-name "bool")
151
+       (.match-name "float")
152
+       (.match-name "int")
153
+       (.match-name "string")))
154
+
155
+; Add scalar-type / qualified-name
156
+(defun .type-declaration ()
157
+  (.or (.match-keyword "array")
158
+       (.match-keyword "callable")
159
+       (.scalar-type)
160
+       (.match-qualified-name)))
161
+
162
+(defun .parameter-declaration ()
163
+  (php.lexer::tag-result :parameter-declaration
164
+    (.let* ((declared-type (.optional (.type-declaration)))
165
+	    (reference-marker (.optional (.match-operator "&")))
166
+	    (variable-declaration (.match-variable))
167
+	    (default-value (.optional
168
+			    (.progn (.match-operator "=")
169
+				    (.constant-expression)))))
170
+      (.identity (list variable-declaration
171
+		       :type declared-type
172
+		       :is-reference (and reference-marker t)
173
+		       :default-value default-value)))))
174
+
175
+(defun .variadic-parameter ()
176
+  (php.lexer::tag-result :variadic-parameter
177
+    (.let* ((declared-type (.optional (.type-declaration)))
178
+	    (reference-marker (.optional (.match-operator "&")))
179
+	    (variable-declaration (.progn (.match-operator "...")
180
+					  (.match-variable))))
181
+      (.identity (list variable-declaration
182
+		       :type declared-type
183
+		       :is-reference (and reference-marker t))))))
184
+
185
+(defun .simple-parameter-declaration-list ()
186
+  (.or (.let* ((parameters (.first
187
+			    (.map 'list (.prog1 (.parameter-declaration)
188
+						(.match-unpaired-punctuator ","))
189
+				  :at-least 0)))
190
+	       (last-parameter (.parameter-declaration)))
191
+	 (.identity (append parameters
192
+			    (list last-parameter))))
193
+       (.let* ((res (.parameter-declaration)))
194
+	 (.identity (list res)))))
195
+
196
+(defun .variadic-declaration-list ()
197
+  (.let* ((normal-params (.optional (.simple-parameter-declaration-list)))
198
+	  (variadic-param (.progn (.match-unpaired-punctuator ",")
199
+				  (.variadic-parameter))))
200
+    (.identity (append normal-params
201
+		       (list variadic-param)))))
202
+
203
+(defun .parameter-declaration-list ()
204
+  (.or (.simple-parameter-declaration-list)
205
+       (.variadic-declaration-list)))
206
+
207
+(defun .return-type ()
208
+  (.progn (.match-operator ":")
209
+	  (.or (.match-qualified-name :name "void")
210
+	       (.type-declaration))))
211
+
212
+(defun .function-definition-header ()
213
+  (php.lexer::tag-result :function-header
214
+    (.progn (.match-keyword "function")
215
+	    (.let* ((return-by-reference (.optional (.match-operator "&")))
216
+		    (function-name (.match-name))
217
+		    (parameters (.optional (.prog2 (.match-paired-punctuator "(")
218
+						   (.parameter-declaration-list)
219
+						   (.match-paired-punctuator ")"))))
220
+		    (return-type (.optional (.return-type))))
221
+	      (.identity
222
+	       (list :function
223
+		     :name function-name
224
+		     :parameters parameters
225
+		     :return-type return-type
226
+		     :return-by-reference (not (null return-by-reference))))))))
227
+
228
+(defun .function-definition ()
229
+  (php.lexer::tag-result :function-definition
230
+    (.let* ((header (.function-definition-header))
231
+	    (body (.compound-statement)))
232
+      (.identity
233
+       (list header body)))))
234
+
235
+(defun .array-element-initializer ()
236
+  (flet ((.element-key () (.expression))
237
+	 (.element-value () (.expression)))
238
+    (.or (.let* ((key (.prog1 (.element-key)
239
+			      (.match-operator "=>")))
240
+		 (is-ref (.optional (.match-operator "&")))
241
+		 (value (.element-value)))
242
+	   (.identity (list key value :is-reference (and is-ref t))))
243
+	 (.let* ((is-ref (.optional (.match-operator "&")))
244
+		 (value (.element-value)))
245
+	   (.identity (list value :is-reference (and is-ref t)))))))
246
+
247
+(defun .array-initializer-list ()
248
+  (.let* ((first (.array-element-initializer))
249
+	  (rest (.map 'list (.progn (.match-punctuator)
250
+				    (.array-element-initializer))
251
+		      :at-least 0)))
252
+    (.identity (cons first rest))))
253
+
254
+(defun .array-initializer ()
255
+  (.optional (.array-initializer-list)))
256
+
257
+(defun .array-creation-expression ()
258
+  (php.lexer::tag-result :array
259
+    (.let* ((opening (.or (.progn (.match-keyword "array")
260
+				  (.match-paired-punctuator "("))
261
+			  (.match-paired-punctuator "[")))
262
+	    (list (.array-initializer))
263
+	    (closing  (.match-paired-punctuator
264
+		       (if (string= "(" (get-token-args opening))
265
+			   ")"
266
+			   "]"))))
267
+      closing
268
+      (.identity list))))
269
+
270
+(defun .constant-expression ()
271
+  (.or (.array-creation-expression)
272
+       (.expression)))
273
+
274
+(defparameter *fun-tion*
275
+  "function fib($x) {
276
+  array_merge([1,2,3], [2,3,4]);
277
+  if ($x == 0) return 1;
278
+  if ($x == 1) return 1;
279
+  if ($x >  1) return fib($x-1) + fib($x-2);
280
+}")
281
+
282
+(defparameter *function-tokens*
283
+  (parse (php.lexer::.input-file)
284
+	 *fun-tion*))
285
+
286
+(defparameter *uut*
287
+  (parse (php.lexer::.input-file)
288
+	 "callable &$foo = array_merge([1,2,3], [4,5,6]);"))
289
+
290
+
291
+(defvar *xml-output* (make-synonym-stream '*standard-output*))
292
+(flet ((escape-cdata-end (string)
293
+	 (string-replace-all "]]>" string "]]>]]&gt;<![CDATA[")))
294
+  (defgeneric to-xml (tag)
295
+    (:method ((tag cons))
296
+      (format *xml-output* "~&<~(~a~)>~{~a~}</~(~a~)>~%"
297
+	      (car tag)
298
+	      (mapcar #'(lambda (x)
299
+			  (with-output-to-string (*xml-output*)
300
+			    (to-xml x)))
301
+		      (cdr tag))
302
+	      (car tag)))
303
+    (:method ((tag integer))
304
+      (format *xml-output* "<![CDATA[~a]]>" tag))
305
+
306
+    (:method :around ((tag string))
307
+	     (format *xml-output* "<![CDATA[~a]]>"
308
+		     (call-next-method)))
309
+    (:method ((tag string))
310
+      (escape-cdata-end tag))))
311
+
312
+(defmacro flip-values (form)
313
+  `(multiple-value-bind (a b) ,form
314
+     (values b a)))
0 315
new file mode 100644
... ...
@@ -0,0 +1,14 @@
1
+;;;; php_pre.asd
2
+
3
+(asdf:defsystem #:php_pre
4
+  :description "Describe php_pre here"
5
+  :author "Your Name <your.name@example.com>"
6
+  :license "Specify license here"
7
+  :depends-on (#:fwoar.lisputils
8
+               #:alexandria
9
+               #:smug
10
+               #:serapeum)
11
+  :serial t
12
+  :components ((:file "package")
13
+               (:file "php_pre")))
14
+
0 15
new file mode 100644
... ...
@@ -0,0 +1,148 @@
1
+;;;; php_pre.lisp
2
+
3
+(in-package #:php_pre)
4
+
5
+(defclass statement ()
6
+  ((statement-keyword :initarg :keyword :accessor statement-keyword)
7
+   (param :initarg :param :accessor param)
8
+   (body :initarg :body :accessor body)))
9
+
10
+(defmethod print-object ((object statement) s)
11
+  (print-unreadable-object (object s :type t :identity t)
12
+    (format s "~a (~d expressions)"
13
+	    (statement-keyword object)
14
+	    (length (body object)))))
15
+
16
+(defun make-statement (type param body)
17
+  (make-instance 'statement :keyword type :param param :body body))
18
+
19
+(defun .constituent ()
20
+  (.is #'alpha-char-p))
21
+
22
+(defun .keyword ()
23
+  (.first
24
+   (.map 'string (.constituent))))
25
+
26
+(defun .whitespace-char ()
27
+  (.or (.char= #\space)
28
+       (.is-not #'graphic-char-p)))
29
+
30
+(defun .comment ()
31
+  (.or (.first
32
+	(.prog2 (.string= "//")
33
+		(.word-list)
34
+		(.char= #\newline)
35
+		(.optional (.whitespace))))
36
+       (.first
37
+	(.prog2 (.string= "/*")
38
+		(.map 'string (.and (.not (.string= "*/"))
39
+				    (.item)))
40
+		(.string= "*/")
41
+		(.optional (.whitespace))))))
42
+
43
+(defun .whitespace ()
44
+  (.first (.map 'string (.whitespace-char))))
45
+
46
+(defun .word-terminator ()
47
+  (.or (.whitespace-char)
48
+       (.is (lambda (x)
49
+	      (or (char= x #\{)
50
+		  (char= x #\})
51
+		  (char= x #\})
52
+		  (char= x #\;))))))
53
+
54
+(defun .array-el ()
55
+  (.let* ((first (.first (.map 'string
56
+			       (.prog2 (.optional (.whitespace))
57
+				       (.is-not (lambda (it)
58
+						  (member it '(#\= #\,))))
59
+				       (.optional (.whitespace))))))
60
+	  (second (.optional (.progn (.string= "=>")
61
+				     (.optional (.whitespace))
62
+				     (.first
63
+				      (.map 'string
64
+					    (.is-not #'char= #\,))))))
65
+	  (divider (.optional (.char= #\,))))
66
+    divider
67
+    (.identity (cons first second))))
68
+
69
+(defun .php-array ()
70
+  (.let* ((begin (.char= #\[))
71
+	  (els (.map 'list (.array-el) :at-least 0))
72
+	  (end (.char= #\])))
73
+    (.identity (list :array begin els end))))
74
+
75
+(defun .word ()
76
+  (.or (.first (.concatenate 'string
77
+			     (.string= "'")
78
+			     (.map 'string (.is-not #'char= #\'))
79
+			     (.string= "'")))
80
+       (.first (.concatenate 'string
81
+			     (.string= "\"")
82
+			     (.map 'string (.is-not #'char= #\"))
83
+			     (.string= "\"")))
84
+       (.first (.map 'string (.and (.not (.word-terminator))
85
+				   (.item))))))
86
+
87
+(defun .word-list (&optional (word-parser '.word))
88
+  (.map 'list
89
+	(.progn (.optional (.whitespace))
90
+		(.first (funcall word-parser)))))
91
+
92
+(defun .line (&optional (word-parser '.word))
93
+  (.prog1 (.word-list)
94
+	  (.progn (.optional (.whitespace))
95
+		  (.char= #\;))))
96
+
97
+(defun .paren-word ()
98
+  (.map 'string (.and (.not (.or (.word-terminator)
99
+				 (.char= #\( )
100
+				 (.char= #\) )))
101
+		      (.item))))
102
+
103
+(defun .statement-list ()
104
+  (.map 'list (.or (.prog1 (.first (.word-list))
105
+			   (.optional (.whitespace))
106
+			   (.char= #\;))
107
+		   (.progn (.optional (.whitespace))
108
+			   (.first (.statement))))
109
+	:at-least 0))
110
+
111
+(defun .statement-start ()
112
+  (.let* ((type (.progn (.optional (.whitespace))
113
+			(.first (.keyword))))
114
+	  (param (.optional (.or (.prog2 (.progn (.optional (.whitespace))
115
+						 (.char= #\( ))
116
+					 (.word-list)
117
+					 (.progn (.optional (.whitespace))
118
+						 (.char= #\) )))
119
+				 (.word-list))))
120
+	  (_ (.progn (.whitespace)
121
+		     (.char= #\{))))
122
+    (.identity (cons type param))))
123
+
124
+(defun .statement ()
125
+  (.let* ((start (.progn (.prog1 (.map 'list
126
+				       (.progn (.optional (.whitespace))
127
+					       (.comment))
128
+				       :at-least 0)
129
+				 (.optional (.whitespace)))
130
+			 (.statement-start)))
131
+	  (statements (.progn (.whitespace)
132
+			      (.statement-list)))
133
+	  (_ (.progn (.optional (.whitespace))
134
+		     (.char= #\}))))
135
+    (destructuring-bind (type . param) start
136
+      (.identity (make-statement type param statements)))))
137
+
138
+(defun slurp (fn)
139
+  (with-open-file (s fn)
140
+    (let ((result (make-string (file-length s))))
141
+      (read-sequence result s)
142
+      result)))
143
+
144
+(defun .main-parser ()
145
+  (.map 'list (.or (.prog1 (.word-list) (.char= #\;))
146
+		   (.first
147
+		    (.progn (.optional (.whitespace))
148
+			    (.statement))))))