git.fiddlerwoaroof.com
parser/parser-macros.scm
4e987026
 ;;; Macro definitions for the parser & lexer.
 
 
 ;;; This macro allows debugging of the lexer.  Before releasing, this can
 ;;; be replaced by (begin ,@body) for faster code.
 
 (define-syntax (trace-parser tag . body)
 ;  `(begin 
 ;     (let* ((k (tracing-parse/entry ',tag))
 ;	    (res (begin ,@body)))
 ;       (tracing-parse/exit ',tag k res)
 ;       res))
   (declare (ignore tag))
   `(begin ,@body)
   )
 
 ;;; Macros used by the lexer.
 
 ;;; The lexer used a macro, char-case, to dispatch on the syntactic catagory of
 ;;; a character.  These catagories (processed at compile time) are defined
 ;;; here.  Note that some of these definitions use the char-code
 ;;; directly and would need updating for different character sets.
 
 (define *lex-definitions*
   '((vtab 11)  ; define by ascii code to avoid relying of the reader
     (formfeed 12) 
     (whitechar #\newline #\space #\tab formfeed vtab)
     (small #\a - #\z)
     (large #\A - #\Z)
     (digit #\0 - #\9)
     (symbol #\! #\# #\$ #\% #\& #\* #\+ #\. #\/ #\< #\= #\> #\? #\@
       #\\ #\^ #\|)
     (presymbol #\- #\~)
     (exponent #\e #\E)
     (graphic large small digit
              #\! #\" #\# #\$ #\% #\& #\' #\( #\) #\* #\+
              #\, #\- #\. #\/ #\: #\; #\< #\= #\> #\? #\@
 	     #\[ #\\ #\] #\^ #\_ #\` #\{ #\| #\} #\~)
     (charesc #\a #\b #\f #\n #\r #\t #\v #\\ #\" #\' #\&)
     (cntrl large #\@ #\[ #\\ #\] #\^ #\_)))
 
 ;;; The char-case macro is similar to case using characters to select.
 ;;; The following capabilities are added by char-case:
 ;;;   pre-defined constants are denoted by symbols (defined above)
 ;;;   ranges of characters are represented using -.  For example,
 ;;;     (#\a - #\z #\A - #\Z) denotes all alphabetics.
 ;;;   numbers refer to the char code of a character.
 ;;; The generated code is optimized somewhat to take advantage of
 ;;; consecutive character ranges.  With a little work, this could be
 ;;; implemented using jump tables someday.
 
 (define-syntax (char-case exp . alts)
   (expand-char-case exp alts))
 
 (define (expand-char-case exp alts)
   (let ((temp (gensym)))
     `(let ((,temp ,exp))
        ,(expand-char-case1 temp alts))))
 
 (define (expand-char-case1 temp alts)
   (if (null? alts)
       '()
       (let* ((alt (car alts))
 	     (test (car alt))
 	     (body (cons 'begin (cdr alt)))
 	     (rest (expand-char-case1 temp (cdr alts))))
 	(cond ((eq? test 'else)
 	       body)
 	      (else
 	       `(if (or ,@(gen-char-tests temp
 			     (if (pair? test) test (list test))))
 		    ,body
 		    ,rest))))))
 
 (define (gen-char-tests temp tests)
   (gen-char-tests-1 temp
 	(sort-list (gather-char-tests tests) (function char<?))))
 
 (define (gen-char-tests-1 temp chars)
   (cond ((null? chars)
 	 '())
 	((long-enough-run? chars 3)
 	 (gen-range-check temp (car chars) (car chars) (cdr chars)))
 	(else
 	 `((char=? ,temp ',(car chars))
 	   ,@(gen-char-tests-1 temp (cdr chars))))))
 
 (define (gen-range-check temp first current chars)
   (if (and (pair? chars) (consec-chars? current (car chars)))
       (gen-range-check temp first (car chars) (cdr chars))
       `((and (char>=? ,temp ',first)
 	     (char<=? ,temp ',current))
 	,@(gen-char-tests-1 temp chars))))
 
 (define (consec-chars? c1 c2)
   (eqv? (+ 1 (char->integer c1)) (char->integer c2)))
 
 (define (long-enough-run? l n)
   (or (eqv? n 1)
       (and (pair? (cdr l))
 	   (consec-chars? (car l) (cadr l))
 	   (long-enough-run? (cdr l) (1- n)))))
 
 (define (gather-char-tests tests)
   (cond ((null? tests)
 	 '())
 	((symbol? (car tests))
 	 (let ((new-test (assq (car tests) *lex-definitions*)))
 	   (if new-test
 	       (gather-char-tests (append (cdr new-test) (cdr tests)))
 	       (error "Unknown character class: ~A~%" (car tests)))))
 	((integer? (car tests))
 	 (cons (integer->char (car tests))
 	       (gather-char-tests (cdr tests))))
 	((and (pair? (cdr tests)) (eq? '- (cadr tests)))
 	 (letrec ((fn (lambda (a z)
 			(if (char>? a z)
 			    (gather-char-tests (cdddr tests))
 			    (cons a (funcall
 				      fn (integer->char
 					 (+ 1 (char->integer a))) z))))))
 	   (funcall fn (car tests) (caddr tests))))
 	((char? (car tests))
 	 (cons (car tests) (gather-char-tests (cdr tests))))
 	(else
 	 (error "Invalid selector in char-case: ~A~%" (car tests)))))
 
 ;;; This macro scans a list of characters on a given syntaxtic catagory.
 ;;; The current character is always included in the resulting list.
 
 (define-syntax (scan-list-of char-type)
  `(letrec ((test-next (lambda ()
 		       (char-case *char*
 			(,char-type
 			 (let ((c *char*))
 			   (advance-char)
 			   (cons c (funcall test-next))))
 			(else '())))))
     (let ((c *char*))
       (advance-char)
       (cons c (funcall test-next)))))
 
 ;;; This macro tests for string equality in which the strings are
 ;;; represented by lists of characters.  The comparisons are expanded
 ;;; inline (really just a little partial evaluation going on here!) for
 ;;; fast execution.  The tok argument evaluate to a list of chars.  The string
 ;;; argument must be a string constant, which is converted to characters
 ;;; as the macro expands.
 
 (define-syntax (string=/list? tok string)
   (let ((temp (gensym)))
     `(let ((,temp ,tok))
        ,(expand-string=/list? temp (string->list string)))))
 
 (define (expand-string=/list? var chars)
   (if (null? chars)
       `(null? ,var)
       (let ((new-temp (gensym)))
 	`(and (pair? ,var)
 	      (char=? (car ,var) ',(car chars))
 	      (let ((,new-temp (cdr ,var)))
 		,(expand-string=/list? new-temp (cdr chars)))))))
 
 ;;; This macro extends the string equality defined above to search a
 ;;; list of reserved words quickly for keywords.  It does this by a case
 ;;; dispatch on the first character of the string and then processing
 ;;; the remaining characters wirh string=/list.  This would go a little
 ;;; faster with recursive char-case statements, but I'm a little too
 ;;; lazy at for this at the moment.  If a keyword is found is emitted
 ;;; as a symbol.  If not, the token string is emitted with the token
 ;;; type indicated.  Assume the string being scanned is a list of
 ;;; chars assigned to a var.  (Yeah - I know - I should add a gensym
 ;;; var for this argument!!).
 
 (define-syntax (parse-reserved var token-type . reserved-words)
  (let ((sorted-rws (sort-list reserved-words (function string<?))))
   `(let ((thunk (lambda () (emit-token/string ',token-type ,var))))
     (char-case (car ,var)
      ,@(expand-parse-reserved var
         (group-by-first-char (list (car sorted-rws)) (cdr sorted-rws)))
       (else (funcall thunk))))))
 
 (define (group-by-first-char group rest)
   (cond ((null? rest)
 	 (list group))
 	((char=? (string-ref (car group) 0)
 		 (string-ref (car rest) 0))
 	 (group-by-first-char (append group (list (car rest))) (cdr rest)))
 	(else
 	 (cons group (group-by-first-char (list (car rest)) (cdr rest))))))
 
 (define (expand-parse-reserved var groups)
   (if (null? groups)
       '()
       `((,(string-ref (caar groups) 0)
 	 (cond ,@(expand-parse-reserved/group var (car groups))
 	       (else (funcall thunk))))
 	,@(expand-parse-reserved var (cdr groups)))))
 
 (define (expand-parse-reserved/group var group)
   (if (null? group)
       '()
       `(((string=/list? (cdr ,var)
 	     ,(substring (car group) 1 (string-length (car group))))
 	 (emit-token ',(string->symbol (car group))))
 	,@(expand-parse-reserved/group var (cdr group)))))
 
 
 ;;; The following macros are used by the parser.
 
 ;;; The primary macro used by the parser is token-case, which dispatches
 ;;; on the type of the current token (this is always *token* - unlike the
 ;;; lexer, no lookahead is provided; however, some of these dispatches are
 ;;; procedures that do a limited lookahead.  The problem with lookahead is that
 ;;; the layout rule adds tokens which are not visible looking into the
 ;;; token stream directly.
 
 ;;; Unlike char-case, the token is normally advanced unless the selector
 ;;; includes `no-advance'.  The final else also avoids advancing the token.
 
 ;;; In addition to raw token types, more complex types can be used.  These
 ;;; are defined here.  The construct `satisfies fn' calls the indicated
 ;;; function to determine whether the current token matches.
 
 ;;; If the token type to be matched is not a constant, the construct
 ;;; `unquote var' matches the current token against the type in the var.
 
 (define *predefined-syntactic-catagories* '(
   (+ satisfies at-varsym/+?)
   (- satisfies at-varsym/-?)
   (tycon no-advance conid)
   (tyvar no-advance varid)
   (var no-advance varid satisfies at-varsym/paren?)
   (con no-advance conid satisfies at-consym/paren?)
   (name no-advance var con)
   (consym/paren no-advance satisfies at-consym/paren?)
   (varsym? no-advance varsym)
   (consym? no-advance consym)
   (varid? no-advance varid)
   (conid? no-advance conid)
   (op no-advance varsym consym \`)
   (varop no-advance varsym satisfies at-varid/quoted?)
   (conop no-advance consym satisfies at-conid/quoted?)
   (modid no-advance conid)
   (literal no-advance integer float char string)
   (numeric no-advance integer float)
   (k no-advance integer)
   (+k no-advance satisfies at-+k?)
   (-n no-advance satisfies at--n?)
   (apat-start no-advance varid conid literal _ \( \[ \~)
   (pat-start no-advance - apat-start)
   (atype-start no-advance tycon tyvar \( \[)
   (aexp-start no-advance varid conid \( \[ literal)
   ))
 
 ;;; The format of token-case is
 ;;;  (token-case
 ;;;    (sel1 . e1) (sel2 . e2) ... [(else . en)])
 ;;; If the sel is a symbol it is the same as a singleton list: (@ x) = ((@) x)
 
 ;;; Warning: this generates rather poor code!  Should be fixed up someday.
 
 (define-syntax (token-case . alts)
   `(cond ,@(map (function gen-token-case-alt) alts)))
 
 (define (gen-token-case-alt alt)
   (let ((test (car alt))
 	(code (cdr alt)))
     (cond ((eq? test 'else)
 	   `(else ,@code))
 	  ((symbol? test)
 	   (gen-token-case-alt-1 (expand-catagories (list test)) code))
 	  (else
 	   (gen-token-case-alt-1 (expand-catagories test) code)))))
 
 (define (expand-catagories terms)
   (if (null? terms)
       terms
       (let ((a (assq (car terms) *predefined-syntactic-catagories*))
 	    (r (expand-catagories (cdr terms))))
 	(if (null? a)
 	    (cons (car terms) r)
 	    (expand-catagories (append (cdr a) r))))))
 
 (define (gen-token-case-alt-1 test code)
   `((or ,@(gen-token-test test))
     ,@(if (memq 'no-advance test) '() '((advance-token)))
     ,@code))
 
 (define (gen-token-test test)
   (cond ((null? test)
 	 '())
 	((eq? (car test) 'no-advance)
 	 (gen-token-test (cdr test)))
 	((eq? (car test) 'unquote)
 	 (cons `(eq? *token* ,(cadr test)) (gen-token-test (cddr test))))
 	((eq? (car test) 'satisfies)
 	 (cons (list (cadr test)) (gen-token-test (cddr test))))
 	(else
 	 (cons `(eq? *token* ',(car test)) (gen-token-test (cdr test))))))
 
 ;;; require-tok requires a specific token to be at the scanner.  If it
 ;;; is found, the token is advanced over.  Otherwise, the error
 ;;; routine is called.
 
 (define-syntax (require-token tok error-handler)
   `(token-case
     (,tok '())
     (else ,error-handler)))
 
 ;;; The save-parser-context macro captures the current line & file and
 ;;; attaches it to the ast node generated.
 
 (define-syntax (save-parser-context . body)
   (let ((temp1 (gensym))
 	(temp2 (gensym)))
     `(let ((,temp1 (capture-current-line))
 	   (,temp2 (begin ,@body)))
        (setf (ast-node-line-number ,temp2) ,temp1)
        ,temp2)))
 
 (define (capture-current-line)
   (make source-pointer (line *current-line*) (file *current-file*)))
 
 (define-syntax (push-decl-list decl place)
   `(setf ,place (nconc ,place (list ,decl))))