4e987026 |
;;; Macro definitions for the parser & lexer.
;;; This macro allows debugging of the lexer. Before releasing, this can
;;; be replaced by (begin ,@body) for faster code.
(define-syntax (trace-parser tag . body)
; `(begin
; (let* ((k (tracing-parse/entry ',tag))
; (res (begin ,@body)))
; (tracing-parse/exit ',tag k res)
; res))
(declare (ignore tag))
`(begin ,@body)
)
;;; Macros used by the lexer.
;;; The lexer used a macro, char-case, to dispatch on the syntactic catagory of
;;; a character. These catagories (processed at compile time) are defined
;;; here. Note that some of these definitions use the char-code
;;; directly and would need updating for different character sets.
(define *lex-definitions*
'((vtab 11) ; define by ascii code to avoid relying of the reader
(formfeed 12)
(whitechar #\newline #\space #\tab formfeed vtab)
(small #\a - #\z)
(large #\A - #\Z)
(digit #\0 - #\9)
(symbol #\! #\# #\$ #\% #\& #\* #\+ #\. #\/ #\< #\= #\> #\? #\@
#\\ #\^ #\|)
(presymbol #\- #\~)
(exponent #\e #\E)
(graphic large small digit
#\! #\" #\# #\$ #\% #\& #\' #\( #\) #\* #\+
#\, #\- #\. #\/ #\: #\; #\< #\= #\> #\? #\@
#\[ #\\ #\] #\^ #\_ #\` #\{ #\| #\} #\~)
(charesc #\a #\b #\f #\n #\r #\t #\v #\\ #\" #\' #\&)
(cntrl large #\@ #\[ #\\ #\] #\^ #\_)))
;;; The char-case macro is similar to case using characters to select.
;;; The following capabilities are added by char-case:
;;; pre-defined constants are denoted by symbols (defined above)
;;; ranges of characters are represented using -. For example,
;;; (#\a - #\z #\A - #\Z) denotes all alphabetics.
;;; numbers refer to the char code of a character.
;;; The generated code is optimized somewhat to take advantage of
;;; consecutive character ranges. With a little work, this could be
;;; implemented using jump tables someday.
(define-syntax (char-case exp . alts)
(expand-char-case exp alts))
(define (expand-char-case exp alts)
(let ((temp (gensym)))
`(let ((,temp ,exp))
,(expand-char-case1 temp alts))))
(define (expand-char-case1 temp alts)
(if (null? alts)
'()
(let* ((alt (car alts))
(test (car alt))
(body (cons 'begin (cdr alt)))
(rest (expand-char-case1 temp (cdr alts))))
(cond ((eq? test 'else)
body)
(else
`(if (or ,@(gen-char-tests temp
(if (pair? test) test (list test))))
,body
,rest))))))
(define (gen-char-tests temp tests)
(gen-char-tests-1 temp
(sort-list (gather-char-tests tests) (function char<?))))
(define (gen-char-tests-1 temp chars)
(cond ((null? chars)
'())
((long-enough-run? chars 3)
(gen-range-check temp (car chars) (car chars) (cdr chars)))
(else
`((char=? ,temp ',(car chars))
,@(gen-char-tests-1 temp (cdr chars))))))
(define (gen-range-check temp first current chars)
(if (and (pair? chars) (consec-chars? current (car chars)))
(gen-range-check temp first (car chars) (cdr chars))
`((and (char>=? ,temp ',first)
(char<=? ,temp ',current))
,@(gen-char-tests-1 temp chars))))
(define (consec-chars? c1 c2)
(eqv? (+ 1 (char->integer c1)) (char->integer c2)))
(define (long-enough-run? l n)
(or (eqv? n 1)
(and (pair? (cdr l))
(consec-chars? (car l) (cadr l))
(long-enough-run? (cdr l) (1- n)))))
(define (gather-char-tests tests)
(cond ((null? tests)
'())
((symbol? (car tests))
(let ((new-test (assq (car tests) *lex-definitions*)))
(if new-test
(gather-char-tests (append (cdr new-test) (cdr tests)))
(error "Unknown character class: ~A~%" (car tests)))))
((integer? (car tests))
(cons (integer->char (car tests))
(gather-char-tests (cdr tests))))
((and (pair? (cdr tests)) (eq? '- (cadr tests)))
(letrec ((fn (lambda (a z)
(if (char>? a z)
(gather-char-tests (cdddr tests))
(cons a (funcall
fn (integer->char
(+ 1 (char->integer a))) z))))))
(funcall fn (car tests) (caddr tests))))
((char? (car tests))
(cons (car tests) (gather-char-tests (cdr tests))))
(else
(error "Invalid selector in char-case: ~A~%" (car tests)))))
;;; This macro scans a list of characters on a given syntaxtic catagory.
;;; The current character is always included in the resulting list.
(define-syntax (scan-list-of char-type)
`(letrec ((test-next (lambda ()
(char-case *char*
(,char-type
(let ((c *char*))
(advance-char)
(cons c (funcall test-next))))
(else '())))))
(let ((c *char*))
(advance-char)
(cons c (funcall test-next)))))
;;; This macro tests for string equality in which the strings are
;;; represented by lists of characters. The comparisons are expanded
;;; inline (really just a little partial evaluation going on here!) for
;;; fast execution. The tok argument evaluate to a list of chars. The string
;;; argument must be a string constant, which is converted to characters
;;; as the macro expands.
(define-syntax (string=/list? tok string)
(let ((temp (gensym)))
`(let ((,temp ,tok))
,(expand-string=/list? temp (string->list string)))))
(define (expand-string=/list? var chars)
(if (null? chars)
`(null? ,var)
(let ((new-temp (gensym)))
`(and (pair? ,var)
(char=? (car ,var) ',(car chars))
(let ((,new-temp (cdr ,var)))
,(expand-string=/list? new-temp (cdr chars)))))))
;;; This macro extends the string equality defined above to search a
;;; list of reserved words quickly for keywords. It does this by a case
;;; dispatch on the first character of the string and then processing
;;; the remaining characters wirh string=/list. This would go a little
;;; faster with recursive char-case statements, but I'm a little too
;;; lazy at for this at the moment. If a keyword is found is emitted
;;; as a symbol. If not, the token string is emitted with the token
;;; type indicated. Assume the string being scanned is a list of
;;; chars assigned to a var. (Yeah - I know - I should add a gensym
;;; var for this argument!!).
(define-syntax (parse-reserved var token-type . reserved-words)
(let ((sorted-rws (sort-list reserved-words (function string<?))))
`(let ((thunk (lambda () (emit-token/string ',token-type ,var))))
(char-case (car ,var)
,@(expand-parse-reserved var
(group-by-first-char (list (car sorted-rws)) (cdr sorted-rws)))
(else (funcall thunk))))))
(define (group-by-first-char group rest)
(cond ((null? rest)
(list group))
((char=? (string-ref (car group) 0)
(string-ref (car rest) 0))
(group-by-first-char (append group (list (car rest))) (cdr rest)))
(else
(cons group (group-by-first-char (list (car rest)) (cdr rest))))))
(define (expand-parse-reserved var groups)
(if (null? groups)
'()
`((,(string-ref (caar groups) 0)
(cond ,@(expand-parse-reserved/group var (car groups))
(else (funcall thunk))))
,@(expand-parse-reserved var (cdr groups)))))
(define (expand-parse-reserved/group var group)
(if (null? group)
'()
`(((string=/list? (cdr ,var)
,(substring (car group) 1 (string-length (car group))))
(emit-token ',(string->symbol (car group))))
,@(expand-parse-reserved/group var (cdr group)))))
;;; The following macros are used by the parser.
;;; The primary macro used by the parser is token-case, which dispatches
;;; on the type of the current token (this is always *token* - unlike the
;;; lexer, no lookahead is provided; however, some of these dispatches are
;;; procedures that do a limited lookahead. The problem with lookahead is that
;;; the layout rule adds tokens which are not visible looking into the
;;; token stream directly.
;;; Unlike char-case, the token is normally advanced unless the selector
;;; includes `no-advance'. The final else also avoids advancing the token.
;;; In addition to raw token types, more complex types can be used. These
;;; are defined here. The construct `satisfies fn' calls the indicated
;;; function to determine whether the current token matches.
;;; If the token type to be matched is not a constant, the construct
;;; `unquote var' matches the current token against the type in the var.
(define *predefined-syntactic-catagories* '(
(+ satisfies at-varsym/+?)
(- satisfies at-varsym/-?)
(tycon no-advance conid)
(tyvar no-advance varid)
(var no-advance varid satisfies at-varsym/paren?)
(con no-advance conid satisfies at-consym/paren?)
(name no-advance var con)
(consym/paren no-advance satisfies at-consym/paren?)
(varsym? no-advance varsym)
(consym? no-advance consym)
(varid? no-advance varid)
(conid? no-advance conid)
(op no-advance varsym consym \`)
(varop no-advance varsym satisfies at-varid/quoted?)
(conop no-advance consym satisfies at-conid/quoted?)
(modid no-advance conid)
(literal no-advance integer float char string)
(numeric no-advance integer float)
(k no-advance integer)
(+k no-advance satisfies at-+k?)
(-n no-advance satisfies at--n?)
(apat-start no-advance varid conid literal _ \( \[ \~)
(pat-start no-advance - apat-start)
(atype-start no-advance tycon tyvar \( \[)
(aexp-start no-advance varid conid \( \[ literal)
))
;;; The format of token-case is
;;; (token-case
;;; (sel1 . e1) (sel2 . e2) ... [(else . en)])
;;; If the sel is a symbol it is the same as a singleton list: (@ x) = ((@) x)
;;; Warning: this generates rather poor code! Should be fixed up someday.
(define-syntax (token-case . alts)
`(cond ,@(map (function gen-token-case-alt) alts)))
(define (gen-token-case-alt alt)
(let ((test (car alt))
(code (cdr alt)))
(cond ((eq? test 'else)
`(else ,@code))
((symbol? test)
(gen-token-case-alt-1 (expand-catagories (list test)) code))
(else
(gen-token-case-alt-1 (expand-catagories test) code)))))
(define (expand-catagories terms)
(if (null? terms)
terms
(let ((a (assq (car terms) *predefined-syntactic-catagories*))
(r (expand-catagories (cdr terms))))
(if (null? a)
(cons (car terms) r)
(expand-catagories (append (cdr a) r))))))
(define (gen-token-case-alt-1 test code)
`((or ,@(gen-token-test test))
,@(if (memq 'no-advance test) '() '((advance-token)))
,@code))
(define (gen-token-test test)
(cond ((null? test)
'())
((eq? (car test) 'no-advance)
(gen-token-test (cdr test)))
((eq? (car test) 'unquote)
(cons `(eq? *token* ,(cadr test)) (gen-token-test (cddr test))))
((eq? (car test) 'satisfies)
(cons (list (cadr test)) (gen-token-test (cddr test))))
(else
(cons `(eq? *token* ',(car test)) (gen-token-test (cdr test))))))
;;; require-tok requires a specific token to be at the scanner. If it
;;; is found, the token is advanced over. Otherwise, the error
;;; routine is called.
(define-syntax (require-token tok error-handler)
`(token-case
(,tok '())
(else ,error-handler)))
;;; The save-parser-context macro captures the current line & file and
;;; attaches it to the ast node generated.
(define-syntax (save-parser-context . body)
(let ((temp1 (gensym))
(temp2 (gensym)))
`(let ((,temp1 (capture-current-line))
(,temp2 (begin ,@body)))
(setf (ast-node-line-number ,temp2) ,temp1)
,temp2)))
(define (capture-current-line)
(make source-pointer (line *current-line*) (file *current-file*)))
(define-syntax (push-decl-list decl place)
`(setf ,place (nconc ,place (list ,decl))))
|