;;; Macro definitions for the parser & lexer. ;;; This macro allows debugging of the lexer. Before releasing, this can ;;; be replaced by (begin ,@body) for faster code. (define-syntax (trace-parser tag . body) ; `(begin ; (let* ((k (tracing-parse/entry ',tag)) ; (res (begin ,@body))) ; (tracing-parse/exit ',tag k res) ; res)) (declare (ignore tag)) `(begin ,@body) ) ;;; Macros used by the lexer. ;;; The lexer used a macro, char-case, to dispatch on the syntactic catagory of ;;; a character. These catagories (processed at compile time) are defined ;;; here. Note that some of these definitions use the char-code ;;; directly and would need updating for different character sets. (define *lex-definitions* '((vtab 11) ; define by ascii code to avoid relying of the reader (formfeed 12) (whitechar #\newline #\space #\tab formfeed vtab) (small #\a - #\z) (large #\A - #\Z) (digit #\0 - #\9) (symbol #\! #\# #\$ #\% #\& #\* #\+ #\. #\/ #\< #\= #\> #\? #\@ #\\ #\^ #\|) (presymbol #\- #\~) (exponent #\e #\E) (graphic large small digit #\! #\" #\# #\$ #\% #\& #\' #\( #\) #\* #\+ #\, #\- #\. #\/ #\: #\; #\< #\= #\> #\? #\@ #\[ #\\ #\] #\^ #\_ #\` #\{ #\| #\} #\~) (charesc #\a #\b #\f #\n #\r #\t #\v #\\ #\" #\' #\&) (cntrl large #\@ #\[ #\\ #\] #\^ #\_))) ;;; The char-case macro is similar to case using characters to select. ;;; The following capabilities are added by char-case: ;;; pre-defined constants are denoted by symbols (defined above) ;;; ranges of characters are represented using -. For example, ;;; (#\a - #\z #\A - #\Z) denotes all alphabetics. ;;; numbers refer to the char code of a character. ;;; The generated code is optimized somewhat to take advantage of ;;; consecutive character ranges. With a little work, this could be ;;; implemented using jump tables someday. (define-syntax (char-case exp . alts) (expand-char-case exp alts)) (define (expand-char-case exp alts) (let ((temp (gensym))) `(let ((,temp ,exp)) ,(expand-char-case1 temp alts)))) (define (expand-char-case1 temp alts) (if (null? alts) '() (let* ((alt (car alts)) (test (car alt)) (body (cons 'begin (cdr alt))) (rest (expand-char-case1 temp (cdr alts)))) (cond ((eq? test 'else) body) (else `(if (or ,@(gen-char-tests temp (if (pair? test) test (list test)))) ,body ,rest)))))) (define (gen-char-tests temp tests) (gen-char-tests-1 temp (sort-list (gather-char-tests tests) (function char=? ,temp ',first) (char<=? ,temp ',current)) ,@(gen-char-tests-1 temp chars)))) (define (consec-chars? c1 c2) (eqv? (+ 1 (char->integer c1)) (char->integer c2))) (define (long-enough-run? l n) (or (eqv? n 1) (and (pair? (cdr l)) (consec-chars? (car l) (cadr l)) (long-enough-run? (cdr l) (1- n))))) (define (gather-char-tests tests) (cond ((null? tests) '()) ((symbol? (car tests)) (let ((new-test (assq (car tests) *lex-definitions*))) (if new-test (gather-char-tests (append (cdr new-test) (cdr tests))) (error "Unknown character class: ~A~%" (car tests))))) ((integer? (car tests)) (cons (integer->char (car tests)) (gather-char-tests (cdr tests)))) ((and (pair? (cdr tests)) (eq? '- (cadr tests))) (letrec ((fn (lambda (a z) (if (char>? a z) (gather-char-tests (cdddr tests)) (cons a (funcall fn (integer->char (+ 1 (char->integer a))) z)))))) (funcall fn (car tests) (caddr tests)))) ((char? (car tests)) (cons (car tests) (gather-char-tests (cdr tests)))) (else (error "Invalid selector in char-case: ~A~%" (car tests))))) ;;; This macro scans a list of characters on a given syntaxtic catagory. ;;; The current character is always included in the resulting list. (define-syntax (scan-list-of char-type) `(letrec ((test-next (lambda () (char-case *char* (,char-type (let ((c *char*)) (advance-char) (cons c (funcall test-next)))) (else '()))))) (let ((c *char*)) (advance-char) (cons c (funcall test-next))))) ;;; This macro tests for string equality in which the strings are ;;; represented by lists of characters. The comparisons are expanded ;;; inline (really just a little partial evaluation going on here!) for ;;; fast execution. The tok argument evaluate to a list of chars. The string ;;; argument must be a string constant, which is converted to characters ;;; as the macro expands. (define-syntax (string=/list? tok string) (let ((temp (gensym))) `(let ((,temp ,tok)) ,(expand-string=/list? temp (string->list string))))) (define (expand-string=/list? var chars) (if (null? chars) `(null? ,var) (let ((new-temp (gensym))) `(and (pair? ,var) (char=? (car ,var) ',(car chars)) (let ((,new-temp (cdr ,var))) ,(expand-string=/list? new-temp (cdr chars))))))) ;;; This macro extends the string equality defined above to search a ;;; list of reserved words quickly for keywords. It does this by a case ;;; dispatch on the first character of the string and then processing ;;; the remaining characters wirh string=/list. This would go a little ;;; faster with recursive char-case statements, but I'm a little too ;;; lazy at for this at the moment. If a keyword is found is emitted ;;; as a symbol. If not, the token string is emitted with the token ;;; type indicated. Assume the string being scanned is a list of ;;; chars assigned to a var. (Yeah - I know - I should add a gensym ;;; var for this argument!!). (define-syntax (parse-reserved var token-type . reserved-words) (let ((sorted-rws (sort-list reserved-words (function stringsymbol (car group)))) ,@(expand-parse-reserved/group var (cdr group))))) ;;; The following macros are used by the parser. ;;; The primary macro used by the parser is token-case, which dispatches ;;; on the type of the current token (this is always *token* - unlike the ;;; lexer, no lookahead is provided; however, some of these dispatches are ;;; procedures that do a limited lookahead. The problem with lookahead is that ;;; the layout rule adds tokens which are not visible looking into the ;;; token stream directly. ;;; Unlike char-case, the token is normally advanced unless the selector ;;; includes `no-advance'. The final else also avoids advancing the token. ;;; In addition to raw token types, more complex types can be used. These ;;; are defined here. The construct `satisfies fn' calls the indicated ;;; function to determine whether the current token matches. ;;; If the token type to be matched is not a constant, the construct ;;; `unquote var' matches the current token against the type in the var. (define *predefined-syntactic-catagories* '( (+ satisfies at-varsym/+?) (- satisfies at-varsym/-?) (tycon no-advance conid) (tyvar no-advance varid) (var no-advance varid satisfies at-varsym/paren?) (con no-advance conid satisfies at-consym/paren?) (name no-advance var con) (consym/paren no-advance satisfies at-consym/paren?) (varsym? no-advance varsym) (consym? no-advance consym) (varid? no-advance varid) (conid? no-advance conid) (op no-advance varsym consym \`) (varop no-advance varsym satisfies at-varid/quoted?) (conop no-advance consym satisfies at-conid/quoted?) (modid no-advance conid) (literal no-advance integer float char string) (numeric no-advance integer float) (k no-advance integer) (+k no-advance satisfies at-+k?) (-n no-advance satisfies at--n?) (apat-start no-advance varid conid literal _ \( \[ \~) (pat-start no-advance - apat-start) (atype-start no-advance tycon tyvar \( \[) (aexp-start no-advance varid conid \( \[ literal) )) ;;; The format of token-case is ;;; (token-case ;;; (sel1 . e1) (sel2 . e2) ... [(else . en)]) ;;; If the sel is a symbol it is the same as a singleton list: (@ x) = ((@) x) ;;; Warning: this generates rather poor code! Should be fixed up someday. (define-syntax (token-case . alts) `(cond ,@(map (function gen-token-case-alt) alts))) (define (gen-token-case-alt alt) (let ((test (car alt)) (code (cdr alt))) (cond ((eq? test 'else) `(else ,@code)) ((symbol? test) (gen-token-case-alt-1 (expand-catagories (list test)) code)) (else (gen-token-case-alt-1 (expand-catagories test) code))))) (define (expand-catagories terms) (if (null? terms) terms (let ((a (assq (car terms) *predefined-syntactic-catagories*)) (r (expand-catagories (cdr terms)))) (if (null? a) (cons (car terms) r) (expand-catagories (append (cdr a) r)))))) (define (gen-token-case-alt-1 test code) `((or ,@(gen-token-test test)) ,@(if (memq 'no-advance test) '() '((advance-token))) ,@code)) (define (gen-token-test test) (cond ((null? test) '()) ((eq? (car test) 'no-advance) (gen-token-test (cdr test))) ((eq? (car test) 'unquote) (cons `(eq? *token* ,(cadr test)) (gen-token-test (cddr test)))) ((eq? (car test) 'satisfies) (cons (list (cadr test)) (gen-token-test (cddr test)))) (else (cons `(eq? *token* ',(car test)) (gen-token-test (cdr test)))))) ;;; require-tok requires a specific token to be at the scanner. If it ;;; is found, the token is advanced over. Otherwise, the error ;;; routine is called. (define-syntax (require-token tok error-handler) `(token-case (,tok '()) (else ,error-handler))) ;;; The save-parser-context macro captures the current line & file and ;;; attaches it to the ast node generated. (define-syntax (save-parser-context . body) (let ((temp1 (gensym)) (temp2 (gensym))) `(let ((,temp1 (capture-current-line)) (,temp2 (begin ,@body))) (setf (ast-node-line-number ,temp2) ,temp1) ,temp2))) (define (capture-current-line) (make source-pointer (line *current-line*) (file *current-file*))) (define-syntax (push-decl-list decl place) `(setf ,place (nconc ,place (list ,decl))))