;;; This file abstracts the representation of tokens. It is used by both ;;; the lexer & parser. This also contains routines for converting ;;; individual tokens to ast structure. Routines used by the ;;; token-case macro in `satisfies' clauses are here too. ;;; Lexer routines for emitting tokens: (define (emit-token type . args) (cond (*on-new-line?* (push (list 'line *start-line* *start-col*) *tokens*)) (*save-col?* (push (list 'col *start-col*) *tokens*))) (push (cons type args) *tokens*) (setf *on-new-line?* '#f) (setf *save-col?* (memq type '(|where| |of| |let|)))) (define (emit-token/string type string-as-list) (emit-token type (list->string string-as-list))) ;;; Parser routines: ;;; These routines take care of the token stream in the parser. They ;;; maintain globals for the current token and its location. ;;; Globals used: ;;; *token-stream* remaining tokens to be parsed ;;; *token* current token type ;;; *token-args* current token arguments ;;; *layout-stack* columns at which layout is being done ;;; *current-line* current line the scanner is on ;;; *current-col* current col; valid at start of line & after where,let,of ;;; *current-file* (define (init-token-stream tokens) (setf *token-stream* tokens) (setf *layout-stack* '()) (advance-token)) (define (advance-token) (cond ((null? *token-stream*) (setf *token* 'eof)) (else (let* ((token (car *token-stream*))) (setf *token-stream* (cdr *token-stream*)) (advance-token-1 (car token) (cdr token)))))) (define (advance-token-1 type args) (cond ((eq? type 'file) (setf *current-file* (car args)) (advance-token)) ((eq? type 'col) (setf *current-col* (car args)) (advance-token)) ((eq? type 'line) ;; assume blank lines have been removed (let ((line (car args)) (col (cadr args))) (setf *current-line* line) (setf *current-col* col) (setf *token-stream* (resolve-layout *token-stream* *layout-stack*))) (advance-token)) (else (setf *token* type) (setf *token-args* args) type))) (define (insert-extra-token tok-type stream) ; used by layout (cons (list tok-type) stream)) ;;; This looks for the { to decide of layout will apply. If so, the layout ;;; stack is pushed. The body function, fn, is called with a boolean which ;;; tells it the whether layout rule is in force. ;;; *** The CMU CL compiler barfs with some kind of internal error ;;; *** on this function. See the revised definition below. ;(define (start-layout fn) ; (token-case ; (\{ (funcall fn '#f)) ; (else ; (let/cc recovery-fn ; (push (cons *current-col* (lambda () ; (let ((res (funcall fn '#t))) ; (funcall recovery-fn res)))) ; *layout-stack*) ; (funcall fn '#t))))) (define (start-layout fn) (token-case (\{ (funcall fn '#f)) (else (let/cc recovery-fn (start-layout-1 fn recovery-fn))))) (define (start-layout-1 fn recovery-fn) (push (cons *current-col* (lambda () (let ((res (funcall fn '#t))) (funcall recovery-fn res)))) *layout-stack*) (funcall fn '#t)) (define (layout-col x) (car x)) (define (layout-recovery-fn x) (cdr x)) (define (close-layout in-layout?) (cond (in-layout? (setf *layout-stack* (cdr *layout-stack*)) (token-case ($\} '()) ; the advance-token routine may have inserted this (else '()))) (else (token-case (\} '()) (else (signal-missing-brace)))))) (define (signal-missing-brace) (parser-error 'missing-brace "Missing `}'.")) (define (resolve-layout stream layout-stack) (if (null? layout-stack) stream (let ((col (layout-col (car layout-stack)))) (declare (type fixnum col)) (cond ((= (the fixnum *current-col*) col) (insert-extra-token '\; stream)) ((< (the fixnum *current-col*) col) (insert-extra-token '$\} (resolve-layout stream (cdr layout-stack)))) (else stream) )))) ;;; The following routines are used for backtracking. This is a bit of ;;; a hack at the moment. (define (save-scanner-state) (vector *token* *token-args* *token-stream* *layout-stack* *current-line* *current-col*)) (define (restore-excursion state) (setf *token* (vector-ref state 0)) (setf *token-args* (vector-ref state 1)) (setf *token-stream* (vector-ref state 2)) (setf *layout-stack* (vector-ref state 3)) (setf *current-line* (vector-ref state 4)) (setf *current-col* (vector-ref state 5))) (define (eq-token? type) (eq? type *token*)) (define (eq-token-arg? str) (string=? str (car *token-args*))) ;;; lookahead into the token stream (define (peek-1-type) (peek-toks 0 *token-stream*)) (define (peek-2-type) (peek-toks 1 *token-stream*)) ;;; This is a Q&D way of looking ahead. It does not expand the layout ;;; as it goes so there may be missing } and ;. This should not matter ;;; in the places where this is used since these would be invalid anyway. ;;; To be safe, token types are rechecked while advancing to verify the ;;; lookahead. (define (peek-toks n toks) (declare (type fixnum n)) (cond ((null? toks) 'eof) ((memq (caar toks) '(col line)) (peek-toks n (cdr toks))) ((eqv? n 0) (caar toks)) (else (peek-toks (1- n) (cdr toks))))) ;; These routines handle the `satisfies' clauses used in token-case. (define (at-varsym/+?) (and (eq? *token* 'varsym) (string=? (car *token-args*) "+"))) (define (at-varsym/-?) (and (eq? *token* 'varsym) (string=? (car *token-args*) "-"))) (define (at-varsym/paren?) (and (eq? *token* '\() (eq? (peek-1-type) 'varsym) (eq? (peek-2-type) '\)))) (define (at-consym/paren?) (and (eq? *token* '\() (eq? (peek-1-type) 'consym) (eq? (peek-2-type) '\)))) (define (at-varid/quoted?) (and (eq? *token* '\`) (eq? (peek-1-type) 'varid))) (define (at-conid/quoted?) (and (eq? *token* '\`) (eq? (peek-1-type) 'conid))) (define (at-+k?) (and (at-varsym/+?) (eq? (peek-1-type) 'integer))) (define (at--n?) (and (at-varsym/-?) (memq (peek-1-type) '(integer float)))) ;;; The following routines convert the simplest tokens to AST structure. (define-local-syntax (return+advance x) `(let ((x ,x)) (advance-token) x)) (define (token->symbol) (return+advance (string->symbol (car *token-args*)))) (define (token->symbol/con) ; for conid, aconid (return+advance (string->symbol (add-con-prefix (car *token-args*))))) (define (var->symbol) (token-case (\( (token-case (varsym? (let ((res (token->symbol))) (token-case (\) res) (else (signal-missing-token "`)'" "var"))))) (else (signal-missing-token "" "var")))) (varid? (token->symbol)))) (define (var->ast) (let ((vname (var->symbol))) (make var-ref (name vname) (infix? '#f) (var *undefined-def*)))) (define (var->entity) (let ((vname (var->symbol))) (make entity-var (name vname)))) (define (con->symbol) (token-case (\( (token-case (consym? (let ((res (token->symbol/con))) (token-case (\) res) (else (signal-missing-token "`)'" "con"))))) (else (signal-missing-token "" "con")))) (conid? (token->symbol/con)))) (define (varop->symbol) (token-case (\` (token-case (varid? (let ((res (token->symbol))) (token-case (\` res) (else (signal-missing-token "``'" "varop"))))) (else (signal-missing-token "" "varop")))) (varsym? (token->symbol)))) (define (varop->ast) (let ((varop-name (varop->symbol))) (make var-ref (name varop-name) (infix? '#t) (var *undefined-def*)))) (define (conop->symbol) (token-case (\` (token-case (conid? (let ((res (token->symbol/con))) (token-case (\` res) (else (signal-missing-token "``'" "conop"))))) (else (signal-missing-token "" "conop")))) (consym? (token->symbol/con)))) (define (conop->ast) (let ((conop-name (conop->symbol))) (make con-ref (name conop-name) (infix? '#t) (con *undefined-def*)))) (define (op->symbol) (token-case (\` (token-case (conid? (let ((res (token->symbol/con))) (token-case (\` res) (else (signal-missing-token "``'" "op"))))) (varid? (let ((res (token->symbol))) (token-case (\` res) (else (signal-missing-token "``'" "op"))))) (else (signal-missing-token " or " "op")))) (consym? (token->symbol/con)) (varsym? (token->symbol)))) (define (con->ast) ; for conid, aconid (let ((name (con->symbol))) (make con-ref (name name) (con *undefined-def*) (infix? '#f)))) (define (pcon->ast) ; for aconid, conid (let ((name (con->symbol))) (make pcon (name name) (con *undefined-def*) (pats '()) (infix? '#f)))) (define (pconop->ast) ; for aconop, conop (let ((name (conop->symbol))) (make pcon (name name) (con *undefined-def*) (pats '()) (infix? '#t)))) (define (tycon->ast) ; for aconid (let ((name (token->symbol))) (make tycon (name name) (def *undefined-def*) (args '())))) (define (class->ast) ; for aconid (let ((name (token->symbol))) (make class-ref (name name) (class *undefined-def*)))) (define (tyvar->ast) ; for avarid (let ((name (token->symbol))) (make tyvar (name name)))) (define (token->integer) ; for integer (return+advance (car *token-args*))) (define (integer->ast) ; for integer (return+advance (make integer-const (value (car *token-args*))))) (define (float->ast) (return+advance (make float-const (numerator (car *token-args*)) (denominator (cadr *token-args*)) (exponent (caddr *token-args*))))) (define (string->ast) (return+advance (make string-const (value (car *token-args*))))) (define (char->ast) (return+advance (make char-const (value (car *token-args*))))) (define (literal->ast) (token-case ((no-advance integer) (integer->ast)) ((no-advance float) (float->ast)) ((no-advance string) (string->ast)) ((no-advance char) (char->ast))))