4e987026 |
;;; This file abstracts the representation of tokens. It is used by both
;;; the lexer & parser. This also contains routines for converting
;;; individual tokens to ast structure. Routines used by the
;;; token-case macro in `satisfies' clauses are here too.
;;; Lexer routines for emitting tokens:
(define (emit-token type . args)
(cond (*on-new-line?*
(push (list 'line *start-line* *start-col*) *tokens*))
(*save-col?*
(push (list 'col *start-col*) *tokens*)))
(push (cons type args) *tokens*)
(setf *on-new-line?* '#f)
(setf *save-col?* (memq type '(|where| |of| |let|))))
(define (emit-token/string type string-as-list)
(emit-token type (list->string string-as-list)))
;;; Parser routines:
;;; These routines take care of the token stream in the parser. They
;;; maintain globals for the current token and its location.
;;; Globals used:
;;; *token-stream* remaining tokens to be parsed
;;; *token* current token type
;;; *token-args* current token arguments
;;; *layout-stack* columns at which layout is being done
;;; *current-line* current line the scanner is on
;;; *current-col* current col; valid at start of line & after where,let,of
;;; *current-file*
(define (init-token-stream tokens)
(setf *token-stream* tokens)
(setf *layout-stack* '())
(advance-token))
(define (advance-token)
(cond ((null? *token-stream*)
(setf *token* 'eof))
(else
(let* ((token (car *token-stream*)))
(setf *token-stream* (cdr *token-stream*))
(advance-token-1 (car token) (cdr token))))))
(define (advance-token-1 type args)
(cond ((eq? type 'file)
(setf *current-file* (car args))
(advance-token))
((eq? type 'col)
(setf *current-col* (car args))
(advance-token))
((eq? type 'line) ;; assume blank lines have been removed
(let ((line (car args))
(col (cadr args)))
(setf *current-line* line)
(setf *current-col* col)
(setf *token-stream*
(resolve-layout *token-stream* *layout-stack*)))
(advance-token))
(else
(setf *token* type)
(setf *token-args* args)
type)))
(define (insert-extra-token tok-type stream) ; used by layout
(cons (list tok-type) stream))
;;; This looks for the { to decide of layout will apply. If so, the layout
;;; stack is pushed. The body function, fn, is called with a boolean which
;;; tells it the whether layout rule is in force.
;;; *** The CMU CL compiler barfs with some kind of internal error
;;; *** on this function. See the revised definition below.
;(define (start-layout fn)
; (token-case
; (\{ (funcall fn '#f))
; (else
; (let/cc recovery-fn
; (push (cons *current-col* (lambda ()
; (let ((res (funcall fn '#t)))
; (funcall recovery-fn res))))
; *layout-stack*)
; (funcall fn '#t)))))
(define (start-layout fn)
(token-case
(\{ (funcall fn '#f))
(else
(let/cc recovery-fn
(start-layout-1 fn recovery-fn)))))
(define (start-layout-1 fn recovery-fn)
(push (cons *current-col*
(lambda ()
(let ((res (funcall fn '#t)))
(funcall recovery-fn res))))
*layout-stack*)
(funcall fn '#t))
(define (layout-col x)
(car x))
(define (layout-recovery-fn x)
(cdr x))
(define (close-layout in-layout?)
(cond (in-layout?
(setf *layout-stack* (cdr *layout-stack*))
(token-case
($\} '()) ; the advance-token routine may have inserted this
(else '())))
(else
(token-case
(\} '())
(else
(signal-missing-brace))))))
(define (signal-missing-brace)
(parser-error 'missing-brace
"Missing `}'."))
(define (resolve-layout stream layout-stack)
(if (null? layout-stack)
stream
(let ((col (layout-col (car layout-stack))))
(declare (type fixnum col))
(cond ((= (the fixnum *current-col*) col)
(insert-extra-token '\; stream))
((< (the fixnum *current-col*) col)
(insert-extra-token
'$\} (resolve-layout stream (cdr layout-stack))))
(else
stream)
))))
;;; The following routines are used for backtracking. This is a bit of
;;; a hack at the moment.
(define (save-scanner-state)
(vector *token* *token-args* *token-stream* *layout-stack* *current-line*
*current-col*))
(define (restore-excursion state)
(setf *token* (vector-ref state 0))
(setf *token-args* (vector-ref state 1))
(setf *token-stream* (vector-ref state 2))
(setf *layout-stack* (vector-ref state 3))
(setf *current-line* (vector-ref state 4))
(setf *current-col* (vector-ref state 5)))
(define (eq-token? type)
(eq? type *token*))
(define (eq-token-arg? str)
(string=? str (car *token-args*)))
;;; lookahead into the token stream
(define (peek-1-type)
(peek-toks 0 *token-stream*))
(define (peek-2-type)
(peek-toks 1 *token-stream*))
;;; This is a Q&D way of looking ahead. It does not expand the layout
;;; as it goes so there may be missing } and ;. This should not matter
;;; in the places where this is used since these would be invalid anyway.
;;; To be safe, token types are rechecked while advancing to verify the
;;; lookahead.
(define (peek-toks n toks)
(declare (type fixnum n))
(cond ((null? toks)
'eof)
((memq (caar toks) '(col line))
(peek-toks n (cdr toks)))
((eqv? n 0)
(caar toks))
(else (peek-toks (1- n) (cdr toks)))))
;; These routines handle the `satisfies' clauses used in token-case.
(define (at-varsym/+?)
(and (eq? *token* 'varsym)
(string=? (car *token-args*) "+")))
(define (at-varsym/-?)
(and (eq? *token* 'varsym)
(string=? (car *token-args*) "-")))
(define (at-varsym/paren?)
(and (eq? *token* '\()
(eq? (peek-1-type) 'varsym)
(eq? (peek-2-type) '\))))
(define (at-consym/paren?)
(and (eq? *token* '\()
(eq? (peek-1-type) 'consym)
(eq? (peek-2-type) '\))))
(define (at-varid/quoted?)
(and (eq? *token* '\`)
(eq? (peek-1-type) 'varid)))
(define (at-conid/quoted?)
(and (eq? *token* '\`)
(eq? (peek-1-type) 'conid)))
(define (at-+k?)
(and (at-varsym/+?)
(eq? (peek-1-type) 'integer)))
(define (at--n?)
(and (at-varsym/-?)
(memq (peek-1-type) '(integer float))))
;;; The following routines convert the simplest tokens to AST structure.
(define-local-syntax (return+advance x)
`(let ((x ,x))
(advance-token)
x))
(define (token->symbol)
(return+advance
(string->symbol (car *token-args*))))
(define (token->symbol/con) ; for conid, aconid
(return+advance
(string->symbol (add-con-prefix (car *token-args*)))))
(define (var->symbol)
(token-case
(\( (token-case
(varsym?
(let ((res (token->symbol)))
(token-case
(\) res)
(else (signal-missing-token "`)'" "var")))))
(else (signal-missing-token "<varsym>" "var"))))
(varid? (token->symbol))))
(define (var->ast)
(let ((vname (var->symbol)))
(make var-ref (name vname) (infix? '#f) (var *undefined-def*))))
(define (var->entity)
(let ((vname (var->symbol)))
(make entity-var (name vname))))
(define (con->symbol)
(token-case
(\( (token-case
(consym?
(let ((res (token->symbol/con)))
(token-case
(\) res)
(else (signal-missing-token "`)'" "con")))))
(else (signal-missing-token "<consym>" "con"))))
(conid? (token->symbol/con))))
(define (varop->symbol)
(token-case
(\` (token-case
(varid?
(let ((res (token->symbol)))
(token-case
(\` res)
(else (signal-missing-token "``'" "varop")))))
(else (signal-missing-token "<varid>" "varop"))))
(varsym? (token->symbol))))
(define (varop->ast)
(let ((varop-name (varop->symbol)))
(make var-ref (name varop-name) (infix? '#t) (var *undefined-def*))))
(define (conop->symbol)
(token-case
(\` (token-case
(conid?
(let ((res (token->symbol/con)))
(token-case
(\` res)
(else (signal-missing-token "``'" "conop")))))
(else (signal-missing-token "<conid>" "conop"))))
(consym? (token->symbol/con))))
(define (conop->ast)
(let ((conop-name (conop->symbol)))
(make con-ref (name conop-name) (infix? '#t) (con *undefined-def*))))
(define (op->symbol)
(token-case
(\` (token-case
(conid?
(let ((res (token->symbol/con)))
(token-case
(\` res)
(else (signal-missing-token "``'" "op")))))
(varid?
(let ((res (token->symbol)))
(token-case
(\` res)
(else (signal-missing-token "``'" "op")))))
(else (signal-missing-token "<conid> or <varid>" "op"))))
(consym? (token->symbol/con))
(varsym? (token->symbol))))
(define (con->ast) ; for conid, aconid
(let ((name (con->symbol)))
(make con-ref (name name) (con *undefined-def*) (infix? '#f))))
(define (pcon->ast) ; for aconid, conid
(let ((name (con->symbol)))
(make pcon (name name) (con *undefined-def*) (pats '()) (infix? '#f))))
(define (pconop->ast) ; for aconop, conop
(let ((name (conop->symbol)))
(make pcon (name name) (con *undefined-def*) (pats '()) (infix? '#t))))
(define (tycon->ast) ; for aconid
(let ((name (token->symbol)))
(make tycon (name name) (def *undefined-def*) (args '()))))
(define (class->ast) ; for aconid
(let ((name (token->symbol)))
(make class-ref (name name) (class *undefined-def*))))
(define (tyvar->ast) ; for avarid
(let ((name (token->symbol)))
(make tyvar (name name))))
(define (token->integer) ; for integer
(return+advance
(car *token-args*)))
(define (integer->ast) ; for integer
(return+advance
(make integer-const (value (car *token-args*)))))
(define (float->ast)
(return+advance
(make float-const (numerator (car *token-args*))
(denominator (cadr *token-args*))
(exponent (caddr *token-args*)))))
(define (string->ast)
(return+advance
(make string-const (value (car *token-args*)))))
(define (char->ast)
(return+advance
(make char-const (value (car *token-args*)))))
(define (literal->ast)
(token-case
((no-advance integer) (integer->ast))
((no-advance float) (float->ast))
((no-advance string) (string->ast))
((no-advance char) (char->ast))))
|