git.fiddlerwoaroof.com
Raw Blame History
;;;  This file abstracts the representation of tokens.  It is used by both
;;;  the lexer & parser.  This also contains routines for converting
;;;  individual tokens to ast structure.  Routines used by the
;;;  token-case macro in `satisfies' clauses are here too.

;;; Lexer routines for emitting tokens:

(define (emit-token type . args)
  (cond (*on-new-line?*
	 (push (list 'line *start-line* *start-col*) *tokens*))
	(*save-col?*
	 (push (list 'col *start-col*) *tokens*)))
  (push (cons type args) *tokens*)
  (setf *on-new-line?* '#f)
  (setf *save-col?* (memq type '(|where| |of| |let|))))

(define (emit-token/string type string-as-list)
  (emit-token type (list->string string-as-list)))

;;; Parser routines:

;;;  These routines take care of the token stream in the parser.  They
;;;  maintain globals for the current token and its location.  

;;;  Globals used:
;;;   *token-stream*   remaining tokens to be parsed
;;;   *token*          current token type
;;;   *token-args*     current token arguments
;;;   *layout-stack*   columns at which layout is being done
;;;   *current-line*   current line the scanner is on
;;;   *current-col*    current col; valid at start of line & after where,let,of
;;;   *current-file*

(define (init-token-stream tokens)
  (setf *token-stream* tokens)
  (setf *layout-stack* '())
  (advance-token))

(define (advance-token)
  (cond ((null? *token-stream*)
	 (setf *token* 'eof))
	(else
	 (let* ((token (car *token-stream*)))
	   (setf *token-stream* (cdr *token-stream*))
	   (advance-token-1 (car token) (cdr token))))))

(define (advance-token-1 type args)
  (cond ((eq? type 'file)
	 (setf *current-file* (car args))
	 (advance-token))
	((eq? type 'col)
	 (setf *current-col* (car args))
	 (advance-token))
	((eq? type 'line)  ;; assume blank lines have been removed
	 (let ((line (car args))
	       (col (cadr args)))
	   (setf *current-line* line)
	   (setf *current-col* col)
	   (setf *token-stream*
		 (resolve-layout *token-stream* *layout-stack*)))
	 (advance-token))
	(else
	 (setf *token* type)
	 (setf *token-args* args)
	 type)))

(define (insert-extra-token tok-type stream) ; used by layout
  (cons (list tok-type) stream))

;;; This looks for the { to decide of layout will apply.  If so, the layout
;;; stack is pushed.  The body function, fn, is called with a boolean which
;;; tells it the whether layout rule is in force.

;;; *** The CMU CL compiler barfs with some kind of internal error
;;; *** on this function.  See the revised definition below.

;(define (start-layout fn)
;  (token-case
;   (\{ (funcall fn '#f))
;   (else
;    (let/cc recovery-fn
;      (push (cons *current-col* (lambda ()
;				  (let ((res (funcall fn '#t)))
;				    (funcall recovery-fn res))))
;	    *layout-stack*)
;      (funcall fn '#t)))))

(define (start-layout fn)
  (token-case
   (\{ (funcall fn '#f))
   (else
    (let/cc recovery-fn
      (start-layout-1 fn recovery-fn)))))

(define (start-layout-1 fn recovery-fn)
  (push (cons *current-col*
	      (lambda ()
		(let ((res (funcall fn '#t)))
		  (funcall recovery-fn res))))
	*layout-stack*)
  (funcall fn '#t))

(define (layout-col x)
  (car x))

(define (layout-recovery-fn x)
  (cdr x))

(define (close-layout in-layout?)
  (cond (in-layout?
	 (setf *layout-stack* (cdr *layout-stack*))
	 (token-case
	  ($\} '())   ; the advance-token routine may have inserted this
	  (else '())))
	(else
	 (token-case
	  (\} '())
	  (else
	   (signal-missing-brace))))))

(define (signal-missing-brace)
  (parser-error 'missing-brace
		"Missing `}'."))

(define (resolve-layout stream layout-stack)
  (if (null? layout-stack)
      stream
      (let ((col  (layout-col (car layout-stack))))
	(declare (type fixnum col))
	(cond ((= (the fixnum *current-col*) col)
	       (insert-extra-token '\; stream))
	      ((< (the fixnum *current-col*) col)
	       (insert-extra-token
	         '$\} (resolve-layout stream (cdr layout-stack))))
	      (else
	       stream)
	      ))))
	

;;; The following routines are used for backtracking.  This is a bit of
;;; a hack at the moment.

(define (save-scanner-state)
  (vector *token* *token-args* *token-stream* *layout-stack* *current-line*
	  *current-col*))

(define (restore-excursion state)
  (setf *token* (vector-ref state 0))
  (setf *token-args* (vector-ref state 1))
  (setf *token-stream* (vector-ref state 2))
  (setf *layout-stack* (vector-ref state 3))
  (setf *current-line* (vector-ref state 4))
  (setf *current-col* (vector-ref state 5)))

(define (eq-token? type)
  (eq? type *token*))

(define (eq-token-arg? str)
  (string=? str (car *token-args*)))

;;; lookahead into the token stream

(define (peek-1-type)
  (peek-toks 0 *token-stream*))

(define (peek-2-type)
  (peek-toks 1 *token-stream*))

;;; This is a Q&D way of looking ahead.  It does not expand the layout
;;; as it goes so there may be missing } and ;.  This should not matter
;;; in the places where this is used since these would be invalid anyway.
;;; To be safe, token types are rechecked while advancing to verify the
;;; lookahead.

(define (peek-toks n toks)
  (declare (type fixnum n))
  (cond ((null? toks)
	 'eof)
	((memq (caar toks) '(col line))
	 (peek-toks n (cdr toks)))
	((eqv? n 0)
	 (caar toks))
	(else (peek-toks (1- n) (cdr toks)))))

;; These routines handle the `satisfies' clauses used in token-case.

(define (at-varsym/+?)
  (and (eq? *token* 'varsym)
       (string=? (car *token-args*) "+")))

(define (at-varsym/-?)
  (and (eq? *token* 'varsym)
       (string=? (car *token-args*) "-")))

(define (at-varsym/paren?)
  (and (eq? *token* '\()
       (eq? (peek-1-type) 'varsym)
       (eq? (peek-2-type) '\))))

(define (at-consym/paren?)
  (and (eq? *token* '\()
       (eq? (peek-1-type) 'consym)
       (eq? (peek-2-type) '\))))

(define (at-varid/quoted?)
  (and (eq? *token* '\`)
       (eq? (peek-1-type) 'varid)))

(define (at-conid/quoted?)
  (and (eq? *token* '\`)
       (eq? (peek-1-type) 'conid)))

(define (at-+k?)
  (and (at-varsym/+?)
       (eq? (peek-1-type) 'integer)))

(define (at--n?)
  (and (at-varsym/-?)
       (memq (peek-1-type) '(integer float))))

;;;  The following routines convert the simplest tokens to AST structure.

(define-local-syntax (return+advance x)
  `(let ((x ,x))
     (advance-token)
     x))

(define (token->symbol)
 (return+advance
  (string->symbol (car *token-args*))))

(define (token->symbol/con)  ; for conid, aconid
 (return+advance
  (string->symbol (add-con-prefix (car *token-args*)))))

(define (var->symbol)
  (token-case
   (\( (token-case
	(varsym?
	 (let ((res (token->symbol)))
	   (token-case
	    (\) res)
	    (else (signal-missing-token "`)'" "var")))))
	(else (signal-missing-token "<varsym>" "var"))))
   (varid? (token->symbol))))

(define (var->ast)
  (let ((vname (var->symbol)))
    (make var-ref (name vname) (infix? '#f) (var *undefined-def*))))

(define (var->entity) 
  (let ((vname (var->symbol)))
    (make entity-var (name vname))))

(define (con->symbol)
  (token-case
   (\( (token-case
	(consym?
	 (let ((res (token->symbol/con)))
	   (token-case
	    (\) res)
	    (else (signal-missing-token "`)'" "con")))))
	(else (signal-missing-token "<consym>" "con"))))
   (conid? (token->symbol/con))))

(define (varop->symbol)
  (token-case
   (\` (token-case
	(varid?
	 (let ((res (token->symbol)))
	   (token-case
	    (\` res)
	    (else (signal-missing-token "``'" "varop")))))
	(else (signal-missing-token "<varid>" "varop"))))
   (varsym? (token->symbol))))

(define (varop->ast)
  (let ((varop-name (varop->symbol)))
    (make var-ref (name varop-name) (infix? '#t) (var *undefined-def*))))

(define (conop->symbol)
  (token-case
   (\` (token-case
	(conid?
	 (let ((res (token->symbol/con)))
	   (token-case
	    (\` res)
	    (else (signal-missing-token "``'" "conop")))))
	(else (signal-missing-token "<conid>" "conop"))))
   (consym? (token->symbol/con))))

(define (conop->ast)
  (let ((conop-name (conop->symbol)))
    (make con-ref (name conop-name) (infix? '#t) (con *undefined-def*))))

(define (op->symbol)
  (token-case
   (\` (token-case
	(conid?
	 (let ((res (token->symbol/con)))
	   (token-case
	    (\` res)
	    (else (signal-missing-token "``'" "op")))))
	(varid?
	 (let ((res (token->symbol)))
	   (token-case
	    (\` res)
	    (else (signal-missing-token "``'" "op")))))
	(else (signal-missing-token "<conid> or <varid>" "op"))))
   (consym? (token->symbol/con))
   (varsym? (token->symbol))))

(define (con->ast)  ; for conid, aconid
  (let ((name (con->symbol)))
    (make con-ref (name name) (con *undefined-def*) (infix? '#f))))

(define (pcon->ast) ; for aconid, conid
  (let ((name (con->symbol)))
    (make pcon (name name) (con *undefined-def*) (pats '()) (infix? '#f))))

(define (pconop->ast) ; for aconop, conop
  (let ((name (conop->symbol)))
    (make pcon (name name) (con *undefined-def*) (pats '()) (infix? '#t))))

(define (tycon->ast) ; for aconid
  (let ((name (token->symbol)))
    (make tycon (name name) (def *undefined-def*) (args '()))))

(define (class->ast) ; for aconid
  (let ((name (token->symbol)))
    (make class-ref (name name) (class *undefined-def*))))

(define (tyvar->ast) ; for avarid
  (let ((name (token->symbol)))
    (make tyvar (name name))))

(define (token->integer) ; for integer
 (return+advance
  (car *token-args*)))

(define (integer->ast) ; for integer
 (return+advance
  (make integer-const (value (car *token-args*)))))

(define (float->ast)
 (return+advance
  (make float-const (numerator (car *token-args*))
	            (denominator (cadr *token-args*))
	            (exponent (caddr *token-args*)))))

(define (string->ast)
 (return+advance
  (make string-const (value (car *token-args*)))))

(define (char->ast)
 (return+advance
  (make char-const (value (car *token-args*)))))

(define (literal->ast)
  (token-case
    ((no-advance integer) (integer->ast))
    ((no-advance float) (float->ast))
    ((no-advance string) (string->ast))
    ((no-advance char) (char->ast))))