(include "lib/core.scm")
(include "lib/pair.scm")
(include "lib/alist.scm")
(include "lib/string.scm")
(include "lib/frb.scm")
(include "lib/symbol.scm")
(include "lib/io.scm")
(include "parse/lexstep.scm")
(include "lib/lexer.scm")
;; parser tables
(datatype action
(:shift int)
(:reduce int int))
(datatype action-list
(:nil)
(:cons int (action) (action-list)))
(datatype goto-list
(:nil)
(:cons int int (goto-list)))
(include "parse/t2.scm")
(datatype item
(:nt symbol (list (item)))
(:t symbol string (range))
)
(datatype stack
(:empty)
(:elem (item) int (stack))
)
;; this isn't very modular. yet. I'd like to get a generator-based parse going on here.
;; might even obviate the need for tracking position in the AST. [since lexer position
;; can propagate to the current parse error].
(define (parse path)
(let ((file (file:open-read path))
(token-gen (make-lex-generator file))
(paren-stack (list:nil))
(indents (list:cons 0 (list:nil)))
(start-of-line #t)
(held-token eof-token)
(tok eof-token)
)
(define get-indent
;; XXX handle or disallow tabs
(token:t 'whitespace str _) -> (string-length str)
;; non-whitespace at the front of a line
(token:t _ _ _ _) -> 0)
(define (get-top-indent)
(match indents with
() -> 0
(indent . _) -> indent))
(define (next-token0)
;; process (i.e., filter/synthesize) the token stream
(let loop ()
(cond ((not (eq? held-token eof-token))
(set! tok held-token)
(set! held-token eof-token))
(else
(set! tok (token-gen))
;;(print "token-gen: ") (printn tok)
))
;;(print "next-token loop ") (printn start-of-line)
(if start-of-line
;; in this state we might emit INDENT/DEDENT
(match tok with
(token:t sym val range)
-> (let ((this-indent (get-indent tok))
(top-indent (get-top-indent)))
(set! start-of-line #f)
(set! held-token tok)
(cond ((> this-indent top-indent)
(set! indents (list:cons this-indent indents))
(token:t 'INDENT "" range))
((< this-indent top-indent)
(set! indents (cdr indents))
;; go around again, might be more DEDENT
(set! start-of-line #t)
(token:t 'DEDENT "" range))
(else
(loop)))))
;; in the middle of a line somewhere
(match tok with
(token:t 'NEWLINE _ _)
-> (match paren-stack with
() -> (begin (set! start-of-line #t) tok)
_ -> (loop))
(token:t 'whitespace _ _) -> (loop)
(token:t 'comment _ _) -> (loop)
(token:t _ _ _) -> tok
))
))
(define (next-token)
(let ((t (next-token0)))
(print-string "next-token: ") (printn t)
t))
(let ((stack (stack:empty)))
(define (get-state)
(match stack with
(stack:empty) -> 0
(stack:elem _ state _) -> state
))
(define (lookup-action state kind)
(let loop ((l actions[state]))
(match l with
(action-list:nil)
-> (error "missing action?")
(action-list:cons tkind action tl)
-> (if (eq? terminals[tkind] kind)
action
(loop tl)))))
(define (lookup-goto state nt)
(let loop ((l goto[state]))
(match l with
(goto-list:nil)
-> (error "missing goto?")
(goto-list:cons nt0 new-state tl)
-> (if (eq? nt0 nt)
new-state
(loop tl)))))
(define (pop-n n)
(let loop ((n n) (result (list:nil)))
(if (= n 0)
result
(loop (- n 1) (list:cons (pop) result)))))
(define (push item state)
(set! stack (stack:elem item state stack)))
(define (pop)
(match stack with
(stack:elem item _ rest) -> (begin (set! stack rest) item)
(stack:empty) -> (error "stack underflow")))
(let loop ((tok (next-token)))
(cond ((eq? tok eof-token) (pop) (pop))
(else
(print-string "token: ") (printn tok)
;;(print-string "state: ") (printn (get-state))
;;(print "indentation: ") (printn indentation)
(vcase token tok
((:t kind val range)
(let ((a (lookup-action (get-state) kind)))
(vcase action a
((:shift state)
(push (item:t kind val range) state)
(loop (next-token)))
((:reduce plen nt)
(let ((args (pop-n plen))
(next-state (lookup-goto (get-state) nt)))
(push (item:nt non-terminals[nt] args) next-state))
(loop tok)))
)))
)))
)))
(define (indent n)
(let loop ((n n))
(cond ((= n 0) #t)
(else
(print-string " ")
(loop (- n 1))))))
(define (print-parse-tree t)
(let loop0 ((d 0)
(t t))
(indent d)
(match t with
(item:t sym str range)
-> (begin (print range) (print-string " ") (print sym) (print-string " ") (printn str))
(item:nt sym items)
-> (begin
(printn sym)
(let loop1 ((l items))
(match l with
() -> #u
(hd . tl) -> (begin (loop0 (+ d 1) hd) (loop1 tl)))))
)))
;; print a parse tree out in a way that facilitates writing patterns for it.
(define ppt
(item:nt sym items) -> (begin (print-string "(item:nt ") (print sym) (print-string " ") (ppt-list items) (print-string ")"))
(item:t sym str range) -> (begin (print-string "(item:t ") (print sym) (print-string " \"") (print-string str) (print-string "\")"))
)
(define (ppt-list l)
(print-string "(")
(ppt-list2 l))
(define ppt-list2
() -> (print-string ")")
(hd . tl) -> (begin (ppt hd) (print-string " ") (ppt-list2 tl))
)
;; (datatype formal
;; (:var string)
;; ;;(:var-with-default string (expr))
;; )
;; (datatype ifclause
;; (:case (expr) (expr))
;; )
;; (datatype expr
;; (:int int)
;; (:string string)
;; (:varref string)
;; (:binary string (expr) (expr))
;; (:unary string (expr))
;; (:funcall (expr) (list (expr)))
;; (:getitem (expr) (expr))
;; (:getattr (expr) string)
;; (:lambda (list (formal)) (expr))
;; (:sequence (list (expr)))
;; (:function string (list (formal)) (expr))
;; (:if (list (ifclause)) (expr))
;; (:while (expr) (expr) (expr))
;; (:for (expr) (expr) (expr) (expr))
;; (:break)
;; (:continue)
;; (:pass)
;; (:raise (expr))
;; (:return (expr))
;; (:unparsed symbol (list (expr)))
;; )
(define (perror where x)
(print-string "decode error in ")
(print-string where)
(print-string ": ")
(printn x)
(error "decode error"))
(define p-expr
(let ((l (alist/new)))
;; store the parsing functions in an alist keyed by production rule.
(define (A key val)
(set! l (alist/add l key val)))
(A 'atom p-atom)
(lambda (x)
(match x with
(item:t _ _ _) -> (perror "p-expr" x)
(item:nt kind val) -> (let ((probe (alist/lookup l kind)))
(match probe with
(maybe:no) -> (expr:unparsed kind (p-list val))
(maybe:yes fun) -> (fun val)
))))))
(let ((t (if (> (sys:argc) 1) (parse sys:argv[1]) (parse "tests/parse_2.py"))))
(printn t)
(print-parse-tree t)
(ppt t)
(terpri)
(let ((exp (p-expr t)))
(ppt-expr 0 exp)
(print-string "\n")
exp
)
)