📄 earley.scm
字号:
;;; EARLEY -- Earley's parser, written by Marc Feeley.; (make-parser grammar lexer) is used to create a parser from the grammar; description `grammar' and the lexer function `lexer'.;; A grammar is a list of definitions. Each definition defines a non-terminal; by a set of rules. Thus a definition has the form: (nt rule1 rule2...).; A given non-terminal can only be defined once. The first non-terminal; defined is the grammar's goal. Each rule is a possibly empty list of; non-terminals. Thus a rule has the form: (nt1 nt2...). A non-terminal; can be any scheme value. Note that all grammar symbols are treated as; non-terminals. This is fine though because the lexer will be outputing; non-terminals.;; The lexer defines what a token is and the mapping between tokens and; the grammar's non-terminals. It is a function of one argument, the input,; that returns the list of tokens corresponding to the input. Each token is; represented by a list. The first element is some `user-defined' information; associated with the token and the rest represents the token's class(es) (as a; list of non-terminals that this token corresponds to).;; The result of `make-parser' is a function that parses the single input it; is given into the grammar's goal. The result is a `parse' which can be; manipulated with the procedures: `parse->parsed?', `parse->trees'; and `parse->nb-trees' (see below).;; Let's assume that we want a parser for the grammar;; S -> x = E; E -> E + E | V; V -> V y |;; and that the input to the parser is a string of characters. Also, assume we; would like to map the characters `x', `y', `+' and `=' into the corresponding; non-terminals in the grammar. Such a parser could be created with;; (make-parser; '(; (s (x = e)); (e (e + e) (v)); (v (v y) ()); ); (lambda (str); (map (lambda (char); (list char ; user-info = the character itself; (case char; ((#\x) 'x); ((#\y) 'y); ((#\+) '+); ((#\=) '=); (else (fatal-error "lexer error"))))); (string->list str))); );; An alternative definition (that does not check for lexical errors) is;; (make-parser; '(; (s (#\x #\= e)); (e (e #\+ e) (v)); (v (v #\y) ()); ); (lambda (str) (map (lambda (char) (list char char)) (string->list str))); );; To help with the rest of the discussion, here are a few definitions:;; An input pointer (for an input of `n' tokens) is a value between 0 and `n'.; It indicates a point between two input tokens (0 = beginning, `n' = end).; For example, if `n' = 4, there are 5 input pointers:;; input token1 token2 token3 token4; input pointers 0 1 2 3 4;; A configuration indicates the extent to which a given rule is parsed (this; is the common `dot notation'). For simplicity, a configuration is; represented as an integer, with successive configurations in the same; rule associated with successive integers. It is assumed that the grammar; has been extended with rules to aid scanning. These rules are of the; form `nt ->', and there is one such rule for every non-terminal. Note; that these rules are special because they only apply when the corresponding; non-terminal is returned by the lexer.;; A configuration set is a configuration grouped with the set of input pointers; representing where the head non-terminal of the configuration was predicted.;; Here are the rules and configurations for the grammar given above:;; S -> . \; 0 |; x -> . |; 1 |; = -> . |; 2 |; E -> . |; 3 > special rules (for scanning); + -> . |; 4 |; V -> . |; 5 |; y -> . |; 6 /; S -> . x . = . E .; 7 8 9 10; E -> . E . + . E .; 11 12 13 14; E -> . V .; 15 16; V -> . V . y .; 17 18 19; V -> .; 20;; Starters of the non-terminal `nt' are configurations that are leftmost; in a non-special rule for `nt'. Enders of the non-terminal `nt' are; configurations that are rightmost in any rule for `nt'. Predictors of the; non-terminal `nt' are configurations that are directly to the left of `nt'; in any rule.;; For the grammar given above,;; Starters of V = (17 20); Enders of V = (5 19 20); Predictors of V = (15 17)(define (make-parser grammar lexer) (define (non-terminals grammar) ; return vector of non-terminals in grammar (define (add-nt nt nts) (if (member nt nts) nts (cons nt nts))) ; use equal? for equality tests (let def-loop ((defs grammar) (nts '())) (if (pair? defs) (let* ((def (car defs)) (head (car def))) (let rule-loop ((rules (cdr def)) (nts (add-nt head nts))) (if (pair? rules) (let ((rule (car rules))) (let loop ((l rule) (nts nts)) (if (pair? l) (let ((nt (car l))) (loop (cdr l) (add-nt nt nts))) (rule-loop (cdr rules) nts)))) (def-loop (cdr defs) nts)))) (list->vector (reverse nts))))) ; goal non-terminal must be at index 0 (define (ind nt nts) ; return index of non-terminal `nt' in `nts' (let loop ((i (- (vector-length nts) 1))) (if (>= i 0) (if (equal? (vector-ref nts i) nt) i (loop (- i 1))) #f))) (define (nb-configurations grammar) ; return nb of configurations in grammar (let def-loop ((defs grammar) (nb-confs 0)) (if (pair? defs) (let ((def (car defs))) (let rule-loop ((rules (cdr def)) (nb-confs nb-confs)) (if (pair? rules) (let ((rule (car rules))) (let loop ((l rule) (nb-confs nb-confs)) (if (pair? l) (loop (cdr l) (+ nb-confs 1)) (rule-loop (cdr rules) (+ nb-confs 1))))) (def-loop (cdr defs) nb-confs)))) nb-confs))); First, associate a numeric identifier to every non-terminal in the; grammar (with the goal non-terminal associated with 0).;; So, for the grammar given above we get:;; s -> 0 x -> 1 = -> 4 e ->3 + -> 4 v -> 5 y -> 6 (let* ((nts (non-terminals grammar)) ; id map = list of non-terms (nb-nts (vector-length nts)) ; the number of non-terms (nb-confs (+ (nb-configurations grammar) nb-nts)) ; the nb of confs (starters (make-vector nb-nts '())) ; starters for every non-term (enders (make-vector nb-nts '())) ; enders for every non-term (predictors (make-vector nb-nts '())) ; predictors for every non-term (steps (make-vector nb-confs #f)) ; what to do in a given conf (names (make-vector nb-confs #f))) ; name of rules (define (setup-tables grammar nts starters enders predictors steps names) (define (add-conf conf nt nts class) (let ((i (ind nt nts))) (vector-set! class i (cons conf (vector-ref class i))))) (let ((nb-nts (vector-length nts))) (let nt-loop ((i (- nb-nts 1))) (if (>= i 0) (begin (vector-set! steps i (- i nb-nts)) (vector-set! names i (list (vector-ref nts i) 0)) (vector-set! enders i (list i)) (nt-loop (- i 1))))) (let def-loop ((defs grammar) (conf (vector-length nts))) (if (pair? defs) (let* ((def (car defs)) (head (car def))) (let rule-loop ((rules (cdr def)) (conf conf) (rule-num 1)) (if (pair? rules) (let ((rule (car rules))) (vector-set! names conf (list head rule-num)) (add-conf conf head nts starters) (let loop ((l rule) (conf conf)) (if (pair? l) (let ((nt (car l))) (vector-set! steps conf (ind nt nts)) (add-conf conf nt nts predictors) (loop (cdr l) (+ conf 1))) (begin (vector-set! steps conf (- (ind head nts) nb-nts)) (add-conf conf head nts enders) (rule-loop (cdr rules) (+ conf 1) (+ rule-num 1)))))) (def-loop (cdr defs) conf)))))))); Now, for each non-terminal, compute the starters, enders and predictors and; the names and steps tables. (setup-tables grammar nts starters enders predictors steps names); Build the parser description (let ((parser-descr (vector lexer nts starters enders predictors steps names))) (lambda (input) (define (ind nt nts) ; return index of non-terminal `nt' in `nts' (let loop ((i (- (vector-length nts) 1))) (if (>= i 0) (if (equal? (vector-ref nts i) nt) i (loop (- i 1))) #f))) (define (comp-tok tok nts) ; transform token to parsing format (let loop ((l1 (cdr tok)) (l2 '())) (if (pair? l1) (let ((i (ind (car l1) nts))) (if i (loop (cdr l1) (cons i l2)) (loop (cdr l1) l2))) (cons (car tok) (reverse l2))))) (define (input->tokens input lexer nts) (list->vector (map (lambda (tok) (comp-tok tok nts)) (lexer input)))) (define (make-states nb-toks nb-confs) (let ((states (make-vector (+ nb-toks 1) #f))) (let loop ((i nb-toks)) (if (>= i 0) (let ((v (make-vector (+ nb-confs 1) #f))) (vector-set! v 0 -1) (vector-set! states i v) (loop (- i 1))) states)))) (define (conf-set-get state conf) (vector-ref state (+ conf 1))) (define (conf-set-get* state state-num conf) (let ((conf-set (conf-set-get state conf))) (if conf-set conf-set (let ((conf-set (make-vector (+ state-num 6) #f))) (vector-set! conf-set 1 -3) ; old elems tail (points to head) (vector-set! conf-set 2 -1) ; old elems head (vector-set! conf-set 3 -1) ; new elems tail (points to head) (vector-set! conf-set 4 -1) ; new elems head (vector-set! state (+ conf 1) conf-set) conf-set)))) (define (conf-set-merge-new! conf-set) (vector-set! conf-set (+ (vector-ref conf-set 1) 5) (vector-ref conf-set 4)) (vector-set! conf-set 1 (vector-ref conf-set 3)) (vector-set! conf-set 3 -1) (vector-set! conf-set 4 -1)) (define (conf-set-head conf-set) (vector-ref conf-set 2)) (define (conf-set-next conf-set i) (vector-ref conf-set (+ i 5))) (define (conf-set-member? state conf i) (let ((conf-set (vector-ref state (+ conf 1)))) (if conf-set (conf-set-next conf-set i) #f))) (define (conf-set-adjoin state conf-set conf i) (let ((tail (vector-ref conf-set 3))) ; put new element at tail (vector-set! conf-set (+ i 5) -1) (vector-set! conf-set (+ tail 5) i) (vector-set! conf-set 3 i) (if (< tail 0) (begin (vector-set! conf-set 0 (vector-ref state 0)) (vector-set! state 0 conf))))) (define (conf-set-adjoin* states state-num l i) (let ((state (vector-ref states state-num))) (let loop ((l1 l)) (if (pair? l1) (let* ((conf (car l1)) (conf-set (conf-set-get* state state-num conf))) (if (not (conf-set-next conf-set i)) (begin (conf-set-adjoin state conf-set conf i) (loop (cdr l1))) (loop (cdr l1)))))))) (define (conf-set-adjoin** states states* state-num conf i) (let ((state (vector-ref states state-num)))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -