(in-package "USER") (defvar *trace* nil) (setf *trace* nil) (defun ptrace (lhs left-of-dot right-of-dot input) (format t "~a --> ~{~a~^ ~} . ~{~a~^ ~} [~{~a~^ ~}]~%" lhs left-of-dot right-of-dot input)) (defun snoc (item list) ;; ;; inverse cons(): add .item. as new last element to .list. ;; (append list (list item))) (defparameter *grammar* '((S NP VP) (NP Det N) (NP NP PP) (VP V) (VP V NP) (VP VP PP) (PP P NP) (Det the) (Det a) (P with) (Adj fat) (Adj cute) (N Adj N) (N woman) (N man) (N ball) (N scope) (N hat) (V snored) (V saw))) (defun rule-lhs (rule) (first rule)) (defun rule-rhs (rule) (rest rule)) (defun rules-starting-in (category) (loop for rule in *grammar* when (eq (first (rule-rhs rule)) category) collect rule)) (defstruct parse tree remaining-input start span) (defun complete-parses (parses) (remove-if-not #'null parses :key #'parse-remaining-input)) (defun make-tree (root daughters) (cons root daughters)) (defun tree-root (tree) (first tree)) (defun tree-daughters (tree) (rest tree)) (defun parse (input &key (position 0)) ;; ;; while there is (remaining) input, find all rules that have the next input ;; word as their first (i.e. leftmost) right-hand side symbol and then try to ;; instantiate the right-hand side in extend-parse() calls; here, the initial ;; dot position (the borderline between the .right-of-dot. and .left-of-dot. ;; parts of the rule right-hand side) is _behind_ the first symbol which at ;; the same time consumes the current input token. extend-parse() results ;; are accumulated in a single list by means of 'nconc' ;; (when input (loop for rule in (rules-starting-in (first input)) nconc (extend-parse (rule-lhs rule) (list (first (rule-rhs rule))) (rest (rule-rhs rule)) (rest input) :start position :span 1)))) (defun extend-parse (lhs left-of-dot right-of-dot input &key start span) (when *trace* (ptrace lhs left-of-dot right-of-dot input)) (if (null right-of-dot) ;; ;; a null() .right-of-dot. part means that the right-hand side of ;; this rule (postulation) has been successfully instantiated; record the ;; resulting --- possibly partial --- `parse' structure and use it as the ;; anchor for the upward projection: call extend-parse() again from the ;; same .start. position on all rules that have the root node of the new ;; result as their first symbol on the right-hand side. this step will ;; recursively build up constituent structures (bottom-up). our result ;; then is the union of the new `parse' structure with whatever the ;; upward projection yields. ;; no .input. is (directly) consumed in the upwards projection step. ;; (let ((parse (make-parse :tree (make-tree lhs left-of-dot) :remaining-input input :start start :span span))) (cons parse (loop for rule in (rules-starting-in lhs) nconc (extend-parse (rule-lhs rule) (list (parse-tree parse)) (rest (rule-rhs rule)) input :start start :span span)))) ;; ;; the non-empty .right-of-dot. indicates that the parse has to continue to ;; the right in order to instantiate the remaining daughters of this rule: ;; first call parse() from our current string position (i.e. the .start. of ;; this rule postulation plus its current extension) to consume more input ;; and then use extend-parse() to complete all results obtained that meet ;; our expections (have the next .right-of-dot. symbol as their root node). ;; the `tree' part of the parse() result is appended to the tail of ;; the current sequence of instantiated right-hand side elements ;; (.left-of-dot.), since it is a new daugther of this partial ;; instantiation, and adds its extension (`span') to the overall .span.). ;; here, whatever the parse() call failed to analyzed is our remaining ;; input. ;; (loop for parse in (parse input :position (+ start span)) nconc (when (eq (tree-root (parse-tree parse)) (first right-of-dot)) (extend-parse lhs (snoc (parse-tree parse) left-of-dot) (rest right-of-dot) (parse-remaining-input parse) :start start :span (+ span (parse-span parse)))))))