;; Bottom-Up Parser ;; ================ ;; Warnung: Dies ist nur ein Illustrationsprogramm! ;; Wer diesen extrem ineffizienten fuer nichttriviale Saetze ;; einsetzt, ist selber schuld. (defparameter *lexicon* '(("Peter" (cat . NN)) ("Mary" (cat . NN)) ("sleeps" (cat . VINT)) ("kills" (cat . VTRANS)) ("loves" (cat . VTRANS)) ("kills" (cat . N)) ("the" (cat . DET)) ("man" (cat . N)) ("cat" (cat . N)) ("with" (cat . P)) ("a" (cat . DET)) ("saw" (cat . N)) ("saw" (cat . VTRANS)) ) ) (defparameter *grammar* '((S (NP VP)) (NP (NN)) (NP (DET N)) (VP (VTRANS NP)) (VP (VINT)) (VP (VTRANS NP PP)) (VP (VINT PP)) (PP (P NP)) ) ) (defun split-string (string) ;; teilt einen String an Leerstellen auf ;; Annahme: nur eine Leerstelle zwischen Woertern (loop for position from 1 to (1- (length string)) with result = nil ;; akkumuliert Woerter rueckwaerts (mit push) with start = 0 ;; Anfang des aktuellen Worts do (when (eq (aref string position) #\space) (progn (push (subseq string start position) result)) (setf start (1+ position))) finally ;; fuege den uebrigen Teil des Strings an (nach dem letzten Space) (push (subseq string start) result) (return (reverse result)))) (defun make-lexical-trees (lexicon) ;; Erzeugt die moeglichen "lexikalischen" Baeume aus dem Lexikon. ;; Beispiel: (make-lexical-trees *lexicon*) ---> ;; ((N ("cat")) (N ("man")) (DET ("the")) (V ("kills")) (NN ("Mary")) ... (loop for entry in lexicon with result = nil ;; sammelt die Ergebnisse auf do (let* ((word (first entry)) (features (rest entry)) (category (cdr (assoc 'cat features))) ) (push (make-grammar-rule category (list word)) result) ) finally (return result))) (defun parse () ;; liest einen Satz und druckt einen moeglichen Parsebaum, wenn ;; einer gefunden wird. (let* ((sentence (progn (format t "Input: ") (read-line))) ;; "I go there" -> (("I") ("go") ("there")) (words (mapcar (lambda (x) (list x)) (split-string sentence))) ;; haenge die Grammatikregeln und lexikalischen Baeume aneinander. (expansions (append (make-lexical-trees *lexicon*) *grammar*)) ) ;; bottom-up-parse gibt einen Parsebaum oder NIL zurueck. (format t "Result: ~s" (bottom-up-parse words expansions)) ) ) (defun make-grammar-rule (lefthandside righthandside) ;; righthandside is a *list* of terminals/nonterminals ;; example for a rule: (S (NP VP)) (list lefthandside righthandside)) (defun rule-lhs (rule) ;; returns the left hand side of a rule. (first rule)) (defun rule-rhs (rule) ;; returns the right hand side of a rule (a list). (second rule)) (defun match (parse-tree-sequence righthandside) ;; tests if the LHSs of parse tree sequence match ;; the righthandside (of a certain rule, that could ;; apply to the sequence then) (loop for parsed-tree in parse-tree-sequence for rhs-elem in righthandside do (when (not (equal (rule-lhs parsed-tree) rhs-elem)) (return nil)) finally (return t)) ) (defun bottom-up-parse (forest rules) (if (and (= 1 (length forest)) (listp (first forest)) (eq (car (first forest)) 'S)) ;; ist ein vollstaendiger Satz gefunden? (first forest) ;; wenn ja, gib den (einzigen) Baum zurueck. (loop for rule in rules ;; sonst probiere alle Regeln aus do (let ( ;; an allen Stellen im "Wald" (result (loop for i from 0 to (1- (length forest)) do (let* ((n (length (rule-rhs rule))) (subsequence (subseq forest i (+ i n)))) ;; wenn eine Regel irgendwo passt (when (match subsequence (rule-rhs rule)) (return (bottom-up-parse ;; rufe die Funktion rekursiv auf ;; nachdem der neue Baum eingesetzt wurde (append (subseq forest 0 i) (list (make-grammar-rule (rule-lhs rule) subsequence)) (subseq forest (+ i n))) rules)))) finally (return nil)))) (when result (return result))) finally (return nil))))