%BibTeX
% Publication database. Every complete entry below lists Ulrich Schaefer among
% the authors; entries appear in roughly reverse chronological order.
% Conventions: one field per line, lowercase field names, brace-delimited
% values, months given as the standard three-letter macros (jan ... dec),
% page ranges with a double hyphen, DOIs without a resolver prefix.

@inproceedings{Adolphs_etal:11,
  author       = {Peter Adolphs and Martin Theobald and Ulrich Sch{\"a}fer and Hans Uszkoreit and Gerhard Weikum},
  title        = {{YAGO}-{QA}: {A}nswering Questions by Structured Knowledge Queries},
  booktitle    = {Fifth {IEEE} International Conference on Semantic Computing},
  pages        = {158--161},
  publisher    = {{IEEE} Computer Society},
  address      = {Los Alamitos, CA, USA},
  month        = sep,
  year         = {2011},
  isbn         = {978-0-7695-4492-2},
  url          = {http://domino.mpi-inf.mpg.de/intranet/ag5/ag5publ.nsf/AuthorEditorIndividualView/a524bf4c689cb769c12578f0003f9e27/$FILE/yagoqa.pdf?OpenElement}
}

@inproceedings{dong-schaefer:2011:IJCNLP,
  author       = {Cailing Dong and Ulrich Sch{\"a}fer},
  title        = {Ensemble-style Self-training on Citation Classification},
  booktitle    = {Proceedings of the 5th International Joint Conference on Natural Language Processing ({IJCNLP})},
  pages        = {623--631},
  address      = {Chiang Mai, Thailand},
  month        = nov,
  year         = {2011},
  isbn         = {978-974-466-564-5},
  url          = {http://www.aclweb.org/anthology/I11-1070}
}

@inproceedings{Wolska-Schaefer:2011:TIA,
  author       = {Magdalena Wolska and Ulrich Sch{\"a}fer and The Nghia Pham},
  title        = {Bootstrapping a Domain-specific Terminological Taxonomy from Scientific Text},
  booktitle    = {Proceedings of the 9th International Conference on Terminology and Artificial Intelligence},
  pages        = {17--23},
  address      = {Paris, France},
  month        = nov,
  year         = {2011},
  url          = {http://tia2011.crim.fr/Proceedings/pdf/TIA05.pdf}
}

% NOTE(review): the isbn of the next entry is identical to the one of
% dong-schaefer:2011:IJCNLP (a different proceedings volume) -- looks like a
% copy-paste; verify against the ACL HLT 2011 demo proceedings.
@inproceedings{Schaefer_etal:11,
  author       = {Ulrich Sch{\"a}fer and Bernd Kiefer and Christian Spurk and J{\"o}rg Steffen and Rui Wang},
  title        = {The {ACL} {A}nthology {S}earchbench},
  booktitle    = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: {H}uman Language Technologies ({ACL} {HLT} 2011), System Demonstrations},
  pages        = {7--13},
  publisher    = {Association for Computational Linguistics ({ACL})},
  month        = jun,
  year         = {2011},
  isbn         = {978-974-466-564-5},
  url          = {http://www.aclweb.org/anthology/P11-4002}
}

@incollection{Bramantoro_etal:11,
  author       = {Arif Bramantoro and Ulrich Sch{\"a}fer and Toru Ishida},
  title        = {Pipelining Software and Services for Language Processing},
  booktitle    = {The {L}anguage {Grid}: {S}ervice-Oriented Collective Intelligence for Language Resource Interoperability},
  editor       = {Toru Ishida and Donghui Lin},
  series       = {{LNCS} Cognitive Technologies Series},
  chapter      = {16},
  pages        = {247--262},
  publisher    = {Springer},
  year         = {2011},
  isbn         = {978-3-642-21177-5},
  doi          = {10.1007/978-3-642-21178-2_16}
}

@incollection{Schaefer_Kiefer:11,
  author       = {Ulrich Sch{\"a}fer and Bernd Kiefer},
  title        = {Advances in Deep Parsing of Scholarly Paper Content},
  booktitle    = {Advanced Language Technologies for Digital Libraries},
  editor       = {Raffaella Bernardi and Sally Chambers and Bj{\"o}rn Gottfried and Fr{\'e}d{\'e}rique Segond and Ilya Zaihrayeu},
  series       = {{LNCS} Theoretical Computer Science Series},
  number       = {{LNCS} 6699},
  pages        = {135--153},
  publisher    = {Springer},
  year         = {2011},
  isbn         = {978-3-642-23159-9},
  doi          = {10.1007/978-3-642-23160-5_9},
  url          = {http://www.dfki.de/web/forschung/iwi/publikationen/renameFileForDownload?filename=Schaefer_Kiefer_ALT4DL_2011.pdf&file_id=uploads_993}
}

@inproceedings{Schaefer_Spurk:10,
  author       = {Ulrich Sch{\"a}fer and Christian Spurk},
  title        = {{TAKE} Scientist's Workbench: {S}emantic Search and Citation-Based Visual Navigation in Scholar Papers},
  booktitle    = {Fourth {IEEE} International Conference on Semantic Computing},
  pages        = {317--324},
  publisher    = {{IEEE} Computer Society},
  address      = {Los Alamitos, CA, USA},
  month        = sep,
  year         = {2010},
  isbn         = {978-0-7695-4154-9},
  doi          = {10.1109/ICSC.2010.40}
}

@inproceedings{Krieger_Schaefer:10,
  author       = {Hans-Ulrich Krieger and Ulrich Sch{\"a}fer},
  title        = {{DL} Meet {FL}: {A} Bidirectional Mapping between Ontologies and Linguistic Knowledge},
  booktitle    = {Proceedings of the 23rd International Conference on Computational Linguistics ({COLING}-2010)},
  pages        = {588--596},
  address      = {Beijing, China},
  month        = aug,
  year         = {2010},
  url          = {http://www.aclweb.org/anthology-new/C/C10/C10-2067.pdf}
}

@inproceedings{Bramantoro_etal:10,
  author       = {Arif Bramantoro and Ulrich Sch{\"a}fer and Toru Ishida},
  title        = {Towards an Integrated Architecture for Composite Language Services and Multiple Linguistic Processing Components},
  booktitle    = {Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}-2010)},
  pages        = {3506--3511},
  publisher    = {European Language Resources Association ({ELRA})},
  address      = {Valletta, Malta},
  month        = may,
  year         = {2010},
  isbn         = {2-9517408-6-7},
  url          = {http://www.lrec-conf.org/proceedings/lrec2010/pdf/717_Paper.pdf}
}

@inproceedings{Schaefer_Kasterka:10,
  author       = {Ulrich Sch{\"a}fer and Uwe Kasterka},
  title        = {Scientific Authoring Support: {A} Tool to Navigate in Typed Citation Graphs},
  booktitle    = {Proceedings of the {NAACL} {HLT} 2010 Workshop on Computational Linguistics and Writing: Writing Processes and Authoring Aids ({CL\&W}-2010)},
  pages        = {7--14},
  publisher    = {{ACL}},
  address      = {Los Angeles, CA},
  month        = jun,
  year         = {2010},
  isbn         = {978-1-932432-65-7},
  url          = {http://www.aclweb.org/anthology-new/W/W10/W10-0402.pdf}
}

@inproceedings{Bramantoro_etal:08,
  author       = {Arif Bramantoro and Masahiro Tanaka and Yohei Murakami and Ulrich Sch{\"a}fer and Toru Ishida},
  title        = {A Hybrid Integrated Architecture for Language Service Composition},
  booktitle    = {Proceedings of the {IEEE} 2008 International Conference on Web Services ({ICWS}-2008 Research Track)},
  pages        = {345--352},
  publisher    = {{IEEE} Computer Society},
  address      = {Beijing, China},
  month        = oct,
  year         = {2008},
  isbn         = {978-0-7695-3310-0},
  url          = {http://dx.doi.org/10.1109/ICWS.2008.79}
}

@inproceedings{Schaefer_etal:08b,
  author       = {Ulrich Sch{\"a}fer and Hans Uszkoreit and Christian Federmann and Torsten Marek and Yajing Zhang},
  title        = {Extracting and Querying Relations in Scientific Papers},
  booktitle    = {Proceedings of the 31st Annual {G}erman Conference on Artificial Intelligence, {KI}-2008},
  series       = {{LNAI}},
  volume       = {5243},
  pages        = {127--134},
  publisher    = {Springer},
  address      = {Kaiserslautern, Germany},
  month        = sep,
  year         = {2008},
  url          = {http://dx.doi.org/10.1007/978-3-540-85845-4_16}
}

@inproceedings{Schaefer_etal:08a,
  author       = {Ulrich Sch{\"a}fer and Hans Uszkoreit and Christian Federmann and Torsten Marek and Yajing Zhang},
  title        = {Extracting and Querying Relations in Scientific Papers on Language Technology},
  booktitle    = {Proceedings of {LREC}-2008},
  publisher    = {{ELRA}},
  address      = {Marrakesh, Morocco},
  month        = may,
  year         = {2008},
  url          = {http://www.dfki.de/dfkibib/publications/docs/hylap-aiama-lrec08.pdf}
}

@inproceedings{Schaefer:08a,
  author       = {Ulrich Sch{\"a}fer},
  title        = {Shallow, Deep and Hybrid Processing with {UIMA} and {Heart} of {Gold}},
  booktitle    = {Proceedings of the {LREC}-2008 Workshop Towards Enhanced Interoperability for Large {HLT} Systems: {UIMA} for {NLP}},
  pages        = {43--50},
  publisher    = {{ELRA}},
  address      = {Marrakesh, Morocco},
  month        = may,
  year         = {2008},
  url          = {http://www.dfki.de/dfkibib/publications/docs/uima4hog2008.pdf}
}

@book{Schaefer:08b,
  author       = {Sch{\"a}fer, Ulrich},
  title        = {Integrating Natural Language Processing Components with {XML} and {XSLT}: {R}epresentations and Hybrid Architectures},
  publisher    = {{VDM} {V}erlag {D}r. {M}{\"u}ller},
  address      = {Saarbr{\"u}cken, Germany},
  month        = apr,
  year         = {2008},
  isbn         = {978-3-836490-27-6},
  note         = {Book}
}

@phdthesis{Schaefer:07,
  author       = {Sch{\"a}fer, Ulrich},
  title        = {Integrating Deep and Shallow Natural Language Processing Components -- {R}epresentations and Hybrid Architectures},
  series       = {Saarbr{\"u}cken Dissertations in Computational Linguistics and Language Technology},
  volume       = {22},
  school       = {Saarland University, Faculty of Mathematics and Computer Science},
  address      = {Saarbr{\"u}cken, Germany},
  month        = jun,
  year         = {2007},
  url          = {http://www.dfki.de/~uschaefer/diss/},
  abstract     = {We describe basic concepts and software architectures for the integration of shallow and deep (linguistics-based, semantics-oriented) natural language processing (NLP) components.
                  The main goal of this novel, hybrid integration paradigm is improving robustness of deep processing.
                  After an introduction to constraint-based natural language parsing, we give an overview of typical shallow processing tasks.
                  We introduce XML standoff markup as an additional abstraction layer that eases integration of NLP components, and propose the use of XSLT as a standardized and efficient transformation language for online NLP integration.
                  In the main part of the thesis, we describe our contributions to three hybrid architecture frameworks that make use of these fundamentals.
                  SProUT is a shallow system that uses elements of deep constraint-based processing, namely type hierarchy and typed feature structures.
                  Whiteboard is the first hybrid architecture to integrate not only part-of-speech tagging, but also named entity recognition and topological parsing, with deep parsing.
                  Finally, we present Heart of Gold, a middleware architecture that generalizes Whiteboard into various dimensions such as configurability, multilinguality and flexible processing strategies.
                  We describe various applications that have been implemented using the hybrid frameworks such as structured named entity recognition, information extraction, creative document authoring support, deep question analysis, as well as evaluations.
                  In Whiteboard, e.g., it could be shown that shallow pre-processing increases both coverage and efficiency of deep parsing by a factor of more than two.
                  Heart of Gold not only forms the basis for applications that utilize semantics-oriented natural language analysis, but also constitutes a complex research instrument for experimenting with novel processing strategies combining deep and shallow methods, and eases replication and comparability of results.}
}

@inproceedings{Schaefer_Beck:06,
  author       = {Sch{\"a}fer, Ulrich and Beck, Daniel},
  title        = {Automatic Testing and Evaluation of Multilingual Language Technology Resources and Components},
  booktitle    = {Proceedings of the 5th International Conference on Language Resources and Evaluation ({LREC}-2006)},
  pages        = {173--178},
  publisher    = {{ELRA}},
  address      = {Genoa, Italy},
  month        = may,
  year         = {2006},
  url          = {http://www.dfki.de/dfkibib/publications/docs/sproutomat.pdf},
  abstract     = {We describe SProUTomat, a tool for daily building, testing and evaluating a complex general-purpose multilingual natural language text processor including its linguistic resources (lingware).
                  Software and lingware are developed, maintained and extended in a distributed manner by multiple authors and projects, i.e., the source code stored in a version control system is modified frequently.
                  The modular design of different, dedicated lingware modules like tokenizers, morphology, gazetteers, type hierarchy, rule formalism on the one hand increases flexibility and re-usability, but on the other hand may lead to fragility with respect to changes.
                  Therefore, frequent testing as known from software engineering is necessary also for lingware to warrant a high level of quality and overall stability of the system.
                  We describe the build, testing and evaluation methods for LT software and lingware we have developed on the basis of the open source, platform-independent Apache Ant tool and the configurable evaluation tool JTaCo.}
}

@inproceedings{Schaefer:06a,
  author       = {Sch{\"a}fer, Ulrich},
  title        = {Middleware for Creating and Combining Multi-dimensional {NLP} markup},
  booktitle    = {Proceedings of the 5th Workshop on {NLP} and {XML} ({NLPXML}-2006): {M}ulti-dimensional Markup in Natural Language Processing, 11th Conference of the European Chapter of the Association for Computational Linguistics ({EACL}-2006)},
  pages        = {81--84},
  address      = {Trento, Italy},
  month        = apr,
  year         = {2006},
  url          = {http://www.dfki.de/dfkibib/publications/docs/hognlpxml2006.pdf},
  abstract     = {We present the Heart of Gold middleware by demonstrating three XML-based integration scenarios where multi-dimensional markup produced online by multilingual natural language processing (NLP) components is combined to deliver rich, robust linguistic markup for use in NLP-based applications like information extraction, question answering and semantic web.
                  The scenarios include (1) robust deep-shallow integration, (2) shallow processing cascades, and (3) treebank storage of multi-dimensionally annotated texts.}
}

@inproceedings{Schaefer:06b,
  author       = {Sch{\"a}fer, Ulrich},
  title        = {{OntoNERdIE} -- {M}apping and Linking Ontologies to Named Entity Recognition and Information Extraction Resources},
  booktitle    = {Proceedings of the 5th International Conference on Language Resources and Evaluation ({LREC}-2006)},
  pages        = {1756--1761},
  publisher    = {{ELRA}},
  address      = {Genoa, Italy},
  month        = may,
  year         = {2006},
  url          = {http://www.dfki.de/dfkibib/publications/docs/ontonerdie\_LREC2006\_final\_corrected.pdf},
  abstract     = {We describe an implemented offline procedure that maps OWL/RDF-encoded ontologies with large, dynamically maintained instance data to named entity recognition (NER) and information extraction (IE) engine resources, preserving hierarchical concept information and links back to the ontology concepts and instances.
                  The main motivations are (i) improving NER/IE precision and recall in closed domains, (ii) exploiting linguistic knowledge (context, inflection, anaphora) for identifying ontology instances in texts more robustly, (iii) giving full access to ontology instances and concepts in natural language processing results, e.g. for subsequent ontology queries, navigation or inference, (iv) avoiding duplication of work in development and maintenance of similar resources in independent places, namely lingware and ontologies.
                  We show an application in hybrid deep-shallow natural language processing that is e.g. used for question analysis in closed domains.
                  Further applications could be automatic hyperlinking or other innovative semantic-web related applications.}
}

@inproceedings{Waldron_etal:06,
  author       = {Ben Waldron and Ann Copestake and Ulrich Sch{\"a}fer and Bernd Kiefer},
  title        = {Preprocessing and Tokenisation Standards in {DELPH-IN} Tools},
  booktitle    = {Proceedings of the 5th International Conference on Language Resources and Evaluation ({LREC}-2006)},
  pages        = {2263--2268},
  address      = {Genoa, Italy},
  month        = may,
  year         = {2006},
  url          = {http://www.dfki.de/dfkibib/publications/docs/LREC2006-214.pdf},
  abstract     = {We discuss preprocessing and tokenisation standards within DELPH-IN, a large scale open-source collaboration providing multiple independent multilingual shallow and deep processors.
                  We discuss (i) a component-specific XML interface format which has been used for some time to interface preprocessor results to the PET parser, and (ii) our implementation of a more generic XML interface format influenced heavily by the (ISO working draft) Morphosyntactic Annotation Framework (MAF).
                  Our generic format encapsulates the information which may be passed from the preprocessing stage to a parser: it uses standoff-annotation, a lattice for the representation of structural ambiguity, intra-annotation dependencies and allows for highly structured annotation content.
                  This work builds on the existing Heart of Gold middleware system, and previous work on Robust Minimal Recursion Semantics (RMRS) as part of an inter-component interface.
                  We give examples of usage with a number of the DELPH-IN processing components and deep grammars.}
}

@inproceedings{Bering_Schaefer:06,
  author       = {Christian Bering and Ulrich Sch{\"a}fer},
  title        = {{JTaCo} \& {SProUTomat}: Automatic Evaluation and Testing of Multilingual Language Technology Resources and Components},
  booktitle    = {Proceedings of the {LREC}-2006 Workshop on Quality Assurance and Quality Measurement for Language and Speech Resources},
  pages        = {42--47},
  address      = {Genoa, Italy},
  month        = may,
  year         = {2006},
  url          = {http://www.dfki.de/dfkibib/publications/docs/qaqmlsr-sproutomat-final.pdf},
  abstract     = {We describe JTaCo, a tool for automatic evaluation of language technology components against annotated corpora, and SProUTomat, a tool for building, testing and evaluating a complex general-purpose multilingual natural language text processor including its linguistic resources (lingware).
                  The JTaCo tool can be used to define mappings between the markup of an annotated corpus and the markup produced by the natural language processor to be evaluated.
                  JTaCo also generates detailed statistics and reports that help the user to inspect errors in the NLP output.
                  SProUTomat embeds a batch version of JTaCo and runs it after compiling the complex NLP system and its multilingual resources.
                  The resources are developed, maintained and extended in a distributed manner by multiple authors and projects, i.e., the source code stored in a version control system is modified frequently.
                  The aim of JTaCo \& SProUTomat is to warrant a high level of quality and overall stability of the system and its lingware resources.}
}

@article{Frank_etal:07,
  author       = {Frank, Anette and Krieger, Hans-Ulrich and Xu, Feiyu and Uszkoreit, Hans and Crysmann, Berthold and Sch{\"a}fer, Ulrich},
  title        = {Question Answering from Structured Knowledge Sources},
  journal      = {Journal of Applied Logics, Special Issue on Questions and Answers: Theoretical and Applied Perspectives},
  volume       = {5},
  number       = {1},
  pages        = {20--48},
  month        = mar,
  year         = {2007},
  url          = {http://dx.doi.org/10.1016/j.jal.2005.12.006},
  abstract     = {We present an implemented approach for domain-restricted question answering from structured knowledge sources, based on robust semantic analysis in a hybrid NLP system architecture.
                  We perform question interpretation and answer extraction in an architecture that builds on a lexical-conceptual structure for question interpretation, which is interfaced with domain-specific concepts and properties in a structured knowledge base.
                  Question interpretation involves a limited amount of domain-specific inferences, and accounts for higher-level quantificational questions.
                  Question interpretation and answer extraction are modular components that interact in clearly defined ways.
                  We derive so-called proto queries from the linguistic representations, which provide partial constraints for answer extraction from the underlying knowledge sources.
                  The search queries we construct from proto queries effectively compute minimal spanning trees from the underlying knowledge sources.
                  Our approach naturally extends to multilingual question answering, and has been developed as a prototype system for two application domains: the domain of Nobel prize winners, and the domain of Language Technology, on the basis of the large ontology underlying the information portal LT World.}
}

@inproceedings{Frank_etal:05,
  author       = {Frank, Anette and Krieger, Hans-Ulrich and Xu, Feiyu and Uszkoreit, Hans and Crysmann, Berthold and J{\"o}rg, Brigitte and Sch{\"a}fer, Ulrich},
  title        = {Querying Structured Knowledge Sources},
  booktitle    = {Proceedings of the {AAAI-05} Workshop on Question Answering in Restricted Domains},
  pages        = {10--19},
  address      = {Pittsburgh, PA},
  month        = jul,
  year         = {2005},
  url          = {http://www.dfki.de/dfkibib/publications/docs/ws1305FrankA.pdf},
  abstract     = {We present an implemented approach for domain-restricted question answering from structured knowledge sources, based on robust semantic analysis in a hybrid NLP system architecture.
                  We build on a lexical-semantic conceptual structure for question interpretation, which is interfaced with domain-specific concepts and properties in a structured knowledge base.
                  Question interpretation involves a limited amount of domain-specific inferences and accounts for quantificational questions.
                  We extract so-called proto queries from the linguistic representation, which provide partial constraints for answer extraction from the underlying knowledge sources.
                  The search queries we construct from proto queries effectively constitute minimum spanning trees that restrict the possible answer candidates.
                  Our approach naturally extends to multilingual question answering and has been developed as a prototype system for two application domains: the domain of Nobel prize winners and the domain of Language Technology, on the basis of the large ontology underlying the information portal LT World.}
}

@inproceedings{Drozdzynski_etal:05,
  author       = {Drozdzynski, Witold and Krieger, Hans-Ulrich and Piskorski, Jakub and Sch{\"a}fer, Ulrich},
  title        = {{SProUT} -- a General-Purpose {NLP} Framework Integrating Finite-State and Unification-based Grammar Formalisms},
  booktitle    = {Proceedings of the 5th International Workshop on Finite-State Methods and Natural Language Processing},
  series       = {Lecture Notes in Computer Science},
  publisher    = {Springer},
  address      = {Helsinki, Finland},
  month        = sep,
  year         = {2005},
  url          = {http://www.springerlink.com/content/r643n1t015t20612/}
}

% NOTE(review): the key of the next entry ends in :05 although year is 2004;
% the key is left unchanged so that existing citations keep resolving.
@inproceedings{Krieger_etal:05,
  author       = {Krieger, Hans-Ulrich and Drozdzynski, Witold and Piskorski, Jakub and Sch{\"a}fer, Ulrich and Xu, Feiyu},
  title        = {A Bag of Useful Techniques for Unification-Based Finite-State Transducers},
  booktitle    = {Proceedings of 7th {KONVENS}},
  editor       = {Buchberger, Ernst},
  pages        = {105--112},
  year         = {2004},
  url          = {http://www.dfki.de/dfkibib/publications/docs/sproutKONVENS2004.pdf},
  abstract     = {We present several extensions to the shallow text processor sprout, viz., 1. a fast imperfect unifiability test, 2. a special form of sets together with a polymorphic lazy and destructive unification operation, 3. a cheap form of negation, 4. a weak unidirectional form of coreferences, 5. optional context-free stages in the shallow cascade, 6. a compile time type check, 7. compile time transition sorting under subsumption, 8. several output merging techniques, and 9. a compaction technique for lexical resources.
                  The extensions have been found relevant in several projects and might be of importance to other systems, even to deep processing.}
}

@inproceedings{Uszkoreit_etal:04,
  author       = {Uszkoreit, Hans and Callmeier, Ulrich and Eisele, Andreas and Sch{\"a}fer, Ulrich and Siegel, Melanie and Uszkoreit, Jakob},
  title        = {Hybrid Robust Deep and Shallow Semantic Processing for Creativity Support in Document Production},
  booktitle    = {Proceedings of {KONVENS}-2004},
  pages        = {209--216},
  organization = {{\"O}GAI},
  address      = {Vienna, Austria},
  month        = sep,
  year         = {2004},
  url          = {http://www.dfki.de/dfkibib/publications/docs/dtkonvens.pdf},
  abstract     = {The research performed in the DeepThought project (http://www.project-deepthought.net/) aims at demonstrating the potential of deep linguistic processing if added to existing shallow methods that ensure robustness.
                  Classical information retrieval is extended by high precision concept indexing and relation detection.
                  We use this approach to demonstrate the feasibility of three ambitious applications, one of which is a tool for creativity support in document production and collective brainstorming.
                  This application is described in detail in this paper.
                  Common to all three applications, and the basis for their development is a platform for integrated linguistic processing.
                  This platform is based on a generic software architecture that combines multiple NLP components and on robust minimal recursive semantics (RMRS) as a uniform representation language.}
}

@inproceedings{Frank_etal:04,
  author       = {Frank, Anette and Spreyer, Kathrin and Drozdzynski, Witold and Krieger, Hans-Ulrich and Sch{\"a}fer, Ulrich},
  title        = {Constraint-Based {RMRS} Construction from Shallow Grammars},
  booktitle    = {Proceedings of the {HPSG}-04 Conference Workshop on Semantics in Grammar Engineering},
  editor       = {M{\"u}ller, Stefan},
  pages        = {393--413},
  organization = {Center for Computational Linguistics, Katholieke Universiteit Leuven},
  publisher    = {{CSLI} Publications, Stanford, CA},
  address      = {Leuven, Belgium},
  month        = aug,
  year         = {2004},
  url          = {http://www.dfki.de/dfkibib/publications/docs/frank-spreyer-drozdzynski-krieger-schaefer.pdf},
  abstract     = {We present a constraint-based syntax-semantics interface for the construction of RMRS (Robust Minimal Recursion Semantics) representations from shallow grammars.
                  The architecture is designed to allow modular interfaces to existing shallow grammars of various depth---ranging from chunk grammars to context-free stochastic grammars.
                  We define modular semantics construction principles in a typed feature structure formalism that allow flexible adaptation to alternative grammars and different languages.}
}

@inproceedings{Drozdzynski_etal:04b,
  author       = {Drozdzynski, Witold and Krieger, Hans-Ulrich and Piskorski, Jakub and Sch{\"a}fer, Ulrich},
  title        = {A Multilingual Content Production Tool for the Semantic Web},
  booktitle    = {European Conference on Knowledge Engineering and Knowledge Management ({EKAW}-04)},
  address      = {Northamptonshire, UK},
  month        = oct,
  year         = {2004},
  url          = {http://www.dfki.de/dfkibib/publications/docs/sproutEKAW2004.pdf},
  abstract     = {Automatic content extraction from unrestricted textual data constitutes a core technology for semantic web services.
                  Intelligent content extraction must furthermore address the peculiarities of the medium, i.e., must analyze natural language to a certain depth, in order to go beyond the realm of pure keyword-based approaches.
                  This demo presents SProUT -- a novel general-purpose multilingual information extraction (IE) platform.}
}

@inproceedings{Schaefer:04,
  author       = {Sch{\"a}fer, Ulrich},
  title        = {Using {XSLT} for the Integration of Deep and Shallow Natural Language Processing Components},
  booktitle    = {Proceedings of the {ESSLLI}-2004 Workshop on Combining Shallow and Deep Processing for {NLP}},
  editor       = {Simov, Kiril and Hinrichs, Erhard},
  pages        = {31--40},
  organization = {{ESSLLI} 2004},
  address      = {Nancy, France},
  month        = aug,
  year         = {2004},
  url          = {http://www.dfki.de/dfkibib/publications/docs/Schaefer-ComShaDeP.pdf},
  abstract     = {Whiteboard is a hybrid XML-based architecture that integrates deep and shallow natural language processing components.
                  The online system consists of a fast HPSG parser that utilizes tokenization, PoS, morphology, lexical, named entity, phrase chunk and (for German) topological sentence field analyses from shallow components.
                  This integration increases robustness, directs the search space and hence reduces processing time of the deep parser.
                  In this paper, we focus on one of the central integration facilities, the XSLT-based Whiteboard Annotation Transformer (WHAT), report on the benefits of XSLT-based NLP component integration, and present examples of XSL transformation of shallow and deep annotations used in the integrated architecture.
                  Furthermore, we report on a recent application of XSL transformation for the conversion of XML-encoded typed feature structures representation in the context of the DeepThought project where deep-shallow integration is performed on the basis of Robust Minimal Recursion Semantics (RMRS).}
}

@inproceedings{Callmeier_etal:04,
  author       = {Callmeier, Ulrich and Eisele, Andreas and Sch{\"a}fer, Ulrich and Siegel, Melanie},
  title        = {The {DeepThought} Core Architecture Framework},
  booktitle    = {Proceedings of the 4th International Conference on Language Resources and Evaluation ({LREC}) 2004},
  pages        = {1205--1208},
  publisher    = {{ELRA}},
  address      = {Lisbon, Portugal},
  month        = may,
  year         = {2004},
  url          = {http://www.dfki.de/dfkibib/publications/docs/dtcaf-lrec2004-final.pdf},
  abstract     = {The research performed in the DeepThought project aims at demonstrating the potential of deep linguistic processing if combined with shallow methods for robustness.
                  Classical information retrieval is extended by high precision concept indexing and relation detection.
                  On the basis of this approach, the feasibility of three ambitious applications will be demonstrated, namely: precise information extraction for business intelligence; email response management for customer relationship management; creativity support for document production and collective brainstorming.
                  Common to these applications, and the basis for their development is the XML-based, RMRS-enabled core architecture framework that will be described in detail in this paper.
                  The framework is not limited to the applications envisaged in the DeepThought project, but can also be employed e.g. to generate and make use of XML standoff annotation of documents and linguistic corpora, and in general for a wide range of NLP-based applications and research purposes.}
}

@inproceedings{Lee_etal:04,
  author       = {Lee, Kiyong and Burnard, Lou and Romary, Laurent and de la Clergerie, Eric and Sch{\"a}fer, Ulrich and Declerck, Thierry and Bauman, Syd and Bunt, Harry and Cl{\'e}ment, Lionel and Erjavec, Tomaz and Roussanaly, Azim and Roux, Claude},
  title        = {Towards an international standard on feature structure representation (2)},
  booktitle    = {Proceedings of the {LREC}-2004 workshop on A Registry of Linguistic Data Categories within an Integrated Language Resources Repository Area},
  pages        = {63--70},
  publisher    = {{ELRA}},
  address      = {Lisbon, Portugal},
  month        = may,
  year         = {2004},
  url          = {http://www.dfki.de/dfkibib/publications/docs/09-lee.pdf},
  abstract     = {This paper describes the preliminary results of a joint initiative of the TEI (Text Encoding Initiative) Consortium and the ISO Committee TC 37/SC 4 (Language Resource management) to provide a standard for the representation and interchange of feature structures.
                  The paper published in the proceedings of this workshop is in fact an extension of a paper published in the LREC 2004 proceedings, and about 50\% are identical with it.}
}

@article{Drozdzynski_etal:04a,
  author       = {Drozdzynski, Witold and Krieger, Hans-Ulrich and Piskorski, Jakub and Sch{\"a}fer, Ulrich and Xu, Feiyu},
  title        = {Shallow Processing with Unification and Typed Feature Structures --- {F}oundations and Applications},
  journal      = {K{\"u}nstliche Intelligenz},
  volume       = {1},
  pages        = {17--23},
  year         = {2004},
  url          = {http://www.kuenstliche-intelligenz.de/fileadmin/template/main/archiv/2004\_1/sprout-web.pdf},
  abstract     = {We present SProUT, a platform for the development of multilingual shallow text processing systems.
                  A grammar in SProUT consists of a set of rules, where the left-hand side is a regular expression over typed feature structures (TFSs), representing the recognition pattern, and the right-hand side a TFS, specifying how the output structure looks like.
                  The reusable core components of SProUT are a finite-state machine toolkit, a regular compiler, a finite-state machine interpreter, a typed feature structure package, and a set of linguistic processing resources.
                  Several applications which make use of SProUT are presented.
                  The system is implemented in Java and C(++), and runs under both MS Windows and Linux.}
}

@inproceedings{Frank_etal:03,
  author       = {Frank, Anette and Becker, Markus and Crysmann, Berthold and Kiefer, Bernd and Sch{\"a}fer, Ulrich},
  title        = {Integrated Shallow and Deep Parsing: {TopP} meets {HPSG}},
  booktitle    = {Proceedings of {ACL}-2003},
  pages        = {104--111},
  publisher    = {{ACL}},
  address      = {Sapporo, Japan},
  year         = {2003},
  url          = {http://www.dfki.de/dfkibib/publications/docs/ACL2003\_Franketal.pdf},
  abstract     = {We present a novel, data-driven method for integrated shallow and deep parsing.
                  Mediated by an XML-based multi-layer annotation architecture, we interleave a robust, but accurate stochastic topological field parser of German with a constraint-based HPSG parser.
                  Our annotation-based method for dovetailing shallow and deep phrasal constraints is highly flexible, allowing targeted and fine-grained guidance of constraint-based parsing.
                  We conduct systematic experiments that demonstrate substantial performance gains.}
}

@inproceedings{Busemann_etal:03,
  author       = {Busemann, Stephan and Drozdzynski, Witold and Krieger, Hans-Ulrich and Piskorski, Jakub and Sch{\"a}fer, Ulrich and Uszkoreit, Hans and Xu, Feiyu},
  title        = {Integrating Information Extraction and Automatic Hyperlinking},
  booktitle    = {Proceedings of the Interactive Posters/Demonstration at {ACL}-03},
  pages        = {117--120},
  publisher    = {{ACL}},
  address      = {Sapporo, Japan},
  year         = {2003},
  url          = {http://www.dfki.de/dfkibib/publications/docs/IntegratingIEandHyperlinking-ACL2003.pdf},
  abstract     = {This paper presents a novel information system integrating advanced information extraction technology and automatic hyper-linking.
                  Extracted entities are mapped into a domain ontology that relates concepts to a selection of hyperlinks.
                  For information extraction, we use SProUT, a generic platform for the development and use of multilingual text processing components.
                  By combining finite-state and unification-based formalisms, the grammar formalism used in SProUT offers both processing efficiency and a high degree of declarativeness.
                  The ExtraLink demo system showcases the extraction of relevant concepts from German texts in the tourism domain, offering the direct connection to associated web documents on demand.}
}

@inproceedings{Schaefer:03,
  author       = {Sch{\"a}fer, Ulrich},
  title        = {{WHAT}: {A}n {XSLT}-based Infrastructure for the Integration of Natural Language Processing Components},
  booktitle    = {Proceedings of the Workshop on the Software Engineering and Architecture of Language Technology Systems ({SEALTS}), {HLT-NAACL03}, May 31},
  pages        = {9--13},
  address      = {Edmonton, Canada},
  year         = {2003},
  url          = {http://www.dfki.de/dfkibib/publications/docs/schaeferwhat.pdf},
  abstract     = {The idea of the Whiteboard project is to integrate deep and shallow natural language processing components in order to benefit from their synergy.
                  The project came up with the first fully integrated hybrid system consisting of a fast HPSG parser that utilizes tokenization, PoS, morphology, lexical, named entity, phrase chunk and (for German) topological sentence field analyses from shallow components.
                  This integration increases robustness, directs the search space and hence reduces processing time of the deep parser.
                  In this paper, we focus on one of the central integration facilities, the XSLT-based Whiteboard Annotation Transformer (WHAT), report on the benefits of XSLT-based NLP component integration, and present examples of XSL transformation of shallow and deep annotations used in the integrated architecture.
                  The infrastructure is open, portable and well suited for, but not restricted to the development of hybrid NLP architectures as well as NLP applications.}
}

@inproceedings{Bering_etal:03, Author = {Bering, Christian and Drozdzynski, Witold and Erbach, Gregor and Guasch, Clara and Homola, Petr and Lehmann, Sabine and Hong, Li and Krieger, Hans-Ulrich and Piskorski, Jakub and Sch{\"a}fer, Ulrich and Shimada, Atsuko and Siegel, Melanie and Xu, Feiyu and Ziegler-Eisele, Dorothee}, Title = {Corpora and evaluation tools for multilingual named entity grammar development}, BookTitle = {Proceedings of Multilingual Corpora Workshop at Corpus Linguistics}, Year = {2003}, Pages = {42--52}, Address = {Lancaster, UK}, Month = {March}, Abstract = {We present an effort for the development of multilingual named entity grammars in a unification-based finite-state formalism (SProUT). Following an extended version of the MUC7 standard, we have developed Named Entity Recognition grammars for German, Chinese, Japanese, French, Spanish, English, and Czech. The grammars recognize person names, organizations, geographical locations, currency, time and date expressions. Subgrammars and gazetteers are shared as much as possible for the grammars of the different languages. Multilingual corpora from the business domain are used for grammar development and evaluation. The annotation format (named entity and other linguistic information) is described. 
We present an evaluation tool which provides detailed statistics and diagnostics, allows for partial matching of annotations, and supports user-defined mappings between different annotation and grammar output formats.}, URL = {http://www.dfki.de/~feiyu/multi-corpus.pdf} } @inproceedings{Becker_etal:02, Author = {Becker, Markus and Drozdzynski, Witold and Krieger, Hans-Ulrich and Piskorski, Jakub and Sch{\"a}fer, Ulrich and Xu, Feiyu}, Title = {SProUT -- {S}hallow Processing with Typed Feature Structures and Unification}, BookTitle = {Proceedings of the International Conference on {NLP} ({ICON}-2002)}, Month = {December}, Address= {Mumbai, India}, URL = {http://www.dfki.de/dfkibib/publications/docs/sprout.pdf}, Abstract = {We present SProUT, a platform for the development of multilingual shallow text processing systems. A grammar in SProUT consists of a set of rules, where the left-hand side is a regular expression over typed feature structures (TFSs), representing the recognition pattern, and the right-hand side is a sequence of TFSs, specifying how the output structure looks like. 
The reusable core components fo SProUT are a finite-state machine toolkit, a regular compiler, a typed feature structure package, and a finite-state machine interpreter.}, Year = {2002} } @inproceedings{Crysmann_etal:02, Author = {Crysmann, Berthold and Frank, Anette and Kiefer, Bernd and Krieger, Hans-Ulrich and M{\"u}ller, Stefan and Neumann, G{\"u}nter and Piskorski, Jakub and Sch{\"a}fer, Ulrich and Siegel, Melanie and Uszkoreit, Hans and Xu, Feiyu}, Title = {An Integrated Architecture for Shallow and Deep Processing}, BookTitle = {Proceedings of {ACL}-2002}, publisher = {{ACL}}, Address= {Philadelphia, PA}, URL = {http://www.dfki.de/dfkibib/publications/docs/wb-acl02.pdf}, Abstract = {We present an architecture for the integration of shallow and deep NLP components which is aimed at flexible combination of different language technologies for a range of practical current and future applications. In particular, we describe the integration of a high-level HPSG parsing system with different high-performance shallow components, ranging from named entity recognition to chunk parsing and shallow clause recognition. The NLP components enrich a representation of natural language text with layers of new XML meta-information using a single shared data structure, called the text chart. 
We describe details of the integration methods, and show how information extraction and language checking applications for realworld German text benefit from a deep grammatical analysis.}, Month = {July}, Year = {2002} } @inproceedings{Neumann_Schaefer:02, Author = {Neumann, G{\"u}nter and Sch{\"a}fer, Ulrich}, Title = {Whiteboard -- {E}ine {XML}-basierte {A}rchitektur f{\"u}r die {A}nalyse nat{\"u}rlichsprachlicher {T}exte}, BookTitle = {Proceedings of Online 2002, 25th {E}uropean Congress Fair for Technical Communication}, Editor = {J{\"a}nichen, Stefan}, Volume = {{C}}, Address= {D{\"u}sseldorf, Germany}, Publisher = {Online {GmbH} {K}ongresse und {M}essen f{\"u}r technische {K}ommunikation}, Pages = {635.01--635.12}, URL = {http://www.dfki.de/dfkibib/publications/docs/Whiteboard-DFKI-online2002.pdf}, Year = {2002} } @mastersthesis{Schaefer:95, Author = {Sch{\"a}fer, Ulrich}, Title = {Parameterized Type Expansion in the Feature Structure Formalism {TDL}}, Address = {Saarbr{\"u}cken, Germany}, School = {Saarland University, Computer Science Department}, URL = {http://www.dfki.de/dfkibib/publications/docs/usthesis.pdf}, Year = {1995}, Abstract = {Over the last few years, unification-based grammar formalisms have become the predominant paradigm in natural language processing systems because of their monotonicity, declarativeness, and reversibility. From the viewpoint of computer science, typed feature structures can be seen as data structures that allow representation of linguistic knowledge in a uniform fashion. Type expansion is an operation that makes the constraints on a typed feature structure explicit and determines their satisfiability. We describe an efficient expansion algorithm that takes care of recursive type definitions and allows exploration of different expansion strategies through the use of control knowledge. This knowledge is specified in a separate layer, independently of grammatical information. 
Memoization of the type expansion function drastically reduces the number of unifications. In the second part, nonmonotonic extensions to TDL and the implementation of well-typedness checks are presented. Both are closely related to the type expansion algorithm. The algorithms have been implemented in Common Lisp and are integrated parts of TDL and a large natural language dialog system.} } @techreport{Krieger_Schaefer:95, Author = {Krieger, Hans-Ulrich and Sch{\"a}fer, Ulrich}, Title = {Efficient Parameterizable Type Expansion for Typed Feature Formalisms}, Address = {Saarbr{\"u}cken, Germany}, Institution = {{DFKI}}, Number = {{RR-95-18}}, Type = {Research Report}, URL = {http://www.coli.uni-sb.de/publikationen/softcopies/Krieger:1995:EPTb.pdf}, Year = {1995} } @inproceedings{Krieger_Schaefer:94a, Author = {Krieger, Hans-Ulrich and Sch{\"a}fer, Ulrich}, Title = {{TDL} -- {A} Type Description Language for Constraint-Based Grammars}, BookTitle = {Proceedings of the15th International Conference on Computational Linguistics ({COLING} -94)}, Volume = {2}, Address= {Kyoto, Japan}, Pages = {893--899}, URL = {http://www.coli.uni-sb.de/publikationen/softcopies/Krieger:1994:TTDa.pdf}, Year = {1994} } @techreport{Krieger_Schaefer:94b, Author = {Krieger, Hans-Ulrich and Sch{\"a}fer, Ulrich}, Title = {{TDL} -- {A} Type Description Language for {HPSG}. {P}art 2: User Guide}, Address = {Saarbr{\"u}cken}, Institution = {{DFKI}}, Number = {{D-94-14}}, Type = {Document}, URL = {http://www.coli.uni-sb.de/publikationen/softcopies/Krieger:1994:TTDc.pdf}, Year = {1994} } @techreport{Krieger_Schaefer:94c, Author = {Krieger, Hans-Ulrich and Sch{\"a}fer, Ulrich}, Title = {{TDL} -- {A} Type Description Language for {HPSG}. 
{P}art 1: Overview}, Address = {Saarbr{\"u}cken}, Institution = {{DFKI}}, Number = {{RR-94-37}}, Type = {Research Report}, URL = {http://www.coli.uni-sb.de/publikationen/softcopies/Krieger:1994:TTDb.pdf}, Year = {1994} } @inproceedings{Krieger_Schaefer:93b, Author = {Krieger, Hans-Ulrich and Sch{\"a}fer, Ulrich}, Title = {{TDL} -- {A} Type Description Language for Unification-Based Grammars}, BookTitle = {Proceedings of the Workshop on ``{N}euere {E}ntwicklungen der deklarativen {KI}-{P}rogrammierung"}, Editor = {Boley, Harold and Bry, Fran\c{c}ois and Geske, Ulrich}, Series= {Research Report}, Number= {{RR-93-35}}, Address= {Saarbr{\"u}cken, Germany}, Publisher = {{DFKI}}, Pages = {67--82}, URL = {http://www.coli.uni-sb.de/publikationen/softcopies/Krieger:1993:TTD.pdf}, Abstract = {This paper presents TDL, a typed feature-based representation language and inference system. Type definitions in TDL consist of type and feature constraints over the boolean connectives. TDL supports open- and closed-world reasoning over types and allows for partitions and incompatible types. Working with partially as well as with fully expanded types is possible. Efficient reasoning in TDL is accomplished through specialized modules.}, Year = {1993} } @techreport{Krieger_Schaefer:93a, Author = {Krieger, Hans-Ulrich and Sch{\"a}fer, Ulrich}, Title = {{TDL} {ExtraLight} User's Guide}, Address = {Saarbr{\"u}cken, Germany}, Institution = {{DFKI}}, Number = {{D-93-09}}, Type = {Document}, URL = {http://www.coli.uni-sb.de/publikationen/softcopies/Krieger:1993:TEU.pdf}, Abstract = {This paper serves as a user's guide to the first version of the description language TDL used for the specification of linguistic knowledge in the DISCO project of the DFKI.}, Year = {1993} }