Package dataprocessor :: Package input :: Module schemabasedreader
[hide private]
[frames | no frames]

Source Code for Module dataprocessor.input.schemabasedreader

 1  #!/usr/bin/python 
 2  # -*- coding: utf-8 -*- 
 3   
 4  """ 
 5  Created on 28 Oct 2010 
 6   
 7  @author: Eleftherios Avramidis 
 8  """ 
 9   
10  import codecs 
11  from StringIO import StringIO 
12  from lxml import etree  
13  from lxml import objectify  
14   
# XML Schema (XSD) for the JCML input format, held in an in-memory file-like
# buffer so that it can be passed to lxml through its ``file=`` argument.
#
# Structure declared below:
#   * root element <jcml> (type "corpus") containing a <judgedsentence>
#   * <judgedsentence> (type "parallelsentence") containing, in order,
#     <src>, <tgt> and <ref>, with the attributes id (xs:int), langsrc,
#     langtgt, rank and testset (xs:string); any further attribute is
#     admitted through <xs:anyAttribute/>
#   * <src>/<tgt>/<ref> (type "sentence") are string content with a
#     "system" attribute
#
# NOTE(review): <judgedsentence> is declared without minOccurs/maxOccurs, so
# the XSD defaults apply (exactly one occurrence) - confirm this is intended
# for a corpus file.
# NOTE(review): a StringIO is a one-shot stream; once it has been read to the
# end it must be rewound (seek(0)) before it can be parsed again.
SCHEMA = StringIO("""\
    <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
        <xs:element name="jcml" type="corpus" />
        <xs:complexType name="corpus">
            <xs:sequence>
                <xs:element name="judgedsentence" type="parallelsentence" />
            </xs:sequence>
        </xs:complexType>
        <xs:complexType name="parallelsentence">
            <xs:sequence>
                <xs:element name="src" type="sentence" />
                <xs:element name="tgt" type="sentence" />
                <xs:element name="ref" type="sentence" />
            </xs:sequence>
            <xs:attribute name="id" type="xs:int" /> 
            <xs:attribute name="langsrc" type="xs:string"  />
            <xs:attribute name="langtgt" type="xs:string" />
            <xs:attribute name="rank" type="xs:string" use="optional" />
            <xs:attribute name="testset" type="xs:string" use="optional" />
            <xs:anyAttribute/>
        </xs:complexType>    
        <xs:complexType name="sentence">
            <xs:simpleContent>
              <xs:extension base="xs:string">
                <xs:attribute name="system" type="xs:string" />
              </xs:extension>
            </xs:simpleContent>
        </xs:complexType>
    </xs:schema>
 """)

# Relative path of the JCML file that SchemaData opens and parses.
FILENAME = "../data/evaluations_all.jcml"
47   
class SchemaData(object):
    """
    Imports and directly objectifies the XML input data, based on a Schema
    description.

    Development suspended as it was not possible to have optional arguments.

    Attributes set by the constructor:
        data -- root of the objectified JCML document, as returned by
                lxml.objectify.fromstring
    """

    def __init__(self):
        """
        Compile the module-level SCHEMA, then read the file named by FILENAME
        and parse it with an objectify parser bound to that schema.

        The parsed root element is kept in ``self.data``.

        Raises whatever the underlying calls raise: IOError/OSError when
        FILENAME cannot be opened, and lxml's own errors when the schema or
        the document is rejected (see the lxml validation documentation).
        """
        # SCHEMA is a shared module-level stream. Rewind it so that every
        # instantiation sees the full schema text, not an exhausted buffer.
        SCHEMA.seek(0)
        schema = etree.XMLSchema(file=SCHEMA)
        parser = objectify.makeparser(schema=schema)

        # Read raw bytes and let the XML parser handle the encoding: lxml
        # refuses already-decoded unicode input that still carries an
        # "<?xml ... encoding=...?>" declaration. The with-block also
        # guarantees the file handle is closed.
        with open(FILENAME, 'rb') as jcml_file:
            jcml = jcml_file.read()

        # Keep the objectified tree; the original discarded it.
        self.data = objectify.fromstring(jcml, parser)
67