1
2
3
"""
Created on 28 Oct 2010

@author: Eleftherios Avramidis
"""
9
10 import codecs
11 from StringIO import StringIO
12 from lxml import etree
13 from lxml import objectify
14
# XML Schema (XSD) describing the JCML corpus format, wrapped in an in-memory
# stream so that it can be handed to lxml as a file-like object.
#
# Layout described by the schema:
#   * <jcml> root element of type "corpus", holding one or more
#     <judgedsentence> elements (maxOccurs="unbounded"; without it the XSD
#     default of exactly one occurrence would reject any multi-sentence corpus).
#   * each <judgedsentence> ("parallelsentence") is the sequence <src>, <tgt>,
#     <ref>, with the attributes id, langsrc, langtgt, rank and testset, plus
#     any additional attribute (xs:anyAttribute).
#   * <src>/<tgt>/<ref> ("sentence") are plain text with a "system" attribute.
#
# NOTE: this is a stream, so it is consumed by its first reader; call
# SCHEMA.seek(0) before handing it to a second consumer.
# NOTE(review): <src>, <tgt> and <ref> are each still required exactly once --
# confirm whether several <tgt> elements or a missing <ref> should be allowed.
SCHEMA = StringIO("""\
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="jcml" type="corpus" />
<xs:complexType name="corpus">
<xs:sequence>
<xs:element name="judgedsentence" type="parallelsentence" maxOccurs="unbounded" />
</xs:sequence>
</xs:complexType>
<xs:complexType name="parallelsentence">
<xs:sequence>
<xs:element name="src" type="sentence" />
<xs:element name="tgt" type="sentence" />
<xs:element name="ref" type="sentence" />
</xs:sequence>
<xs:attribute name="id" type="xs:int" />
<xs:attribute name="langsrc" type="xs:string" />
<xs:attribute name="langtgt" type="xs:string" />
<xs:attribute name="rank" type="xs:string" use="optional" />
<xs:attribute name="testset" type="xs:string" use="optional" />
<xs:anyAttribute/>
</xs:complexType>
<xs:complexType name="sentence">
<xs:simpleContent>
<xs:extension base="xs:string">
<xs:attribute name="system" type="xs:string" />
</xs:extension>
</xs:simpleContent>
</xs:complexType>
</xs:schema>
""")

# Relative path of the JCML file that is loaded and validated.
FILENAME = "../data/evaluations_all.jcml"
47
# Imports and directly objectifies the XML input data, based on a Schema
# description.
# Development suspended as it was not possible to have optional arguments.
#
# NOTE(review): the description above and the original "Constructor" remark
# read like class / method docstrings, but no enclosing definition is visible
# here -- confirm whether this code belongs inside a class constructor.

# Build a validating objectify parser from the XSD.
# SCHEMA is a shared in-memory stream; rewind it so that the schema is read
# in full even if something else has already consumed it.
SCHEMA.seek(0)
schema = etree.XMLSchema(file=SCHEMA)
parser = objectify.makeparser(schema=schema)

# Read the raw bytes and let the XML parser handle the decoding: lxml rejects
# unicode strings that carry an XML encoding declaration, and without a
# declaration it assumes UTF-8 anyway. The `with` block guarantees that the
# file handle is closed, even if reading fails.
with open(FILENAME, 'rb') as jcml_file:
    jcml = jcml_file.read()

# Root of the objectified (and schema-validated) document tree.
a = objectify.fromstring(jcml, parser)
67