1 """
2 @author: Eleftherios Avramidis
3 """
4
5 from xml.dom import minidom
6 from sentence.parallelsentence import ParallelSentence
7 from xml.sax.saxutils import escape
8
10 """
11 classdocs
12 """
13
14
def __init__(self, data, doc_attributes=None):
    """
    Wrap an existing XML document or build a new one from parallel sentences.

    @param data: one of
        - a minidom.Document, which is stored unchanged in self.object_xml,
        - a list of parallel sentences, which is handed to convert_to_xml(),
        - any other object, which must provide get_parallelsentences(); the
          result of that call is handed to convert_to_xml()
    @param doc_attributes: attributes for the root element of a newly built
        document; ignored when data already is a minidom.Document. None (the
        default) stands for "no attributes".
    """
    if isinstance(data, minidom.Document):
        self.object_xml = data
        return

    # A fresh dict per call: a literal {} default in the signature would be
    # one object shared by every instance that relies on the default.
    if doc_attributes is None:
        doc_attributes = {}

    self.object_xml = None
    if isinstance(data, list):
        parallelsentences = data
    else:
        parallelsentences = data.get_parallelsentences()
    self.convert_to_xml(parallelsentences, doc_attributes)
27
28
def convert_to_xml(self, parallelsentences, doc_attributes=None):
    """
    Build the XML document for the given parallel sentences and store it in
    self.object_xml.

    The root is an <editing-task> element carrying doc_attributes. Every
    parallel sentence becomes one <editing-item> child that carries the
    sentence attributes and contains, in this order: one <source>, one
    <system> per translation and, if the sentence has a reference, one
    <post-edited> element.

    @param parallelsentences: an iterable of ParallelSentence objects
    @param doc_attributes: mapping of attributes for the root element; None
        (the default) or an empty mapping adds no attributes
    """
    doc_xml = minidom.Document()
    task_xml = doc_xml.createElement("editing-task")

    # Values are stored raw on purpose: minidom escapes markup characters
    # itself when the document is serialised, so escaping them here as well
    # would write "&amp;amp;" for every "&". This also matches how the
    # <editing-item> attributes below are stored.
    for key, value in (doc_attributes or {}).items():
        task_xml.setAttribute(key, str(value))

    for ps in parallelsentences:
        item_xml = doc_xml.createElement("editing-item")

        for key in ps.get_attributes().keys():
            item_xml.setAttribute(key, str(ps.get_attribute(key)))

        item_xml.appendChild(
            self._create_xml_sentence(doc_xml, ps.get_source(), "source"))

        for translation in ps.get_translations():
            item_xml.appendChild(
                self._create_xml_sentence(doc_xml, translation, "system"))

        # The reference is optional; a falsy value means there is none.
        reference = ps.get_reference()
        if reference:
            item_xml.appendChild(
                self._create_xml_sentence(doc_xml, reference, "post-edited"))

        task_xml.appendChild(item_xml)

    doc_xml.appendChild(task_xml)
    self.object_xml = doc_xml
74
75
def write_to_file(self, filename):
    """
    Serialise self.object_xml as pretty-printed, UTF-8 encoded XML (tab
    indentation, one node per line) and write it to the given file.

    @param filename: path of the file to create or overwrite
    """
    # With an explicit encoding, toprettyxml() yields encoded bytes, so the
    # file has to be opened in binary mode; a text-mode handle rejects bytes
    # on Python 3. The with-block closes the handle even if writing fails.
    with open(filename, 'wb') as file_object:
        file_object.write(self.object_xml.toprettyxml("\t", "\n", "utf-8"))
80
81
82
def _create_xml_sentence(self, doc_xml, sentence, tag):
    """
    Helper function that fetches the text and the attributes of a sentence
    and wraps them up into a minidom XML element.

    @param doc_xml: the minidom.Document used as factory for the new nodes
    @param sentence: object providing get_attributes(), get_attribute(key)
        and get_string()
    @param tag: name of the element to create
    @return: a new element named tag that carries the sentence attributes
        (converted with str()) and one text node holding the sentence string
        stripped of surrounding whitespace; it is not attached to doc_xml
    """
    sentence_xml = doc_xml.createElement(tag)

    # Attribute values and text are handed over raw: minidom escapes markup
    # characters itself when the node is serialised, so escaping them here
    # as well would write "&amp;amp;" for "&" and "&amp;lt;" for "<".
    for attribute_key in sentence.get_attributes().keys():
        sentence_xml.setAttribute(
            attribute_key, str(sentence.get_attribute(attribute_key)))

    text = sentence.get_string().strip()
    sentence_xml.appendChild(doc_xml.createTextNode(text))

    return sentence_xml
97