Package dataprocessor :: Package output :: Module taraxuwriter
[hide private]
[frames | no frames]

Source Code for Module dataprocessor.output.taraxuwriter

 1  """ 
 2  @author: Eleftherios Avramidis 
 3  """ 
 4   
 5  from xml.dom import minidom 
 6  from sentence.parallelsentence import ParallelSentence 
 7  from xml.sax.saxutils import escape 
 8  from xmlwriter import XmlWriter 
 9   
10   
class TaraXUWriter(XmlWriter):
    """
    Writer that turns parallel sentences into an XML document.

    The document is held as a C{minidom.Document} in C{self.object_xml}.
    Its root element is C{<doc>}, with one C{<sentence>} child per parallel
    sentence. Each C{<sentence>} contains one C{<source>}, one C{<target>}
    per translation and, when available, one C{<reference>}.
    """

    def __init__(self, data):
        """
        Constructor

        @param data: either a ready-made C{minidom.Document} (stored as is),
            a list of ParallelSentence objects, or any other object that
            provides C{get_parallelsentences()}; in the latter two cases the
            sentences are converted to XML right away.
        """
        if isinstance(data, minidom.Document):
            self.object_xml = data
            return

        self.object_xml = None
        if isinstance(data, list):
            parallelsentences = data
        else:
            parallelsentences = data.get_parallelsentences()
        self.convert_to_xml(parallelsentences)

    def convert_to_xml(self, parallelsentences):
        """
        Creates an XML document and populates it with the (parallel)
        sentences of the given list. The resulting XML object gets stored in
        C{self.object_xml}, replacing any previous document.

        @param parallelsentences: a list of ParallelSentence objects
        """
        doc_xml = minidom.Document()
        root_xml = doc_xml.createElement("doc")

        for ps in parallelsentences:
            parallelsentence_xml = doc_xml.createElement("sentence")

            # add attributes of parallel sentence
            for attribute_key in ps.get_attributes().keys():
                parallelsentence_xml.setAttribute(
                    attribute_key, ps.get_attribute(attribute_key))

            # add source as a child of parallel sentence
            src_xml = self._create_xml_sentence(
                doc_xml, ps.get_source(), "source")
            parallelsentence_xml.appendChild(src_xml)

            # add translations
            for tgt in ps.get_translations():
                tgt_xml = self._create_xml_sentence(doc_xml, tgt, "target")
                parallelsentence_xml.appendChild(tgt_xml)

            # add reference as a child of parallel sentence (it is optional)
            reference = ps.get_reference()
            if reference:
                ref_xml = self._create_xml_sentence(
                    doc_xml, reference, "reference")
                parallelsentence_xml.appendChild(ref_xml)

            # append the newly populated parallel sentence to the document
            root_xml.appendChild(parallelsentence_xml)

        doc_xml.appendChild(root_xml)
        self.object_xml = doc_xml

    def write_to_file(self, filename):
        """
        Writes the stored XML document to a file, pretty-printed with tab
        indentation and encoded as UTF-8.

        @param filename: path of the file to be (over)written
        """
        # Serialise before opening the file, so that a failure here does not
        # leave a truncated file behind.
        xml_bytes = self.object_xml.toprettyxml("\t", "\n", "utf-8")

        # toprettyxml() with an explicit encoding returns encoded bytes,
        # hence binary mode; the context manager closes the file even if the
        # write fails.
        with open(filename, 'wb') as file_object:
            file_object.write(xml_bytes)

    def _create_xml_sentence(self, doc_xml, sentence, tag):
        """
        Helper function that fetches the text and the attributes of a
        sentence and wraps them up into a minidom XML element.

        @param doc_xml: the minidom document used for creating the nodes
        @param sentence: object providing C{get_attributes()},
            C{get_attribute(key)} and C{get_string()}
        @param tag: name of the XML element to be created
        @return: the newly created element (not yet attached to the document)
        """
        sentence_xml = doc_xml.createElement(tag)

        # Values are handed over raw: minidom escapes text and attribute
        # values itself on serialisation, so escaping them here as well
        # would write e.g. "&" as "&amp;amp;".
        for attribute_key in sentence.get_attributes().keys():
            sentence_xml.setAttribute(
                attribute_key, sentence.get_attribute(attribute_key))

        text = sentence.get_string().strip()
        sentence_xml.appendChild(doc_xml.createTextNode(text))

        return sentence_xml