Package dataprocessor :: Package output :: Module posteditingwriter
[hide private]
[frames | no frames]

Source Code for Module dataprocessor.output.posteditingwriter

 1  """ 
 2  @author: Eleftherios Avramidis 
 3  """ 
 4   
 5  from xml.dom import minidom 
 6  from sentence.parallelsentence import ParallelSentence 
 7  from xml.sax.saxutils import escape 
 8   
class PosteditingWriter(object):
    """
    Serialize parallel sentences into an "editing-task" XML document.

    The document is kept in C{self.object_xml} as a C{minidom.Document}.
    Its root element "editing-task" holds one "editing-item" element per
    parallel sentence; each item contains a "source" element, one "system"
    element per translation and, when the sentence has a reference, a
    "post-edited" element.
    """

    def __init__(self, data, doc_attributes=None):
        """
        Wrap an existing XML document or build one from parallel sentences.

        @param data: a ready C{minidom.Document} (stored as is), a list or
            tuple of parallel sentence objects, or any other object that
            provides C{get_parallelsentences()}
        @param doc_attributes: optional mapping of attribute names to values
            for the root element; ignored when C{data} is already a document
        """
        self.object_xml = None
        if isinstance(data, minidom.Document):
            self.object_xml = data
        elif isinstance(data, (list, tuple)):
            self.convert_to_xml(data, doc_attributes)
        else:
            self.convert_to_xml(data.get_parallelsentences(), doc_attributes)

    def convert_to_xml(self, parallelsentences, doc_attributes=None):
        """
        Build the XML document and populate it with the given sentences.

        The resulting document replaces C{self.object_xml}.

        @param parallelsentences: an iterable of parallel sentence objects
            providing C{get_attributes()}, C{get_attribute(key)},
            C{get_source()}, C{get_translations()} and C{get_reference()}
        @param doc_attributes: optional mapping of attribute names to values
            that are set (as strings) on the root element
        """
        doc_xml = minidom.Document()
        task_xml = doc_xml.createElement("editing-task")

        # Values are stored unescaped on purpose: minidom escapes markup
        # characters itself when serializing, so escaping them here as well
        # would write "&amp;amp;" for a plain "&".
        for attribute_key, attribute_value in (doc_attributes or {}).items():
            task_xml.setAttribute(attribute_key, str(attribute_value))

        for ps in parallelsentences:
            item_xml = doc_xml.createElement("editing-item")

            # attributes of the parallel sentence itself
            for attribute_key in ps.get_attributes().keys():
                item_xml.setAttribute(attribute_key,
                                      str(ps.get_attribute(attribute_key)))

            # the source sentence comes first
            item_xml.appendChild(
                self._create_xml_sentence(doc_xml, ps.get_source(), "source"))

            # then every system translation, in the order given
            for tgt in ps.get_translations():
                item_xml.appendChild(
                    self._create_xml_sentence(doc_xml, tgt, "system"))

            # the reference is optional and is exported as the post-edit
            reference = ps.get_reference()
            if reference:
                item_xml.appendChild(
                    self._create_xml_sentence(doc_xml, reference, "post-edited"))

            task_xml.appendChild(item_xml)

        doc_xml.appendChild(task_xml)
        self.object_xml = doc_xml

    def write_to_file(self, filename):
        """
        Write the stored document to a file as pretty-printed UTF-8 XML.

        @param filename: path of the file to create or overwrite
        """
        # Serialize before opening, so that a serialization error does not
        # leave a truncated file behind.
        xml_bytes = self.object_xml.toprettyxml("\t", "\n", "utf-8")
        # With an explicit encoding the serializer returns encoded bytes,
        # hence the binary mode; "with" closes the file even on errors.
        with open(filename, "wb") as file_object:
            file_object.write(xml_bytes)

    def _create_xml_sentence(self, doc_xml, sentence, tag):
        """
        Wrap the text and the attributes of a sentence into an XML element.

        @param doc_xml: the C{minidom.Document} used to create the nodes
        @param sentence: an object providing C{get_attributes()},
            C{get_attribute(key)} and C{get_string()}
        @param tag: the tag name of the element to create
        @return: the new element, carrying the sentence attributes and one
            text node with the sentence string stripped of surrounding
            whitespace
        """
        sentence_xml = doc_xml.createElement(tag)

        # No manual escaping here either: minidom escapes attribute values
        # and text nodes on output.
        for attribute_key in sentence.get_attributes().keys():
            sentence_xml.setAttribute(
                attribute_key, str(sentence.get_attribute(attribute_key)))

        text = sentence.get_string().strip()
        sentence_xml.appendChild(doc_xml.createTextNode(text))

        return sentence_xml