Package dataprocessor :: Package sax :: Module saxwrapper
[hide private]
[frames | no frames]

Source Code for Module dataprocessor.sax.saxwrapper

  1  #!/usr/bin/python 
  2  # -*- coding: utf-8 -*- 
  3   
  4  """ 
  5  @author: Eleftherios Avramidis 
  6  """ 
  7   
  8   
  9  from xml.sax.saxutils import XMLGenerator 
 10  from xml.sax import ContentHandler 
 11  from dataprocessor.input.genericreader import GenericReader 
 12  from dataprocessor.output.xmlwriter import GenericWriter 
 13  from sentence.sentence import SimpleSentence 
 14  from sentence.parallelsentence import ParallelSentence 
 15  import StringIO 
 16  from copy import deepcopy 
 17   
 18   
class SaxWrapper(ContentHandler):
    """
    Abstract SAX wrapper to facilitate use of older minidom processors.

    While the document is streamed through SAX, every element named
    C{element_focus} is re-serialised into an in-memory XML string, wrapped
    in the ancestor elements that were open when it started. That string is
    handed to C{reader}; the first parallel sentence the reader returns is
    collected, and all collected sentences are passed to C{writer} when the
    document ends.
    """

    # Number of focus elements processed so far. Defined on the class; the
    # first increment in endElement creates a per-instance counter.
    i = 0

    def __init__(self, element_focus, reader = GenericReader, writer = GenericWriter, filename_out = ""):
        """
        @param element_focus: name of the element whose occurrences are buffered and processed one at a time
        @type element_focus: str
        @param reader: callable invoked as C{reader(xml_string, True, True)}; its result must provide C{get_parallelsentences()}
        @param writer: callable invoked as C{writer(parallelsentences)}; its result must provide C{write_to_file(filename)}
        @param filename_out: handed to the writer's C{write_to_file} at the end of the document
        @type filename_out: str
        """
        # Explicit base call (not super()) so that this also works where
        # ContentHandler is an old-style class.
        ContentHandler.__init__(self)

        self.element_focus = element_focus
        self.reader = reader
        self.writer = writer
        self.filename_out = filename_out

        # True while we are inside a focus element.
        self.recording = False
        # In-memory target and the XMLGenerator writing into it; both are
        # recreated for every focus element.
        self.stringbuffer = None
        self.buffer_generator = None
        # Character data gathered since the last endElement.
        self.nodetext = u""
        # (name, attrs) of the elements currently open outside any focus
        # element; they are replayed around each buffered fragment.
        self.tobeparsedonce = []
        # Results collected from the reader, one per focus element.
        self.parallelsentences = []

    def startElement(self, name, attrs=None):
        """
        Signals the start of an element (simplesentence or parallelsentence)
        @param name: the name of the element
        @type name: str
        @param attrs: the attributes of the element; an empty mapping is used when omitted
        @type attrs: Attributes
        """
        if attrs is None:
            # XMLGenerator.startElement iterates attrs.items(), so the
            # fallback has to be a mapping (the former list default failed).
            attrs = {}

        if name == self.element_focus:
            # A new fragment begins: start a fresh buffer and re-open all
            # ancestors so that the fragment is a complete document.
            self.recording = True
            self.stringbuffer = StringIO.StringIO()
            self.buffer_generator = XMLGenerator(self.stringbuffer, encoding="utf-8")
            for (element_name, element_attrs) in self.tobeparsedonce:
                self.buffer_generator.startElement(element_name, element_attrs)

        if self.recording:
            self.buffer_generator.startElement(name, attrs)
        else:
            # Remember the ancestor so it can wrap later focus elements.
            self.tobeparsedonce.append((name, attrs))

    def characters(self, ch):
        """
        The Parser will call this method to report each chunk of character data.
        We use it to store the string
        @param ch: character being parsed
        @type ch: str
        """
        # Text outside a focus element is dropped.
        if self.recording:
            # NOTE(review): the text is only written out by the next
            # endElement, so text that precedes a child start tag ends up
            # inside that child. Left as is; confirm the readers rely on
            # (or tolerate) this.
            self.nodetext = u"%s%s" % (self.nodetext, ch)

    def endElement(self, name):
        """
        Signals the end of an element.
        Data stored in global vars of the class, time to create our objects and fire their processing
        @param name: the name of the element
        @type name: str
        """
        if not self.recording:
            # An ancestor outside any focus element was closed.
            self.tobeparsedonce.pop()
            return

        self.buffer_generator.characters(self.nodetext)
        self.buffer_generator.endElement(name)
        self.nodetext = u""

        if name != self.element_focus:
            return

        # The focus element is complete: close the replayed ancestors
        # innermost-first to finish the fragment.
        self.recording = False
        for (element_name, element_attrs) in reversed(self.tobeparsedonce):
            self.buffer_generator.endElement(element_name)
        self.stringbuffer.flush()
        string = self.stringbuffer.getvalue()
        # Release the buffer before parsing, so it is not left open if the
        # reader raises.
        self.stringbuffer.close()
        self.stringbuffer = None

        reader = self.reader(string, True, True)
        parallelsentence = reader.get_parallelsentences()[0]
        self.i += 1
        # Progress output; the parenthesised form prints the same on Python 2.
        print(self.i)
        self.parallelsentences.append(parallelsentence)

    def endDocument(self):
        """
        Signals the end of the document: all collected parallel sentences are
        given to the writer and written to C{filename_out}.
        """
        self.writer(self.parallelsentences).write_to_file(self.filename_out)