Package dataprocessor :: Package ce :: Module ce
[hide private]
[frames | no frames]

Source Code for Module dataprocessor.ce.ce

 1  ''' 
 2  Created on 26 Jun 2012 
 3   
 4  @author: Eleftherios Avramidis 
 5  ''' 
 6   
 7  import codecs 
 8  import sys 
 9  import tempfile 
10  import shutil 
11  from xml.etree.cElementTree import iterparse 
12   
class SelectRank():
    """
    Read a JCML file and export the sentences that carry a particular
    rank to a text file.

    For every sentence element in the input, the text of the translation
    element whose ``rank`` attribute equals the desired rank is written to
    the output file.
    """

    # Tag names looked up by convert(). The original class read these
    # attributes without ever defining them, so convert() failed with
    # AttributeError. They are class-level defaults so that an instance or
    # subclass that already sets them keeps working unchanged.
    # NOTE(review): values assume the usual JCML layout
    # (<judgedsentence> containing <tgt rank="...">) -- confirm against
    # the JCML files actually in use.
    TAG_SENT = "judgedsentence"
    TAG_TGT = "tgt"

    def __init__(self, input_xml_filename, output_filename, desired_rank_value):
        """
        @param input_xml_filename: path of the JCML (XML) file to read
        @param output_filename: path of the text file to write
        @param desired_rank_value: rank to select; anything accepted by
            float() (e.g. 1, "1", "1.0"), compared numerically
        """
        self.input_filename = input_xml_filename
        self.output_filename = output_filename
        self.desired_rank_value = desired_rank_value

    def convert(self):
        """
        Stream through the input file and write the selected sentences.

        For each TAG_SENT element, the text of its TAG_TGT child whose
        ``rank`` equals the desired rank is written to the output file,
        encoded as UTF-8. If several children carry that rank, only the
        last one is written; if none does, nothing is written for that
        sentence. The text is written exactly as it appears in the XML;
        no separator or newline is added between sentences.

        @raise ValueError: if the desired rank or a ``rank`` attribute
            cannot be converted to float
        @raise KeyError: if a TAG_TGT element has no ``rank`` attribute
        """
        # Loop-invariant; converting up front also means an invalid rank
        # is reported before the output file is created or truncated.
        wanted_rank = float(self.desired_rank_value)

        # Binary mode lets the XML parser honour the encoding declared in
        # the document instead of relying on the platform default. The
        # "with" blocks close both files even when parsing fails.
        with open(self.input_filename, "rb") as source_xml_file:
            with open(self.output_filename, "wb") as target_file:
                context = iter(iterparse(source_xml_file,
                                         events=("start", "end")))
                # The first event is the start of the root element; keep a
                # handle on it so processed children can be released.
                _event, root = next(context)

                target_sentence = ""
                for event, elem in context:
                    # Text and attributes are only complete on "end".
                    if event != "end":
                        continue

                    if elem.tag == self.TAG_TGT:
                        if float(elem.attrib["rank"]) == wanted_rank:
                            # elem.text is None for an empty element.
                            target_sentence = elem.text or ""

                    elif elem.tag == self.TAG_SENT:
                        target_file.write(target_sentence.encode("utf-8"))
                        target_sentence = ""
                        # Drop the finished sentence from the tree so that
                        # memory use stays flat on large files.
                        root.clear()
54