1 '''
2 Created on 26 Jun 2012
3
4 @author: Eleftherios Avramidis
5 '''
6
7 import codecs
8 import sys
9 import tempfile
10 import shutil
11 from xml.etree.cElementTree import iterparse
12
14 """
15 This class reads a JCML file and exports
16 sentences with a particular rank to a text file
17 """
18 - def __init__(self, input_xml_filename, output_filename, desired_rank_value):
22
23
# Stream the XML document at self.input_filename and, for every
# self.TAG_SENT element, write the text of its self.TAG_TGT child whose
# "rank" attribute equals self.desired_rank_value (compared as floats) to
# self.output_filename.  When several targets of one sentence match, the
# last one wins; a sentence without a matching target writes nothing.
#
# Raises KeyError if a target element has no "rank" attribute and
# ValueError if a rank value cannot be converted to float.

# Context managers guarantee both files are closed even when parsing fails.
with open(self.input_filename, "r") as source_xml_file, \
        open(self.output_filename, 'w') as target_file:

    context = iter(iterparse(source_xml_file, events=("start", "end")))

    # The first event delivers the document root; keep a handle on it so
    # already-processed children can be released later.  The builtin
    # next() replaces the Python 2-only iterator method .next().
    event, root = next(context)

    rank_value = None
    target_sentence = ""
    for event, elem in context:

        if event == "start" and elem.tag == self.TAG_TGT:
            # Attributes are already populated on the "start" event.
            rank_value = elem.attrib["rank"]

        elif (event == "end" and elem.tag == self.TAG_TGT
                and float(rank_value) == float(self.desired_rank_value)):
            # Element text is only reliable on the "end" event.  An empty
            # element has text None, which write() would reject.
            target_sentence = elem.text or ""

        elif event == "end" and elem.tag == self.TAG_SENT:
            target_file.write(target_sentence)
            target_sentence = ""
            rank_value = None

            # Drop the children collected under the root so far, keeping
            # memory use low on large inputs.
            root.clear()
54