1
2
3
4 """
5 @author: Eleftherios Avramidis
6 """
7
8
9 from sentence.sentence import SimpleSentence
10 from sentence.parallelsentence import ParallelSentence
11 from xml.sax import handler
12
14 """
15 """
16
def __init__(self, rank_attribute_name, outfilename, testset):
    """
    Prepare the parsing state and the per-system counters
    @param rank_attribute_name: name of the target-sentence attribute the rank is read from
    @type rank_attribute_name: str
    @param outfilename: path of the file the per-system results are written to
    @type outfilename: str
    @param testset: test set identifier, written into every result line
    """

    self.outfilename = outfilename
    self.rank_attribute_name = rank_attribute_name
    self.testset = testset

    # State flags: the first two are raised when the corresponding element
    # is opened; annotations are only accepted while passed_head is False
    self.is_parallelsentence = False
    self.is_simplesentence = False
    self.passed_head = False

    # Attribute buffers for the parallel sentence / simple sentence being read
    self.ps_attributes = {}
    self.ss_attributes = {}

    # Sentences collected for the parallel sentence being read
    self.src = None
    self.tgt = []
    self.ref = None
    self.annotations = []

    # Text buffer of the current simple sentence; unicode, like every other
    # assignment to this attribute
    self.ss_text = u""

    # NOTE(review): set_tags() is defined outside this block; presumably it
    # provides the TAG_* element names used while parsing -- confirm
    self.set_tags()

    self._encoding = "utf-8"

    # system name -> number of parallel sentences in which it had the minimum rank
    self.systems_performance = {}
    # number of parallel sentences processed so far
    self.parallelsentences = 0
50
51
64
67
# Write one tab-separated result line per system. The context manager
# closes the file even if formatting or writing raises.
with open(self.outfilename, 'w') as outfile:
    for system in self.systems_performance:
        # Replace the raw count by its share of all parallel sentences; the
        # normalised value is stored back on purpose, as before.
        self.systems_performance[system] = 1.00 * self.systems_performance[system] / self.parallelsentences
        entry = "dfki_parseconf\tde-en\t%s\t%s\t%01.4f\n" % (self.testset, system, self.systems_performance[system])
        outfile.write(entry)
75
"""
Signals the start of an element (simplesentence or parallelsentence)
@param name: the name of the element
@type name: str
@param attrs: object of the Attributes interface containing the attributes of the element
@type attrs: Attributes
"""
if name == self.TAG_SENT:
    # A parallel sentence opens: clear the text buffer and the target list,
    # then copy the element's attributes
    self.ss_text = u""
    self.ps_attributes = {}
    self.tgt = []
    for att_name in attrs.getNames():
        self.ps_attributes[att_name] = attrs.getValue(att_name)
    # NOTE(review): nesting reconstructed -- assumed to be set once, after
    # the attribute loop; confirm
    self.is_parallelsentence = True

if name == self.TAG_ANNOTATION:
    # Annotation names are only collected while passed_head is False
    if not self.passed_head:
        self.annotations.append(attrs.getValue("name"))
    else:
        print "Format error. Annotation must be declared in the beginning of the document"

elif name in [self.TAG_SRC, self.TAG_TGT, self.TAG_REF]:
    # A source, target or reference sentence opens: clear the text and
    # attribute buffers, then copy the element's attributes
    self.ss_text = u""
    self.ss_attributes = {}
    for att_name in attrs.getNames():
        self.ss_attributes[att_name] = attrs.getValue(att_name)
    # From here on, character data is appended to ss_text
    self.is_simplesentence = True
116
117
"""
The Parser will call this method to report each chunk of character data.
We use it to accumulate the string of the simplesentence
@param ch: chunk of character data reported by the parser
@type ch: str
"""
# Only collect text while the simple-sentence flag is set; each chunk is
# appended to what has been gathered so far
if self.is_simplesentence :
    self.ss_text = u"%s%s" % (self.ss_text, ch)
127
128
"""
Signals the end of an element.
The data gathered in the instance attributes is turned into sentence objects;
when a parallel sentence closes, the systems holding the minimum rank are counted
@param name: the name of the element
@type name: str
"""
self.ss_text = self.ss_text.strip()

if name == self.TAG_SRC:
    self.src = SimpleSentence(self.ss_text, self.ss_attributes)
    self.ss_text = u""
elif name == self.TAG_TGT:
    self.tgt.append(SimpleSentence(self.ss_text, self.ss_attributes))
    self.ss_text = u""
elif name == self.TAG_SENT:
    parallelsentence = ParallelSentence(self.src, self.tgt, self.ref, self.ps_attributes)
    self.parallelsentences += 1

    # Rank of every system for this sentence; the attribute value is parsed
    # as a float first and then truncated to an int
    rank_per_system = {}
    for target in parallelsentence.get_translations():
        system = target.get_attribute("system")
        rank_per_system[system] = int(float(target.get_attribute(self.rank_attribute_name)))

    # NOTE(review): nesting reconstructed -- the counting below is assumed to
    # run once per parallel sentence, after all ranks are collected; confirm
    if rank_per_system:
        # Computed once instead of once per system
        best_rank = min(rank_per_system.values())
        # Every system sharing the minimum rank is credited
        for system, rank in rank_per_system.items():
            if rank == best_rank:
                self.systems_performance[system] = self.systems_performance.get(system, 0) + 1
167