1 '''
2 Created on Apr 15, 2011
3
4 @author: Eleftherios Avramidis
5 '''
6
7 from parallelsentence import ParallelSentence
8 from dataset import DataSet
9 from collections import OrderedDict
10 import sys
11
13 '''
14 classdocs
15 '''
16
17
19 """
20 Collection of convenience functions for transforming parallel sentences with many ranks
21 into pairwise mode and vice versa. Most of the implementations here are ugly with many nested loops,
22 so a more object-oriented approach would be to go through the various DataSet types.
23 """
24 self.rank_name = rank_name
25
27 if isinstance(parallelsentences, DataSet):
28 parallelsentences = parallelsentences.get_parallelsentences()
29
30
31 sentences_per_judgment = OrderedDict()
32
33 for parallelsentence in parallelsentences:
34 jid = int(parallelsentence.get_attribute("judgement_id"))
35 if jid in sentences_per_judgment:
36 sentences_per_judgment[jid].append(parallelsentence)
37 else:
38
39 sentences_per_judgment[jid]=[parallelsentence]
40
41 new_parallelsentences = []
42
43 for jid in sentences_per_judgment:
44 pairwise_sentences = sentences_per_judgment[jid]
45 rank_per_system = OrderedDict()
46 tranlsations_per_system = OrderedDict()
47 for pairwise_sentence in pairwise_sentences:
48 rank = int(pairwise_sentence.get_attribute(self.rank_name))
49
50
51 translation1 = pairwise_sentence.get_translations()[0]
52 if translation1.get_attribute("system") in rank_per_system:
53 rank_per_system[translation1.get_attribute("system")] += rank
54 else:
55 rank_per_system[translation1.get_attribute("system")] = rank
56 tranlsations_per_system[translation1.get_attribute("system")] = translation1
57
58 translation2 = pairwise_sentence.get_translations()[1]
59 if translation2.get_attribute("system") in rank_per_system:
60 rank_per_system[translation2.get_attribute("system")] -= rank
61 else:
62 rank_per_system[translation2.get_attribute("system")] = -1 * rank
63 tranlsations_per_system[translation2.get_attribute("system")] = translation2
64
65 i = 0
66 prev_rank = None
67 translations_new_rank = []
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84 for system in rank_per_system.keys():
85 if rank_per_system[system] != prev_rank:
86 i += 1
87
88
89
90 prev_rank = rank_per_system[system]
91 translation = tranlsations_per_system[system]
92 translation.add_attribute(self.rank_name, str(i))
93 translations_new_rank.append(translation)
94
95
96 src = pairwise_sentences[0].get_source()
97 attributes = pairwise_sentences[0].get_attributes()
98 del attributes[self.rank_name]
99 new_parallelsentence = ParallelSentence(src, translations_new_rank, None, attributes)
100 new_parallelsentences.append(new_parallelsentence)
101 return new_parallelsentences
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
120 """
121 Converts the ranked system translations of one sentence into many sentences containing one translation pair each,
122 so that system output can be compared in a pairwise manner.
123 @param parallelsentence: the parallel sentence that needs to be split into pairs
124 @type parallelsentence: ParallelSentence
125 @param allow_ties: sentences of equal performance (rank=0) will be included in the set, if this is set to True
126 @type allow_ties: boolean
127 @return: a list of parallelsentences, each containing a pair of system translations and a universal rank value
128 """
129 source = parallelsentence.get_source()
130 translations = parallelsentence.get_translations()
131 pairwise_sentences = []
132 systems_parsed = []
133
134 for system_a in translations:
135 for system_b in translations:
136 if system_a == system_b:
137 continue
138 if system_b in systems_parsed and not exponential:
139 continue
140 systems_parsed.append(system_a)
141 rank = self._normalize_rank(system_a, system_b)
142 if not rank:
143 new_attributes = parallelsentence.get_attributes()
144 new_attributes["judgement_id"] = judgement_id
145
146 new_attributes[self.rank_name] = "-99"
147 pairwise_sentence = ParallelSentence(source, [system_a, system_b], None, new_attributes)
148 pairwise_sentences.append(pairwise_sentence)
149 elif rank != "0" or allow_ties:
150 new_attributes = parallelsentence.get_attributes()
151
152 new_attributes[self.rank_name] = rank
153 new_attributes["judgement_id"] = judgement_id
154 pairwise_sentence = ParallelSentence(source, [system_a, system_b], None, new_attributes)
155 pairwise_sentences.append(pairwise_sentence)
156
157 if rename_rank:
158 for system in translations:
159
160 try:
161 system.rename_attribute(self.rank_name, "orig_rank")
162 except KeyError:
163 print "didn't rename rank attribute"
164 pass
165
166 return pairwise_sentences
167
168
182
183
184
185
186
252
253
254
255
256
259
260
261
262
264 """
265 Receives two system outputs, compares the values of their rank attribute and returns a universal
266 comparison value as a string, namely '-1' if the first system is better (lower rank value), '1' if the
267 second system output is better, and '0' if they are equally good; returns None if a KeyError occurs.
268 """
269 try:
270 rank_a = system_a.get_attribute(self.rank_name)
271 rank_b = system_b.get_attribute(self.rank_name)
272 if rank_a < rank_b:
273 rank = "-1"
274 elif rank_a > rank_b:
275 rank = "1"
276 else:
277 rank = "0"
278 return rank
279 except KeyError:
280 return None
281