1
2
3 '''
4 Created on Jul 12, 2011
5
6 @author: jogin
7 '''
8
9
10 from parallelsentence import ParallelSentence
11 from copy import deepcopy
12
13
14
16 """
17 A pairwise parallel sentence is a parallel sentence that contains output produced by only two systems.
18 @ivar src: the source sentence
19 @type src: SimpleSentence
20 @ivar translations: a tuple of two target sentences
21 @type translations: tuple(SimpleSentence, SimpleSentence)
22 @ivar reference: the reference translation
23 @type reference: L{SimpleSentence}
24 @ivar attributes: a dict with the attributes at the parallel sentence level
25 @type attributes: dict{str : str}
26 @ivar rank_name: the name of the attribute that serves as the rank
27 @type rank_name: str
28 """
29
def __init__(self, source=None, translations=None, systems=None, reference=None, attributes=None, rank_name=u"rank", **kwargs):
    """
    Constructor
    @param source: the source text of the parallel sentence
    @type source: SimpleSentence
    @param translations: a pair of translations; when omitted (or None) a fresh empty list is stored
    @type translations: tuple of translations (SimpleSentence, SimpleSentence)
    @param systems: names of target systems; when omitted (or None) a fresh empty list is stored
    @type systems: tuple of strings
    @param reference: the reference translation
    @type reference: SimpleSentence
    @param attributes: the attributes that describe the parallel sentence; a deep copy is stored,
    so later changes to the caller's dict do not affect this object
    @type attributes: dict { String name : String value }
    @param rank_name: the name of the attribute that serves as the rank
    @type rank_name: str
    @keyword cast: a parallel sentence to initialize this object from; when given (and truthy) it is
    handed to L{_cast} and the positional arguments above are not stored by the constructor
    @keyword rankless: set True to skip the rank normalization of the two translations
    @type rankless: boolean
    @keyword invert_ranks: forwarded to L{_normalize_ranks}; inverts the ranks before comparing them
    @type invert_ranks: boolean
    """
    # kwargs is a private dict of this call, so plain lookups with a fallback are enough
    cast = kwargs.get("cast", None)
    rankless = kwargs.get("rankless", False)
    invert_ranks = kwargs.get("invert_ranks", False)

    if cast:
        self._cast(cast)
    else:
        self.src = source
        # A new container per instance: the defaults used to be module-lifetime
        # lists that every default-constructed object shared and could mutate.
        self.tgt = [] if translations is None else translations
        self.systems = [] if systems is None else systems
        self.ref = reference
        self.attributes = {} if attributes is None else deepcopy(attributes)
        self.rank_name = rank_name
        # NOTE(review): the indentation of this check was lost in the reviewed copy;
        # it is assumed to belong to the non-cast path only -- confirm against the repository.
        if self.tgt and not rankless:
            self._normalize_ranks(invert_ranks)
62
63
64 - def _cast(self, parallelsentence):
76
77
79 """
80 Reads the two rank scores for the two respective system outputs, compares them and sets a universal
81 comparison value, namely -1 if the first system is better, +1 if the second system output is better,
82 and 0 if they are equally good. The value is stored as a string in the attributes dict of the current object, under rank_name
83 @param invert_ranks: If set to True, it inverts the ranks (useful for non-penalty metrics)
84 @type invert_ranks: boolean
85 """
86
87 if invert_ranks:
88 factor = -1.00
89 else:
90 factor = 1.00
91
92 try:
93 rank_a = float(self.tgt[0].get_attribute(self.rank_name)) * factor
94 rank_b = float(self.tgt[1].get_attribute(self.rank_name)) * factor
95 except AttributeError:
96
97 return
98
99 if rank_a > rank_b:
100 rank = 1
101 elif rank_b > rank_a:
102 rank = -1
103 else:
104 rank = 0
105 self.attributes[self.rank_name] = str(rank)
106
107
108
111
114
117
119 new_attributes = deepcopy(self.attributes)
120 new_attributes[self.rank_name] = -1 * new_attributes[self.rank_name]
121 return PairwiseParallelSentence(self.src, (self.tgt[1], self.tgt[0]), (self.systems[1], self.systems[0]), self.ref )
122