1 '''
2 Created on 23 Feb 2012
3
4 @author: Eleftherios Avramidis
5 '''
6
7 from parallelsentence import ParallelSentence
8 from sentence import SimpleSentence
9
11 '''
A coupled parallel sentence contains two sources, their two respective translations and the corresponding attributes
@ivar src: a tuple containing the two sources
@type src: (L{SimpleSentence}, L{SimpleSentence})
@ivar tgt: a tuple containing the two respective translations
@type tgt: (L{SimpleSentence}, L{SimpleSentence})
@ivar ref: not supported
@type ref: None or L{SimpleSentence}
@ivar attributes: a dict containing the attributes that result from merging those of the two sentences
@type attributes: {str: str}
21 '''
22
23
24 - def __init__(self, ps1, ps2 = None, **kwargs):
58
68
70 """
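Generates the rank attribute by comparing the scores of the two translations:
the rank is 1 if tgt-1_score is lower than tgt-2_score, -1 if tgt-2_score is lower
than tgt-1_score, and 0 otherwise. The rank is stored as a string.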
72 """
73 score1 = float(self.attributes["tgt-1_score"])
74 score2 = float(self.attributes["tgt-2_score"])
75 if score1 < score2:
76 rank = 1
77 elif score2 < score1:
78 rank = -1
79 else:
80 rank = 0
81 self.attributes["rank"] = str(rank)
82
83
85 """
Merges two dicts of attributes, so that nothing gets lost. An attribute that appears in only one
of the dicts, or that has the same value in both, is copied under its original name. An attribute
whose value differs between the two dicts is stored twice, with its name prefixed by ps1_ or ps2_
according to the parallelsentence it comes from
@param attdict1: dict with the attributes of the first parallelsentence
@type attdict1: {str: str}
@param attdict2: dict with the attributes of the second parallelsentence
@type attdict2: {str: str}
@return: merged dict of attributes, prefixed where necessary
@rtype: {str: str}
94 """
95 attdict_merged = {}
96
97 for key in set(attdict1.keys() + attdict2.keys()):
98 if not attdict1.has_key(key):
99 attdict_merged[key] = attdict2[key]
100 elif not attdict2.has_key(key):
101 attdict_merged[key] = attdict1[key]
102 elif attdict1[key] == attdict2[key]:
103 attdict_merged[key] = attdict1[key]
104 else:
105 attdict_merged["ps1_{0}".format(key)] = attdict1[key]
106 attdict_merged["ps2_{0}".format(key)] = attdict2[key]
107
108 return attdict_merged
109
112
113
115 """
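Reverses the attribute merging that took place in _prefix_parallelsentence_attributes.
Attributes whose name starts with neither ps1_ nor ps2_ are given to both sentences, whereas
prefixed attributes are passed through _deprefix and given only to the sentence their prefix refers to
@return: two dicts containing the attributes of their respective sentences
@rtype: ({str: str}, {str: str})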
119 """
120 attlist1 = []
121 attlist2 = []
122
123 for key, value in self.attributes.iteritems():
124 if not (key.startswith("ps1_") or key.startswith("ps2_")):
125 attlist1.append((key, value))
126 attlist2.append((key, value))
127 elif key.startswith("ps1_"):
128 attlist1.append((self._deprefix(key), value))
129 elif key.startswith("ps2_"):
130 attlist2.append((self._deprefix(key), value))
131
132 attdict1 = dict(attlist1)
133 attdict2 = dict(attlist2)
134
135 return attdict1, attdict2
136
137
139 """
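Flattens the attributes of the nested simple sentences into the attributes of the coupled
parallel sentence. Every attribute of the source of ps1, the source of ps2, the first translation
of ps1 and the first translation of ps2 is copied with its name prefixed by src-1_, src-2_,
tgt-1_ or tgt-2_ respectively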
142 """
143 simplesentences_prefixes = [(ps1.get_source(), "src-1"), (ps2.get_source(), "src-2"), (ps1.get_translations()[0], "tgt-1"), (ps2.get_translations()[0], "tgt-2")]
144
145 for (simplesentence, prefix) in simplesentences_prefixes:
146
147 for attribute_name, attribute_value in simplesentence.get_attributes().iteritems():
148 prefixed_attribute_name = "{0}_{1}".format(prefix, attribute_name)
149 self.attributes[prefixed_attribute_name] = attribute_value
150
151
153 """
Overriding method: returns an empty dict, since the nested attributes have already been propagated up to the coupled sentence
155 """
156 return {}
157