Package sentence :: Module coupledparallelsentence
[hide private]
[frames | no frames]

Source Code for Module sentence.coupledparallelsentence

  1  ''' 
  2  Created on 23 Feb 2012 
  3   
  4  @author: Eleftherios Avramidis 
  5  ''' 
  6   
  7  from parallelsentence import ParallelSentence 
  8  from sentence import SimpleSentence 
  9   
class CoupledParallelSentence(ParallelSentence):
    """
    A coupled parallelsentence contains two sources, two respective
    translations and their corresponding attributes.

    @ivar src: a tuple containing two sources
    @type src: (L{SimpleSentence}, L{SimpleSentence})
    @ivar tgt: a tuple containing two respective translations
    @type tgt: (L{SimpleSentence}, L{SimpleSentence})
    @ivar ref: not supported
    @type ref: None or L{SimpleSentence}
    @ivar attributes: a dict containing the attributes, as a result of
        merging the two sentences
    @type attributes: {str, str}
    """

    # Prefixes added to conflicting parallelsentence-level attribute names.
    _PS_PREFIXES = ("ps1_", "ps2_")

    def __init__(self, ps1, ps2=None, **kwargs):
        """
        Either wrap an existing coupled sentence (only ps1 given) or build a
        new couple out of two parallel sentences.

        @param ps1: first parallel sentence of the couple
        @type ps1: L{ParallelSentence}
        @param ps2: second parallel sentence of the couple
        @type ps2: L{ParallelSentence}
        @keyword compact: if true, empty L{SimpleSentence} placeholders are
            stored instead of the real sources and translations
            (default False)
        @raise ValueError: if, in non-compact mode, either parallel sentence
            has more than one translation
        """
        self.compact = kwargs.get("compact", False)

        if not ps2:
            # Wrap: ps1 already carries the coupled data, so just adopt it.
            self.src = ps1.src
            self.tgt = ps1.tgt
            self.ref = ps1.ref
            self.attributes = ps1.attributes
            return

        # Construct a new couple out of the two parallel sentences.
        if not self.compact:
            translations1 = ps1.get_translations()
            translations2 = ps2.get_translations()
            if len(translations1) > 1 or len(translations2) > 1:
                raise ValueError(
                    "Cannot couple parallel sentences that have more than "
                    "one translation each"
                )
            self.src = (ps1.get_source(), ps2.get_source())
            self.tgt = (translations1[0], translations2[0])
        else:
            self.src = (SimpleSentence(), SimpleSentence())
            self.tgt = (SimpleSentence(), SimpleSentence())

        # References are not supported for constructed couples.
        self.ref = None

        self.attributes = self._prefix_parallelsentence_attributes(
            ps1.get_attributes(), ps2.get_attributes()
        )
        self._collapse_simplesentence_attributes(ps1, ps2)
        self._generate_rank()

    def get_couple(self):
        """
        Split the coupled sentence back into its two parallel sentences.

        @return: the two parallel sentences, each with its own source, a
            single translation, its reference (None when no pair of
            references is available) and its de-prefixed attributes
        @rtype: (L{ParallelSentence}, L{ParallelSentence})
        """
        try:
            ref1 = self.ref[0]
            ref2 = self.ref[1]
        except (TypeError, IndexError, AttributeError):
            # No usable pair of references (e.g. ref is None).
            ref1 = None
            ref2 = None

        # Reverse the attribute merge once and reuse the result for both.
        attributes1, attributes2 = self._reconstruct_parallelsentence_attributes()

        return (ParallelSentence(self.src[0], [self.tgt[0]], ref1, attributes1),
                ParallelSentence(self.src[1], [self.tgt[1]], ref2, attributes2))

    def _generate_rank(self):
        """
        Generates rank attribute after comparing the scores of the two
        sentences.

        Reads "tgt-1_score" and "tgt-2_score" from the attributes and stores
        "rank" as a string: "1" if the first score is lower, "-1" if the
        second score is lower, "0" if they are equal.
        """
        score1 = float(self.attributes["tgt-1_score"])
        score2 = float(self.attributes["tgt-2_score"])
        if score1 < score2:
            rank = 1
        elif score2 < score1:
            rank = -1
        else:
            rank = 0
        self.attributes["rank"] = str(rank)

    def _prefix_parallelsentence_attributes(self, attdict1, attdict2):
        """
        Merges two dicts of attributes, so that nothing gets lost and
        overlapping attributes get prefixed with the id of their original
        parallelsentence.

        @param attdict1: dict with the attributes of the first parallelsentence
        @type attdict1: {str, str}
        @param attdict2: dict with the attributes of the second parallelsentence
        @type attdict2: {str, str}
        @return: merged dict of attributes, prefixed if necessary
        @rtype: {str, str}
        """
        attdict_merged = {}

        for key in set(attdict1) | set(attdict2):
            if key not in attdict1:
                attdict_merged[key] = attdict2[key]
            elif key not in attdict2:
                attdict_merged[key] = attdict1[key]
            elif attdict1[key] == attdict2[key]:
                # Same value on both sides: keep a single unprefixed entry.
                attdict_merged[key] = attdict1[key]
            else:
                # Conflicting values: keep both, marked with their origin.
                attdict_merged["ps1_{0}".format(key)] = attdict1[key]
                attdict_merged["ps2_{0}".format(key)] = attdict2[key]

        return attdict_merged

    def _deprefix(self, attname):
        """
        Removes a leading "ps1_" or "ps2_" from an attribute name.

        Only a prefix at the very start of the name is stripped, so names
        that merely contain one of these substrings are left intact.

        @param attname: attribute name, possibly prefixed
        @type attname: str
        @return: the attribute name without its parallelsentence prefix
        @rtype: str
        """
        for prefix in self._PS_PREFIXES:
            if attname.startswith(prefix):
                return attname[len(prefix):]
        return attname

    def _reconstruct_parallelsentence_attributes(self):
        """
        Reverses the attributes merging that took place in
        _prefix_parallelsentence_attributes.

        @return: two dicts containing the attributes of their respective
            sentences
        @rtype: ({str: str}, {str: str})
        """
        attdict1 = {}
        attdict2 = {}

        for key, value in self.attributes.items():
            if key.startswith("ps1_"):
                attdict1[self._deprefix(key)] = value
            elif key.startswith("ps2_"):
                attdict2[self._deprefix(key)] = value
            else:
                # Unprefixed attributes are shared by both sentences.
                attdict1[key] = value
                attdict2[key] = value

        return attdict1, attdict2

    def _collapse_simplesentence_attributes(self, ps1, ps2):
        """
        Creates a flat dictionary of attributes for the doubled parallel
        sentence, containing attributes from its ingredients.

        The attributes of each source and of each first translation are
        copied into self.attributes under the name "<prefix>_<name>", with
        prefixes "src-1", "src-2", "tgt-1" and "tgt-2".

        @param ps1: first parallel sentence of the couple
        @type ps1: L{ParallelSentence}
        @param ps2: second parallel sentence of the couple
        @type ps2: L{ParallelSentence}
        """
        simplesentences_prefixes = [
            (ps1.get_source(), "src-1"),
            (ps2.get_source(), "src-2"),
            (ps1.get_translations()[0], "tgt-1"),
            (ps2.get_translations()[0], "tgt-2"),
        ]

        for simplesentence, prefix in simplesentences_prefixes:
            for attribute_name, attribute_value in simplesentence.get_attributes().items():
                prefixed_attribute_name = "{0}_{1}".format(prefix, attribute_name)
                self.attributes[prefixed_attribute_name] = attribute_value

    def get_nested_attributes(self):
        """
        Override method, since nested attributes have been already propagated
        up the coupled sentence.

        @return: always an empty dict
        @rtype: dict
        """
        return {}
157