Package featuregenerator :: Module ratio_generator
[hide private]
[frames | no frames]

Source Code for Module featuregenerator.ratio_generator

 1  """ 
 2   
 3  @author: Eleftherios Avramidis 
 4  """ 
 5  from __future__ import division 
 6  from featuregenerator import FeatureGenerator 
 7  from nltk.tokenize.punkt import PunktWordTokenizer 
 8   
 9   
class RatioGenerator(FeatureGenerator):
    """
    Computes the ratio of source features and target features, if they have the same name
    """

    def get_features_tgt(self, simplesentence, parallelsentence):
        """
        Build source/target ratio features for one target sentence.

        For every attribute name present on both the target sentence and the
        source sentence, a new attribute "<name>_ratio" is produced holding
        str(source_value / target_value).

        @param simplesentence: the target sentence; its get_attributes()
            result is read as a mapping of attribute name to value
        @param parallelsentence: the enclosing parallel sentence; the
            attributes of its get_source() sentence supply the numerators
        @return: dict mapping each new "<name>_ratio" key to the ratio
            rendered as a string; empty if no attribute qualifies
        """
        tgt_attributes = simplesentence.get_attributes()
        src_attributes = parallelsentence.get_source().get_attributes()

        attributes = {}

        # Membership is tested on the mappings themselves rather than on
        # .keys(): under Python 2 .keys() builds a fresh list, which made
        # every test a linear scan and the whole loop quadratic.
        for attribute_name in tgt_attributes:
            if attribute_name not in src_attributes:
                continue

            ratio_name = "%s_ratio" % attribute_name
            if ratio_name in tgt_attributes:
                # The ratio is already stored on the target; do not redo it.
                continue

            # Only the float conversions can raise ValueError, so the try
            # covers nothing else. Non-numeric attributes are skipped.
            try:
                tgt_value = float(tgt_attributes[attribute_name])
                src_value = float(src_attributes[attribute_name])
            except ValueError:
                continue

            if tgt_value == 0:
                # A zero target value yields infinity (also when the source
                # value is zero) instead of raising ZeroDivisionError.
                ratio = float('inf')
            else:
                ratio = src_value / tgt_value
            attributes[ratio_name] = str(ratio)

        return attributes
42