1 """
2
3 @author: Eleftherios Avramidis
4 """
5 from __future__ import division
6 from featuregenerator import FeatureGenerator
7 from nltk.tokenize.punkt import PunktWordTokenizer
8
9
11 """
12 Computes, for every feature name shared by source and target, the ratio of the source value to the target value
13 """
14
15
17
# NOTE(review): the `def` line that owns these statements is not visible in
# this view and the original indentation has been lost; the comments below
# describe only what the statements themselves show.
#
# Fetch the attributes of the target-side sentence and of the source sentence
# obtained from the parallel sentence.
18 tgt_attributes = simplesentence.get_attributes()
19 src_attributes = parallelsentence.get_source().get_attributes()
20
# Newly computed ratio attributes, keyed by "<attribute name>_ratio".
21 attributes = {}
22
23
# Only attribute names present on both the target and the source are considered.
24 for tgt_attribute_name in tgt_attributes.keys():
25 if tgt_attribute_name in src_attributes.keys():
26 try:
27 new_attribute_name = "%s_ratio" % tgt_attribute_name
# Skip names for which the target already has a "<name>_ratio" attribute.
28 if new_attribute_name not in tgt_attributes.keys():
29
# Both values are converted with float(); a ValueError raised here is
# handled by the except clause below.
30 tgt_attribute_value = float(tgt_attributes[tgt_attribute_name])
31 src_attribute_value = float(src_attributes[tgt_attribute_name])
# Avoid dividing by zero: a zero target value yields an infinite ratio.
32 if tgt_attribute_value == 0:
33 ratio = float('inf')
34 else:
# The ratio is the source value divided by the target value.
35 ratio = 1.0 * src_attribute_value / tgt_attribute_value
# The ratio is stored in its string form.
36 attributes[new_attribute_name] = str(ratio)
37 except ValueError:
# A ValueError from the conversions above is ignored, so that attribute
# simply gets no ratio entry.
# NOTE(review): only ValueError is handled; a None value would make
# float() raise TypeError instead — confirm attribute values are always
# strings or numbers.
38 pass
39
40
# Only the ratio entries built above are returned.
41 return attributes
42