1 """
2
3 @author: Eleftherios Avramidis
4 """
5 from featuregenerator.featuregenerator import FeatureGenerator
6 from nltk.tokenize.punkt import PunktWordTokenizer
7 from wer import wer
8
10 """
11 Class that provides a feature generator able to calculate the word error rate (WER) of the target sentences against their reference sentences
12 sudo apt-get install python-all-dev
13 sudo pypi-install python-Levenshtein
14 """
15
16
17
18
20 """
21 Calculates word error rate for the given target sentence, against the reference sentence
22 @param simplesentence: The target sentence to be scored
23 @type simplesentence: sentence.sentence.SimpleSentence
24 @rtype: dict
25 @return: dictionary containing the word error rate, as a string, under the key 'wer'
26 """
27 target_untokenized = target.get_string()
28 ref_untokenized = parallelsentence.get_reference().get_string()
29 ref_tokens = PunktWordTokenizer().tokenize(ref_untokenized)
30
31
32
33 target_tokens = " ".join(PunktWordTokenizer().tokenize(target_untokenized))
34 wer_value = wer(target_tokens, [ref_tokens])
35 return {'wer': str(wer_value)}
36