Package featuregenerator :: Package wer :: Module werfeaturegenerator
[hide private]
[frames | no frames]

Source Code for Module featuregenerator.wer.werfeaturegenerator

 1  """ 
 2   
 3  @author: Eleftherios Avramidis 
 4  """ 
 5  from featuregenerator.featuregenerator import FeatureGenerator 
 6  from nltk.tokenize.punkt import PunktWordTokenizer 
 7  from wer import wer 
 8   
class WERFeatureGenerator(FeatureGenerator):
    """
    Feature generator that scores a target sentence with its word error rate
    (WER) against the reference sentence obtained from the parallel sentence.

    Installation notes left by the original author:
    sudo apt-get install python all dev
    sudo pypi-install python-Levenshtein
    """

    def get_features_tgt(self, target, parallelsentence):
        """
        Calculates the word error rate of the given target sentence against
        the reference sentence returned by parallelsentence.get_reference()
        @param target: The target sentence to be scored; its get_string() is used
        @type target: presumably sentence.sentence.SimpleSentence (to be confirmed)
        @param parallelsentence: Object whose get_reference() yields the reference sentence
        @rtype: dict
        @return: dictionary with the single key 'wer', holding the score converted to a string
        """
        hypothesis_text = target.get_string()
        reference_text = parallelsentence.get_reference().get_string()

        # The reference is handed to wer() as a list containing one token list.
        reference_words = PunktWordTokenizer().tokenize(reference_text)

        # The hypothesis is tokenized too, but then re-joined into a single
        # space-separated string before scoring.
        # NOTE(review): the two wer() arguments have different shapes (string
        # vs. list of token lists); confirm this is what wer() expects.
        hypothesis_words = PunktWordTokenizer().tokenize(hypothesis_text)
        hypothesis_joined = " ".join(hypothesis_words)

        score = wer(hypothesis_joined, [reference_words])
        return {'wer': str(score)}
36