Package featuregenerator :: Package bleu :: Module bleugenerator
[hide private]
[frames] | no frames]

Source Code for Module featuregenerator.bleu.bleugenerator

 1  ''' 
 2  Created on 07.10.2011 
 3   
 4  @author: Eleftherios Avramidis 
 5  ''' 
 6   
 7  from featuregenerator.featuregenerator import FeatureGenerator 
 8  from nltk.tokenize.punkt import PunktWordTokenizer  
 9  from tempfile import mktemp 
10   
11  from os import unlink  
12  import os 
13  import subprocess 
14  import sys 
15  import codecs 
16  import bleu 
17   
18   
19     
20   
class BleuGenerator(FeatureGenerator):
    '''
    Provides BLEU score against the reference
    '''

    def get_features_tgt(self, target, parallelsentence):
        """
        Calculates the BLEU score of the given target sentence against the
        reference sentence of the enclosing parallel sentence.

        @param target: The target sentence to be scored
        @type target: sentence.sentence.SimpleSentence
        @param parallelsentence: The parallel sentence that provides the
            reference (via get_reference()) the target is scored against
        @rtype: dict
        @return: dictionary holding the BLEU score, formatted as a string
            with the '{:.4}' format spec, under the key 'ref-bleu'; an empty
            dictionary if the score could not be computed
        """
        target_untokenized = target.get_string()
        try:
            ref_untokenized = parallelsentence.get_reference().get_string()
            bleu_value = bleu.score_sentence(target_untokenized, [ref_untokenized])
            return {'ref-bleu': '{:.4}'.format(bleu_value)}
        except Exception:
            # Best effort: a missing reference or a scoring failure simply
            # yields no feature. Catching Exception (rather than a bare
            # except) lets KeyboardInterrupt and SystemExit propagate.
            return {}
class CrossBleuGenerator(FeatureGenerator):
    '''
    Provides cross-BLEU score of the current target sentence against the others
    '''

    def get_features_tgt(self, translation, parallelsentence):
        """
        Calculates the BLEU score of the given translation, using the
        translations of the other systems in the same parallel sentence as
        references.

        @param translation: The translation to be scored; its "system"
            attribute identifies which entry to leave out of the references
        @param parallelsentence: The parallel sentence whose translations
            (via get_translations()) provide the references
        @rtype: dict
        @return: dictionary holding the BLEU score, formatted as a string
            with the '{:.4}' format spec, under the key 'cross-bleu'
        """
        current_system_name = translation.get_attribute("system")
        # Keyed by system name, so several translations carrying the same
        # system name collapse to the last one seen.
        alltranslations = {
            t.get_attribute("system"): t.get_string()
            for t in parallelsentence.get_translations()
        }
        # Leave out the system being scored. pop() with a default avoids a
        # KeyError when that system is not among the listed translations.
        alltranslations.pop(current_system_name, None)
        # Pass a real list: on Python 3 dict.values() is only a view.
        references = list(alltranslations.values())
        bleu_value = bleu.score_sentence(translation.get_string(), references)
        return {'cross-bleu': '{:.4}'.format(bleu_value)}
# Disabled legacy implementation, kept for reference: it scored a sentence by
# writing it to temporary files and calling an external "bleu" executable.
#
#    def bleu(self, translation, reference):
#
#        translation = " ".join(PunktWordTokenizer().tokenize(translation))
#        tfilename = mktemp(dir=u'/tmp/', suffix=u'.tgt.txt')
#        tfile = codecs.open(tfilename, 'w', 'utf-8')
#        tfile.write(translation)
#        tfile.close()
#
#        reference = " ".join(PunktWordTokenizer().tokenize(reference))
#        rfilename = mktemp(dir=u'/tmp/', suffix=u'.ref.txt')
#        rfile = codecs.open(rfilename, 'w', 'utf-8')
#        rfile.write(reference)
#        rfile.close()
#
#        ofilename = mktemp(dir=u'/tmp/', suffix=u'.out.txt')
#        ofile = codecs.open(ofilename, 'w', 'utf-8')
#
#        path = os.path.dirname(__file__)
#        bleupath = os.path.join(path, "bleu")
#        print bleupath
#        subprocess.call([bleupath, "-s" , "-p", "-S", "-r", rfilename, tfilename], stdout = ofile)
#        ofile.close()
#        ofile = codecs.open(ofilename, 'r', 'utf-8')
#        output = ofile.readline()
#        output = float(output)
#        return output
#
#