Trees | Indices | Help |
|
---|
|
1 ''' 2 Created on 07.10.2011 3 4 @author: Eleftherios Avramidis 5 ''' 6 7 from featuregenerator.featuregenerator import FeatureGenerator 8 from nltk.tokenize.punkt import PunktWordTokenizer 9 from tempfile import mktemp 10 11 from os import unlink 12 import os 13 import subprocess 14 import sys 15 import codecs 16 import bleu 17 18 19 2022 ''' 23 Provides BLEU score against the reference 24 ''' 2542 43 4427 """ 28 Calculates BLEU score for the given target sentence, against the reference sentence 29 @param simplesentence: The target sentence to be scored 30 @type simplesentence: sentence.sentence.SimpleSentence 31 @rtype: dict 32 @return: dictionary containing Levenshtein distance as an attribute 33 """ 34 target_untokenized = target.get_string() 35 try: 36 ref_untokenized = parallelsentence.get_reference().get_string() 37 38 bleu_value = bleu.score_sentence(target_untokenized, [ref_untokenized]) 39 return {'ref-bleu': '{:.4}'.format(bleu_value)} 40 except: 41 return {}46 ''' 47 Provides cross-BLEU score of the current target sentence against the others 48 ''' 4957 58 59 60 61 62 63 64 65 66 67 # def bleu(self, translation, reference): 68 # 69 # translation = " ".join(PunktWordTokenizer().tokenize(translation)) 70 # tfilename = mktemp(dir=u'/tmp/', suffix=u'.tgt.txt') 71 # tfile = codecs.open(tfilename, 'w', 'utf-8') 72 # tfile.write(translation) 73 # tfile.close() 74 # 75 # reference = " ".join(PunktWordTokenizer().tokenize(reference)) 76 # rfilename = mktemp(dir=u'/tmp/', suffix=u'.ref.txt') 77 # rfile = codecs.open(rfilename, 'w', 'utf-8') 78 # rfile.write(reference) 79 # rfile.close() 80 # 81 # ofilename = mktemp(dir=u'/tmp/', suffix=u'.out.txt') 82 # ofile = codecs.open(ofilename, 'w', 'utf-8') 83 # 84 # path = os.path.dirname(__file__) 85 # bleupath = os.path.join(path, "bleu") 86 # print bleupath 87 # subprocess.call([bleupath, "-s" , "-p", "-S", "-r", rfilename, tfilename], stdout = ofile) 88 # ofile.close() 89 # ofile = codecs.open(ofilename, 'r', 'utf-8') 90 # output = ofile.readline() 91 # output = float(output) 92 # return output 93 # 94 # 9551 current_system_name = translation.get_attribute("system") 52 alltranslations = dict([(t.get_attribute("system"), t.get_string()) for t in parallelsentence.get_translations()]) 53 del(alltranslations[current_system_name]) 54 references = alltranslations.values() 55 bleu_value = bleu.score_sentence(translation.get_string(), references) 56 return {'cross-bleu': '{:.4}'.format(bleu_value)}
Trees | Indices | Help |
|
---|
Generated by Epydoc 3.0.1 on Fri Jul 18 11:46:17 2014 | http://epydoc.sourceforge.net |