Package featuregenerator :: Package meteor :: Module meteor
[hide private]
[frames | no frames]

Source Code for Module featuregenerator.meteor.meteor

  1  ''' 
  2  Created on 15 Jun 2012 
  3   
  4  @author: Eleftherios Avramidis 
  5  ''' 
  6   
  7  from py4j.java_gateway import JavaGateway 
  8  from py4j.java_gateway import GatewayClient 
  9  from py4j.java_gateway import java_import 
 10  from featuregenerator.languagefeaturegenerator import LanguageFeatureGenerator 
 11  from util.jvm import JVM 
 12  import sys 
 13   
class MeteorGenerator(LanguageFeatureGenerator):
    '''
    Connects to a local JVM through a Py4j JavaGateway in order to perform
    METEOR scoring and serve the results as features. This Feature Generator
    overwrites the inherited get_features_tgt function for scoring target vs.
    the embedded reference translation of the ParallelSentence.
    See L{CrossMeteorGenerator} for target cross-scoring.
    @ivar lang: The language code for the proper initialization of the included
    language-dependent tool
    @type lang: string
    @ivar jvm: The wrapper of the local Java process the gateway connects to
    @type jvm: util.jvm.JVM
    @ivar meteor_view: A JVM view of the gateway with the METEOR packages imported
    @type meteor_view: py4j.java_gateway.JVMView
    @ivar scorer: The initialized object of the MeteorScorer
    @type scorer: edu.cmu.meteor.scorer.MeteorScorer
    '''
    __name__ = "Meteor"

    def __init__(self, lang, java_classpath, dir_path):
        '''
        Constructor. Creates a L{JVM} for the given classpath, opens a Py4j
        gateway on the socket it reports and initializes a METEOR scorer
        configured for the given language.
        @param lang: The language code for the proper initialization of this language-dependent tool
        @type lang: string
        @param java_classpath: The classpath handed to L{JVM}; presumably it has
        to contain the METEOR jar, since its packages are imported below
        @type java_classpath: string
        @param dir_path: Not used by this constructor
        @type dir_path: string
        '''
        self.lang = lang
        self.jvm = JVM(java_classpath)
        socket_no = self.jvm.socket_no
        gatewayclient = GatewayClient('localhost', socket_no)
        gateway = JavaGateway(gatewayclient, auto_convert=True, auto_field=True)
        sys.stderr.write("Initialized local Java gateway with pid {} in socket {}\n".format(self.jvm.pid, socket_no))

        self.meteor_view = gateway.new_jvm_view()
        # import necessary java packages from meteor jar
        java_import(self.meteor_view, 'edu.cmu.meteor.scorer.*')
        java_import(self.meteor_view, 'edu.cmu.meteor.util.*')

        # pass the language setting into the meteor configuration object
        config = self.meteor_view.MeteorConfiguration()
        config.setLanguage(lang)
        # initialize object with the given config
        self.scorer = self.meteor_view.MeteorScorer(config)

    def _format_stats(self, stats):
        '''
        Convert a METEOR statistics object into the feature dictionary
        returned by the scoring functions of this class.
        @param stats: An object exposing precision, recall, fragPenalty and score
        @type stats: edu.cmu.meteor.scorer.MeteorStats
        @return: The four values, each formatted with the '{:.4}' format spec
        @rtype: {string: string}
        '''
        return {'meteor_precision': '{:.4}'.format(stats.precision),
                'meteor_recall': '{:.4}'.format(stats.recall),
                'meteor_fragPenalty': '{:.4}'.format(stats.fragPenalty),
                'meteor_score': '{:.4}'.format(stats.score)}

    def get_features_tgt(self, translation, parallelsentence):
        '''
        Score the given translation against the reference translation of the
        given parallel sentence.
        @param translation: The translation to be scored; must provide get_string()
        @param parallelsentence: The parallel sentence whose get_reference() is
        used as the single reference
        @return: The METEOR scores with their names prefixed by "ref-", or an
        empty dictionary if scoring was not possible
        @rtype: {string: string}
        '''
        try:
            references = [parallelsentence.get_reference().get_string()]
            stats = self.score_sentence(translation.get_string(), references)
        except Exception:
            # Best effort: a sentence that cannot be scored (e.g. no usable
            # reference) contributes no features. Unlike a bare except, this
            # lets KeyboardInterrupt and SystemExit propagate.
            return {}
        return {"ref-{}".format(k): v for k, v in stats.items()}

    def score(self, target, references):
        '''
        Alias of L{score_sentence}.
        @param target: The text of the (machine-generated) translation
        @type target: string
        @param references: A list of the reference translations, text-only
        @type references: [string, ...]
        @return: See L{score_sentence}
        @rtype: {string: string}
        '''
        return self.score_sentence(target, references)

    def score_sentence(self, target, references):
        '''
        Score using the METEOR metric given one translated sentence, given a list of reference translations
        @param target: The text of the (machine-generated) translation
        @type target: string
        @param references: A list of the reference translations, text-only
        @type references: [string, ...]
        @return: A dictionary of the various METEOR scoring results, namely precision, recall, fragPenalty and score
        @rtype: {string: string}
        '''
        stats = self.scorer.getMeteorStats(target, references)
        return self._format_stats(stats)

    def full_score_sentences(self, sentence_tuples):
        """
        Score many sentences using METEOR and return all basic scores.
        @param sentence_tuples: a list of tuples generated out of the translated sentences. Each
        tuple should contain one translated sentence and its list of references.
        @type sentence_tuples: [tuple(str(translation), [str(reference), ...]), ...]
        @return: a dictionary containing METEOR scores, name and value
        @rtype: dict(score_name,score_value)
        """
        aggregated_stats = self.meteor_view.MeteorStats()

        for target, references in sentence_tuples:
            sentence_stats = self.scorer.getMeteorStats(target, references)
            aggregated_stats.addStats(sentence_stats)

        self.scorer.computeMetrics(aggregated_stats)
        # Report the aggregated statistics, not those of the last sentence
        # of the loop (which are also undefined for an empty input list).
        return self._format_stats(aggregated_stats)

    def score_sentences(self, sentence_tuples):
        """
        Score many sentences using METEOR metrics and return a single float for the overall score
        @param sentence_tuples: a list of tuples generated out of the translated sentences. Each
        tuple should contain one translated sentence and its list of references.
        @type sentence_tuples: [tuple(str(translation), [str(reference), ...]), ...]
        @return: the basic score float value
        @rtype: float
        """
        return float(self.full_score_sentences(sentence_tuples)['meteor_score'])


class CrossMeteorGenerator(MeteorGenerator):
    '''
    Overwrites the feature generation function, by allowing the provided target sentence
    (i.e. translation) to be scored against the translations provided by the other systems
    embedded in this Parallel Sentence.
    '''

    def get_features_tgt(self, translation, parallelsentence):
        '''
        Score the given translation, using the translations of all other
        systems in the parallel sentence as references.
        @param translation: The translation to be scored; must provide
        get_string() and get_attribute("system")
        @param parallelsentence: The parallel sentence whose get_translations()
        provides the candidate references
        @return: The METEOR scores with their names prefixed by "cross-"
        @rtype: {string: string}
        '''
        current_system_name = translation.get_attribute("system")

        # Keep one string per system name, leaving out the system being scored.
        # Filtering here (rather than deleting the key afterwards) avoids a
        # KeyError when that system is not among the sentence's translations.
        other_translations = {}
        for candidate in parallelsentence.get_translations():
            system_name = candidate.get_attribute("system")
            if system_name != current_system_name:
                other_translations[system_name] = candidate.get_string()

        # Materialize as a real list before handing it to the Java scorer
        references = list(other_translations.values())
        stats = self.score_sentence(translation.get_string(), references)
        return {"cross-{}".format(k): v for k, v in stats.items()}
