Package featuregenerator :: Module ibm1featuregenerator
[hide private]
[frames | no frames]

Source Code for Module featuregenerator.ibm1featuregenerator

 1  ''' 
 2  Created on 17 Sept 2011 
 3   
 4  @author: Eleftherios Avramidis based on code from Maja Popovic, David Vilar 
 5  ''' 
 6   
 7  import sys 
 8  import gzip 
 9  import math 
10  from featuregenerator import FeatureGenerator 
11   
class Ibm1FeatureGenerator(FeatureGenerator):
    '''
    Provides features generation from IBM Model 1 (see Popovic et al. 2011).

    The lexicon is held in C{self.lex}, a dict that maps the string
    "<source word> <target word>" to the third field of the matching
    lexicon line, stored as the string read from the file.
    '''

    def __init__(self, lexicon_filename):
        '''
        Load the lexicon into a dict.

        Each non-blank line must contain at least three whitespace-separated
        fields; the first two form the key (joined by a single space) and the
        third is stored as the value. Blank lines are skipped.

        @param lexicon_filename: path of the plain-text lexicon file
        @raise IndexError: if a non-blank line has fewer than three fields
        '''
        self.lex = {}

        # The context manager closes the file even if a line fails to parse.
        with open(lexicon_filename, 'r') as lextxt:
            # Iterating the file directly guarantees that every line,
            # including the last one, is stored exactly once.
            for lexline in lextxt:
                lexs = lexline.split()
                if not lexs:
                    continue
                self.lex[lexs[0] + " " + lexs[1]] = lexs[2]

    def get_features_tgt(self, simplesentence, parallelsentence):
        '''
        Compute the IBM Model 1 feature for one target sentence.

        The source string is taken from
        C{parallelsentence.get_source().get_string()} and the target string
        from C{simplesentence.get_string()}.

        @return: dict with the single key C{'ibm1'}; its value is the score
            from L{get_ibm1score} formatted with four decimals
        @rtype: dict
        '''
        sline = parallelsentence.get_source().get_string()
        tline = simplesentence.get_string()
        ibm1score = self.get_ibm1score(sline, tline)
        return {'ibm1': "%.4f" % ibm1score}

    def get_ibm1score(self, sline, tline):
        '''
        Return the base-10 log IBM Model 1 score of C{tline} given C{sline}.

        Both strings are split on whitespace. The score is::

            -len(twords) * log10(1 + len(swords))
                + sum over target words of log10(word score)

        where the word score is the lexicon value for "NULL <tword>" (0 if
        absent) plus the lexicon values of every "<sword> <tword>" pair that
        is present. A target word whose score is 0 contributes -10.0 instead
        of a logarithm.

        @param sline: source sentence
        @param tline: target sentence
        @return: the log10 score
        @rtype: float
        '''
        swords = sline.split()
        twords = tline.split()
        lex = self.lex

        # Normalisation term: one factor 1/(1 + source length) per target
        # word (the extra 1 matches the NULL entry looked up below).
        logtsScore = -len(twords) * math.log10(1 + len(swords))

        for tword in twords:
            # Start from the NULL entry, then add every source word's entry.
            sScore = float(lex.get("NULL " + tword, 0.0))
            for sword in swords:
                wordpair = sword + " " + tword
                if wordpair in lex:
                    sScore += float(lex[wordpair])

            if sScore == 0:
                # log10(0) is undefined; use a fixed penalty instead.
                logtsScore += -10.0
            else:
                logtsScore += math.log10(sScore)

        return logtsScore
70