1 '''
2 Created on 17 Sept 2011
3
4 @author: Eleftherios Avramidis based on code from Maja Popovic, David Vilar
5 '''
6
7 import sys
8 import gzip
9 import math
10 from featuregenerator import FeatureGenerator
11
13 '''
Provides feature generation from IBM Model 1 (see Popovic et al., 2011)
15 '''
16
17
'''
Load the lexicon into a dict.

The file named by lexicon_filename is read line by line. Each line is split
on whitespace; the first two fields, joined by a single space, form the key
and the third field is stored (as a string) as the value in self.lex.
Blank lines are skipped. A non-blank line with fewer than three fields
raises IndexError.
'''
self.lex = {}

# The context manager closes the file even if a malformed line raises.
with open(lexicon_filename, 'r') as lextxt:
    # Iterating the file directly stores every line exactly once, including
    # the last one (the previous read-ahead loop re-used the first line and
    # never stored the final line).
    for lexline in lextxt:
        lexs = lexline.split()
        if not lexs:
            # Tolerate blank lines, e.g. a trailing newline at end of file.
            continue
        self.lex[lexs[0] + " " + lexs[1]] = lexs[2]
32
33
40
41
43
44 swords = sline.split()
45 twords = tline.split()
46 lex = self.lex
47
48
49
50 logtsScore = -len(twords)*math.log10(1+len(swords))
51
52 for tword in twords:
53 nullpair = "NULL "+tword
54 if lex.has_key(nullpair):
55 sScore = float(lex[nullpair])
56 else:
57 sScore = 0.0
58 for sword in swords:
59 wordpair = sword+" "+tword
60 if lex.has_key(wordpair):
61 sScore += float(lex[wordpair])
62
63
64 if sScore==0:
65 logtsScore += -10.0
66 else:
67 logtsScore += math.log10(sScore)
68
69 return logtsScore
70