1 """
2 @author Eleftherios Avramidis based on original by Maja Popovic
3 """
4
5
6
7 import sys
8 import gzip
9 from nltk.tokenize.punkt import PunktWordTokenizer
10
11
# Module-level flag, initialised to False. Nothing in the code visible here
# reads or updates it -- NOTE(review): confirm whether it is still needed.
sent = False
13
14
class levNode:
    """Back-pointer cell of a Levenshtein alignment table.

    Each node records which cell the best path came from and the edit code
    of the step that led into the current cell.

    Attributes:
        rpos: reference-side index of the predecessor cell.
        hpos: hypothesis-side index of the predecessor cell.
        error: integer edit code for the step (presumably 0 = match,
            1 = substitution, 2 = deletion, 3 = insertion -- confirm against
            the code that builds the nodes).
    """

    def __init__(self, rpos=0, hpos=0, error=0):
        self.rpos = rpos
        self.hpos = hpos
        self.error = error
20
21
def _edit_distance(hyp, ref):
    """Return the Levenshtein distance between two token sequences."""
    # Row for the empty reference prefix: producing hyp[:h] from nothing
    # costs h insertions.
    previous = list(range(len(hyp) + 1))
    for r, ref_token in enumerate(ref, 1):
        # First column: dropping r reference tokens costs r deletions.
        current = [r]
        for h, hyp_token in enumerate(hyp, 1):
            substitution = previous[h - 1] + (0 if hyp_token == ref_token else 1)
            deletion = previous[h] + 1
            insertion = current[h - 1] + 1
            current.append(min(substitution, deletion, insertion))
        previous = current
    return previous[-1]


def wer(hypWords, refs):
    """Return the lowest word error rate of a hypothesis over its references.

    The error rate against one reference is the Levenshtein distance
    (substitutions, deletions and insertions each cost 1) divided by the
    reference length.

    Args:
        hypWords: the hypothesis, either a sequence of tokens or a string.
            A string is tokenized with ``PunktWordTokenizer``.
        refs: the references, an iterable whose items are token sequences.
            A bare string is treated as ONE reference sentence and is
            tokenized with ``PunktWordTokenizer``.

    Returns:
        The minimum error rate over all references, as a float. An empty
        reference is given the length 0.00000001 to avoid dividing by zero.
        The search starts from 1000, so the integer 1000 is returned when no
        reference scores below it (for example when ``refs`` is empty).
    """
    if isinstance(hypWords, str):
        hypWords = PunktWordTokenizer().tokenize(hypWords)
    if isinstance(refs, str):
        # Wrap the token list so the loop below sees a single reference.
        # Iterating the tokens directly would treat every word as its own
        # reference and compare hypothesis words against characters.
        refs = [PunktWordTokenizer().tokenize(refs)]

    minWer = 1000

    for refWords in refs:
        # The final DP cell already holds the number of edits on the best
        # path, so no back-pointer table or back-trace is required.
        sentWerCount = float(_edit_distance(hypWords, refWords))

        rLen = 0.00000001
        if len(refWords) > 0:
            rLen = len(refWords)

        sentWer = sentWerCount / rLen
        if sentWer < minWer:
            minWer = sentWer

    return minWer
235