'''
Created on Sep 6, 2011

@author: Eleftherios Avramidis
'''
6
7 from nltk.tokenize.punkt import PunktWordTokenizer
8
10
def levenshtein_tok(hypothesis, reference):
    # NOTE(review): the original `def` line for this function is missing
    # from the file (its internal line numbering skips the header line).
    # The signature is recovered from the body; the name `levenshtein_tok`
    # is a reconstruction — confirm against callers before relying on it.
    """
    Tokenize two sentence strings with NLTK's PunktWordTokenizer and
    return the word-level Levenshtein (edit) distance between the two
    resulting token lists.

    @param hypothesis: a sentence string (e.g. system output)
    @param reference: a sentence string to compare against
    @return: integer edit distance between the token sequences
    """
    hypothesis = PunktWordTokenizer().tokenize(hypothesis)
    reference = PunktWordTokenizer().tokenize(reference)
    return levenshtein(hypothesis, reference)
14
def levenshtein(s1, s2):
    # NOTE(review): the original `def` line is missing from the file; the
    # name and parameters are recovered from the recursive call below
    # (`levenshtein(s2, s1)`), so this header is a safe reconstruction.
    """
    Return the Levenshtein (edit) distance between two sequences.

    Classic two-row dynamic-programming implementation.
    source: wikibooks

    @param s1: first sequence (a string, or a list of tokens)
    @param s2: second sequence (a string, or a list of tokens)
    @return: minimum number of single-element insertions, deletions and
        substitutions needed to turn s1 into s2
    """
    # Keep s1 as the longer sequence so the rolling rows stay as short
    # as possible (memory is O(min(len(s1), len(s2)))).
    if len(s1) < len(s2):
        return levenshtein(s2, s1)
    # After the swap s2 is no longer than s1, so an empty s1 implies an
    # empty s2 as well and this returns 0; kept in the original form.
    if not s1:
        return len(s2)

    # previous_row[j] = distance between the processed prefix of s1 and
    # the first j elements of s2.  `range` replaces the Python-2-only
    # `xrange`: both are indexable here, so behavior is identical on
    # Python 2 and this now also runs on Python 3.
    previous_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]  # cost of deleting s1[:i+1] down to ""
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row

    return previous_row[-1]
37