Package featuregenerator :: Package levenshtein :: Module levenshtein
[hide private]
[frames | no frames]

Source Code for Module featuregenerator.levenshtein.levenshtein

 1  ''' 
 2  Created on Sep 6, 2011 
 3   
 4  @author: Eleftherios Avramidis 
 5  ''' 
 6   
 7  from nltk.tokenize.punkt import PunktWordTokenizer 
 8   
def levenshtein_tok(hypothesis, reference):
    """
    Token-level edit distance between a hypothesis and a reference string.

    Each string is split into tokens with a fresh PunktWordTokenizer and the
    two resulting token sequences are passed to levenshtein(), whose result
    is returned unchanged.

    @param hypothesis: text to be tokenized and compared
    @param reference: text to be tokenized and compared against
    @return: whatever levenshtein() returns for the two token sequences
    """
    # Hypothesis first, reference second -- same order as the arguments.
    token_sequences = [
        PunktWordTokenizer().tokenize(text)
        for text in (hypothesis, reference)
    ]
    return levenshtein(*token_sequences)
14
def levenshtein(s1, s2):
    """
    Compute the Levenshtein (edit) distance between two sequences.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn one sequence into the other.
    Works on any pair of sized, iterable sequences whose elements can be
    compared with ``!=`` (e.g. two strings, or two lists of tokens).

    source: wikibooks

    @param s1: first sequence
    @param s2: second sequence
    @return: the edit distance as a non-negative integer
    """
    # Keep the longer sequence in s1 so every row has len(s2) + 1 cells,
    # i.e. memory is bounded by the shorter input.
    if len(s1) < len(s2):
        s1, s2 = s2, s1

    # Turning anything into an empty sequence costs one deletion per element.
    if not s2:
        return len(s1)

    # Row for the empty prefix of s1: distance to each prefix of s2.
    # range (not the Python 2-only xrange) keeps this runnable on Python 3.
    previous_row = list(range(len(s2) + 1))
    for i, c1 in enumerate(s1):
        # First cell: distance from s1[:i + 1] to the empty prefix of s2.
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            # j + 1 instead of j since previous_row and current_row are one
            # element longer than s2.
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            # The comparison adds 1 only when the elements differ.
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row

    return previous_row[-1]
37