1 '''
2 Created on Nov 16, 2011
3
4 @author: jogin
5 '''
6
7 import sys
8 sys.path.append("/home/Lukas Poustka/taraxuscripts")
9
10 from featuregenerator.levenshtein.levenshtein import levenshtein_tok
11
12
class OOLinkage(object):
    """
    Compare sentences from Open Office 2010 with sentences from Open Office
    2011 (OpenOffice3) according to Levenshtein distance.

    Instantiating the class runs the whole comparison: both sentence files
    are read, every old sentence inside the configured window is compared
    with every new sentence, and each pair whose distance does not exceed
    MAX_DISTANCE is printed to the console and appended to LOG_FILENAME.
    """

    # Pairs whose levenshtein_tok() result is above this value are skipped.
    MAX_DISTANCE = 4

    # 1-based, inclusive range of old sentences that is examined.
    # NOTE(review): these bounds look like a leftover debugging window
    # (the original skipped b < 42 and stopped once b > 60) -- confirm
    # before relying on the output covering the whole corpus.
    FIRST_SENTENCE = 42
    LAST_SENTENCE = 61

    # Report file; a relative name, so it is created in the working directory.
    LOG_FILENAME = 'log.txt'

    def __init__(self, oldOOSnts, newOOSnts, newOOLinks='', oldOOLinks=''):
        """
        Run the comparison and write the report.

        @param oldOOSnts: old Open Office file with aligned sentences
        @type oldOOSnts: string
        @param newOOSnts: new Open Office file with aligned sentences
        @type newOOSnts: string
        @param newOOLinks: new Open Office file with source links of aligned
                           sentences (accepted but not used by this method)
        @type newOOLinks: string
        @param oldOOLinks: filename for saving links of sentence and source
                           (accepted but not used by this method)
        @type oldOOLinks: string
        """
        oldSnts = self._read_sentences(oldOOSnts)
        newSnts = self._read_sentences(newOOSnts)

        # NOTE(review): the original indentation was lost; the `b > 60`
        # break is assumed to have belonged to the outer loop, which makes
        # the loop equivalent to iterating over this slice.
        window = oldSnts[self.FIRST_SENTENCE - 1:self.LAST_SENTENCE]

        # `with` guarantees the log is flushed and closed even when a
        # comparison raises part-way through.
        with open(self.LOG_FILENAME, 'w') as log:
            for oldIdx, oldSnt in enumerate(window, self.FIRST_SENTENCE):
                # Single-argument print(...) parses on Python 2 and 3 alike.
                print(oldIdx)
                matchCount = 0
                for newSnt in newSnts:
                    # Computed once per pair; the original recomputed it for
                    # the console line and again for the log line.
                    distance = levenshtein_tok(oldSnt, newSnt)
                    if distance > self.MAX_DISTANCE:
                        continue
                    matchCount += 1

                    print('%d - %d :' % (oldIdx, matchCount))
                    print('Ref:\n%s' % oldSnt)
                    print('Candidate:\n%s' % newSnt)
                    print('Levenshtein: %s \n' % distance)

                    # Every field is newline-terminated; the original wrote
                    # them back to back, so sentence text ran straight into
                    # the next label in the log file.
                    log.write('%d - %d:\n' % (oldIdx, matchCount))
                    log.write('Ref:\n%s\n' % oldSnt)
                    log.write('Candidate:\n%s\n' % newSnt)
                    log.write('Levenshtein: %s\n' % distance)

    @staticmethod
    def _read_sentences(path):
        """
        Return the content of the file at path split on newline characters.

        A file that ends with a newline yields a trailing empty string.

        @param path: name of the text file to read
        @type path: string
        @return: the pieces of the file content between newline characters
        @rtype: list of string
        """
        with open(path) as sntFile:
            return sntFile.read().split('\n')
64
65
if __name__ == '__main__':
    # Run the comparison only when the file is executed as a script, so that
    # importing the module does not start reading these machine-specific
    # absolute paths.
    OOLinkage('/media/DATA/Arbeit/DFKI/111102_OpenOffice/oolinkage/esde_src.c40.detok.txt',
              '/media/DATA/Arbeit/DFKI/111102_OpenOffice/oolinkage/de-es_tgt_detok.txt')
67
68
69
70
71
72
73
74
75