Package test :: Module coupleddataset_test
[hide private]
[frames | no frames]

Source Code for Module test.coupleddataset_test

 1  ''' 
 2  Created on 27 Feb 2012 
 3   
 4  @author: Eleftherios Avramidis 
 5  ''' 
 6  import unittest 
 7  from dataprocessor.input.jcmlreader import JcmlReader 
 8  from sentence.coupleddataset import CoupledDataSet, OrangeCoupledDataSet, CoupledDataSetDisk 
 9  from dataprocessor.sax.saxps2jcml import Parallelsentence2Jcml 
10   
11   
class CoupledDataSetTest(unittest.TestCase):
    '''
    Tests for CoupledDataSet: coupling a dataset read from a JCML file must
    yield one couple per unordered pair of sentences, and decoupling must
    give back as many sentences as were originally read.
    '''

    def setUp(self):
        '''
        Read the sample JCML file and build the coupled dataset that the
        individual tests inspect.
        '''
        # NOTE(review): absolute, machine-specific paths; the sample file must
        # exist at this location for any test in this class to run.
        self.input_file = "/home/Eleftherios Avramidis/taraxu_data/wmt12/qe/training_set/training-sample.jcml"
        self.output_file = "/home/Eleftherios Avramidis/taraxu_data/wmt12/qe/training_set/training-sample.coupled.jcml"
        self.simple_dataset = JcmlReader(self.input_file).get_dataset()
        self.coupled_dataset = CoupledDataSet(construct=self.simple_dataset)

    def test_coupling(self):
        '''
        The coupled dataset must contain n*(n-1)/2 couples for n input
        sentences; the couples are also written to the output file.
        '''
        coupled_parallelsentences = self.coupled_dataset.get_parallelsentences()
        n = len(self.simple_dataset.get_parallelsentences())
        m = len(coupled_parallelsentences)
        # Floor division keeps the expected count an integer whatever the
        # division semantics in effect; n*(n-1) is always even, so it is exact.
        self.assertEqual(m, n * (n - 1) // 2, "The number of the couples generated is not the proper one")
        Parallelsentence2Jcml(coupled_parallelsentences).write_to_file(self.output_file)

    def test_decoupling(self):
        '''
        Decoupling the coupled dataset must return as many sentences as the
        original dataset had; the decoupled sentences are also written to a
        file whose name is derived from the output file.
        '''
        decoupled_dataset = self.coupled_dataset.get_single_set()
        n = len(self.simple_dataset.get_parallelsentences())
        m = len(decoupled_dataset.get_parallelsentences())
        self.assertEqual(m, n, "Coupling and decoupling doesn't regenerate same number of sentences as in input")
        Parallelsentence2Jcml(decoupled_dataset.get_parallelsentences()).write_to_file(self.output_file.replace("jcml", "decoupled.jcml"))

    def test_ondisk_vs_onmemory(self):
        '''
        Placeholder: currently does nothing. The disabled body below appears
        to compare a disk-backed coupled dataset with the in-memory one.
        '''
        pass
        # Parallelsentence2Jcml(self.coupled_dataset.get_parallelsentences()).write_to_file(self.output_file.replace("jcml", "memory.jcml"))
        # coupledfile_disk = self.output_file.replace("jcml", "disk.jcml")
        # coupledfile_memory = self.output_file.replace("jcml", "memory.jcml")
        # CoupledDataSetDisk(self.simple_dataset).write(coupledfile_disk)
        # coupled_dataset = CoupledDataSet(readfile = coupledfile_disk)
        # Parallelsentence2Jcml(self.coupled_dataset).write_to_file(coupledfile_memory)
        # self.assertEqual(self.coupled_dataset, coupled_dataset)


if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()