Trees | Indices | Help |
|
---|
|
'''
Unit tests for coupling and decoupling a JCML dataset.

Created on 27 Feb 2012

@author: Eleftherios Avramidis
'''
import unittest
from dataprocessor.input.jcmlreader import JcmlReader
from sentence.coupleddataset import CoupledDataSet, OrangeCoupledDataSet, CoupledDataSetDisk
from dataprocessor.sax.saxps2jcml import Parallelsentence2Jcml


# NOTE(review): this module was recovered from a collapsed Epydoc source
# listing in which the class/def header lines (original listing lines 12, 14,
# 21, 28 and 35-36) were lost. The class name follows the 'Test.testName' hint
# in the __main__ guard and setUp is implied by the fixture assignments; the
# two test method names are placeholders -- confirm against the original file.
class Test(unittest.TestCase):
    """Checks CoupledDataSet coupling/decoupling against a sample JCML file."""

    def setUp(self):
        """Read the sample JCML file and build the coupled dataset from it."""
        # Hard-coded locations of the sample input and of the generated output.
        self.input_file = "/home/Eleftherios Avramidis/taraxu_data/wmt12/qe/training_set/training-sample.jcml"
        self.output_file = "/home/Eleftherios Avramidis/taraxu_data/wmt12/qe/training_set/training-sample.coupled.jcml"
        self.simple_dataset = JcmlReader(self.input_file).get_dataset()
        self.coupled_dataset = CoupledDataSet(construct=self.simple_dataset)

    def test_coupling(self):
        """Assert that n input sentences yield n*(n-1)/2 coupled sentences.

        The coupled sentences are also written to ``self.output_file``.
        """
        coupled_parallelsentences = self.coupled_dataset.get_parallelsentences()
        n = len(self.simple_dataset.get_parallelsentences())
        m = len(coupled_parallelsentences)
        # n*(n-1) is always even, so floor division is exact; it also keeps
        # the expected count an int under Python 3, where "/" returns a float.
        self.assertEqual(m, n * (n - 1) // 2, "The number of the couples generated is not the proper one")
        Parallelsentence2Jcml(coupled_parallelsentences).write_to_file(self.output_file)

    def test_decoupling(self):
        """Assert that decoupling yields as many sentences as the input had.

        The decoupled sentences are also written next to the coupled output,
        with "jcml" in the path replaced by "decoupled.jcml".
        """
        decoupled_dataset = self.coupled_dataset.get_single_set()
        n = len(self.simple_dataset.get_parallelsentences())
        m = len(decoupled_dataset.get_parallelsentences())
        self.assertEqual(m, n, "Coupling and decoupling doesn't regenerate same number of sentences as in input")
        Parallelsentence2Jcml(decoupled_dataset.get_parallelsentences()).write_to_file(self.output_file.replace("jcml", "decoupled.jcml"))

    # Disabled experiment comparing the on-disk and in-memory coupled datasets.
    # NOTE(review): the one or two lines preceding this block in the original
    # listing were lost in extraction.
    # Parallelsentence2Jcml(self.coupled_dataset.get_parallelsentences()).write_to_file(self.output_file.replace("jcml", "memory.jcml"))
    # coupledfile_disk = self.output_file.replace("jcml", "disk.jcml")
    # coupledfile_memory = self.output_file.replace("jcml", "memory.jcml")
    # CoupledDataSetDisk(self.simple_dataset).write(coupledfile_disk)
    # coupled_dataset = CoupledDataSet(readfile = coupledfile_disk)
    # Parallelsentence2Jcml(self.coupled_dataset).write_to_file(coupledfile_memory)
    # self.assertEqual(self.coupled_dataset, coupled_dataset)


if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()
Trees | Indices | Help |
|
---|
Generated by Epydoc 3.0.1 on Fri Jul 18 11:46:15 2014 | http://epydoc.sourceforge.net |