Package support :: Package preprocessing :: Package jcml :: Module splitjcml
[hide private]
[frames] | no frames]

Source Code for Module support.preprocessing.jcml.splitjcml

 1  ''' 
 2  Created on 24 Oct 2012 
 3   
 4  @author: Eleftherios Avramidis 
 5  ''' 
 6   
 7  from dataprocessor.sax.saxps2jcml import Parallelsentence2Jcml 
 8  from dataprocessor.input.jcmlreader import JcmlReader 
 9  from sys import argv 
10  import sys 
11   
def split_dataset_files(source_xml_file, target_file_1, target_file_2, ratio=0.1):
    """Split the dataset stored in one JCML file into two JCML files.

    The source file is loaded through ``JcmlReader``, the resulting dataset
    is divided by its own ``split`` method, and each part is serialised with
    ``Parallelsentence2Jcml``. Progress messages go to standard error.

    @param source_xml_file: path of the JCML file to read
    @param target_file_1: path that receives the first part of the split
    @param target_file_2: path that receives the second part of the split
    @param ratio: passed unchanged to the dataset's ``split`` method
        (presumably the fraction assigned to one of the parts -- confirm
        against the dataset class)
    """
    sys.stderr.write("Reading source file %s ...\n" % source_xml_file)
    reader = JcmlReader(source_xml_file)
    first_part, second_part = reader.get_dataset().split(ratio)

    # Both parts go through the same announce-then-write sequence, in order.
    pending_writes = (
        ("Writing first target file %s ...\n", first_part, target_file_1),
        ("Writing second target file %s ...\n", second_part, target_file_2),
    )
    for message, part, destination in pending_writes:
        sys.stderr.write(message % destination)
        Parallelsentence2Jcml(part).write_to_file(destination)
if __name__ == '__main__':
    # Command line: <source_xml_file> <target_file_1> <target_file_2> [ratio]
    #
    # The three paths are mandatory. Without this check a short command line
    # would fail with an uninformative IndexError when argv is indexed below.
    if len(argv) < 4:
        program = argv[0] if argv else "splitjcml.py"
        sys.stderr.write(
            "Usage: %s source_xml_file target_file_1 target_file_2 [ratio]\n"
            % program
        )
        sys.exit(2)

    # The optional ratio is read only when exactly four arguments are given;
    # otherwise the default of 0.1 is used.
    ratio = 0.1
    if len(argv) == 5:
        ratio = float(argv[4])

    split_dataset_files(argv[1], argv[2], argv[3], ratio)