Package support :: Package preprocessing :: Package jcml :: Module wmt12baselinefeatures2jcml
[hide private]
[frames] | no frames]

Source Code for Module support.preprocessing.jcml.wmt12baselinefeatures2jcml

 1  ''' 
 2  Created on 1 Mar 2012 
 3   
 4  @author: Eleftherios Avramidis 
 5  ''' 
 6   
 7  import sys 
 8  from dataprocessor.sax.saxps2jcml import Parallelsentence2Jcml 
 9  from dataprocessor.input.jcmlreader import JcmlReader 
10  from xml.etree import ElementTree 
11   
def wmt12baselinefeatures2jcml(features_description_filename, features_filename, input_jcml, output_jcml):
    '''
    Add the values of a tab-separated feature file to an existing JCML file.

    The attribute names are taken from the XML feature description, one
    attribute dictionary is built per line of the feature file, and the
    resulting vector is attached to the dataset read from C{input_jcml}
    before the dataset is written to C{output_jcml}.

    @param features_description_filename: XML file describing the features
    @param features_filename: tab-separated file holding the feature values
    @param input_jcml: path of the JCML file that is read
    @param output_jcml: path of the JCML file that is written
    '''
    names = read_attribute_names(features_description_filename)
    feature_rows = get_attribute_vector(names, features_filename)

    dataset = JcmlReader(input_jcml).get_dataset()
    # NOTE(review): "ps" presumably attaches the attributes at
    # parallel-sentence level -- confirm against add_attribute_vector
    dataset.add_attribute_vector(feature_rows, "ps")

    writer = Parallelsentence2Jcml(dataset)
    writer.write_to_file(output_jcml)
19 20
def get_attribute_vector(attribute_names, features_filename):
    '''
    Read a tab-separated feature file into a list of attribute dictionaries.

    Every line of the file yields one dictionary that maps the given
    attribute names, in order, onto the tab-separated values of that line.
    Pairing stops at the shorter of the two sequences, so surplus names or
    surplus values on a line are silently dropped.

    @param attribute_names: attribute names, in the column order of the file
    @param features_filename: path of the tab-separated feature file
    @return: list with one dictionary (name -> value string) per line
    '''
    att_vector = []
    # the context manager closes the file even if reading fails half-way
    with open(features_filename, 'r') as f:
        for line in f:
            # drop the line terminator first, otherwise it stays glued to
            # the value of the last column
            values = line.rstrip('\r\n').split('\t')
            att_vector.append(dict(zip(attribute_names, values)))
    return att_vector
def read_attribute_names(features_description_filename):
    '''
    Build the attribute names from an XML feature description file.

    For each C{feature} element matched by C{findall("feature")} on the
    parsed document, the value of its C{index} attribute is prefixed with
    C{bb_}. The names are returned in document order.

    @param features_description_filename: path of the XML description file
    @return: list of attribute names of the form C{bb_<index>}
    '''
    document = ElementTree.parse(features_description_filename)
    names = []
    for feature in document.findall("feature"):
        names.append("bb_{0}".format(feature.get("index")))
    return names
if __name__ == '__main__':
    # Four positional arguments are required; fail with a readable usage
    # message instead of an IndexError traceback when any of them is missing.
    if len(sys.argv) < 5:
        sys.exit("Usage: {0} <features_description_xml> <features_file> "
                 "<input_jcml> <output_jcml>".format(sys.argv[0]))

    features_description_filename = sys.argv[1]
    features_filename = sys.argv[2]
    input_jcml = sys.argv[3]
    output_jcml = sys.argv[4]
    wmt12baselinefeatures2jcml(features_description_filename, features_filename, input_jcml, output_jcml)