Package dataprocessor :: Package output :: Module wmt11tabwriter
[hide private]
[frames | no frames]

Source Code for Module dataprocessor.output.wmt11tabwriter

 1  """ 
 2  @author: Eleftherios Avramidis 
 3  """ 
 4   
 5  from sentence.dataset import DataSet 
 6  import codecs 
 7   
class Wmt11TabWriter(object):
    """
    Serializes parallel sentences into the tab-separated layout announced by
    the header row: metric name, language pair, test set, system, segment
    number and segment score, one row per translation.

    Rows are emitted in the order the sentences and their translations are
    iterated; no sorting is applied.
    """

    # Column header, emitted as the first line of every output.
    _HEADER = "<METRIC NAME>\t<LANG-PAIR>\t<TEST SET>\t<SYSTEM>\t<SEGMENT NUMBER>\t<SEGMENT SCORE>"

    def __init__(self, data=None, name="dfki", testset="testset", att_name="rank"):
        """
        Constructor
        @param data: a list of ParallelSentence objects or a DataSet; when
        given, it is converted immediately and stored in self.content.
        Any other value (including None) leaves self.content as None.
        @param name: value written in the metric name column
        @param testset: test set name used for sentences whose attributes
        carry no (non-empty) "testset" entry
        @param att_name: name of the translation attribute written in the
        segment score column
        """
        self.metric_name = name
        self.testset = testset
        self.att_name = att_name
        # Always defined, so that write_to_file can tell "nothing converted
        # yet" apart from a real result instead of hitting AttributeError.
        self.content = None

        if data is None:
            return
        if isinstance(data, list):
            self.convert_to_tab(data)
        elif isinstance(data, DataSet):
            self.convert_to_tab(data.get_parallelsentences())

    def _iter_entries(self, parallelsentences):
        """
        Yields one tab-joined row (without line terminator) per translation
        of every given parallel sentence.
        @param parallelsentences: an iterable of ParallelSentence objects
        """
        for ps in parallelsentences:
            ps_att = ps.get_attributes()
            # A missing or empty per-sentence test set falls back to the
            # writer-wide default.
            testset = ps_att.get("testset") or self.testset
            langpair = "%s-%s" % (ps_att["langsrc"], ps_att["langtgt"])
            for tgt in ps.get_translations():
                t_att = tgt.get_attributes()
                yield "\t".join([self.metric_name,
                                 langpair,
                                 testset,
                                 t_att["system"],
                                 ps_att["id"],
                                 t_att[self.att_name]])

    def convert_to_tab(self, parallelsentences):
        """
        Creates the tab-separated document for the given (parallel) sentences.
        The resulting string (header row followed by one row per translation,
        joined by newlines, no trailing newline) is stored in self.content.
        @param parallelsentences: a list of ParallelSentence objects
        """
        entries = [self._HEADER]
        entries.extend(self._iter_entries(parallelsentences))
        self.content = "\n".join(entries)

    def write_to_file(self, filename):
        """
        Writes the previously converted content to a UTF-8 encoded file.
        @param filename: path of the file to be (over)written
        @raise ValueError: if no content has been produced yet; raised before
        the file is opened so that an existing file is not truncated
        """
        if self.content is None:
            raise ValueError("no content to write; call convert_to_tab first")
        # The context manager closes the handle even if the write fails.
        with codecs.open(filename, 'w', 'utf-8') as file_object:
            file_object.write(self.content)

    def write_to_file_nobuffer(self, filename, parallelsentences):
        """
        Writes the given (parallel) sentences directly to a UTF-8 encoded
        file, row by row, without building the whole document in memory and
        without touching self.content. Every line, including the header, is
        terminated by a newline.
        @param filename: path of the file to be (over)written
        @param parallelsentences: an iterable of ParallelSentence objects
        """
        with codecs.open(filename, 'w', 'utf-8') as file_object:
            # The header needs its own line terminator; otherwise the first
            # row would be appended to the header line.
            file_object.write("%s\n" % self._HEADER)
            for entry in self._iter_entries(parallelsentences):
                file_object.write("%s\n" % entry)