Package util :: Module paralleltemplate
[hide private]
[frames | no frames]

Source Code for Module util.paralleltemplate

  1  ''' 
  2  Created on May 31, 2011 
  3   
  4  @author: Eleftherios Avramidis 
  5  ''' 
  6   
  7  # To install library in ubuntu run  
  8  # sudo apt-get install python-stdeb 
  9  # sudo pypi-install pprocess 
 10  import pprocess 
 11  from sentence.parallelsentence import ParallelSentence 
 12  from sentence.sentence import SimpleSentence 
 13  from featuregenerator.featuregenerator import FeatureGenerator 
 14   
class ParallelTemplate(object):
    """
    Placeholder class: it declares no attributes and no behaviour beyond
    an empty constructor.
    """

    def __init__(self):
        """
        Create an instance without initialising any state.
        """
        pass
24
def taketime(string):
    """
    Simulate a time-consuming task and report when it is finished.

    Runs through a fixed-size empty loop, prints "done: " followed by the
    given label, and hands the label back unchanged.

    @param string: label identifying the caller; echoed in the message
    @type string: str
    @return: the same label that was passed in
    @rtype: str
    """
    # Empty loop standing in for real work; it only burns CPU time.
    for _ in range(2000000):
        pass
    # A parenthesised single argument prints identically under Python 2
    # (print statement) and Python 3 (print function), whereas the bare
    # statement form is a syntax error on Python 3.
    print("done: " + string)
    return string
30 31
class BerkeleyFeatureGenerator(FeatureGenerator):
    """
    Demo feature generator whose feature values are simply the labels
    returned by L{taketime}, which stands in for an expensive computation.
    """

    def __init__(self):
        """
        Start with an empty attribute list.
        """
        self.attributes = list()

    def get_features_tgt(self, sentence, ps):
        """
        Produce the demo feature for one target sentence.

        @param sentence: target sentence (not inspected here)
        @param ps: parallel sentence (not inspected here)
        @return: single-entry dict keyed "berkeley"
        @rtype: dict
        """
        label = taketime("BerkeleyFeatureGenerator")
        return {"berkeley": label}

    def get_features_parallelsentence(self, ps):
        """
        Produce the demo feature for the parallel sentence as a whole.

        @param ps: parallel sentence (not inspected here)
        @return: single-entry dict keyed "parallelsentenceberkeley"
        @rtype: dict
        """
        label = taketime("parallelsentenceBerkeleyFeatureGenerator")
        return {"parallelsentenceberkeley": label}
40
class ParseMatchFeatureGenerator(FeatureGenerator):
    """
    Demo feature generator whose feature values are simply the labels
    returned by L{taketime}, which stands in for an expensive computation.
    """

    def get_features_tgt(self, sentence, ps):
        """
        Produce the demo feature for one target sentence.

        @param sentence: target sentence (not inspected here)
        @param ps: parallel sentence (not inspected here)
        @return: single-entry dict keyed "parseMatch"
        @rtype: dict
        """
        label = taketime("ParseMatchFeatureGenerator")
        return {"parseMatch": label}

    def get_features_parallelsentence(self, ps):
        """
        Produce the demo feature for the parallel sentence as a whole.

        @param ps: parallel sentence (not inspected here)
        @return: single-entry dict keyed "parallelsentenceparseMatch"
        @rtype: dict
        """
        label = taketime("parallelsentenceParseMatchFeatureGenerator")
        return {"parallelsentenceparseMatch": label}
47
class LengthFeatureGenerator(FeatureGenerator):
    """
    Demo feature generator whose feature values are simply the labels
    returned by L{taketime}, which stands in for an expensive computation.
    """

    def get_features_tgt(self, sentence, ps):
        """
        Produce the demo feature for one target sentence.

        @param sentence: target sentence (not inspected here)
        @param ps: parallel sentence (not inspected here)
        @return: single-entry dict keyed "length"
        @rtype: dict
        """
        label = taketime("LengthFeatureGenerator")
        return {"length": label}

    def get_features_parallelsentence(self, ps):
        """
        Produce the demo feature for the parallel sentence as a whole.

        @param ps: parallel sentence (not inspected here)
        @return: single-entry dict keyed "parallelsentencelength"
        @rtype: dict
        """
        label = taketime("parallelsentenceLengthFeatureGenerator")
        return {"parallelsentencelength": label}
54
class SRILMFeatureGenerator(FeatureGenerator):
    """
    Demo feature generator: the target-side feature echoes the sentence's
    "system" attribute, while the sentence-pair feature is the label
    returned by L{taketime}.
    """

    def get_features_tgt(self, sentence, ps):
        """
        Report the "system" attribute of a target sentence as its feature.

        @param sentence: target sentence; must offer get_attribute()
        @param ps: parallel sentence (not inspected here)
        @return: single-entry dict keyed "SRILM"
        @rtype: dict
        """
        system_name = sentence.get_attribute("system")
        return {"SRILM": system_name}

    def get_features_parallelsentence(self, ps):
        """
        Produce the demo feature for the parallel sentence as a whole.

        @param ps: parallel sentence (not inspected here)
        @return: single-entry dict keyed "parallelsentenceSRILM"
        @rtype: dict
        """
        label = taketime("parallelsentenceSRILMFeatureGenerator")
        return {"parallelsentenceSRILM": label}
61
class RatioGenerator(FeatureGenerator):
    """
    Demo feature generator whose feature values are simply the labels
    returned by L{taketime}, which stands in for an expensive computation.
    """

    def get_features_tgt(self, sentence, ps):
        """
        Produce the demo feature for one target sentence.

        @param sentence: target sentence (not inspected here)
        @param ps: parallel sentence (not inspected here)
        @return: single-entry dict keyed "ratio"
        @rtype: dict
        """
        label = taketime("RatioGenerator")
        return {"ratio": label}

    def get_features_parallelsentence(self, ps):
        """
        Produce the demo feature for the parallel sentence as a whole.

        @param ps: parallel sentence (not inspected here)
        @return: single-entry dict keyed "parallelsentenceratio"
        @rtype: dict
        """
        label = taketime("parallelsentenceRatioGenerator")
        return {"parallelsentenceratio": label}
68
class DiffGenerator(FeatureGenerator):
    """
    Demo feature generator whose feature values are simply the labels
    returned by L{taketime}, which stands in for an expensive computation.
    """

    def get_features_tgt(self, sentence, ps):
        """
        Produce the demo feature for one target sentence.

        @param sentence: target sentence (not inspected here)
        @param ps: parallel sentence (not inspected here)
        @return: single-entry dict keyed "diff"
        @rtype: dict
        """
        label = taketime("DiffGenerator")
        return {"diff": label}

    def get_features_parallelsentence(self, ps):
        """
        Produce the demo feature for the parallel sentence as a whole.

        @param ps: parallel sentence (not inspected here)
        @return: single-entry dict keyed "parallelsentencediff"
        @rtype: dict
        """
        label = taketime("parallelsentenceDiffGenerator")
        return {"parallelsentencediff": label}
75 76
def run_serial(args, sentence):
    """
    Apply a chain of feature generators to a parallel sentence, one after
    the other, feeding the output of each step into the next.

    @param args: feature generator classes, in the order they must run;
        each one is instantiated without arguments
    @type args: list
    @param sentence: Object of ParallelSentence() to start from
    @type sentence: sentence.parallelsentence.ParallelSentence
    @return: whatever the last generator returned, or the input sentence
        untouched when args is empty
    @rtype: sentence.parallelsentence.ParallelSentence
    """
    enriched = sentence
    for generator_class in args:
        # Look the method up on the class and hand it a fresh instance.
        add_features = generator_class.add_features_parallelsentence
        enriched = add_features(generator_class(), enriched)
    return enriched
88 89
def run_parallel(parallelizedSeries, ps, nproc):
    """
    Parallelize the execution of classes that can be executed in parallel.
    It distributes the processes over several CPUs.

    The batches in parallelizedSeries are processed one after the other.
    Within a batch, every series of generator classes is handed to
    L{run_serial} in its own process; once the batch has finished, each
    partial result is merged into ps before the next batch is dispatched.

    @param parallelizedSeries: list of batches; each batch is a list of
        series (lists of classes) that can be executed in parallel
    @type parallelizedSeries: list
    @param ps: parallel sentence that receives the merged features
    @type ps: sentence.parallelsentence.ParallelSentence
    @param nproc: number of available CPUs
    @type nproc: int
    @return: ps, after the results of every batch have been merged into it
    @rtype: sentence.parallelsentence.ParallelSentence
    """
    results = pprocess.Map(limit=nproc, reuse=1)
    parallel_function = results.manage(pprocess.MakeReusable(run_serial))

    # Index of the first result belonging to the current batch. It has to
    # accumulate over all batches, because `results` keeps collecting the
    # output of every call made through parallel_function.
    offset = 0
    for parallelized in parallelizedSeries:
        for args in parallelized:
            # Always dispatch the merged sentence, never a partial result
            # left over from the previous batch.
            parallel_function(args, ps)

        # waits until the iteration with parallel processes is finished
        batch_size = len(parallelized)
        parallel_results = results[offset:offset + batch_size]
        offset += batch_size

        # merge the attributes; a separate loop variable keeps `ps` bound
        # to the sentence being built up.
        # NOTE(review): assumes merge_parallelsentence() updates its
        # receiver in place -- confirm against ParallelSentence.
        for partial_result in parallel_results:
            ps.merge_parallelsentence(partial_result)

    return ps
# example in http://www.astrobetter.com/parallel-processing-in-python/


if __name__ == '__main__':

    # maximum number of simultaneous processes desired
    nproc = 2

    # Demo input: one source sentence and two translations, each tagged
    # with the name of the system it is attributed to.
    src = SimpleSentence("I am a student")
    tgt1 = SimpleSentence("Ich bin ein Student")
    tgt1.add_attribute("system", "moses")
    tgt2 = SimpleSentence("Ich bin Student")
    tgt2.add_attribute("system", "lucy")
    ps = ParallelSentence(src, [tgt1, tgt2])

    # A "generatorseries" is a chain of tasks that are applied one after
    # the other.
    generatorseries1 = [BerkeleyFeatureGenerator, ParseMatchFeatureGenerator]
    generatorseries2 = [LengthFeatureGenerator]
    generatorseries3 = [SRILMFeatureGenerator]
    generatorseries4 = [RatioGenerator]
    generatorseries5 = [DiffGenerator]

    # A "parallelized" list groups the series that will be run in
    # parallel with each other.
    parallelized1 = [generatorseries1, generatorseries2, generatorseries3]
    parallelized2 = [generatorseries4, generatorseries5]

    # The second group has to be run AFTER the first group has finished,
    # hence the ordering of this outer list.
    parallelizedSeries = [parallelized1, parallelized2]

    run_parallel(parallelizedSeries, ps, nproc)