1
2
3 '''
4 This script provides
5 (a) the class that wraps the functionality of the ranking mechanism
6 (b) a command-line interactive interface for testing installation
7
8 Created on 2 Aug 2013
9
10 @author: Eleftherios Avramidis
11 '''
12
13 import time
14 import sys
15
16 from featuregenerator.parser.berkeley.berkeleyclient import BerkeleySocketFeatureGenerator
17 from sentence.sentence import SimpleSentence
18
19 from ml.lib.orange import OrangeRuntimeRanker
20 from sentence.parallelsentence import ParallelSentence
21
22 from bootstrap import ExperimentConfigParser
23 from featuregenerator.parser.berkeley.parsermatches import ParserMatches
24 from featuregenerator.lengthfeaturegenerator import LengthFeatureGenerator
25 from featuregenerator.meteor.meteor import CrossMeteorGenerator
26 from featuregenerator.preprocessor import Normalizer
27 from featuregenerator.preprocessor import Tokenizer
28 from featuregenerator.preprocessor import Truecaser
29
30 from py4j.java_gateway import GatewayClient, JavaGateway
31
32
33
35 """
36 A class that demonstrates the use of simple ranking pipeline. It provides
37 the function 'rank' that receives source and translation strings and
38 returns the ranking of the translations together with a description
39 @ivar featuregenerators: List of initialized feature generator objects in the order that will be used
40 @type featuregenerators: [featuregenerator.featuregenerator.FeatureGenerator, ...]
41 @ivar ranker: Machine Learning class that handles ranking of items
42 @type ranker: ml.lib.orange
43 @ivar source_language: Language code for source language
44 @type source_language: str
45 @ivar target_language: Language code for target language
46 @type target_language: str
47 """
48 - def __init__(self, configfilenames, classifiername):
67
68
69 - def rank(self, source, translations):
96
97
107
108
110 for parser_name in [section for section in cfg.sections() if section.startswith("parser:")]:
111 if cfg.get(parser_name, "language") == language:
112 grammarfile = cfg.get(parser_name, "grammarfile")
113 sys.stderr.write("initializing socket parser with grammar file {}\n".format(grammarfile))
114 return BerkeleySocketFeatureGenerator(language, grammarfile, self.gateway)
115
117 java_classpath, dir_path = cfg.get_classpath()
118
119 if java_classpath:
120
121
122 socket_no = self.jvm.socket_no
123 self.gatewayclient = GatewayClient('localhost', socket_no)
124 self.gateway = JavaGateway(self.gatewayclient, auto_convert=True, auto_field=True)
125 sys.stderr.write("Initialized global Java gateway with pid {} in socket {}\n".format(self.jvm.pid, socket_no))
126 return self.gateway
127
128
130 """
131 Initialize the featuregenerators that handle superficial analysis of given translations
132 @param cfg: the loaded configuration object
133 """
134 source_language = cfg.get("general", "source_language")
135 target_language = cfg.get("general", "target_language")
136
137 src_parser = cfg.get_parser(source_language)
138 tgt_parser = cfg.get_parser(target_language)
139
140 langpair = (source_language, target_language)
141
142 featuregenerators = [
143 Normalizer(source_language),
144 Normalizer(target_language),
145 Tokenizer(source_language),
146 Tokenizer(target_language),
147
148 src_parser,
149 tgt_parser,
150
151 ParserMatches(langpair),
152
153
154 Truecaser(source_language, cfg.get_truecaser_model(source_language)),
155 Truecaser(target_language, cfg.get_truecaser_model(target_language)),
156
157 cfg.get_lm(source_language),
158 cfg.get_lm(target_language),
159
160 CrossMeteorGenerator(target_language, cfg.get_classpath()[0], cfg.get_classpath()[1]),
161 LengthFeatureGenerator()
162 ]
163
164 return featuregenerators
165
166
167 if __name__ == "__main__":
168 try:
169 classifier_filename = sys.argv[1]
170 configfilenames = sys.argv[2:]
171 except:
172 sys.exit("Syntax: python application.py <classifier_filename> <pipeline.config.1> [<pipeline.config.2> ...]")
173
174
175
176
177
178
179
180 autoranker = Autoranking(configfilenames, classifier_filename)
181
182 while 1==1:
183 source = raw_input("Source sentence (or 'exit') > ")
184 if source == "exit":
185 sys.exit("Exit requested")
186 doexit = False
187 i = 0
188 translations = []
189 while 1==1:
190 i+=1
191 translation = raw_input("Translation (or empty to continue) > ")
192 if translation!="":
193 translations.append(translation)
194 else:
195 break
196
197 result, description = autoranker.rank(source, translations)
198 print description
199 print "The right order of the given sentences is ", result
200