Package featuregenerator :: Package parser :: Package berkeley :: Package socket :: Module berkeleyparsersocket
[hide private]
[frames] | no frames]

Source Code for Module featuregenerator.parser.berkeley.socket.berkeleyparsersocket

  1  ''' 
  2  Created on Sep 21, 2011 
  3   
  4  @author: jogin 
  5  ''' 
  6   
  7  from py4j.java_gateway import JavaGateway 
  8  from py4j.java_gateway import GatewayClient 
  9  from py4j.java_gateway import java_import 
 10   
 11   #@UnresolvedImport 
 12   
 13  import subprocess 
 14  import time 
 15  import os 
 16  import sys 
 17  import random 
 18  import signal 
 19  from util.jvm import JVM 
 20   
 21   
 22  #def handler(self, signum): 
 23  #    sys.stderr.write("Parsing timeout\n") 
 24  #    raise Exception("parse_timeout") 
 25   
class BerkeleyParserSocket():
    """
    Python-side handle on a Java ``BParser`` object reached through Py4J.

    The caller supplies an already connected Py4J gateway; this class creates
    a ``BParser`` instance for the given grammar inside that JVM and forwards
    sentences to it. It requires the external Java libraries (Berkeley parser
    and the ``BParser`` class) to be on the classpath of the JVM behind the
    gateway. The class itself neither launches nor shuts down the JVM.
    """

    def __init__(self, grammarfile, gateway):
        """
        Create the Java-side parser object and remember a handle to it.

        @param grammarfile: Location of grammar file to be loaded
        @type grammarfile: string
        @param gateway: Connected gateway to the JVM that provides ``BParser``
        @type gateway: Py4J java gateway object
        """
        self.grammarfile = grammarfile

        # The module lives in <src>/featuregenerator/parser/berkeley/socket/,
        # so <src> is four levels above the directory containing this file.
        # abspath() guards against a relative __file__, for which repeated
        # dirname() would collapse to "" and give a cwd-relative path.
        # NOTE: this depth was adjusted for command-line use; an earlier
        # variant climbed one level further. Re-check if the Ruffus-based
        # pipeline reports a wrong directory.
        dir_src = os.path.dirname(os.path.abspath(__file__))
        for _ in range(4):
            dir_src = os.path.dirname(dir_src)
        dir_lib = os.path.join(dir_src, "support", "berkeleyserver", "lib")

        # Diagnostic only: the directory is reported, not used further here.
        # Single-argument print() behaves identically on Python 2 and 3.
        print("Berkeley directory: {0}".format(dir_lib))

        # Random tag used to tell the log lines of different instances apart.
        self.parsername = random.randint(1, 10000)
        self._connect(gateway, grammarfile)

    def _connect(self, gateway, grammarfile):
        """
        Instantiate the Java ``BParser`` class through the gateway.

        @param gateway: Connected gateway to the JVM that provides ``BParser``
        @type gateway: Py4J java gateway object
        @param grammarfile: Location of grammar file handed to ``BParser``
        @type grammarfile: string
        """
        # A separate JVM view keeps the import local to this instance's view.
        module_view = gateway.new_jvm_view()
        java_import(module_view, 'BParser')

        # get the application instance
        self.bp_obj = module_view.BParser(grammarfile)
        sys.stderr.write("got BParser object\n")

    def parse(self, sentence_string):
        """
        Parse one sentence with the Java parser, logging start and end to stderr.

        Errors raised by the Java call are not caught here; they propagate to
        the caller (the closing log marker is then not written).

        @param sentence_string: The sentence to be parsed
        @type sentence_string: string
        @return: Whatever the Java ``BParser.parse`` call returns
        """
        sys.stderr.write("<p process='{0}' string='{1}'>\n".format(self.parsername, sentence_string))

        parseresult = self.bp_obj.parse(sentence_string)

        # Closing marker: previously written as "<\p", a typo that also relied
        # on an invalid escape sequence.
        sys.stderr.write("</p process='{0}' string='{1}'>\n".format(self.parsername, sentence_string))

        return parseresult

    def parse_msg(self, sentence_string):
        """
        Parse one sentence with the Java parser without any logging.

        @param sentence_string: The sentence to be parsed
        @type sentence_string: string
        @return: Whatever the Java ``BParser.parse`` call returns
        """
        return self.bp_obj.parse(sentence_string)
# NOTE: earlier revisions carried several commented-out ``__del__`` variants
# for BerkeleyParserSocket here (terminating the Java process, shutting down
# the gateway, dropping the parser handle). None of them is active, so the
# class performs no cleanup of its own.


if __name__ == "__main__":
    # Manual smoke test: start a JVM with the required jars, connect a Py4J
    # gateway to it and parse one sentence. Paths are machine-specific.
    java_classpath = [":".join([
        "/home/Eleftherios Avramidis/.local/share/py4j/py4j0.7.jar",
        "/home/Eleftherios Avramidis/tools/qualitative/src/support/berkeleyserver/lib/BerkeleyParser.jar",
        "/home/Eleftherios Avramidis/tools/qualitative/src/featuregenerator/parser/berkeley/socket",
    ])]
    jvm = JVM(java_classpath)
    socket_no = jvm.socket_no
    gatewayclient = GatewayClient('localhost', socket_no)
    gateway = JavaGateway(gatewayclient, auto_convert=True, auto_field=True)

    bps = BerkeleyParserSocket("/home/Eleftherios Avramidis/tools/berkeleyparser/grammars/eng_sm6.gr", gateway)
    # Single-argument print() gives the same output on Python 2 and 3.
    print(bps.parse("This is another sentence"))