Package featuregenerator :: Package languagechecker :: Module languagetool_socket
[hide private]
[frames | no frames]

Source Code for Module featuregenerator.languagechecker.languagetool_socket

 1  ''' 
 2  Created on 30 Mar 2012 
 3   
 4  @author: Eleftherios Avramidis 
 5  ''' 
 6  from py4j.java_gateway import JavaGateway 
 7  from py4j.java_gateway import GatewayClient 
 8  from py4j.java_gateway import java_import 
 9  from featuregenerator.languagefeaturegenerator import LanguageFeatureGenerator 
10  import os, subprocess, sys 
11   
12   
class LanguageToolSocketFeatureGenerator(LanguageFeatureGenerator):
    '''
    Feature generator that runs LanguageTool on a string through a Java
    gateway and reports how many rule matches (and how many matched
    characters) were found, per rule and in total.
    '''

    def __init__(self, lang, gateway):
        '''
        Prepare a LanguageTool checker for one language.

        @param lang: language short name; it is handed as-is to
            org.languagetool.Language.getLanguageForShortName
        @param gateway: an already connected gateway object offering
            new_jvm_view() (presumably a py4j JavaGateway -- confirm
            against the caller)
        '''
        self.lang = lang

        # NOTE: an earlier revision took a classpath instead of a gateway,
        # compiled JavaServer.java, launched the JVM as a subprocess, read
        # the socket number from its stdout and terminated it in __del__.
        # The JVM life cycle is now the responsibility of whoever creates
        # the gateway.

        # A separate JVM view keeps the wildcard import local to this object.
        ltool_view = gateway.new_jvm_view()
        java_import(ltool_view, 'org.languagetool.*')

        tool_language = ltool_view.Language.getLanguageForShortName(lang)
        self.ltool = ltool_view.JLanguageTool(tool_language)
        self.ltool.activateDefaultPatternRules()

    def get_features_string(self, string):
        '''
        Check a string with LanguageTool and summarise the matches.

        @param string: the text handed to JLanguageTool.check
        @return: dict whose values are all strings, with the keys
            - "lt_<rule id>": number of matches of that rule
            - "lt_lt_<rule id>_chars": sum of (end column - start column)
              over the matches of that rule
            - "lt_errors": total number of matches
            - "lt_errors_chars": total of the column differences
        '''
        counts = {}
        errors = 0
        total_error_chars = 0

        for match in self.ltool.check(string):
            error_id = "lt_{}".format(match.getRule().getId())
            counts[error_id] = counts.get(error_id, 0) + 1
            errors += 1

            error_chars = match.getEndColumn() - match.getColumn()
            # error_id already starts with "lt_", so this key comes out as
            # "lt_lt_<rule id>_chars". Kept unchanged on purpose: consumers
            # of the generated features may rely on the existing name.
            error_chars_id = "lt_{}_chars".format(error_id)
            counts[error_chars_id] = counts.get(error_chars_id, 0) + error_chars
            total_error_chars += error_chars

        # Make every value a string. items() (rather than the Python 2 only
        # iteritems()) works on both Python 2 and 3, and building a new dict
        # avoids rewriting the dict that is being iterated.
        atts = {key: str(value) for key, value in counts.items()}

        # The totals are written last, exactly as before, so they win over a
        # rule-derived key of the same name.
        atts["lt_errors"] = str(errors)
        atts["lt_errors_chars"] = str(total_error_chars)

        return atts