Package ml :: Package var :: Module svmrank
[hide private]
[frames | no frames]

Source Code for Module ml.var.svmrank

 1  ''' 
 2  Created on 29 Aug 2012 
 3   
 4  @author: Eleftherios Avramidis 
 5  ''' 
 6  import os 
 7  import subprocess 
 8  import re 
 9  import sys 
10   
class SvmRank(object):
    """
    Thin wrapper around the SVMrank command-line executables.

    The executables ``svm_rank_learn`` and ``svm_rank_classify`` are looked
    up inside ``directory``. Every keyword argument that is not a file name
    is forwarded to the executable as a ``-<name> <value>`` pair.

    The code deliberately sticks to constructs that behave the same on
    Python 2.7 and Python 3.
    """

    # Default file locations, used when the caller does not supply any.
    DEFAULT_TRAINING_FILENAME = "/tmp/training.dat"
    DEFAULT_MODEL_FILENAME = "/tmp/model.dat"
    DEFAULT_OUTPUT_FILENAME = "/tmp/output.dat"

    # Patterns for the statistics printed by svm_rank_classify. Raw strings
    # keep the regex escapes (``\(``, ``\d``, ``\s``) from being treated as
    # (invalid) string escapes.
    _RUNTIME_PATTERN = re.compile(
        r"Runtime \(without IO\) in cpu-seconds: ([\d.]*)")
    _ERROR_PATTERN = re.compile(
        r"Zero/one-error on test set: ([\d.]*)% "
        r"\((\d*) correct, (\d*) incorrect, (\d*) total\)")
    _SWAPPED_PATTERN = re.compile(r"Total Num Swappedpairs\s*:\s*(\d*)")

    def __init__(self, directory="~/taraxu_tools/svm_rank"):
        """
        @param directory: directory containing the SVMrank executables;
            a leading ``~`` is expanded to the user's home directory
        """
        self.directory = os.path.expanduser(directory)
        # Initialised here so that classify() can be used with an already
        # trained model, without a preceding call to learn().
        self.training_filename = self.DEFAULT_TRAINING_FILENAME
        self.model_filename = self.DEFAULT_MODEL_FILENAME

    def learn(self, **kwargs):
        """
        Run ``svm_rank_learn`` and print the command line and its output.

        @keyword training_filename: training data file
            (default ``/tmp/training.dat``)
        @keyword model_filename: file the model is written to
            (default ``/tmp/model.dat``); remembered for classify()
        @param kwargs: any other keyword is passed on as ``-<name> <value>``
        @raise subprocess.CalledProcessError: if the executable exits with
            a non-zero status
        """
        self.training_filename = kwargs.pop(
            "training_filename", self.DEFAULT_TRAINING_FILENAME)
        self.model_filename = kwargs.pop(
            "model_filename", self.DEFAULT_MODEL_FILENAME)

        commandline = self._build_commandline(
            "svm_rank_learn",
            kwargs,
            [self.training_filename, self.model_filename])
        print(" ".join(commandline))
        # universal_newlines=True yields text (not bytes) on Python 3.
        print(subprocess.check_output(commandline, universal_newlines=True))

    def classify(self, **kwargs):
        """
        Run ``svm_rank_classify`` and return the statistics it reports.

        @keyword test_filename: file with the test data (required)
        @keyword output_filename: file the predictions are written to
            (default ``/tmp/output.dat``)
        @keyword model_filename: model file to use for this call; defaults
            to the model remembered by the instance
        @param kwargs: any other keyword is passed on as ``-<name> <value>``
        @return: dictionary of statistics, see _stats_to_dict()
        @raise ValueError: if no test_filename is given
        @raise subprocess.CalledProcessError: if the executable exits with
            a non-zero status
        """
        test_filename = kwargs.pop("test_filename", None)
        output_filename = kwargs.pop(
            "output_filename", self.DEFAULT_OUTPUT_FILENAME)
        model_filename = kwargs.pop("model_filename", None)
        if model_filename is None:
            model_filename = self.model_filename

        # Fail early with a clear message instead of an obscure TypeError
        # caused by a None entry in the argument list.
        if test_filename is None:
            raise ValueError("classify() requires a 'test_filename' argument")

        commandline = self._build_commandline(
            "svm_rank_classify",
            kwargs,
            [test_filename, model_filename, output_filename])
        # Newline-terminated so the echo does not run into later stderr output.
        sys.stderr.write(" ".join(commandline) + "\n")

        # run command
        output = subprocess.check_output(commandline, universal_newlines=True)

        # process output to get statistics
        return self._stats_to_dict(output)

    def _build_commandline(self, executable, options, positional):
        """
        Assemble the argument list for one of the SVMrank executables.

        @param executable: file name of the executable inside self.directory
        @param options: dictionary rendered as ``-<name> <value>`` pairs
        @param positional: file names appended after the options, in order
        @return: list suitable for the subprocess functions
        """
        commandline = [os.path.join(self.directory, executable)]
        for argname, argvalue in options.items():
            commandline.append("-{}".format(argname))
            commandline.append("{}".format(argvalue))
        commandline.extend(positional)
        return commandline

    @staticmethod
    def _first_match(pattern, text, description):
        """
        Return the first match of a compiled pattern in text.

        @raise IndexError: if the pattern does not occur in text
        """
        matches = pattern.findall(text)
        if not matches:
            raise IndexError(
                "SVMrank output does not contain {}".format(description))
        return matches[0]

    def _stats_to_dict(self, string):
        """
        Extracts the basic statistics from the verbal response of SVMRank

        @param string: text printed by svm_rank_classify; a Python 3 bytes
            object is decoded as UTF-8 first
        @return: dictionary with the keys ``runtime`` (float) and ``error``,
            ``correct``, ``incorrect``, ``total``, ``total_swapped`` (kept
            as the strings captured from the output)
        @raise IndexError: if one of the expected statistics is missing
        """
        # On Python 2 bytes is str, so this only triggers for Python 3 bytes.
        if isinstance(string, bytes) and not isinstance(string, str):
            string = string.decode("utf-8", "replace")

        stats = {}
        stats["runtime"] = float(self._first_match(
            self._RUNTIME_PATTERN, string, "the runtime"))
        (stats["error"],
         stats["correct"],
         stats["incorrect"],
         stats["total"]) = self._first_match(
            self._ERROR_PATTERN, string, "the zero/one-error line")
        stats["total_swapped"] = self._first_match(
            self._SWAPPED_PATTERN, string, "the number of swapped pairs")
        return stats
71