1 '''
2 Created on 29 Aug 2012
3
4 @author: Eleftherios Avramidis
5 '''
6 import os
7 import subprocess
8 import re
9 import sys
10
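'''
Thin wrapper around the SVMrank command-line tools: builds the command
lines for the svm_rank_learn and svm_rank_classify executables found in
a configurable directory and runs them as subprocesses.
'''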
15
16 - def __init__(self, directory="~/taraxu_tools/svm_rank"):
17 '''
18 Constructor
19 '''
20 self.directory = os.path.expanduser(directory)
21
22
def learn(self, **kwargs):
    '''
    Train a model by running the svm_rank_learn executable located in
    self.directory.

    The keyword arguments "training_filename" (default
    "/tmp/training.dat") and "model_filename" (default "/tmp/model.dat")
    are stored on the instance and passed as the two trailing positional
    arguments of the command. Every other keyword argument is forwarded
    as a command-line option: name=value becomes "-name value".

    The assembled command line and the captured output of the tool are
    printed to standard output.

    @raise subprocess.CalledProcessError: if the tool exits with a
    non-zero status
    @raise OSError: if the executable cannot be started
    '''
    # kwargs is a fresh dict for every call, so popping the two file names
    # leaves exactly the entries that have to become command-line options.
    self.training_filename = kwargs.pop("training_filename", "/tmp/training.dat")
    self.model_filename = kwargs.pop("model_filename", "/tmp/model.dat")

    commandline = [os.path.join(self.directory, "svm_rank_learn")]
    # items() instead of the Python-2-only iteritems()
    for argname, argvalue in kwargs.items():
        commandline.append("-{}".format(argname))
        commandline.append("{}".format(argvalue))
    commandline.append(self.training_filename)
    commandline.append(self.model_filename)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(" ".join(commandline))
    # universal_newlines=True yields text rather than bytes on Python 3,
    # so the tool's output is printed readably instead of as a b'...' repr.
    print(subprocess.check_output(commandline, universal_newlines=True))
37
39
40
41 test_filename = kwargs.setdefault("test_filename", None)
42 output_filename = kwargs.setdefault("output_filename", "/tmp/output.dat")
43
44 command = os.path.join(self.directory, "svm_rank_classify")
45 commandline = [command]
46 for argname, argvalue in kwargs.iteritems():
47 if not argname in ["test_filename", "output_filename"]:
48 commandline.append("-{}".format(argname))
49 commandline.append("{}".format(argvalue))
50 commandline.append(test_filename)
51 commandline.append(self.model_filename)
52 commandline.append(output_filename)
53 sys.stderr.write(" ".join(commandline))
54
55
56 output = subprocess.check_output(commandline)
57
58
59 stats = self._stats_to_dict(output)
60 return stats
61
63 """
64 Extracts the basic statistics from the verbal response of SVMRank
65 """
66 stats = {}
67 stats["runtime"] = float(re.findall("Runtime \(without IO\) in cpu-seconds: ([\d.]*)", string)[0])
68 stats["error"], stats["correct"], stats["incorrect"], stats["total"] = re.findall("Zero/one-error on test set: ([\d.]*)% \((\d*) correct, (\d*) incorrect, (\d*) total\)", string)[0]
69 stats["total_swapped"] = re.findall("Total Num Swappedpairs\s*:\s*(\d*)", string)[0]
70 return stats
71