Package ml :: Package var :: Module classifier
[hide private]
[frames] | [no frames]

Source Code for Module ml.var.classifier

  1  """ 
  2   
  3  @author: Eleftherios Avramidis 
  4  """ 
  5   
  6  #import pylab     
  7  import Orange 
  8  from Orange.data import Instance 
  9  from Orange.data import Table 
 10  from Orange.classification import Classifier 
 11  from Orange.classification.rules import rule_to_string 
 12  from Orange.classification.rules import RuleLearner 
 13  from Orange.classification.svm import get_linear_svm_weights 
 14  from Orange import feature 
 15  from Orange.classification import logreg 
 16  from Orange.statistics import distribution 
 17   
 18  import sys 
 19  import logging 
 20   
class OrangeClassifier(Classifier):
    """
    Basic wrapper to encapsulate common functions for all classifier subclasses
    @ivar classifier: the wrapped classifier
    @type classifier: L{Orange.classification.Classifier}
    """

    def __new__(cls, wrapped):
        # Orange classifiers are instantiated through __new__ with a name
        # keyword; reuse the wrapped classifier's name for the wrapper.
        return Classifier.__new__(cls, name=wrapped.name)

    def __init__(self, wrapped):
        """
        Wrap an existing Orange classifier.
        @param wrapped: the trained classifier to be wrapped
        @type wrapped: L{Orange.classification.Classifier}
        """
        self.classifier = wrapped
        # Mirror the wrapped classifier's attributes onto the wrapper so
        # callers can keep accessing them transparently.
        for name, val in wrapped.__dict__.items():
            self.__dict__[name] = val

        # These classifier types fail on discrete values unseen at training
        # time, so remember their discrete features for later cleaning.
        # NOTE: feature.Descriptor.make() returns a (descriptor, status)
        # tuple, which clean_discrete_features() unpacks accordingly.
        if self.classifier.__class__.__name__ in ["NaiveClassifier", "CN2UnorderedClassifier"]:
            self.discrete_features = [feature.Descriptor.make(feat.name,
                                                              feat.var_type,
                                                              [],
                                                              feat.values,
                                                              0)
                                      for feat in self.classifier.domain.features
                                      if isinstance(feat, feature.Discrete)]

    def __call__(self, example, what=Orange.core.GetBoth):
        """
        Classify a single example, first converting it into the wrapped
        classifier's domain.
        @param example: an unclassified instance
        @param what: what to return (value, distribution, or both)
        """
        example = Instance(self.classifier.domain, example)
        return self.classifier(example, what)

    def classify_orange_table(self, orange_table, return_type=Classifier.GetBoth):
        """
        Use the current classifier to classify the given orange table and return a vector (list) of the given values
        @param orange_table: An orange table with unclassified instances, which we need to classify
        @type orange_table: L{Orange.data.Table}
        @param return_type: Specifies whether the classification of each intance should return only the predicted value, only the predicted distribution or both (default),
        @type return_type: L{Orange.classification.Classifier.GetBoth} or L{Orange.classification.Classifier.GetProbabilities} or L{Orange.classification.Classifier.GetBoth)}
        @return: a list of the classification results, one list item per instance
        @rtype: [L{Orange.classification.Value}, ...] or [L{Orange.classification.Distribution}, ...] or [(L{Orange.classification.Value},L{Orange.classification.Distribution}), ...]
        """
        # These classifier types crash on discrete values they did not see
        # during training, so sanitize the table first.
        if self.classifier.__class__.__name__ in ["NaiveClassifier", "CN2UnorderedClassifier"]:
            orange_table = self.clean_discrete_features(orange_table)

        resultvector = []
        for instance in orange_table:
            # 'prob_dist' (not 'distribution') so the imported
            # Orange.statistics.distribution module is not shadowed.
            value, prob_dist = self.classifier(instance, return_type)
            resultvector.append((value.value, prob_dist))
        return resultvector

    def clean_discrete_features(self, orange_table):
        """
        Replace discrete attribute values that did not appear in the training
        data with the modus (most frequent value) of that attribute in the
        given table, so that classification does not fail on unseen values.
        @param orange_table: the table whose instances should be sanitized
        @type orange_table: L{Orange.data.Table}
        @return: the sanitized table (modified in place)
        @rtype: L{Orange.data.Table}
        """
        # kill instances that do not fit training data
        classifier_discrete_features = self.discrete_features
        logging.debug(len(orange_table))
        reset_count = 0        # how many instance values were reset
        attribute_count = 0    # how many attributes had unseen values
        for feat, status in classifier_discrete_features:
            classifier_feat_values = set([val for val in feat.values])
            table_feat_values = set([val for val in orange_table.domain[feat.name].values])
            # values present in the table but unknown to the classifier
            missing_values = table_feat_values - classifier_feat_values

            if not missing_values:
                continue

            # most frequent value of this attribute within the given table
            modus = distribution.Distribution(feat.name, orange_table).modus()
            instances = set(orange_table.filter_ref({feat.name: list(missing_values)}))
            for inst in instances:
                inst[feat.name] = modus

            reset_count += len(instances)
            attribute_count += 1
        sys.stderr.write("Warning: Reset {} appearances of {} discrete attributes\n".format(reset_count, attribute_count))
        return orange_table

    def classify_dataset(self, dataset, return_type=Classifier.GetBoth):
        # Not implemented yet; placeholder kept for interface symmetry.
        pass

    def classify_parallelsentence(self, parallelsentence, return_type=Classifier.GetBoth):
        # Not implemented yet; placeholder kept for interface symmetry.
        pass

    def print_content(self, basename="classifier"):
        """
        Depending on the type of the classifier, output its contents to an external file
        @param basename: the filename without extension of the classifier
        @type basename: string
        """
        classifier_type = self.classifier.__class__.__name__

        logging.debug("====\nProceeding with printing information for classifier [%s]", classifier_type)

        if classifier_type in ["NaiveClassifier", ]:
            # Conditional-distribution plotting via pylab intentionally
            # disabled (pylab import is commented out at module level).
            pass

        # Each dump attempt below is best-effort: the wrapped classifier only
        # supports one of these content types, so the failing attempts are
        # expected and merely logged (previously they were silently swallowed
        # by bare 'except:' clauses).

        # SVM: dump linear weights and fitted parameters.
        try:
            weights = get_linear_svm_weights(self.classifier)
            textfilename = "{}.weights.txt".format(basename)
            # 'with' guarantees the file is closed even if a write fails.
            with open(textfilename, "w") as f:
                f.write("Fitted parameters: \nnu = {0}\ngamma = {1}\n\nWeights: \n".format(self.classifier.fitted_parameters[0], self.classifier.fitted_parameters[1]))
                for weight_name, weight_value in weights.iteritems():
                    f.write("{0}\t{1}\n".format(weight_name, weight_value))
        except Exception:
            logging.debug("Classifier [%s]: no linear SVM weights to dump", classifier_type)

        # Rule learner: dump the induced rules, one per line.
        try:
            rules = self.classifier.rules
            textfilename = "{}.rules.txt".format(basename)
            with open(textfilename, "w") as f:
                for r in rules:
                    f.write("{}\n".format(rule_to_string(r)))
            return
        except Exception:
            logging.debug("Classifier [%s]: no rules to dump", classifier_type)

        # Tree classifier: dump a textual and a graphviz representation.
        try:
            textfilename = "{}.tree.txt".format(basename)
            with open(textfilename, "w") as f:
                f.write(self.classifier.to_string("leaf", "node"))

            graphics_filename = "{}.tree.dot".format(basename)
            self.classifier.dot(graphics_filename, "leaf", "node")
        except Exception:
            logging.debug("Classifier [%s]: no tree representation to dump", classifier_type)

        # Logistic regression: dump the model coefficients.
        try:
            textfilename = "{}.dump.txt".format(basename)
            with open(textfilename, "w") as f:
                f.write(logreg.dump(self.classifier))
        except Exception:
            logging.debug("Classifier [%s]: no logistic regression dump", classifier_type)
177 178 # def _print_tree(self, x, fileobj): 179 # if isinstance(x, Orange.classification.tree.TreeClassifier): 180 # self._print_tree0(x.tree, 0, fileobj) 181 # elif isinstance(x, Orange.classification.tree.Node): 182 # self._print_tree0(x, 0, fileobj) 183 # else: 184 # raise TypeError, "invalid parameter" 185 # 186 # def _print_tree0(self, node, level, fileobj): 187 # if not node: 188 # fileobj.write( " "*level + "<null node>\n") 189 # return 190 # if node.branch_selector: 191 # node_desc = node.branch_selector.class_var.name 192 # node_cont = node.distribution 193 # fileobj.write("\\n" + " "*level + "%s (%s)" % (node_desc, node_cont)) 194 # for i in range(len(node.branches)): 195 # fileobj.write("\\n" + " "*level + ": %s" % node.branch_descriptions[i]) 196 # self.print_tree0(node.branches[i], level+1) 197 # else: 198 # node_cont = node.distribution 199 # major_class = node.node_classifier.default_value 200 # fileobj.write("--> %s (%s)" % (major_class, node_cont)) 201 # 202 203 # if classifier_type == "SVMEasyLearner": 204 # 205 # elif 206