Package ml :: Package var :: Module classifier
[hide private]
[frames] | [no frames]

Source Code for Module ml.var.classifier

  1  """ 
  2   
  3  @author: Eleftherios Avramidis 
  4  """ 
  5   
  6  #import pylab     
  7  import Orange 
  8  from Orange.data import Instance 
  9  from Orange.data import Table 
 10  from Orange.classification import Classifier 
 11  from Orange.classification.rules import rule_to_string 
 12  from Orange.classification.rules import RuleLearner 
 13  from Orange.classification.svm import get_linear_svm_weights 
 14  from Orange import feature 
 15  from Orange.classification import logreg 
 16  from Orange.statistics import distribution 
 17   
 18  import sys 
 19  import logging 
 20   
class OrangeClassifier(Classifier):
    """
    Basic wrapper to encapsulate common functions for all classifier subclasses
    @ivar classifier: the wrapped classifier
    @type classifier: L{Orange.classification.Classifier}
    """

    def __new__(cls, wrapped):
        # Orange classifiers are instantiated through __new__ with a name
        # keyword; reuse the wrapped classifier's name for the wrapper.
        return Classifier.__new__(cls, name=wrapped.name)

    def __init__(self, wrapped):
        """
        Wrap an existing Orange classifier.
        @param wrapped: the trained classifier to be wrapped
        @type wrapped: L{Orange.classification.Classifier}
        """
        self.classifier = wrapped
        # Mirror the wrapped classifier's attributes onto the wrapper so
        # callers can keep accessing them transparently.
        for name, val in wrapped.__dict__.items():
            self.__dict__[name] = val

        # These classifier types fail on discrete values unseen at training
        # time, so remember their discrete features for later cleaning.
        # NOTE: feature.Descriptor.make() returns a (descriptor, status)
        # tuple, which clean_discrete_features() unpacks accordingly.
        if self.classifier.__class__.__name__ in ["NaiveClassifier", "CN2UnorderedClassifier"]:
            self.discrete_features = [feature.Descriptor.make(feat.name,
                                                              feat.var_type,
                                                              [],
                                                              feat.values,
                                                              0)
                                      for feat in self.classifier.domain.features
                                      if isinstance(feat, feature.Discrete)]

    def __call__(self, example, what=Orange.core.GetBoth):
        """
        Classify a single example, first converting it into the wrapped
        classifier's domain.
        @param example: an unclassified instance
        @param what: what to return (value, distribution, or both)
        """
        example = Instance(self.classifier.domain, example)
        return self.classifier(example, what)

    def classify_orange_table(self, orange_table, return_type=Classifier.GetBoth):
        """
        Use the current classifier to classify the given orange table and return a vector (list) of the given values
        @param orange_table: An orange table with unclassified instances, which we need to classify
        @type orange_table: L{Orange.data.Table}
        @param return_type: Specifies whether the classification of each intance should return only the predicted value, only the predicted distribution or both (default),
        @type return_type: L{Orange.classification.Classifier.GetBoth} or L{Orange.classification.Classifier.GetProbabilities} or L{Orange.classification.Classifier.GetBoth)}
        @return: a list of the classification results, one list item per instance
        @rtype: [L{Orange.classification.Value}, ...] or [L{Orange.classification.Distribution}, ...] or [(L{Orange.classification.Value},L{Orange.classification.Distribution}), ...]
        """
        # These classifier types crash on discrete values they did not see
        # during training, so sanitize the table first.
        if self.classifier.__class__.__name__ in ["NaiveClassifier", "CN2UnorderedClassifier"]:
            orange_table = self.clean_discrete_features(orange_table)

        resultvector = []
        for instance in orange_table:
            # 'prob_dist' (not 'distribution') so the imported
            # Orange.statistics.distribution module is not shadowed.
            value, prob_dist = self.classifier(instance, return_type)
            resultvector.append((value.value, prob_dist))
        return resultvector

    def clean_discrete_features(self, orange_table):
        """
        Replace discrete attribute values that did not appear in the training
        data with the modus (most frequent value) of that attribute in the
        given table, so that classification does not fail on unseen values.
        @param orange_table: the table whose instances should be sanitized
        @type orange_table: L{Orange.data.Table}
        @return: the sanitized table (modified in place)
        @rtype: L{Orange.data.Table}
        """
        # kill instances that do not fit training data
        classifier_discrete_features = self.discrete_features
        logging.debug(len(orange_table))
        reset_count = 0        # how many instance values were reset
        attribute_count = 0    # how many attributes had unseen values
        for feat, status in classifier_discrete_features:
            classifier_feat_values = set([val for val in feat.values])
            table_feat_values = set([val for val in orange_table.domain[feat.name].values])
            # values present in the table but unknown to the classifier
            missing_values = table_feat_values - classifier_feat_values

            if not missing_values:
                continue

            # most frequent value of this attribute within the given table
            modus = distribution.Distribution(feat.name, orange_table).modus()
            instances = set(orange_table.filter_ref({feat.name: list(missing_values)}))
            for inst in instances:
                inst[feat.name] = modus

            reset_count += len(instances)
            attribute_count += 1
        sys.stderr.write("Warning: Reset {} appearances of {} discrete attributes\n".format(reset_count, attribute_count))
        return orange_table

    def classify_dataset(self, dataset, return_type=Classifier.GetBoth):
        # Not implemented yet; placeholder kept for interface symmetry.
        pass

    def classify_parallelsentence(self, parallelsentence, return_type=Classifier.GetBoth):
        # Not implemented yet; placeholder kept for interface symmetry.
        pass

    def print_content(self, basename="classifier"):
        """
        Depending on the type of the classifier, output its contents to an external file
        @param basename: the filename without extension of the classifier
        @type basename: string
        """
        classifier_type = self.classifier.__class__.__name__

        logging.debug("====\nProceeding with printing information for classifier [%s]", classifier_type)

        if classifier_type in ["NaiveClassifier", ]:
            # Conditional-distribution plotting via pylab intentionally
            # disabled (pylab import is commented out at module level).
            pass

        # Each dump attempt below is best-effort: the wrapped classifier only
        # supports one of these content types, so the failing attempts are
        # expected and merely logged (previously they were silently swallowed
        # by bare 'except:' clauses).

        # SVM: dump linear weights and fitted parameters.
        try:
            weights = get_linear_svm_weights(self.classifier)
            textfilename = "{}.weights.txt".format(basename)
            # 'with' guarantees the file is closed even if a write fails.
            with open(textfilename, "w") as f:
                f.write("Fitted parameters: \nnu = {0}\ngamma = {1}\n\nWeights: \n".format(self.classifier.fitted_parameters[0], self.classifier.fitted_parameters[1]))
                for weight_name, weight_value in weights.iteritems():
                    f.write("{0}\t{1}\n".format(weight_name, weight_value))
        except Exception:
            logging.debug("Classifier [%s]: no linear SVM weights to dump", classifier_type)

        # Rule learner: dump the induced rules, one per line.
        try:
            rules = self.classifier.rules
            textfilename = "{}.rules.txt".format(basename)
            with open(textfilename, "w") as f:
                for r in rules:
                    f.write("{}\n".format(rule_to_string(r)))
            return
        except Exception:
            logging.debug("Classifier [%s]: no rules to dump", classifier_type)

        # Tree classifier: dump a textual and a graphviz representation.
        try:
            textfilename = "{}.tree.txt".format(basename)
            with open(textfilename, "w") as f:
                f.write(self.classifier.to_string("leaf", "node"))

            graphics_filename = "{}.tree.dot".format(basename)
            self.classifier.dot(graphics_filename, "leaf", "node")
        except Exception:
            logging.debug("Classifier [%s]: no tree representation to dump", classifier_type)

        # Logistic regression: dump the model coefficients.
        try:
            textfilename = "{}.dump.txt".format(basename)
            with open(textfilename, "w") as f:
                f.write(logreg.dump(self.classifier))
        except Exception:
            logging.debug("Classifier [%s]: no logistic regression dump", classifier_type)
177 178 # def _print_tree(self, x, fileobj): 179 # if isinstance(x, Orange.classification.tree.TreeClassifier): 180 # self._print_tree0(x.tree, 0, fileobj) 181 # elif isinstance(x, Orange.classification.tree.Node): 182 # self._print_tree0(x, 0, fileobj) 183 # else: 184 # raise TypeError, "invalid parameter" 185 # 186 # def _print_tree0(self, node, level, fileobj): 187 # if not node: 188 # fileobj.write( " "*level + "<null node>\n") 189 # return 190 # if node.branch_selector: 191 # node_desc = node.branch_selector.class_var.name 192 # node_cont = node.distribution 193 # fileobj.write("\\n" + " "*level + "%s (%s)" % (node_desc, node_cont)) 194 # for i in range(len(node.branches)): 195 # fileobj.write("\\n" + " "*level + ": %s" % node.branch_descriptions[i]) 196 # self.print_tree0(node.branches[i], level+1) 197 # else: 198 # node_cont = node.distribution 199 # major_class = node.node_classifier.default_value 200 # fileobj.write("--> %s (%s)" % (major_class, node_cont)) 201 # 202 203 # if classifier_type == "SVMEasyLearner": 204 # 205 # elif 206