1 """
2
3 @author: Eleftherios Avramidis
4 """
5
6
7 import Orange
8 from Orange.data import Instance
9 from Orange.data import Table
10 from Orange.classification import Classifier
11 from Orange.classification.rules import rule_to_string
12 from Orange.classification.rules import RuleLearner
13 from Orange.classification.svm import get_linear_svm_weights
14 from Orange import feature
15 from Orange.classification import logreg
16 from Orange.statistics import distribution
17
18 import sys
19 import logging
20
22 """
23 Basic wrapper to encapsulate common functions for all classifier subclasses
24 @ivar classifier: the wrapped classifier
25 @type classifier: L{Orange.classification.Classifier}
26 """
29
31 self.classifier = wrapped
32 for name, val in wrapped.__dict__.items():
33 self.__dict__[name] = val
34
35 if self.classifier.__class__.__name__ in ["NaiveClassifier", "CN2UnorderedClassifier"]:
36 self.discrete_features = [feature.Descriptor.make(feat.name,feat.var_type,[],feat.values,0) for feat in self.classifier.domain.features if isinstance(feat, feature.Discrete)]
37
38 - def __call__(self, example, what=Orange.core.GetBoth):
41
42
44
45 """
46 Use the current classifier to classify the given orange table and return a vector (list) of the given values
47 @param orange_table: An orange table with unclassified instances, which we need to classify
48 @type orange_table: L{Orange.data.Table}
49 @param return_type: Specifies whether the classification of each instance should return only the predicted value, only the predicted distribution or both (default)
50 @type return_type: L{Orange.classification.Classifier.GetValue} or L{Orange.classification.Classifier.GetProbabilities} or L{Orange.classification.Classifier.GetBoth}
51 @return: a list of the classification results, one list item per instance
52 @rtype: [L{Orange.classification.Value}, ...] or [L{Orange.classification.Distribution}, ...] or [(L{Orange.classification.Value},L{Orange.classification.Distribution}), ...]
53 """
54
55
56 if self.classifier.__class__.__name__ in ["NaiveClassifier", "CN2UnorderedClassifier"]:
57 orange_table = self.clean_discrete_features(orange_table)
58
59 resultvector = []
60 for instance in orange_table:
61 value, distribution = self.classifier.__call__(instance, return_type)
62 resultvector.append((value.value, distribution))
63 return resultvector
64
65
67
68 classifier_discrete_features = self.discrete_features
69 logging.debug(len(orange_table))
70 i = 0
71 k = 0
72 for feat, status in classifier_discrete_features:
73 classifier_feat_values = set([val for val in feat.values])
74 table_feat_values = set([val for val in orange_table.domain[feat.name].values])
75 missing_values = table_feat_values - classifier_feat_values
76
77 if not missing_values:
78 continue
79
80 modus = distribution.Distribution(feat.name, orange_table).modus()
81 instances = set(orange_table.filter_ref({feat.name:list(missing_values)}))
82 for inst in instances:
83 inst[feat.name] = modus
84
85 i+=len(instances)
86 k+=1
87 sys.stderr.write("Warning: Reset {} appearances of {} discrete attributes\n".format(i, k))
88 return orange_table
89
92
95
96
97
98
99
100
101
def print_content(self, basename="classifier"):
    """
    Depending on the type of the classifier, output its contents to external files.

    The wrapped classifier is probed for four kinds of exportable content, in
    this order: linear SVM weights (C{<basename>.weights.txt}), induced rules
    (C{<basename>.rules.txt}), a tree (C{<basename>.tree.txt} and
    C{<basename>.tree.dot}) and a logistic regression dump
    (C{<basename>.dump.txt}). Every export is best-effort: if the classifier
    does not support it, or writing fails, the failure is logged at debug
    level and the next export is tried. When the rules are written
    successfully the method returns immediately, without attempting the tree
    and the dump exports.

    The content of each file is fully built before the file is opened, so an
    unsupported export does not leave an empty or truncated file behind.

    @param basename: the filename without extension of the classifier
    @type basename: string
    """

    def _write_text(filename, content):
        # The context manager closes the handle even if the write itself fails
        with open(filename, "w") as handle:
            handle.write(content)

    classifier_type = self.classifier.__class__.__name__

    logging.debug("====\nProceeding with printing information for classifier [%s]", classifier_type)

    # Linear SVM: fitted parameters followed by one "name<TAB>weight" line per feature
    try:
        weights = get_linear_svm_weights(self.classifier)
        fitted_parameters = self.classifier.fitted_parameters
        chunks = ["Fitted parameters: \nnu = {0}\ngamma = {1}\n\nWeights: \n".format(fitted_parameters[0], fitted_parameters[1])]
        chunks.extend("{0}\t{1}\n".format(weight_name, weight_value)
                      for weight_name, weight_value in weights.items())
        _write_text("{}.weights.txt".format(basename), "".join(chunks))
    except Exception:
        # Best-effort export: most classifiers are not linear SVMs
        logging.debug("No SVM weights written for classifier [%s]", classifier_type, exc_info=True)

    # Rule-based classifiers: one rule per line
    try:
        rules = self.classifier.rules
        rules_text = "".join("{}\n".format(rule_to_string(rule)) for rule in rules)
        _write_text("{}.rules.txt".format(basename), rules_text)
    except Exception:
        logging.debug("No rules written for classifier [%s]", classifier_type, exc_info=True)
    else:
        # A rule classifier has nothing else to export
        return

    # Tree classifiers: textual rendering plus a dot graph
    try:
        tree_text = self.classifier.to_string("leaf", "node")
        _write_text("{}.tree.txt".format(basename), tree_text)

        graphics_filename = "{}.tree.dot".format(basename)
        self.classifier.dot(graphics_filename, "leaf", "node")
    except Exception:
        logging.debug("No tree written for classifier [%s]", classifier_type, exc_info=True)

    # Logistic regression: textual dump of the fitted model
    try:
        dump_text = logreg.dump(self.classifier)
        _write_text("{}.dump.txt".format(basename), dump_text)
    except Exception:
        logging.debug("No logistic regression dump written for classifier [%s]", classifier_type, exc_info=True)
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206