File: owrules.py

package info (click to toggle)
orange3 3.40.0-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 15,912 kB
  • sloc: python: 162,745; ansic: 622; makefile: 322; sh: 93; cpp: 77
file content (372 lines) | stat: -rw-r--r-- 15,257 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
from collections import OrderedDict

import numpy as np
from AnyQt.QtCore import Qt

from Orange.classification.rules import (
    WeightedRelativeAccuracyEvaluator, LaplaceAccuracyEvaluator,
    EntropyEvaluator, _RuleClassifier, _RuleLearner, get_dist
)
from Orange.data import Table
from Orange.widgets import gui
from Orange.widgets.settings import Setting
from Orange.widgets.utils.owlearnerwidget import OWBaseLearner
from Orange.widgets.utils.widgetpreview import WidgetPreview


class CustomRuleClassifier(_RuleClassifier):
    """
    Rule-based classifier produced by CustomRuleLearner.

    Prediction follows either a decision list (ordered rules, exclusive
    covering) or an unordered rule set, depending on the parameters the
    classifier was built with.
    """
    def __init__(self, domain, rule_list, params):
        super().__init__(domain, rule_list)
        assert params is not None

        self.params = params
        self.rule_ordering = params["Rule ordering"]
        self.covering_algorithm = params["Covering algorithm"]

    def predict(self, X):
        # Decision-list prediction applies only to the ordered/exclusive
        # combination; every other combination is handled as a rule set.
        use_decision_list = (self.rule_ordering == "ordered" and
                             self.covering_algorithm == "exclusive")
        if use_decision_list:
            return self.ordered_predict(X)

        use_rule_set = (self.rule_ordering == "unordered" or
                        self.covering_algorithm == "weighted")
        if use_rule_set:
            return self.unordered_predict(X)


class CustomRuleLearner(_RuleLearner):
    """
    Custom CN2 inducer that construct either a list of ordered rules or
    a set of unordered rules. Returns a CustomRuleClassifier if called
    with data.

    See Also
    --------
    For more information about function calls and the algorithm, refer
    to the base rule induction learner.
    """
    name = 'Custom rule inducer'
    __returns__ = CustomRuleClassifier

    def __init__(self, preprocessors, base_rules, params):
        """
        Parameters
        ----------
        preprocessors : list
            Preprocessors applied to the data before learning.
        base_rules : list of Rule
            Optional initial rules that constrain the search.
        params : dict
            Widget-supplied parameters; see
            OWRuleLearner.get_learner_parameters for the exact keys.

        Raises
        ------
        ValueError
            If params["Evaluation measure"] is not one of "entropy",
            "laplace" or "wracc".
        """
        super().__init__(preprocessors, base_rules)
        self.progress_advance_callback = None
        assert params is not None
        self.params = params

        # top-level control procedure (rule ordering)
        self.rule_ordering = params["Rule ordering"]

        # top-level control procedure (covering algorithm)
        self.covering_algorithm = params["Covering algorithm"]
        if self.covering_algorithm == "exclusive":
            self.cover_and_remove = self.exclusive_cover_and_remove
        elif self.covering_algorithm == "weighted":
            self.gamma = params["Gamma"]
            self.cover_and_remove = self.weighted_cover_and_remove

        # bottom-level search procedure (search algorithm)
        self.rule_finder.search_algorithm.beam_width = params["Beam width"]

        # bottom-level search procedure (search strategy)
        self.rule_finder.search_strategy.constrain_continuous = True
        self.rule_finder.search_strategy.restrict_equality = params["Restrict to equality"]

        # bottom-level search procedure (search heuristics);
        # fail fast on an unknown measure instead of leaving `evaluator`
        # unbound, which would otherwise surface as a confusing NameError
        # on the assignment below
        evaluation_measure = params["Evaluation measure"]
        if evaluation_measure == "entropy":
            evaluator = EntropyEvaluator()
        elif evaluation_measure == "laplace":
            evaluator = LaplaceAccuracyEvaluator()
        elif evaluation_measure == "wracc":
            evaluator = WeightedRelativeAccuracyEvaluator()
        else:
            raise ValueError(
                "unknown evaluation measure: {!r}".format(evaluation_measure))
        self.rule_finder.quality_evaluator = evaluator

        # bottom-level search procedure (over-fitting avoidance heuristics)
        min_rule_cov = params["Minimum rule coverage"]
        max_rule_length = params["Maximum rule length"]
        self.rule_finder.general_validator.min_covered_examples = min_rule_cov
        self.rule_finder.general_validator.max_rule_length = max_rule_length

        # bottom-level search procedure (over-fitting avoidance heuristics)
        default_alpha = params["Default alpha"]
        parent_alpha = params["Parent alpha"]
        self.rule_finder.significance_validator.default_alpha = default_alpha
        self.rule_finder.significance_validator.parent_alpha = parent_alpha

    def set_progress_advance_callback(self, f):
        """
        Assign callback to update the corresponding widget's progress
        bar after each generated rule. Callback is used to ensure that
        the progress bar is always accessed correctly (additional
        widgets may however use the generated learner).
        """
        self.progress_advance_callback = f

    def clear_progress_advance_callback(self):
        """
        Make sure to clear the callback function immediately after the
        classifier is trained.
        """
        self.progress_advance_callback = None

    def find_rules_and_measure_progress(self, X, Y, W, target_class,
                                        base_rules, domain, progress_amount):
        """
        The top-level control procedure of the separate-and-conquer
        algorithm. For given data and target class (may be None), return
        a list of rules which all must strictly adhere to the
        requirements of rule finder's validators.

        To induce decision lists (ordered rules), set target class to
        None. To induce rule sets (unordered rules), learn rules for
        each class individually, in regard to the original learning
        data.

        Parameters
        ----------
        X, Y, W : ndarray
            Learning data.
        target_class : int
            Index of the class to model.
        base_rules : list of Rule
            An optional list of initial rules to constrain the search.
        domain : Orange.data.domain.Domain
            Data domain, used to calculate class distributions.
        progress_amount: int, percentage
            Part of the learning algorithm covered by this function
            call.

        Returns
        -------
        rule_list : list of Rule
            Induced rules.
        """
        initial_class_dist = get_dist(Y, W, domain)
        rule_list = []

        # while data allows, continuously find new rules,
        # break the loop if min. requirements cannot be met,
        # after finding a rule, remove the instances covered
        while not self.data_stopping(X, Y, W, target_class, domain):

            # remember the distribution to correctly update progress
            temp_class_dist = get_dist(Y, W, domain)

            # generate a new rule that has not been seen before
            new_rule = self.rule_finder(X, Y, W, target_class, base_rules,
                                        domain, initial_class_dist, rule_list)

            # None when no new, unique rules that pass
            # the general requirements can be found
            if new_rule is None or self.rule_stopping(new_rule):
                break

            # exclusive or weighted covering, selected in __init__
            X, Y, W = self.cover_and_remove(X, Y, W, new_rule)
            rule_list.append(new_rule)

            # update progress: the covered fraction of the (target)
            # class distribution removed by this rule, scaled by the
            # share of the algorithm this call represents
            if self.progress_advance_callback is not None:
                progress = (((temp_class_dist[target_class] -
                              get_dist(Y, W, domain)[target_class])
                             / initial_class_dist[target_class]
                             * progress_amount) if target_class is not None else
                            ((temp_class_dist - get_dist(Y, W, domain)).sum()
                             / initial_class_dist.sum() * progress_amount))
                self.progress_advance_callback(progress)

        return rule_list

    def fit_storage(self, data):
        """
        Induce rules from `data` and wrap them in a CustomRuleClassifier.

        Ordered rule lists are learned in a single pass with no target
        class; unordered rule sets are learned per class against the
        original data (hence the per-class copy of the weights).
        """
        rule_list = []
        X, Y, W = data.X, data.Y, data.W if data.has_weights() else None
        Y = Y.astype(dtype=int)
        if self.rule_ordering == "ordered":
            rule_list = self.find_rules_and_measure_progress(
                X, Y, np.copy(W) if W is not None else None, None,
                self.base_rules, data.domain, progress_amount=1)
            # add the default rule, if required
            if (not rule_list or rule_list and rule_list[-1].length > 0 or
                    self.covering_algorithm == "weighted"):
                rule_list.append(
                    self.generate_default_rule(X, Y, W, data.domain))

        elif self.rule_ordering == "unordered":
            for curr_class in range(len(data.domain.class_var.values)):
                rule_list.extend(self.find_rules_and_measure_progress(
                    X, Y, np.copy(W) if W is not None else None,
                    curr_class, self.base_rules, data.domain,
                    progress_amount=1/len(data.domain.class_var.values)))
            # add the default rule
            rule_list.append(self.generate_default_rule(X, Y, W, data.domain))

        return CustomRuleClassifier(domain=data.domain, rule_list=rule_list,
                                    params=self.params)


class OWRuleLearner(OWBaseLearner):
    """
    Widget for configuring and running CN2 rule induction.

    Builds a CustomRuleLearner from the widget's settings and outputs
    the learner and (when data is present) the trained classifier.
    """
    name = "CN2 Rule Induction"
    description = "Induce rules from data using CN2 algorithm."
    icon = "icons/CN2RuleInduction.svg"
    replaces = [
        "Orange.widgets.classify.owrules.OWRuleLearner",
    ]
    priority = 19
    keywords = "cn2 rule induction"

    LEARNER = CustomRuleLearner
    supports_sparse = False

    # Map the integer Setting indices below to the string values
    # expected by CustomRuleLearner's params dict.
    storage_orders = ["ordered", "unordered"]
    storage_covers = ["exclusive", "weighted"]
    storage_measures = ["entropy", "laplace", "wracc"]

    # default parameter values
    rule_ordering = Setting(0)  # index into storage_orders
    covering_algorithm = Setting(0)  # index into storage_covers
    gamma = Setting(0.7)  # weighted-covering decay; used only when covering_algorithm == 1
    evaluation_measure = Setting(0)  # index into storage_measures
    restrict_equality = Setting(False)
    beam_width = Setting(5)
    min_covered_examples = Setting(1)
    max_rule_length = Setting(5)
    default_alpha = Setting(1.0)
    parent_alpha = Setting(1.0)
    # The alpha spins are checkable; unchecked means "disabled" and the
    # learner receives 1.0 (see get_learner_parameters).
    checked_default_alpha = Setting(False)
    checked_parent_alpha = Setting(False)

    # actual widget elements
    base_rules = None
    gamma_spin = None

    def add_main_layout(self):
        """Build the widget's control panel (three boxes of settings)."""
        # top-level control procedure
        top_box = gui.hBox(widget=self.controlArea, box=None)

        rule_ordering_box = gui.hBox(widget=top_box, box="Rule ordering")
        rule_ordering_rbs = gui.radioButtons(
            widget=rule_ordering_box, master=self, value="rule_ordering",
            callback=self.settings_changed, btnLabels=("Ordered", "Unordered"))
        rule_ordering_rbs.layout().setSpacing(7)

        covering_algorithm_box = gui.hBox(
            widget=top_box, box="Covering algorithm")
        covering_algorithm_rbs = gui.radioButtons(
            widget=covering_algorithm_box, master=self,
            value="covering_algorithm",
            callback=self.settings_changed,
            btnLabels=("Exclusive", "Weighted"))
        covering_algorithm_rbs.layout().setSpacing(7)

        # gamma applies only to weighted covering; the spin is enabled
        # and disabled in settings_changed
        insert_gamma_box = gui.vBox(widget=covering_algorithm_box, box=None)
        gui.separator(insert_gamma_box, 0, 14)
        self.gamma_spin = gui.doubleSpin(
            widget=insert_gamma_box, master=self, value="gamma", minv=0.0,
            maxv=1.0, step=0.01, label="γ:", orientation=Qt.Horizontal,
            callback=self.settings_changed, alignment=Qt.AlignRight,
            enabled=self.covering_algorithm == 1)

        # bottom-level search procedure (search bias)
        middle_box = gui.vBox(widget=self.controlArea, box="Rule search")

        gui.comboBox(
            widget=middle_box, master=self, value="evaluation_measure",
            label="Evaluation measure:", orientation=Qt.Horizontal,
            items=("Entropy", "Laplace accuracy", "WRAcc"),
            callback=self.settings_changed, contentsLength=3)

        gui.spin(
            widget=middle_box, master=self, value="beam_width", minv=1,
            maxv=100, step=1, label="Beam width:", orientation=Qt.Horizontal,
            callback=self.settings_changed, alignment=Qt.AlignRight,
            controlWidth=80)

        # bottom-level search procedure (over-fitting avoidance bias)
        bottom_box = gui.vBox(widget=self.controlArea, box="Rule filtering")

        gui.spin(
            widget=bottom_box, master=self, value="min_covered_examples", minv=1,
            maxv=10000, step=1, label="Minimum rule coverage:",
            orientation=Qt.Horizontal, callback=self.settings_changed,
            alignment=Qt.AlignRight, controlWidth=80)

        gui.spin(
            widget=bottom_box, master=self, value="max_rule_length",
            minv=1, maxv=100, step=1, label="Maximum rule length:",
            orientation=Qt.Horizontal, callback=self.settings_changed,
            alignment=Qt.AlignRight, controlWidth=80)

        gui.doubleSpin(
            widget=bottom_box, master=self, value="default_alpha", minv=0.0,
            maxv=1.0, step=0.01, label="Statistical significance (default α):",
            orientation=Qt.Horizontal, callback=self.settings_changed,
            alignment=Qt.AlignRight, controlWidth=80,
            checked="checked_default_alpha")

        gui.doubleSpin(
            widget=bottom_box, master=self, value="parent_alpha", minv=0.0,
            maxv=1.0, step=0.01, label="Relative significance (parent α):",
            orientation=Qt.Horizontal, callback=self.settings_changed,
            alignment=Qt.AlignRight, controlWidth=80,
            checked="checked_parent_alpha")

        gui.checkBox(
            widget=bottom_box, master=self, value="restrict_equality",
            label="Restrict operator for categorical values to equality",
            callback=self.settings_changed,
        )

    def settings_changed(self, *args, **kwargs):
        """Keep the γ spin in sync with the covering algorithm, then
        delegate to the base class (which rebuilds the learner)."""
        # γ is meaningful only for weighted covering (index 1)
        self.gamma_spin.setDisabled(self.covering_algorithm == 0)
        super().settings_changed(*args, **kwargs)

    def update_model(self):
        """
        Reimplemented from OWBaseLearner.

        Trains the model, catching MemoryError (rule induction can be
        memory-hungry) and reporting it through the widget's Error area.
        """
        self.Error.out_of_memory.clear()
        self.model = None
        if self.check_data():
            try:
                self.model = self.learner(self.data)
            except MemoryError:
                self.Error.out_of_memory()
            else:
                self.model.name = self.effective_learner_name()
                self.model.instances = self.data
                self.valid_data = True
        self.Outputs.model.send(self.model)

    def create_learner(self):
        """
        Reimplemented from OWBaseLearner.

        Returns a CustomRuleLearner configured from the current widget
        settings.
        """
        return self.LEARNER(
            preprocessors=self.preprocessors,
            base_rules=self.base_rules,
            params=self.get_learner_parameters()
        )

    def get_learner_parameters(self):
        """Translate widget settings into the params dict consumed by
        CustomRuleLearner (and shown in the widget's report)."""
        return OrderedDict([
            ("Rule ordering", self.storage_orders[self.rule_ordering]),
            ("Covering algorithm", self.storage_covers[self.covering_algorithm]),
            ("Gamma", self.gamma),
            ("Evaluation measure", self.storage_measures[self.evaluation_measure]),
            ("Restrict to equality", self.restrict_equality),
            ("Beam width", self.beam_width),
            ("Minimum rule coverage", self.min_covered_examples),
            ("Maximum rule length", self.max_rule_length),
            # unchecked alpha spins fall back to 1.0 (no significance
            # filtering)
            ("Default alpha", (1.0 if not self.checked_default_alpha
                               else self.default_alpha)),
            ("Parent alpha", (1.0 if not self.checked_parent_alpha
                              else self.parent_alpha))
        ])


if __name__ == "__main__":  # pragma: no cover
    # Manual preview: launch the widget standalone with the iris dataset.
    WidgetPreview(OWRuleLearner).run(Table("iris"))