File: instance.py

package info (click to toggle)
orange3 3.40.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 15,908 kB
  • sloc: python: 162,745; ansic: 622; makefile: 322; sh: 93; cpp: 77
file content (238 lines) | stat: -rw-r--r-- 7,911 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
from itertools import chain
from math import isnan
from numbers import Real, Integral

import numpy as np

from Orange.data import Value, Unknown, DiscreteVariable

__all__ = ["Instance"]


class Instance:
    def __init__(self, domain, data=None, id=None):
        """
        Construct a new data instance.

        :param domain: domain that describes the instance's variables
        :type domain: Orange.data.Domain
        :param data: instance's values
        :type data: Orange.data.Instance or a sequence of values
        :param id: instance id
        :type id: hashable value
        """
        if data is None and isinstance(domain, Instance):
            data = domain
            domain = data.domain

        self._domain = domain
        if data is None:
            self._x = np.repeat(Unknown, len(domain.attributes))
            self._y = np.repeat(Unknown, len(domain.class_vars))
            self._metas = np.array([var.Unknown for var in domain.metas],
                                   dtype=object)
            self._weight = 1
        elif isinstance(data, Instance) and data.domain == domain:
            self._x = np.array(data._x)
            self._y = np.atleast_1d(np.array(data._y))
            self._metas = np.array(data._metas)
            self._weight = data._weight
        else:
            self._x, self._y, self._metas = domain.convert(data)
            self._y = np.atleast_1d(self._y)
            self._weight = 1

        if id is not None:
            self.id = id
        else:
            from Orange.data import Table
            self.id = Table.new_id()

    @property
    def domain(self):
        """The domain describing the instance's values."""
        return self._domain

    @property
    def x(self):
        """
        Instance's attributes as a 1-dimensional numpy array whose length
        equals `len(self.domain.attributes)`.
        """
        return self._x

    @property
    def y(self):
        """
        Instance's classes as a 1-dimensional numpy array whose length
        equals `len(self.domain.attributes)`.
        """
        return self._y

    @property
    def metas(self):
        """
        Instance's meta attributes as a 1-dimensional numpy array whose length
        equals `len(self.domain.attributes)`.
        """
        return self._metas

    @property
    def list(self):
        """
        All instance's values, including attributes, classes and meta
        attributes, as a list whose length equals `len(self.domain.attributes)
        + len(self.domain.class_vars) + len(self.domain.metas)`.
        """
        n_self, n_metas = len(self), len(self._metas)
        return [self[i].value if i < n_self else self[n_self - i - 1].value
                for i in range(n_self + n_metas)]

    @property
    def weight(self):
        """The weight of the data instance. Default is 1."""
        return self._weight

    @weight.setter
    def weight(self, weight):
        self._weight = weight

    def __setitem__(self, key, value):
        if not isinstance(key, Integral):
            key = self._domain.index(key)
        value = self._domain[key].to_val(value)
        if key >= 0 and not isinstance(value, (int, float)):
            raise TypeError("Expected primitive value, got '%s'" %
                            type(value).__name__)

        if 0 <= key < len(self._domain.attributes):
            self._x[key] = value
        elif len(self._domain.attributes) <= key:
            self._y[key - len(self.domain.attributes)] = value
        else:
            self._metas[-1 - key] = value

    def __getitem__(self, key):
        """
        Return the value of a single variable wrapped in `Value`.

        :param key: an integer index, or anything that the domain's `index`
            method can resolve to one; negative indices address meta
            attributes
        :return: `Value` built from the variable and the stored value. If
            `key` is a `DiscreteVariable` other than the domain's variable
            at that index, the stored value is first mapped with
            `key.get_mapper_from` and the result is tied to `key`.
        """
        if isinstance(key, Integral):
            idx = key
        else:
            idx = self._domain.index(key)

        n_attributes = len(self._domain.attributes)
        if idx < 0:
            # -1 is the first meta attribute, -2 the second, ...
            value = self._metas[-1 - idx]
        elif idx < n_attributes:
            value = self._x[idx]
        elif self._y.ndim == 0:
            # A 0-dimensional class array cannot be indexed.
            value = self._y
        else:
            value = self._y[idx - n_attributes]

        var = self._domain[idx]
        if isinstance(key, DiscreteVariable) and var is not key:
            value = key.get_mapper_from(var)(value)
            var = key
        return Value(var, value)

    #TODO Should we return an instance of `object` if we have a meta attribute
    #     that is not Discrete or Continuous? E.g. when we have strings, we'd
    #     like to be able to use startswith, lower etc...
    #     Or should we even return Continuous as floats and use Value only
    #     for discrete attributes?!
    #     Same in Table.__getitem__

    @staticmethod
    def str_values(data, variables, limit=True):
        if limit:
            s = ", ".join(var.str_val(val)
                          for var, val in zip(variables, data[:5]))
            if len(data) > 5:
                s += ", ..."
            return s
        else:
            return ", ".join(var.str_val(val)
                             for var, val in zip(variables, data))

    def _str(self, limit):
        s = "[" + self.str_values(self._x, self._domain.attributes, limit)
        if self._domain.class_vars:
            s += " | " + \
                 self.str_values(self._y, self._domain.class_vars, limit)
        s += "]"
        if self._domain.metas:
            s += " {" + \
                 self.str_values(self._metas, self._domain.metas, limit) + \
                 "}"
        return s

    def __str__(self):
        return self._str(False)

    def __repr__(self):
        return self._str(True)

    def __eq__(self, other):
        if not isinstance(other, Instance):
            other = Instance(self._domain, other)

        def same(x1, x2):
            nan1 = np.isnan(x1)
            nan2 = np.isnan(x2)
            return np.array_equal(nan1, nan2) and \
                   np.array_equal(x1[~nan1], x2[~nan2])

        return same(self._x, other._x) and same(self._y, other._y) \
               and all(m1 == m2 or
                       type(m1) == type(m2) == float and isnan(m1) and isnan(m2)
                       for m1, m2 in zip(self._metas, other._metas))

    @classmethod
    def __hash__(cls):
        raise TypeError(f"unhashable type: '{type(cls.__name__)}'")

    def __iter__(self):
        return chain(iter(self._x), iter(self._y))

    def values(self):
        """
        Return a generator of `Value` objects, pairing each of
        `self.domain.variables` with the corresponding stored value
        obtained by iterating over the instance.
        """
        pairs = zip(self.domain.variables, self)
        return (Value(variable, raw) for variable, raw in pairs)

    def __len__(self):
        return len(self._x) + len(self._y)

    def attributes(self):
        """Return iterator over the instance's attributes"""
        return iter(self._x)

    def classes(self):
        """Return iterator over the instance's class attributes"""
        return iter(self._y)

    # A helper function for get_class and set_class
    def _check_single_class(self):
        if not self._domain.class_vars:
            raise TypeError("Domain has no class variable")
        elif len(self._domain.class_vars) > 1:
            raise TypeError("Domain has multiple class variables")

    def get_class(self):
        """
        Return the class value as an instance of :obj:`Orange.data.Value`.
        Throws an exception if there are multiple classes.
        """
        self._check_single_class()
        return Value(self._domain.class_var, self._y[0])

    def get_classes(self):
        """
        Return the class value as a list of instances of
        :obj:`Orange.data.Value`.
        """
        return (Value(var, value)
                for var, value in zip(self._domain.class_vars, self._y))

    def set_class(self, value):
        """
        Set the instance's class. Throws an exception if there are multiple
        classes.
        """
        self._check_single_class()
        if not isinstance(value, Real):
            self._y[0] = self._domain.class_var.to_val(value)
        else:
            self._y[0] = value