1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
|
from typing import Union
import numpy as np
from orangewidget.utils.signals import LazyValue
from Orange.data import Domain, DiscreteVariable, Table
from Orange.data.util import get_unique_names
# NOTE(review): presumably the name of the output signal that carries the
# annotated table; it is not referenced by any function in this view - confirm
# against the widgets that import it.
ANNOTATED_DATA_SIGNAL_NAME = "Data"
# Default name requested for the annotation column. The functions below pass
# it through get_unique_names together with the domain before creating the
# variable (presumably to avoid clashes with existing names).
ANNOTATED_DATA_FEATURE_NAME = "Selected"
def add_columns(domain, attributes=(), class_vars=(), metas=()):
    """Return a copy of a domain extended with additional variables.

    The variables of the source domain are kept in their original order;
    the extra variables are appended after them within each part.

    Parameters
    ----------
    domain : Domain
        domain to extend
    attributes
        iterable of variables appended to the source attributes
    class_vars
        iterable of variables appended to the source class variables
    metas
        iterable of variables appended to the source meta attributes

    Returns
    -------
    Domain
        a newly constructed domain; the source domain is not modified
    """
    return Domain(
        domain.attributes + tuple(attributes),
        domain.class_vars + tuple(class_vars),
        domain.metas + tuple(metas),
    )
def domain_with_annotation_column(
        data: Union[Table, Domain],
        values=("No", "Yes"),
        var_name=ANNOTATED_DATA_FEATURE_NAME):
    """Build a domain that contains one extra discrete annotation variable.

    Parameters
    ----------
    data : Table or Domain
        a domain, or an object whose ``domain`` attribute is used
    values
        values of the new discrete variable
    var_name
        requested variable name; it is passed through ``get_unique_names``
        together with the domain before the variable is created

    Returns
    -------
    tuple
        ``(domain, var)``: the new domain and the variable that was added.
        The variable becomes the class variable when the source domain has
        no class variables; otherwise it is appended to the metas.
    """
    source = data.domain if not isinstance(data, Domain) else data
    unique_name = get_unique_names(source, var_name)
    var = DiscreteVariable(unique_name, values)
    if source.class_vars:
        # A class already exists: keep it and store the annotation as a meta.
        annotated = Domain(
            source.attributes, source.class_vars, source.metas + (var, ))
    else:
        # No class yet: the annotation takes the class slot.
        annotated = Domain(
            source.attributes, source.class_vars + (var, ), source.metas)
    return annotated, var
def _table_with_annotation_column(data, values, column_data, var_name):
    """Transform data into a domain with an annotation column and fill it.

    Parameters
    ----------
    data
        source table; it is transformed into the extended domain
    values
        values of the discrete annotation variable
    column_data
        array holding one entry per row of ``data``; it is reshaped to
        ``(len(data), )`` or ``(len(data), 1)`` depending on where the
        annotation variable is placed
    var_name
        requested name of the annotation variable

    Returns
    -------
    the transformed table with the annotation column assigned
    """
    domain, var = domain_with_annotation_column(data, values, var_name)
    # Decided on the *source* domain: with an existing class the annotation
    # lives in metas, otherwise it is the class column itself.
    has_class = bool(data.domain.class_vars)
    n_rows = len(data)
    new_shape = (n_rows, 1) if has_class else (n_rows, )
    column_data = column_data.reshape(new_shape)
    table = data.transform(domain)
    writable_part = table.metas if has_class else table.Y
    with table.unlocked(writable_part):
        table[:, var] = column_data
    return table
def create_annotated_table(data, selected_indices):
    """Return data extended with a No/Yes column marking selected rows.

    Parameters
    ----------
    data : Table
        source data; if None, None is returned
    selected_indices : list or ndarray
        indices of the selected rows; None means that no row is selected

    Returns
    -------
    Table
        data with an additional flag column whose value is "Yes" for the
        rows listed in ``selected_indices`` and "No" for all other rows
    """
    if data is None:
        return None
    # One row per instance: 0 maps to "No", 1 maps to "Yes".
    flags = np.zeros((len(data), 1))
    if selected_indices is not None:
        flags[selected_indices] = 1
    return _table_with_annotation_column(
        data,
        values=("No", "Yes"),
        column_data=flags,
        var_name=ANNOTATED_DATA_FEATURE_NAME,
    )
def lazy_annotated_table(data, selected_indices):
    """Return a LazyValue that builds the annotated table on demand.

    The domain of the annotated table and its length are computed here and
    given to the LazyValue together with a callable that invokes
    ``create_annotated_table(data, selected_indices)``.

    Parameters
    ----------
    data : Table
        source data; if None, None is returned
    selected_indices : list or ndarray
        indices of the selected rows, forwarded to create_annotated_table

    Returns
    -------
    LazyValue[Table] or None
    """
    if data is None:
        # Same contract as create_annotated_table: no data, no output.
        # Without this guard the domain lookup below fails on None.
        return None
    domain, _ = domain_with_annotation_column(data)
    return LazyValue[Table](
        lambda: create_annotated_table(data, selected_indices),
        length=len(data), domain=domain)
def create_groups_table(data, selection,
                        include_unselected=True,
                        var_name=ANNOTATED_DATA_FEATURE_NAME,
                        values=None):
    """Return data extended with a column giving each row's selection group.

    Parameters
    ----------
    data : Table
        source data; if None, None is returned
    selection : ndarray
        one entry per row of ``data``; 0 marks an unselected row and a
        positive value ``k`` marks membership in the k-th group
    include_unselected : bool
        if True, unselected rows are kept and assigned to an extra last
        group; if False, they are removed from the result
    var_name
        requested name of the group variable
    values
        names of the groups; if None, they are generated by group_values

    Returns
    -------
    Table
        data (or its selected subset) with the group column added
    """
    if data is None:
        return None
    values, max_sel = group_values(selection, include_unselected, values)
    if include_unselected:
        # Place Unselected instances in the "last group", so that the group
        # colors and scatter diagram marker colors will match
        mask = (selection != 0)
        # Work on a copy so that the caller's array is left untouched.
        selection = selection.copy()
        # Group k (1-based) becomes value index k - 1 ...
        selection[mask] = selection[mask] - 1
        # ... and unselected rows get the index following the last group.
        selection[~mask] = max_sel
    else:
        mask = np.flatnonzero(selection)
        data = data[mask]
        selection = selection[mask] - 1
    return _table_with_annotation_column(data, values, selection, var_name)
def lazy_groups_table(data, selection, include_unselected=True,
                      var_name=ANNOTATED_DATA_FEATURE_NAME, values=None):
    """Return a LazyValue that builds the groups table on demand.

    The resulting domain and length are computed here and given to the
    LazyValue together with a callable that invokes ``create_groups_table``
    with the same arguments.

    Parameters
    ----------
    data : Table
        source data; if None, None is returned
    selection : ndarray
        one entry per row of ``data``; 0 marks an unselected row
    include_unselected : bool
        if True, the length equals ``len(data)``; otherwise it equals the
        number of non-zero entries in ``selection``
    var_name
        requested name of the group variable
    values
        names of the groups; if None, they are generated by group_values

    Returns
    -------
    LazyValue[Table] or None
    """
    if data is None:
        # Same contract as create_groups_table: no data, no output.
        return None
    if include_unselected:
        length = len(data)
    else:
        # Plain int rather than a NumPy scalar; counts rows that are kept.
        length = int(np.count_nonzero(selection))
    values, _ = group_values(selection, include_unselected, values)
    domain, _ = domain_with_annotation_column(data, values, var_name)
    return LazyValue[Table](
        lambda: create_groups_table(data, selection, include_unselected,
                                    var_name, values),
        length=length, domain=domain
    )
def group_values(selection, include_unselected, values):
    """Return the group names and the highest group index in a selection.

    Parameters
    ----------
    selection
        array-like of group indices; its maximum is returned as ``max_sel``
    include_unselected : bool
        when names are generated, append "Unselected" after the group names
    values
        predefined group names; if not None, they are returned unchanged

    Returns
    -------
    tuple
        ``(values, max_sel)``; generated names are "G1", "G2", ... up to
        ``max_sel``, optionally followed by "Unselected"
    """
    max_sel = np.max(selection)
    if values is not None:
        # Caller-supplied names are used as they are.
        return values, max_sel
    labels = [f"G{index + 1}" for index in range(max_sel)]
    if include_unselected:
        labels += ["Unselected"]
    return labels, max_sel
|