File: utils.py

from itertools import product

import numpy as np
import requests

def get_groupIndex_to_featureIndices(featureIndex_to_groupIndex):
    groupIndex_to_featureIndices = {}
    for featureIndex, groupIndex in enumerate(featureIndex_to_groupIndex):
        if groupIndex not in groupIndex_to_featureIndices:
            groupIndex_to_featureIndices[groupIndex] = set()
        groupIndex_to_featureIndices[groupIndex].add(featureIndex)
    return groupIndex_to_featureIndices
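
# Example (illustrative input, not from the original file): features 0 and 2
# share group 0, feature 1 is alone in group 1.
#   get_groupIndex_to_featureIndices([0, 1, 0])  ->  {0: {0, 2}, 1: {1}}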

def get_support_indices(betas):
    return np.where(np.abs(betas) > 1e-9)[0]

def get_nonsupport_indices(betas):
    return np.where(np.abs(betas) <= 1e-9)[0]

def normalize_X(X):
    X_mean = np.mean(X, axis=0)
    X_norm = np.linalg.norm(X - X_mean, axis=0)
    scaled_feature_indices = np.where(X_norm >= 1e-9)[0]
    X_normalized = X - X_mean
    X_normalized[:, scaled_feature_indices] = X_normalized[:, scaled_feature_indices] / X_norm[scaled_feature_indices]
    return X_normalized, X_mean, X_norm, scaled_feature_indices
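
# Behavior note (a sketch of what the function above guarantees): columns with
# (near-)zero norm, i.e. constant features, are centered but deliberately left
# unscaled, so no division by ~0 occurs; `scaled_feature_indices` records which
# columns were actually scaled.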

def compute_logisticLoss_from_yXB(yXB):
    # shape of yXB is (n, )
    return np.sum(np.log(1.+np.exp(-yXB)))
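
# A numerically stable variant (a sketch, not part of the original module):
# for strongly misclassified points yXB << 0, np.exp(-yXB) overflows, while
# log(1 + exp(-t)) == np.logaddexp(0, -t) stays finite.
def compute_logisticLoss_from_yXB_stable(yXB):
    # shape of yXB is (n, )
    return np.sum(np.logaddexp(0., -yXB))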

def compute_logisticLoss_from_ExpyXB(ExpyXB):
    # shape of ExpyXB is (n, )
    return np.sum(np.log(1.+np.reciprocal(ExpyXB)))

def compute_logisticLoss_from_betas_and_yX(betas, yX):
    # shape of betas is (p, )
    # shape of yX is (n, p)
    yXB = yX.dot(betas)
    return compute_logisticLoss_from_yXB(yXB)

def compute_logisticLoss_from_X_y_beta0_betas(X, y, beta0, betas):
    XB = X.dot(betas) + beta0
    yXB = y * XB
    return compute_logisticLoss_from_yXB(yXB)

def convert_y_to_neg_and_pos_1(y):
    y_min, y_max = np.min(y), np.max(y)
    y_transformed = -1 + 2 * (y - y_min) / (y_max - y_min)  # map labels onto [-1, 1]
    return y_transformed
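
# Example (illustrative): binary 0/1 labels map linearly onto {-1, +1}.
#   convert_y_to_neg_and_pos_1(np.array([0, 1, 1]))  ->  array([-1.,  1.,  1.])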

def isEqual_upTo_8decimal(a, b):
    if np.isscalar(a):
        return abs(a - b) < 1e-8
    return np.max(np.abs(a - b)) < 1e-8

def isEqual_upTo_16decimal(a, b):
    if np.isscalar(a):
        return abs(a - b) < 1e-16
    return np.max(np.abs(a - b)) < 1e-16

def insertIntercept_asFirstColOf_X(X):
    n = len(X)
    intercept = np.ones((n, 1))
    X_with_intercept = np.hstack((intercept, X))
    return X_with_intercept

def get_all_product_booleans(sparsity=5):
    # enumerate all 2**sparsity binary vectors of length `sparsity`
    all_products = list(product([0, 1], repeat=sparsity))
    return np.array(all_products)
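
# Example (illustrative): each row is one of the 2**sparsity binary patterns.
#   get_all_product_booleans(2)  ->  array([[0, 0], [0, 1], [1, 0], [1, 1]])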

def download_file_from_google_drive(file_id, destination):
    # link: https://stackoverflow.com/a/39225272/5040208
    URL = "https://docs.google.com/uc?export=download"

    session = requests.Session()

    response = session.get(URL, params={'id': file_id, 'confirm': 1}, stream=True)
    token = get_confirm_token(response)

    if token:
        params = {'id': file_id, 'confirm': token}
        response = session.get(URL, params=params, stream=True)

    save_response_content(response, destination)

def get_confirm_token(response):
    # link: https://stackoverflow.com/a/39225272/5040208
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value

    return None

def save_response_content(response, destination):
    # link: https://stackoverflow.com/a/39225272/5040208
    CHUNK_SIZE = 32768

    with open(destination, "wb") as f:
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk: # filter out keep-alive new chunks
                f.write(chunk)
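
# Usage sketch (hypothetical file id; Google Drive's download endpoint has
# changed over the years, so the confirm-token handshake above may need
# updating for current Drive responses):
#   download_file_from_google_drive("FILE_ID", "dataset.npz")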

def check_bounds(bound, bound_name, num_features):
    if isinstance(bound, (float, int)):
        if bound_name == "ub":
            assert bound >= 0, f"{bound_name} needs to be >= 0"
        else:
            assert bound <= 0, f"{bound_name} needs to be <= 0"
    elif isinstance(bound, list):
        bound = np.asarray(bound)
        assert len(bound) == num_features, f"{bound_name}s for the features need to have the same length as the number of features"
        if bound_name == "ub":
            assert np.all(bound >= 0), f"all of the {bound_name}s need to be >= 0"
        else:
            assert np.all(bound <= 0), f"all of the {bound_name}s need to be <= 0"
    else:
        raise ValueError(f"{bound_name} needs to be a float, int, or list")

    return bound
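
# Minimal smoke test (a sketch; the values below are illustrative, not part
# of the original module).
if __name__ == "__main__":
    betas = np.array([0.0, 0.5, -1.2])
    assert set(get_support_indices(betas)) == {1, 2}
    assert set(get_nonsupport_indices(betas)) == {0}
    # the logistic loss at zero margin is n * log(2)
    assert isEqual_upTo_8decimal(
        compute_logisticLoss_from_yXB(np.zeros(4)), 4 * np.log(2.0)
    )
    print(check_bounds([1.0, 2.0], "ub", num_features=2))  # -> [1. 2.]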