1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
|
import numpy as np
# ---------------------------------------------------------------------------
# Scalar dtypes
# ---------------------------------------------------------------------------
# Y_DTYPE is the dtype the targets y are converted to. Leaf values, gains and
# the sums of gradients / hessians use this dtype as well.
Y_DTYPE = np.float64
X_DTYPE = np.float64
# The gradients and hessians arrays themselves are kept in single precision
# (float32 rather than Y_DTYPE) to avoid using too much memory.
G_H_DTYPE = np.float32
# uint8 can represent 256 distinct values, hence max_bins == 256.
X_BINNED_DTYPE = np.uint8
# NOTE(review): presumably the dtype of the words that make up the bitsets
# referenced by 'bitset_idx' below -- confirm against the code using it.
X_BITSET_INNER_DTYPE = np.uint32

# ---------------------------------------------------------------------------
# Structured dtypes
# ---------------------------------------------------------------------------
# Both records are built from an ordered name -> type mapping; the field order
# written here is the field order of the resulting (packed) dtype.

# Accumulators kept for one histogram bin.
HISTOGRAM_DTYPE = np.dtype(list({
    "sum_gradients": Y_DTYPE,  # sum of sample gradients in bin
    "sum_hessians": Y_DTYPE,   # sum of sample hessians in bin
    "count": np.uint32,        # number of samples in bin
}.items()))

# NOTE(review): judging by the field names, one record describes one node of
# a fitted tree -- confirm against the predictor code.
PREDICTOR_RECORD_DTYPE = np.dtype(list({
    "value": Y_DTYPE,
    "count": np.uint32,
    "feature_idx": np.uint32,
    "num_threshold": X_DTYPE,
    "missing_go_to_left": np.uint8,
    "left": np.uint32,
    "right": np.uint32,
    "gain": Y_DTYPE,
    "depth": np.uint32,
    "is_leaf": np.uint8,
    "bin_threshold": X_BINNED_DTYPE,
    "is_categorical": np.uint8,
    # The index of the corresponding bitsets in the Predictor's bitset
    # arrays. Only used if is_categorical is True.
    "bitset_idx": np.uint32,
}.items()))

# ---------------------------------------------------------------------------
# Numeric constants
# ---------------------------------------------------------------------------
# Very large but finite float; see LightGBM AvoidInf().
ALMOST_INF = 1.0e300
|