# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring
import unittest
import numpy as np
from Orange.data import Table
from Orange.evaluation import CrossValidation, CA, RMSE
from Orange.classification import RandomForestLearner
from Orange.regression import RandomForestRegressionLearner
from Orange.tests import test_filename
class RandomForestTest(unittest.TestCase):
    """Tests for the random forest classification and regression learners.

    Most classification tests use the ``iris`` table and the regression
    tests use the ``housing`` table; both are loaded once in ``setUpClass``.
    Individual test methods carry comments instead of docstrings, following
    the convention stated at the top of this file.
    """

    @classmethod
    def setUpClass(cls):
        # Load the shared data sets once for all tests in the class.
        cls.iris = Table('iris')
        cls.housing = Table('housing')

    def test_RandomForest(self):
        # 10-fold cross-validated classification accuracy on iris must lie
        # strictly between 0.9 and 0.99.
        forest = RandomForestLearner()
        cv = CrossValidation(k=10)
        results = cv(self.iris, [forest])
        ca = CA(results)
        self.assertGreater(ca, 0.9)
        self.assertLess(ca, 0.99)

    def test_predict_single_instance(self):
        # The model must accept a single data instance, both with the
        # default return type and when asked for ValueProbs.
        forest = RandomForestLearner()
        c = forest(self.iris)
        for ins in self.iris:
            c(ins)
            # Unpacking checks that ValueProbs yields exactly two items.
            _val, _prob = c(ins, c.ValueProbs)

    def test_predict_table(self):
        # The model must accept a whole table.
        forest = RandomForestLearner()
        c = forest(self.iris)
        c(self.iris)
        # Unpacking checks that ValueProbs yields exactly two items.
        _vals, _probs = c(self.iris, c.ValueProbs)

    def test_predict_numpy(self):
        # The model must accept the table's X attribute directly
        # (presumably the raw NumPy feature matrix -- see test name).
        forest = RandomForestLearner()
        c = forest(self.iris)
        c(self.iris.X)
        # Unpacking checks that ValueProbs yields exactly two items.
        _vals, _probs = c(self.iris.X, c.ValueProbs)

    def test_RandomForestRegression(self):
        # Smoke test: 10-fold cross-validation and RMSE scoring on housing
        # must run without raising; the score itself is not checked.
        forest = RandomForestRegressionLearner()
        cv = CrossValidation(k=10)
        results = cv(self.housing, [forest])
        _ = RMSE(results)

    def test_predict_single_instance_reg(self):
        # Every per-instance prediction on housing must be positive.
        forest = RandomForestRegressionLearner()
        model = forest(self.housing)
        for ins in self.housing:
            pred = model(ins)
            self.assertGreater(pred, 0)

    def test_predict_table_reg(self):
        # Predicting a whole table yields one positive value per row.
        forest = RandomForestRegressionLearner()
        model = forest(self.housing)
        pred = model(self.housing)
        self.assertEqual(len(self.housing), len(pred))
        # Compare element-wise. The previous `all(pred) > 0` collapsed the
        # predictions to a bool first and so only verified that none of
        # them was exactly zero (negative values would have passed).
        self.assertTrue(np.all(np.asarray(pred) > 0))

    def test_predict_numpy_reg(self):
        # Same as test_predict_table_reg, but predicting from housing.X.
        forest = RandomForestRegressionLearner()
        model = forest(self.housing)
        pred = model(self.housing.X)
        self.assertEqual(len(self.housing), len(pred))
        # Element-wise positivity check; see test_predict_table_reg.
        self.assertTrue(np.all(np.asarray(pred) > 0))

    def test_classification_scorer(self):
        # score_data returns one column per attribute, the scores are not
        # all zero, and the two top-scoring attributes are the petal ones.
        learner = RandomForestLearner()
        scores = learner.score_data(self.iris)
        self.assertEqual(scores.shape[1], len(self.iris.domain.attributes))
        self.assertNotEqual(sum(scores[0]), 0)
        self.assertEqual(
            ['petal length', 'petal width'],
            sorted([self.iris.domain.attributes[i].name
                    for i in np.argsort(scores[0])[-2:]]))

    def test_regression_scorer(self):
        # The two top-scoring housing attributes must be LSTAT and RM.
        learner = RandomForestRegressionLearner()
        scores = learner.score_data(self.housing)
        self.assertEqual(
            ['LSTAT', 'RM'],
            sorted([self.housing.domain.attributes[i].name
                    for i in np.argsort(scores[0])[-2:]]))

    def test_scorer_feature(self):
        # Scoring a single attribute must agree with the matching column
        # of the all-attributes score.
        np.random.seed(42)
        data = Table(test_filename('datasets/test4.tab'))
        learner = RandomForestLearner()
        scores = learner.score_data(data)
        for i, attr in enumerate(data.domain.attributes):
            # Re-seed so each call starts from the same NumPy global random
            # state as the full scoring above (presumably the forest draws
            # from it -- confirm against the learner implementation).
            np.random.seed(42)
            score = learner.score_data(data, attr)
            np.testing.assert_array_almost_equal(score, scores[:, i])

    def test_get_classification_trees(self):
        # model.trees exposes n_estimators trees, each usable as a model;
        # the first tree must predict 0 for the first iris instance.
        n = 5
        forest = RandomForestLearner(n_estimators=n)
        model = forest(self.iris)
        self.assertEqual(len(model.trees), n)
        tree = model.trees[0]
        self.assertEqual(tree(self.iris[0]), 0)

    def test_get_regression_trees(self):
        # model.trees exposes n_estimators trees; calling the first one on
        # an instance must not raise.
        n = 5
        forest = RandomForestRegressionLearner(n_estimators=n)
        model = forest(self.housing)
        self.assertEqual(len(model.trees), n)
        tree = model.trees[0]
        tree(self.housing[0])

    def test_max_features_cls(self):
        # max_features=1 (int) and max_features=1. (float) must give
        # different models: with the same random_state, their predicted
        # probabilities for the held-out first row must differ by >= 0.2.
        # NOTE(review): presumably int means a feature count and float a
        # fraction, as in scikit-learn -- confirm.
        data = Table("heart_disease")
        forest_1 = RandomForestLearner(random_state=0, max_features=1)
        model_1 = forest_1(data[1:])
        forest_2 = RandomForestLearner(random_state=0, max_features=1.)
        model_2 = forest_2(data[1:])
        diff = np.sum(np.abs(model_1(data[:1], ret=model_2.Probs) -
                             model_2(data[:1], ret=model_2.Probs)))
        self.assertGreaterEqual(diff, 0.2)

    def test_max_features_reg(self):
        # Regression counterpart of test_max_features_cls: predictions for
        # the two held-out rows must differ between int and float settings.
        data = self.housing
        forest_1 = RandomForestRegressionLearner(random_state=0,
                                                 max_features=1)
        model_1 = forest_1(data[2:])
        forest_2 = RandomForestRegressionLearner(random_state=0,
                                                 max_features=1.)
        model_2 = forest_2(data[2:])
        self.assertNotEqual(model_1(data[:2]).tolist(),
                            model_2(data[:2]).tolist())

    def test_supports_weights(self):
        # Both learners must report support for instance weights.
        self.assertTrue(RandomForestRegressionLearner().supports_weights)
        self.assertTrue(RandomForestLearner().supports_weights)
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|