File: test_generic.py

package info (click to toggle)
python-clickhouse-driver 0.2.5-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,516 kB
  • sloc: python: 10,950; pascal: 42; makefile: 31; sh: 3
file content (197 lines) | stat: -rw-r--r-- 6,462 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
import types

try:
    import numpy as np
    import pandas as pd
except ImportError:
    np = None
    pd = None

from tests.testcase import BaseTestCase
from tests.numpy.testcase import NumpyBaseTestCase


class GenericTestCase(NumpyBaseTestCase):
    """Checks of ``client.execute`` results and the NumPy insert guard.

    The client comes from ``NumpyBaseTestCase``; ``n`` is the row count
    requested from ``numbers()`` and the length of the array used for the
    rejected insert.
    """

    n = 10

    def test_columnar(self):
        # One column is selected, so a columnar result must hold exactly
        # one item, and that item must be a NumPy array.
        query = 'SELECT number FROM numbers({})'.format(self.n)
        columns = self.client.execute(query, columnar=True)

        self.assertEqual(len(columns), 1)
        self.assertIsInstance(columns[0], np.ndarray)

    def test_rowwise(self):
        # Without columnar=True the result has one entry per requested row;
        # the first entry is still expected to be a NumPy array.
        query = 'SELECT number FROM numbers({})'.format(self.n)
        rows = self.client.execute(query)

        self.assertEqual(len(rows), self.n)
        self.assertIsInstance(rows[0], np.ndarray)

    def test_insert_not_supported(self):
        # Passing NumPy data to INSERT without columnar=True must be
        # rejected with a ValueError carrying this exact message.
        expected_message = 'NumPy inserts is only allowed with columnar=True'
        payload = [np.array(range(self.n))]

        with self.create_table('a Int32'):
            with self.assertRaises(ValueError) as raised:
                self.client.execute('INSERT INTO test (a) VALUES', payload)

            self.assertEqual(expected_message, str(raised.exception))

    def test_with_column_types(self):
        # with_column_types=True yields a (rows, [(name, type), ...]) pair.
        expected = ([(2, )], [('x', 'Int32')])
        result = self.client.execute(
            'SELECT CAST(2 AS Int32) AS x',
            with_column_types=True
        )

        self.assertEqual(result, expected)


class NumpyProgressTestCase(NumpyBaseTestCase):
    """Checks of ``client.execute_with_progress`` for a trivial SELECT."""

    def test_select_with_progress(self):
        tracker = self.client.execute_with_progress('SELECT 2')

        # Drain the progress iterator first, then decide what to expect:
        # the number of (1, 0) entries depends on the server version.
        observed = list(tracker)
        if self.server_version > (20,):
            expected = [(1, 0), (1, 0)]
        else:
            expected = [(1, 0)]

        self.assertEqual(observed, expected)
        self.assertEqual(tracker.get_result(), [(2,)])
        self.assertTrue(self.client.connection.connected)

    def test_select_with_progress_no_progress_obtaining(self):
        # The result must be retrievable without iterating progress first.
        tracker = self.client.execute_with_progress('SELECT 2')
        outcome = tracker.get_result()

        self.assertEqual(outcome, [(2,)])


class NumpyIteratorTestCase(NumpyBaseTestCase):
    """Checks of ``client.execute_iter``, which must return a generator."""

    def test_select_with_iter(self):
        expected_rows = [(value,) for value in range(10)]
        rows = self.client.execute_iter(
            'SELECT number FROM system.numbers LIMIT 10'
        )

        self.assertIsInstance(rows, types.GeneratorType)
        self.assertEqual(list(rows), expected_rows)
        # A second pass over the same generator must yield nothing.
        self.assertEqual(list(rows), [])

    def test_select_with_iter_with_column_types(self):
        # With with_column_types=True the first yielded item is the list of
        # (name, type) pairs, followed by the data rows.
        header = [('number', 'UInt32')]
        expected_items = [header] + [(value,) for value in range(10)]

        rows = self.client.execute_iter(
            'SELECT CAST(number AS UInt32) as number '
            'FROM system.numbers LIMIT 10',
            with_column_types=True
        )

        self.assertIsInstance(rows, types.GeneratorType)
        self.assertEqual(list(rows), expected_items)
        # A second pass over the same generator must yield nothing.
        self.assertEqual(list(rows), [])


class DataFrameTestCase(NumpyBaseTestCase):
    """Checks of ``query_dataframe`` and ``insert_dataframe`` on the client.

    Also covers a columnar NumPy insert with a custom ``insert_block_size``.
    """

    def test_query_simple(self):
        # The queried frame must equal one built locally from range(100).
        query = (
            'SELECT CAST(number AS Int64) AS x FROM system.numbers LIMIT 100'
        )
        frame = self.client.query_dataframe(query)
        expected = pd.DataFrame({'x': range(100)})

        self.assertTrue(frame.equals(expected))

    def test_query_replace_whitespace_in_column_names(self):
        # The column aliased "test me" must be reachable as 'test_me'.
        frame = self.client.query_dataframe(
            'SELECT number AS "test me" FROM system.numbers LIMIT 100'
        )

        self.assertIn('test_me', frame)

    def test_insert_simple(self):
        row_count = 10
        source = pd.DataFrame({
            'a': range(row_count),
            'b': list(map(float, range(row_count)))
        })

        with self.create_table('a Int64, b Float64'):
            inserted = self.client.insert_dataframe(
                'INSERT INTO test VALUES', source
            )
            self.assertEqual(inserted, row_count)

            # Reading the table back in key order must reproduce the frame.
            stored = self.client.query_dataframe(
                'SELECT * FROM test ORDER BY a'
            )
            self.assertTrue(source.equals(stored))

    def test_insert_chunking(self):
        # Three values are sent with insert_block_size=1; the call is still
        # expected to return 3.
        with self.create_table('a Int64'):
            inserted = self.client.execute(
                'INSERT INTO test VALUES',
                [np.array(range(3))],
                columnar=True,
                settings={'insert_block_size': 1}
            )
            self.assertEqual(inserted, 3)

    def test_insert_not_ordered_columns(self):
        # The frame's column order (b, a) differs from the order named in
        # the INSERT statement (a, b).
        row_count = 10
        unordered = pd.DataFrame({
            'b': range(row_count),
            'a': list(map(str, range(row_count)))
        })
        unordered = unordered[['b', 'a']]

        with self.create_table('a String, b Float64'):
            inserted = self.client.insert_dataframe(
                'INSERT INTO test (a, b) VALUES', unordered
            )
            self.assertEqual(inserted, row_count)

    def test_empty_frame_shape(self):
        # A LIMIT 0 query selecting two columns must give a (0, 2) frame.
        frame = self.client.query_dataframe(
            'SELECT number AS a, number AS a FROM system.numbers LIMIT 0'
        )

        self.assertEqual(frame.shape, (0, 2))

    def test_data_less_columns_than_expected(self):
        expected_message = "DataFrame missing required columns: ['b']"

        with self.create_table('a Int8, b Int8'):
            # The frame only provides column 'a'; the table also needs 'b'.
            partial = pd.DataFrame([1, 2, 3], columns=['a'])
            with self.assertRaises(ValueError) as raised:
                self.client.insert_dataframe(
                    'INSERT INTO test VALUES', partial
                )
            self.assertEqual(str(raised.exception), expected_message)

    def test_data_different_columns_than_expected(self):
        expected_message = "DataFrame missing required columns: ['b']"

        with self.create_table('a Int8, b Int8'):
            # The frame provides 'a' and 'c'; the table needs 'a' and 'b'.
            mismatched = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'c'])
            with self.assertRaises(ValueError) as raised:
                self.client.insert_dataframe(
                    'INSERT INTO test VALUES', mismatched
                )
            self.assertEqual(str(raised.exception), expected_message)


class NoNumPyTestCase(BaseTestCase):
    """Checks the error raised when NumPy features are used without extras.

    Every test is skipped when both ``numpy`` and ``pandas`` can be
    imported, because the scenario under test is their absence.
    """

    def setUp(self):
        """Skip the test unless at least one of numpy/pandas is missing.

        Only ``ImportError`` counts as "not installed". Any other exception
        raised while importing (for example from a broken installation) is
        allowed to propagate instead of being silently swallowed, matching
        the module-level import guard, which also catches ``ImportError``
        only.
        """
        super(NoNumPyTestCase, self).setUp()

        try:
            import numpy  # noqa: F401
            import pandas  # noqa: F401
        except ImportError:
            # At least one extra is unavailable: let the test run.
            return

        self.skipTest('NumPy extras are installed')

    def test_runtime_error_without_numpy(self):
        # Executing a query on a client created with use_numpy=True must
        # raise RuntimeError with this exact message.
        with self.assertRaises(RuntimeError) as e:
            with self.created_client(settings={'use_numpy': True}) as client:
                client.execute('SELECT 1')

        self.assertEqual(
            'Extras for NumPy must be installed', str(e.exception)
        )

    def test_query_dataframe(self):
        # query_dataframe on such a client must fail the same way.
        with self.assertRaises(RuntimeError) as e:
            with self.created_client(settings={'use_numpy': True}) as client:
                client.query_dataframe('SELECT 1 AS x')

        self.assertEqual(
            'Extras for NumPy must be installed', str(e.exception)
        )