File: test_helpers.py

package info (click to toggle)
azure-kusto-python 4.1.1-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 1,184 kB
  • sloc: python: 9,329; sh: 13; makefile: 3
file content (76 lines) | stat: -rw-r--r-- 3,340 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License

import json
import os
import pytest

from azure.kusto.data.helpers import dataframe_from_result_table
from azure.kusto.data.response import KustoResponseDataSetV2


# pandas and numpy are optional dependencies of this test module. PANDAS
# records whether both could be imported so that tests needing them can be
# skipped instead of failing at collection time.
PANDAS = False
try:
    import pandas
    import numpy

    PANDAS = True
except ImportError:
    # Only a missing package should turn the flag off. A bare ``except:``
    # would also swallow KeyboardInterrupt/SystemExit and hide genuine
    # breakage inside an installed pandas/numpy.
    pass


class TestDataFrameFromResultsTable:
    """Tests the dataframe_from_result_table helper function"""

    @pytest.mark.skipif(not PANDAS, reason="requires pandas")
    def test_dataframe_from_result_table(self):
        """Test conversion of KustoResultTable to pandas.DataFrame, including fixes for certain column types

        The response is read from ``input/dataframe.json`` next to this file,
        wrapped in a KustoResponseDataSetV2, and its first primary result is
        converted to a DataFrame. Each row is then checked for the expected
        record name, value types and values.
        """
        fixture_path = os.path.join(os.path.dirname(__file__), "input", "dataframe.json")
        # Explicit encoding so the fixture is decoded the same way on every platform.
        with open(fixture_path, "r", encoding="utf-8") as response_file:
            payload = json.load(response_file)

        response = KustoResponseDataSetV2(payload)
        df = dataframe_from_result_table(response.primary_results[0])

        # Use the public pandas names rather than private ``pandas._libs``
        # module paths; these are the same classes, but the public names do
        # not depend on pandas' internal module layout.
        timestamp_type = pandas.Timestamp
        timedelta_type = pandas.Timedelta
        nat_type = type(pandas.NaT)

        first = df.iloc[0]
        assert first.RecordName == "now"
        assert type(first.RecordTime) is timestamp_type
        expected_time = {"year": 2021, "month": 12, "day": 22, "hour": 11, "minute": 43, "second": 0}
        # Comparing dicts (instead of all(...)) makes pytest show which field differs.
        actual_time = {field: getattr(first.RecordTime, field) for field in expected_time}
        assert actual_time == expected_time
        assert type(first.RecordBool) is numpy.bool_
        assert bool(first.RecordBool) is True
        assert type(first.RecordInt) is numpy.int32
        assert first.RecordInt == 5678
        assert type(first.RecordReal) is numpy.float64
        assert first.RecordReal == 3.14159

        # Kusto datetime(0000-01-01T00:00:00Z), which Pandas can't represent.
        earliest = df.iloc[1]
        assert earliest.RecordName == "earliest datetime"
        assert type(earliest.RecordTime) is nat_type
        assert pandas.isnull(earliest.RecordReal)

        # Kusto datetime(9999-12-31T23:59:59Z), which Pandas can't represent.
        latest = df.iloc[2]
        assert latest.RecordName == "latest datetime"
        assert type(latest.RecordTime) is nat_type
        assert type(latest.RecordReal) is numpy.float64
        assert latest.RecordReal == numpy.inf

        # Pandas earliest datetime
        earliest_pandas = df.iloc[3]
        assert earliest_pandas.RecordName == "earliest pandas datetime"
        assert type(earliest_pandas.RecordTime) is timestamp_type
        assert type(earliest_pandas.RecordReal) is numpy.float64
        assert earliest_pandas.RecordReal == -numpy.inf

        # Pandas latest datetime
        latest_pandas = df.iloc[4]
        assert latest_pandas.RecordName == "latest pandas datetime"
        assert type(latest_pandas.RecordTime) is timestamp_type

        # Kusto 600000000 ticks timedelta, expected to convert to one minute
        ticks_row = df.iloc[5]
        assert ticks_row.RecordName == "timedelta ticks"
        assert type(ticks_row.RecordTime) is timestamp_type
        assert type(ticks_row.RecordOffset) is timedelta_type
        assert ticks_row.RecordOffset == pandas.to_timedelta("00:01:00")

        # Kusto timedelta(1.01:01:01.0) == 1 day, 1 hour, 1 minute and 1 second
        string_row = df.iloc[6]
        assert string_row.RecordName == "timedelta string"
        assert type(string_row.RecordTime) is timestamp_type
        assert type(string_row.RecordOffset) is timedelta_type
        assert string_row.RecordOffset == pandas.to_timedelta("1 days 01:01:01")