File: test_3088_select_columns_supports_literal_dots.py

package info (click to toggle)
python-awkward 2.8.9-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 24,932 kB
  • sloc: python: 178,875; cpp: 33,828; sh: 432; makefile: 21; javascript: 8
file content (75 lines) | stat: -rw-r--r-- 2,350 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE

from __future__ import annotations

import os

import pytest

import awkward as ak


def array_with_dotted_fields():
    """Build a one-record array with a nested field name that contains a dot.

    The record nests ``x`` (a list) -> ``y`` (a record) whose fields are
    ``z`` (a list of integers) and ``w.1`` (an integer). The dot in ``w.1``
    is a literal character of the field name.
    """
    innermost = {"z": [1, 2, 3], "w.1": 4}
    record = {"x": [{"y": innermost}]}
    return ak.Array([record])


def test_alternative_specifiers():
    """Check that different column-specifier spellings select the same form."""
    form = array_with_dotted_fields().layout.form

    # A bare wildcard keeps every column.
    assert form.select_columns("*") == form

    # A tuple of path components (with a literal dot in the last one) must
    # agree with the equivalent dotted wildcard pattern.
    by_components = form.select_columns([("x", "y", "w.1")])
    by_pattern = form.select_columns("x.y.w*")
    assert by_components == by_pattern

    # Component lists and dotted strings may be mixed in one specifier;
    # together these two cover both leaves, i.e. the whole form.
    mixed_specifier = [["x", "y", "w.1"], "x.y.z"]
    assert form.select_columns(mixed_specifier) == form


def test_columns_with_dots_from_parquet(tmp_path):
    """Write arrays to Parquet and read back columns whose names contain dots.

    Skipped when ``pyarrow.parquet`` cannot be imported.
    """
    pytest.importorskip("pyarrow.parquet")

    # Case 1: select a single nested column whose leaf name contains a dot.
    source = array_with_dotted_fields()
    dotted_path = os.path.join(tmp_path, "test_3088_array1.parquet")
    ak.to_parquet(source, dotted_path)
    selected = ak.from_parquet(dotted_path, columns=[("x", "y", "w.1")])
    # Only "w.1" was requested, so its sibling "z" must be absent.
    expected = [{"x": [{"y": {"w.1": 4}}]}]
    assert selected.to_list() == expected

    # Case 2: a top-level field "crazy.dot" next to a nested path crazy -> dot.
    ambiguous = ak.Array(
        [
            {
                "crazy": {"dot": [11, 12, 13]},
                "crazy.dot": [21, 22, 23],
            }
        ]
    )
    ambiguous_path = os.path.join(tmp_path, "test_3088_array_ambig.parquet")
    ak.to_parquet(ambiguous, ambiguous_path)
    reread = ak.from_parquet(ambiguous_path, columns=[("crazy.dot",)])
    # Note: pyarrow.parquet currently cannot tell a dot used as a separator
    # from a dot that is part of a field name. It builds a dict of all
    # possible indices and returns those. Awkward itself still needs to be
    # able to disambiguate the two, which it now can. Working around the
    # pyarrow limitation would need further feature work (column name
    # substitutions), should that be justified.
    # Consequently the selection reads back everything.
    assert ak.array_equal(reread, ambiguous)