File: test_merge.py

package info (click to toggle)
stactools 0.5.3-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 23,796 kB
  • sloc: python: 4,498; xml: 554; sh: 395; makefile: 34
file content (161 lines) | stat: -rw-r--r-- 4,846 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import os
from pathlib import Path
from typing import List

import pystac
import pytest
from click.testing import CliRunner
from stactools.cli.cli import cli
from stactools.core import move_all_assets

from tests import test_data


@pytest.fixture(scope="function")
def two_planet_disaster_subsets(tmp_path: Path):
    """Write two single-item copies of the planet disaster collection.

    For each of two fixed item ids the collection is read from the test
    data, every item with a different id is removed, the extent is
    recomputed from what remains, and the result is saved beneath
    ``tmp_path / <item id>``.

    Returns the self hrefs of the two saved collections as a list, in the
    same order as the item ids.
    """

    def _write_subset(keep_id: str):
        # Read a fresh copy on every call so removals made for one subset
        # never affect the other.
        subset = pystac.Collection.from_file(
            test_data.get_path("data-files/planet-disaster/collection.json")
        )

        # Collect the items to drop up front; the tree is mutated only
        # after the traversal has finished.
        discard = [
            entry for entry in subset.get_items(recursive=True) if entry.id != keep_id
        ]
        for entry in discard:
            entry.get_parent().remove_item(entry.id)

        subset.update_extent_from_items()
        subset.normalize_hrefs(str(tmp_path / keep_id))
        # Saved once as self-contained, then again after move_all_assets
        # has been called with copy=True.
        subset.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)
        move_all_assets(subset, copy=True)
        subset.save()
        return subset.get_self_href()

    return [
        _write_subset(keep_id)
        for keep_id in ("20170831_172754_101c", "20170831_162740_ssc1d1")
    ]


def test_merge_moves_assets(two_planet_disaster_subsets: List[str]):
    """Run ``merge -a`` and check each asset sits in its item's directory.

    The first collection path is merged into the second; the second is then
    re-read and must contain two items whose asset hrefs share a directory
    with the owning item's self href.
    """
    source_path = two_planet_disaster_subsets[0]
    target_path = two_planet_disaster_subsets[1]

    outcome = CliRunner().invoke(cli, ["merge", "-a", source_path, target_path])
    assert outcome.exit_code == 0

    merged = pystac.read_file(target_path)
    merged_items = list(merged.get_items(recursive=True))
    assert len(merged_items) == 2

    for merged_item in merged_items:
        for asset in merged_item.assets.values():
            asset_dir = os.path.dirname(asset.get_absolute_href())
            item_dir = os.path.dirname(merged_item.get_self_href())
            assert asset_dir == item_dir


def test_merge_as_child(two_planet_disaster_subsets: List[str]):
    """Run ``merge -a -c`` and check the target's child links resolve.

    After the merge, the second collection is re-read; it must expose
    exactly two child links, each pointing at a path that exists on disk.
    """
    source_path = two_planet_disaster_subsets[0]
    target_path = two_planet_disaster_subsets[1]

    cli_args = ["merge", "-a", "-c", source_path, target_path]
    outcome = CliRunner().invoke(cli, cli_args)
    assert outcome.exit_code == 0

    merged = pystac.read_file(target_path)
    child_links = list(merged.get_child_links())
    assert len(child_links) == 2

    for link in child_links:
        child_href = link.get_absolute_href()
        assert os.path.exists(child_href)


def test_merge_updates_collection_extent(two_planet_disaster_subsets: List[str]):
    """Check that ``merge`` rewrites the extent of the target collection.

    The extents of both collections are read before merging. After the first
    collection is merged into the second, the second collection's extent must
    differ from what it was, its first bbox must start at the smaller of the
    two original minimum x values, and its first interval must end at the
    later of the two original end times.
    """
    col_paths = two_planet_disaster_subsets
    extent1 = pystac.read_file(col_paths[0]).extent
    extent2 = pystac.read_file(col_paths[1]).extent

    xmin = min(extent1.spatial.bboxes[0][0], extent2.spatial.bboxes[0][0])
    time_max = max(
        extent1.temporal.intervals[0][1],
        extent2.temporal.intervals[0][1],
    )

    runner = CliRunner()
    result = runner.invoke(cli, ["merge", col_paths[0], col_paths[1]])
    assert result.exit_code == 0

    result_extent = pystac.read_file(col_paths[1]).extent

    def set_of_values(x):
        """Return the set of leaf values found in nested lists/dicts.

        Dicts contribute their values (keys are ignored). Anything that is
        neither a list nor a dict at the top level yields an empty set.
        """
        if isinstance(x, dict):
            return set_of_values(list(x.values()))

        values = set()
        if isinstance(x, list):
            for e in x:
                if isinstance(e, (list, dict)):
                    # Recurse into containers; they are unhashable and
                    # cannot be added to the set directly.
                    values |= set_of_values(e)
                else:
                    values.add(e)
        return values

    # Make sure it didn't just carry forward the old extent
    assert set_of_values(result_extent.spatial.bboxes) != set_of_values(
        extent2.spatial.bboxes
    )

    assert set_of_values(result_extent.temporal.intervals) != set_of_values(
        extent2.temporal.intervals
    )

    assert result_extent.spatial.bboxes[0][0] == xmin
    assert result_extent.temporal.intervals[0][1] == time_max


def test_merges_assets(tmp_path: Path, tmp_planet_disaster: pystac.Collection):
    """Merge two copies of one item that each lack a different asset.

    Collection "a" holds the item without its "visual" asset and collection
    "b" holds it without "full-jpg". After merging "a" into "b" with
    ``--move-assets --ignore-conflicts``, the item read back from "b" must
    carry both assets.
    """
    item_id = "2017831_195552_SS02"
    base_col = tmp_planet_disaster
    shared_item = base_col.get_item(item_id, recursive=True)

    def _save_variant(subdir: str, dropped_asset: str) -> str:
        # Clone the collection, strip its children, and attach a copy of the
        # shared item with one asset removed.
        variant = base_col.clone()
        variant.clear_children()

        trimmed = shared_item.clone()
        del trimmed.assets[dropped_asset]
        variant.add_item(trimmed)

        variant.normalize_hrefs(str(tmp_path / subdir))
        variant.save()
        return str(tmp_path / subdir / "collection.json")

    source_href = _save_variant("a", "visual")
    target_href = _save_variant("b", "full-jpg")

    cli_args = [
        "merge",
        source_href,
        target_href,
        "--move-assets",
        "--ignore-conflicts",
    ]
    outcome = CliRunner().invoke(cli, cli_args)
    assert outcome.exit_code == 0

    merged = pystac.read_file(target_href)
    merged_item = merged.get_item(item_id, recursive=True)
    for asset_key in ("visual", "full-jpg"):
        assert asset_key in merged_item.assets