File: gdal2xyz.py

package info (click to toggle)
gdal 3.6.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 89,664 kB
  • sloc: cpp: 1,136,033; ansic: 197,355; python: 35,910; java: 5,511; xml: 4,011; sh: 3,950; cs: 2,443; yacc: 1,047; makefile: 288
file content (399 lines) | stat: -rw-r--r-- 14,544 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
#!/usr/bin/env python3
###############################################################################
# $Id$
#
# Project:  GDAL
# Purpose:  Script to translate GDAL supported raster into XYZ ASCII
#           point stream.
# Author:   Frank Warmerdam, warmerdam@pobox.com
#
###############################################################################
# Copyright (c) 2002, Frank Warmerdam <warmerdam@pobox.com>
# Copyright (c) 2020-2021, Idan Miara <idan@miara.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
###############################################################################
import sys
import textwrap
from numbers import Number, Real
from typing import Optional, Sequence, Tuple, Union

import numpy as np

from osgeo import gdal
from osgeo_utils.auxiliary.base import PathLikeOrStr
from osgeo_utils.auxiliary.gdal_argparse import GDALArgumentParser, GDALScript
from osgeo_utils.auxiliary.numpy_util import GDALTypeCodeAndNumericTypeCodeFromDataSet
from osgeo_utils.auxiliary.progress import (
    OptionalProgressCallback,
    get_progress_callback,
)
from osgeo_utils.auxiliary.util import PathOrDS, get_bands, open_ds


def gdal2xyz(
    srcfile: PathOrDS,
    dstfile: PathLikeOrStr = None,
    srcwin: Optional[Sequence[int]] = None,
    skip: Union[int, Sequence[int]] = 1,
    band_nums: Optional[Sequence[int]] = None,
    delim: str = " ",
    skip_nodata: bool = False,
    src_nodata: Optional[Union[Sequence, Number]] = None,
    dst_nodata: Optional[Union[Sequence, Number]] = None,
    return_np_arrays: bool = False,
    pre_allocate_np_arrays: bool = True,
    progress_callback: OptionalProgressCallback = ...,
) -> Optional[Tuple]:
    """
    translates a raster file (or dataset) into xyz format

    skip - how many rows/cols to skip each iteration
    srcwin (xoff, yoff, xsize, ysize) - Selects a subwindow from the source image for copying based on pixel/line location.
    band_nums - selected input bands to process, None to process all.
    delim - the delimiter to use between values in a line
    skip_nodata - Exclude the output lines with nodata value (as determined by srcnodata)
    src_nodata - The nodata value of the dataset (for skipping or replacing)
        default (`None`) - Use the dataset NoDataValue;
        `Sequence`/`Number` - use the given nodata value (per band or per dataset).
    dst_nodata - Replace source nodata with a given nodata. Has an effect only if not setting `-skipnodata`
        default(`None`) - use srcnodata, no replacement;
        `Sequence`/`Number` - replace the `srcnodata` with the given nodata value (per band or per dataset).
    srcfile - The source dataset filename or dataset object
    dstfile - The output dataset filename; for dstfile=None - if return_np_arrays=False then output will be printed to stdout
    return_np_arrays - return numpy arrays of the result, otherwise returns None
    pre_allocate_np_arrays - pre-allocated result arrays.
        Should be faster unless skip_nodata and the input is very sparse thus most data points will be skipped.
    progress_callback - progress callback function. use None for quiet or Ellipsis for using the default callback
    """

    result = None

    progress_callback = get_progress_callback(progress_callback)

    # Open source file.
    ds = open_ds(srcfile)
    if ds is None:
        raise Exception(f"Could not open {srcfile}.")

    bands = get_bands(ds, band_nums)
    band_count = len(bands)

    gt = ds.GetGeoTransform()

    # Collect information on all the source files.
    if srcwin is None:
        srcwin = (0, 0, ds.RasterXSize, ds.RasterYSize)

    dt, np_dt = GDALTypeCodeAndNumericTypeCodeFromDataSet(ds)

    # Open the output file.
    if dstfile is not None:
        dst_fh = open(dstfile, "wt")
    elif return_np_arrays:
        dst_fh = None
    else:
        dst_fh = sys.stdout

    if dst_fh:
        if dt == gdal.GDT_Int32 or dt == gdal.GDT_UInt32:
            band_format = (("%d" + delim) * len(bands)).rstrip(delim) + "\n"
        else:
            band_format = (("%g" + delim) * len(bands)).rstrip(delim) + "\n"

        # Setup an appropriate print format.
        if (
            abs(gt[0]) < 180
            and abs(gt[3]) < 180
            and abs(ds.RasterXSize * gt[1]) < 180
            and abs(ds.RasterYSize * gt[5]) < 180
        ):
            frmt = "%.10g" + delim + "%.10g" + delim + "%s"
        else:
            frmt = "%.3f" + delim + "%.3f" + delim + "%s"

    if isinstance(src_nodata, Number):
        src_nodata = [src_nodata] * band_count
    elif src_nodata is None:
        src_nodata = list(band.GetNoDataValue() for band in bands)
    if None in src_nodata:
        src_nodata = None
    if src_nodata is not None:
        src_nodata = np.asarray(src_nodata, dtype=np_dt)

    if isinstance(dst_nodata, Number):
        dst_nodata = [dst_nodata] * band_count
    if (dst_nodata is None) or (None in dst_nodata) or (src_nodata is None):
        dst_nodata = None
    if dst_nodata is not None:
        dst_nodata = np.asarray(dst_nodata, dtype=np_dt)

    skip_nodata = skip_nodata and (src_nodata is not None)
    replace_nodata = (not skip_nodata) and (dst_nodata is not None)
    process_nodata = skip_nodata or replace_nodata

    if isinstance(skip, Sequence):
        x_skip, y_skip = skip
    else:
        x_skip = y_skip = skip

    x_off, y_off, x_size, y_size = srcwin
    bands_count = len(bands)

    nXBlocks = (x_size - x_off) // x_skip
    nYBlocks = (y_size - y_off) // y_skip
    progress_end = nXBlocks * nYBlocks
    progress_curr = 0
    progress_prev = -1
    progress_parts = 100

    if return_np_arrays:
        size = progress_end if pre_allocate_np_arrays else 0
        all_geo_x = np.empty(size)
        all_geo_y = np.empty(size)
        all_data = np.empty((size, band_count), dtype=np_dt)

    # Loop emitting data.
    idx = 0
    for y in range(y_off, y_off + y_size, y_skip):

        size = bands_count if pre_allocate_np_arrays else 0
        data = np.empty((size, x_size), dtype=np_dt)  # dims: (bands_count, x_size)
        for i_bnd, band in enumerate(bands):
            band_data = band.ReadAsArray(x_off, y, x_size, 1)  # read one band line
            if pre_allocate_np_arrays:
                data[i_bnd] = band_data[0]
            else:
                data = np.append(data, band_data, axis=0)

        for x_i in range(0, x_size, x_skip):

            progress_curr += 1
            if progress_callback:
                progress_frac = progress_curr / progress_end
                progress = int(progress_frac * progress_parts)
                if progress > progress_prev:
                    progress_prev = progress
                    progress_callback(progress_frac)

            x_i_data = data[:, x_i]  # single pixel, dims: (bands)
            if process_nodata and np.array_equal(src_nodata, x_i_data):
                if skip_nodata:
                    continue
                elif replace_nodata:
                    x_i_data = dst_nodata

            x = x_i + x_off

            geo_x = gt[0] + (x + 0.5) * gt[1] + (y + 0.5) * gt[2]
            geo_y = gt[3] + (x + 0.5) * gt[4] + (y + 0.5) * gt[5]

            if dst_fh:
                band_str = band_format % tuple(x_i_data)
                line = frmt % (float(geo_x), float(geo_y), band_str)
                dst_fh.write(line)
            if return_np_arrays:
                if pre_allocate_np_arrays:
                    all_geo_x[idx] = geo_x
                    all_geo_y[idx] = geo_y
                    all_data[idx] = x_i_data
                else:
                    all_geo_x = np.append(all_geo_x, geo_x)
                    all_geo_y = np.append(all_geo_y, geo_y)
                    all_data = np.append(all_data, [x_i_data], axis=0)
            idx += 1

    if return_np_arrays:
        nodata = None if skip_nodata else dst_nodata if replace_nodata else src_nodata
        if idx != progress_curr:
            all_geo_x = all_geo_x[:idx]
            all_geo_y = all_geo_y[:idx]
            all_data = all_data[:idx, :]
        result = all_geo_x, all_geo_y, all_data.transpose(), nodata

    return result


class GDAL2XYZ(GDALScript):
    def __init__(self):
        super().__init__()
        self.title = "Translates a raster file into xyz format"
        self.description = textwrap.dedent(
            """\
            The gdal2xyz utility can be used to translate a raster file into xyz format.
            It can be used as an alternative to gdal_translate of=xyz,
            But supporting other options, for example:
            * Select more then one band;
            * Skip or replace nodata value;
            * Return the output as numpy arrays."""
        )

    def get_parser(self, argv) -> GDALArgumentParser:
        parser = self.parser

        parser.add_argument(
            "-skip",
            dest="skip",
            action="store_true",
            default=1,
            help="How many rows/cols to skip in each iteration.",
        )

        parser.add_argument(
            "-srcwin",
            metavar=("xoff", "yoff", "xsize", "ysize"),
            dest="srcwin",
            type=float,
            nargs=4,
            help="Selects a subwindow from the source image for copying based on pixel/line location",
        )

        parser.add_argument(
            "-b",
            "-band",
            "--band",
            dest="band_nums",
            metavar="band",
            type=int,
            nargs="+",
            help="Select bands from the input spectral bands for output. "
            "Bands are numbered from 1 in the order spectral bands are specified. "
            "Multiple -b switches may be used. When no -b switch is used, the first band will be used."
            "In order to use all input bands set -allbands or -b 0..",
        )

        parser.add_argument(
            "-allbands",
            "--allbands",
            dest="allbands",
            action="store_true",
            help="Select all input bands.",
        )

        parser.add_argument(
            "-csv",
            dest="delim",
            const=",",
            default=" ",
            action="store_const",
            help="Use comma instead of space as a delimiter.",
        )

        parser.add_argument(
            "-skipnodata",
            "--skipnodata",
            "-skip_nodata",
            dest="skip_nodata",
            action="store_true",
            help="Exclude the output lines with nodata value (as determined by srcnodata).",
        )

        parser.add_argument(
            "-srcnodata",
            "-nodatavalue",
            dest="src_nodata",
            type=Real,
            nargs="*",
            help="The nodata value of the dataset (for skipping or replacing) "
            "Default (None) - Use the dataset nodata value; "
            "Sequence/Number - Use the given nodata value (per band or per dataset).",
        )

        parser.add_argument(
            "-dstnodata",
            dest="dst_nodata",
            type=Real,
            nargs="*",
            help="Replace source nodata with a given nodata. "
            "Has an effect only if not setting -skipnodata. "
            "Default(None) - Use srcnodata, no replacement; "
            "Sequence/Number - Replace the srcnodata with the given nodata value "
            "(per band or per dataset).",
        )

        parser.add_argument(
            "srcfile",
            metavar="src_dataset",
            type=str,
            help="The source dataset name. It can be either file name, "
            "URL of data source or subdataset name for multi-dataset files.",
        )

        parser.add_argument(
            "dstfile",
            metavar="dst_dataset",
            type=str,
            help="The destination file name.",
        )

        return parser

    def parse(self, argv) -> dict:
        def checkInt(s):
            try:
                int(s)
                return True
            except ValueError:
                return False

        # We allowed "-b band_number_1 [... band_number_X] srcfile dstfile" syntax
        # before switching to ArgParse. But ArgParse doesn't like that.
        # So manually parse -b argument and strip it before ArgParse.
        count = len(argv)
        new_argv = []
        i = 0
        band_nums = []
        while i < count:
            arg = argv[i]
            if arg in ("-b", "-band", "--band"):
                if i == count - 1:
                    raise Exception(f"Missing argument following {arg}: ")
                i += 1
                if not checkInt(argv[i]):
                    raise Exception(f"Argument following {arg} should be a integer")
                while i < count and checkInt(argv[i]):
                    band_nums.append(int(argv[i]))
                    i += 1
            else:
                i += 1
                new_argv.append(arg)

        kwargs = super(GDAL2XYZ, self).parse(new_argv)
        if band_nums:
            kwargs["band_nums"] = band_nums
        return kwargs

    def augment_kwargs(self, kwargs) -> dict:
        if kwargs.get("allbands"):
            kwargs["band_nums"] = None
        elif not kwargs.get("band_nums"):
            kwargs["band_nums"] = 1
        del kwargs["allbands"]
        return kwargs

    def doit(self, **kwargs):
        return gdal2xyz(**kwargs)


def main(argv=sys.argv):
    return GDAL2XYZ().main(argv)


if __name__ == "__main__":
    sys.exit(main(sys.argv))