File: polygonize.cpp

package info (click to toggle)
gdal 3.12.2%2Bdfsg-1
links: PTS, VCS
area: main
in suites: sid
size: 92,396 kB
sloc: cpp: 1,224,305; ansic: 206,456; python: 26,284; java: 6,001; xml: 4,769; sh: 3,869; cs: 2,513; yacc: 1,306; makefile: 214
file content (622 lines) | stat: -rw-r--r-- 25,472 bytes
parent folder | download | duplicates (2)
/******************************************************************************
 * Project:  GDAL
 * Purpose:  Raster to Polygon Converter
 * Author:   Frank Warmerdam, warmerdam@pobox.com
 *
 ******************************************************************************
 * Copyright (c) 2008, Frank Warmerdam
 * Copyright (c) 2009-2020, Even Rouault <even dot rouault at spatialys.com>
 *
 * SPDX-License-Identifier: MIT
 ****************************************************************************/

#include "cpl_port.h"
#include "gdal_alg.h"

#include <stddef.h>
#include <stdio.h>
#include <cstdlib>
#include <string.h>

#include <algorithm>
#include <limits>
#include <map>
#include <memory>
#include <utility>
#include <vector>

#include "gdal_alg_priv.h"
#include "gdal.h"
#include "ogr_api.h"
#include "ogr_core.h"
#include "cpl_conv.h"
#include "cpl_error.h"
#include "cpl_progress.h"
#include "cpl_string.h"
#include "cpl_vsi.h"

#include "polygonize_polygonizer.h"

using namespace gdal::polygonizer;

/************************************************************************/
/*                          GPMaskImageData()                           */
/*                                                                      */
/*      Mask out image pixels to a special nodata value if the mask     */
/*      band is zero.                                                   */
/************************************************************************/

template <class DataType>
static CPLErr GPMaskImageData(GDALRasterBandH hMaskBand, GByte *pabyMaskLine,
                              int iY, int nXSize, DataType *panImageLine)

{
    const CPLErr eErr = GDALRasterIO(hMaskBand, GF_Read, 0, iY, nXSize, 1,
                                     pabyMaskLine, nXSize, 1, GDT_Byte, 0, 0);
    if (eErr != CE_None)
        return eErr;

    for (int i = 0; i < nXSize; i++)
    {
        if (pabyMaskLine[i] == 0)
            panImageLine[i] = GP_NODATA_MARKER;
    }

    return CE_None;
}

/************************************************************************/
/*                           GDALPolygonizeT()                          */
/************************************************************************/

template <class DataType, class EqualityTest>
static CPLErr GDALPolygonizeT(GDALRasterBandH hSrcBand,
                              GDALRasterBandH hMaskBand, OGRLayerH hOutLayer,
                              int iPixValField, CSLConstList papszOptions,
                              GDALProgressFunc pfnProgress, void *pProgressArg,
                              GDALDataType eDT)

{
    VALIDATE_POINTER1(hSrcBand, "GDALPolygonize", CE_Failure);
    VALIDATE_POINTER1(hOutLayer, "GDALPolygonize", CE_Failure);

    if (pfnProgress == nullptr)
        pfnProgress = GDALDummyProgress;

    const int nConnectedness =
        CSLFetchNameValue(papszOptions, "8CONNECTED") ? 8 : 4;

    /* -------------------------------------------------------------------- */
    /*      Confirm our output layer will support feature creation.         */
    /* -------------------------------------------------------------------- */
    if (!OGR_L_TestCapability(hOutLayer, OLCSequentialWrite))
    {
        CPLError(CE_Failure, CPLE_AppDefined,
                 "Output feature layer does not appear to support creation "
                 "of features in GDALPolygonize().");
        return CE_Failure;
    }

    /* -------------------------------------------------------------------- */
    /*      Allocate working buffers.                                       */
    /* -------------------------------------------------------------------- */
    const int nXSize = GDALGetRasterBandXSize(hSrcBand);
    const int nYSize = GDALGetRasterBandYSize(hSrcBand);
    if (nXSize > std::numeric_limits<int>::max() - 2)
    {
        CPLError(CE_Failure, CPLE_AppDefined, "Too wide raster");
        return CE_Failure;
    }

    DataType *panLastLineVal =
        static_cast<DataType *>(VSI_MALLOC2_VERBOSE(sizeof(DataType), nXSize));
    DataType *panThisLineVal =
        static_cast<DataType *>(VSI_MALLOC2_VERBOSE(sizeof(DataType), nXSize));
    GInt32 *panLastLineId =
        static_cast<GInt32 *>(VSI_MALLOC2_VERBOSE(sizeof(GInt32), nXSize));
    GInt32 *panThisLineId =
        static_cast<GInt32 *>(VSI_MALLOC2_VERBOSE(sizeof(GInt32), nXSize));

    GByte *pabyMaskLine = static_cast<GByte *>(VSI_MALLOC_VERBOSE(nXSize));

    if (panLastLineVal == nullptr || panThisLineVal == nullptr ||
        panLastLineId == nullptr || panThisLineId == nullptr ||
        pabyMaskLine == nullptr)
    {
        CPLFree(panThisLineId);
        CPLFree(panLastLineId);
        CPLFree(panThisLineVal);
        CPLFree(panLastLineVal);
        CPLFree(pabyMaskLine);
        return CE_Failure;
    }

    /* -------------------------------------------------------------------- */
    /*      Get the geotransform, if there is one, so we can convert the    */
    /*      vectors into georeferenced coordinates.                         */
    /* -------------------------------------------------------------------- */
    GDALGeoTransform gt;
    bool bGotGeoTransform = false;
    const char *pszDatasetForGeoRef =
        CSLFetchNameValue(papszOptions, "DATASET_FOR_GEOREF");
    if (pszDatasetForGeoRef)
    {
        auto poSrcDS = std::unique_ptr<GDALDataset>(GDALDataset::Open(
            pszDatasetForGeoRef, GDAL_OF_RASTER | GDAL_OF_VERBOSE_ERROR));
        if (poSrcDS)
        {
            bGotGeoTransform = poSrcDS->GetGeoTransform(gt) == CE_None;
        }
    }
    else
    {
        auto poSrcDS = GDALRasterBand::FromHandle(hSrcBand)->GetDataset();
        if (poSrcDS)
        {
            bGotGeoTransform = poSrcDS->GetGeoTransform(gt) == CE_None;
        }
    }
    if (!bGotGeoTransform)
    {
        gt = GDALGeoTransform();
    }

    /* -------------------------------------------------------------------- */
    /*      The first pass over the raster is only used to build up the     */
    /*      polygon id map so we will know in advance what polygons are     */
    /*      what on the second pass.                                        */
    /* -------------------------------------------------------------------- */
    GDALRasterPolygonEnumeratorT<DataType, EqualityTest> oFirstEnum(
        nConnectedness);

    CPLErr eErr = CE_None;

    for (int iY = 0; eErr == CE_None && iY < nYSize; iY++)
    {
        eErr = GDALRasterIO(hSrcBand, GF_Read, 0, iY, nXSize, 1, panThisLineVal,
                            nXSize, 1, eDT, 0, 0);

        if (eErr == CE_None && hMaskBand != nullptr)
            eErr = GPMaskImageData(hMaskBand, pabyMaskLine, iY, nXSize,
                                   panThisLineVal);

        if (eErr != CE_None)
            break;

        if (iY == 0)
            eErr = oFirstEnum.ProcessLine(nullptr, panThisLineVal, nullptr,
                                          panThisLineId, nXSize)
                       ? CE_None
                       : CE_Failure;
        else
            eErr = oFirstEnum.ProcessLine(panLastLineVal, panThisLineVal,
                                          panLastLineId, panThisLineId, nXSize)
                       ? CE_None
                       : CE_Failure;

        if (eErr != CE_None)
            break;

        // Swap lines.
        std::swap(panLastLineVal, panThisLineVal);
        std::swap(panLastLineId, panThisLineId);

        /* --------------------------------------------------------------------
         */
        /*      Report progress, and support interrupts. */
        /* --------------------------------------------------------------------
         */
        if (!pfnProgress(0.10 * ((iY + 1) / static_cast<double>(nYSize)), "",
                         pProgressArg))
        {
            CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
            eErr = CE_Failure;
        }
    }

    /* -------------------------------------------------------------------- */
    /*      Make a pass through the maps, ensuring every polygon id         */
    /*      points to the final id it should use, not an intermediate       */
    /*      value.                                                          */
    /* -------------------------------------------------------------------- */
    if (eErr == CE_None)
        oFirstEnum.CompleteMerges();

    /* -------------------------------------------------------------------- */
    /*      We will use a new enumerator for the second pass primarily      */
    /*      so we can preserve the first pass map.                          */
    /* -------------------------------------------------------------------- */
    GDALRasterPolygonEnumeratorT<DataType, EqualityTest> oSecondEnum(
        nConnectedness);

    OGRPolygonWriter<DataType> oPolygonWriter{
        hOutLayer, iPixValField, gt,
        atoi(CSLFetchNameValueDef(papszOptions, "COMMIT_INTERVAL", "100000"))};
    Polygonizer<GInt32, DataType> oPolygonizer{-1, &oPolygonWriter};
    TwoArm *paoLastLineArm =
        static_cast<TwoArm *>(VSI_CALLOC_VERBOSE(sizeof(TwoArm), nXSize + 2));
    TwoArm *paoThisLineArm =
        static_cast<TwoArm *>(VSI_CALLOC_VERBOSE(sizeof(TwoArm), nXSize + 2));

    if (paoThisLineArm == nullptr || paoLastLineArm == nullptr)
    {
        eErr = CE_Failure;
    }
    else
    {
        for (int i = 0; i < nXSize + 2; ++i)
        {
            paoLastLineArm[i].poPolyInside = oPolygonizer.getTheOuterPolygon();
        }
    }

    /* ==================================================================== */
    /*      Second pass during which we will actually collect polygon       */
    /*      edges as geometries.                                            */
    /* ==================================================================== */
    for (int iY = 0; eErr == CE_None && iY < nYSize + 1; iY++)
    {
        /* --------------------------------------------------------------------
         */
        /*      Read the image data. */
        /* --------------------------------------------------------------------
         */
        if (iY < nYSize)
        {
            eErr = GDALRasterIO(hSrcBand, GF_Read, 0, iY, nXSize, 1,
                                panThisLineVal, nXSize, 1, eDT, 0, 0);
            if (eErr == CE_None && hMaskBand != nullptr)
                eErr = GPMaskImageData(hMaskBand, pabyMaskLine, iY, nXSize,
                                       panThisLineVal);
        }

        if (eErr != CE_None)
            continue;

        /* --------------------------------------------------------------------
         */
        /*      Determine what polygon the various pixels belong to (redoing */
        /*      the same thing done in the first pass above). */
        /* --------------------------------------------------------------------
         */
        if (iY == nYSize)
        {
            for (int iX = 0; iX < nXSize; iX++)
                panThisLineId[iX] =
                    decltype(oPolygonizer)::THE_OUTER_POLYGON_ID;
        }
        else if (iY == 0)
        {
            eErr = oSecondEnum.ProcessLine(nullptr, panThisLineVal, nullptr,
                                           panThisLineId, nXSize)
                       ? CE_None
                       : CE_Failure;
        }
        else
        {
            eErr = oSecondEnum.ProcessLine(panLastLineVal, panThisLineVal,
                                           panLastLineId, panThisLineId, nXSize)
                       ? CE_None
                       : CE_Failure;
        }

        if (eErr != CE_None)
            continue;

        if (iY < nYSize)
        {
            for (int iX = 0; iX < nXSize; iX++)
            {
                // TODO: maybe we can reserve -1 as the lookup result for -1 polygon id in the panPolyIdMap,
                //       so the this expression becomes: panLastLineId[iX] = *(oFirstEnum.panPolyIdMap + panThisLineId[iX]).
                //       This would eliminate the condition checking.
                panLastLineId[iX] =
                    panThisLineId[iX] == -1
                        ? -1
                        : oFirstEnum.panPolyIdMap[panThisLineId[iX]];
            }

            if (!oPolygonizer.processLine(panLastLineId, panLastLineVal,
                                          paoThisLineArm, paoLastLineArm, iY,
                                          nXSize))
            {
                eErr = CE_Failure;
            }
            else
            {
                eErr = oPolygonWriter.getErr();
            }
        }
        else
        {
            if (!oPolygonizer.processLine(panThisLineId, panLastLineVal,
                                          paoThisLineArm, paoLastLineArm, iY,
                                          nXSize))
            {
                eErr = CE_Failure;
            }
            else
            {
                eErr = oPolygonWriter.getErr();
            }
        }

        if (eErr != CE_None)
            continue;

        /* --------------------------------------------------------------------
         */
        /*      Swap pixel value, and polygon id lines to be ready for the */
        /*      next line. */
        /* --------------------------------------------------------------------
         */
        std::swap(panLastLineVal, panThisLineVal);
        std::swap(panLastLineId, panThisLineId);
        std::swap(paoThisLineArm, paoLastLineArm);

        /* --------------------------------------------------------------------
         */
        /*      Report progress, and support interrupts. */
        /* --------------------------------------------------------------------
         */
        if (!pfnProgress(
                std::min(1.0, 0.10 + 0.90 * ((iY + 1) /
                                             static_cast<double>(nYSize))),
                "", pProgressArg))
        {
            CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
            eErr = CE_Failure;
        }
    }

    if (!oPolygonWriter.Finalize())
        eErr = CE_Failure;

    /* -------------------------------------------------------------------- */
    /*      Cleanup                                                         */
    /* -------------------------------------------------------------------- */
    CPLFree(panThisLineId);
    CPLFree(panLastLineId);
    CPLFree(panThisLineVal);
    CPLFree(panLastLineVal);
    CPLFree(paoThisLineArm);
    CPLFree(paoLastLineArm);
    CPLFree(pabyMaskLine);

    return eErr;
}

/******************************************************************************/
/*                       GDALFloatAlmostEquals()                              */
/* Code (originally) from:                                                    */
/* http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm  */
/******************************************************************************/

template <typename FloatType, typename IntType>
static inline bool GDALFloatAlmostEquals(FloatType A, FloatType B,
                                         unsigned maxUlps)
{
    static_assert(sizeof(FloatType) == sizeof(IntType));
    // This function will allow maxUlps-1 floats between A and B.

    // Make sure maxUlps is non-negative and small enough that the default NAN
    // won't compare as equal to anything.
    if (maxUlps >= 4 * 1024 * 1024)
    {
        CPLError(CE_Failure, CPLE_IllegalArg, "Invalid maxUlps");
        return false;
    }

    const auto MapToInteger = [](FloatType x)
    {
        IntType i = 0;
        memcpy(&i, &x, sizeof(i));

        constexpr int NBITS = 8 * static_cast<int>(sizeof(i)) - 1;
        constexpr IntType SHIFT = (static_cast<IntType>(1) << NBITS);

        // Make i lexicographically ordered with negative values
        // remapped to the [0, SHIFT[ range and positive values in the
        // [SHIFT, UINT_MAX[ range
        if ((i >> NBITS) != 0)
            i = SHIFT - (i & ~SHIFT);
        else
            i += SHIFT;
        return i;
    };

    const auto aInt = MapToInteger(A);
    const auto bInt = MapToInteger(B);

    return ((aInt > bInt) ? aInt - bInt : bInt - aInt) <= maxUlps;
}

bool GDALFloatAlmostEquals(float A, float B, unsigned maxUlps)
{
    return GDALFloatAlmostEquals<float, uint32_t>(A, B, maxUlps);
}

/******************************************************************************/
/*                         GDALDoubleAlmostEquals()                           */
/******************************************************************************/

bool GDALDoubleAlmostEquals(double A, double B, unsigned maxUlps)
{
    return GDALFloatAlmostEquals<double, uint64_t>(A, B, maxUlps);
}

/************************************************************************/
/*                           GDALPolygonize()                           */
/************************************************************************/

/**
 * Create polygon coverage from raster data.
 *
 * This function creates vector polygons for all connected regions of pixels in
 * the raster sharing a common pixel value.  Optionally each polygon may be
 * labeled with the pixel value in an attribute.  Optionally a mask band
 * can be provided to determine which pixels are eligible for processing.
 *
 * Note that currently the source pixel band values are read into a
 * signed 64bit integer buffer (Int64), so floating point or complex
 * bands will be implicitly truncated before processing. If you want to use a
 * version using 32bit or 64bit float buffers, see GDALFPolygonize().
 *
 * Polygon features will be created on the output layer, with polygon
 * geometries representing the polygons.  The polygon geometries will be
 * in the georeferenced coordinate system of the image (based on the
 * geotransform of the source dataset).  It is acceptable for the output
 * layer to already have features.  Note that GDALPolygonize() does not
 * set the coordinate system on the output layer.  Application code should
 * do this when the layer is created, presumably matching the raster
 * coordinate system.
 *
 * The algorithm used attempts to minimize memory use so that very large
 * rasters can be processed.  However, if the raster has many polygons
 * or very large/complex polygons, the memory use for holding polygon
 * enumerations and active polygon geometries may grow to be quite large.
 *
 * The algorithm will generally produce very dense polygon geometries, with
 * edges that follow exactly on pixel boundaries for all non-interior pixels.
 * For non-thematic raster data (such as satellite images) the result will
 * essentially be one small polygon per pixel, and memory and output layer
 * sizes will be substantial.  The algorithm is primarily intended for
 * relatively simple thematic imagery, masks, and classification results.
 *
 * @param hSrcBand the source raster band to be processed.
 * @param hMaskBand an optional mask band.  All pixels in the mask band with a
 * value other than zero will be considered suitable for collection as
 * polygons.
 * @param hOutLayer the vector feature layer to which the polygons should
 * be written.
 * @param iPixValField the attribute field index indicating the feature
 * attribute into which the pixel value of the polygon should be written. Or
 * -1 to indicate that the pixel value must not be written.
 * @param papszOptions a name/value list of additional options
 * <ul>
 * <li>8CONNECTED=8: May be set to "8" to use 8 connectedness.
 * Otherwise 4 connectedness will be applied to the algorithm</li>
 * <li>DATASET_FOR_GEOREF=dataset_name: Name of a dataset from which to read
 * the geotransform. This useful if hSrcBand has no related dataset, which is
 * typical for mask bands.</li>
 * <li>COMMIT_INTERVAL=num:
 * (GDAL >= 3.12) Interval in number of features at which transactions must be
 * flushed. A value of 0 means that no transactions are opened.
 * A negative value means a single transaction.
 * The default value is 100000.
 * The function takes care of issuing the starting transaction and committing
 * the final one.
 * </li>
 * </ul>
 * @param pfnProgress callback for reporting algorithm progress matching the
 * GDALProgressFunc() semantics.  May be NULL.
 * @param pProgressArg callback argument passed to pfnProgress.
 *
 * @return CE_None on success or CE_Failure on a failure.
 */

CPLErr CPL_STDCALL GDALPolygonize(GDALRasterBandH hSrcBand,
                                  GDALRasterBandH hMaskBand,
                                  OGRLayerH hOutLayer, int iPixValField,
                                  char **papszOptions,
                                  GDALProgressFunc pfnProgress,
                                  void *pProgressArg)

{
    return GDALPolygonizeT<std::int64_t, IntEqualityTest>(
        hSrcBand, hMaskBand, hOutLayer, iPixValField, papszOptions, pfnProgress,
        pProgressArg, GDT_Int64);
}

/************************************************************************/
/*                           GDALFPolygonize()                           */
/************************************************************************/

/**
 * Create polygon coverage from raster data.
 *
 * This function creates vector polygons for all connected regions of pixels in
 * the raster sharing a common pixel value.  Optionally each polygon may be
 * labeled with the pixel value in an attribute.  Optionally a mask band
 * can be provided to determine which pixels are eligible for processing.
 *
 * The source pixel band values are read into a 32-bit float buffer, or 64-bit
 * float if the source band is 64-bit float itself. If you want
 * to use a (probably faster) version using signed 32bit integer buffer, see
 * GDALPolygonize().
 *
 * Polygon features will be created on the output layer, with polygon
 * geometries representing the polygons.  The polygon geometries will be
 * in the georeferenced coordinate system of the image (based on the
 * geotransform of the source dataset).  It is acceptable for the output
 * layer to already have features.  Note that GDALFPolygonize() does not
 * set the coordinate system on the output layer.  Application code should
 * do this when the layer is created, presumably matching the raster
 * coordinate system.
 *
 * The algorithm used attempts to minimize memory use so that very large
 * rasters can be processed.  However, if the raster has many polygons
 * or very large/complex polygons, the memory use for holding polygon
 * enumerations and active polygon geometries may grow to be quite large.
 *
 * The algorithm will generally produce very dense polygon geometries, with
 * edges that follow exactly on pixel boundaries for all non-interior pixels.
 * For non-thematic raster data (such as satellite images) the result will
 * essentially be one small polygon per pixel, and memory and output layer
 * sizes will be substantial.  The algorithm is primarily intended for
 * relatively simple thematic imagery, masks, and classification results.
 *
 * @param hSrcBand the source raster band to be processed.
 * @param hMaskBand an optional mask band.  All pixels in the mask band with a
 * value other than zero will be considered suitable for collection as
 * polygons.
 * @param hOutLayer the vector feature layer to which the polygons should
 * be written.
 * @param iPixValField the attribute field index indicating the feature
 * attribute into which the pixel value of the polygon should be written. Or
 * -1 to indicate that the pixel value must not be written.
 * @param papszOptions a name/value list of additional options
 * <ul>
 * <li>8CONNECTED=8: May be set to "8" to use 8 connectedness.
 * Otherwise 4 connectedness will be applied to the algorithm</li>
 * <li>DATASET_FOR_GEOREF=dataset_name: Name of a dataset from which to read
 * the geotransform. This useful if hSrcBand has no related dataset, which is
 * typical for mask bands.</li>
 * <li>COMMIT_INTERVAL=num:
 * (GDAL >= 3.12) Interval in number of features at which transactions must be
 * flushed. A value of 0 means that no transactions are opened.
 * A negative value means a single transaction.
 * The default value is 100000.
 * The function takes care of issuing the starting transaction and committing
 * the final one.
 * </li>
 * </ul>
 * @param pfnProgress callback for reporting algorithm progress matching the
 * GDALProgressFunc() semantics.  May be NULL.
 * @param pProgressArg callback argument passed to pfnProgress.
 *
 * @return CE_None on success or CE_Failure on a failure.
 *
 */

CPLErr CPL_STDCALL GDALFPolygonize(GDALRasterBandH hSrcBand,
                                   GDALRasterBandH hMaskBand,
                                   OGRLayerH hOutLayer, int iPixValField,
                                   char **papszOptions,
                                   GDALProgressFunc pfnProgress,
                                   void *pProgressArg)

{
    if (GDALGetRasterDataType(hSrcBand) == GDT_Float64)
    {
        return GDALPolygonizeT<double, DoubleEqualityTest>(
            hSrcBand, hMaskBand, hOutLayer, iPixValField, papszOptions,
            pfnProgress, pProgressArg, GDT_Float64);
    }
    else
    {
        return GDALPolygonizeT<float, FloatEqualityTest>(
            hSrcBand, hMaskBand, hOutLayer, iPixValField, papszOptions,
            pfnProgress, pProgressArg, GDT_Float32);
    }
}