1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
|
/***********************************************************************
* pc_dimstats.c
*
* Support for "dimensional compression", which is a catch-all
* term for applying compression separately on each dimension
* of a PCPATCH collection of PCPOINTS.
*
* Depending on the character of the data, one of these schemes
* will be used:
*
* - run-length encoding
* - significant-bit removal
* - deflate
*
* PgSQL Pointcloud is free and open source software provided
* by the Government of Canada
* Copyright (c) 2013 Natural Resources Canada
*
***********************************************************************/
#include "pc_api_internal.h"
#include "stringbuffer.h"
#include <assert.h>
#include <stdarg.h>
/**
 * Allocate a statistics accumulator sized for the given schema.
 *
 * One PCDIMSTAT slot is allocated per schema dimension and the dimension
 * count is recorded in the returned structure.
 *
 * NOTE(review): the counters are never explicitly reset here, so this
 * presumably relies on pcalloc() returning zero-filled memory -- confirm.
 *
 * @param schema schema whose ndims determines the number of stat slots
 * @return newly allocated PCDIMSTATS; release with pc_dimstats_free()
 */
PCDIMSTATS *pc_dimstats_make(const PCSCHEMA *schema)
{
  PCDIMSTATS *result = pcalloc(sizeof(*result));

  result->ndims = schema->ndims;
  /* One accumulator entry per dimension */
  result->stats = pcalloc(result->ndims * sizeof(PCDIMSTAT));

  return result;
}
/**
 * Release a statistics accumulator and its per-dimension array.
 *
 * Passing NULL is a no-op, mirroring free(NULL), so callers can use this
 * unconditionally on cleanup paths.
 *
 * @param pds accumulator to free, or NULL
 */
void pc_dimstats_free(PCDIMSTATS *pds)
{
  if (!pds)
    return;

  /* Guard kept: whether pcfree() tolerates NULL is not visible from here */
  if (pds->stats)
    pcfree(pds->stats);

  pcfree(pds);
}
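/*
For reference, the layout of the statistics structures used in this file.
This is a quoted copy only; the live declarations are not in this section.

typedef struct
{
uint32_t total_runs;
uint32_t total_commonbits;
uint32_t recommended_compression;
} PCDIMSTAT;
typedef struct
{
int32_t ndims;
uint32_t total_points;
uint32_t total_patches;
PCDIMSTAT *stats;
} PCDIMSTATS;
*/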
/**
 * Render dimensional statistics as a JSON-formatted string.
 *
 * Output shape:
 *   {"ndims":N,"total_points":N,"total_patches":N,
 *    "dims":[{"total_runs":N,"total_commonbits":N,
 *             "recommended_compression":N}, ...]}
 *
 * @param pds statistics to render; stats must hold at least ndims entries
 * @return the string produced by stringbuffer_getstringcopy(); presumably
 *         owned by the caller -- confirm against the stringbuffer API
 */
char *pc_dimstats_to_string(const PCDIMSTATS *pds)
{
  int i;
  stringbuffer_t *sb = stringbuffer_create();
  char *str;

  /*
   * The counters are unsigned 32-bit values (see the struct layout quoted
   * in this file), so they are printed with %u; %d would show anything
   * above INT_MAX as a negative number. The casts make the argument types
   * match the conversion specifiers exactly.
   */
  stringbuffer_aprintf(
      sb, "{\"ndims\":%d,\"total_points\":%u,\"total_patches\":%u,\"dims\":[",
      (int)pds->ndims, (unsigned int)pds->total_points,
      (unsigned int)pds->total_patches);

  for (i = 0; i < pds->ndims; i++)
  {
    /* Comma-separate every entry after the first */
    if (i)
      stringbuffer_append(sb, ",");

    stringbuffer_aprintf(sb,
                         "{\"total_runs\":%u,\"total_commonbits\":%u,"
                         "\"recommended_compression\":%u}",
                         (unsigned int)pds->stats[i].total_runs,
                         (unsigned int)pds->stats[i].total_commonbits,
                         (unsigned int)pds->stats[i].recommended_compression);
  }

  stringbuffer_append(sb, "]}");

  /* Take a copy of the text before the buffer is destroyed */
  str = stringbuffer_getstringcopy(sb);
  stringbuffer_destroy(sb);
  return str;
}
/**
 * Fold one dimensional patch into the running statistics and refresh the
 * recommended compression scheme for every dimension.
 *
 * For each dimension the estimated raw, run-length and significant-bits
 * sizes are compared. The recommendation defaults to PC_DIM_ZLIB; for
 * dimensions whose interpretation is not PC_DOUBLE it becomes
 * PC_DIM_SIGBITS when the estimated sigbits ratio exceeds 1.6:1, and
 * PC_DIM_RLE (overriding sigbits) when the estimated RLE ratio exceeds 4:1.
 *
 * All size estimates are computed in double precision so that large
 * accumulated counters cannot wrap in integer arithmetic and the average
 * number of common bits per patch keeps its fractional part.
 *
 * @param pds running statistics, updated in place
 * @param pdl patch to add; pdl->bytes must hold at least pds->ndims entries
 *            and pdl->schema must describe those dimensions
 * @return PC_SUCCESS (no failure path exists in this function)
 */
int pc_dimstats_update(PCDIMSTATS *pds, const PCPATCH_DIMENSIONAL *pdl)
{
  int i;
  const PCSCHEMA *schema = pdl->schema;

  /* Update global stats */
  pds->total_points += pdl->npoints;
  pds->total_patches += 1;

  /* Update dimensional stats */
  for (i = 0; i < pds->ndims; i++)
  {
    PCBYTES pcb = pdl->bytes[i];
    pds->stats[i].total_runs += pc_bytes_run_count(&pcb);
    pds->stats[i].total_commonbits += pc_bytes_sigbits_count(&pcb);
  }

  /* Update recommended compression scheme */
  for (i = 0; i < pds->ndims; i++)
  {
    PCDIMENSION *dim = pc_schema_get_dimension(schema, i);

    /* Widen everything up front: no integer overflow, no truncation */
    const double dim_size = (double)dim->size;
    const double n_points = (double)pds->total_points;
    const double n_patches = (double)pds->total_patches; /* >= 1 after the increment above */
    const double n_runs = (double)pds->stats[i].total_runs;
    const double n_commonbits = (double)pds->stats[i].total_commonbits;

    /* Uncompressed size, for each point, one value entry */
    const double raw_size = n_points * dim_size;

    /* RLE size, for each run, one count byte and one value entry */
    const double rle_size = n_runs * (dim_size + 1.0);

    /* Sigbits size, for each patch, one header and n bits for each entry */
    const double avg_commonbits_per_patch = n_commonbits / n_patches;
    const double avg_uniquebits_per_patch =
        8.0 * dim_size - avg_commonbits_per_patch;
    const double sigbits_size = n_patches * 2.0 * dim_size +
                                n_points * avg_uniquebits_per_patch / 8.0;

    /* Default to ZLib */
    pds->stats[i].recommended_compression = PC_DIM_ZLIB;

    /*
     * RLE and sigbits are only considered for non-PC_DOUBLE dimensions.
     * NOTE(review): the original comment said "integer values", but only
     * PC_DOUBLE is excluded here -- confirm whether other floating-point
     * interpretations should be excluded too.
     */
    if (dim->interpretation != PC_DOUBLE)
    {
      /* If sigbits is better than 1.6:1, use that */
      if (raw_size / sigbits_size > 1.6)
      {
        pds->stats[i].recommended_compression = PC_DIM_SIGBITS;
      }
      /* If RLE is better than 4:1 it might beat zlib; it overrides sigbits */
      if (raw_size / rle_size > 4.0)
      {
        pds->stats[i].recommended_compression = PC_DIM_RLE;
      }
    }
  }

  return PC_SUCCESS;
}
|