1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383
|
-- This file and its contents are licensed under the Apache License 2.0.
-- Please see the included NOTICE for copyright information and
-- LICENSE-APACHE for a copy of the license.
-- This function return a jsonb with the following keys:
-- - columns: an array of column names that shold be used for segment by
-- - confidence: a number between 0 and 10 (most confident) indicating how sure we are.
-- - message: a message that should be displayed to the user to evaluate the result.
CREATE OR REPLACE FUNCTION _timescaledb_functions.get_segmentby_defaults(
relation regclass
)
RETURNS JSONB LANGUAGE PLPGSQL AS
$BODY$
DECLARE
_table_name NAME;
_schema_name NAME;
_hypertable_row _timescaledb_catalog.hypertable;
_segmentby NAME;
_cnt int;
BEGIN
SELECT n.nspname, c.relname INTO STRICT _schema_name, _table_name
FROM pg_class c
INNER JOIN pg_namespace n ON (n.oid = c.relnamespace)
WHERE c.oid = relation;
SELECT * INTO STRICT _hypertable_row FROM _timescaledb_catalog.hypertable h WHERE h.table_name = _table_name AND h.schema_name = _schema_name;
--STEP 1 if column stats exist use unique indexes.
--Pick the column that comes first in any such indexes
--Select the column such that tuples are segmented evenly across distinct values.
--Note: this will only pick a column that is NOT unique in a multi-column unique index.
with index_attr as (
SELECT
a.attnum, min(a.pos) as pos
FROM (
SELECT indkey, indnkeyatts
FROM pg_catalog.pg_index
WHERE indisunique AND indrelid = relation
) i
INNER JOIN LATERAL (
SELECT * FROM unnest(i.indkey) WITH ORDINALITY
) a(attnum, pos) ON TRUE
WHERE a.pos <= i.indnkeyatts
GROUP BY a.attnum
),
stats_with_stddev as (
SELECT
a.attname,
i.pos,
ROUND(stddev_pop(freqs)::numeric, 5) as freq_stddev
FROM index_attr i
INNER JOIN pg_attribute a ON a.attnum = i.attnum AND a.attrelid = relation
INNER JOIN pg_type t ON t.oid = a.atttypid
INNER JOIN pg_stats s ON s.attname = a.attname
AND s.schemaname = _schema_name
AND s.tablename = _table_name
AND s.inherited = true
LEFT JOIN LATERAL unnest(s.most_common_freqs) as freqs ON TRUE
WHERE a.attname NOT IN (
SELECT column_name
FROM _timescaledb_catalog.dimension d
WHERE d.hypertable_id = _hypertable_row.id
)
AND s.n_distinct > 1
-- exclude date/time type category
AND t.typcategory NOT IN ('D')
GROUP BY a.attname, i.pos
)
SELECT attname
INTO _segmentby
FROM stats_with_stddev
ORDER BY pos ASC, freq_stddev ASC NULLS LAST
LIMIT 1;
IF FOUND THEN
return json_build_object('columns', json_build_array(_segmentby), 'confidence', 10);
END IF;
--STEP 2 if column stats exist and no unique indexes use non-unique indexes.
--Pick the column that comes first in any such indexes
--Select the column such that tuples are segmented evenly across distinct values.
with index_attr as (
SELECT
a.attnum, min(a.pos) as pos
FROM
(select indkey, indnkeyatts from pg_catalog.pg_index where NOT indisunique and indrelid = relation) i
INNER JOIN LATERAL
(select * from unnest(i.indkey) with ordinality) a(attnum, pos) ON (TRUE)
WHERE a.pos <= i.indnkeyatts
GROUP BY 1
),
stats_with_stddev as (
SELECT
a.attname,
i.pos,
ROUND(stddev_pop(freqs)::numeric, 5) as freq_stddev
FROM index_attr i
INNER JOIN pg_attribute a ON a.attnum = i.attnum AND a.attrelid = relation
INNER JOIN pg_type t ON t.oid = a.atttypid
INNER JOIN pg_stats s ON s.attname = a.attname
AND s.schemaname = _schema_name
AND s.tablename = _table_name
AND s.inherited = true
LEFT JOIN LATERAL unnest(s.most_common_freqs) as freqs ON TRUE
WHERE a.attname NOT IN (
SELECT column_name
FROM _timescaledb_catalog.dimension d
WHERE d.hypertable_id = _hypertable_row.id
)
AND s.n_distinct > 1
AND t.typcategory NOT IN ('D')
GROUP BY a.attname, i.pos
)
SELECT attname
INTO _segmentby
FROM stats_with_stddev
ORDER BY pos ASC, freq_stddev ASC NULLS LAST
LIMIT 1;
IF FOUND THEN
return json_build_object('columns', json_build_array(_segmentby), 'confidence', 8);
END IF;
--STEP 3 if column stats exist but there are no indexes
--Select the column such that tuples are segmented evenly across distinct values.
with stats_with_stddev as (
SELECT
a.attname,
ROUND(stddev_pop(freqs)::numeric, 5) as freq_stddev
FROM pg_attribute a
INNER JOIN pg_type t ON t.oid = a.atttypid
INNER JOIN pg_stats s ON s.attname = a.attname
AND s.schemaname = _schema_name
AND s.tablename = _table_name
AND s.inherited = true
LEFT JOIN LATERAL unnest(s.most_common_freqs) as freqs ON TRUE
WHERE a.attrelid = relation
AND a.attname NOT IN (
SELECT column_name
FROM _timescaledb_catalog.dimension d
WHERE d.hypertable_id = _hypertable_row.id
)
AND s.n_distinct > 1
AND t.typcategory NOT IN ('D')
GROUP BY a.attname
)
SELECT attname
INTO _segmentby
FROM stats_with_stddev
ORDER BY freq_stddev ASC NULLS LAST
LIMIT 1;
IF FOUND THEN
return json_build_object('columns', json_build_array(_segmentby), 'confidence', 7);
END IF;
--STEP 4 if column stats do not exist use non-unique indexes. Pick the column that comes first in any such indexes. Ties are broken arbitrarily.
with index_attr as (
SELECT
a.attnum, min(a.pos) as pos
FROM
(select indkey, indnkeyatts from pg_catalog.pg_index where NOT indisunique and indrelid = relation) i
INNER JOIN LATERAL
(select * from unnest(i.indkey) with ordinality) a(attnum, pos) ON (TRUE)
WHERE a.pos <= i.indnkeyatts
GROUP BY 1
)
SELECT
a.attname INTO _segmentby
FROM
index_attr i
INNER JOIN
pg_attribute a on (a.attnum = i.attnum AND a.attrelid = relation)
INNER JOIN
pg_type t ON t.oid = a.atttypid
LEFT JOIN
pg_catalog.pg_attrdef ad ON (ad.adrelid = relation AND ad.adnum = a.attnum)
LEFT JOIN pg_stats s ON s.attname = a.attname
AND s.schemaname = _schema_name
AND s.tablename = _table_name
AND s.inherited = true
WHERE
a.attname NOT IN (SELECT column_name FROM _timescaledb_catalog.dimension d WHERE d.hypertable_id = _hypertable_row.id)
AND s.n_distinct is null
AND a.attidentity = '' AND (ad.adbin IS NULL OR pg_get_expr(adbin, adrelid) not like 'nextval%')
AND t.typcategory NOT IN ('D')
ORDER BY i.pos
LIMIT 1;
IF FOUND THEN
return json_build_object(
'columns', json_build_array(_segmentby),
'confidence', 5,
'message', 'Please make sure '|| _segmentby||' is not a unique column and appropriate for a segment by');
END IF;
--STEP 5 if column stats do not exist and no non-unique indexes, use unique indexes. Pick the column that comes first in any such indexes. Ties are broken arbitrarily.
with index_attr as (
SELECT
a.attnum, min(a.pos) as pos
FROM
(select indkey, indnkeyatts from pg_catalog.pg_index where indisunique and indrelid = relation) i
INNER JOIN LATERAL
(select * from unnest(i.indkey) with ordinality) a(attnum, pos) ON (TRUE)
WHERE a.pos <= i.indnkeyatts
GROUP BY 1
)
SELECT
a.attname INTO _segmentby
FROM
index_attr i
INNER JOIN
pg_attribute a on (a.attnum = i.attnum AND a.attrelid = relation)
INNER JOIN
pg_type t ON t.oid = a.atttypid
LEFT JOIN
pg_catalog.pg_attrdef ad ON (ad.adrelid = relation AND ad.adnum = a.attnum)
LEFT JOIN pg_stats s ON s.attname = a.attname
AND s.schemaname = _schema_name
AND s.tablename = _table_name
AND s.inherited = true
WHERE
a.attname NOT IN (SELECT column_name FROM _timescaledb_catalog.dimension d WHERE d.hypertable_id = _hypertable_row.id)
AND s.n_distinct is null
AND a.attidentity = '' AND (ad.adbin IS NULL OR pg_get_expr(adbin, adrelid) not like 'nextval%')
AND t.typcategory NOT IN ('D')
ORDER BY i.pos
LIMIT 1;
IF FOUND THEN
return json_build_object(
'columns', json_build_array(_segmentby),
'confidence', 5,
'message', 'Please make sure '|| _segmentby||' is not a unique column and appropriate for a segment by');
END IF;
--are there any indexed columns that are not dimemsions and are not serial/identity?
with index_attr as (
SELECT
a.attnum, min(a.pos) as pos
FROM
(select indkey, indnkeyatts from pg_catalog.pg_index where indisunique and indrelid = relation) i
INNER JOIN LATERAL
(select * from unnest(i.indkey) with ordinality) a(attnum, pos) ON (TRUE)
WHERE a.pos <= i.indnkeyatts
GROUP BY 1
)
SELECT
count(*) INTO STRICT _cnt
FROM
index_attr i
INNER JOIN
pg_attribute a on (a.attnum = i.attnum AND a.attrelid = relation)
INNER JOIN
pg_type t ON t.oid = a.atttypid
LEFT JOIN
pg_catalog.pg_attrdef ad ON (ad.adrelid = relation AND ad.adnum = a.attnum)
WHERE
a.attname NOT IN (SELECT column_name FROM _timescaledb_catalog.dimension d WHERE d.hypertable_id = _hypertable_row.id)
AND a.attidentity = '' AND (ad.adbin IS NULL OR pg_get_expr(adbin, adrelid) not like 'nextval%')
AND t.typcategory NOT IN ('D');
IF _cnt > 0 THEN
--there are many potential candidates. We do not have enough information to choose one.
return json_build_object(
'columns', json_build_array(),
'confidence', 0,
'message', 'Several columns are potential segment by candidates and we do not have enough information to choose one. Please use the segment_by option to explicitly specify the segment_by column');
ELSE
--there are no potential candidates. There is a good chance no segment by is the correct choice.
return json_build_object(
'columns', json_build_array(),
'confidence', 5,
'message', 'You do not have any indexes on columns that can be used for segment_by and thus we are not using segment_by for converting to columnstore. Please make sure you are not missing any indexes');
END IF;
END
$BODY$ SET search_path TO pg_catalog, pg_temp;
-- This function return a jsonb with the following keys:
-- - clauses: an array of column names and sort order key words that shold be used for order by.
-- - confidence: a number between 0 and 10 (most confident) indicating how sure we are.
-- - message: a message that should be shown to the user to evaluate the result.
CREATE OR REPLACE FUNCTION _timescaledb_functions.get_orderby_defaults(
relation regclass, segment_by_cols text[]
)
RETURNS JSONB LANGUAGE PLPGSQL AS
$BODY$
DECLARE
_table_name NAME;
_schema_name NAME;
_hypertable_row _timescaledb_catalog.hypertable;
_orderby_names NAME[];
_dimension_names NAME[];
_first_index_attrs NAME[];
_orderby_clauses text[];
_confidence int;
BEGIN
SELECT n.nspname, c.relname INTO STRICT _schema_name, _table_name
FROM pg_class c
INNER JOIN pg_namespace n ON (n.oid = c.relnamespace)
WHERE c.oid = relation;
SELECT * INTO STRICT _hypertable_row FROM _timescaledb_catalog.hypertable h WHERE h.table_name = _table_name AND h.schema_name = _schema_name;
--start with the unique index columns minus the segment by columns
with index_attr as (
SELECT
a.attnum, min(a.pos) as pos
FROM
--is there a better way to pick the right unique index if there are multiple?
(select indkey, indnkeyatts from pg_catalog.pg_index where indisunique and indrelid = relation limit 1) i
INNER JOIN LATERAL
(select * from unnest(i.indkey) with ordinality) a(attnum, pos) ON (TRUE)
WHERE a.pos <= i.indnkeyatts
GROUP BY 1
)
SELECT
array_agg(a.attname ORDER BY i.pos) INTO _orderby_names
FROM
index_attr i
INNER JOIN
pg_attribute a on (a.attnum = i.attnum AND a.attrelid = relation)
WHERE
NOT(a.attname::text = ANY (segment_by_cols));
if _orderby_names is null then
_orderby_names := array[]::name[];
_confidence := 5;
else
_confidence := 8;
end if;
--add dimension colomns to the end. A dimension column like time should probably always be part of the order by.
SELECT
array_agg(d.column_name) INTO _dimension_names
FROM _timescaledb_catalog.dimension d
WHERE
d.hypertable_id = _hypertable_row.id
AND NOT(d.column_name::text = ANY (_orderby_names))
AND NOT(d.column_name::text = ANY (segment_by_cols));
_orderby_names := _orderby_names || _dimension_names;
--add the first attribute of any index
with index_attr as (
SELECT
a.attnum, min(a.pos) as pos
FROM
(select indkey, indnkeyatts from pg_catalog.pg_index where indrelid = relation) i
INNER JOIN LATERAL
(select * from unnest(i.indkey) with ordinality) a(attnum, pos) ON (TRUE)
WHERE a.pos = 1
GROUP BY 1
)
SELECT
array_agg(a.attname ORDER BY i.pos) INTO _first_index_attrs
FROM
index_attr i
INNER JOIN
pg_attribute a on (a.attnum = i.attnum AND a.attrelid = relation)
WHERE
NOT(a.attname::text = ANY (_orderby_names))
AND NOT(a.attname::text = ANY (segment_by_cols));
_orderby_names := _orderby_names || _first_index_attrs;
--add DESC to any dimensions
SELECT
coalesce(array_agg(
CASE WHEN d.column_name IS NULL THEN
format('%I', a.colname)
ELSE
format('%I DESC', a.colname)
END ORDER BY pos), array[]::text[]) INTO STRICT _orderby_clauses
FROM unnest(_orderby_names) WITH ORDINALITY as a(colname, pos)
LEFT JOIN _timescaledb_catalog.dimension d ON (d.column_name = a.colname AND d.hypertable_id = _hypertable_row.id);
return json_build_object('clauses', _orderby_clauses, 'confidence', _confidence);
END
$BODY$ SET search_path TO pg_catalog, pg_temp;
|