1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
|
drop table if exists t;
create table t (v double precision);
drop table if exists datasets;
create table datasets (ds_name text, ds_sql text);
insert into datasets values ('uniform', 'with d as (select pow(random(), 1) as v from generate_series(1,%s))
insert into t select v from (select v, generate_series(1, %s + (%s * random())::int) from d) foo');
insert into datasets values ('pow(2)', 'with d as (select pow(random(), 2) as v from generate_series(1,%s))
insert into t select v from (select v, generate_series(1, %s + (%s * random())::int) from d) foo');
insert into datasets values ('pow(4)', 'with d as (select pow(random(), 4) as v from generate_series(1,%s))
insert into t select v from (select v, generate_series(1, %s + (%s * random())::int) from d) foo');
insert into datasets values ('pow(0.5)', 'with d as (select pow(random(), 0.5) as v from generate_series(1,%s))
insert into t select v from (select v, generate_series(1, %s + (%s * random())::int) from d) foo');
insert into datasets values ('pow(0.25)', 'with d as (select pow(random(), 0.25) as v from generate_series(1,%s))
insert into t select v from (select v, generate_series(1, %s + (%s * random())::int) from d) foo');
insert into datasets values ('1 - pow(2)', 'with d as (select 1.0 - pow(random(), 2) as v from generate_series(1,%s))
insert into t select v from (select v, generate_series(1, %s + (%s * random())::int) from d) foo');
insert into datasets values ('1 - pow(4)', 'with d as (select 1.0 - pow(random(), 4) as v from generate_series(1,%s))
insert into t select v from (select v, generate_series(1, %s + (%s * random())::int) from d) foo');
insert into datasets values ('1 - pow(0.5)', 'with d as (select 1.0 - pow(random(), 0.5) as v from generate_series(1,%s))
insert into t select v from (select v, generate_series(1, %s + (%s * random())::int) from d) foo');
insert into datasets values ('1 - pow(0.25)', 'with d as (select 1.0 - pow(random(), 0.25) as v from generate_series(1,%s))
insert into t select v from (select v, generate_series(1, %s + (%s * random())::int) from d) foo');
create or replace function test_queries(npercentiles int, p double precision, nvalues int, minvalues int, maxvalues int, out dataset text,
out simple_random double precision, out simple_asc double precision, out simple_desc double precision,
out preagg_random double precision, out preagg_asc double precision, out preagg_desc double precision,
out simple_asc_cmp double precision, out simple_desc_cmp double precision,
out preagg_random_cmp double precision, out preagg_asc_cmp double precision,
out preagg_desc_cmp double precision)
returns setof record language plpgsql as $$
declare
d record;
perc_cont_percs double precision[];
simple_random_percs double precision[];
simple_asc_percs double precision[];
simple_desc_percs double precision[];
preagg_random_percs double precision[];
preagg_asc_percs double precision[];
preagg_desc_percs double precision[];
percs double precision[];
run int;
tmp_simple_random double precision;
tmp_simple_asc double precision;
tmp_simple_desc double precision;
tmp_preagg_random double precision;
tmp_preagg_asc double precision;
tmp_preagg_desc double precision;
begin
raise notice 'percentiles % range % values % min % max %', npercentiles, p, nvalues, minvalues, maxvalues;
-- generate percentiles
select array_agg(x) into percs from (
select i::double precision / npercentiles as x from generate_series(1,npercentiles) s(i)
) foo where x <= p or x > 1.0 - p;
for d in (select * from datasets order by ds_name) loop
simple_random := 0;
simple_asc := 0;
simple_desc := 0;
preagg_random := 0;
preagg_asc := 0;
preagg_desc := 0;
for run in 1..10 loop
-- rebuild the table
execute 'truncate t';
execute format(d.ds_sql, nvalues, minvalues, (maxvalues - minvalues));
execute 'analyze t';
dataset := d.ds_name;
select percentile_cont(percs) within group (order by v) into perc_cont_percs from (select * from t) d;
select tdigest_percentile(v, 100, percs) into simple_random_percs from (select * from t order by random()) d;
select tdigest_percentile(v, 100, percs) into simple_asc_percs from (select * from t order by v) d;
select tdigest_percentile(v, 100, percs) into simple_desc_percs from (select * from t order by v desc) d;
select tdigest_percentile(v, c, 100, percs) into preagg_random_percs from (select v, count(*) as c from t group by v order by random()) d;
select tdigest_percentile(v, c, 100, percs) into preagg_asc_percs from (select v, count(*) as c from t group by v order by v) d;
select tdigest_percentile(v, c, 100, percs) into preagg_desc_percs from (select v, count(*) as c from t group by v order by v desc) d;
select sqrt(sum(pow(a-b,2))) into tmp_simple_random from (select unnest(perc_cont_percs) as a, unnest(simple_random_percs) as b) d;
select sqrt(sum(pow(a-b,2))) into tmp_simple_asc from (select unnest(perc_cont_percs) as a, unnest(simple_asc_percs) as b) d;
select sqrt(sum(pow(a-b,2))) into tmp_simple_desc from (select unnest(perc_cont_percs) as a, unnest(simple_desc_percs) as b) d;
select sqrt(sum(pow(a-b,2))) into tmp_preagg_random from (select unnest(perc_cont_percs) as a, unnest(preagg_random_percs) as b) d;
select sqrt(sum(pow(a-b,2))) into tmp_preagg_asc from (select unnest(perc_cont_percs) as a, unnest(preagg_asc_percs) as b) d;
select sqrt(sum(pow(a-b,2))) into tmp_preagg_desc from (select unnest(perc_cont_percs) as a, unnest(preagg_desc_percs) as b) d;
simple_random := simple_random + tmp_simple_random / 10;
simple_asc := simple_asc + tmp_simple_asc / 10;
simple_desc := simple_desc + tmp_simple_desc / 10;
preagg_random := preagg_random + tmp_preagg_random / 10;
preagg_asc := preagg_asc + tmp_preagg_asc / 10;
preagg_desc := preagg_desc + tmp_preagg_desc / 10;
end loop;
simple_asc_cmp := round((simple_asc / simple_random)::numeric, 2);
simple_desc_cmp := round((simple_desc / simple_random)::numeric, 2);
preagg_random_cmp := round((preagg_random / simple_random)::numeric, 2);
preagg_asc_cmp := round((preagg_asc / simple_random)::numeric, 2);
preagg_desc_cmp := round((preagg_desc / simple_random)::numeric, 2);
simple_random := round(simple_random::numeric, 6);
simple_asc := round(simple_asc::numeric, 6);
simple_desc := round(simple_desc::numeric, 6);
preagg_random := round(preagg_random::numeric, 6);
preagg_asc := round(preagg_asc::numeric, 6);
preagg_desc := round(preagg_desc::numeric, 6);
return next;
end loop;
return;
end;
$$;
select * from test_queries(1000, 0.01, 10000, 1, 1);
select * from test_queries(1000, 0.05, 10000, 1, 1);
select * from test_queries(1000, 0.1, 10000, 1, 1);
select * from test_queries(1000, 0.2, 10000, 1, 1);
select * from test_queries(1000, 0.3, 10000, 1, 1);
select * from test_queries(1000, 0.4, 10000, 1, 1);
select * from test_queries(1000, 0.5, 10000, 1, 1);
select * from test_queries(1000, 0.01, 1000, 10, 20);
select * from test_queries(1000, 0.05, 1000, 10, 20);
select * from test_queries(1000, 0.1, 1000, 10, 20);
select * from test_queries(1000, 0.2, 1000, 10, 20);
select * from test_queries(1000, 0.3, 1000, 10, 20);
select * from test_queries(1000, 0.4, 1000, 10, 20);
select * from test_queries(1000, 0.5, 1000, 10, 20);
select * from test_queries(1000, 0.01, 10000, 10, 20);
select * from test_queries(1000, 0.05, 10000, 10, 20);
select * from test_queries(1000, 0.1, 10000, 10, 20);
select * from test_queries(1000, 0.2, 10000, 10, 20);
select * from test_queries(1000, 0.3, 10000, 10, 20);
select * from test_queries(1000, 0.4, 10000, 10, 20);
select * from test_queries(1000, 0.5, 10000, 10, 20);
|