File: hash_func.test

package info (click to toggle)
duckdb 1.5.1-2
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 299,196 kB
  • sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 558
file content (253 lines) | stat: -rw-r--r-- 5,440 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# name: test/sql/function/generic/hash_func.test
# description: Test HASH function
# group: [generic]

# Unsupported types

foreach datatype ANY HASH POINTER

statement error
SELECT HASH(NULL::${datatype});
----

endloop

# NULLS of all scalar types should produce the same value on all platforms

foreach datatype <alltypes> CHAR BLOB DATE TIME TIMETZ TIMESTAMP TIMESTAMPTZ DECIMAL(16,4) UUID

query I
SELECT HASH(NULL::${datatype});
----
13787848793156543929

endloop

#
# Nested types
#

# Structs
statement ok
CREATE TABLE structs AS
	SELECT * FROM (VALUES
		({'i': 5, 's': 'string'}),
		({'i': -2, 's': NULL}),
		({'i': NULL, 's': 'not null'}),
		({'i': NULL, 's': NULL}),
		(NULL)
) tbl(s);

query II
SELECT s, HASH(s) FROM structs
----
{'i': 5, 's': string}	312378390946197788
{'i': -2, 's': NULL}	13311620765177879553
{'i': NULL, 's': not null}	12187543307399756733
{'i': NULL, 's': NULL}	18212156630472451589
NULL	18212156630472451589

# Lists
statement ok
CREATE TABLE lists AS
	SELECT * FROM (VALUES
		([1], ['TGTA']),
		([1, 2], ['CGGT']),
		([], ['CCTC']),
		([1, 2, 3], ['TCTA']),
		([1, 2, 3, 4, 5], ['AGGG']),
		(NULL, NULL)
) tbl(li, lg);

query II
SELECT li, HASH(li) FROM lists
----
[1]	4717996019076358352
[1, 2]	6530802887144669425
[]	13787848793156543929
[1, 2, 3]	12722334483198565868
[1, 2, 3, 4, 5]	6649915151332802727
NULL	13787848793156543929

# These should all be different
query II
SELECT lg, HASH(lg) FROM lists
----
[TGTA]	2473061308111828075
[CGGT]	17252230290449032892
[CCTC]	12469451733100292545
[TCTA]	16441147910138644840
[AGGG]	6734708784738468094
NULL	13787848793156543929

# Maps
statement ok
CREATE TABLE maps AS
	SELECT * FROM (VALUES
		(MAP([1], ['TGTA'])),
		(MAP([1, 2], ['CGGT', 'CCTC'])),
		(MAP([], [])),
		(MAP([1, 2, 3], ['TCTA', NULL, 'CGGT'])),
		(MAP([1, 2, 3, 4, 5], ['TGTA', 'CGGT', 'CCTC', 'TCTA', 'AGGG'])),
		(NULL)
) tbl(m);

query II
SELECT m, HASH(m) FROM maps
----
{1=TGTA}	7235425910004250312
{1=CGGT, 2=CCTC}	1011047862598495049
{}	13787848793156543929
{1=TCTA, 2=NULL, 3=CGGT}	6001596667924474868
{1=TGTA, 2=CGGT, 3=CCTC, 4=TCTA, 5=AGGG}	16287978232011168685
NULL	13787848793156543929

statement ok
CREATE TABLE map_as_list AS
	SELECT * FROM (VALUES
		([{'key':1, 'value':'TGTA'}]),
		([{'key':1, 'value':'CGGT'}, {'key':2, 'value':'CCTC'}]),
		([]),
		([{'key':1, 'value':'TCTA'}, {'key':2, 'value':NULL}, {'key':3, 'value':'CGGT'}]),
		([{'key':1, 'value':'TGTA'}, {'key':2, 'value':'CGGT'}, {'key':3, 'value':'CCTC'}, {'key':4, 'value':'TCTA'}, {'key':5, 'value':'AGGG'}]),
		(NULL)
) tbl(m);

# Because the map has physical type LIST, it creates an identical hash when the same values are stored as list of key/val structs
query I nosort map_hashes
SELECT HASH(m) FROM maps

query I nosort map_hashes
SELECT HASH(m) FROM map_as_list


# Enums
statement ok
CREATE TYPE resistor AS ENUM (
	'black',
	'brown',
	'red',
	'orange',
	'yellow',
	'green',
	'blue',
	'violet',
	'grey',
	'white'
);

statement ok
CREATE TABLE enums (r resistor);

statement ok
INSERT INTO enums VALUES
	('black'),
	('brown'),
	('red'),
	('orange'),
	('yellow'),
	('green'),
	('blue'),
	('violet'),
	('grey'),
	('white'),
	(NULL)
;

query II
SELECT r, HASH(r) FROM enums;
----
black	0
brown	4717996019076358352
red	2060787363917578834
orange	8131803788478518982
yellow	8535942711051191036
green	4244145009296420692
blue	8888402906861678137
violet	8736873150706563146
grey	14111048738911615569
white	17319221087726947361
NULL	13787848793156543929

#
# Variadic arguments
#

# Zero arguments are not allowed
statement error
SELECT HASH();
----

statement error
SELECT r, HASH() FROM enums;
----

# Multiple arguments of any kind are accepted
query II
SELECT r, HASH(r, 'capacitor') FROM enums;
----
black	16797622758688705282
brown	12620868779234625953
red	17584344400128560708
orange	268160620305560594
yellow	895888387990267895
green	16089427619650030004
blue	10156864916169405730
violet	3549084991787980581
grey	17281098274178594641
white	1655957553588749778
NULL	12320705626460735678

query II
SELECT r, HASH('2022-02-12'::DATE, r) FROM enums;
----
black	4250466044961212059
brown	8900520483163022923
red	2766849995292148937
orange	5342755900462846045
yellow	5515065604690625639
green	7471453529827791
blue	4730260654388144290
violet	4882794310426623697
grey	17953657405078846666
white	14602512259699608250
NULL	9630093706189153058

query II
SELECT r, HASH(r, r) FROM enums;
----
black	0
brown	523193599206204019
red	111573794787247892
orange	11131893570948557270
yellow	10594212293773127177
green	914862583577390562
blue	2211471294594404377
violet	11628961430775669869
grey	14203064203985765890
white	1133846801649713905
NULL	18212156630472451589

#
# Bugs
#

# Issue #2498: Identical nested lists should have the same hash
statement ok
CREATE TABLE issue2498 AS SELECT * FROM (VALUES
	(24, {'x': [{'l4': [52, 53]}, {'l4': [54, 55]}]}),
	(34, {'x': [{'l4': [52, 53]}, {'l4': [54, 55]}]})
) tbl(v, k);

query II
SELECT k, HASH(k) FROM issue2498
----
{'x': [{'l4': [52, 53]}, {'l4': [54, 55]}]}	14225696893928945203
{'x': [{'l4': [52, 53]}, {'l4': [54, 55]}]}	14225696893928945203

# CombineHashScalar used to have an issue that made combining a hash with itself yield hashes that all have
# a similar number of trailing zero's, so approx_count_distinct was off by a lot (and we had more collisions in HTs)
query I
select approx_count_distinct((range, range)) > 800_000 from range(1_000_000)
----
1