File: capi_aggregate_functions.cpp

package info (click to toggle)
duckdb 1.5.1-2
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 299,196 kB
  • sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 558
file content (440 lines) | stat: -rw-r--r-- 16,761 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
#include "capi_tester.hpp"

using namespace duckdb;
using namespace std;

// State carried by the weighted-sum test aggregate.
// The WeightedSum* callbacks reinterpret the raw duckdb_aggregate_state pointers as this struct.
struct WeightedSumState {
	// running total of the (optionally weighted) input values
	int64_t sum;
	// number of rows folded into `sum`; WeightedSumFinalize emits NULL while this is zero
	uint64_t count;
};

// Size callback: reports the number of bytes one WeightedSumState occupies.
idx_t WeightedSumSize(duckdb_function_info info) {
	// the size is a compile-time constant, so `info` is not consulted
	const idx_t state_bytes = sizeof(WeightedSumState);
	return state_bytes;
}

// Init callback: resets both fields of the state passed in as `state_p` to zero.
void WeightedSumInit(duckdb_function_info info, duckdb_aggregate_state state_p) {
	auto &fresh = *reinterpret_cast<WeightedSumState *>(state_p);
	fresh.count = 0;
	fresh.sum = 0;
}

// Update callback: folds every valid row of `input` into the state stored at the same row index.
// With a single column the value is added unchanged; otherwise the value is multiplied by the
// second column first, and the row is skipped when either of the two entries is NULL.
void WeightedSumUpdate(duckdb_function_info info, duckdb_data_chunk input, duckdb_aggregate_state *states) {
	auto row_states = reinterpret_cast<WeightedSumState **>(states);
	const auto n_rows = duckdb_data_chunk_get_size(input);

	auto value_vec = duckdb_data_chunk_get_vector(input, 0);
	auto values = static_cast<int64_t *>(duckdb_vector_get_data(value_vec));
	auto value_mask = duckdb_vector_get_validity(value_vec);

	if (duckdb_data_chunk_get_column_count(input) == 1) {
		// one-argument overload: plain sum
		for (idx_t row = 0; row < n_rows; row++) {
			if (!duckdb_validity_row_is_valid(value_mask, row)) {
				continue;
			}
			auto &acc = *row_states[row];
			acc.sum += values[row];
			acc.count++;
		}
		return;
	}

	// two-argument overload: the second column carries the weights
	auto weight_vec = duckdb_data_chunk_get_vector(input, 1);
	auto weights = static_cast<int64_t *>(duckdb_vector_get_data(weight_vec));
	auto weight_mask = duckdb_vector_get_validity(weight_vec);
	for (idx_t row = 0; row < n_rows; row++) {
		const bool usable =
		    duckdb_validity_row_is_valid(value_mask, row) && duckdb_validity_row_is_valid(weight_mask, row);
		if (!usable) {
			continue;
		}
		auto &acc = *row_states[row];
		acc.sum += values[row] * weights[row];
		acc.count++;
	}
}

// Combine callback: adds the sum and row count of each source state onto the target state
// at the same index. The source states are only read.
void WeightedSumCombine(duckdb_function_info info, duckdb_aggregate_state *source_p, duckdb_aggregate_state *target_p,
                        idx_t count) {
	auto from_states = reinterpret_cast<WeightedSumState **>(source_p);
	auto into_states = reinterpret_cast<WeightedSumState **>(target_p);

	for (idx_t idx = 0; idx < count; idx++) {
		const auto &from = *from_states[idx];
		auto &into = *into_states[idx];
		into.sum += from.sum;
		into.count += from.count;
	}
}

// Finalize callback: writes the sum of state `i` to row `offset + i` of `result`.
// A state that never absorbed a row (count == 0) yields NULL instead of a value.
void WeightedSumFinalize(duckdb_function_info info, duckdb_aggregate_state *source_p, duckdb_vector result, idx_t count,
                         idx_t offset) {
	auto final_states = reinterpret_cast<WeightedSumState **>(source_p);
	auto out_values = static_cast<int64_t *>(duckdb_vector_get_data(result));
	// the validity mask is made writable before it is fetched, since rows may be marked NULL below
	duckdb_vector_ensure_validity_writable(result);
	auto out_mask = duckdb_vector_get_validity(result);

	for (idx_t idx = 0; idx < count; idx++) {
		const auto &done = *final_states[idx];
		const idx_t out_row = offset + idx;
		if (done.count != 0) {
			out_values[out_row] = done.sum;
			continue;
		}
		duckdb_validity_set_row_invalid(out_mask, out_row);
	}
}

// Creates (but does not register) the weighted-sum aggregate called `name`, taking
// `parameter_count` BIGINT parameters and returning BIGINT, wired to the WeightedSum* callbacks.
// `connection` is not used by this function. The caller owns the returned handle.
// Besides the valid calls, every setter is also invoked with nullptr arguments and the outcome of
// those calls is not inspected here -- presumably to check that the API tolerates them (confirm
// against the C API docs).
static duckdb_aggregate_function CAPIGetAggregateFunction(duckdb_connection connection, const char *name,
                                                          idx_t parameter_count = 2) {
	// create an aggregate function
	auto function = duckdb_create_aggregate_function();
	// nullptr function / nullptr name, then the real name is set twice in a row
	duckdb_aggregate_function_set_name(nullptr, name);
	duckdb_aggregate_function_set_name(function, nullptr);
	duckdb_aggregate_function_set_name(function, name);
	duckdb_aggregate_function_set_name(function, name);

	// add `parameter_count` bigint parameters (after two calls with a nullptr argument)
	auto type = duckdb_create_logical_type(DUCKDB_TYPE_BIGINT);
	duckdb_aggregate_function_add_parameter(nullptr, type);
	duckdb_aggregate_function_add_parameter(function, nullptr);
	for (idx_t idx = 0; idx < parameter_count; idx++) {
		duckdb_aggregate_function_add_parameter(function, type);
	}

	// set the return type to bigint (again preceded by calls with a nullptr argument)
	duckdb_aggregate_function_set_return_type(nullptr, type);
	duckdb_aggregate_function_set_return_type(function, nullptr);
	duckdb_aggregate_function_set_return_type(function, type);
	// the logical type handle is released here, after its last use in this function
	duckdb_destroy_logical_type(&type);

	// set up the function: two calls with nullptr arguments, then the real callbacks
	duckdb_aggregate_function_set_functions(nullptr, nullptr, nullptr, nullptr, nullptr, nullptr);
	duckdb_aggregate_function_set_functions(function, nullptr, nullptr, nullptr, nullptr, nullptr);
	duckdb_aggregate_function_set_functions(function, WeightedSumSize, WeightedSumInit, WeightedSumUpdate,
	                                        WeightedSumCombine, WeightedSumFinalize);
	return function;
}

// Builds the two-parameter weighted-sum aggregate via CAPIGetAggregateFunction, registers it on
// `connection` under `name`, and requires the registration status to equal `expected_outcome`.
static void CAPIRegisterWeightedSum(duckdb_connection connection, const char *name, duckdb_state expected_outcome) {
	duckdb_state status;

	// create an aggregate function
	auto function = CAPIGetAggregateFunction(connection, name);
	// register and cleanup
	status = duckdb_register_aggregate_function(connection, function);
	REQUIRE(status == expected_outcome);

	// destroy is invoked a second time on the same handle and once more with nullptr;
	// presumably to check that both are tolerated -- confirm against the C API docs
	duckdb_destroy_aggregate_function(&function);
	duckdb_destroy_aggregate_function(&function);
	duckdb_destroy_aggregate_function(nullptr);
}

// Table of the five aggregate callbacks. CAPIRegisterWeightedSumExtraInfo stores one instance as
// the function's extra info, and the Callback* trampolines below dispatch through it.
struct CAPICallbacks {
	// invoked by CallbackSize
	duckdb_aggregate_state_size state_size;
	// invoked by CallbackInit
	duckdb_aggregate_init_t init;
	// invoked by CallbackUpdate
	duckdb_aggregate_update_t update;
	// invoked by CallbackCombine
	duckdb_aggregate_combine_t combine;
	// invoked by CallbackFinalize
	duckdb_aggregate_finalize_t finalize;
};

// Size trampoline: fetches the CAPICallbacks table stored as extra info and forwards to its
// state_size entry.
idx_t CallbackSize(duckdb_function_info info) {
	auto table = static_cast<CAPICallbacks *>(duckdb_aggregate_function_get_extra_info(info));
	return table->state_size(info);
}

// Init trampoline: forwards to the init entry of the CAPICallbacks table stored as extra info.
void CallbackInit(duckdb_function_info info, duckdb_aggregate_state state_p) {
	auto table = static_cast<CAPICallbacks *>(duckdb_aggregate_function_get_extra_info(info));
	table->init(info, state_p);
}

// Update trampoline: forwards to the update entry of the CAPICallbacks table stored as extra info.
void CallbackUpdate(duckdb_function_info info, duckdb_data_chunk input, duckdb_aggregate_state *states) {
	auto table = static_cast<CAPICallbacks *>(duckdb_aggregate_function_get_extra_info(info));
	table->update(info, input, states);
}

// Combine trampoline: forwards to the combine entry of the CAPICallbacks table stored as extra info.
void CallbackCombine(duckdb_function_info info, duckdb_aggregate_state *source_p, duckdb_aggregate_state *target_p,
                     idx_t count) {
	auto table = static_cast<CAPICallbacks *>(duckdb_aggregate_function_get_extra_info(info));
	table->combine(info, source_p, target_p, count);
}

// Finalize trampoline: forwards to the finalize entry of the CAPICallbacks table stored as extra info.
void CallbackFinalize(duckdb_function_info info, duckdb_aggregate_state *source_p, duckdb_vector result, idx_t count,
                      idx_t offset) {
	auto table = static_cast<CAPICallbacks *>(duckdb_aggregate_function_get_extra_info(info));
	table->finalize(info, source_p, result, count, offset);
}

// Registers the weighted-sum aggregate under `name` with two BIGINT parameters, but routes every
// callback through the Callback* trampolines, which look the real implementation up in a
// CAPICallbacks table attached as the function's extra info.
// `expected_outcome` is the status duckdb_register_aggregate_function is required to return.
static void CAPIRegisterWeightedSumExtraInfo(duckdb_connection connection, const char *name,
                                             duckdb_state expected_outcome) {
	// create an aggregate function
	auto function = duckdb_create_aggregate_function();
	duckdb_aggregate_function_set_name(function, name);

	// add two bigint parameters
	auto type = duckdb_create_logical_type(DUCKDB_TYPE_BIGINT);
	duckdb_aggregate_function_add_parameter(function, type);
	duckdb_aggregate_function_add_parameter(function, type);

	// set the return type to bigint
	duckdb_aggregate_function_set_return_type(function, type);
	duckdb_destroy_logical_type(&type);

	// the callback table lives on the C heap; fail the test instead of writing through a null
	// pointer when the allocation does not succeed
	auto callback_struct = static_cast<CAPICallbacks *>(malloc(sizeof(CAPICallbacks)));
	REQUIRE(callback_struct != nullptr);
	callback_struct->state_size = WeightedSumSize;
	callback_struct->init = WeightedSumInit;
	callback_struct->update = WeightedSumUpdate;
	callback_struct->combine = WeightedSumCombine;
	callback_struct->finalize = WeightedSumFinalize;

	// attach the table as extra info, with `free` passed as its destroy callback
	duckdb_aggregate_function_set_extra_info(function, callback_struct, free);

	// set up the function
	duckdb_aggregate_function_set_functions(function, CallbackSize, CallbackInit, CallbackUpdate, CallbackCombine,
	                                        CallbackFinalize);

	// register and cleanup
	const duckdb_state status = duckdb_register_aggregate_function(connection, function);
	REQUIRE(status == expected_outcome);

	duckdb_destroy_aggregate_function(&function);
}

TEST_CASE("Test Aggregate Functions C API", "[capi]") {
	using register_function_t = void (*)(duckdb_connection, const char *, duckdb_state);

	// the same scenario runs once per registration path (direct callbacks / extra-info trampolines)
	duckdb::vector<register_function_t> register_functions {CAPIRegisterWeightedSum, CAPIRegisterWeightedSumExtraInfo};
	for (auto &do_register : register_functions) {
		CAPITester tester;
		REQUIRE(tester.OpenDatabase(nullptr));

		// the first registration succeeds, repeating it under the same name is an error
		do_register(tester.connection, "my_weighted_sum", DuckDBSuccess);
		do_register(tester.connection, "my_weighted_sum", DuckDBError);

		// executes a query and requires it to succeed before handing the result back
		auto run = [&tester](const char *sql) {
			auto res = tester.Query(sql);
			REQUIRE_NO_FAIL(*res);
			return res;
		};

		// constant arguments
		auto result = run("SELECT my_weighted_sum(40, 2)");
		REQUIRE(result->Fetch<int64_t>(0, 0) == 80);

		// a NULL on either side leaves the state empty, so the aggregate is NULL
		result = run("SELECT my_weighted_sum(40, NULL)");
		REQUIRE(result->IsNull(0, 0));

		result = run("SELECT my_weighted_sum(NULL, 2)");
		REQUIRE(result->IsNull(0, 0));

		// many rows, single group
		result = run("SELECT my_weighted_sum(i, 2) FROM range(100) t(i)");
		REQUIRE(result->Fetch<int64_t>(0, 0) == 9900);

		// many rows, two groups (even / odd)
		result = run("SELECT i % 2 AS gr, my_weighted_sum(i, 2) FROM range(100) t(i) GROUP BY gr ORDER BY gr");
		REQUIRE(result->Fetch<int64_t>(0, 0) == 0);
		REQUIRE(result->Fetch<int64_t>(1, 0) == 4900);
		REQUIRE(result->Fetch<int64_t>(0, 1) == 1);
		REQUIRE(result->Fetch<int64_t>(1, 1) == 5000);
	}
}

// State carried by the repeated_string_agg test aggregate.
// The RepeatedStringAgg* callbacks reinterpret the raw duckdb_aggregate_state pointers as this struct.
struct RepeatedStringAggState {
	// malloc'ed, NUL-terminated buffer holding the concatenation so far; nullptr until the first
	// valid row is absorbed (RepeatedStringAggFinalize emits NULL in that case)
	char *data;
	// number of bytes stored in `data`, excluding the trailing NUL
	idx_t size;
};

// Size callback: reports the number of bytes one RepeatedStringAggState occupies.
idx_t RepeatedStringAggSize(duckdb_function_info info) {
	// the size is a compile-time constant, so `info` is not consulted
	const idx_t state_bytes = sizeof(RepeatedStringAggState);
	return state_bytes;
}

// Init callback: marks the state passed in as `state_p` as empty (no buffer, zero length).
void RepeatedStringAggInit(duckdb_function_info info, duckdb_aggregate_state state_p) {
	auto &fresh = *reinterpret_cast<RepeatedStringAggState *>(state_p);
	fresh.size = 0;
	fresh.data = nullptr;
}

// Update callback: for every row where both the string (column 0) and the weight (column 1) are
// non-NULL, appends the string `weight` times to the buffer of the state at the same row index.
// The buffer is kept NUL-terminated. A negative weight, or a failed allocation, is reported via
// duckdb_aggregate_function_set_error and stops processing of the chunk; in both cases the state
// of the offending row is left untouched and still owns its previous buffer.
void RepeatedStringAggUpdate(duckdb_function_info info, duckdb_data_chunk input, duckdb_aggregate_state *states) {
	auto state = reinterpret_cast<RepeatedStringAggState **>(states);
	auto row_count = duckdb_data_chunk_get_size(input);
	auto input_vector = duckdb_data_chunk_get_vector(input, 0);
	auto input_data = static_cast<duckdb_string_t *>(duckdb_vector_get_data(input_vector));
	auto input_validity = duckdb_vector_get_validity(input_vector);

	auto weight_vector = duckdb_data_chunk_get_vector(input, 1);
	auto weight_data = static_cast<int64_t *>(duckdb_vector_get_data(weight_vector));
	auto weight_validity = duckdb_vector_get_validity(weight_vector);
	for (idx_t i = 0; i < row_count; i++) {
		if (!duckdb_validity_row_is_valid(input_validity, i) || !duckdb_validity_row_is_valid(weight_validity, i)) {
			continue;
		}
		const auto weight = weight_data[i];
		if (weight < 0) {
			duckdb_aggregate_function_set_error(info, "Weight must be >= 0");
			return;
		}
		const idx_t repetitions = static_cast<idx_t>(weight);
		const idx_t length = duckdb_string_t_length(input_data[i]);
		auto data = duckdb_string_t_data(input_data + i);

		auto &row_state = *state[i];
		const idx_t new_size = row_state.size + length * repetitions;
		// realloc(nullptr, n) behaves like malloc(n), so the first row needs no special case;
		// on failure the old buffer stays valid and owned by the state
		auto new_data = static_cast<char *>(realloc(row_state.data, new_size + 1));
		if (!new_data) {
			duckdb_aggregate_function_set_error(info, "Out of memory");
			return;
		}
		row_state.data = new_data;

		idx_t offset = row_state.size;
		for (idx_t rep_idx = 0; rep_idx < repetitions; rep_idx++) {
			memcpy(new_data + offset, data, length);
			offset += length;
		}
		row_state.size = offset;
		new_data[offset] = '\0';
	}
}

// Combine callback: appends the buffer of each source state onto the target state at the same
// index, keeping the target NUL-terminated. The source states are only read.
// "Has seen a valid row" is tracked by `data != nullptr` (that is what Update sets and Finalize
// tests), so a source is skipped only when its buffer is nullptr -- not when it is merely empty.
// Otherwise a source that absorbed only empty strings would be dropped and the merged result
// would turn into NULL instead of "".
// A failed allocation is reported via duckdb_aggregate_function_set_error; the target keeps its
// previous buffer in that case.
void RepeatedStringAggCombine(duckdb_function_info info, duckdb_aggregate_state *source_p,
                              duckdb_aggregate_state *target_p, idx_t count) {
	auto source = reinterpret_cast<RepeatedStringAggState **>(source_p);
	auto target = reinterpret_cast<RepeatedStringAggState **>(target_p);

	for (idx_t i = 0; i < count; i++) {
		const auto &src = *source[i];
		auto &dst = *target[i];
		if (!src.data) {
			// the source never absorbed a valid row: nothing to merge
			continue;
		}
		if (src.size == 0 && dst.data) {
			// empty source and the target is already non-NULL: appending changes nothing
			continue;
		}
		const idx_t merged_size = dst.size + src.size;
		// realloc(nullptr, n) behaves like malloc(n); on failure the old buffer stays valid
		auto merged = static_cast<char *>(realloc(dst.data, merged_size + 1));
		if (!merged) {
			duckdb_aggregate_function_set_error(info, "Out of memory");
			return;
		}
		if (src.size > 0) {
			memcpy(merged + dst.size, src.data, src.size);
		}
		merged[merged_size] = '\0';
		dst.data = merged;
		dst.size = merged_size;
	}
}

// Finalize callback: assigns the buffer of state `i` to row `offset + i` of `result`.
// A state without a buffer (data == nullptr) yields NULL instead of a string.
void RepeatedStringAggFinalize(duckdb_function_info info, duckdb_aggregate_state *source_p, duckdb_vector result,
                               idx_t count, idx_t offset) {
	auto final_states = reinterpret_cast<RepeatedStringAggState **>(source_p);
	// the validity mask is made writable before it is fetched, since rows may be marked NULL below
	duckdb_vector_ensure_validity_writable(result);
	auto out_mask = duckdb_vector_get_validity(result);

	for (idx_t idx = 0; idx < count; idx++) {
		const auto &done = *final_states[idx];
		const idx_t out_row = offset + idx;
		if (done.data) {
			const char *text = done.data;
			duckdb_vector_assign_string_element_len(result, out_row, text, done.size);
			continue;
		}
		duckdb_validity_set_row_invalid(out_mask, out_row);
	}
}

// Destructor callback: releases the heap buffer of every state that holds one.
void RepeatedStringAggDestructor(duckdb_aggregate_state *states, idx_t count) {
	auto doomed = reinterpret_cast<RepeatedStringAggState **>(states);
	for (idx_t idx = 0; idx < count; idx++) {
		char *buffer = doomed[idx]->data;
		if (!buffer) {
			continue;
		}
		free(buffer);
	}
}

// Registers the `repeated_string_agg` aggregate, (VARCHAR, BIGINT) -> VARCHAR, on `connection`
// and requires the registration to succeed.
static void CAPIRegisterRepeatedStringAgg(duckdb_connection connection) {
	auto aggregate = duckdb_create_aggregate_function();
	duckdb_aggregate_function_set_name(aggregate, "repeated_string_agg");

	// signature: the string to repeat, followed by the repetition count
	auto text_type = duckdb_create_logical_type(DUCKDB_TYPE_VARCHAR);
	auto repeat_type = duckdb_create_logical_type(DUCKDB_TYPE_BIGINT);
	duckdb_aggregate_function_add_parameter(aggregate, text_type);
	duckdb_aggregate_function_add_parameter(aggregate, repeat_type);

	// the result is a string as well
	duckdb_aggregate_function_set_return_type(aggregate, text_type);

	// the logical type handles are released here, after their last use in this function
	duckdb_destroy_logical_type(&text_type);
	duckdb_destroy_logical_type(&repeat_type);

	// callbacks, plus a destructor because the states own heap memory
	duckdb_aggregate_function_set_functions(aggregate, RepeatedStringAggSize, RepeatedStringAggInit,
	                                        RepeatedStringAggUpdate, RepeatedStringAggCombine,
	                                        RepeatedStringAggFinalize);
	duckdb_aggregate_function_set_destructor(aggregate, RepeatedStringAggDestructor);

	// register, verify, and release the local handle
	const duckdb_state status = duckdb_register_aggregate_function(connection, aggregate);
	REQUIRE(status == DuckDBSuccess);

	duckdb_destroy_aggregate_function(&aggregate);
}

TEST_CASE("Test String Aggregate Function", "[capi]") {
	CAPITester tester;
	REQUIRE(tester.OpenDatabase(nullptr));
	CAPIRegisterRepeatedStringAgg(tester.connection);

	// executes a query and requires it to succeed before handing the result back
	auto run = [&tester](const char *sql) {
		auto res = tester.Query(sql);
		REQUIRE_NO_FAIL(*res);
		return res;
	};

	// single-character string
	auto result = run("SELECT repeated_string_agg('x', 2)");
	REQUIRE(result->Fetch<string>(0, 0) == "xx");

	// empty string
	result = run("SELECT repeated_string_agg('', 2)");
	REQUIRE(result->Fetch<string>(0, 0) == "");

	// longer string, three repetitions
	result = run("SELECT repeated_string_agg('abcdefgh', 3)");
	REQUIRE(result->Fetch<string>(0, 0) == "abcdefghabcdefghabcdefgh");

	// NULL input yields NULL
	result = run("SELECT repeated_string_agg(NULL, 2)");
	REQUIRE(result->IsNull(0, 0));

	// a negative weight is rejected by the update callback
	REQUIRE_FAIL(tester.Query("SELECT repeated_string_agg('x', -1)"));

	// many rows: only multiples of ten contribute a non-empty string
	result = run("SELECT repeated_string_agg(CASE WHEN i%10=0 THEN i::VARCHAR ELSE '' END, 2) FROM range(100) t(i)");
	REQUIRE(result->Fetch<string>(0, 0) == "00101020203030404050506060707080809090");
}

// Builds a function set called `name` holding two overloads of the weighted-sum aggregate
// (one BIGINT parameter and two BIGINT parameters), registers the set on `connection`, and
// requires the registration status to equal `expected_outcome`.
static void CAPIRegisterWeightedSumOverloads(duckdb_connection connection, const char *name,
                                             duckdb_state expected_outcome) {
	duckdb_state status;

	auto function_set = duckdb_create_aggregate_function_set(name);
	// create an aggregate function with 1 parameter
	auto function = CAPIGetAggregateFunction(connection, name, 1);
	duckdb_add_aggregate_function_to_set(function_set, function);
	// the local handle is destroyed right after it was added to the set
	duckdb_destroy_aggregate_function(&function);

	// create an aggregate function with 2 parameters
	function = CAPIGetAggregateFunction(connection, name, 2);
	duckdb_add_aggregate_function_to_set(function_set, function);
	duckdb_destroy_aggregate_function(&function);

	// register and cleanup
	status = duckdb_register_aggregate_function_set(connection, function_set);
	REQUIRE(status == expected_outcome);

	// destroy is invoked a second time on the same handle and once more with nullptr;
	// presumably to check that both are tolerated -- confirm against the C API docs
	duckdb_destroy_aggregate_function_set(&function_set);
	duckdb_destroy_aggregate_function_set(&function_set);
	duckdb_destroy_aggregate_function_set(nullptr);
}

TEST_CASE("Test Aggregate Function Overloads C API", "[capi]") {
	CAPITester tester;
	REQUIRE(tester.OpenDatabase(nullptr));

	// the first registration succeeds, repeating it under the same name is an error
	CAPIRegisterWeightedSumOverloads(tester.connection, "my_weighted_sum", DuckDBSuccess);
	CAPIRegisterWeightedSumOverloads(tester.connection, "my_weighted_sum", DuckDBError);

	// executes a query and requires it to succeed before handing the result back
	auto run = [&tester](const char *sql) {
		auto res = tester.Query(sql);
		REQUIRE_NO_FAIL(*res);
		return res;
	};

	// one-argument overload: plain sum
	auto result = run("SELECT my_weighted_sum(40)");
	REQUIRE(result->Fetch<int64_t>(0, 0) == 40);

	// two-argument overload: value times weight
	result = run("SELECT my_weighted_sum(40, 2)");
	REQUIRE(result->Fetch<int64_t>(0, 0) == 80);
}