File: read_text_and_blob.test

package info (click to toggle)
duckdb 1.5.1-2
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 299,196 kB
  • sloc: cpp: 865,414; ansic: 57,292; python: 18,871; sql: 12,663; lisp: 11,751; yacc: 7,412; lex: 1,682; sh: 747; makefile: 558
file content (82 lines) | stat: -rw-r--r-- 3,629 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# name: test/sql/table_function/read_text_and_blob.test
# description: Test the read_text and read_blob table functions
# group: [table_function]

# The *.txt glob over the fixture directory is expected to yield three rows.
query I
SELECT count(*)
FROM read_text('test/sql/table_function/files/*.txt');
----
3

# An unrestricted glob over the same directory is expected to yield four rows via read_blob.
query I
SELECT count(*)
FROM read_blob('test/sql/table_function/files/*');
----
4

# A literal path to a file that does not exist is expected to give an empty
# four-column result rather than an error.
query IIII
SELECT *
FROM read_text('test/sql/table_function/files/nonexistentfile.txt')
ORDER BY ALL;
----

# Same missing file, but projecting an expression over filename: still no rows.
query I
SELECT parse_path(filename)
FROM read_text('test/sql/table_function/files/nonexistentfile.txt')
ORDER BY ALL;
----

# read_text is given an explicit list of two paths; one row per listed file is expected.
# parse_path splits the filename into components so the expected output does not
# depend on the path separator.
query I
SELECT parse_path(filename)
FROM read_text([
    'test/sql/table_function/files/one.txt',
    'test/sql/table_function/files/two.txt'
])
ORDER BY ALL;
----
[test, sql, table_function, files, one.txt]
[test, sql, table_function, files, two.txt]

# read_blob returns the raw bytes of four.blob (178 bytes); non-printable bytes
# appear as \xNN escapes in the expected output.
query III
SELECT
    parse_path(filename),
    size,
    content
FROM read_blob('test/sql/table_function/files/four.blob');
----
[test, sql, table_function, files, four.blob]	178	PK\x03\x04\x0A\x00\x00\x00\x00\x00\xACi=X\x14t\xCE\xC7\x0A\x00\x00\x00\x0A\x00\x00\x00\x09\x00\x1C\x00four.blobUT\x09\x00\x03c\x96\xB7ee\x96\xB7eux\x0B\x00\x01\x04\xF5\x01\x00\x00\x04\x14\x00\x00\x00F\xC3\xB6\xC3\xB6 B\xC3\xA4rPK\x01\x02\x1E\x03\x0A\x00\x00\x00\x00\x00\xACi=X\x14t\xCE\xC7\x0A\x00\x00\x00\x0A\x00\x00\x00\x09\x00\x18\x00\x00\x00\x00\x00\x01\x00\x00\x00\xA4\x81\x00\x00\x00\x00four.blobUT\x05\x00\x03c\x96\xB7eux\x0B\x00\x01\x04\xF5\x01\x00\x00\x04\x14\x00\x00\x00PK\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00O\x00\x00\x00M\x00\x00\x00\x00\x00

# four.blob is not valid UTF-8, so read_text must fail with an error that
# points the user at read_blob.
statement error
SELECT
    parse_path(filename),
    size,
    content
FROM read_text('test/sql/table_function/files/four.blob');
----
Invalid Input Error: read_text: could not read content of file 'test/sql/table_function/files/four.blob' as valid UTF-8 encoded text. You may want to use read_blob instead.

# Full content check for the three text fixtures, ordered by filename.
# Note that two.txt reports size 10 for the 7-character string 'Föö Bär',
# i.e. size counts bytes, not characters.
query III
SELECT
    size,
    parse_path(filename),
    content
FROM read_text('test/sql/table_function/files/*.txt')
ORDER BY filename, size;
----
12	[test, sql, table_function, files, one.txt]	Hello World!
2	[test, sql, table_function, files, three.txt]	42
10	[test, sql, table_function, files, two.txt]	Föö Bär

# Sanity check on last_modified: each of the four matched files must report a
# timestamp inside a plausible window (after 2024-01-01 and before 2500-01-01).
# No ORDER BY is needed because every row is expected to be true.
query I
SELECT
    last_modified > '2024-01-01'
    AND last_modified < '2500-01-01'
FROM read_blob('test/sql/table_function/files/*');
----
true
true
true
true

# Hive partitioning: key=value directory components (part=..., date=...) in the
# matched paths are exposed as the extra columns "part" and "date".
# Only the last six path components are compared, so the value substituted for
# {DATA_DIR} does not affect the expected output.
query IIII
SELECT
    parse_path(filename)[-6:],
    size,
    part,
    date
FROM read_blob('{DATA_DIR}/parquet-testing/hive-partitioning/simple/*/*/test.parquet')
ORDER BY filename
----
[parquet-testing, hive-partitioning, simple, 'part=a', 'date=2012-01-01', test.parquet]	266	a	2012-01-01
[parquet-testing, hive-partitioning, simple, 'part=b', 'date=2013-01-01', test.parquet]	266	b	2013-01-01

# Hive partition columns must also be available through read_text.
# The content column is deliberately not selected here.
# NOTE(review): presumably this avoids decoding the binary parquet payload as UTF-8 - confirm.
query IIII
SELECT
    parse_path(filename)[-6:],
    size,
    part,
    date
FROM read_text('{DATA_DIR}/parquet-testing/hive-partitioning/simple/*/*/test.parquet')
ORDER BY filename
----
[parquet-testing, hive-partitioning, simple, 'part=a', 'date=2012-01-01', test.parquet]	266	a	2012-01-01
[parquet-testing, hive-partitioning, simple, 'part=b', 'date=2013-01-01', test.parquet]	266	b	2013-01-01


# read_blob does not accept the union_by_name named parameter; the binder must
# reject it.
statement error
SELECT filename
FROM read_blob('.*', union_by_name := true);
----
Binder Error: Invalid named parameter "union_by_name" for function read_blob


# Looking up a remote file that does not exist should return an empty result
# set, matching the behavior for missing local files.
# This is probably wrong behavior in our httpfs implementation, but at least
# it's consistent in this case.
require httpfs

query I
SELECT filename
FROM read_blob('s3://does-not-exist1144/date=2025-10-11/file.parquet');
----