File: unaccent.sql

package info (click to toggle)
postgresql-18 18~beta3-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 155,816 kB
  • sloc: ansic: 993,154; sql: 127,411; perl: 58,874; xml: 30,905; yacc: 21,023; lex: 9,000; makefile: 6,880; sh: 5,353; cpp: 984; python: 710; asm: 40; sed: 3
file content (48 lines) | stat: -rw-r--r-- 1,502 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
/*
 * This test must be run in a database with UTF-8 encoding,
 * because other encodings don't support all the characters used.
 */

-- Evaluate the encoding check once and keep the boolean result in the psql
-- variable skip_test (\gset stores each output column in a variable named
-- after the column, here the alias skip_test).
SELECT getdatabaseencoding() <> 'UTF8'
       AS skip_test \gset
-- When the database encoding is anything other than UTF8, stop here so that
-- none of the statements below are executed.
\if :skip_test
\quit
\endif

-- Install the extension under test; the unaccent() calls and the 'unaccent'
-- dictionary referenced by the queries below depend on it.
CREATE EXTENSION unaccent;

-- Use UTF8 on the client side as well, matching the database encoding
-- verified above, so the non-ASCII literals in this script are sent as UTF-8.
SET client_encoding TO 'UTF8';

-- One-argument form of unaccent(): no dictionary is named in the call.
-- The inputs are plain ASCII, lower- and upper-case Cyrillic, and a range of
-- symbols; the trailing comments describe the individual cases.
SELECT unaccent('foobar');
SELECT unaccent('ёлка');
SELECT unaccent('ЁЖИК');
SELECT unaccent('˃˖˗˜');
SELECT unaccent('À');  -- Remove combining diacritical 0x0300
SELECT unaccent('℃℉'); -- degree signs
SELECT unaccent('℗'); -- sound recording copyright
SELECT unaccent('1½'); -- math expression with whitespace
SELECT unaccent('〝'); -- quote

-- Two-argument form of unaccent(): the first argument names the dictionary
-- to use ('unaccent'), the second is the text.  The input strings are the
-- same ones passed to the one-argument calls earlier in this script.
SELECT unaccent('unaccent', 'foobar');
SELECT unaccent('unaccent', 'ёлка');
SELECT unaccent('unaccent', 'ЁЖИК');
SELECT unaccent('unaccent', '˃˖˗˜');
SELECT unaccent('unaccent', 'À');
SELECT unaccent('unaccent', '℃℉');
SELECT unaccent('unaccent', '℗');
SELECT unaccent('unaccent', '1½');
SELECT unaccent('unaccent', '〝');

-- Exercise the 'unaccent' dictionary through ts_lexize(), the text search
-- dictionary testing function, rather than through unaccent().  The input
-- strings are the same ones used for the unaccent() calls in this script.
SELECT ts_lexize('unaccent', 'foobar');
SELECT ts_lexize('unaccent', 'ёлка');
SELECT ts_lexize('unaccent', 'ЁЖИК');
SELECT ts_lexize('unaccent', '˃˖˗˜');
SELECT ts_lexize('unaccent', 'À');
SELECT ts_lexize('unaccent', '℃℉');
SELECT ts_lexize('unaccent', '℗');
SELECT ts_lexize('unaccent', '1½');
SELECT ts_lexize('unaccent', '〝');

-- Controversial case.  Black-Letter Capital H (U+210C) is translated by
-- Latin-ASCII.xml as 'x', but it should be 'H'.
-- NOTE(review): presumably this query exists to pin down which of the two
-- translations the shipped rules produce -- confirm against the expected
-- output file.
SELECT unaccent('ℌ');