File: pcre-utf8

package info (click to toggle)
grep 3.3-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 13,404 kB
  • sloc: ansic: 77,930; sh: 10,347; perl: 567; makefile: 364; awk: 71; sed: 16
file content (40 lines) | stat: -rwxr-xr-x 1,209 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#! /bin/sh
# Ensure that, with -P, Unicode \p{} symbols are correctly matched.
#
# Copyright (C) 2012-2018 Free Software Foundation, Inc.
#
# Copying and distribution of this file, with or without modification,
# are permitted in any medium without royalty provided the copyright
# notice and this notice are preserved.

# Pull in the shared test framework (located via $srcdir, defaulting to the
# current directory), then hand ../src to its path_prepend_ helper.
. "${srcdir=.}/init.sh"
path_prepend_ ../src

# Framework prerequisite checks; the PCRE one is invoked with the UTF-8
# locale in effect.
require_en_utf8_locale_
LC_ALL=en_US.UTF-8 require_pcre_

# Overall verdict: stays 0 unless one of the checks below sets it to 1.
fail=0

# Probe: '$' is a Unicode symbol, so \p{S} has to match it.  If it does
# not, this grep cannot exercise property classes and the test is skipped.
if ! printf '%s\n' '$' | LC_ALL=en_US.UTF-8 grep -qP '\p{S}'; then
  skip_ 'PCRE support is compiled out, or it does not support properties'
fi

# Fixture: the UTF-8 encoding of the euro sign (octal bytes 342 202 254),
# a space and the word "euro".  The value is deliberately used as the printf
# format so that the octal escapes are expanded into raw bytes.
euro='\342\202\254 euro'
if ! printf "${euro}\n" > in; then
  framework_failure_
fi

# In a UTF-8 locale the euro sign carries the Unicode "Symbol" property,
# so the anchored \p{S} must select the line ...
if ! LC_ALL=en_US.UTF-8 grep -P '^\p{S}' in > out; then
  fail=1
fi
# ... and the selected output must be identical to the input.
if ! compare in out; then
  fail=1
fi

# This RE must *not* match in the C locale, because the first
# byte is not a "Symbol".
# Require exit status 1 ("no lines selected") exactly: merely rejecting
# status 0 would let a grep error (status 2, empty output) pass for a
# clean non-match.
LC_ALL=C grep -P '^\p{S}' in > out
test $? -eq 1 || fail=1
# Nothing may have been printed either.
compare /dev/null out || fail=1

# In the UTF-8 locale a single "." has to cover the whole multibyte euro
# sign, so the fully anchored pattern must select the line unchanged.
if ! LC_ALL=en_US.UTF-8 grep -P '^. euro$' in > out2; then
  fail=1
fi
if ! compare in out2; then
  fail=1
fi

# Same expectation with -o: the matched text alone must equal the whole
# input line, i.e. "." consumed the complete euro character.
if ! LC_ALL=en_US.UTF-8 grep -oP '. euro' in > out3; then
  fail=1
fi
if ! compare in out3; then
  fail=1
fi

# The negated class \P{S} ("not a Symbol") must not match the leading euro
# sign, so grep has to select nothing.  Check the exit status as well as the
# output: status 1 means "no lines selected", whereas status 2 (an error)
# would also leave out4 empty and previously went unnoticed.
LC_ALL=en_US.UTF-8 grep -P '^\P{S}' in > out4
test $? -eq 1 || fail=1
compare /dev/null out4 || fail=1

# Finish via Exit (capitalised, so not the shell builtin) with the
# accumulated verdict.
Exit $fail