File: 16regex.t

package info (click to toggle)
libunicode-linebreak-perl 0.0.20140601-2%2Bdeb8u2
  • links: PTS, VCS
  • area: main
  • in suites: jessie
  • size: 4,124 kB
  • ctags: 1,278
  • sloc: ansic: 32,274; perl: 783; makefile: 2
file content (67 lines) | stat: -rw-r--r-- 1,767 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
use strict;
use Test::More;
require "t/lb.pl";

my $splitre;
BEGIN {
    $splitre = eval q{ qr{
        (?<=^url:) |
            (?<=[/]) (?=[^/]) |
            (?<=[^-.]) (?=[-~.,_?\#%=&]) |
            (?<=[=&]) (?=.)
        }iox };
    if ($@) {
	diag $@;
	plan skip_all => "Perl may have a bug (cf. perlbug #82302).";
    } else {
	plan tests => 6;
    }
}

# Regex matching most of URL-like strings.
my $URIre = qr{
    \b
	(?:url:)?
	(?:[a-z][-0-9a-z+.]+://|news:|mailto:)
	[\x21-\x7E]+
    }iox;

# Breaking URIs according to some CMoS rules.
sub breakURI {
    # 17.11 1.1: [/] ÷ [^/]
    # 17.11 2:   [-] ×
    # 6.17 2:   [.] ×
    # 17.11 1.2: ÷ [-~.,_?#%]
    # 17.11 1.3: ÷ [=&]
    # 17.11 1.3: [=&] ÷
    # Default:  ALL × ALL
    my @c = split m{$splitre}, $_[1];
    # Won't break punctuations at end of matches.
    while (2 <= scalar @c and $c[$#c] =~ /^[\".:;,>]+$/) {
	my $c = pop @c;
	$c[$#c] .= $c;
    }
    @c;
}

# [REGEX, SUB] pair
dotest('uri', 'uri.break', ColumnsMax => 1,
       Prep => [$URIre, \&breakURI]);
dotest('uri', 'uri.nonbreak', ColumnsMax => 1,
       Prep => [$URIre, sub { ($_[1]) }]);
# [STRING, SUB] pair
dotest('uri', 'uri.nonbreak', ColumnsMax => 1,
       Prep => ["$URIre", sub { ($_[1]) }]);
# multiple patterns
dotest('uri', 'uri.break', ColumnsMax => 1,
       Prep => [$URIre, \&breakURI],
       Prep => [qr{ftp://[\x21-\x7e]+}, sub { ($_[1]) } ]);
dotest('uri', 'uri.break.http', ColumnsMax => 1,
       Prep => [qr{ftp://[\x21-\x7e]+}, sub { ($_[1]) } ],
       Prep => [$URIre, \&breakURI]);
dotest('uri', 'uri.nonbreak', ColumnsMax => 1,
       Prep => [qr{ftp://[\x21-\x7e]+}, sub { ($_[1]) } ],
       Prep => [qr{http://[\x21-\x7e]+}, sub { ($_[1]) } ],
       Prep => [$URIre, \&breakURI]);

1;