File: regex.pl

package info (click to toggle)
polymake 4.14-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 35,888 kB
  • sloc: cpp: 168,933; perl: 43,407; javascript: 31,575; ansic: 3,007; java: 2,654; python: 632; sh: 268; xml: 117; makefile: 61
file content (226 lines) | stat: -rw-r--r-- 9,313 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
#  Copyright (c) 1997-2024
#  Ewgenij Gawrilow, Michael Joswig, and the polymake team
#  Technische Universität Berlin, Germany
#  https://polymake.org
#
#  This program is free software; you can redistribute it and/or modify it
#  under the terms of the GNU General Public License as published by the
#  Free Software Foundation; either version 2, or (at your option) any
#  later version: http://www.gnu.org/licenses/gpl.txt.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#-------------------------------------------------------------------------------

use strict;
use namespaces;
use warnings qw(FATAL void syntax misc);

package Polymake;

# Building blocks for names.  Most patterns below are wrapped in an atomic
# group (?> ... ), so once a name has been matched the regex engine will not
# backtrack into it and return a shortened name.

# an identifier (alphanumeric, first character not a digit)
# \w also admits the underscore, so a leading underscore is accepted
declare $id_re = qr{(?> (?!\d)\w+ )}x;

# a property name (alphanumeric, first character is a letter)
# in contrast to $id_re the first character must be an ASCII letter
declare $prop_name_re = qr{(?> [a-zA-Z]\w* )}x;

# a list of identifiers (separated by commas)
# white space is allowed on both sides of every comma; at least one identifier is required
declare $ids_re = qr{(?> $id_re (?: \s*,\s* $id_re)* )}xo;

# a property name with optional attributes
# the attributes are a list of identifiers in parentheses directly following the name
declare $prop_name_attrs_re = qr{(?> $prop_name_re (?: \( \s* $ids_re \s* \) )? )}xo;

# a hierarchical identifier (identifiers connected with dots)
# no white space is allowed around the dots
declare $hier_id_re = qr{(?> $id_re (?: \. $id_re)* )}xo;

# property path, property names connected with dots
declare $prop_path_re = qr{(?> $prop_name_re (?: \. $prop_name_re)* )}xo;

# a list of hierarchical identifiers (separated by commas)
declare $hier_ids_re = qr{(?> $hier_id_re (?: \s*,\s* $hier_id_re)* )}xo;

# list of alternative property paths
# the alternatives are separated by a vertical bar, optionally surrounded by white space
declare $prop_path_alt_re = qr{(?> $prop_path_re (?: \s*\|\s* $prop_path_re)* )}xo;

# property path with optional attributes
# every component of the path may carry its own attribute list
declare $prop_path_attrs_re = qr{(?> $prop_name_attrs_re (?: \. $prop_name_attrs_re)* )}xo;

# a lone identifier
# 1 capturing group holding the identifier; only white space may follow it up to the end of the string
# the pattern is not anchored at the beginning
declare $id_only_re = qr{ ($id_re) \s*$ }xo;

# fully qualified name
# identifiers connected with ::, a single identifier without any :: matches as well
declare $qual_id_re = qr{(?> $id_re (?: :: $id_re)* )}xo;

# a list of fully qualified names
declare $qual_ids_re = qr{(?> $qual_id_re (?: \s*,\s* $qual_id_re)* )}xo;

# unqualified name
# an identifier neither preceded by :: or by a word character, nor followed by ::
declare $unqual_id_re=qr{(?<!::) (?<!\w) $id_re (?! :: )}xo;

# a single or double quote
declare $anon_quote_re=qr{ ['"] }x;

# a single or double quote in a named capturing group
# the matched quote character is available as $+{quote}
declare $quote_re=qr{ (?'quote' $anon_quote_re) }xo;

# anything but a quote character
declare $non_quote_re=qr{ [^"'] }x;

# anything but a quote character or white space
declare $non_quote_space_re=qr{ [^"'\s] }x;

# a sequence of characters not being delimiters (paired brackets or quotes), escaped delimiters, or arrows
# The three alternatives are:
#  - a run of characters other than brackets of all four kinds, quotes, and the hash sign;
#  - a single delimiter (or hash sign) preceded by exactly one backslash:
#    the second lookbehind rejects a delimiter following two backslashes;
#  - a closing angle bracket preceded by - or = (as in -> and =>), but not by <= (as in <=>).
my $non_delim_chars = qr( [^()\[\]{}<>'"\#]++
                          | (?<=\\)(?<!\\\\) [()\[\]{}<>"'\#]
                          | (?<=[-=])(?<!<=) > )x;

# a possibly empty sequence of the above; the possessive quantifier prevents backtracking into it
my $non_delim = qr{(?: $non_delim_chars )*+ }xo;

# same in a multi-line string, including comments
# a comment starts with a hash sign and extends up to and including the next line break
my $non_delim_multi = qr{(?: $non_delim_chars | (?m:\#.*\n) )*+ }xo;

# contents of a single-quoted resp. double-quoted string, without the enclosing quotes:
# any characters except the quote itself, or a quote preceded by exactly one backslash
declare $single_quoted_re=qr{(?: [^']+ | (?<=\\)(?<!\\\\) ' )*+}x;
declare $double_quoted_re=qr{(?: [^"]+ | (?<=\\)(?<!\\\\) " )*+}x;

# a sequence in quotes
# the opening and closing quotes must be of the same kind; no capturing groups
declare $anon_quoted_re=qr{(?: ' $single_quoted_re ' | " $double_quoted_re " )}xo;

# a sequence in quotes, capturing the contents
# both alternatives use the same group name 'quoted', so there are 2 capturing groups;
# the contents without the quotes are meant to be read via $+{quoted}
declare $quoted_re=qr{(?: ' (?'quoted' $single_quoted_re) ' |
                          " (?'quoted' $double_quoted_re) " )}xo;

# an expression in parentheses, braces, brackets, or quotes
# 1 capturing group
# The angle brackets are treated as the fourth kind of paired delimiters.
# (?-1) is a relative reference to the capturing group opened most recently before it,
# which is the enclosing group itself: this is how nested confined expressions of any kind
# are matched recursively.  Being relative, the reference stays valid when this pattern
# is interpolated into a larger one.
declare $confined_re = qr{ ( \( $non_delim (?: (?-1) $non_delim )* \) |
                             \[ $non_delim (?: (?-1) $non_delim )* \] |
                             \{ $non_delim (?: (?-1) $non_delim )* \} |
                              < $non_delim (?: (?-1) $non_delim )*  > |
                              $anon_quoted_re ) }xo;

# same in a multi-line string, including comments
# 1 capturing group
# differs from $confined_re only in using $non_delim_multi for the text between the delimiters
declare $confined_multiline_re = qr{ ( \( $non_delim_multi (?: (?-1) $non_delim_multi )* \) |
                                       \[ $non_delim_multi (?: (?-1) $non_delim_multi )* \] |
                                       \{ $non_delim_multi (?: (?-1) $non_delim_multi )* \} |
                                        < $non_delim_multi (?: (?-1) $non_delim_multi )*  > |
                                        $anon_quoted_re ) }xo;

# a piece of code with proper nested embraced and quoted subexpressions
# 1 capturing group
# the pattern may match an empty string; it is not anchored at either end
declare $balanced_re=qr{ $non_delim (?: $confined_re $non_delim )* }xo;

# as above, but allowing some unmatched open braces
# used in TAB completion
# Every opening bracket may be followed by a balanced piece of code and the matching closing
# bracket, but this continuation is optional; the end of the string is accepted
# in place of a bracket or a quoted sequence too.
# Each of the four interpolated copies of $balanced_re brings along its own capturing group.
declare $open_balanced_re=qr{ (?: $non_delim (?: \( (?: $balanced_re \) )?+ |
                                                 \[ (?: $balanced_re \] )?+ |
                                                 \{ (?: $balanced_re \} )?+ |
                                                  < (?: $balanced_re  > )?+ |
                                                  $anon_quoted_re           |
                                                  $ ) )* }xo;

# a piece of code with all quoted strings properly closed
# brackets are not checked here; no capturing groups
declare $quote_balanced_re=qr{ $non_quote_re* (?: $anon_quoted_re $non_quote_re* )* }xo;

# parameter list in angle brackets, recursively referring to the outer group
# this is a fragment of a regex, referring to a group outside this string, therefore not defined as qr{}
# The fragment contains insignificant white space, so it may only be interpolated into a pattern
# compiled with the x modifier, right inside the capturing group the (?-1) references should refer to.
# The whole list is optional.
my $param_list='(?: \s*<\s* (?-1) (?: \s*,\s* (?-1) )* \s*> )?+';

# property type, possibly parameterized
# 1 capturing group
# the parameters are in turn types, matched by recursion into this very group
declare $type_re=qr{($qual_id_re $param_list)}xo;

# a list of types (separated by commas)
# 1 capturing group
# it is the group of $type_re; the further list elements are matched by recursion into it
declare $types_re=qr{ $type_re (?: \s*,\s* (?-1) )*+ }xo;

# a type expression qualifying some following name
# (can't simply write $type_re :: $hier_id_re here because of the greedy nature of the former)
# 1 capturing group
# the trailing :: is only checked by a lookahead, it is not a part of the match
declare $type_qual_re=qr{ $id_re (?: :: $id_re)* (?: \s*<\s* $types_re \s*>\s* )? (?= ::) }xo;

# an expression constructing a type: either a type name, optionally qualified and parameterized, or a piece of code
# 4 capturing groups
# a piece of code must start with an opening parenthesis or brace, it is captured as 'dynamic';
# a type name is captured as 'static'
declare $type_expr_re=qr{ (?: (?= [\(\{]) (?'dynamic' $confined_re) | (?'static' $type_re)) }xo;

# a list of expressions constructing types (separated by commas)
# 5 capturing groups
# (?-5) skips over the four groups of $type_expr_re and recurses into the group enclosing the entire list
declare $type_exprs_re=qr{ ($type_expr_re (?: \s*,\s* (?-5))*+) }xo;

# a type parameter in a declaration of a complex type or a function
# 6 capturing groups
# the parameter name is captured as 'name', the optional default value following = as 'default'
declare $type_param_re=qr{ (?'name' $id_re) (?: \s*=\s* (?'default' $type_expr_re) )?+ }xo;

# list of type parameters in angle brackets with optional default values
# 7 capturing groups
# the list without the angle brackets is captured as 'tparams';
# (?-7) skips over the six groups of $type_param_re and recurses into the unnamed group enclosing the list
# NOTE(review): 7 is the number of groups up to the unnamed one enclosing the list; counting the named
# group 'tparams' as well, the pattern appears to contain 8 groups - confirm before relying on absolute numbers
declare $type_params_re=qr{ \s*<\s* (?'tparams' ($type_param_re (?: \s*,\s* (?-7) )*+ )) \s*> }xo;

# list of type parameters in angle brackets with optional default values,
# optionally concluded with an ellipsis captured as 'ellipsis'
# 7 capturing groups
# the conditional expression tries to match the ellipsis only when the group 'default' has not matched
# NOTE(review): as above, the count does not seem to include the named groups 'tparams' and 'ellipsis' - confirm
declare $type_params_variadic_re=qr{ \s*<\s* (?'tparams' ($type_param_re (?: \s*,\s* (?-7) )*+ ))
                                             (?('default') | (?: \s* (?'ellipsis' \.\.\.))? ) \s*> }xo;

# type-checking expressions
# a balanced piece of code in square brackets, captured without the brackets as 'typecheck'
declare $typechecks_re=qr{ \s*\[ (?'typecheck' $balanced_re) \] }xo;

# beginning of a declaration of a type or function
# the name is captured as 'lead_name'; it may be followed by a list of type parameters,
# and these in turn by type-checking expressions
declare $parametrized_decl_re=qr{ (?'lead_name' $id_re) (?: $type_params_re (?: $typechecks_re )?+ )?+ }xo;

# function declaration with optional signature
# the function name is captured as 'name', the contents of the parentheses as 'signature'
declare $sub_re=qr{ (?'name' $id_re) \b (?: \s*\( (?'signature' $balanced_re) \) )? }xo;

# function declaration with optional template parameters and signature
# here the function name is captured as 'lead_name', and the name of the first type parameter as 'name'
# the declaration must be followed by white space, colon, semicolon, or an opening brace,
# which is only checked by a lookahead
declare $parametrized_sub_re=qr{ $parametrized_decl_re (?: \s*\( (?'signature' $balanced_re) \) )?+ (?= [\s:;\{]) }xo;

# overloaded function declaration with optional labels
# the label is a hierarchical identifier captured as 'label', it is separated from the function name by a colon
declare $labeled_sub_re=qr{ (?:(?'label' $hier_id_re) \s*:\s*)? $parametrized_sub_re }xo;

# attributes of an argument in a function signature, maybe empty
# matches only at a position where a colon or an ampersand follows, possibly after some white space;
# consumes any number of ':identifier' items followed by an optional & or && with an optional 'const'
declare $func_arg_attrs_re=qr{(?=\s*[:&]) (?> (?: \s*:\s* $id_re )* (?: \s* &&? (?: \s* const\b)?)? )}xo;

# rule input clause
# hierarchical identifiers separated by vertical bars or commas
declare $rule_input_re=qr{ $hier_id_re (?: \s*[|,]\s* $hier_id_re )* }xo;

# filename (directory part stripped)
# 1 capturing group holding the trailing part of the string free of slashes
declare $filename_re=qr{ ([^/]+) $ }x;

# directory name (file part stripped)
# 1 capturing group holding everything in front of the last slash
declare $directory_re=qr{ ^(.*) / [^/]+$ }x;

# directory part of a command, expecting trailing arguments
# 1 capturing group
# NOTE(review): the leading .* is greedy, therefore the captured part extends up to the last slash
# followed by a slash-free word, even if that slash occurs within the trailing arguments - confirm
# that the callers never pass arguments containing slashes
declare $directory_of_cmd_re=qr{ ^(.*) / [^/]+ (?: $ | \s)}x;

# an empty line with possible comments
# compiled with the m modifier, thus it can find such a line anywhere within a multi-line string
declare $nonsignificant_line_re=qr{^ [ \t]* (?:\#.*)? $}xm;

# beginning of a complete statement
# either the beginning of the string, or a semicolon or closing brace; the following white space is consumed too
declare $statement_start_re=qr{(?: ^ | [;\}] )\s*}x;

# beginning of a function argument list
# either mere white space or an opening parenthesis with optional white space around it, captured as 'args_start'
declare $args_start_re=qr{(?'args_start' \s+ | \s*\(\s* )}x;

# an expression in a list, like an argument in a function call
# This pattern tries hard to recognize commas within type expressions,
# while all other < and > signs are interpreted as comparison operators.
# 2 capturing groups, stemming from $confined_re and $type_re
declare $expression_re=qr{ (?: (?! <) $confined_re | (?> \$$qual_id_re | $type_re ) | [^,'"()\[\]{}]*+ )+ }xo;

# variable sigil (scalar, array or hash)
# the group 'scalar' is set only for the dollar sign
declare $var_sigil_re=qr{(?: (?'scalar'\$)|[\@%] )}xo;

# end of a source file
# the pattern matches a string consisting of the end marker alone
declare $end_of_source_file="__END__";
declare $end_of_source_file_re=qr/^${end_of_source_file}$/o;

# Strip the white space from both ends of a string.
# @param string  the text to be cleaned; the caller's variable is not modified
# @return a copy of the string without leading and trailing white space
sub trim_spaces {
   my $trimmed = $_[0];
   # both substitutions operate on the local copy aliased as $_
   s/^\s+//, s/\s+$// for $trimmed;
   return $trimmed;
}

1;

# Local Variables:
# cperl-indent-level:3
# indent-tabs-mode:nil
# End: