File: perlrecharclass.html

package info (click to toggle)
perl-doc-html 5.26.0-4
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 39,400 kB
  • sloc: xml: 36; makefile: 7
file content (1465 lines) | stat: -rw-r--r-- 115,196 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
  <title>perlrecharclass - perldoc.perl.org</title>
  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
  <meta http-equiv="Content-Language" content="en-gb">
  <link rel="search" type="application/opensearchdescription+xml" title="Search perldoc.perl.org" href="/static/search.xml">
  <link href="static/css-20100830.css" rel="stylesheet" rev="stylesheet" type="text/css" media="screen">
  <link href="static/exploreperl.css" rel="stylesheet" rev="stylesheet" type="text/css">
</head>

<body onLoad="perldoc.startup();" onPageShow="if (event.persisted) perldoc.startup();">
    <div id="page">
      
      <div id="header">
	<div id="homepage_link">
	  <a href="index.html"></a>
	</div>
	<div id="strapline">
	  Perl Programming Documentation
	</div>
	<div id="download_link" class="download">
	  <a href="http://www.perl.org/get.html">Download Perl</a>
	</div>
	<div id="explore_link" class="download">
	  <a id="explore_anchor" href="#">Explore</a>
	</div>
      </div>
      
      <div id="body">
        <div id="left_column">
          <div class="side_group">
            
	    <div class="side_panel doc_panel">
              <p>Manual</p>
              <ul>
                <li><a href="index-overview.html">Overview</a>
                <li><a href="index-tutorials.html">Tutorials</a>
                <li><a href="index-faq.html">FAQs</a>
                <li><a href="index-history.html">History / Changes</a>
                <li><a href="index-licence.html">License</a>
              </ul>
            </div>
            <div class="side_panel doc_panel">
              <p>Reference</p>
              <ul>
                <li><a href="index-language.html">Language</a>
                <li><a href="index-functions.html">Functions</a>
                <li><a href="perlop.html">Operators</a>
                <li><a href="perlvar.html">Special Variables</a>
                <li><a href="index-pragmas.html">Pragmas</a>
                <li><a href="index-utilities.html">Utilities</a>
                <li><a href="index-internals.html">Internals</a>
                <li><a href="index-platforms.html">Platform Specific</a>
              </ul>
            </div>
            <div class="side_panel doc_panel">
              <p>Modules</p>
              <ul>
		<li>
		
                
                  
		    
		  
		
                  
		    
		  
		
                  
		    
		  
		
                  
		    
		  
		
                  
		    
		  
		
                  
		    
		  
		
                  
		    
		  
		
                  
		    
		  
		
                  
		    
		  
		
                  
		
                  
		
                  
		    
		  
		
                  
		    
		  
		
                  
		    
		  
		
                  
		    
		  
		
                  
		    
		  
		
                  
		
                  
		
                  
		    
		  
		
                  
		    
		  
		
                  
		    
		  
		
                  
		
                  
		
                  
		    
		  
		
                  
		
                  
		
		
                    <a href="index-modules-A.html">A</a>
                    
                      
                        &bull;
                      
                    
                
                    <a href="index-modules-B.html">B</a>
                    
                      
                        &bull;
                      
                    
                
                    <a href="index-modules-C.html">C</a>
                    
                      
                        &bull;
                      
                    
                
                    <a href="index-modules-D.html">D</a>
                    
                      
                        &bull;
                      
                    
                
                    <a href="index-modules-E.html">E</a>
                    
                      
                        <li>
                      
                    
                
                    <a href="index-modules-F.html">F</a>
                    
                      
                        &bull;
                      
                    
                
                    <a href="index-modules-G.html">G</a>
                    
                      
                        &bull;
                      
                    
                
                    <a href="index-modules-H.html">H</a>
                    
                      
                        &bull;
                      
                    
                
                    <a href="index-modules-I.html">I</a>
                    
                      
                        &bull;
                      
                    
                
                    <a href="index-modules-L.html">L</a>
                    
                      
                        <li>
                      
                    
                
                    <a href="index-modules-M.html">M</a>
                    
                      
                        &bull;
                      
                    
                
                    <a href="index-modules-N.html">N</a>
                    
                      
                        &bull;
                      
                    
                
                    <a href="index-modules-O.html">O</a>
                    
                      
                        &bull;
                      
                    
                
                    <a href="index-modules-P.html">P</a>
                    
                      
                        &bull;
                      
                    
                
                    <a href="index-modules-S.html">S</a>
                    
                      
                        <li>
                      
                    
                
                    <a href="index-modules-T.html">T</a>
                    
                      
                        &bull;
                      
                    
                
                    <a href="index-modules-U.html">U</a>
                    
                      
                        &bull;
                      
                    
                
                    <a href="index-modules-X.html">X</a>
                    
                
              </ul>
            </div>
            
	      <div class="side_panel doc_panel">
		<p>Tools</p>
		<ul>
		  <li><a href="preferences.html">Preferences</a>
		</ul>
	      </div>
            
          </div>
        </div>
        <div id="centre_column">
          <div id="content_header">
            <div id="title_bar">
              <div id="page_name">
                <h1>perlrecharclass</h1>
              </div>
              <div id="perl_version">
                Perl 5 version 26.0 documentation
              </div>
              <div class="page_links" id="page_links_top">
                <a href="#" onClick="toolbar.goToTop();return false;">Go to top</a>
		
              </div>
	      <div class="page_links" id="page_links_bottom">
		
                  <a href="#" id="page_index_toggle">Show page index</a> &bull;
		
                <a href="#" id="recent_pages_toggle">Show recent pages</a>		
	      </div>
	      <div id="search_form">
		<form action="search.html" method="GET" id="search">
		  <input type="text" name="q" id="search_box" alt="Search">
		</form>
	      </div>
            </div>
            <div id="breadcrumbs">
                
    <a href="index.html">Home</a> &gt;
    
      
        <a href="index-language.html">Language reference</a> &gt;
      
    
    perlrecharclass
  

            </div>
          </div>
          <div id="content_body">
	    <!--[if lt IE 7]>
 <div class="noscript">
   <p>
     <strong>It looks like you're using Internet Explorer 6. This is a very old
     browser which does not offer full support for modern websites.</strong>
   </p>
   <p>
     Unfortunately this means that this website will not work on
     your computer.
   </p>
   <p>
     Don't miss out though! To view the site (and get a better experience from
     many other websites), simply upgrade to
     <a href="http://www.microsoft.com/windows/Internet-explorer/default.aspx">Internet
Explorer 8</a>
     or download an alternative browser such as
     <a href="http://www.mozilla.com/en-US/firefox/firefox.html">Firefox</a>,
     <a href="http://www.apple.com/safari/download/">Safari</a>, or
     <a href="http://www.google.co.uk/chrome">Google Chrome</a>.
   </p>
   <p>
     All of these browsers are free. If you're using a PC at work, you may
     need to contact your IT administrator.
   </p>
 </div>
<![endif]-->
	    <noscript>
	      <div class="noscript">
	      <p>
                <strong>Please note: Many features of this site require JavaScript. You appear to have JavaScript disabled,
	        or are running a non-JavaScript capable web browser.</strong>
	      </p>
	      <p>
		To get the best experience, please enable JavaScript or download a modern web browser such as <a href="http://www.microsoft.com/windows/Internet-explorer/default.aspx">Internet Explorer 8</a>, <a href="http://www.mozilla.com/en-US/firefox/firefox.html">Firefox</a>, <a href="http://www.apple.com/safari/download/">Safari</a>, or <a href="http://www.google.co.uk/chrome">Google Chrome</a>.
              </p>
	      </div>
	    </noscript>

	    <div id="recent_pages" class="hud_container">
	      <div id="recent_pages_header" class="hud_header">
		<div id="recent_pages_close" class="hud_close"><a href="#" onClick="recentPages.hide();return false;"></a></div>
		<div id="recent_pages_title" class="hud_title"><span class="hud_span_top">Recently read</span></div>
		<div id="recent_pages_topright" class="hud_topright"></div>
	      </div>
	      <div id="recent_pages_content" class="hud_content">
	      </div>
	      <div id="recent_pages_footer" class="hud_footer">
		<div id="recent_pages_bottomleft" class="hud_bottomleft"></div>
		<div id="recent_pages_bottom" class="hud_bottom"><span class="hud_span_bottom"></span></div>
		<div id="recent_pages_resize" class="hud_resize"></div>
	      </div>
	    </div>
  
	    <div id="from_search"></div>
            <h1>perlrecharclass</h1>


  <!--    -->
<ul><li><a href="#NAME">NAME
</a><li><a href="#DESCRIPTION">DESCRIPTION</a><ul><li><a href="#The-dot">The dot</a><li><a href="#Backslash-sequences">Backslash sequences
       
    
 </a><li><a href="#Bracketed-Character-Classes">Bracketed Character Classes</a></ul></ul><a name="NAME"></a><h1>NAME
</h1>
<p>perlrecharclass - Perl Regular Expression Character Classes</p>
<a name="DESCRIPTION"></a><h1>DESCRIPTION</h1>
<p>The top level documentation about Perl regular expressions
is found in <a href="perlre.html">perlre</a>.</p>
<p>This manual page discusses the syntax and use of character
classes in Perl regular expressions.</p>
<p>A character class is a way of denoting a set of characters
in such a way that one character of the set is matched.
It's important to remember that matching a character class
consumes exactly one character in the source string. (The source
string is the string the regular expression is matched against.)</p>
<p>There are three types of character classes in Perl regular
expressions: the dot, backslash sequences, and the form enclosed in square
brackets.  Keep in mind, though, that often the term "character class" is used
to mean just the bracketed form.  Certainly, most Perl documentation does that.</p>
<a name="The-dot"></a><h2>The dot</h2>
<p>The dot (or period), <code class="inline">.</code>, is probably the most used, and certainly
the most well-known character class. By default, a dot matches any
character, except for the newline. That default can be changed to
add matching the newline by using the <i>single line</i> modifier:
for the entire regular expression with the <code class="inline">/s</code> modifier, or
locally with <code class="inline">(?s)</code>  (and even globally within the scope of
<a href="re.html#'%2fflags'-mode">use re &#39;/s&#39; </a>).  (The <code class="inline"><a href="#%5cN">\N</a></code> backslash
sequence, described
below, matches any character except newline without regard to the
<i>single line</i> modifier.)</p>
<p>Here are some examples:</p>
<pre class="verbatim"><ol><li> "a"  =~  /./       # Match</li><li> "."  =~  /./       # Match</li><li> ""   =~  /./       # No match (dot has to match a character)</li><li> "\n" =~  /./       # No match (dot does not match a newline)</li><li> "\n" =~  /./s      # Match (global 'single line' modifier)</li><li> "\n" =~  /(?s:.)/  # Match (local 'single line' modifier)</li><li> "ab" =~  /^.$/     # No match (dot matches one character)</li></ol></pre><a name="Backslash-sequences"></a><h2>Backslash sequences
       
    
 </h2>
<p>A backslash sequence is a sequence of characters, the first one of which is a
backslash.  Perl ascribes special meaning to many such sequences, and some of
these are character classes.  That is, they match a single character each,
provided that the character belongs to the specific set of characters defined
by the sequence.</p>
<p>Here's a list of the backslash sequences that are character classes.  They
are discussed in more detail below.  (For the backslash sequences that aren't
character classes, see <a href="perlrebackslash.html">perlrebackslash</a>.)</p>
<pre class="verbatim"><ol><li> \<span class="w">d</span>             <span class="w">Match</span> <span class="w">a</span> <span class="w">decimal</span> <span class="w">digit</span> <span class="w">character</span>.</li><li> \<span class="w">D</span>             <span class="w">Match</span> <span class="w">a</span> <span class="w">non</span>-<span class="w">decimal</span>-<span class="w">digit</span> <span class="w">character</span>.</li><li> \<span class="w">w</span>             <span class="w">Match</span> <span class="w">a</span> <span class="q">&quot;word&quot;</span> <span class="w">character</span>.</li><li> \<span class="w">W</span>             <span class="w">Match</span> <span class="w">a</span> <span class="w">non</span>-<span class="q">&quot;word&quot;</span> <span class="w">character</span>.</li><li> \<span class="w">s</span>             <span class="w">Match</span> <span class="w">a</span> <span class="w">whitespace</span> <span class="w">character</span>.</li><li> \<span class="w">S</span>             <span class="w">Match</span> <span class="w">a</span> <span class="w">non</span>-<span class="w">whitespace</span> <span class="w">character</span>.</li><li> \<span class="w">h</span>             <span class="w">Match</span> <span class="w">a</span> <span class="w">horizontal</span> <span class="w">whitespace</span> <span class="w">character</span>.</li><li> \<span class="w">H</span>             <span class="w">Match</span> <span class="w">a</span> <span class="w">character</span> <span class="w">that</span> <span class="w">isn&#39;t</span> <span class="w">horizontal</span> <span class="w">whitespace</span>.</li><li> \<span class="w">v</span>             <span class="w">Match</span> <span class="w">a</span> <span class="w">vertical</span> <span class="w">whitespace</span> <span class="w">character</span>.</li><li> \<span class="w">V</span>             <span class="w">Match</span> <span class="w">a</span> <span class="w">character</span> <span class="w">that</span> <span class="w">isn&#39;t</span> <span class="w">vertical</span> <span class="w">whitespace</span>.</li><li> \<span class="w">N</span>             <span class="w">Match</span> <span class="w">a</span> <span 
class="w">character</span> <span class="w">that</span> <span class="w">isn&#39;t</span> <span class="w">a</span> <span class="w">newline</span>.</li><li> \<span class="w">pP</span><span class="cm">,</span> \<span class="i">p</span><span class="s">{</span><span class="w">Prop</span><span class="s">}</span>  <span class="w">Match</span> <span class="w">a</span> <span class="w">character</span> <span class="w">that</span> <span class="w">has</span> <span class="w">the</span> <span class="w">given</span> <span class="w">Unicode</span> <span class="w">property</span>.</li><li> \<span class="w">PP</span><span class="cm">,</span> \<span class="i">P</span><span class="s">{</span><span class="w">Prop</span><span class="s">}</span>  <span class="w">Match</span> <span class="w">a</span> <span class="w">character</span> <span class="w">that</span> <span class="w">doesn&#39;t</span> <span class="w">have</span> <span class="w">the</span> <span class="w">Unicode</span> <span class="w">property</span></li></ol></pre><a name="%5cN"></a><h3>\N</h3>
<p><code class="inline">\<span class="w">N</span></code>
, available starting in v5.12, like the dot, matches any
character that is not a newline. The difference is that <code class="inline">\<span class="w">N</span></code>
 is not influenced
by the <i>single line</i> regular expression modifier (see <a href="#The-dot">The dot</a> above).  Note
that the form <code class="inline">\<span class="i">N</span><span class="s">{</span>...<span class="s">}</span></code>
 may mean something completely different.  When the
<code class="inline"><span class="s">{</span>...<span class="s">}</span></code>
 is a <a href="perlre.html#Quantifiers">quantifier</a>, it means to match a non-newline
character that many times.  For example, <code class="inline">\<span class="i">N</span><span class="s">{</span><span class="n">3</span><span class="s">}</span></code>
 means to match 3
non-newlines; <code class="inline">\<span class="w">N</span><span class="s">{</span><span class="n">5</span><span class="cm">,</span><span class="s">}</span></code>
 means to match 5 or more non-newlines.  But if <code class="inline"><span class="s">{</span>...<span class="s">}</span></code>

is not a legal quantifier, it is presumed to be a named character.  See
<a href="charnames.html">charnames</a> for those.  For example, none of <code class="inline">\<span class="i">N</span><span class="s">{</span><span class="w">COLON</span><span class="s">}</span></code>
, <code class="inline">\N{4F}</code>, and
<code class="inline">\<span class="i">N</span><span class="s">{</span><span class="w">F4</span><span class="s">}</span></code>
 contain legal quantifiers, so Perl will try to find characters whose
names are respectively <code class="inline"><span class="w">COLON</span></code>
, <code class="inline">4F</code>, and <code class="inline"><span class="w">F4</span></code>
.</p>
<a name="Digits"></a><h3>Digits</h3>
<p><code class="inline">\<span class="w">d</span></code>
 matches a single character considered to be a decimal <i>digit</i>.
If the <code class="inline"><span class="q">/a</span></code>
 regular expression modifier is in effect, it matches [0-9].
Otherwise, it
matches anything that is matched by <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">Digit</span><span class="s">}</span></code>
, which includes [0-9].
(An unlikely possible exception is that under locale matching rules, the
current locale might not have <code class="inline"><span class="s">[</span><span class="n">0</span>-<span class="n">9</span><span class="s">]</span></code>
 matched by <code class="inline">\<span class="w">d</span></code>
, and/or might match
other characters whose code point is less than 256.  The only such locale
definitions that are legal would be to match <code class="inline"><span class="s">[</span><span class="n">0</span>-<span class="n">9</span><span class="s">]</span></code>
 plus another set of
10 consecutive digit characters;  anything else would be in violation of
the C language standard, but Perl doesn't currently assume anything in
regard to this.)</p>
<p>What this means is that unless the <code class="inline"><span class="q">/a</span></code>
 modifier is in effect, <code class="inline">\<span class="w">d</span></code>
 not
only matches the digits '0' - '9', but also Arabic, Devanagari, and
digits from other languages.  This may cause some confusion, and some
security issues.</p>
<p>Some digits that <code class="inline">\<span class="w">d</span></code>
 matches look like some of the [0-9] ones, but
have different values.  For example, BENGALI DIGIT FOUR (U+09EA) looks
very much like an ASCII DIGIT EIGHT (U+0038).  An application that
is expecting only the ASCII digits might be misled, or if the match is
<code class="inline">\<span class="w">d</span>+</code>
, the matched string might contain a mixture of digits from
different writing systems that look like they signify a number different
than they actually do.  <a href="Unicode/UCD.html#num()">num() in Unicode::UCD</a> can
be used to safely
calculate the value, returning <code class="inline"><a class="l_k" href="functions/undef.html">undef</a></code> if the input string contains
such a mixture.</p>
<p>What <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">Digit</span><span class="s">}</span></code>
 means (and hence <code class="inline">\<span class="w">d</span></code>
 except under the <code class="inline"><span class="q">/a</span></code>

modifier) is <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">General_Category</span>=<span class="w">Decimal_Number</span><span class="s">}</span></code>
, or synonymously,
<code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">General_Category</span>=<span class="w">Digit</span><span class="s">}</span></code>
.  Starting with Unicode version 4.1, this
is the same set of characters matched by <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">Numeric_Type</span>=<span class="w">Decimal</span><span class="s">}</span></code>
.
But Unicode also has a different property with a similar name,
<code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">Numeric_Type</span>=<span class="w">Digit</span><span class="s">}</span></code>
, which matches a completely different set of
characters.  These characters are things such as <code class="inline"><span class="w">CIRCLED</span> <span class="w">DIGIT</span> <span class="w">ONE</span></code>

or subscripts, or are from writing systems that lack all ten digits.</p>
<p>The design intent is for <code class="inline">\<span class="w">d</span></code>
 to exactly match the set of characters
that can safely be used with "normal" big-endian positional decimal
syntax, where, for example, 123 means one 'hundred', plus two 'tens',
plus three 'ones'.  This positional notation does not necessarily apply
to characters that match the other type of "digit",
<code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">Numeric_Type</span>=<span class="w">Digit</span><span class="s">}</span></code>
, and so <code class="inline">\<span class="w">d</span></code>
 doesn't match them.</p>
<p>The Tamil digits (U+0BE6 - U+0BEF) can also legally be
used in old-style Tamil numbers in which they would appear no more than
one in a row, separated by characters that mean "times 10", "times 100",
etc.  (See <a href="http://www.unicode.org/notes/tn21">http://www.unicode.org/notes/tn21</a>.)</p>
<p>Any character not matched by <code class="inline">\<span class="w">d</span></code>
 is matched by <code class="inline">\<span class="w">D</span></code>
.</p>
<a name="Word-characters"></a><h3>Word characters</h3>
<p>A <code class="inline">\<span class="w">w</span></code>
 matches a single alphanumeric character (an alphabetic character, or a
decimal digit); or a connecting punctuation character, such as an
underscore ("_"); or a "mark" character (like some sort of accent) that
attaches to one of those.  It does not match a whole word.  To match a
whole word, use <code class="inline">\<span class="w">w</span>+</code>
.  This isn't the same thing as matching an
English word, but in the ASCII range it is the same as a string of
Perl-identifier characters.</p>
<ul>
<li><a name="If-the-%2fa-modifier-is-in-effect-..."></a><b>If the <code class="inline"><span class="q">/a</span></code>
 modifier is in effect ...</b>
<p><code class="inline">\<span class="w">w</span></code>
 matches the 63 characters [a-zA-Z0-9_].</p>
</li>
<li><a name="otherwise-..."></a><b>otherwise ...</b>
<ul>
<li><a name="For-code-points-above-255-..."></a><b>For code points above 255 ...</b>
<p><code class="inline">\<span class="w">w</span></code>
 matches the same as <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">Word</span><span class="s">}</span></code>
 matches in this range.  That is,
it matches Thai letters, Greek letters, etc.  This includes connector
punctuation (like the underscore) which connects two words together, or
diacritics, such as a <code class="inline"><span class="w">COMBINING</span> <span class="w">TILDE</span></code>
 and the modifier letters, which
are generally used to add auxiliary markings to letters.</p>
</li>
<li><a name="For-code-points-below-256-..."></a><b>For code points below 256 ...</b>
<ul>
<li><a name="if-locale-rules-are-in-effect-..."></a><b>if locale rules are in effect ...</b>
<p><code class="inline">\<span class="w">w</span></code>
 matches the platform's native underscore character plus whatever
the locale considers to be alphanumeric.</p>
</li>
<li><a name="if%2c-instead%2c-Unicode-rules-are-in-effect-..."></a><b>if, instead, Unicode rules are in effect ...</b>
<p><code class="inline">\<span class="w">w</span></code>
 matches exactly what <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">Word</span><span class="s">}</span></code>
 matches.</p>
</li>
<li><a name="otherwise-..."></a><b>otherwise ...</b>
<p><code class="inline">\<span class="w">w</span></code>
 matches [a-zA-Z0-9_].</p>
</li>
</ul>
</li>
</ul>
</li>
</ul>
<p>Which rules apply is determined as described in <a href="perlre.html#Which-character-set-modifier-is-in-effect%3f">Which character set modifier is in effect? in perlre</a>.</p>
<p>There are a number of security issues with the full Unicode list of word
characters.  See <a href="http://unicode.org/reports/tr36">http://unicode.org/reports/tr36</a>.</p>
<p>Also, for a somewhat finer-grained set of characters that are in programming
language identifiers beyond the ASCII range, you may wish to instead use the
more customized <a href="#Unicode-Properties">Unicode Properties</a>, <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">ID_Start</span><span class="s">}</span></code>
,
<code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">ID_Continue</span><span class="s">}</span></code>
, <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">XID_Start</span><span class="s">}</span></code>
, and <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">XID_Continue</span><span class="s">}</span></code>
.  See
<a href="http://unicode.org/reports/tr31">http://unicode.org/reports/tr31</a>.</p>
<p>Any character not matched by <code class="inline">\<span class="w">w</span></code>
 is matched by <code class="inline">\<span class="w">W</span></code>
.</p>
<a name="Whitespace"></a><h3>Whitespace</h3>
<p><code class="inline">\s</code> matches any single character considered whitespace.</p>
<ul>
<li><a name="If-the-%2fa-modifier-is-in-effect-..."></a><b>If the <code class="inline"><span class="q">/a</span></code>
 modifier is in effect ...</b>
<p>In all Perl versions, <code class="inline">\s</code> matches the 5 characters [\t\n\f\r ]; that
is, the horizontal tab,
the newline, the form feed, the carriage return, and the space.
Starting in Perl v5.18, it also matches the vertical tab, <code class="inline">\<span class="w">cK</span></code>
.
See note <code class="inline"><span class="s">[</span><span class="n">1</span><span class="s">]</span></code>
 below for a discussion of this.</p>
</li>
<li><a name="otherwise-..."></a><b>otherwise ...</b>
<ul>
<li><a name="For-code-points-above-255-..."></a><b>For code points above 255 ...</b>
<p><code class="inline">\s</code> matches exactly the code points above 255 shown with an "s" column
in the table below.</p>
</li>
<li><a name="For-code-points-below-256-..."></a><b>For code points below 256 ...</b>
<ul>
<li><a name="if-locale-rules-are-in-effect-..."></a><b>if locale rules are in effect ...</b>
<p><code class="inline">\s</code> matches whatever the locale considers to be whitespace.</p>
</li>
<li><a name="if%2c-instead%2c-Unicode-rules-are-in-effect-..."></a><b>if, instead, Unicode rules are in effect ...</b>
<p><code class="inline">\s</code> matches exactly the characters shown with an "s" column in the
table below.</p>
</li>
<li><a name="otherwise-..."></a><b>otherwise ...</b>
<p><code class="inline">\s</code> matches [\t\n\f\r ] and, starting in Perl
v5.18, the vertical tab, <code class="inline">\<span class="w">cK</span></code>
.
(See note <code class="inline"><span class="s">[</span><span class="n">1</span><span class="s">]</span></code>
 below for a discussion of this.)
Note that this list doesn't include the non-breaking space.</p>
</li>
</ul>
</li>
</ul>
</li>
</ul>
<p>Which rules apply is determined as described in <a href="perlre.html#Which-character-set-modifier-is-in-effect%3f">Which character set modifier is in effect? in perlre</a>.</p>
<p>Any character not matched by <code class="inline">\s</code> is matched by <code class="inline">\<span class="w">S</span></code>
.</p>
<p><code class="inline">\<span class="w">h</span></code>
 matches any character considered horizontal whitespace;
this includes the platform's space and tab characters and several others
listed in the table below.  <code class="inline">\<span class="w">H</span></code>
 matches any character
not considered horizontal whitespace.  They use the platform's native
character set, and do not consider any locale that may otherwise be in
use.</p>
<p><code class="inline">\<span class="w">v</span></code>
 matches any character considered vertical whitespace;
this includes the platform's carriage return and line feed characters (newline)
plus several other characters, all listed in the table below.
<code class="inline">\<span class="w">V</span></code>
 matches any character not considered vertical whitespace.
They use the platform's native character set, and do not consider any
locale that may otherwise be in use.</p>
<p><code class="inline">\<span class="w">R</span></code>
 matches anything that can be considered a newline under Unicode
rules. It can match a multi-character sequence. It cannot be used inside
a bracketed character class; use <code class="inline">\<span class="w">v</span></code>
 instead (vertical whitespace).
It uses the platform's
native character set, and does not consider any locale that may
otherwise be in use.
Details are discussed in <a href="perlrebackslash.html">perlrebackslash</a>.</p>
<p>Note that unlike <code class="inline">\s</code> (and <code class="inline">\<span class="w">d</span></code>
 and <code class="inline">\<span class="w">w</span></code>
), <code class="inline">\<span class="w">h</span></code>
 and <code class="inline">\<span class="w">v</span></code>
 always match
the same characters, without regard to other factors, such as the active
locale or whether the source string is in UTF-8 format.</p>
<p>One might think that <code class="inline">\s</code> is equivalent to <code class="inline"><span class="s">[</span>\<span class="w">h</span>\<span class="w">v</span><span class="s">]</span></code>
. This is indeed true
starting in Perl v5.18, but prior to that, the sole difference was that the
vertical tab (<code class="inline"><span class="q">&quot;\cK&quot;</span></code>
) was not matched by <code class="inline">\s</code>.</p>
<p>The following table is a complete listing of characters matched by
<code class="inline">\s</code>, <code class="inline">\<span class="w">h</span></code>
 and <code class="inline">\<span class="w">v</span></code>
 as of Unicode 6.3.</p>
<p>The first column gives the Unicode code point of the character (in hex format),
the second column gives the (Unicode) name. The third column indicates
by which class(es) the character is matched (assuming no locale is in
effect that changes the <code class="inline">\s</code> matching).</p>
<pre class="verbatim"><ol><li> <span class="n">0x0009</span>        <span class="w">CHARACTER</span> <span class="w">TABULATION</span>   <span class="w">h</span> <span class="q">s</span></li><li> <span class="q"> 0x000a</span>              <span class="w">LINE</span> <span class="w">FEED</span> <span class="s">(</span><span class="w">LF</span><span class="s">)</span>    <span class="w">vs</span></li><li> <span class="n">0x000b</span>             <span class="w">LINE</span> <span class="w">TABULATION</span>    <span class="w">vs</span>  <span class="s">[</span><span class="n">1</span><span class="s">]</span></li><li> <span class="n">0x000c</span>              <span class="w">FORM</span> <span class="w">FEED</span> <span class="s">(</span><span class="w">FF</span><span class="s">)</span>    <span class="w">vs</span></li><li> <span class="n">0x000d</span>        <span class="w">CARRIAGE</span> <span class="w">RETURN</span> <span class="s">(</span><span class="w">CR</span><span class="s">)</span>    <span class="w">vs</span></li><li> <span class="n">0x0020</span>                       <span class="w">SPACE</span>   <span class="w">h</span> <span class="q">s</span></li><li> <span class="q"> 0x0085</span>             <span class="w">NEXT</span> <span class="w">LINE</span> <span class="s">(</span><span class="w">NEL</span><span class="s">)</span>    <span class="w">vs</span>  <span class="s">[</span><span class="n">2</span><span class="s">]</span></li><li> <span class="n">0x00a0</span>              <span class="w">NO</span>-<span class="w">BREAK</span> <span class="w">SPACE</span>   <span class="w">h</span> <span class="q">s  [2]</span></li><li> <span class="q"> 0x1680</span>            <span class="w">OGHAM</span> <span class="w">SPACE</span> <span class="w">MARK</span>   <span class="w">h</span> <span class="q">s</span></li><li> <span class="q"> 0x2000</span>                     <span class="w">EN</span> <span class="w">QUAD</span>   <span class="w">h</span> <span 
class="q">s</span></li><li> <span class="q"> 0x2001</span>                     <span class="w">EM</span> <span class="w">QUAD</span>   <span class="w">h</span> <span class="q">s</span></li><li> <span class="q"> 0x2002</span>                    <span class="w">EN</span> <span class="w">SPACE</span>   <span class="w">h</span> <span class="q">s</span></li><li> <span class="q"> 0x2003</span>                    <span class="w">EM</span> <span class="w">SPACE</span>   <span class="w">h</span> <span class="q">s</span></li><li> <span class="q"> 0x2004</span>          <span class="w">THREE</span>-<span class="w">PER</span>-<span class="w">EM</span> <span class="w">SPACE</span>   <span class="w">h</span> <span class="q">s</span></li><li> <span class="q"> 0x2005</span>           <span class="w">FOUR</span>-<span class="w">PER</span>-<span class="w">EM</span> <span class="w">SPACE</span>   <span class="w">h</span> <span class="q">s</span></li><li> <span class="q"> 0x2006</span>            <span class="w">SIX</span>-<span class="w">PER</span>-<span class="w">EM</span> <span class="w">SPACE</span>   <span class="w">h</span> <span class="q">s</span></li><li> <span class="q"> 0x2007</span>                <span class="w">FIGURE</span> <span class="w">SPACE</span>   <span class="w">h</span> <span class="q">s</span></li><li> <span class="q"> 0x2008</span>           <span class="w">PUNCTUATION</span> <span class="w">SPACE</span>   <span class="w">h</span> <span class="q">s</span></li><li> <span class="q"> 0x2009</span>                  <span class="w">THIN</span> <span class="w">SPACE</span>   <span class="w">h</span> <span class="q">s</span></li><li> <span class="q"> 0x200a</span>                  <span class="w">HAIR</span> <span class="w">SPACE</span>   <span class="w">h</span> <span class="q">s</span></li><li> <span class="q"> 0x2028              LINE SEPARATOR    vs</span></li><li> <span class="q"> 0x2029</span>         <span class="w">PARAGRAPH</span> <span 
class="w">SEPARATOR</span>    <span class="w">vs</span></li><li> <span class="n">0x202f</span>       <span class="w">NARROW</span> <span class="w">NO</span>-<span class="w">BREAK</span> <span class="w">SPACE</span>   <span class="w">h</span> <span class="q">s</span></li><li> <span class="q"> 0x205f   MEDIUM MATHEMATICAL SPACE   h s</span></li><li> <span class="q"> 0x3000</span>           <span class="w">IDEOGRAPHIC</span> <span class="w">SPACE</span>   <span class="w">h</span> <span class="q">s</span></li></ol></pre><ul>
<li><a name="%5b1%5d"></a><b>[1]</b>
<p>Prior to Perl v5.18, <code class="inline">\s</code> did not match the vertical tab.
<code class="inline"><span class="s">[</span>^\<span class="w">S</span>\<span class="w">cK</span><span class="s">]</span></code>
 (obscurely) matches what <code class="inline">\s</code> traditionally did.</p>
</li>
<li><a name="%5b2%5d"></a><b>[2]</b>
<p>NEXT LINE and NO-BREAK SPACE may or may not match <code class="inline">\s</code> depending
on the rules in effect.  See
<a href="#Whitespace">the beginning of this section</a>.</p>
</li>
</ul>
<a name="Unicode-Properties"></a><h3>Unicode Properties</h3>
<p><code class="inline">\<span class="w">pP</span></code>
 and <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">Prop</span><span class="s">}</span></code>
 are character classes to match characters that fit given
Unicode properties.  One-letter property names can be used in the <code class="inline">\<span class="w">pP</span></code>
 form,
with the property name following the <code class="inline">\<span class="w">p</span></code>
; otherwise, braces are required.
When using braces, there is a single form, which is just the property name
enclosed in the braces, and a compound form which looks like <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">name</span>=<span class="w">value</span><span class="s">}</span></code>
,
which means to match if the property "name" for the character has that particular
"value".
For instance, a match for a number can be written as <code class="inline"><span class="q">/\pN/</span></code>
 or as
<code class="inline"><span class="q">/\p{Number}/</span></code>
, or as <code class="inline"><span class="q">/\p{Number=True}/</span></code>
.
Lowercase letters are matched by the property <i>Lowercase_Letter</i> which
has the short form <i>Ll</i>. They need the braces, so are written as <code class="inline"><span class="q">/\p{Ll}/</span></code>
 or
<code class="inline"><span class="q">/\p{Lowercase_Letter}/</span></code>
, or <code class="inline"><span class="q">/\p{General_Category=Lowercase_Letter}/</span></code>

(the underscores are optional).
<code class="inline"><span class="q">/\pLl/</span></code>
 is valid, but means something different.
It matches a two character string: a letter (Unicode property <code class="inline">\<span class="w">pL</span></code>
),
followed by a lowercase <code class="inline"><span class="w">l</span></code>
.</p>
<p>If locale rules are not in effect, the use of
a Unicode property will force the regular expression into using Unicode
rules, if it isn't already.</p>
<p>Note that almost all properties are immune to case-insensitive matching.
That is, adding a <code class="inline">/i</code> regular expression modifier does not change what
they match.  There are two sets that are affected.  The first set is
<code class="inline"><span class="w">Uppercase_Letter</span></code>
,
<code class="inline"><span class="w">Lowercase_Letter</span></code>
,
and <code class="inline"><span class="w">Titlecase_Letter</span></code>
,
all of which match <code class="inline"><span class="w">Cased_Letter</span></code>
 under <code class="inline">/i</code> matching.
The second set is
<code class="inline"><span class="w">Uppercase</span></code>
,
<code class="inline"><span class="w">Lowercase</span></code>
,
and <code class="inline"><span class="w">Titlecase</span></code>
,
all of which match <code class="inline"><span class="w">Cased</span></code>
 under <code class="inline">/i</code> matching.
(The difference between these sets is that some things, such as Roman
numerals, come in both upper and lower case, so they are <code class="inline"><span class="w">Cased</span></code>
, but
aren't considered to be letters, so they aren't <code class="inline"><span class="w">Cased_Letter</span></code>
s. They're
actually <code class="inline"><span class="w">Letter_Number</span></code>
s.)
This set also includes its subsets <code class="inline"><span class="w">PosixUpper</span></code>
 and <code class="inline"><span class="w">PosixLower</span></code>
, both
of which under <code class="inline">/i</code> match <code class="inline"><span class="w">PosixAlpha</span></code>
.</p>
<p>For more details on Unicode properties, see <a href="perlunicode.html#Unicode-Character-Properties">Unicode Character Properties in perlunicode</a>; for a
complete list of possible properties, see
<a href="perluniprops.html#Properties-accessible-through-%5cp%7b%7d-and-%5cP%7b%7d">Properties accessible through \p{} and \P{} in perluniprops</a>,
which notes all forms that have <code class="inline">/i</code> differences.
It is also possible to define your own properties. This is discussed in
<a href="perlunicode.html#User-Defined-Character-Properties">User-Defined Character Properties in perlunicode</a>.</p>
<p>Unicode properties are defined (surprise!) only on Unicode code points.
Starting in v5.20, when matching against <code class="inline">\<span class="w">p</span></code>
 and <code class="inline">\<span class="w">P</span></code>
, Perl treats
non-Unicode code points (those above the legal Unicode maximum of
0x10FFFF) as if they were typical unassigned Unicode code points.</p>
<p>Prior to v5.20, Perl raised a warning and made all matches fail on
non-Unicode code points.  This could be somewhat surprising:</p>
<pre class="verbatim"><ol><li> <a class="l_k" href="functions/chr.html">chr</a><span class="s">(</span><span class="n">0x110000</span><span class="s">)</span> =~ \<span class="i">p</span><span class="s">{</span><span class="w">ASCII_Hex_Digit</span>=<span class="w">True</span><span class="s">}</span>     <span class="c"># Fails on Perls &lt; v5.20.</span></li><li> <a class="l_k" href="functions/chr.html">chr</a><span class="s">(</span><span class="n">0x110000</span><span class="s">)</span> =~ \<span class="i">p</span><span class="s">{</span><span class="w">ASCII_Hex_Digit</span>=<span class="w">False</span><span class="s">}</span>    <span class="c"># Also fails on Perls</span></li><li>                                               <span class="c"># &lt; v5.20</span></li></ol></pre><p>Even though these two matches might be thought of as complements, until
v5.20 they were so only on Unicode code points.</p>
<h4>Examples</h4>
<pre class="verbatim"><ol><li> "a"  =~  /\w/      # Match, "a" is a 'word' character.</li><li> "7"  =~  /\w/      # Match, "7" is a 'word' character as well.</li><li> "a"  =~  /\d/      # No match, "a" isn't a digit.</li><li> "7"  =~  /\d/      # Match, "7" is a digit.</li><li> " "  =~  /\s/      # Match, a space is whitespace.</li><li> "a"  =~  /\D/      # Match, "a" is a non-digit.</li><li> "7"  =~  /\D/      # No match, "7" is not a non-digit.</li><li> " "  =~  /\S/      # No match, a space is not non-whitespace.</li><li></li><li> " "  =~  /\h/      # Match, space is horizontal whitespace.</li><li> " "  =~  /\v/      # No match, space is not vertical whitespace.</li><li> "\r" =~  /\v/      # Match, a return is vertical whitespace.</li><li></li><li> "a"  =~  /\pL/     # Match, "a" is a letter.</li><li> "a"  =~  /\p{Lu}/  # No match, /\p{Lu}/ matches upper case letters.</li><li></li><li> "\x{0e0b}" =~ /\p{Thai}/  # Match, \x{0e0b} is the character</li><li>                           # 'THAI CHARACTER SO SO', and that's in</li><li>                           # Thai Unicode class.</li><li> "a"  =~  /\P{Lao}/ # Match, as "a" is not a Laotian character.</li></ol></pre><p>It is worth emphasizing that <code class="inline">\<span class="w">d</span></code>
, <code class="inline">\<span class="w">w</span></code>
, etc., match single characters, not
complete numbers or words. To match a number (that consists of digits),
use <code class="inline">\<span class="w">d</span>+</code>
; to match a word, use <code class="inline">\<span class="w">w</span>+</code>
.  But be aware of the security
considerations in doing so, as mentioned above.</p>
<a name="Bracketed-Character-Classes"></a><h2>Bracketed Character Classes</h2>
<p>The third form of character class you can use in Perl regular expressions
is the bracketed character class.  In its simplest form, it lists the characters
that may be matched, surrounded by square brackets, like this: <code class="inline"><span class="s">[</span><span class="w">aeiou</span><span class="s">]</span></code>
.
This matches one of <code class="inline"><span class="w">a</span></code>
, <code class="inline"><span class="w">e</span></code>
, <code class="inline"><span class="w">i</span></code>
, <code class="inline"><span class="w">o</span></code>
 or <code class="inline"><span class="w">u</span></code>
.  Like the other
character classes, exactly one character is matched.* To match
a longer string consisting of characters mentioned in the character
class, follow the character class with a <a href="perlre.html#Quantifiers">quantifier</a>.  For
instance, <code class="inline"><span class="s">[</span><span class="w">aeiou</span><span class="s">]</span>+</code>
 matches one or more lowercase English vowels.</p>
<p>Repeating a character in a character class has no
effect; it's considered to be in the set only once.</p>
<p>Examples:</p>
<pre class="verbatim"><ol><li> <span class="q">&quot;e&quot;</span>  =~  <span class="q">/[aeiou]/</span>        <span class="c"># Match, as &quot;e&quot; is listed in the class.</span></li><li> <span class="q">&quot;p&quot;</span>  =~  <span class="q">/[aeiou]/</span>        <span class="c"># No match, &quot;p&quot; is not listed in the class.</span></li><li> <span class="q">&quot;ae&quot;</span> =~  <span class="q">/^[aeiou]$/</span>      <span class="c"># No match, a character class only matches</span></li><li>                           <span class="c"># a single character.</span></li><li> <span class="q">&quot;ae&quot;</span> =~  <span class="q">/^[aeiou]+$/</span>     <span class="c"># Match, due to the quantifier.</span></li><li></li><li> -------</li></ol></pre><p>* There are two exceptions to a bracketed character class matching a
single character only.  Each requires special handling by Perl to make
things work:</p>
<ul>
<li>
<p>When the class is to match caselessly under <code class="inline">/i</code> matching rules, and a
character that is explicitly mentioned inside the class matches a
multiple-character sequence caselessly under Unicode rules, the class
will also match that sequence.  For example, Unicode says that the
letter <code class="inline"><span class="w">LATIN</span> <span class="w">SMALL</span> <span class="w">LETTER</span> <span class="w">SHARP</span> <span class="w">S</span></code>
 should match the sequence <code class="inline"><span class="w">ss</span></code>

under <code class="inline">/i</code> rules.  Thus,</p>
<pre class="verbatim"><ol><li> <span class="q">&#39;ss&#39;</span> =~ <span class="q">/\A\N{LATIN SMALL LETTER SHARP S}\z/i</span>             <span class="c"># Matches</span></li><li> <span class="q">&#39;ss&#39;</span> =~ <span class="q">/\A[aeioust\N{LATIN SMALL LETTER SHARP S}]\z/i</span>    <span class="c"># Matches</span></li></ol></pre><p>For this to happen, the class must not be inverted (see <a href="#Negation">Negation</a>)
and the character must be explicitly specified, and not be part of a
multi-character range (not even as one of its endpoints).  (<a href="#Character-Ranges">Character Ranges</a> will be explained shortly.) Therefore,</p>
<pre class="verbatim"><ol><li> <span class="q">&#39;ss&#39;</span> =~ <span class="q">/\A[\0-\x{ff}]\z/</span><span class="w">ui</span>       <span class="c"># Doesn&#39;t match</span></li><li> <span class="q">&#39;ss&#39;</span> =~ <span class="q">/\A[\0-\N{LATIN SMALL LETTER SHARP S}]\z/</span><span class="w">ui</span>   <span class="c"># No match</span></li><li> <span class="q">&#39;ss&#39;</span> =~ <span class="q">/\A[\xDF-\xDF]\z/</span><span class="w">ui</span>   <span class="c"># Matches on ASCII platforms, since</span></li><li>                               <span class="c"># \xDF is LATIN SMALL LETTER SHARP S,</span></li><li>                               <span class="c"># and the range is just a single</span></li><li>                               <span class="c"># element</span></li></ol></pre><p>Note that it isn't a good idea to specify these types of ranges anyway.</p>
</li>
<li>
<p>Some names known to <code class="inline">\<span class="i">N</span><span class="s">{</span>...<span class="s">}</span></code>
 refer to a sequence of multiple characters,
instead of the usual single character.  When one of these is included in
the class, the entire sequence is matched.  For example,</p>
<pre class="verbatim"><ol><li>  <span class="q">&quot;\N{TAMIL LETTER KA}\N{TAMIL VOWEL SIGN AU}&quot;</span></li><li>                              =~ <span class="q">/ ^ [\N{TAMIL SYLLABLE KAU}]  $ /x</span><span class="sc">;</span></li></ol></pre><p>matches, because <code class="inline">\<span class="i">N</span><span class="s">{</span><span class="w">TAMIL</span> <span class="w">SYLLABLE</span> <span class="w">KAU</span><span class="s">}</span></code>
 is a named sequence
consisting of the two characters matched against.  Like the other
instance where a bracketed class can match multiple characters, and for
similar reasons, the class must not be inverted, and the named sequence
may not appear in a range, even one where it is both endpoints.  If
these happen, it is a fatal error if the character class is within the
scope of <a href="re.html#'strict'-mode">use re &#39;strict&#39;</a>, or within an extended
<a href="#Extended-Bracketed-Character-Classes">(?[...])</a> class; otherwise
only the first code point is used (with a <code class="inline"><span class="w">regexp</span></code>
-type warning
raised).</p>
</li>
</ul>
<a name="Special-Characters-Inside-a-Bracketed-Character-Class"></a><h3>Special Characters Inside a Bracketed Character Class</h3>
<p>Most characters that are meta characters in regular expressions (that
is, characters that carry a special meaning like <code class="inline">.</code>, <code class="inline"><span class="i">*</span></code>
, or <code class="inline">(</code>) lose
their special meaning and can be used inside a character class without
the need to escape them. For instance, <code class="inline"><span class="s">[</span><span class="s">(</span><span class="s">)</span><span class="s">]</span></code>
 matches either an opening
parenthesis, or a closing parenthesis, and the parens inside the character
class don't group or capture.  Be aware that, unless the pattern is
evaluated in single-quotish context, variable interpolation will take
place before the bracketed class is parsed:</p>
<pre class="verbatim"><ol><li> <span class="i">$,</span> = <span class="q">&quot;\t| &quot;</span><span class="sc">;</span></li><li> <span class="i">$a</span> =~ <span class="q">m&#39;[$,]&#39;</span><span class="sc">;</span>        <span class="c"># single-quotish: matches &#39;$&#39; or &#39;,&#39;</span></li><li> <span class="i">$a</span> =~ <span class="q">q{[$,]}</span><span class="sc">;</span>        <span class="c"># same</span></li><li> <span class="i">$a</span> =~ <span class="q">m/[$,]/</span><span class="sc">;</span>        <span class="c"># double-quotish: matches &quot;\t&quot;, &quot;|&quot;, or &quot; &quot;</span></li></ol></pre><p>Characters that may carry a special meaning inside a character class are:
<code class="inline">\</code>
, <code class="inline">^</code>, <code class="inline">-</code>
, <code class="inline">[</code> and <code class="inline">]</code>, and are discussed below. They can be
escaped with a backslash, although this is sometimes not needed, in which
case the backslash may be omitted.</p>
<p>The sequence <code class="inline">\<span class="w">b</span></code>
 is special inside a bracketed character class. While
outside the character class, <code class="inline">\<span class="w">b</span></code>
 is an assertion indicating a point
that does not have either two word characters or two non-word characters
on either side, inside a bracketed character class, <code class="inline">\<span class="w">b</span></code>
 matches a
backspace character.</p>
<p>The sequences
<code class="inline">\<span class="w">a</span></code>
,
<code class="inline">\<span class="w">c</span></code>
,
<code class="inline">\<span class="w">e</span></code>
,
<code class="inline">\<span class="w">f</span></code>
,
<code class="inline">\<span class="w">n</span></code>
,
<code class="inline">\N{<i>NAME</i>}</code>,
<code class="inline">\N{U+<i>hex char</i>}</code>,
<code class="inline">\<span class="w">r</span></code>
,
<code class="inline">\<span class="w">t</span></code>
,
and
<code class="inline">\<span class="w">x</span></code>

are also special and have the same meanings as they do outside a
bracketed character class.</p>
<p>Also, a backslash followed by two or three octal digits is considered an octal
number.</p>
<p>A <code class="inline">[</code> is not special inside a character class, unless it's the start of a
POSIX character class (see <a href="#POSIX-Character-Classes">POSIX Character Classes</a> below). It normally does
not need escaping.</p>
<p>A <code class="inline">]</code> is normally either the end of a POSIX character class (see
<a href="#POSIX-Character-Classes">POSIX Character Classes</a> below), or it signals the end of the bracketed
character class.  If you want to include a <code class="inline">]</code> in the set of characters, you
must generally escape it.</p>
<p>However, if the <code class="inline">]</code> is the <i>first</i> (or the second if the first
character is a caret) character of a bracketed character class, it
does not denote the end of the class (as you cannot have an empty class)
and is considered part of the set of characters that can be matched without
escaping.</p>
<p>Examples:</p>
<pre class="verbatim"><ol><li> <span class="q">&quot;+&quot;</span>   =~ <span class="q">/[+?*]/</span>     <span class="c">#  Match, &quot;+&quot; in a character class is not special.</span></li><li> <span class="q">&quot;\cH&quot;</span> =~ <span class="q">/[\b]/</span>      <span class="c">#  Match, \b inside a character class</span></li><li>                      <span class="c">#  is equivalent to a backspace.</span></li><li> <span class="q">&quot;]&quot;</span>   =~ <span class="q">/[][]/</span>      <span class="c">#  Match, as the character class contains</span></li><li>                      <span class="c">#  both [ and ].</span></li><li> <span class="q">&quot;[]&quot;</span>  =~ <span class="q">/[[]]/</span>      <span class="c">#  Match, the pattern contains a character class</span></li><li>                      <span class="c">#  containing just [, and the character class is</span></li><li>                      <span class="c">#  followed by a ].</span></li></ol></pre><a name="Bracketed-Character-Classes-and-the-%2fxx-pattern-modifier"></a><h3>Bracketed Character Classes and the <code class="inline"><span class="q">/xx</span></code>
 pattern modifier</h3>
<p>Normally SPACE and TAB characters have no special meaning inside a
bracketed character class; they are just added to the list of characters
matched by the class.  But if the <a href="perlre.html#%2fx-and-%2fxx">/xx </a>
pattern modifier is in effect, they are generally ignored and can be
added to improve readability.  They can't be added in the middle of a
single construct:</p>
<pre class="verbatim"><ol><li> <span class="q">/ [ \x{10 FFFF} ] /xx</span>  <span class="c"># WRONG!</span></li></ol></pre><p>The SPACE in the middle of the hex constant is illegal.</p>
<p>To specify a literal SPACE character, you can escape it with a
backslash, like:</p>
<pre class="verbatim"><ol><li> <span class="q">/[ a e i o u \  ]/xx</span></li></ol></pre><p>This matches the English vowels plus the SPACE character.</p>
<p>For clarity, you should already have been using <code class="inline">\<span class="w">t</span></code>
 to specify a
literal tab, and <code class="inline">\<span class="w">t</span></code>
 is unaffected by <code class="inline"><span class="q">/xx</span></code>
.</p>
<a name="Character-Ranges"></a><h3>Character Ranges</h3>
<p>It is not uncommon to want to match a range of characters. Luckily, instead
of listing all characters in the range, one may use the hyphen (<code class="inline">-</code>
).
If inside a bracketed character class you have two characters separated
by a hyphen, it's treated as if all characters between the two were in
the class. For instance, <code class="inline"><span class="s">[</span><span class="n">0</span>-<span class="n">9</span><span class="s">]</span></code>
 matches any ASCII digit, and <code class="inline">[a-m]</code>
matches any lowercase letter from the first half of the ASCII alphabet.</p>
<p>Note that the two characters on either side of the hyphen are not
necessarily both letters or both digits. Any character is possible,
although not advisable.  <code class="inline">['-?]</code> contains a range of characters, but
most people will not know which characters that means.  Furthermore,
such ranges may lead to portability problems if the code has to run on
a platform that uses a different character set, such as EBCDIC.</p>
<p>If a hyphen in a character class cannot syntactically be part of a range, for
instance because it is the first or the last character of the character class,
or if it immediately follows a range, the hyphen isn't special, and so is
considered a character to be matched literally.  If you want a hyphen in
your set of characters to be matched and its position in the class is such
that it could be considered part of a range, you must escape that hyphen
with a backslash.</p>
<p>Examples:</p>
<pre class="verbatim"><ol><li> <span class="s">[</span><span class="w">a</span>-z<span class="s">]</span>       <span class="c">#  Matches a character that is a lower case ASCII letter.</span></li><li> <span class="s">[</span><span class="w">a</span>-<span class="w">fz</span><span class="s">]</span>      <span class="c">#  Matches any letter between &#39;a&#39; and &#39;f&#39; (inclusive) or</span></li><li>             <span class="c">#  the letter &#39;z&#39;.</span></li><li> <span class="s">[</span>-z<span class="s">]</span>        <span class="c">#  Matches either a hyphen (&#39;-&#39;) or the letter &#39;z&#39;.</span></li><li> <span class="s">[</span><span class="w">a</span>-f-<span class="q">m]     #  Matches any letter between &#39;a&#39; and &#39;f&#39; (inclusive), the</span></li><li>             <span class="q">             #  hyphen (&#39;-&#39;), or the letter &#39;m&#39;.</span></li><li> <span class="q"> [&#39;-?]</span>       <span class="c">#  Matches any of the characters  &#39;()*+,-./0123456789:;&lt;=&gt;?</span></li><li>             <span class="c">#  (But not on an EBCDIC platform).</span></li><li> <span class="s">[</span>\<span class="i">N</span><span class="s">{</span><span class="w">APOSTROPHE</span><span class="s">}</span>-\<span class="i">N</span><span class="s">{</span><span class="w">QUESTION</span> <span class="w">MARK</span><span class="s">}</span><span class="s">]</span></li><li>             <span class="c">#  Matches any of the characters  &#39;()*+,-./0123456789:;&lt;=&gt;?</span></li><li>             <span class="c">#  even on an EBCDIC platform.</span></li><li> <span class="s">[</span>\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">27</span><span class="s">}</span>-\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">3</span><span class="w">F</span><span class="s">}</span><span class="s">]</span> <span class="c"># Same. 
(U+27 is &quot;&#39;&quot;, and U+3F is &quot;?&quot;)</span></li></ol></pre><p>As the final two examples above show, you can achieve portability to
non-ASCII platforms by using the <code class="inline">\<span class="i">N</span><span class="s">{</span>...<span class="s">}</span></code>
 form for the range
endpoints.  These indicate that the specified range is to be interpreted
using Unicode values, so <code class="inline"><span class="s">[</span>\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">27</span><span class="s">}</span>-\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">3</span><span class="w">F</span><span class="s">}</span><span class="s">]</span></code>
 means to match
<code class="inline">\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">27</span><span class="s">}</span></code>
, <code class="inline">\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">28</span><span class="s">}</span></code>
, <code class="inline">\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">29</span><span class="s">}</span></code>
, ..., <code class="inline">\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">3</span><span class="w">D</span><span class="s">}</span></code>
, <code class="inline">\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">3</span><span class="w">E</span><span class="s">}</span></code>
,
and <code class="inline">\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">3</span><span class="w">F</span><span class="s">}</span></code>
, whatever the native code point versions for those are.
These are called "Unicode" ranges.  If either end is of the <code class="inline">\<span class="i">N</span><span class="s">{</span>...<span class="s">}</span></code>

form, the range is considered Unicode.  A <code class="inline"><span class="w">regexp</span></code>
 warning is raised
under <code class="inline"><span class="q">&quot;use re &#39;strict&#39;&quot;</span></code>
 if the other endpoint is specified
non-portably:</p>
<pre class="verbatim"><ol><li> <span class="s">[</span>\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">00</span><span class="s">}</span>-\<span class="w">x09</span><span class="s">]</span>    <span class="c"># Warning under re &#39;strict&#39;; \x09 is non-portable</span></li><li> <span class="s">[</span>\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">00</span><span class="s">}</span>-\<span class="w">t</span><span class="s">]</span>      <span class="c"># No warning;</span></li></ol></pre><p>Both of the above match the characters <code class="inline">\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">00</span><span class="s">}</span></code>
, <code class="inline">\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">01</span><span class="s">}</span></code>
, ...
<code class="inline">\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">08</span><span class="s">}</span></code>
, <code class="inline">\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">09</span><span class="s">}</span></code>
, but the <code class="inline">\<span class="w">x09</span></code>
 looks like it could be a
mistake so the warning is raised (under <code class="inline"><span class="w">re</span> <span class="q">&#39;strict&#39;</span></code>
) for it.</p>
<p>Perl also guarantees that the ranges <code class="inline"><span class="w">A</span>-<span class="w">Z</span></code>
, <code class="inline"><span class="w">a</span>-z</code>
, <code class="inline"><span class="n">0</span>-<span class="n">9</span></code>
, and any
subranges of these match what an English-only speaker would expect them
to match on any platform.  That is, <code class="inline"><span class="s">[</span><span class="w">A</span>-<span class="w">Z</span><span class="s">]</span></code>
 matches the 26 ASCII
uppercase letters;
<code class="inline"><span class="s">[</span><span class="w">a</span>-z<span class="s">]</span></code>
 matches the 26 lowercase letters; and <code class="inline"><span class="s">[</span><span class="n">0</span>-<span class="n">9</span><span class="s">]</span></code>
 matches the 10
digits.  Subranges, like <code class="inline"><span class="s">[</span><span class="w">h</span>-k<span class="s">]</span></code>
, match correspondingly, in this case
just the four letters <code class="inline"><span class="q">&quot;h&quot;</span></code>
, <code class="inline"><span class="q">&quot;i&quot;</span></code>
, <code class="inline"><span class="q">&quot;j&quot;</span></code>
, and <code class="inline"><span class="q">&quot;k&quot;</span></code>
.  This is the
natural behavior on ASCII platforms where the code points (ordinal
values) for <code class="inline"><span class="q">&quot;h&quot;</span></code>
 through <code class="inline"><span class="q">&quot;k&quot;</span></code>
 are consecutive integers (0x68 through
0x6B).  But special handling to achieve this may be needed on platforms
with a non-ASCII native character set.  For example, on EBCDIC
platforms, the code point for <code class="inline"><span class="q">&quot;h&quot;</span></code>
 is 0x88, <code class="inline"><span class="q">&quot;i&quot;</span></code>
 is 0x89, <code class="inline"><span class="q">&quot;j&quot;</span></code>
 is
0x91, and <code class="inline"><span class="q">&quot;k&quot;</span></code>
 is 0x92.   Perl specially treats <code class="inline"><span class="s">[</span><span class="w">h</span>-k<span class="s">]</span></code>
 to exclude the
seven code points in the gap: 0x8A through 0x90.  This special handling is
only invoked when the range is a subrange of one of the ASCII uppercase,
lowercase, and digit ranges, AND each end of the range is expressed
either as a literal, like <code class="inline"><span class="q">&quot;A&quot;</span></code>
, or as a named character (<code class="inline">\<span class="i">N</span><span class="s">{</span>...<span class="s">}</span></code>
,
including the <code class="inline">\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+...</code>
 form).</p>
<p>EBCDIC Examples:</p>
<pre class="verbatim"><ol><li> <span class="s">[</span><span class="w">i</span>-<span class="w">j</span><span class="s">]</span>               <span class="c">#  Matches either &quot;i&quot; or &quot;j&quot;</span></li><li> <span class="s">[</span><span class="w">i</span>-\<span class="i">N</span><span class="s">{</span><span class="w">LATIN</span> <span class="w">SMALL</span> <span class="w">LETTER</span> <span class="w">J</span><span class="s">}</span><span class="s">]</span>  <span class="c"># Same</span></li><li> <span class="s">[</span><span class="w">i</span>-\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">6</span><span class="w">A</span><span class="s">}</span><span class="s">]</span>        <span class="c">#  Same</span></li><li> <span class="s">[</span>\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">69</span><span class="s">}</span>-\<span class="i">N</span><span class="s">{</span><span class="w">U</span>+<span class="n">6</span><span class="w">A</span><span class="s">}</span><span class="s">]</span> <span class="c">#  Same</span></li><li> <span class="s">[</span>\<span class="i">x</span><span class="s">{</span><span class="n">89</span><span class="s">}</span>-\<span class="i">x</span><span class="s">{</span><span class="n">91</span><span class="s">}</span><span class="s">]</span>     <span class="c">#  Matches 0x89 (&quot;i&quot;), 0x8A .. 
0x90, 0x91 (&quot;j&quot;)</span></li><li> <span class="s">[</span><span class="w">i</span>-\<span class="i">x</span><span class="s">{</span><span class="n">91</span><span class="s">}</span><span class="s">]</span>          <span class="c">#  Same</span></li><li> <span class="s">[</span>\<span class="i">x</span><span class="s">{</span><span class="n">89</span><span class="s">}</span>-<span class="w">j</span><span class="s">]</span>          <span class="c">#  Same</span></li><li> <span class="s">[</span><span class="w">i</span>-<span class="w">J</span><span class="s">]</span>               <span class="c">#  Matches 0x89 (&quot;i&quot;) .. 0xD1 (&quot;J&quot;); special</span></li><li>                     <span class="c">#  handling doesn&#39;t apply because range is mixed</span></li><li>                     <span class="c">#  case</span></li></ol></pre><a name="Negation"></a><h3>Negation</h3>
<p>It is also possible to instead list the characters you do not want to
match. You can do so by using a caret (<code class="inline">^</code>) as the first character in the
character class. For instance, <code class="inline">[^a-z]</code> matches any character that is not a
lowercase ASCII letter, which therefore includes more than a million
Unicode code points.  The class is said to be "negated" or "inverted".</p>
<p>This syntax makes the caret a special character inside a bracketed character
class, but only if it is the first character of the class. So if you want
the caret as one of the characters to match, either escape the caret or
else don't list it first.</p>
<p>In inverted bracketed character classes, Perl ignores the Unicode rules
that normally say that named sequences, and certain characters, should
match a sequence of multiple characters under caseless <code class="inline">/i</code>
matching.  Following those rules could lead to highly confusing
situations:</p>
<pre class="verbatim"><ol><li> <span class="q">&quot;ss&quot;</span> =~ <span class="q">/^[^\xDF]+$/</span><span class="w">ui</span><span class="sc">;</span>   <span class="c"># Matches!</span></li></ol></pre><p>This should match any sequences of characters that aren't <code class="inline">\<span class="w">xDF</span></code>
 nor
what <code class="inline">\<span class="w">xDF</span></code>
 matches under <code class="inline">/i</code>.  <code class="inline"><span class="q">&quot;s&quot;</span></code>
 isn't <code class="inline">\<span class="w">xDF</span></code>
, but Unicode
says that <code class="inline"><span class="q">&quot;ss&quot;</span></code>
 is what <code class="inline">\<span class="w">xDF</span></code>
 matches under <code class="inline">/i</code>.  So which one
"wins"? Do you fail the match because the string has <code class="inline"><span class="w">ss</span></code>
 or accept it
because it has an <code class="inline">s</code> followed by another <code class="inline">s</code>?  Perl has chosen the
latter.  (See note in <a href="#Bracketed-Character-Classes">Bracketed Character Classes</a> above.)</p>
<p>Examples:</p>
<pre class="verbatim"><ol><li> "e"  =~  /[^aeiou]/   #  No match, the 'e' is listed.</li><li> "x"  =~  /[^aeiou]/   #  Match, as 'x' isn't a lowercase vowel.</li><li> "^"  =~  /[^^]/       #  No match, matches anything that isn't a caret.</li><li> "^"  =~  /[x^]/       #  Match, caret is not special here.</li></ol></pre><a name="Backslash-Sequences"></a><h3>Backslash Sequences</h3>
<p>You can put any backslash sequence character class (with the exception of
<code class="inline">\<span class="w">N</span></code>
 and <code class="inline">\<span class="w">R</span></code>
) inside a bracketed character class, and it will act just
as if you had put all characters matched by the backslash sequence inside the
character class. For instance, <code class="inline"><span class="s">[</span><span class="w">a</span>-f\<span class="w">d</span><span class="s">]</span></code>
 matches any decimal digit, or any
of the lowercase letters between 'a' and 'f' inclusive.</p>
<p><code class="inline">\<span class="w">N</span></code>
 within a bracketed character class must be of the forms <code class="inline">\N{<i>name</i>}</code>
or <code class="inline">\N{U+<i>hex char</i>}</code>, and NOT be the form that matches non-newlines,
for the same reason that a dot <code class="inline">.</code> inside a bracketed character class loses
its special meaning: it matches nearly anything, which generally isn't what you
want to happen.</p>
<p>Examples:</p>
<pre class="verbatim"><ol><li> /[\p{Thai}\d]/     # Matches a character that is either a Thai</li><li>                    # character, or a digit.</li><li> /[^\p{Arabic}()]/  # Matches a character that is neither an Arabic</li><li>                    # character, nor a parenthesis.</li></ol></pre><p>Backslash sequence character classes cannot form one of the endpoints
of a range.  Thus, you can't say:</p>
<pre class="verbatim"><ol><li> <span class="q">/[\p{Thai}-\d]/</span>     <span class="c"># Wrong!</span></li></ol></pre><a name="POSIX-Character-Classes"></a><h3>POSIX Character Classes
  
      
      </h3>
<p>POSIX character classes have the form <code class="inline">[:class:]</code>, where <i>class</i> is the
name, and <code class="inline">[:</code> and <code class="inline">:]</code> are the delimiters. POSIX character classes only appear
<i>inside</i> bracketed character classes, and are a convenient and descriptive
way of listing a group of characters.</p>
<p>Be careful about the syntax,</p>
<pre class="verbatim"><ol><li> # Correct:</li><li> $string =~ /[[:alpha:]]/</li><li></li><li> # Incorrect (will warn):</li><li> $string =~ /[:alpha:]/</li></ol></pre><p>The latter pattern would be a character class consisting of a colon,
and the letters <code class="inline"><span class="w">a</span></code>
, <code class="inline"><span class="w">l</span></code>
, <code class="inline"><span class="w">p</span></code>
 and <code class="inline"><span class="w">h</span></code>
.</p>
<p>POSIX character classes can be part of a larger bracketed character class.
For example,</p>
<pre class="verbatim"><ol><li> [01[:alpha:]%]</li></ol></pre><p>is valid and matches '0', '1', any alphabetic character, and the percent sign.</p>
<p>Perl recognizes the following POSIX character classes:</p>
<pre class="verbatim"><ol><li> <span class="w">alpha</span>  <span class="w">Any</span> <span class="w">alphabetical</span> <span class="w">character</span> <span class="s">(</span><span class="q">&quot;[A-Za-z]&quot;</span><span class="s">)</span>.</li><li> <span class="w">alnum</span>  <span class="w">Any</span> <span class="w">alphanumeric</span> <span class="w">character</span> <span class="s">(</span><span class="q">&quot;[A-Za-z0-9]&quot;</span><span class="s">)</span>.</li><li> <span class="w">ascii</span>  <span class="w">Any</span> <span class="w">character</span> <span class="w">in</span> <span class="w">the</span> <span class="w">ASCII</span> <span class="w">character</span> <span class="w">set</span>.</li><li> <span class="w">blank</span>  <span class="w">A</span> <span class="w">GNU</span> <span class="w">extension</span><span class="cm">,</span> <span class="w">equal</span> <span class="w">to</span> <span class="w">a</span> <span class="w">space</span> <a class="l_k" href="functions/or.html">or</a> <span class="w">a</span> <span class="w">horizontal</span> <span class="w">tab</span> <span class="s">(</span><span class="q">&quot;\t&quot;</span><span class="s">)</span>.</li><li> <span class="w">cntrl</span>  <span class="w">Any</span> <span class="w">control</span> <span class="w">character</span>.  
<span class="w">See</span> <span class="w">Note</span> <span class="s">[</span><span class="n">2</span><span class="s">]</span> <span class="w">below</span>.</li><li> <span class="w">digit</span>  <span class="w">Any</span> <span class="w">decimal</span> <span class="w">digit</span> <span class="s">(</span><span class="q">&quot;[0-9]&quot;</span><span class="s">)</span><span class="cm">,</span> <span class="w">equivalent</span> <span class="w">to</span> <span class="q">&quot;\d&quot;</span>.</li><li> <span class="w">graph</span>  <span class="w">Any</span> <span class="w">printable</span> <span class="w">character</span><span class="cm">,</span> <span class="w">excluding</span> <span class="w">a</span> <span class="w">space</span>.  <span class="w">See</span> <span class="w">Note</span> <span class="s">[</span><span class="n">3</span><span class="s">]</span> <span class="w">below</span>.</li><li> <span class="w">lower</span>  <span class="w">Any</span> <span class="w">lowercase</span> <span class="w">character</span> <span class="s">(</span><span class="q">&quot;[a-z]&quot;</span><span class="s">)</span>.</li><li> <a class="l_k" href="functions/print.html">print</a>  <span class="i">Any</span> <span class="w">printable</span> <span class="w">character</span><span class="cm">,</span> <span class="w">including</span> <span class="w">a</span> <span class="w">space</span>.  <span class="w">See</span> <span class="w">Note</span> <span class="s">[</span><span class="n">4</span><span class="s">]</span> <span class="w">below</span>.</li><li> <span class="w">punct</span>  <span class="w">Any</span> <span class="w">graphical</span> <span class="w">character</span> <span class="w">excluding</span> <span class="q">&quot;word&quot;</span> <span class="w">characters</span>.  
<span class="w">Note</span> <span class="s">[</span><span class="n">5</span><span class="s">]</span>.</li><li> <span class="w">space</span>  <span class="w">Any</span> <span class="w">whitespace</span> <span class="w">character</span>. <span class="q">&quot;\s&quot;</span> <span class="w">including</span> <span class="w">the</span> <span class="w">vertical</span> <span class="i">tab</span></li><li>        <span class="s">(</span><span class="q">&quot;\cK&quot;</span><span class="s">)</span>.</li><li> <span class="w">upper</span>  <span class="w">Any</span> <span class="w">uppercase</span> <span class="w">character</span> <span class="s">(</span><span class="q">&quot;[A-Z]&quot;</span><span class="s">)</span>.</li><li> <span class="w">word</span>   <span class="w">A</span> <span class="w">Perl</span> <span class="w">extension</span> <span class="s">(</span><span class="q">&quot;[A-Za-z0-9_]&quot;</span><span class="s">)</span><span class="cm">,</span> <span class="w">equivalent</span> <span class="w">to</span> <span class="q">&quot;\w&quot;</span>.</li><li> <span class="w">xdigit</span> <span class="w">Any</span> <span class="w">hexadecimal</span> <span class="w">digit</span> <span class="s">(</span><span class="q">&quot;[0-9a-fA-F]&quot;</span><span class="s">)</span>.</li></ol></pre><p>Like the <a href="#Unicode-Properties">Unicode properties</a>, most of the POSIX
properties match the same regardless of whether case-insensitive (<code class="inline">/i</code>)
matching is in effect or not.  The two exceptions are <code class="inline">[:upper:]</code> and
<code class="inline">[:lower:]</code>.  Under <code class="inline">/i</code>, they each match the union of <code class="inline">[:upper:]</code> and
<code class="inline">[:lower:]</code>.</p>
<p>Most POSIX character classes have two Unicode-style <code class="inline">\<span class="w">p</span></code>
 property
counterparts.  (They are not official Unicode properties, but Perl extensions
derived from official Unicode properties.)  The table below shows the relation
between POSIX character classes and these counterparts.</p>
<p>One counterpart, in the column labelled "ASCII-range Unicode" in
the table, matches only characters in the ASCII character set.</p>
<p>The other counterpart, in the column labelled "Full-range Unicode", matches any
appropriate characters in the full Unicode character set.  For example,
<code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">Alpha</span><span class="s">}</span></code>
 matches not just the ASCII alphabetic characters, but any
character in the entire Unicode character set considered alphabetic.
An entry in the column labelled "backslash sequence" is a (short)
equivalent.</p>
<pre class="verbatim"><ol><li> <span class="s">[</span><span class="s">[</span><span class="co">:</span>...<span class="co">:</span><span class="s">]</span><span class="s">]</span>      <span class="w">ASCII</span>-<span class="w">range</span>          <span class="w">Full</span>-<span class="w">range</span>  <span class="w">backslash</span>  <span class="w">Note</span></li><li>                 <span class="w">Unicode</span>              <span class="w">Unicode</span>     <span class="w">sequence</span></li><li> -----------------------------------------------------</li><li>   <span class="w">alpha</span>      \<span class="i">p</span><span class="s">{</span><span class="w">PosixAlpha</span><span class="s">}</span>       \<span class="i">p</span><span class="s">{</span><span class="w">XPosixAlpha</span><span class="s">}</span></li><li>   <span class="w">alnum</span>      \<span class="i">p</span><span class="s">{</span><span class="w">PosixAlnum</span><span class="s">}</span>       \<span class="i">p</span><span class="s">{</span><span class="w">XPosixAlnum</span><span class="s">}</span></li><li>   <span class="w">ascii</span>      \<span class="i">p</span><span class="s">{</span><span class="w">ASCII</span><span class="s">}</span></li><li>   <span class="w">blank</span>      \<span class="i">p</span><span class="s">{</span><span class="w">PosixBlank</span><span class="s">}</span>       \<span class="i">p</span><span class="s">{</span><span class="w">XPosixBlank</span><span class="s">}</span>  \<span class="w">h</span>      <span class="s">[</span><span class="n">1</span><span class="s">]</span></li><li>                                   or \<span class="i">p</span><span class="s">{</span><span class="w">HorizSpace</span><span class="s">}</span>        <span class="s">[</span><span class="n">1</span><span class="s">]</span></li><li>   <span class="w">cntrl</span>      \<span class="i">p</span><span class="s">{</span><span class="w">PosixCntrl</span><span 
class="s">}</span>       \<span class="i">p</span><span class="s">{</span><span class="w">XPosixCntrl</span><span class="s">}</span>          <span class="s">[</span><span class="n">2</span><span class="s">]</span></li><li>   <span class="w">digit</span>      \<span class="i">p</span><span class="s">{</span><span class="w">PosixDigit</span><span class="s">}</span>       \<span class="i">p</span><span class="s">{</span><span class="w">XPosixDigit</span><span class="s">}</span>  \<span class="w">d</span></li><li>   <span class="w">graph</span>      \<span class="i">p</span><span class="s">{</span><span class="w">PosixGraph</span><span class="s">}</span>       \<span class="i">p</span><span class="s">{</span><span class="w">XPosixGraph</span><span class="s">}</span>          <span class="s">[</span><span class="n">3</span><span class="s">]</span></li><li>   <span class="w">lower</span>      \<span class="i">p</span><span class="s">{</span><span class="w">PosixLower</span><span class="s">}</span>       \<span class="i">p</span><span class="s">{</span><span class="w">XPosixLower</span><span class="s">}</span></li><li>   <a class="l_k" href="functions/print.html">print</a>      \<span class="i">p</span><span class="s">{</span><span class="w">PosixPrint</span><span class="s">}</span>       \<span class="i">p</span><span class="s">{</span><span class="w">XPosixPrint</span><span class="s">}</span>          <span class="s">[</span><span class="n">4</span><span class="s">]</span></li><li>   <span class="w">punct</span>      \<span class="i">p</span><span class="s">{</span><span class="w">PosixPunct</span><span class="s">}</span>       \<span class="i">p</span><span class="s">{</span><span class="w">XPosixPunct</span><span class="s">}</span>          <span class="s">[</span><span class="n">5</span><span class="s">]</span></li><li>              \<span class="i">p</span><span class="s">{</span><span class="w">PerlSpace</span><span class="s">}</span>        \<span 
class="i">p</span><span class="s">{</span><span class="w">XPerlSpace</span><span class="s">}</span>   \<span class="q">s      [6]</span></li><li>   <span class="q">   space      \p{PosixSpace</span><span class="s">}</span>       \<span class="i">p</span><span class="s">{</span><span class="w">XPosixSpace</span><span class="s">}</span>          <span class="s">[</span><span class="n">6</span><span class="s">]</span></li><li>   <span class="w">upper</span>      \<span class="i">p</span><span class="s">{</span><span class="w">PosixUpper</span><span class="s">}</span>       \<span class="i">p</span><span class="s">{</span><span class="w">XPosixUpper</span><span class="s">}</span></li><li>   <span class="w">word</span>       \<span class="i">p</span><span class="s">{</span><span class="w">PosixWord</span><span class="s">}</span>        \<span class="i">p</span><span class="s">{</span><span class="w">XPosixWord</span><span class="s">}</span>   \<span class="w">w</span></li><li>   <span class="w">xdigit</span>     \<span class="i">p</span><span class="s">{</span><span class="w">PosixXDigit</span><span class="s">}</span>      \<span class="i">p</span><span class="s">{</span><span class="w">XPosixXDigit</span><span class="s">}</span></li></ol></pre><ul>
<li><a name="%5b1%5d"></a><b>[1]</b>
<p><code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">Blank</span><span class="s">}</span></code>
 and <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">HorizSpace</span><span class="s">}</span></code>
 are synonyms.</p>
</li>
<li><a name="%5b2%5d"></a><b>[2]</b>
<p>Control characters don't produce output as such, but instead usually control
the terminal somehow: for example, newline and backspace are control characters.
On ASCII platforms, in the ASCII range, characters whose code points are
between 0 and 31 inclusive, plus 127 (<code class="inline"><span class="w">DEL</span></code>
) are control characters; on
EBCDIC platforms, their counterparts are control characters.</p>
</li>
<li><a name="%5b3%5d"></a><b>[3]</b>
<p>Any character that is <i>graphical</i>, that is, visible. This class consists
of all alphanumeric characters and all punctuation characters.</p>
</li>
<li><a name="%5b4%5d"></a><b>[4]</b>
<p>All printable characters, which is the set of all graphical characters
plus those whitespace characters which are not also controls.</p>
</li>
<li><a name="%5b5%5d"></a><b>[5]</b>
<p><code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">PosixPunct</span><span class="s">}</span></code>
 and <code class="inline">[[:punct:]]</code> in the ASCII range match all
non-controls, non-alphanumeric, non-space characters:
<code class="inline">[-!"#$%&amp;'()*+,./:;&lt;=&gt;?@[\\\]^_`{|}~]</code> (although if a locale is in effect,
it could alter the behavior of <code class="inline">[[:punct:]]</code>).</p>
<p>The similarly named property, <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">Punct</span><span class="s">}</span></code>
, matches a somewhat different
set in the ASCII range, namely
<code class="inline"><span class="s">[</span>-!<span class="q">&quot;#%&amp;&#39;()*,./:;?@[\\\]_{}]</span></code>
.  That is, it is missing the nine
characters <code class="inline"><span class="s">[</span><span class="i">$+</span>&lt;=&gt;^<span class="q">`|~]</span></code>
.
This is because Unicode splits what POSIX considers to be punctuation into two
categories, Punctuation and Symbols.</p>
<p><code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">XPosixPunct</span><span class="s">}</span></code>
 and (under Unicode rules) <code class="inline">[[:punct:]]</code>, match what
<code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">PosixPunct</span><span class="s">}</span></code>
 matches in the ASCII range, plus what <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">Punct</span><span class="s">}</span></code>

matches.  This is different than strictly matching according to
<code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">Punct</span><span class="s">}</span></code>
.  Another way to say it is that
if Unicode rules are in effect, <code class="inline">[[:punct:]]</code> matches all characters
that Unicode considers punctuation, plus all ASCII-range characters that
Unicode considers symbols.</p>
</li>
<li><a name="%5b6%5d"></a><b>[6]</b>
<p><code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">XPerlSpace</span><span class="s">}</span></code>
 and <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">Space</span><span class="s">}</span></code>
 match identically starting with Perl
v5.18.  In earlier versions, these differed only in that in non-locale
matching, <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">XPerlSpace</span><span class="s">}</span></code>
 did not match the vertical tab, <code class="inline">\<span class="w">cK</span></code>
.
Same for the two ASCII-only range forms.</p>
</li>
</ul>
<p>There are various other synonyms that can be used besides the names
listed in the table.  For example, <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">XPosixAlpha</span><span class="s">}</span></code>
 can be written as
<code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">Alpha</span><span class="s">}</span></code>
.  All are listed in
<a href="perluniprops.html#Properties-accessible-through-%5cp%7b%7d-and-%5cP%7b%7d">Properties accessible through \p{} and \P{} in perluniprops</a>.</p>
<p>Both the <code class="inline">\<span class="w">p</span></code>
 counterparts always assume Unicode rules are in effect.
On ASCII platforms, this means they assume that the code points from 128
to 255 are Latin-1, and that means that using them under locale rules is
unwise unless the locale is guaranteed to be Latin-1 or UTF-8.  In contrast, the
POSIX character classes are useful under locale rules.  They are
affected by the actual rules in effect, as follows:</p>
<ul>
<li><a name="If-the-%2fa-modifier%2c-is-in-effect-..."></a><b>If the <code class="inline"><span class="q">/a</span></code>
 modifier is in effect ...</b>
<p>Each of the POSIX classes matches exactly the same as its ASCII-range
counterpart.</p>
</li>
<li><a name="otherwise-..."></a><b>otherwise ...</b>
<ul>
<li><a name="For-code-points-above-255-..."></a><b>For code points above 255 ...</b>
<p>The POSIX class matches the same as its Full-range counterpart.</p>
</li>
<li><a name="For-code-points-below-256-..."></a><b>For code points below 256 ...</b>
<ul>
<li><a name="if-locale-rules-are-in-effect-..."></a><b>if locale rules are in effect ...</b>
<p>The POSIX class matches according to the locale, except:</p>
<ul>
<li><a name="word"></a><b><code class="inline"><span class="w">word</span></code>
</b>
<p>also includes the platform's native underscore character, no matter what
the locale is.</p>
</li>
<li><a name="ascii"></a><b><code class="inline"><span class="w">ascii</span></code>
</b>
<p>on platforms that don't have the POSIX <code class="inline"><span class="w">ascii</span></code>
 extension, this matches
just the platform's native ASCII-range characters.</p>
</li>
<li><a name="blank"></a><b><code class="inline"><span class="w">blank</span></code>
</b>
<p>on platforms that don't have the POSIX <code class="inline"><span class="w">blank</span></code>
 extension, this matches
just the platform's native tab and space characters.</p>
</li>
</ul>
</li>
<li><a name="if%2c-instead%2c-Unicode-rules-are-in-effect-..."></a><b>if, instead, Unicode rules are in effect ...</b>
<p>The POSIX class matches the same as the Full-range counterpart.</p>
</li>
<li><a name="otherwise-..."></a><b>otherwise ...</b>
<p>The POSIX class matches the same as the ASCII range counterpart.</p>
</li>
</ul>
</li>
</ul>
</li>
</ul>
<p>Which rules apply is determined as described in
<a href="perlre.html#Which-character-set-modifier-is-in-effect%3f">Which character set modifier is in effect? in perlre</a>.</p>
<p>It is proposed to change this behavior in a future release of Perl so that
whether or not Unicode rules are in effect would not change the
behavior:  Outside of locale, the POSIX classes
would behave like their ASCII-range counterparts.  If you wish to
comment on this proposal, send email to <code class="inline"><span class="w">perl5</span>-<span class="w">porters</span><span class="i">@perl</span>.<span class="w">org</span></code>
.</p>
<h4>Negation of POSIX character classes
</h4>
<p>A Perl extension to the POSIX character class is the ability to
negate it. This is done by prefixing the class name with a caret (<code class="inline">^</code>).
Some examples:</p>
<pre class="verbatim"><ol><li>     <span class="w">POSIX</span>         <span class="w">ASCII</span>-<span class="w">range</span>     <span class="w">Full</span>-<span class="w">range</span>  <span class="w">backslash</span></li><li>                    <span class="w">Unicode</span>         <span class="w">Unicode</span>    <span class="w">sequence</span></li><li> -----------------------------------------------------</li><li> <span class="s">[</span><span class="s">[</span><span class="co">:</span>^<span class="w">digit</span><span class="co">:</span><span class="s">]</span><span class="s">]</span>   \<span class="i">P</span><span class="s">{</span><span class="w">PosixDigit</span><span class="s">}</span>  \<span class="i">P</span><span class="s">{</span><span class="w">XPosixDigit</span><span class="s">}</span>   \<span class="w">D</span></li><li> <span class="s">[</span><span class="s">[</span><span class="co">:</span>^<span class="w">space</span><span class="co">:</span><span class="s">]</span><span class="s">]</span>   \<span class="i">P</span><span class="s">{</span><span class="w">PosixSpace</span><span class="s">}</span>  \<span class="i">P</span><span class="s">{</span><span class="w">XPosixSpace</span><span class="s">}</span></li><li>                \<span class="i">P</span><span class="s">{</span><span class="w">PerlSpace</span><span class="s">}</span>   \<span class="i">P</span><span class="s">{</span><span class="w">XPerlSpace</span><span class="s">}</span>    \<span class="w">S</span></li><li> <span class="s">[</span><span class="s">[</span><span class="co">:</span>^<span class="w">word</span><span class="co">:</span><span class="s">]</span><span class="s">]</span>    \<span class="i">P</span><span class="s">{</span><span class="w">PerlWord</span><span class="s">}</span>    \<span class="i">P</span><span class="s">{</span><span class="w">XPosixWord</span><span class="s">}</span>    \<span class="w">W</span></li></ol></pre><p>The backslash sequence can 
mean either ASCII- or Full-range Unicode,
depending on various factors as described in <a href="perlre.html#Which-character-set-modifier-is-in-effect%3f">Which character set modifier is in effect? in perlre</a>.</p>
<h4>[= =] and [. .]</h4>
<p>Perl recognizes the POSIX character classes <code class="inline"><span class="s">[</span>=<span class="w">class</span>=<span class="s">]</span></code>
 and
<code class="inline">[.class.]</code>, but does not (yet?) support them.  Any attempt to use
either construct raises an exception.</p>
<h4>Examples</h4>
<pre class="verbatim"><ol><li> <span class="q">/[[:digit:]]/</span>            <span class="c"># Matches a character that is a digit.</span></li><li> /<span class="s">[</span><span class="n">01</span><span class="s">[</span><span class="co">:</span><span class="w">lower</span><span class="co">:</span><span class="s">]</span><span class="s">]</span><span class="q">/          # Matches a character that is either a</span></li><li>                          <span class="q">                          # lowercase letter, or &#39;0&#39; or &#39;1&#39;.</span></li><li> <span class="q"> /</span><span class="s">[</span><span class="s">[</span><span class="co">:</span><span class="w">digit</span><span class="co">:</span><span class="s">]</span><span class="s">[</span><span class="co">:</span>^<span class="w">xdigit</span><span class="co">:</span><span class="s">]</span><span class="s">]</span><span class="q">/ # Matches a character that can be anything</span></li><li>                          <span class="q">                          # except the letters &#39;a&#39; to &#39;f&#39; and &#39;A&#39; to</span></li><li>                          <span class="q">                          # &#39;F&#39;.  
This is because the main character</span></li><li>                          <span class="q">                          # class is composed of two POSIX character</span></li><li>                          <span class="q">                          # classes that are ORed together, one that</span></li><li>                          <span class="q">                          # matches any digit, and the other that</span></li><li>                          <span class="q">                          # matches anything that isn&#39;t a hex digit.</span></li><li>                          <span class="q">                          # The OR adds the digits, leaving only the</span></li><li>                          <span class="q">                          # letters &#39;a&#39; to &#39;f&#39; and &#39;A&#39; to &#39;F&#39; excluded.</span></li></ol></pre><a name="Extended-Bracketed-Character-Classes"></a><h3>Extended Bracketed Character Classes

</h3>
<p>This is a fancy bracketed character class that can be used for more
readable and less error-prone classes, and to perform set operations,
such as intersection. An example is</p>
<pre class="verbatim"><ol><li> <span class="q">/(?[ \p{Thai} &amp; \p{Digit} ])/</span></li></ol></pre><p>This will match all the digit characters that are in the Thai script.</p>
<p>This is an experimental feature available starting in 5.18, and is
subject to change as we gain field experience with it.  Any attempt to
use it will raise a warning, unless disabled via</p>
<pre class="verbatim"><ol><li> <a class="l_k" href="functions/no.html">no</a> <span class="w">warnings</span> <span class="q">&quot;experimental::regex_sets&quot;</span><span class="sc">;</span></li></ol></pre><p>Comments on this feature are welcome; send email to
<code class="inline"><span class="w">perl5</span>-<span class="w">porters</span><span class="i">@perl</span>.<span class="w">org</span></code>
.</p>
<p>The rules used by <a href="re.html#'strict'-mode">use re &#39;strict&#39;</a> apply to this
construct.</p>
<p>We can extend the example above:</p>
<pre class="verbatim"><ol><li> <span class="q">/(?[ ( \p{Thai} + \p{Lao} ) &amp; \p{Digit} ])/</span></li></ol></pre><p>This matches digits that are in either the Thai or Laotian scripts.</p>
<p>Notice the white space in these examples.  This construct always has
the <code class="inline"><span class="q">/xx</span></code>
 modifier turned on within it.</p>
<p>The available binary operators are:</p>
<pre class="verbatim"><ol><li> <span class="i">&amp;    intersection</span></li><li> +    <span class="w">union</span></li><li> |    <span class="w">another</span> <span class="w">name</span> <a class="l_k" href="functions/for.html">for</a> <span class="q">&#39;+&#39;</span><span class="cm">,</span> <span class="w">hence</span> <span class="w">means</span> <span class="w">union</span></li><li> -    <span class="w">subtraction</span> <span class="s">(</span><span class="w">the</span> <span class="w">result</span> <span class="w">matches</span> <span class="w">the</span> <span class="w">set</span> <span class="w">consisting</span> <span class="w">of</span> <span class="w">those</span></li><li>      <span class="w">code</span> <span class="w">points</span> <span class="w">matched</span> <span class="w">by</span> <span class="w">the</span> <span class="w">first</span> <span class="w">operand</span><span class="cm">,</span> <span class="w">excluding</span> <span class="w">any</span> <span class="w">that</span></li><li>      <span class="w">are</span> <span class="w">also</span> <span class="w">matched</span> <span class="w">by</span> <span class="w">the</span> <span class="w">second</span> <span class="w">operand</span><span class="s">)</span></li><li> ^    <span class="w">symmetric</span> <span class="w">difference</span> <span class="s">(</span><span class="w">the</span> <span class="w">union</span> <span class="w">minus</span> <span class="w">the</span> <span class="w">intersection</span><span class="s">)</span>.  
<span class="w">This</span></li><li>      <span class="w">is</span> <span class="w">like</span> <span class="w">an</span> <span class="w">exclusive</span> <a class="l_k" href="functions/or.html">or</a><span class="cm">,</span> <span class="w">in</span> <span class="w">that</span> <span class="w">the</span> <span class="w">result</span> <span class="w">is</span> <span class="w">the</span> <span class="w">set</span> <span class="w">of</span> <span class="w">code</span></li><li>      <span class="w">points</span> <span class="w">that</span> <span class="w">are</span> <span class="w">matched</span> <span class="w">by</span> <span class="w">either</span><span class="cm">,</span> <span class="w">but</span> <a class="l_k" href="functions/not.html">not</a> <span class="w">both</span><span class="cm">,</span> <span class="w">of</span> <span class="w">the</span></li><li>      <span class="w">operands</span>.</li></ol></pre><p>There is one unary operator:</p>
<pre class="verbatim"><ol><li> !    <span class="w">complement</span></li></ol></pre><p>All the binary operators left associate; <code class="inline"><span class="q">&quot;&amp;&quot;</span></code>
 is higher precedence
than the others, which all have equal precedence.  The unary operator
right associates, and has highest precedence.  Thus this follows the
normal Perl precedence rules for logical operators.  Use parentheses to
override the default precedence and associativity.</p>
<p>The main restriction is that everything is a metacharacter.  Thus,
you cannot refer to single characters by doing something like this:</p>
<pre class="verbatim"><ol><li> <span class="q">/(?[ a + b ])/</span> <span class="c"># Syntax error!</span></li></ol></pre><p>The easiest way to specify an individual typable character is to enclose
it in brackets:</p>
<pre class="verbatim"><ol><li> <span class="q">/(?[ [a] + [b] ])/</span></li></ol></pre><p>(This is the same thing as <code class="inline"><span class="s">[</span><span class="w">ab</span><span class="s">]</span></code>
.)  You could also have said the
equivalent:</p>
<pre class="verbatim"><ol><li> <span class="q">/(?[[ a b ]])/</span></li></ol></pre><p>(You can, of course, specify single characters by using <code class="inline">\<span class="i">x</span><span class="s">{</span>...<span class="s">}</span></code>
,
<code class="inline">\<span class="i">N</span><span class="s">{</span>...<span class="s">}</span></code>
, etc.)</p>
<p>This last example shows the use of this construct to specify an ordinary
bracketed character class without additional set operations.  Note the
white space within it.  This is allowed because <code class="inline"><span class="q">/xx</span></code>
 is
automatically turned on within this construct.</p>
<p>All the other escapes accepted by normal bracketed character classes are
accepted here as well.</p>
<p>Because this construct compiles under
<a href="re.html#'strict'-mode">use re &#39;strict&#39;</a>, unrecognized escapes that
generate warnings in normal classes are fatal errors here, as well as
all other warnings from these class elements, as well as some
practices that don't currently warn outside <code class="inline"><span class="w">re</span> <span class="q">&#39;strict&#39;</span></code>
.  For example
you cannot say</p>
<pre class="verbatim"><ol><li> <span class="q">/(?[ [ \xF ] ])/</span>     <span class="c"># Syntax error!</span></li></ol></pre><p>You have to have two hex digits after a braceless <code class="inline">\<span class="w">x</span></code>
 (use a leading
zero to make two).  These restrictions are to lower the incidence of
typos causing the class to not match what you thought it would.</p>
<p>If a regular bracketed character class contains a <code class="inline">\<span class="w">p</span><span class="s">{</span><span class="s">}</span></code>
 or <code class="inline">\<span class="w">P</span><span class="s">{</span><span class="s">}</span></code>
 and
is matched against a non-Unicode code point, a warning may be
raised, as the result is not Unicode-defined.  No such warning will come
when using this extended form.</p>
<p>The final difference between regular bracketed character classes and
these is that it is not possible to get these to match a
multi-character fold.  Thus,</p>
<pre class="verbatim"><ol><li> <span class="q">/(?[ [\xDF] ])/iu</span></li></ol></pre><p>does not match the string <code class="inline"><span class="w">ss</span></code>
.</p>
<p>You don't have to enclose POSIX class names inside double brackets,
hence both of the following work:</p>
<pre class="verbatim"><ol><li> <span class="q">/(?[ [:word:] - [:lower:] ])/</span></li><li> /<span class="s">(</span><span class="q">?[ [[:word:]] - [[:lower:]] ])/</span></li></ol></pre><p>Any contained POSIX character classes, including things like <code class="inline">\<span class="w">w</span></code>
 and <code class="inline">\<span class="w">D</span></code>,

respect the <code class="inline"><span class="q">/a</span></code>
 (and <code class="inline"><span class="q">/aa</span></code>
) modifiers.</p>
<p><code class="inline"><span class="s">(</span><span class="q">?[ ])</span></code>
 is a regex-compile-time construct.  Any attempt to use
something which isn't knowable at the time the containing regular
expression is compiled is a fatal error.  In practice, this means
just three limitations:</p>
<dl>
<dt>1</dt><dd>
<p>When compiled within the scope of <code class="inline"><a class="l_k" href="functions/use.html">use</a> <span class="w">locale</span></code>
 (or the <code class="inline"><span class="q">/l</span></code>
 regex
modifier), this construct assumes that the execution-time locale will be
a UTF-8 one, and the generated pattern always uses Unicode rules.  What
gets matched or not thus isn't dependent on the actual runtime locale, so
tainting is not enabled.  But a <code class="inline"><span class="w">locale</span></code>
 category warning is raised
if the runtime locale turns out to not be UTF-8.</p>
</dd>
<dt>2</dt><dd>
<p>Any
<a href="perlunicode.html#User-Defined-Character-Properties">user-defined property</a>
used must be already defined by the time the regular expression is
compiled (but note that this construct can be used instead of such
properties).</p>
</dd>
<dt>3</dt><dd>
<p>A regular expression that otherwise would compile
using <code class="inline">/d</code> rules, and which uses this construct, will instead
use <code class="inline"><span class="q">/u</span></code>
.  Thus this construct tells Perl that you don't want
<code class="inline">/d</code> rules for the entire regular expression containing it.</p>
</dd>
</dl>
<p>Note that skipping white space applies only to the interior of this
construct.  There must not be any space between any of the characters
that form the initial <code class="inline"><span class="s">(</span><span class="q">?[</span></code>
.  Nor may there be space between the
closing <code class="inline">]<span class="p">)</span></code>
 characters.</p>
<p>Just as in all regular expressions, the pattern can be built up by
including variables that are interpolated at regex compilation time.
Care must be taken to ensure that you are getting what you expect.  For
example:</p>
<pre class="verbatim"><ol><li> <a class="l_k" href="functions/my.html">my</a> <span class="i">$thai_or_lao</span> = <span class="q">&#39;\p{Thai} + \p{Lao}&#39;</span><span class="sc">;</span></li><li> ...</li><li> <span class="q">qr/(?[ \p{Digit} &amp; $thai_or_lao ])/</span><span class="sc">;</span></li></ol></pre><p>compiles to</p>
<pre class="verbatim"><ol><li> <span class="q">qr/(?[ \p{Digit} &amp; \p{Thai} + \p{Lao} ])/</span><span class="sc">;</span></li></ol></pre><p>But this does not have the effect that someone reading the code would
likely expect, as the intersection applies just to <code class="inline">\<span class="i">p</span><span class="s">{</span><span class="w">Thai</span><span class="s">}</span></code>
,
excluding the Laotian.  Pitfalls like this can be avoided by
parenthesizing the component pieces:</p>
<pre class="verbatim"><ol><li> <a class="l_k" href="functions/my.html">my</a> <span class="i">$thai_or_lao</span> = <span class="q">&#39;( \p{Thai} + \p{Lao} )&#39;</span><span class="sc">;</span></li></ol></pre><p>But any modifiers will still apply to all the components:</p>
<pre class="verbatim"><ol><li> <a class="l_k" href="functions/my.html">my</a> <span class="i">$lower</span> = <span class="q">&#39;\p{Lower} + \p{Digit}&#39;</span><span class="sc">;</span></li><li> <span class="q">qr/(?[ \p{Greek} &amp; $lower ])/i</span><span class="sc">;</span></li></ol></pre><p>matches upper case things.  You can avoid surprises by making the
components into instances of this construct by compiling them:</p>
<pre class="verbatim"><ol><li> <a class="l_k" href="functions/my.html">my</a> <span class="i">$thai_or_lao</span> = <span class="q">qr/(?[ \p{Thai} + \p{Lao} ])/</span><span class="sc">;</span></li><li> <a class="l_k" href="functions/my.html">my</a> <span class="i">$lower</span> = <span class="q">qr/(?[ \p{Lower} + \p{Digit} ])/</span><span class="sc">;</span></li></ol></pre><p>When these are embedded in another pattern, what they match does not
change, regardless of parenthesization or what modifiers are in effect
in that outer pattern.</p>
<p>Due to the way that Perl parses things, your parentheses and brackets
may need to be balanced, even including comments.  If you run into any
examples, please send them to <code class="inline"><span class="w">perlbug</span><span class="i">@perl</span>.<span class="w">org</span></code>
, so that we can have a
concrete example for this man page.</p>
<p>We may change it so that things that remain legal uses in normal bracketed
character classes might become illegal within this experimental
construct.  One proposal, for example, is to forbid adjacent uses of the
same character, as in <code class="inline"><span class="s">(</span><span class="q">?[ [aa] ])</span></code>
.  The motivation for such a change
is that this usage is likely a typo, as the second "a" adds nothing.</p>




  <div id="page_index" class="hud_container">
    <div id="page_index_header" class="hud_header">
      <div id="page_index_close" class="hud_close"><a href="#" onClick="pageIndex.hide();return false;"></a></div>
      <div id="page_index_title" class="hud_title"><span class="hud_span_top">Page index</span></div>
      <div id="page_index_topright" class="hud_topright"></div>
    </div>
    <div id="page_index_content" class="hud_content">
      <ul><li><a href="#NAME">NAME
</a><li><a href="#DESCRIPTION">DESCRIPTION</a><ul><li><a href="#The-dot">The dot</a><li><a href="#Backslash-sequences">Backslash sequences
       
    
 </a><li><a href="#Bracketed-Character-Classes">Bracketed Character Classes</a></ul></ul>
    </div>
    <div id="page_index_footer" class="hud_footer">
      <div id="page_index_bottomleft" class="hud_bottomleft"></div>
      <div id="page_index_bottom" class="hud_bottom"><span class="hud_span_bottom"></span></div>
      <div id="page_index_resize" class="hud_resize"></div>
    </div>
  </div>


	    &nbsp;
          </div>
          <div id="content_footer">
          </div>
        </div>
        <div class="clear"></div>
      </div>
      
    <div id="footer">
      <div id="footer_content">
        <div id="footer_strapline">
          perldoc.perl.org - Official documentation for the Perl programming language
        </div>
        <div id="footer_links">
          <div id="address">
            <p class="name">Contact details</p>
            <p class="address">
	      Site maintained by <a href="mailto:jj@jonallen.info">Jon Allen (JJ)</a><br>
	    </p>
            <p class="contact">
              Documentation maintained by the <a href="http://lists.cpan.org/showlist.cgi?name=perl5-porters">Perl 5 Porters</a>
            </p>
          </div>
          <ul class="f1">
            <li>Manual
              <ul class="f2">
                <li><a href="index-overview.html">Overview</a>
                <li><a href="index-tutorials.html">Tutorials</a>
                <li><a href="index-faq.html">FAQs</a>
                <li><a href="index-history.html">Changes</a>
              </ul>
            <li>Reference
              <ul class="f2">
                <li><a href="index-language.html">Language</a>
                <li><a href="index-functions.html">Functions</a>
                <li><a href="perlop.html">Operators</a>
                <li><a href="perlvar.html">Variables</a>
              </ul>
            <li>Modules
              <ul class="f2">
                <li><a href="index-modules-A.html">Modules</a>
                <li><a href="index-pragmas.html">Pragmas</a>
                <li><a href="index-utilities.html">Utilities</a>
              </ul>
            <li>Misc
              <ul class="f2">
                <li><a href="index-licence.html">License</a>
                <li><a href="index-internals.html">Internals</a>
                <li><a href="index-platforms.html">Platforms</a>
              </ul>          </ul>
          <div class="clear"></div>
        </div>
      </div>
      <div id="footer_end">
      </div>
    </div>
      
    </div>
      <script language="JavaScript" type="text/javascript" src="static/exploreperl.js"></script>
      <script language="JavaScript" src="static/combined-20100403.js" type="text/javascript"></script>
<script language="JavaScript" type="text/javascript">
  perldoc.setPath(0);
  perldoc.pageName    = 'perlrecharclass';
  perldoc.pageAddress = 'perlrecharclass.html';
  perldoc.contentPage = 1;
  explorePerl.render();
  explorePerl.addEvents('explore_anchor');
</script>
    
  </body>
</html>