// Test entry point for the default regex configuration: the shared test
// modules declared below run against `regex::Regex` with automatic engine
// selection.
#![cfg_attr(feature = "pattern", feature(pattern))]
use regex;
// Because of how `macro_rules!` scoping works, this definition is visible
// only to the modules declared after it. In effect, that lets the same tests
// be shared between native and dynamic regexes.
//
// The shared tests are also how the various matching engines get exercised.
// This variant goes through the normal code path, which picks the engine
// automatically based on the regex and the input. Other dynamic tests
// explicitly set the engine to use.
//
// Expands to whatever `regex::Regex::new` returns, without unwrapping it, so
// callers can also assert on compilation failures.
macro_rules! regex_new {
    ($pattern:expr) => {
        regex::Regex::new($pattern)
    };
}
// Convenience wrapper around `regex_new!`: builds the regex and unwraps the
// result, so a pattern that fails to compile panics at the call site.
macro_rules! regex {
    ($pattern:expr) => {
        regex_new!($pattern).unwrap()
    };
}
// Set counterpart of `regex_new!`: expands to whatever `regex::RegexSet::new`
// returns for the given collection of patterns, without unwrapping it.
macro_rules! regex_set_new {
    ($patterns:expr) => {
        regex::RegexSet::new($patterns)
    };
}
// Convenience wrapper around `regex_set_new!`: builds the set and unwraps the
// result, so a set that fails to compile panics at the call site.
macro_rules! regex_set {
    ($patterns:expr) => {
        regex_set_new!($patterns).unwrap()
    };
}
// Must come before other module definitions.
include!("macros_str.rs");
include!("macros.rs");
mod api;
mod api_str;
mod crazy;
mod flags;
mod fowler;
mod misc;
mod multiline;
mod noparse;
mod regression;
mod regression_fuzz;
mod replace;
mod searcher;
mod set;
mod shortest_match;
mod suffix_reverse;
#[cfg(feature = "unicode")]
mod unicode;
#[cfg(feature = "unicode-perl")]
mod word_boundary;
#[cfg(feature = "unicode-perl")]
mod word_boundary_unicode;
// Each pattern below switches Unicode mode off with `(?-u)`. The test asserts
// that `regex::Regex::new` refuses to compile every one of them.
#[test]
fn disallow_non_utf8() {
    // True when the `&str`-based constructor rejects `pattern`.
    fn is_rejected(pattern: &str) -> bool {
        regex::Regex::new(pattern).is_err()
    }

    assert!(is_rejected(r"(?-u)\xFF"));
    assert!(is_rejected(r"(?-u)."));
    assert!(is_rejected(r"(?-u)[\xFF]"));
    assert!(is_rejected(r"(?-u)☃"));
}
// With default settings, the pattern `\0` is expected to fail to compile.
#[test]
fn disallow_octal() {
    use regex::Regex;

    let outcome = Regex::new(r"\0");
    assert!(outcome.is_err());
}
// Once octal syntax is switched on through the builder, the pattern `\0` is
// expected to compile.
#[test]
fn allow_octal() {
    use regex::RegexBuilder;

    let outcome = RegexBuilder::new(r"\0").octal(true).build();
    assert!(outcome.is_ok());
}
// Compile-time check that `Regex`, `RegexSet` and their builders, from both
// the crate root and the `bytes` module, implement `Send`, `Sync`,
// `UnwindSafe` and `RefUnwindSafe`. Nothing interesting happens at runtime:
// the test passes if it compiles.
#[test]
fn oibits() {
    use regex::bytes;
    use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder};
    use std::panic::{RefUnwindSafe, UnwindSafe};

    // Only instantiable for types that satisfy all four bounds at once.
    fn assert_oibits<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}

    // `&str`-based API.
    assert_oibits::<Regex>();
    assert_oibits::<RegexBuilder>();
    assert_oibits::<RegexSet>();
    assert_oibits::<RegexSetBuilder>();

    // `&[u8]`-based API.
    assert_oibits::<bytes::Regex>();
    assert_oibits::<bytes::RegexBuilder>();
    assert_oibits::<bytes::RegexSet>();
    assert_oibits::<bytes::RegexSetBuilder>();
}
// See: https://github.com/rust-lang/regex/issues/568
//
// Builds a `Regex` inside `catch_unwind`. The outcome of the call is
// deliberately discarded.
#[test]
fn oibits_regression() {
    use regex::Regex;
    use std::panic::catch_unwind;

    let build = || Regex::new("a").unwrap();
    let _ = catch_unwind(build);
}
// See: https://github.com/rust-lang/regex/issues/750
//
// Pins the size of the regex handle types. The check is only compiled on
// targets with 64-bit pointers, where each type must be exactly 16 bytes.
#[test]
#[cfg(target_pointer_width = "64")]
fn regex_is_reasonably_small() {
    use regex::bytes;
    use regex::{Regex, RegexSet};
    use std::mem::size_of;

    // Expected size, in bytes, of every type checked below.
    const EXPECTED_SIZE: usize = 16;

    assert_eq!(EXPECTED_SIZE, size_of::<Regex>());
    assert_eq!(EXPECTED_SIZE, size_of::<RegexSet>());
    assert_eq!(EXPECTED_SIZE, size_of::<bytes::Regex>());
    assert_eq!(EXPECTED_SIZE, size_of::<bytes::RegexSet>());
}
// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
// See: CVE-2022-24713
//
// We test that our regex compiler will correctly return a "too big" error when
// we try to use a very large repetition on an *empty* sub-expression.
//
// At the time this test was written, the regex compiler does not represent
// empty sub-expressions with any bytecode instructions. In effect, it's an
// "optimization" to leave them out, since they would otherwise correspond
// to an unconditional JUMP in the regex bytecode (i.e., an unconditional
// epsilon transition in the NFA graph). Therefore, an empty sub-expression
// represents an interesting case for the compiler's size limits. Since it
// doesn't actually contribute any additional memory to the compiled regex
// instructions, the size limit machinery never detects it. Instead, it just
// dumbly tries to compile the empty sub-expression N times, where N is the
// repetition size.
//
// When N is very large, this will cause the compiler to essentially spin and
// do nothing for a decently large amount of time. It causes the regex to take
// quite a bit of time to compile, despite the concrete syntax of the regex
// being quite small.
//
// The degree to which this is actually a problem is somewhat of a judgment
// call. Some regexes simply take a long time to compile. But in general, you
// should be able to reasonably control this by setting lower or higher size
// limits on the compiled object size. But this mitigation doesn't work at all
// for this case.
//
// This particular test is somewhat narrow. It merely checks that regex
// compilation will, at some point, return a "too big" error. Before the
// fix landed, this test would eventually fail because the regex would be
// successfully compiled (after enough time elapsed). So while this test
// doesn't check that we exit in a reasonable amount of time, it does at least
// check that we are properly returning an error at some point.
#[test]
fn big_empty_regex_fails() {
    // An empty group repeated 4294967295 (`u32::MAX`) times must be rejected
    // rather than compiled.
    let outcome = regex::Regex::new("(?:){4294967295}");
    assert!(outcome.is_err());
}
// Below is a "billion laughs" variant of the previous test case: instead of
// one huge counter, six modest `{64}` repetitions are stacked on an empty
// group.
#[test]
fn big_empty_reps_chain_regex_fails() {
    let outcome = regex::Regex::new("(?:){64}{64}{64}{64}{64}{64}");
    assert!(outcome.is_err());
}
// Below is another situation where a zero-length sub-expression can be
// introduced: `x{0}` is repeated a very large number of times.
#[test]
fn big_zero_reps_regex_fails() {
    let outcome = regex::Regex::new(r"x{0}{4294967295}");
    assert!(outcome.is_err());
}
// Testing another case for completeness: an alternation whose branches are
// both empty, repeated a very large number of times.
#[test]
fn empty_alt_regex_fails() {
    let outcome = regex::Regex::new(r"(?:|){4294967295}");
    assert!(outcome.is_err());
}
|