// This is a regression test for a bug in how special states are handled. The
// fuzzer found a case where a state returned true for 'is_special_state' but
// *didn't* return true for 'is_dead_state', 'is_quit_state', 'is_match_state',
// 'is_start_state' or 'is_accel_state'. This in turn tripped a debug assertion
// in the core matching loop that requires 'is_special_state' being true to
// imply that one of the other routines returns true.
//
// We fixed this by adding some validation to both dense and sparse DFAs that
// checks that this property is true for every state ID in the DFA.
#[test]
fn invalid_special_state() {
    let data = include_bytes!(
        "testdata/deserialize_sparse_crash-a1b839d899ced76d5d7d0f78f9edb7a421505838",
    );
    let _ = fuzz_run(data);
}
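// A minimal illustrative sketch, not part of the original fuzz target: the
// invariant described above, phrased as a predicate over the public
// 'regex_automata::dfa::Automaton' trait. The helper name is our own; only
// the trait methods come from regex-automata.
#[allow(dead_code)]
fn special_state_invariant<A: regex_automata::dfa::Automaton>(
    dfa: &A,
    sid: regex_automata::util::primitives::StateID,
) -> bool {
    // 'is_special_state(sid)' must imply at least one of the more specific
    // predicates. The validation added for this regression checks a property
    // of this kind for every state ID in the DFA.
    !dfa.is_special_state(sid)
        || dfa.is_dead_state(sid)
        || dfa.is_quit_state(sid)
        || dfa.is_match_state(sid)
        || dfa.is_start_state(sid)
        || dfa.is_accel_state(sid)
}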
// This is an interesting case where a fuzzer generated a DFA with
// a transition to a state ID that decoded as a valid state, but
// where the ID itself did not point to one of the two existing
// states for this particular DFA. This, combined with marking the
// transition's target state ID as special without actually making any of the
// 'is_{dead,quit,match,start,accel}_state' predicates return true, ended up
// tripping the 'debug_assert(dfa.is_quit_state(sid))' check in the search
// routine.
//
// We fixed this in alloc mode by checking that every transition points to a
// valid state ID. Technically this bug still exists in core-only mode, but
// it's not clear how to fix it. And it's worth pointing out that the search
// routine won't panic in production. It will just provide invalid results. And
// that's acceptable within the contract of DFA::from_bytes.
#[test]
fn transition_to_invalid_but_valid_state() {
    let data = include_bytes!(
        "testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9",
    );
    let _ = fuzz_run(data);
}
// Another case caught by the fuzzer: it generated a DFA that reported a
// start state as a match state. Since matches are always delayed by one byte,
// start states specifically cannot be match states. And indeed, the search
// code relies on this.
#[test]
fn start_state_is_not_match_state() {
    let data = include_bytes!(
        "testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000",
    );
    let _ = fuzz_run(data);
}
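// A small sketch, using a hypothetical helper of our own rather than anything
// from the fuzz target: because matches are delayed by one byte, the forward
// start state of a valid DFA is never a match state. 'start_state_forward'
// and 'is_match_state' are methods on 'regex_automata::dfa::Automaton'.
#[allow(dead_code)]
fn start_state_never_matches<A: regex_automata::dfa::Automaton>(
    dfa: &A,
    haystack: &[u8],
) -> bool {
    match dfa.start_state_forward(&regex_automata::Input::new(haystack)) {
        // If computing the start state fails, there is nothing to check.
        Err(_) => true,
        Ok(sid) => !dfa.is_match_state(sid),
    }
}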
// This is a variation on 'transition_to_invalid_but_valid_state', but happens
// to a start state. Namely, the fuzz data here builds a DFA with a start
// state ID that is incorrect but points to a sequence of bytes that satisfies
// state decoding validation. This errant state in turn has a non-zero number
// of transitions, and it's those transitions that point to a state that does
// *not* satisfy state decoding validation. But we never checked those. So the
// fix here was to add validation of the transitions out of the start state.
#[test]
fn start_state_has_valid_transitions() {
    let data = include_bytes!(
        "testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98",
    );
    let _ = fuzz_run(data);
}
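// Illustrative sketch with a hypothetical helper of our own: after this fix,
// transitions out of the start state get validated. From the public API, the
// closest analogue is to walk every byte transition out of the forward start
// state and confirm the resulting IDs still uphold the special-state
// invariant sketched earlier ('next_state' is part of the 'Automaton' trait).
#[allow(dead_code)]
fn start_state_transitions_look_sane<A: regex_automata::dfa::Automaton>(
    dfa: &A,
    haystack: &[u8],
) -> bool {
    let input = regex_automata::Input::new(haystack);
    let start = match dfa.start_state_forward(&input) {
        Ok(sid) => sid,
        Err(_) => return true,
    };
    // Follow the transition for every possible haystack byte.
    (0..=u8::MAX)
        .all(|byte| special_state_invariant(dfa, dfa.next_state(start, byte)))
}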
// This fuzz input generated a DFA with a state whose ID was in the match state
// ID range, but where the state itself was encoded with zero pattern IDs. We
// added validation code to check this case.
#[test]
fn match_state_inconsistency() {
    let data = include_bytes!(
        "testdata/deserialize_sparse_crash-c383ae07ec5e191422eadc492117439011816570",
    );
    let _ = fuzz_run(data);
}
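// Illustrative sketch with our own helper name: the invariant behind this
// test is that a match state always carries at least one pattern ID. Via the
// public 'Automaton' trait, 'match_len' reports how many patterns match in a
// given match state.
#[allow(dead_code)]
fn match_state_has_pattern_ids<A: regex_automata::dfa::Automaton>(
    dfa: &A,
    sid: regex_automata::util::primitives::StateID,
) -> bool {
    // 'match_len' is only meaningful (and, thanks to the short-circuit, only
    // called here) when 'sid' is a match state.
    !dfa.is_match_state(sid) || dfa.match_len(sid) > 0
}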
// This fuzz input generated a DFA with a state whose ID was in the accelerator
// range, but which didn't have any accelerators. This violated an invariant
// that assumes that if 'dfa.is_accel_state(sid)' returns true, then the state
// must have some accelerators.
#[test]
fn invalid_accelerators() {
    let data = include_bytes!(
        "testdata/deserialize_sparse_crash-d07703ceb94b10dcd9e4acb809f2051420449e2b",
    );
    let _ = fuzz_run(data);
}
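// Illustrative sketch with our own helper name: the invariant here is that a
// state in the accelerated range must actually have accelerator bytes. The
// 'Automaton::accelerator' method returns an empty slice for states without
// accelerators, so the property can be phrased like this.
#[allow(dead_code)]
fn accel_state_has_accelerators<A: regex_automata::dfa::Automaton>(
    dfa: &A,
    sid: regex_automata::util::primitives::StateID,
) -> bool {
    !dfa.is_accel_state(sid) || !dfa.accelerator(sid).is_empty()
}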
// This fuzz input generated a DFA with a state whose EOI transition led to
// a quit state, which is generally considered illegal. Why? Because the EOI
// transition is defined over a special sentinel alphabet element and one
// cannot configure a DFA to "quit" on that sentinel.
#[test]
fn eoi_transition_to_quit_state() {
    let data = include_bytes!(
        "testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9",
    );
    let _ = fuzz_run(data);
}
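// Illustrative sketch with our own helper name: the rule this test exercises
// is that following the EOI transition of a state must never land in a quit
// state, since the EOI sentinel is not a real haystack byte and cannot be
// configured as a quit byte. 'next_eoi_state' and 'is_quit_state' come from
// the 'regex_automata::dfa::Automaton' trait.
#[allow(dead_code)]
fn eoi_never_leads_to_quit<A: regex_automata::dfa::Automaton>(
    dfa: &A,
    sid: regex_automata::util::primitives::StateID,
) -> bool {
    !dfa.is_quit_state(dfa.next_eoi_state(sid))
}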
// This is the code from the fuzz target. Kind of sucks to duplicate it here,
// but this is fundamentally how we interpret the data.
fn fuzz_run(given_data: &[u8]) -> Option<()> {
    use regex_automata::dfa::Automaton;
    if given_data.len() < 2 {
        return None;
    }
    let haystack_len = usize::from(given_data[0]);
    let haystack = given_data.get(1..1 + haystack_len)?;
    let given_dfa_bytes = given_data.get(1 + haystack_len..)?;
    // We help the fuzzer along by adding a preamble to the bytes that should
    // at least make these first parts valid. The deserializer expects a very
    // specific sequence of bytes here, so it makes sense to just force this.
    let label = "rust-regex-automata-dfa-sparse\x00\x00";
    assert_eq!(0, label.len() % 4);
    let endianness_check = 0xFEFFu32.to_ne_bytes().to_vec();
    let version_check = 2u32.to_ne_bytes().to_vec();
    let mut dfa_bytes: Vec<u8> = vec![];
    dfa_bytes.extend(label.as_bytes());
    dfa_bytes.extend(&endianness_check);
    dfa_bytes.extend(&version_check);
    dfa_bytes.extend(given_dfa_bytes);
    // This is the real test: checking that any input we give to
    // DFA::from_bytes will never result in a panic.
    let (dfa, _) =
        regex_automata::dfa::sparse::DFA::from_bytes(&dfa_bytes).ok()?;
    let _ = dfa.try_search_fwd(&regex_automata::Input::new(haystack));
    Some(())
}
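// For contrast with the fuzz harness above, a sketch (not a regression test)
// of the happy path: bytes produced by actually serializing a sparse DFA
// deserialize cleanly and search identically. This assumes the 'dfa-build'
// and 'syntax' features are enabled, which the fuzz inputs above don't need.
#[allow(dead_code)]
fn roundtrip_sketch() {
    use regex_automata::dfa::{sparse, Automaton};
    let dfa = sparse::DFA::new("a[bc]+z").unwrap();
    let bytes = dfa.to_bytes_native_endian();
    let (dfa2, _) = sparse::DFA::from_bytes(&bytes).unwrap();
    let input = regex_automata::Input::new("xabbczy");
    assert_eq!(
        dfa.try_search_fwd(&input).unwrap(),
        dfa2.try_search_fwd(&input).unwrap(),
    );
}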