1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
|
/// Strip an optional leading sign byte from `bytes`.
///
/// Returns `(is_positive, rest)`:
/// - a leading `+` yields `true` and the bytes after it,
/// - a leading `-` yields `false` and the bytes after it,
/// - anything else (including empty input) yields `true` and `bytes` unchanged.
#[inline]
fn parse_sign<'a>(bytes: &'a [u8]) -> (bool, &'a [u8]) {
    match bytes.split_first() {
        Some((&b'+', rest)) => (true, rest),
        Some((&b'-', rest)) => (false, rest),
        // No sign present: treat as positive and consume nothing.
        _ => (true, bytes),
    }
}
// Map an ASCII decimal digit byte (`b'0'..=b'9'`) to its numeric value.
// Every other byte yields `None`.
#[inline]
fn to_digit(c: u8) -> Option<u32> {
    match c {
        b'0'..=b'9' => Some(u32::from(c - b'0')),
        _ => None,
    }
}
// Append `digit` to a non-negative exponent accumulator: `value * 10 + digit`.
// Returns `None` if either the multiplication or the addition overflows `i32`.
#[inline]
fn add_digit_i32(value: i32, digit: u32) -> Option<i32> {
    value
        .checked_mul(10)
        .and_then(|scaled| scaled.checked_add(digit as i32))
}
// Append `digit` to a non-positive exponent accumulator: `value * 10 - digit`.
// Returns `None` if either the multiplication or the subtraction overflows `i32`.
#[inline]
fn sub_digit_i32(value: i32, digit: u32) -> Option<i32> {
    value
        .checked_mul(10)
        .and_then(|scaled| scaled.checked_sub(digit as i32))
}
// Check whether a byte is an ASCII decimal digit (`b'0'..=b'9'`).
#[inline]
fn is_digit(c: u8) -> bool {
    c.is_ascii_digit()
}
// Divide `digits` into `(digits[..index], digits[index..])`.
// Panics if `index` is greater than `digits.len()`.
#[inline]
fn split_at_index<'a>(digits: &'a [u8], index: usize) -> (&'a [u8], &'a [u8]) {
    digits.split_at(index)
}
/// Split off the leading run of decimal digits.
///
/// - `digits` - Slice starting with 0 or more digits.
///
/// Returns `(leading_digits, rest)`, where `rest` begins at the first
/// byte that is not a digit (or is empty if every byte is a digit).
#[inline]
fn consume_digits<'a>(digits: &'a [u8]) -> (&'a [u8], &'a [u8]) {
    // Length of the leading digit run.
    let digit_count = digits.iter().take_while(|&&byte| is_digit(byte)).count();
    split_at_index(digits, digit_count)
}
// Drop every leading `b'0'` byte and return what is left.
// An all-zero (or empty) input yields an empty slice.
#[inline]
fn ltrim_zero<'a>(bytes: &'a [u8]) -> &'a [u8] {
    match bytes.iter().position(|&byte| byte != b'0') {
        Some(first_nonzero) => &bytes[first_nonzero..],
        // Nothing but zeros: keep the empty tail of the input.
        None => &bytes[bytes.len()..],
    }
}
// Drop every trailing `b'0'` byte and return what is left.
// An all-zero (or empty) input yields an empty slice.
#[inline]
fn rtrim_zero<'a>(bytes: &'a [u8]) -> &'a [u8] {
    // One past the last byte that is not a zero; 0 when there is none.
    let keep = bytes
        .iter()
        .rposition(|&byte| byte != b'0')
        .map_or(0, |last_nonzero| last_nonzero + 1);
    &bytes[..keep]
}
// PARSERS
// -------
/// Parse the exponent of the float.
///
/// * `exponent` - Slice containing the exponent digits.
/// * `is_positive` - If the exponent sign is positive.
fn parse_exponent(exponent: &[u8], is_positive: bool) -> i32 {
// Parse the sign bit or current data.
let mut value: i32 = 0;
match is_positive {
true => {
for c in exponent {
value = match add_digit_i32(value, to_digit(*c).unwrap()) {
Some(v) => v,
None => return i32::max_value(),
};
}
},
false => {
for c in exponent {
value = match sub_digit_i32(value, to_digit(*c).unwrap()) {
Some(v) => v,
None => return i32::min_value(),
};
}
},
}
value
}
/// Check whether the bytes yielded by `x` begin with the bytes yielded by
/// `y`, ignoring ASCII letter case.
///
/// Returns `true` once `y` is exhausted without a mismatch (so an empty `y`
/// always matches), and `false` on the first mismatch or if `x` runs out
/// before `y` does.
///
/// Two bytes are treated as equal when their xor is `0` or `0x20`. [A-Z] is
/// 0x41-0x5A and [a-z] is 0x61-0x7A, so this is a valid case-insensitive
/// comparison only if at least one byte of each compared pair is an ASCII
/// letter, which NaN and infinite strings **must** be.
pub fn case_insensitive_starts_with<'a, 'b, Iter1, Iter2>(mut x: Iter1, mut y: Iter2) -> bool
where
    Iter1: Iterator<Item = &'a u8>,
    Iter2: Iterator<Item = &'b u8>,
{
    // `all` stops at the first mismatch, so `x` is advanced exactly once
    // per byte of `y` that gets examined.
    y.all(|&yi| {
        x.next().map_or(false, |&xi| {
            let diff = xi ^ yi;
            diff == 0 || diff == 0x20
        })
    })
}
/// Parse a float from the start of `bytes`, returning the value and the
/// bytes that were not consumed.
///
/// * `bytes` - Array of bytes leading with float-data.
///
/// Accepted input is an optional sign followed by either a special value
/// (`NaN`, `Infinity` or `inf`, matched case-insensitively) or integer
/// digits, an optional `.` with fraction digits, and an optional `e`/`E`
/// with an optionally signed exponent.
///
/// If no byte at all is consumed (not even a sign), the result is
/// `F::from_u64(0)` together with the untouched input.
pub fn parse_float<'a, F>(bytes: &'a [u8]) -> (F, &'a [u8])
where
    F: minimal_lexical::Float,
{
    // Remember how long the input was so we can tell if anything was consumed.
    let input_len = bytes.len();

    // Parse the sign; every return path below applies it the same way.
    let (is_positive, bytes) = parse_sign(bytes);
    let with_sign = |magnitude: F| -> F {
        if is_positive {
            magnitude
        } else {
            -magnitude
        }
    };

    // Special values: NaN, Infinity, inf. `Infinity` is tested before `inf`
    // so that the longer spelling is consumed in full.
    if case_insensitive_starts_with(bytes.iter(), b"NaN".iter()) {
        // Exponent mask OR'ed with the hidden-bit mask shifted right by one.
        let nan = F::from_bits(F::EXPONENT_MASK | (F::HIDDEN_BIT_MASK >> 1));
        return (with_sign(nan), &bytes[3..]);
    }
    if case_insensitive_starts_with(bytes.iter(), b"Infinity".iter()) {
        return (with_sign(F::from_bits(F::EXPONENT_MASK)), &bytes[8..]);
    }
    if case_insensitive_starts_with(bytes.iter(), b"inf".iter()) {
        return (with_sign(F::from_bits(F::EXPONENT_MASK)), &bytes[3..]);
    }

    // Extract the float components in order: integer, fraction, exponent.
    let (integer_slc, bytes) = consume_digits(bytes);
    let (fraction_slc, bytes) = match bytes.split_first() {
        Some((&b'.', after_point)) => consume_digits(after_point),
        // No decimal point: empty fraction, nothing consumed.
        _ => (&bytes[..0], bytes),
    };
    let (exponent, bytes) = match bytes.split_first() {
        Some((&b'e', after_marker)) | Some((&b'E', after_marker)) => {
            let (exponent_positive, after_sign) = parse_sign(after_marker);
            let (exponent_digits, rest) = consume_digits(after_sign);
            (parse_exponent(exponent_digits, exponent_positive), rest)
        },
        _ => (0, bytes),
    };

    // Nothing was consumed at all: report zero and hand back the input.
    if bytes.len() == input_len {
        return (F::from_u64(0), bytes);
    }

    // Note: You may want to check and validate the float data here:
    // 1). Many floats require integer or fraction digits, if a fraction
    //     is present.
    // 2). All floats require either integer or fraction digits.
    // 3). Some floats do not allow a '+' sign before the significant digits.
    // 4). Many floats require exponent digits after the exponent symbol.
    // 5). Some floats do not allow a '+' sign before the exponent.

    // Leading 0s of the integer and trailing 0s of the fraction are trimmed
    // before parsing. This is required to make the fast and moderate paths
    // more efficient, and for the slow path.
    let magnitude: F = minimal_lexical::parse_float(
        ltrim_zero(integer_slc).iter(),
        rtrim_zero(fraction_slc).iter(),
        exponent,
    );
    (with_sign(magnitude), bytes)
}
// Borrow a string literal as a byte slice (`&[u8]`) rather than the
// fixed-size array reference that a `b"..."` literal produces.
macro_rules! b {
    ($text:literal) => {
        str::as_bytes($text)
    };
}
// Digits followed by NUL bytes: parsing must stop at the first non-digit
// and hand back the remainder untouched.
#[test]
fn f32_test() {
    let input = b"000184467440737095516150\x00\x00006";
    let (value, rest) = parse_float::<f32>(input);
    assert_eq!(184467440000000000000.0, value);
    assert_eq!(b!("\x00\x00006"), rest);
}
// Same input as the f32 case, checked at f64 precision.
#[test]
fn f64_test() {
    let input = b"000184467440737095516150\x00\x00006";
    let (value, rest) = parse_float::<f64>(input);
    assert_eq!(184467440737095500000.0, value);
    assert_eq!(b!("\x00\x00006"), rest);
}
|