1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
|
(* ========================================================================== *)
(* FLOATING POINT DEFINITIONS *)
(* ========================================================================== *)
(* needs "IEEE/common.hl";; *)
(* needs "IEEE/fixed.hl";; *)
(* References: *)
(* *)
(* [ 1 ] John Harrison, "A Machine-Checked Theory of Floating Point *)
(* Arithmetic", from Lecture Notes in Computer Science, Vol. 1690, *)
(* September 1999. *)
(* [ 2 ] David Goldberg, "What Every Computer Scientist Should Know About *)
(* Floating-Point Arithmetic", from Computing Surveys, March 1991 *)
(* issue. *)
(* [ 3 ] Nicholas Higham, "Accuracy and Stability of Numerical Algorithms", *)
(* Second Edition, 2002. *)
(* [ 4 ] IEEE 754 Standard for Floating Point Arithmetic, 2008. *)
(* *)
(* A large part of this formalization is based on John Harrison's work. *)
(* ========================================================================== *)
(* -------------------------------------------------------------------------- *)
(* Floating point format *)
(* -------------------------------------------------------------------------- *)
(* Fix r:num > 1 and even, p:num > 1. A floating point number is a real *)
(* number that can be written as *)
(* *)
(* +/- f * r^(e - p + 1) *)
(* *)
(* where *)
(* *)
(* -- e is an integer *)
(* -- 0 < f < r^p (so zero is not a floating point number) *)
(* *)
(* Why is p > 1? If p = 0, the set of floating point numbers is empty. If *)
(* p = 1, round to even is not possible for some scenarios. For example, take *)
(* r = 4 and p = 1. If x lies half way between 3 * 4^(1 - p + 1) and *)
(* 1 * 4^(2 - p + 1), there is no way to round to even, since both enclosing *)
(* floating point numbers have odd fractions. *)
(* *)
(* Why is r > 1? If r = 0 or 1, the set of floating point numbers is *)
(* empty (no f satisfies 0 < f < r^p for those r). *)
(* *)
(* r needs to be even for a couple of reasons. *)
(* *)
(* #1: *)
(* *)
(* I should confess first: *)
(* *)
(* The formalization below of floating point round to nearest will *not* *)
(* round to even *when r is odd*, and will in fact be biased to round toward *)
(* zero. Rounding the remainder to even does not round the floating point *)
(* number to even *when r is odd*. After looking at #2 and #3 below, you *)
(* might see why. *)
(* *)
(* Goldberg claims we always assume r is even, so I took it at face value at *)
(* first. Here are a couple more compelling reasons I came up with; there *)
(* could be a more fundamental reason. *)
(* *)
(* #2: *)
(* *)
(* If r is odd, fixed point round to nearest would be biased to round toward *)
(* zero. For contrast, first take an even radix: r = 2, p = 2, and e = 1; *)
(* then e - p + 1 = 0, and the possible fixed point numbers are *)
(* *)
(* -2 * 1 -1 * 1 0 * 1 1 * 1 2 * 1 *)
(* *)
(* We are equally likely to round down as we are to round up, when we tie, *)
(* and we are equally likely to round *away* from zero as we are to round *)
(* toward zero. *)
(* *)
(* Now take r = 3, p = 2, and e = 1; then e - p + 1 = 0, and the possible *)
(* fixed point numbers are *)
(* *)
(* -3 * 1 -2 * 1 -1 * 1 0 * 1 1 * 1 2 * 1 3 * 1 *)
(* *)
(* If we round to even, we are still equally likely to round down as we are *)
(* to round up, when we tie; but on the positive side, we're more likely to *)
(* round down, and on the negative side, we're more likely to round up. *)
(* *)
(* Notice that no matter how we try to break ties, the positive/negative side *)
(* will be biased to round a certain direction. *)
(* *)
(* Floating point numbers aren't as badly biased when r is odd, but there are *)
(* still some interesting patterns. For r = 4, p = 2, the set of possible *)
(* normalized fractions is 4 - 15, and a section looks like: *)
(* *)
(* | | *)
(* | | | | *)
(* 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, (16=4^2) *)
(* | | | | *)
(* | | *)
(* *)
(* When we round to even for ties, we are equally likely to round up as we *)
(* are to round down--but also, within each subsection, we are equally likely *)
(* to round up as we are down (e.g., in the 4 - 8 interval, the rounding *)
(* pattern is down-up-down-up). *)
(* *)
(* When r is odd, the pattern is more peculiar. For r = 3, p = 2, the set *)
(* of possible normalized fractions is 3 - 8, and a section looks like: *)
(* *)
(* | | *)
(* | | | *)
(* 3, 4, 5, 6, 7, 8, (9=3^2) *)
(* | | | *)
(* | | *)
(* *)
(* When we round to even for ties, we are equally likely to round up as we *)
(* are to round down--but in the (3 - 6) interval, the pattern is *)
(* up-down-up, while in the (6 - 9) interval, the pattern is down-up-down. If *)
(* we tried to use a consistent rounding pattern in each, then we would be *)
(* biased to round a certain direction. *)
(* *)
(* #3: *)
(* *)
(* If we want the machine epsilon to be a floating point number, r needs to *)
(* be even. The machine epsilon is *)
(* *)
(* (r / 2) * r^(-p) *)
(* *)
(* When r is even, r = 2k, so mach eps = k * r^(-p). This is a floating *)
(* point number with f = k and e = -1 (and could also be normalized). *)
(* *)
(* When r is odd, the mach eps is not a floating number in that format. For *)
(* example, take r = 3 and p = 2; then mach eps = 1/6, but 1/6 is half way *)
(* between 4/27 and 5/27, the closest floating point numbers to it. *)
(* A candidate format is a pair (r, p) of natural numbers: radix r and *)
(* precision p. It is valid when r is even and greater than 1, and p is *)
(* greater than 1. *)
let is_valid_flformat =
  define
    `is_valid_flformat (r:num, p:num) = (1 < r /\ EVEN r /\ 1 < p)`;;
(* Introduce the type flformat from the pairs in num#num that satisfy *)
(* is_valid_flformat, with mk_flformat and dest_flformat as the names of *)
(* the two type bijections. The type definition needs a proof that the *)
(* predicate is satisfiable; the pair (2, 2) is used as the witness. *)
let flformat_typbij =
  let flformat_nonempty =
    prove
      (`?(fmt:num#num). is_valid_flformat fmt`,
       EXISTS_TAC `(2:num, 2:num)` THEN
       REWRITE_TAC[is_valid_flformat] THEN
       ARITH_TAC) in
  new_type_definition "flformat" ("mk_flformat", "dest_flformat")
    flformat_nonempty;;
(* Radix of a format: the first component of the pair that represents it. *)
let flr =
  define
    `flr (fmt:flformat) = FST (dest_flformat fmt)`;;
(* Precision of a format: the second component of the pair that represents *)
(* it. *)
let flp =
  define
    `flp (fmt:flformat) = SND (dest_flformat fmt)`;;
(* is_frac_and_exp fmt x f e holds when the fraction f and the exponent e *)
(* represent the magnitude of x in format fmt. With r = flr fmt and *)
(* p = flp fmt this means: *)
(* *)
(*   0 < f,   f < r^p,   abs(x) = f * r^(e - p + 1) *)
(* *)
(* The sign of x is not constrained; only abs(x) appears. *)
let is_frac_and_exp =
  define
    `is_frac_and_exp (fmt:flformat) (x:real) (f:num) (e:int) =
       (0 < f /\
        f < flr fmt EXP flp fmt /\
        abs x = &f * &(flr fmt) ipow (e - &(flp fmt) + &1))`;;
(* A real x is a floating point number of format fmt when some fraction f *)
(* and exponent e satisfy is_frac_and_exp fmt x f e. *)
let is_float =
  define
    `is_float (fmt:flformat) (x:real) =
       (?(f:num) (e:int). is_frac_and_exp fmt x f e)`;;
(* Package the radix and precision of fmt together with an exponent e by *)
(* applying mk_fformat to the triple (flr fmt, flp fmt, e). *)
(* NOTE(review): mk_fformat is not defined in this section; presumably it *)
(* is the fixed point format constructor from IEEE/fixed.hl -- confirm. *)
let to_fformat =
  define
    `to_fformat (fmt:flformat) (e:int) = mk_fformat (flr fmt, flp fmt, e)`;;
(* -------------------------------------------------------------------------- *)
(* Normalization *)
(* -------------------------------------------------------------------------- *)
(* x = +/- m * r^e + y, where e = greatest_e, m = greatest_m, y = greatest_r *)
(* Exponent used to normalize x: sup_int applied to the set of integers z *)
(* with r^z <= abs(x), where r = flr fmt. *)
(* NOTE(review): for x = 0 this set looks empty (a power of r > 1 should *)
(* never be <= 0), so the result depends on what sup_int returns for the *)
(* empty set; sup_int is defined outside this section -- confirm. *)
let greatest_e =
  define
    `greatest_e (fmt:flformat) (x:real) =
       sup_int { (z:int) | &(flr fmt) ipow z <= abs x }`;;
(* Multiplier used to normalize x: sup_num applied to the set of naturals *)
(* m with m * r^e <= abs(x), where r = flr fmt and e = greatest_e fmt x. *)
let greatest_m =
  define
    `greatest_m (fmt:flformat) (x:real) =
       sup_num
         { (m:num) | &m * &(flr fmt) ipow (greatest_e fmt x) <= abs x }`;;
(* Signed remainder of x after removing the leading term m * r^e, where *)
(* m = greatest_m fmt x, e = greatest_e fmt x and r = flr fmt. The leading *)
(* term is subtracted when &0 <= x and added otherwise, so in both cases *)
(* x = +/- m * r^e + greatest_r fmt x. *)
let greatest_r =
  define
    `greatest_r (fmt:flformat) (x:real) =
       if &0 <= x
       then x - &(greatest_m fmt x) * &(flr fmt) ipow (greatest_e fmt x)
       else x + &(greatest_m fmt x) * &(flr fmt) ipow (greatest_e fmt x)`;;
(* -------------------------------------------------------------------------- *)
(* Rounding *)
(* -------------------------------------------------------------------------- *)
(* Round x in format fmt under rounding mode mode. *)
(* *)
(* x is split into a leading term m * r^e (m = greatest_m, e = greatest_e, *)
(* r = flr fmt) and a remainder y = greatest_r. The remainder is rounded *)
(* with fround in the format to_fformat fmt e, and the result is added back *)
(* to the leading term, which is negated when x is negative. *)
(* NOTE(review): fround and roundmode are not defined in this section; *)
(* presumably they come from IEEE/fixed.hl -- confirm. *)
let flround =
  define
    `flround (fmt:flformat) (mode:roundmode) (x:real) =
       (let m = greatest_m fmt x
        and e = greatest_e fmt x
        and y = greatest_r fmt x
        in
        (if &0 <= x
         then &m * &(flr fmt) ipow e + fround (to_fformat fmt e) mode y
         else
           -- (&m * &(flr fmt) ipow e) + fround (to_fformat fmt e) mode y))`;;
(* -------------------------------------------------------------------------- *)
(* Machine Epsilon *)
(* -------------------------------------------------------------------------- *)
(* Definition: For a flformat fmt, the _machine epsilon_ is *)
(* *)
(* fl_eps = r^(-p + 1) / 2 *)
(* *)
(* This is *a little bit more than* the worst possible relative error when *)
(* rounding to nearest. *)
(* fl_eps fmt = r^(1 - p) / 2, with r = flr fmt and p = flp fmt. *)
(* The parentheses around the ipow expression make the division by 2 apply *)
(* to the whole power, independently of the fixity declared for ipow *)
(* (which is defined outside this section). *)
let fl_eps = define
`fl_eps (fmt:flformat) = (&(flr fmt) ipow (&1 - &(flp fmt))) / &2`;;
|