File: Binary16Conversion.java

package info (click to toggle)
openjdk-21 21.0.8%2B9-1
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 823,976 kB
sloc: java: 5,613,338; xml: 1,643,607; cpp: 1,296,296; ansic: 420,291; asm: 404,850; objc: 20,994; sh: 15,271; javascript: 11,245; python: 6,895; makefile: 2,362; perl: 357; awk: 351; sed: 172; jsp: 24; csh: 3
file content (424 lines) | stat: -rw-r--r-- 19,055 bytes
parent folder | download | duplicates (7)
/*
 * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 8289551
 * @summary Verify conversion between float and the binary16 format
 * @library ../Math
 * @build FloatConsts
 * @run main Binary16Conversion
 * @run main/othervm -XX:+UnlockDiagnosticVMOptions
 * -XX:DisableIntrinsic=_float16ToFloat,_floatToFloat16 Binary16Conversion
 */

public class Binary16Conversion {
    /**
     * Runs every sub-test and fails if any of them reported a mismatch.
     *
     * @param argv ignored
     * @throws RuntimeException if at least one conversion produced an
     *         unexpected result; the message carries the error count
     */
    public static void main(String... argv) {
        int errors = 0;
        errors += binary16RoundTrip();
        // Note that helper methods do sign-symmetric testing
        errors += binary16CardinalValues();
        errors += roundFloatToBinary16();
        errors += roundFloatToBinary16HalfWayCases();
        errors += roundFloatToBinary16FullBinade();
        errors += alternativeImplementation();

        if (errors > 0) {
            throw new RuntimeException(errors + " errors");
        }
    }

    /*
     * Put all 16-bit values through a conversion loop and make sure
     * the values are preserved (NaN bit patterns notwithstanding).
     *
     * Returns the number of bit patterns that failed to round-trip.
     */
    private static int binary16RoundTrip() {
        int errors = 0;
        // The counter is an int, so the inclusive upper bound cannot
        // overflow; "<=" is required so that 0x7fff is exercised too.
        for (int i = Short.MIN_VALUE; i <= Short.MAX_VALUE; i++) {
            short s = (short)i;
            float f =  Float.float16ToFloat(s);
            short s2 = Float.floatToFloat16(f);

            if (!Binary16.equivalent(s, s2)) {
                errors++;
                System.out.println("Roundtrip failure on " +
                                   Integer.toHexString(0xFFFF & (int)s) +
                                   "\t got back " + Integer.toHexString(0xFFFF & (int)s2));
            }
        }
        return errors;
    }

    /*
     * Check, in both directions and for both signs, the conversion of
     * the cardinal binary16 values (zero, MIN_VALUE, MAX_SUBNORMAL,
     * MIN_NORMAL, one, MAX_VALUE, infinity).
     *
     * Returns the number of mismatches found.
     */
    private static int binary16CardinalValues() {
        int errors = 0;
        // Encode short value for different binary16 cardinal values as an
        // integer-valued float.
        float[][] testCases = {
            {Binary16.POSITIVE_ZERO,         +0.0f},
            {Binary16.MIN_VALUE,              0x1.0p-24f},
            {Binary16.MAX_SUBNORMAL,          0x1.ff8p-15f},
            {Binary16.MIN_NORMAL,             0x1.0p-14f},
            {Binary16.ONE,                    1.0f},
            {Binary16.MAX_VALUE,              65504.0f},
            {Binary16.POSITIVE_INFINITY,      Float.POSITIVE_INFINITY},
        };

        // Check conversions in both directions

        // short -> float
        for (var testCase : testCases) {
            errors += compareAndReportError((short)testCase[0],
                                            testCase[1]);
        }

        // float -> short
        for (var testCase : testCases) {
            errors += compareAndReportError(testCase[1],
                                            (short)testCase[0]);
        }

        return errors;
    }

    /*
     * Check float -> binary16 rounding on hand-picked inputs covering
     * the LSB/round/sticky bit combinations, the subnormal/normal
     * boundary, the underflow-to-zero threshold and the overflow
     * threshold.
     *
     * Returns the number of mismatches found.
     */
    private static int roundFloatToBinary16() {
        int errors = 0;

        // Each row is {float input, expected binary16 bits stored as a float}.
        float[][] testCases = {
            // Test all combinations of LSB, round, and sticky bit

            // LSB = 0, test combination of round and sticky
            {0x1.ff8000p-1f,       (short)0x3bfe},              // round = 0, sticky = 0
            {0x1.ff8010p-1f,       (short)0x3bfe},              // round = 0, sticky = 1
            {0x1.ffa000p-1f,       (short)0x3bfe},              // round = 1, sticky = 0
            {0x1.ffa010p-1f,       (short)0x3bff},              // round = 1, sticky = 1 => ++

            // LSB = 1, test combination of round and sticky
            {0x1.ffc000p-1f,       Binary16.ONE-1},             // round = 0, sticky = 0
            {0x1.ffc010p-1f,       Binary16.ONE-1},             // round = 0, sticky = 1
            {0x1.ffe000p-1f,       Binary16.ONE},               // round = 1, sticky = 0 => ++
            {0x1.ffe010p-1f,       Binary16.ONE},               // round = 1, sticky = 1 => ++

            // Test subnormal rounding
            // Largest subnormal binary16 0x03ff => 0x1.ff8p-15f; LSB = 1
            {0x1.ff8000p-15f,      Binary16.MAX_SUBNORMAL},     // round = 0, sticky = 0
            {0x1.ff8010p-15f,      Binary16.MAX_SUBNORMAL},     // round = 0, sticky = 1
            {0x1.ffc000p-15f,      Binary16.MIN_NORMAL},        // round = 1, sticky = 0 => ++
            {0x1.ffc010p-15f,      Binary16.MIN_NORMAL},        // round = 1, sticky = 1 => ++

            // Test rounding near binary16 MIN_VALUE
            // Smallest in magnitude subnormal binary16 value 0x0001 => 0x1.0p-24f
            // Half-way case, 0x1.0p-25f, and smaller should round down to zero
            {0x1.fffffep-26f,      Binary16.POSITIVE_ZERO},     // nextDown in float
            {0x1.000000p-25f,      Binary16.POSITIVE_ZERO},
            {0x1.000002p-25f,      Binary16.MIN_VALUE},         // nextUp in float
            {0x1.100000p-25f,      Binary16.MIN_VALUE},

            // Test rounding near overflow threshold
            // Largest normal binary16 number 0x7bff => 0x1.ffcp15f; LSB = 1
            {0x1.ffc000p15f,       Binary16.MAX_VALUE},         // round = 0, sticky = 0
            {0x1.ffc010p15f,       Binary16.MAX_VALUE},         // round = 0, sticky = 1
            {0x1.ffe000p15f,       Binary16.POSITIVE_INFINITY}, // round = 1, sticky = 0 => ++
            {0x1.ffe010p15f,       Binary16.POSITIVE_INFINITY}, // round = 1, sticky = 1 => ++
        };

        for (var testCase : testCases) {
            errors += compareAndReportError(testCase[0],
                                            (short)testCase[1]);
        }
        return errors;
    }

    /*
     * Check rounding at and immediately around the exact half-way
     * point of every even/odd pair of adjacent finite binary16
     * values, then around the overflow threshold.
     *
     * Returns the number of mismatches found.
     */
    private static int roundFloatToBinary16HalfWayCases() {
        int errors = 0;

        // Test rounding of exact half-way cases between each
        // even/odd pair of adjacent finite exactly-representable
        // binary16 numbers (the lower endpoint has LSB = 0). Also test
        // rounding of half-way +/- ulp of the *float* value.
        // Additionally, test +/- float ulp of the endpoints. (Other
        // tests in this file make sure all short values round-trip so
        // that doesn't need to be tested here.)

        for (int i = Binary16.POSITIVE_ZERO; // 0x0000
             i    <= Binary16.MAX_VALUE;     // 0x7bff
             i += 2) {     // Check every even/odd pair once
            short lower = (short) i;
            short upper = (short)(i+1);

            float lowerFloat = Float.float16ToFloat(lower);
            float upperFloat = Float.float16ToFloat(upper);
            assert lowerFloat < upperFloat;

            float midway = (lowerFloat + upperFloat) * 0.5f; // Exact midpoint

            errors += compareAndReportError(Math.nextUp(lowerFloat),   lower);
            errors += compareAndReportError(Math.nextDown(midway),     lower);

            // Under round to nearest even, the midway point will
            // round *down* to the (even) lower endpoint.
            errors += compareAndReportError(              midway,      lower);

            errors += compareAndReportError(Math.nextUp(  midway),     upper);
            errors += compareAndReportError(Math.nextDown(upperFloat), upper);
        }

        // More testing around the overflow threshold
        // Binary16.ulp(Binary16.MAX_VALUE) == 32.0f; test around Binary16.MAX_VALUE + 1/2 ulp
        float binary16_MAX_VALUE = Float.float16ToFloat(Binary16.MAX_VALUE);
        float binary16_MAX_VALUE_halfUlp = binary16_MAX_VALUE + 16.0f;

        errors += compareAndReportError(Math.nextDown(binary16_MAX_VALUE), Binary16.MAX_VALUE);
        errors += compareAndReportError(              binary16_MAX_VALUE,  Binary16.MAX_VALUE);
        errors += compareAndReportError(Math.nextUp(  binary16_MAX_VALUE), Binary16.MAX_VALUE);

        // Binary16.MAX_VALUE is an "odd" value since its LSB = 1 so
        // the half-way value greater than Binary16.MAX_VALUE should
        // round up to the next even value, in this case Binary16.POSITIVE_INFINITY.
        errors += compareAndReportError(Math.nextDown(binary16_MAX_VALUE_halfUlp), Binary16.MAX_VALUE);
        errors += compareAndReportError(              binary16_MAX_VALUE_halfUlp,  Binary16.POSITIVE_INFINITY);
        errors += compareAndReportError(Math.nextUp(  binary16_MAX_VALUE_halfUlp), Binary16.POSITIVE_INFINITY);

        return errors;
    }

    /*
     * Check float -> binary16 conversion of input and of its
     * negation (against the sign-flipped expected value).
     *
     * Returns the number of mismatches (0, 1 or 2).
     */
    private static int compareAndReportError(float input,
                                             short expected) {
        // Round to nearest even is sign symmetric
        return compareAndReportError0( input,                 expected) +
               compareAndReportError0(-input, Binary16.negate(expected));
    }

    /*
     * Convert a single float to binary16 and compare with the
     * expected bits; any two NaN encodings are treated as equal.
     *
     * Returns 1 and prints a diagnostic on mismatch, otherwise 0.
     */
    private static int compareAndReportError0(float input,
                                              short expected) {
        short actual = Float.floatToFloat16(input);
        if (!Binary16.equivalent(actual, expected)) {
            System.out.println("Unexpected result of converting " +
                               Float.toHexString(input) +
                               " to short. Expected 0x" + Integer.toHexString(0xFFFF & expected) +
                               " got 0x" + Integer.toHexString(0xFFFF & actual));
            return 1;
        }
        return 0;
    }

    /*
     * Convert a single binary16 value to float and compare with the
     * expected float using Float.compare, which distinguishes +0.0f
     * from -0.0f.
     *
     * Returns 1 and prints a diagnostic on mismatch, otherwise 0.
     */
    private static int compareAndReportError0(short input,
                                              float expected) {
        float actual = Float.float16ToFloat(input);
        if (Float.compare(actual, expected) != 0) {
            System.out.println("Unexpected result of converting " +
                               Integer.toHexString(input & 0xFFFF) +
                               " to float. Expected " + Float.toHexString(expected) +
                               " got " + Float.toHexString(actual));
            return 1;
        }
        return 0;
    }

    /*
     * Check binary16 -> float conversion of input and of its
     * sign-flipped encoding (against the negated expected value).
     *
     * Returns the number of mismatches (0, 1 or 2).
     */
    private static int compareAndReportError(short input,
                                             float expected) {
        // Round to nearest even is sign symmetric
        return compareAndReportError0(                input,   expected) +
               compareAndReportError0(Binary16.negate(input), -expected);
    }

    /*
     * Convert every float in [1.0, 2.0) to binary16 and verify that
     * the results are non-decreasing and that each result is at least
     * as close to the input as its two binary16 neighbors.
     *
     * Returns the number of violations found.
     */
    private static int roundFloatToBinary16FullBinade() {
        int errors = 0;

        // For each float value between 1.0 and less than 2.0
        // (i.e. set of float values with an exponent of 0), convert
        // each value to binary16 and then convert that binary16 value
        // back to float.
        //
        // Any exponent could be used; the maximum exponent for normal
        // values would not exercise the full set of code paths since
        // there is an up-front check on values that would overflow,
        // which correspond to a ripple-carry of the significand that
        // bumps the exponent.
        final int firstBits = Float.floatToIntBits(1.0f);
        final int lastBits  = Float.floatToIntBits(Math.nextDown(2.0f));

        short previous = (short)0;
        for (int i = firstBits; i <= lastBits; i++) {
            // (Could also express the loop control directly in terms
            // of floating-point operations, incrementing by ulp(1.0),
            // etc.)

            float f = Float.intBitsToFloat(i);
            short f_as_bin16 = Float.floatToFloat16(f);
            short f_as_bin16_down = (short)(f_as_bin16 - 1);
            short f_as_bin16_up   = (short)(f_as_bin16 + 1);

            // Across successive float values to convert to binary16,
            // the binary16 results should be semi-monotonic,
            // non-decreasing in this case.

            // Only positive binary16 values so can compare using integer operations
            if (f_as_bin16 < previous) {
                errors++;
                // Mask with 0xffff so all four hex digits are reported.
                System.out.println("Semi-monotonicity violation observed on " +
                                   Integer.toHexString(0xffff & f_as_bin16));
            }
            previous = f_as_bin16;

            // If round-to-nearest was correctly done, when exactly
            // mapped back to float, f_as_bin16 should be at least as
            // close as either of its neighbors to the original value
            // of f.

            float f_prime_down = Float.float16ToFloat(f_as_bin16_down);
            float f_prime      = Float.float16ToFloat(f_as_bin16);
            float f_prime_up   = Float.float16ToFloat(f_as_bin16_up);

            float f_prime_diff = Math.abs(f - f_prime);
            if (f_prime_diff == 0.0) {
                continue; // Exactly representable; nothing can be closer.
            }
            float f_prime_down_diff = Math.abs(f - f_prime_down);
            float f_prime_up_diff   = Math.abs(f - f_prime_up);

            if (f_prime_diff > f_prime_down_diff ||
                f_prime_diff > f_prime_up_diff) {
                errors++;
                System.out.println("Round-to-nearest violation on converting " +
                                   Float.toHexString(f) + " to binary16 and back.");
            }
        }
        return errors;
    }

    /*
     * Compare Float.floatToFloat16 against altFloatToFloat16 for
     * every float in [2.0, 4.0].
     *
     * Returns the number of inputs on which the two disagree.
     */
    private static int alternativeImplementation() {
        int errors = 0;

        // For exhaustive test of all float values use
        // for (long ell = Integer.MIN_VALUE; ell <= Integer.MAX_VALUE; ell++) {
        // (the long counter is what lets that inclusive bound terminate).

        final long firstBits = Float.floatToIntBits(2.0f);
        final long lastBits  = Float.floatToIntBits(4.0f);

        for (long ell = firstBits; ell <= lastBits; ell++) {
            float f = Float.intBitsToFloat((int)ell);
            short s1 = Float.floatToFloat16(f);
            short s2 =    altFloatToFloat16(f);

            if (s1 != s2) {
                errors++;
                System.out.println("Different conversion of float value " + Float.toHexString(f));
            }
        }

        return errors;
    }

    /**
     * Alternative float to binary16 conversion that relies on float
     * operations to do the rounding in both the normal and subnormal
     * binary16 cases.
     *
     * @param f the value to convert
     * @return the binary16 encoding of {@code f}: a NaN encoding with
     *         the input's sign for NaN input, a signed infinity when
     *         the magnitude is at least 65520 (binary16 MAX_VALUE plus
     *         half an ulp), a signed zero when the magnitude is at most
     *         2^-25, and otherwise the sign, biased exponent and
     *         rounded significand bits
     */
    public static short altFloatToFloat16(float f) {
        int doppel = Float.floatToRawIntBits(f);
        // The arithmetic shift smears the sign; the cast keeps only bit 15.
        short sign_bit = (short)((doppel & 0x8000_0000) >> 16);

        if (Float.isNaN(f)) {
            // Preserve sign and attempt to preserve significand bits
            return (short)(sign_bit
                    | 0x7c00 // max exponent + 1
                    // Preserve high order bit of float NaN in the
                    // binary16 result NaN (tenth bit); OR in remaining
                    // bits into lower 9 bits of binary 16 significand.
                    | ((doppel & 0x007f_e000) >> 13) // 10 bits
                    | ((doppel & 0x0000_1ff0) >> 4)  //  9 bits
                    | (doppel & 0x0000_000f));       //  4 bits
        }

        float abs_f = Math.abs(f);

        // The overflow threshold is binary16 MAX_VALUE + 1/2 ulp
        if (abs_f >= (65504.0f + 16.0f) ) {
            return (short)(sign_bit | 0x7c00); // Positive or negative infinity
        }

        // Smallest magnitude nonzero representable binary16 value
        // is equal to 0x1.0p-24; half-way and smaller rounds to zero.
        if (abs_f <= 0x1.0p-25f) { // Covers float zeros and subnormals.
            return sign_bit; // Positive or negative zero
        }

        // Dealing with finite values in exponent range of
        // binary16 (when rounding is done, could still round up)
        int exp = Math.getExponent(f);
        assert -25 <= exp && exp <= 15;
        short signif_bits;

        if (exp <= -15) { // scale down to float subnormal range to do rounding
            // Use a float multiply to compute the correct
            // trailing significand bits for a binary16 subnormal.
            //
            // The exponent range of normalized binary16 subnormal
            // values is [-24, -15]. The exponent range of float
            // subnormals is [-149, -140]. Multiply abs_f down by
            // 2^(-125) -- since (-125 = -149 - (-24)) -- so that
            // the trailing bits of a subnormal float represent
            // the correct trailing bits of a binary16 subnormal.
            exp = -15; // Subnormal encoding using -E_max.
            float f_adjust = abs_f * 0x1.0p-125f;

            // In case the significand rounds up and has a carry
            // propagate all the way up, take the bottom 11 bits
            // rather than bottom 10 bits. Adding this value,
            // rather than OR'ing this value, will cause the right
            // exponent adjustment.
            signif_bits = (short)(Float.floatToRawIntBits(f_adjust) & 0x07ff);
            return (short)(sign_bit | ( ((exp + 15) << 10) + signif_bits ) );
        }

        // Scale down to subnormal range to round off excess bits,
        // then scale back up by the same amount.
        int scalingExp = -139 - exp;
        float scaled = Math.scalb(Math.scalb(f, scalingExp),
                                               -scalingExp);
        // Rounding may have carried into the next binade, so re-read
        // the exponent from the rounded value.
        exp = Math.getExponent(scaled);
        doppel = Float.floatToRawIntBits(scaled);

        // Float.PRECISION is the float significand width (24 bits);
        // shifting by (24 - 11) = 13 leaves the top 10 fraction bits.
        signif_bits = (short)((doppel & 0x007f_e000) >>
                              (Float.PRECISION - 11));
        return (short)(sign_bit | ( ((exp + 15) << 10) | signif_bits ) );
    }

    /**
     * Bit-level constants and helpers for binary16 values held in a
     * {@code short}.
     */
    public static class Binary16 {
        public static final short POSITIVE_INFINITY = (short)0x7c00;
        public static final short MAX_VALUE         = 0x7bff;
        public static final short ONE               = 0x3c00;
        public static final short MIN_NORMAL        = 0x0400;
        public static final short MAX_SUBNORMAL     = 0x03ff;
        public static final short MIN_VALUE         = 0x0001;
        public static final short POSITIVE_ZERO     = 0x0000;

        /**
         * Tests whether the argument encodes a NaN.
         *
         * @param binary16 the bits to examine
         * @return {@code true} if all exponent bits are set and the
         *         significand field is nonzero
         */
        public static boolean isNaN(short binary16) {
            return ((binary16 & 0x7c00) == 0x7c00) // Max exponent and...
                && ((binary16 & 0x03ff) != 0 );    // significand nonzero.
        }

        /**
         * Returns the argument with its sign bit inverted.
         *
         * @param binary16 the bits to negate
         * @return {@code binary16} with only bit 15 flipped
         */
        public static short negate(short binary16) {
            return (short)(binary16 ^ 0x8000 ); // Flip only sign bit.
        }

        /**
         * Compares two encodings, treating all NaN encodings as equal.
         *
         * @param bin16_1 the first encoding
         * @param bin16_2 the second encoding
         * @return {@code true} if the bits are identical or both
         *         arguments are NaN encodings
         */
        public static boolean equivalent(short bin16_1, short bin16_2) {
            return (bin16_1 == bin16_2) ||
                (isNaN(bin16_1) && isNaN(bin16_2));
        }
    }
}