File: float16_t_aottest.cpp

package info (click to toggle)
halide 21.0.0-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 55,752 kB
  • sloc: cpp: 289,334; ansic: 22,751; python: 7,486; makefile: 4,299; sh: 2,508; java: 1,549; javascript: 282; pascal: 207; xml: 127; asm: 9
file content (126 lines) | stat: -rw-r--r-- 3,944 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#include "HalideRuntime.h"
#include <cmath>
#include <cstring>
#include <limits>
#include <stdio.h>
#include <stdlib.h>

// Aborts the program with a diagnostic when a check fails.
//
// condition: result of the check; nothing happens when it is true.
// msg:       text printed after "FAIL: " when the check fails.
void h_assert(bool condition, const char *msg) {
    if (!condition) {
        printf("FAIL: %s\n", msg);
        // abort() is not required to flush stdio buffers, so push the message
        // out explicitly; otherwise it can be lost when stdout is redirected
        // to a pipe or a file.
        fflush(stdout);
        abort();
    }
}

// Reinterprets a 32-bit pattern as a float, with no numeric conversion.
//
// bits: raw bit pattern to reinterpret.
// Returns the float whose object representation equals `bits`.
float float_from_bits(uint32_t bits) {
    static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");
    // Copy the bytes instead of punning through a union: reading a union
    // member other than the one last written is undefined behaviour in C++.
    float value = 0.0f;
    std::memcpy(&value, &bits, sizeof(value));
    return value;
}

// Reinterprets a 64-bit pattern as a double, with no numeric conversion.
//
// bits: raw bit pattern to reinterpret.
// Returns the double whose object representation equals `bits`.
double double_from_bits(uint64_t bits) {
    static_assert(sizeof(double) == sizeof(uint64_t), "double must be 64 bits wide");
    // Copy the bytes instead of punning through a union: reading a union
    // member other than the one last written is undefined behaviour in C++.
    double value = 0.0;
    std::memcpy(&value, &bits, sizeof(value));
    return value;
}

int main() {
    uint16_t inputs[] = {
        0x0000,  // +ve zero
        0x8000,  // -ve zero
        0x7c00,  // +ve infinity
        0xfc00,  // -ve infinity
        0x7e00,  // quiet NaN
        0x7bff,  // Largest +ve normal number
        0xfbff,  // Smallest -ve normal number
        0x0001,  // Smallest +ve subnormal number
        0x8001,  // Largest -ve subnormal number
        0x0002,  // 2nd smallest +ve subnormal number
        0x8002,  // 2nd largest -ve subnormal number
        0x0003,  // 3rd smallest subnormal number
        0x03ff,  // Largest subnormal
        0x03fe,  // 2nd largest subnormal
        0x3c00,  // 1.0
        0xbc00   // -1.0
    };

    // Unfortunately we can't use the C99 hex float format
    // here because MSVC doesn't support them
    float expectedF[] = {
        0.0f,
        -0.0f,
        std::numeric_limits<float>::infinity(),
        -std::numeric_limits<float>::infinity(),
        std::numeric_limits<float>::quiet_NaN(),
        65504.0f,
        -65504.0f,
        (1.0f) / (1 << 24),
        (-1.0f) / (1 << 24),
        (1.0f) / (1 << 23),           // 0x1.000000p-23
        (-1.0f) / (1 << 23),          // -0x1.000000p-23
        (1.5f) / (1 << 23),           // 0x1.800000p-23
        float_from_bits(0x387fc000),  // 0x1.ff8000p-15,
        float_from_bits(0x387f8000),  // 0x1.ff0000p-15,
        1.0f,
        -1.0f};

    double expectedD[] = {
        0.0,
        -0.0,
        std::numeric_limits<double>::infinity(),
        -std::numeric_limits<double>::infinity(),
        std::numeric_limits<double>::quiet_NaN(),
        65504.0,
        -65504.0,
        (1.0) / (1 << 24),
        (-1.0) / (1 << 24),
        (1.0) / (1 << 23),                     // 0x1.000000000000p-23
        (-1.0) / (1 << 23),                    // -0x1.000000000000p-23
        (1.5) / (1 << 23),                     // 0x1.800000000000p-23
        double_from_bits(0x3f0ff80000000000),  // 0x1.ff8000000000p-15,
        double_from_bits(0x3f0ff00000000000),  // 0x1.ff0000000000p-15,
        1.0,
        -1.0};

    h_assert(sizeof(inputs) / sizeof(uint16_t) == sizeof(expectedF) / sizeof(float),
             "size of half array doesn't match float array");
    h_assert(sizeof(inputs) / sizeof(uint16_t) == sizeof(expectedD) / sizeof(double),
             "size of half array doesn't match double array");

    for (unsigned int index = 0; index < sizeof(inputs) / sizeof(uint16_t); ++index) {
        uint16_t in = inputs[index];
        union {
            float asFloat;
            uint32_t asUInt;
        } outF;
        outF.asFloat = halide_float16_bits_to_float(in);
        union {
            double asDouble;
            uint64_t asUInt;
        } outD;
        outD.asDouble = halide_float16_bits_to_double(in);

        union {
            float asFloat;
            uint32_t asUInt;
        } expectedFValue;
        expectedFValue.asFloat = expectedF[index];

        union {
            double asDouble;
            uint64_t asUInt;
        } expectedDValue;
        expectedDValue.asDouble = expectedD[index];

        // Compare bits because NaN in not comparable
        h_assert(outF.asUInt == expectedFValue.asUInt, "Failed to match on convert to float");
        h_assert(outD.asUInt == expectedDValue.asUInt, "Failed to match on convert to double");
    }
    printf("Success!\n");
    return 0;
}