1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
|
/* This file is part of the dynarmic project.
* Copyright (c) 2021 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <catch2/benchmark/catch_benchmark.hpp>
#include <catch2/catch_test_macros.hpp>
#include <fmt/printf.h>
#include <mcl/stdint.hpp>
#include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
// Candidate reciprocal-square-root-estimate routines exercised by this file.
// They are only declared here (C linkage, u32 in / u32 out); no definition is
// visible in this file.
// NOTE(review): presumably they come from a separately built assembly/object
// file and take/return raw single-precision bit patterns - confirm.
//
// Benchmarked only (not compared against the reference in Test()):
extern "C" u32 rsqrt_inaccurate(u32);
// Compared against the reference in Test() and benchmarked:
extern "C" u32 rsqrt_full(u32);
extern "C" u32 rsqrt_full_gpr(u32);
// Benchmarked only (not compared against the reference in Test()):
extern "C" u32 rsqrt_full_nb(u32);
extern "C" u32 rsqrt_full_nb2(u32);
extern "C" u32 rsqrt_full_nb_gpr(u32);
// Compared against the reference in Test() and benchmarked:
extern "C" u32 rsqrt_newton(u32);
extern "C" u32 rsqrt_hack(u32);
// Lets the code below name Dynarmic's members (e.g. FP::FPCR) without the
// Dynarmic:: qualifier.
using namespace Dynarmic;
// Reference implementation with C linkage: evaluates FP::FPRSqrtEstimate on
// `value` with a default-constructed FPCR and FPSR. Both objects are local to
// the call, so only the estimate itself is handed back to the caller.
extern "C" u32 rsqrt_fallback(u32 value) {
    FP::FPCR control;
    FP::FPSR status;
    const u32 estimate = FP::FPRSqrtEstimate(value, control, status);
    return estimate;
}
// Underscore-prefixed twin of rsqrt_fallback, also with C linkage; it forwards
// its argument unchanged and returns whatever rsqrt_fallback returns.
// NOTE(review): presumably present so externally built code can link against
// the symbol on platforms that prepend an underscore to C names - confirm.
extern "C" u32 _rsqrt_fallback(u32 value) {
    const u32 forwarded = rsqrt_fallback(value);
    return forwarded;
}
// Verifies, for a single input, that rsqrt_full, rsqrt_full_gpr, rsqrt_newton
// and rsqrt_hack all return exactly the same u32 as FP::FPRSqrtEstimate run
// with default-constructed FPCR/FPSR.
//
// Nothing is asserted or printed when every result matches. On any mismatch
// the input and all five results are printed in hex, then each comparison is
// REQUIREd so Catch2 reports which implementation(s) diverged.
void Test(u32 value) {
    FP::FPCR control;
    FP::FPSR status;

    // These names appear verbatim in Catch2's failure output via REQUIRE.
    const u32 expect = FP::FPRSqrtEstimate(value, control, status);
    const u32 full = rsqrt_full(value);
    const u32 full_gpr = rsqrt_full_gpr(value);
    const u32 newton = rsqrt_newton(value);
    const u32 hack = rsqrt_hack(value);

    const bool all_match = expect == full && expect == full_gpr && expect == newton && expect == hack;
    if (all_match) {
        return;
    }

    fmt::print("{:08x} = {:08x} : {:08x} : {:08x} : {:08x} : {:08x}\n", value, expect, full, full_gpr, newton, hack);
    REQUIRE(expect == full);
    REQUIRE(expect == full_gpr);
    REQUIRE(expect == newton);
    REQUIRE(expect == hack);
}
// Runs Test() on a handful of hand-picked inputs and then on every possible
// 32-bit input. The "[.]" tag hides the case from Catch2's default run, so it
// only executes when selected explicitly.
TEST_CASE("RSqrt Tests", "[fp][.]") {
    // Hand-picked inputs, checked first so failures on them show up before
    // the long sweep.
    // NOTE(review): read as IEEE-754 single-precision bit patterns these look
    // like signed zeros, infinities and NaNs - confirm.
    constexpr u32 special_inputs[] = {
        0x00000000,
        0x80000000,
        0x7f8b7201,
        0x7f800000,
        0x7fc00000,
        0xff800000,
        0xffc00000,
        0xff800001,
    };
    for (const u32 input : special_inputs) {
        Test(input);
    }

    // Exhaustive sweep over all 2^32 inputs. The counter is 64-bit so the
    // loop can terminate after the last u32 value instead of wrapping.
    for (u64 bits = 0; bits <= 0xffff'ffff; ++bits) {
        Test(static_cast<u32>(bits));
    }
}
TEST_CASE("Benchmark RSqrt", "[fp][.]") {
BENCHMARK("Inaccurate") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_inaccurate(value);
}
return total;
};
BENCHMARK("Full divss") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_full(value);
}
return total;
};
BENCHMARK("Full divss (GPR)") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_full_gpr(value);
}
return total;
};
BENCHMARK("Full divss (NB)") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_full_nb(value);
}
return total;
};
BENCHMARK("Full divss (NB2)") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_full_nb2(value);
}
return total;
};
BENCHMARK("Full divss (NB + GPR)") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_full_nb_gpr(value);
}
return total;
};
BENCHMARK("One Newton iteration") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_newton(value);
}
return total;
};
BENCHMARK("Ugly Hack") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_hack(value);
}
return total;
};
BENCHMARK("Softfloat") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_fallback(value);
}
return total;
};
}
|