// SIMD op check for PowerPC targets: AltiVec baseline plus VSX and POWER ISA 2.07 additions.
#include "simd_op_check.h"
#include "Halide.h"
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
using namespace Halide;
using namespace Halide::ConciseCasts;
namespace {
class SimdOpCheckPowerPC : public SimdOpCheckTest {
public:
SimdOpCheckPowerPC(Target t, int w = 768, int h = 128)
: SimdOpCheckTest(t, w, h) {
use_vsx = target.has_feature(Target::VSX);
use_power_arch_2_07 = target.has_feature(Target::POWER_ARCH_2_07);
}
void add_tests() override {
if (target.arch == Target::POWERPC) {
check_altivec_all();
}
}
void check_altivec_all() {
Expr f32_1 = in_f32(x), f32_2 = in_f32(x + 16), f32_3 = in_f32(x + 32);
Expr f64_1 = in_f64(x), f64_2 = in_f64(x + 16), f64_3 = in_f64(x + 32);
Expr i8_1 = in_i8(x), i8_2 = in_i8(x + 16), i8_3 = in_i8(x + 32);
Expr u8_1 = in_u8(x), u8_2 = in_u8(x + 16), u8_3 = in_u8(x + 32);
Expr i16_1 = in_i16(x), i16_2 = in_i16(x + 16), i16_3 = in_i16(x + 32);
Expr u16_1 = in_u16(x), u16_2 = in_u16(x + 16), u16_3 = in_u16(x + 32);
Expr i32_1 = in_i32(x), i32_2 = in_i32(x + 16), i32_3 = in_i32(x + 32);
Expr u32_1 = in_u32(x), u32_2 = in_u32(x + 16), u32_3 = in_u32(x + 32);
Expr i64_1 = in_i64(x), i64_2 = in_i64(x + 16), i64_3 = in_i64(x + 32);
Expr u64_1 = in_u64(x), u64_2 = in_u64(x + 16), u64_3 = in_u64(x + 32);
// Expr bool_1 = (f32_1 > 0.3f), bool_2 = (f32_2 < -0.3f), bool_3 = (f32_3 != -0.34f);
// Basic AltiVec SIMD instructions.
for (int w = 1; w <= 4; w++) {
// Vector Integer Add Instructions.
check("vaddsbs", 16 * w, i8_sat(i16(i8_1) + i16(i8_2)));
check("vaddshs", 8 * w, i16_sat(i32(i16_1) + i32(i16_2)));
check("vaddsws", 4 * w, i32_sat(i64(i32_1) + i64(i32_2)));
check("vaddubm", 16 * w, i8_1 + i8_2);
check("vadduhm", 8 * w, i16_1 + i16_2);
check("vadduwm", 4 * w, i32_1 + i32_2);
check("vaddubs", 16 * w, u8(min(u16(u8_1) + u16(u8_2), max_u8)));
check("vadduhs", 8 * w, u16(min(u32(u16_1) + u32(u16_2), max_u16)));
check("vadduws", 4 * w, u32(min(u64(u32_1) + u64(u32_2), max_u32)));
// Vector Integer Subtract Instructions.
check("vsubsbs", 16 * w, i8_sat(i16(i8_1) - i16(i8_2)));
check("vsubshs", 8 * w, i16_sat(i32(i16_1) - i32(i16_2)));
check("vsubsws", 4 * w, i32_sat(i64(i32_1) - i64(i32_2)));
check("vsububm", 16 * w, i8_1 - i8_2);
check("vsubuhm", 8 * w, i16_1 - i16_2);
check("vsubuwm", 4 * w, i32_1 - i32_2);
check("vsububs", 16 * w, u8(max(i16(u8_1) - i16(u8_2), 0)));
check("vsubuhs", 8 * w, u16(max(i32(u16_1) - i32(u16_2), 0)));
check("vsubuws", 4 * w, u32(max(i64(u32_1) - i64(u32_2), 0)));
// Vector Integer Average Instructions.
check("vavgsb", 16 * w, i8((i16(i8_1) + i16(i8_2) + 1) / 2));
check("vavgub", 16 * w, u8((u16(u8_1) + u16(u8_2) + 1) / 2));
check("vavgsh", 8 * w, i16((i32(i16_1) + i32(i16_2) + 1) / 2));
check("vavguh", 8 * w, u16((u32(u16_1) + u32(u16_2) + 1) / 2));
check("vavgsw", 4 * w, i32((i64(i32_1) + i64(i32_2) + 1) / 2));
check("vavguw", 4 * w, u32((u64(u32_1) + u64(u32_2) + 1) / 2));
// Vector Integer Maximum and Minimum Instructions
check("vmaxsb", 16 * w, max(i8_1, i8_2));
check("vmaxub", 16 * w, max(u8_1, u8_2));
check("vmaxsh", 8 * w, max(i16_1, i16_2));
check("vmaxuh", 8 * w, max(u16_1, u16_2));
check("vmaxsw", 4 * w, max(i32_1, i32_2));
check("vmaxuw", 4 * w, max(u32_1, u32_2));
check("vminsb", 16 * w, min(i8_1, i8_2));
check("vminub", 16 * w, min(u8_1, u8_2));
check("vminsh", 8 * w, min(i16_1, i16_2));
check("vminuh", 8 * w, min(u16_1, u16_2));
check("vminsw", 4 * w, min(i32_1, i32_2));
check("vminuw", 4 * w, min(u32_1, u32_2));
// Vector Floating-Point Arithmetic Instructions
check(use_vsx || use_power_arch_2_07 ? "xvaddsp" : "vaddfp", 4 * w, f32_1 + f32_2);
check(use_vsx || use_power_arch_2_07 ? "xvsubsp" : "vsubfp", 4 * w, f32_1 - f32_2);
check(use_vsx || use_power_arch_2_07 ? "xvmaddasp" : "vmaddfp", 4 * w, f32_1 * f32_2 + f32_3);
// check("vnmsubfp", 4, f32_1 - f32_2 * f32_3);
// Vector Floating-Point Maximum and Minimum Instructions
check("vmaxfp", 4 * w, max(f32_1, f32_2));
check("vminfp", 4 * w, min(f32_1, f32_2));
}
// Check these if target supports VSX.
if (use_vsx) {
for (int w = 1; w <= 4; w++) {
// VSX Vector Floating-Point Arithmetic Instructions
check("xvadddp", 2 * w, f64_1 + f64_2);
check("xvmuldp", 2 * w, f64_1 * f64_2);
check("xvsubdp", 2 * w, f64_1 - f64_2);
check("xvaddsp", 4 * w, f32_1 + f32_2);
check("xvmulsp", 4 * w, f32_1 * f32_2);
check("xvsubsp", 4 * w, f32_1 - f32_2);
check("xvmaxdp", 2 * w, max(f64_1, f64_2));
check("xvmindp", 2 * w, min(f64_1, f64_2));
}
}
// Check these if target supports POWER ISA 2.07 and above.
// These also include new instructions in POWER ISA 2.06.
if (use_power_arch_2_07) {
for (int w = 1; w <= 4; w++) {
check("vaddudm", 2 * w, i64_1 + i64_2);
check("vsubudm", 2 * w, i64_1 - i64_2);
check("vmaxsd", 2 * w, max(i64_1, i64_2));
check("vmaxud", 2 * w, max(u64_1, u64_2));
check("vminsd", 2 * w, min(i64_1, i64_2));
check("vminud", 2 * w, min(u64_1, u64_2));
}
}
}
private:
bool use_power_arch_2_07{false};
bool use_vsx{false};
const Var x{"x"}, y{"y"};
};
} // namespace
// Entry point: runs the PowerPC op check through SimdOpCheckTest::main for
// each target listed below and returns whatever status that call produces.
int main(int argc, char **argv) {
    const int exit_status = SimdOpCheckTest::main<SimdOpCheckPowerPC>(
        argc, argv,
        {
            // IMPORTANT:
            // Whenever a target is added to this list, can_run_code in
            // simd_op_check.h must also be updated to cover any new
            // features that target uses.

            // 32-bit variants.
            Target("powerpc-32-linux"),
            Target("powerpc-32-linux-vsx"),
            Target("powerpc-32-linux-power_arch_2_07"),
            Target("powerpc-32-linux-power_arch_2_07-vsx"),

            // 64-bit variants.
            Target("powerpc-64-linux"),
            Target("powerpc-64-linux-vsx"),
            Target("powerpc-64-linux-power_arch_2_07"),
            Target("powerpc-64-linux-power_arch_2_07-vsx"),
        });
    return exit_status;
}