File: 1499.ispc

package info (click to toggle)
ispc 1.28.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 97,620 kB
  • sloc: cpp: 77,067; python: 8,303; yacc: 3,337; lex: 1,126; ansic: 631; sh: 475; makefile: 17
file content (26 lines) | stat: -rw-r--r-- 985 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
// This test checks that no redundant vmovaps instructions are generated for the code below.
// The generated assembly used to be the following:
//     vmovaps    LCPI0_0(%rip), %ymm0    ## ymm0 = [4294967295,4294967295,4294967295,4294967295,0,0,0,0]
//     vmaskmovps    (%rdi), %ymm0, %ymm1
//     vmaskmovps    (%rsi), %ymm0, %ymm2
//     vmovaps    %xmm1, %xmm1   // <-- redundant, as upper part of ymm1 is already zeros.
//     vmovaps    %xmm2, %xmm2   // <-- redundant, as upper part of ymm2 is already zeros.
//     vmulps    %ymm2, %ymm1, %ymm1
//     vmaskmovps    %ymm1, %ymm0, -16(%rsp)

// RUN: %{ispc} %s --target=avx2-i32x8 --emit-asm -o - | FileCheck %s

// REQUIRES: X86_ENABLED

// Plain aggregate of four floats; Mul1 below reads and writes it one element at a time.
struct FVector4 {
    float V[4]; // the four float components, indexed 0..3 by the foreach loop in Mul1
};

// Returns the element-wise product of A and B: Result.V[i] = A.V[i] * B.V[i] for i in [0, 4).
// This is the function whose generated assembly the test inspects; the FileCheck
// directive inside the body rejects any vmovaps that moves an xmm register onto itself.
// Keep the body in this exact shape: the test is about the code generated for it.
unmasked uniform FVector4 Mul1(const uniform FVector4 &A, const uniform FVector4 &B) {
    uniform FVector4 Result;
// CHECK-NOT: vmovaps [[REG:%xmm[0-9]+]], [[REG]]
    // The loop covers only 4 elements; per the header comment, the masked loads on the
    // 8-wide target already leave the upper half of the ymm registers zeroed.
    foreach(i = 0 ... 4) {
        Result.V[i] = A.V[i] * B.V[i];
    }
    return Result;
}