File: gfx12_asm_vop3p_dpp8.s

package info (click to toggle)
llvm-toolchain-18 1%3A18.1.8-18
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 1,908,340 kB
  • sloc: cpp: 6,667,937; ansic: 1,440,452; asm: 883,619; python: 230,549; objc: 76,880; f90: 74,238; lisp: 35,989; pascal: 16,571; sh: 10,229; perl: 7,459; ml: 5,047; awk: 3,523; makefile: 2,987; javascript: 2,149; xml: 892; fortran: 649; cs: 573
file content (42 lines) | stat: -rw-r--r-- 2,853 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s

v_fma_mix_f32 v0, v1, v2, v3 dpp8:[2,2,2,2,4,4,4,4]
// GFX12: encoding: [0x00,0x00,0x20,0xcc,0xe9,0x04,0x0e,0x04,0x01,0x92,0x44,0x92]

v_fma_mix_f32 v0, v1, v2, v3 clamp dpp8:[2,2,2,2,4,4,4,4] fi:1
// GFX12: encoding: [0x00,0x80,0x20,0xcc,0xea,0x04,0x0e,0x04,0x01,0x92,0x44,0x92]

v_fma_mixlo_f16 v0, abs(v1), -v2, abs(v3) dpp8:[2,2,2,2,4,4,4,4]
// GFX12: encoding: [0x00,0x05,0x21,0xcc,0xe9,0x04,0x0e,0x44,0x01,0x92,0x44,0x92]

// For test purpose only. OP_SEL has to be set to all 0 and OP_SEL_HI has to be set to all 1
v_fma_mixlo_f16 v0, abs(v1), -v2, abs(v3) op_sel:[1,0,0] op_sel_hi:[1,0,0] dpp8:[2,2,2,2,4,4,4,4]
// GFX12: encoding: [0x00,0x0d,0x21,0xcc,0xe9,0x04,0x0e,0x4c,0x01,0x92,0x44,0x92]

v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 neg_lo:[0,1,1] neg_hi:[1,0,1] dpp8:[7,6,5,4,3,2,1,0]
// GFX12: encoding: [0x00,0x05,0x13,0xcc,0xe9,0x04,0x0e,0xc4,0x01,0x77,0x39,0x05]

v_dot4_f32_fp8_bf8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7]
// GFX12: v_dot4_f32_fp8_bf8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x40,0x24,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa]

v_dot4_f32_fp8_bf8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1
// GFX12: v_dot4_f32_fp8_bf8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x00,0x40,0x24,0xcc,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa]

v_dot4_f32_bf8_fp8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7]
// GFX12: v_dot4_f32_bf8_fp8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x40,0x25,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa]

v_dot4_f32_bf8_fp8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1
// GFX12: v_dot4_f32_bf8_fp8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x00,0x40,0x25,0xcc,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa]

v_dot4_f32_fp8_fp8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7]
// GFX12: v_dot4_f32_fp8_fp8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x40,0x26,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa]

v_dot4_f32_fp8_fp8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1
// GFX12: v_dot4_f32_fp8_fp8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x00,0x40,0x26,0xcc,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa]

v_dot4_f32_bf8_bf8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7]
// GFX12: v_dot4_f32_bf8_bf8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x40,0x27,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa]

v_dot4_f32_bf8_bf8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1
// GFX12: v_dot4_f32_bf8_bf8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x00,0x40,0x27,0xcc,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa]