File: gfx11_asm_vopd_features.s

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,998,520 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (180 lines) | stat: -rw-r--r-- 8,340 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck -check-prefix=GFX11 %s

//===----------------------------------------------------------------------===//
// A VOPD instruction can use one or more literals,
// provided that they are identical.
//===----------------------------------------------------------------------===//

// LITERAL

v_dual_mul_f32      v11, v1, v2                  ::  v_dual_mul_f32      v10, 0x24681357, v5
// GFX11: encoding: [0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]

// LITERAL*2

v_dual_mul_f32      v11, 0x24681357, v2          ::  v_dual_mul_f32      v10, 0x24681357, v5
// GFX11: encoding: [0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]

// LITERAL*2 (this is an unclear case because literals have different size, but SP3 accepts this code)

v_dual_add_f32      v6, 0xfe0b, v5               ::  v_dual_dot2acc_f32_f16  v255, 0xfe0b, v4
// GFX11: encoding: [0xff,0x0a,0x18,0xc9,0xff,0x08,0xfe,0x06,0x0b,0xfe,0x00,0x00]

// LITERAL + KIMM

v_dual_add_f32      v5, 0xaf123456, v2           ::  v_dual_fmaak_f32     v6, v3, v1, 0xaf123456 ;
// GFX11: encoding: [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf]

// KIMM + LITERAL

v_dual_fmamk_f32    v122, v74, 0xa0172923, v161  ::  v_dual_lshlrev_b32   v247, 0xa0172923, v99
// GFX11: encoding: [0x4a,0x43,0xa3,0xc8,0xff,0xc6,0xf6,0x7a,0x23,0x29,0x17,0xa0]

// KIMM + LITERAL (this is an unclear case because literals have different size, but SP3 accepts this code)

v_dual_fmamk_f32    v122, v74, 0xfe0b, v162      ::  v_dual_dot2acc_f32_f16  v247, 0xfe0b, v99
// GFX11: encoding: [0x4a,0x45,0x99,0xc8,0xff,0xc6,0xf6,0x7a,0x0b,0xfe,0x00,0x00]

// KIMM*2

v_dual_fmamk_f32    v122, 0xdeadbeef, 0xdeadbeef, v161 ::  v_dual_fmamk_f32  v123, 0xdeadbeef, 0xdeadbeef, v162
// GFX11: encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde]

//===----------------------------------------------------------------------===//
// A VOPD instruction can use 2 scalar operands,
// but implicit VCC must be counted in.
//===----------------------------------------------------------------------===//

// 2 different SGPRs

v_dual_mul_f32      v0, s1, v2                   ::  v_dual_mul_f32       v3, s4, v5
// GFX11: encoding: [0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00]

// SGPR + LITERAL

v_dual_fmaak_f32    v122, s74, v161, 2.741       ::  v_dual_and_b32       v247, v160, v98
// GFX11: encoding: [0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]

v_dual_mov_b32      v247, v160                   ::  v_dual_fmaak_f32     v122, s74, v161, 2.741
// GFX11: encoding: [0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40]

// SGPR*2 + LITERAL

v_dual_fmaak_f32    v122, s74, v161, 2.741       ::  v_dual_and_b32       v247, s74, v98
// GFX11: encoding: [0x4a,0x42,0x65,0xc8,0x4a,0xc4,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]

// SGPR + LITERAL*2

v_dual_fmaak_f32    v122, s74, v161, 2.741       ::  v_dual_fmamk_f32     v3, v6, 2.741, v1
// GFX11: encoding: [0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40]

// SGPR*2 + LITERAL*2

v_dual_fmaak_f32    v122, s74, v161, 2.741       ::  v_dual_fmamk_f32     v3, s74, 2.741, v1
// GFX11: encoding: [0x4a,0x42,0x45,0xc8,0x4a,0x02,0x02,0x7a,0x8b,0x6c,0x2f,0x40]

// LITERAL + VCC

v_dual_fmaak_f32    v122, v0, v161, 2.741       ::  v_dual_cndmask_b32   v1, v2, v3
// GFX11: encoding: [0x00,0x43,0x53,0xc8,0x02,0x07,0x00,0x7a,0x8b,0x6c,0x2f,0x40]

// LITERAL*2 + VCC

v_dual_fmaak_f32    v122, v0, v161, 2.741       ::  v_dual_cndmask_b32   v1, 2.741, v3
// GFX11: encoding: [0x00,0x43,0x53,0xc8,0xff,0x06,0x00,0x7a,0x8b,0x6c,0x2f,0x40]

// LITERAL*2 + VCC*2

v_dual_cndmask_b32  v255, 0xbabe, v2             ::  v_dual_cndmask_b32   v6, 0xbabe, v3
// GFX11: encoding: [0xff,0x04,0x52,0xca,0xff,0x06,0x06,0xff,0xbe,0xba,0x00,0x00]

// SGPR*2 + VCC

v_dual_add_f32      v255, s105, v2               ::  v_dual_cndmask_b32   v6, s105, v3
// GFX11: encoding: [0x69,0x04,0x12,0xc9,0x69,0x06,0x06,0xff]

// SGPR*2 + VCC*2

v_dual_cndmask_b32  v255, s1, v2                 ::  v_dual_cndmask_b32   v6, s1, v3
// GFX11: encoding: [0x01,0x04,0x52,0xca,0x01,0x06,0x06,0xff]

// VCC*2

v_dual_add_f32      v255, vcc_lo, v2             ::  v_dual_cndmask_b32   v6, v1, v3
// GFX11: encoding: [0x6a,0x04,0x12,0xc9,0x01,0x07,0x06,0xff]

//===----------------------------------------------------------------------===//
// One dst register must be even and the other odd.
//===----------------------------------------------------------------------===//

v_dual_mul_f32      v0, v10, v20                 ::  v_dual_mul_f32      v1, v11, v21
// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2b,0x00,0x00]

v_dual_mul_f32      v1, v10, v20                 ::  v_dual_mul_f32      v0, v11, v21
// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2b,0x00,0x01]

//===----------------------------------------------------------------------===//
// srcX0 and srcY0 must use different VGPR banks.
//===----------------------------------------------------------------------===//

v_dual_mul_f32      v0, v10, v20                 ::  v_dual_mul_f32      v1, v11, v21
// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2b,0x00,0x00]

v_dual_mul_f32      v0, v10, v20                 ::  v_dual_mul_f32      v1, v12, v21
// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0c,0x2b,0x00,0x00]

v_dual_mul_f32      v0, v10, v20                 ::  v_dual_mul_f32      v1, v13, v21
// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0d,0x2b,0x00,0x00]

v_dual_mul_f32      v0, v10, v20                 ::  v_dual_mul_f32      v1, v15, v21
// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0f,0x2b,0x00,0x00]

v_dual_mul_f32      v0, v10, v20                 ::  v_dual_mul_f32      v1, v16, v21
// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x10,0x2b,0x00,0x00]

v_dual_mul_f32      v0, v10, v20                 ::  v_dual_mul_f32      v1, v17, v21
// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x11,0x2b,0x00,0x00]

//===----------------------------------------------------------------------===//
// srcX1 and srcY1 must use different VGPR banks.
//===----------------------------------------------------------------------===//

v_dual_mul_f32      v0, v10, v20                 ::  v_dual_mul_f32      v1, v11, v21
// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2b,0x00,0x00]

v_dual_mul_f32      v0, v10, v20                 ::  v_dual_mul_f32      v1, v11, v22
// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2d,0x00,0x00]

v_dual_mul_f32      v0, v10, v20                 ::  v_dual_mul_f32      v1, v11, v23
// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2f,0x00,0x00]

v_dual_mul_f32      v0, v10, v20                 ::  v_dual_mul_f32      v1, v11, v25
// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x33,0x00,0x00]

v_dual_mul_f32      v0, v10, v20                 ::  v_dual_mul_f32      v1, v11, v26
// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x35,0x00,0x00]

v_dual_mul_f32      v0, v10, v20                 ::  v_dual_mul_f32      v1, v11, v27
// GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x37,0x00,0x00]

//===----------------------------------------------------------------------===//
// srcX2 and srcY2 must use different VGPR banks.
//===----------------------------------------------------------------------===//

v_dual_fmamk_f32    v6, v1, 0xaf123456, v0       :: v_dual_fmamk_f32      v5, v2, 0xaf123456, v1
// GFX11: encoding: [0x01,0x01,0x84,0xc8,0x02,0x03,0x04,0x06,0x56,0x34,0x12,0xaf]

v_dual_fmamk_f32    v6, v1, 0xaf123456, v1       :: v_dual_fmamk_f32      v5, v2, 0xaf123456, v0
// GFX11: encoding: [0x01,0x03,0x84,0xc8,0x02,0x01,0x04,0x06,0x56,0x34,0x12,0xaf]

v_dual_fmac_f32     v6, v1, v2                   :: v_dual_fmamk_f32      v7, v2, 0xaf123456, v7
// GFX11: encoding: [0x01,0x05,0x04,0xc8,0x02,0x0f,0x06,0x06,0x56,0x34,0x12,0xaf]

v_dual_fmac_f32     v7, v1, v2                   :: v_dual_fmamk_f32      v6, v2, 0xaf123456, v6
// GFX11: encoding: [0x01,0x05,0x04,0xc8,0x02,0x0d,0x06,0x07,0x56,0x34,0x12,0xaf]

v_dual_fmamk_f32    v5, v1, 0xaf123456, v5       :: v_dual_fmac_f32       v6, v2, v3
// GFX11: encoding: [0x01,0x0b,0x80,0xc8,0x02,0x07,0x06,0x05,0x56,0x34,0x12,0xaf]

v_dual_fmamk_f32    v6, v1, 0xaf123456, v6       :: v_dual_fmac_f32       v5, v2, v3
// GFX11: encoding: [0x01,0x0d,0x80,0xc8,0x02,0x07,0x04,0x06,0x56,0x34,0x12,0xaf]