File: gfx11_asm_vopc_dpp.s

package info (click to toggle)
llvm-toolchain-15 1%3A15.0.6-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,554,644 kB
  • sloc: cpp: 5,922,452; ansic: 1,012,136; asm: 674,362; python: 191,568; objc: 73,855; f90: 42,327; lisp: 31,913; pascal: 11,973; javascript: 10,144; sh: 9,421; perl: 7,447; ml: 5,527; awk: 3,523; makefile: 2,520; xml: 885; cs: 573; fortran: 567
file content (233 lines) | stat: -rw-r--r-- 8,650 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s

// =========================================================
// dpp8
// =========================================================

// 32 bit

v_cmp_le_u16_dpp v1, v2 dpp8:[7,7,7,3,4,4,6,7] fi:1
// GFX11: encoding: [0xea,0x04,0x76,0x7c,0x01,0xff,0x47,0xfa]

v_cmp_le_i16_dpp v1, v2 dpp8:[7,7,7,3,4,4,6,7]
// GFX11: encoding: [0xe9,0x04,0x66,0x7c,0x01,0xff,0x47,0xfa]

// w32

v_cmp_le_i32_dpp vcc_lo, v1, v255 dpp8:[0,2,1,3,4,5,6,7]
// W32: encoding: [0xe9,0xfe,0x87,0x7c,0x01,0x50,0xc6,0xfa]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_cmp_tru_f32_dpp vcc_lo, v1, v2 dpp8:[0,2,1,3,4,5,6,7]
// W32: encoding: [0xe9,0x04,0x3e,0x7c,0x01,0x50,0xc6,0xfa]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

// check vcc/vcc_lo have been added
v_cmp_lt_f32_dpp v1, v2 dpp8:[2,3,4,1,3,3,3,3]
// W32: v_cmp_lt_f32 vcc_lo, v1, v2 dpp8:[2,3,4,1,3,3,3,3] ; encoding: [0xe9,0x04,0x22,0x7c,0x01,0x1a,0xb3,0x6d]
// W64: v_cmp_lt_f32 vcc, v1, v2 dpp8:[2,3,4,1,3,3,3,3] ; encoding: [0xe9,0x04,0x22,0x7c,0x01,0x1a,0xb3,0x6d]

// w64

v_cmp_lt_u16_dpp vcc, v1, v2 dpp8:[7,6,5,3,4,2,1,0] fi:1
// W64: encoding: [0xea,0x04,0x72,0x7c,0x01,0x77,0x47,0x05]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

// cmp class

v_cmp_class_f16_dpp vcc, v1, v2 dpp8:[7,6,5,3,4,2,1,0] fi:1
// W64: encoding: [0xea,0x04,0xfa,0x7c,0x01,0x77,0x47,0x05]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

// cmpx

v_cmpx_f_i32_dpp v0, v2 dpp8:[7,6,5,3,4,2,1,0] fi:1
// GFX11: encoding: [0xea,0x04,0x80,0x7d,0x00,0x77,0x47,0x05]

v_cmpx_t_f32_dpp v255, v2 dpp8:[7,6,5,3,4,2,1,0]
// GFX11: encoding: [0xe9,0x04,0x3e,0x7d,0xff,0x77,0x47,0x05]

// cmpx class

v_cmpx_class_f16_dpp v12, v101 dpp8:[7,6,5,3,4,2,1,0]
// GFX11: encoding: [0xe9,0xca,0xfa,0x7d,0x0c,0x77,0x47,0x05]

// =========================================================
// 64 bit

v_cmp_lt_i16_e64_dpp s[50:51], v1, v22 dpp8:[7,6,5,3,4,2,1,0]
// W64: encoding: [0x32,0x00,0x31,0xd4,0xe9,0x2c,0x02,0x00,0x01,0x77,0x47,0x05]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_cmp_lt_i16_e64_dpp s10, v10, v2 dpp8:[7,6,5,3,4,2,1,0]
// W32: encoding: [0x0a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x0a,0x77,0x47,0x05]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

// w32

v_cmp_gt_i32_e64_dpp s10, v1, v50 dpp8:[0,1,2,3,4,5,6,7] fi:1
// W32: encoding: [0x0a,0x00,0x44,0xd4,0xea,0x64,0x02,0x00,0x01,0x88,0xc6,0xfa]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_cmp_ngt_f32_e64_dpp s10, -v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1
// W32: encoding: [0x0a,0x00,0x1b,0xd4,0xea,0x04,0x02,0x20,0x01,0x88,0xc6,0xfa]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

// w64

v_cmp_f_f32_e64_dpp s[10:11], v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1
// W64: encoding: [0x0a,0x00,0x10,0xd4,0xea,0x04,0x02,0x00,0x01,0x88,0xc6,0xfa]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_cmp_nle_f32_e64_dpp s[10:11], v2, v201 dpp8:[0,1,6,3,4,5,6,7]
// W64: encoding: [0x0a,0x00,0x1c,0xd4,0xe9,0x92,0x03,0x00,0x02,0x88,0xc7,0xfa]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_cmp_lt_f16_e64_dpp s[10:11], v240, -v2 dpp8:[0,1,6,3,4,5,6,7]
// W64: encoding: [0x0a,0x00,0x01,0xd4,0xe9,0x04,0x02,0x40,0xf0,0x88,0xc7,0xfa]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

// cmp class

// w32

v_cmp_class_f32_e64_dpp s10, v10, v2 dpp8:[0,1,6,3,4,5,6,7]
// W32: encoding: [0x0a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x0a,0x88,0xc7,0xfa]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

// w64

v_cmp_class_f16_e64_dpp vcc, v1, v2 dpp8:[0,1,6,3,4,5,6,7]
// W64: encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x88,0xc7,0xfa]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

// cmpx

v_cmpx_eq_i16_e64_dpp v5, v25 dpp8:[0,1,6,3,4,5,6,7] fi:1
// GFX11: encoding: [0x7e,0x00,0xb2,0xd4,0xea,0x32,0x02,0x00,0x05,0x88,0xc7,0xfa]

v_cmpx_ge_i32_e64_dpp v0, v3 dpp8:[0,1,6,3,4,5,6,7] fi:1
// GFX11: encoding: [0x7e,0x00,0xc6,0xd4,0xea,0x06,0x02,0x00,0x00,0x88,0xc7,0xfa]

// cmpx class

v_cmpx_class_f32_e64_dpp v1, v20 dpp8:[0,1,6,3,4,5,6,7] fi:1
// GFX11: encoding: [0x7e,0x00,0xfe,0xd4,0xea,0x28,0x02,0x00,0x01,0x88,0xc7,0xfa]

// ================================================================
// dpp
// ================================================================

// 32 bit

v_cmp_gt_u16_dpp v1, v2 row_shl:0x7 row_mask:0x0 bank_mask:0x0 fi:1
// GFX11: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x07,0x05,0x00]

v_cmp_gt_i16_dpp v1, v2 quad_perm:[1,3,1,0] row_mask:0x7
// GFX11: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1d,0x00,0x7f]

v_cmp_lt_f32 v1, -v2 quad_perm:[0,1,2,2]
// GFX11: encoding: [0xfa,0x04,0x22,0x7c,0x01,0xa4,0x40,0xff]

// w32

v_cmp_gt_i32_dpp vcc_lo, v1, v255 row_mirror bank_mask:0x2 fi:1
// W32: encoding: [0xfa,0xfe,0x89,0x7c,0x01,0x40,0x05,0xf2]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_cmp_f_f32_dpp vcc_lo, v1, v2 row_shl:0x7 row_mask:0x0 bank_mask:0x0
// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x07,0x01,0x00]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

// w64

v_cmp_ge_u16_dpp vcc, v1, v2 row_share:0xa bound_ctrl:0
// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x5a,0x09,0xff]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

// cmp class

v_cmp_class_f16_dpp vcc, v1, v2 row_half_mirror bound_ctrl:0
// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x09,0xff]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

// cmpx

v_cmpx_t_i32_dpp v0, v2 row_shr:0xe row_mask:0x3 bank_mask:0xa bound_ctrl:0
// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x00,0x1e,0x09,0x3a]

v_cmpx_f_f32_dpp v255, v2 quad_perm:[2,3,0,0]
// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0xff,0x0e,0x00,0xff]

// cmpx class

v_cmpx_class_f16_dpp v12, v101 quad_perm:[2,3,0,0] fi:1
// GFX11: encoding: [0xfa,0xca,0xfa,0x7d,0x0c,0x0e,0x04,0xff]

v_cmpx_class_f16_dpp abs(v12), v101 quad_perm:[2,3,0,0]
// GFX11: encoding: [0xfa,0xca,0xfa,0x7d,0x0c,0x0e,0x20,0xff]

// =========================================================
// 64 bit

v_cmp_ge_i16_e64_dpp s[50:51], v1, v22 quad_perm:[2,2,3,1]
// W64: encoding: [0x32,0x00,0x36,0xd4,0xfa,0x2c,0x02,0x00,0x01,0x7a,0x00,0xff]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_cmp_ge_i16_e64_dpp s10, v10, v2 quad_perm:[0,2,3,1] row_mask:0x0
// W32: encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x0a,0x78,0x00,0x0f]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

// w32

v_cmp_le_i32_e64_dpp s10, v1, v50 quad_perm:[0,2,3,1] row_mask:0x0
// W32: encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x64,0x02,0x00,0x01,0x78,0x00,0x0f]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_cmp_nle_f32_e64_dpp s10, -v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0
// W32: encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x20,0x01,0x1b,0x00,0x00]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

// w64

v_cmp_t_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] bank_mask:0xe
// W64: encoding: [0x0a,0x00,0x1f,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xfe]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_cmp_ngt_f32_e64_dpp s[10:11], v2, v201 row_ror:7 bank_mask:0x1 bound_ctrl:0
// W64: encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x92,0x03,0x00,0x02,0x27,0x09,0xf1]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

v_cmp_lt_f16_e64_dpp s[10:11], v240, -v2 row_xmask:0x6 row_mask:0x0 fi:1
// W64: encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x40,0xf0,0x66,0x05,0x0f]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

// cmp class

// w32

v_cmp_class_f32_e64_dpp s10, v10, v2 quad_perm:[2,2,3,1] bound_ctrl:0 fi:1
// W32: encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x0a,0x7a,0x0c,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

// w64

v_cmp_class_f16_e64_dpp vcc, v1, v2 quad_perm:[1,2,3,1] bank_mask: 0x5 fi:1
// W64: encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x79,0x04,0xf5]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error

// cmpx

v_cmpx_eq_i16_e64_dpp v5, v25 quad_perm:[2,2,3,1] bound_ctrl:0 fi:1
// GFX11: encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x32,0x02,0x00,0x05,0x7a,0x0c,0xff]

v_cmpx_lt_i32_e64_dpp v0, v3 quad_perm:[3,2,1,0] bank_mask:0xe
// GFX11: encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x06,0x02,0x00,0x00,0x1b,0x00,0xfe]

// cmpx class

v_cmpx_class_f32_e64_dpp v1, v20 row_ror:7 bank_mask:0x1 bound_ctrl:0
// GFX11: encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x28,0x02,0x00,0x01,0x27,0x09,0xf1]