File: gfx11_dasm_wmma.txt

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (157 lines) | stat: -rw-r--r-- 11,966 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# Disassembler test for the gfx1100 WMMA instructions in this file.
#
# The file is disassembled twice by the two invocations below: once with
# wavefrontsize32 enabled (wavefrontsize64 disabled) and once with
# wavefrontsize64 enabled (wavefrontsize32 disabled). The first invocation is
# verified with the wave32 check prefix, the second with the wave64 check
# prefix.
#
# Layout of every test case further down: two expectation comments (one per
# wavefront size) followed by the comma-separated encoding bytes that are fed
# to the disassembler. Both expectations of a case list the same encoding
# bytes; only the printed register ranges differ between the two modes.
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=W32 %s
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=W64 %s


# Test v_wmma_f32_16x16x16_f16
#
# Five encodings, in order: plain register operands, the inline constant 1.0
# as the last source, then neg_lo/neg_hi set to [1,0,0], [0,1,0] and [1,1,0].
# Expected destination / last-source range is v[16:23] in wave32 mode and
# v[16:19] in wave64 mode; v[0:7] and v[8:15] are printed in both modes.

# W32: v_wmma_f32_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23]                               ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c]
# W64: v_wmma_f32_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19]                               ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c]
0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c

# W32: v_wmma_f32_16x16x16_f16 v[16:23], v[0:7], v[8:15], 1.0                                    ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0xca,0x1b]
# W64: v_wmma_f32_16x16x16_f16 v[16:19], v[0:7], v[8:15], 1.0                                    ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0xca,0x1b]
0x10,0x40,0x40,0xcc,0x00,0x11,0xca,0x1b

# W32: v_wmma_f32_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x41,0x40,0xcc,0x00,0x11,0x42,0x3c]
# W64: v_wmma_f32_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x41,0x40,0xcc,0x00,0x11,0x42,0x3c]
0x10,0x41,0x40,0xcc,0x00,0x11,0x42,0x3c

# W32: v_wmma_f32_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x42,0x40,0xcc,0x00,0x11,0x42,0x5c]
# W64: v_wmma_f32_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x42,0x40,0xcc,0x00,0x11,0x42,0x5c]
0x10,0x42,0x40,0xcc,0x00,0x11,0x42,0x5c

# W32: v_wmma_f32_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,1,0] neg_hi:[1,1,0] ; encoding: [0x10,0x43,0x40,0xcc,0x00,0x11,0x42,0x7c]
# W64: v_wmma_f32_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,1,0] neg_hi:[1,1,0] ; encoding: [0x10,0x43,0x40,0xcc,0x00,0x11,0x42,0x7c]
0x10,0x43,0x40,0xcc,0x00,0x11,0x42,0x7c


# Test v_wmma_f32_16x16x16_bf16
#
# Same five operand/modifier variants as the v_wmma_f32_16x16x16_f16 cases:
# plain registers, inline constant 1.0 as the last source, and neg_lo/neg_hi
# set to [1,0,0], [0,1,0] and [1,1,0]. The byte sequences here carry 0x41 in
# the third byte where the f16 cases carry 0x40.

# W32: v_wmma_f32_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23]                               ; encoding: [0x10,0x40,0x41,0xcc,0x00,0x11,0x42,0x1c]
# W64: v_wmma_f32_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19]                               ; encoding: [0x10,0x40,0x41,0xcc,0x00,0x11,0x42,0x1c]
0x10,0x40,0x41,0xcc,0x00,0x11,0x42,0x1c

# W32: v_wmma_f32_16x16x16_bf16 v[16:23], v[0:7], v[8:15], 1.0                                    ; encoding: [0x10,0x40,0x41,0xcc,0x00,0x11,0xca,0x1b]
# W64: v_wmma_f32_16x16x16_bf16 v[16:19], v[0:7], v[8:15], 1.0                                    ; encoding: [0x10,0x40,0x41,0xcc,0x00,0x11,0xca,0x1b]
0x10,0x40,0x41,0xcc,0x00,0x11,0xca,0x1b

# W32: v_wmma_f32_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x41,0x41,0xcc,0x00,0x11,0x42,0x3c]
# W64: v_wmma_f32_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x41,0x41,0xcc,0x00,0x11,0x42,0x3c]
0x10,0x41,0x41,0xcc,0x00,0x11,0x42,0x3c

# W32: v_wmma_f32_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x42,0x41,0xcc,0x00,0x11,0x42,0x5c]
# W64: v_wmma_f32_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x42,0x41,0xcc,0x00,0x11,0x42,0x5c]
0x10,0x42,0x41,0xcc,0x00,0x11,0x42,0x5c

# W32: v_wmma_f32_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,1,0] neg_hi:[1,1,0] ; encoding: [0x10,0x43,0x41,0xcc,0x00,0x11,0x42,0x7c]
# W64: v_wmma_f32_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,1,0] neg_hi:[1,1,0] ; encoding: [0x10,0x43,0x41,0xcc,0x00,0x11,0x42,0x7c]
0x10,0x43,0x41,0xcc,0x00,0x11,0x42,0x7c


# Test v_wmma_f16_16x16x16_f16
#
# Six encodings, in order: plain register operands, the inline constant 1.0
# as the last source, op_sel:[0,0,1], then neg_lo/neg_hi set to [1,0,0],
# [0,1,0] and [1,1,0]. The op_sel case differs from the plain case only in
# the second byte (0x60 instead of 0x40).

# W32: v_wmma_f16_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23]                                ; encoding: [0x10,0x40,0x42,0xcc,0x00,0x11,0x42,0x1c]
# W64: v_wmma_f16_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19]                                ; encoding: [0x10,0x40,0x42,0xcc,0x00,0x11,0x42,0x1c]
0x10,0x40,0x42,0xcc,0x00,0x11,0x42,0x1c

# W32: v_wmma_f16_16x16x16_f16 v[16:23], v[0:7], v[8:15], 1.0                                     ; encoding: [0x10,0x40,0x42,0xcc,0x00,0x11,0xca,0x1b]
# W64: v_wmma_f16_16x16x16_f16 v[16:19], v[0:7], v[8:15], 1.0                                     ; encoding: [0x10,0x40,0x42,0xcc,0x00,0x11,0xca,0x1b]
0x10,0x40,0x42,0xcc,0x00,0x11,0xca,0x1b

# W32: v_wmma_f16_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] op_sel:[0,0,1]                 ; encoding: [0x10,0x60,0x42,0xcc,0x00,0x11,0x42,0x1c]
# W64: v_wmma_f16_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] op_sel:[0,0,1]                 ; encoding: [0x10,0x60,0x42,0xcc,0x00,0x11,0x42,0x1c]
0x10,0x60,0x42,0xcc,0x00,0x11,0x42,0x1c

# W32: v_wmma_f16_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0]  ; encoding: [0x10,0x41,0x42,0xcc,0x00,0x11,0x42,0x3c]
# W64: v_wmma_f16_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0]  ; encoding: [0x10,0x41,0x42,0xcc,0x00,0x11,0x42,0x3c]
0x10,0x41,0x42,0xcc,0x00,0x11,0x42,0x3c

# W32: v_wmma_f16_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0]  ; encoding: [0x10,0x42,0x42,0xcc,0x00,0x11,0x42,0x5c]
# W64: v_wmma_f16_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0]  ; encoding: [0x10,0x42,0x42,0xcc,0x00,0x11,0x42,0x5c]
0x10,0x42,0x42,0xcc,0x00,0x11,0x42,0x5c

# W32: v_wmma_f16_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,1,0] neg_hi:[1,1,0]  ; encoding: [0x10,0x43,0x42,0xcc,0x00,0x11,0x42,0x7c]
# W64: v_wmma_f16_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,1,0] neg_hi:[1,1,0]  ; encoding: [0x10,0x43,0x42,0xcc,0x00,0x11,0x42,0x7c]
0x10,0x43,0x42,0xcc,0x00,0x11,0x42,0x7c


# Test v_wmma_bf16_16x16x16_bf16
#
# Same six operand/modifier variants as the v_wmma_f16_16x16x16_f16 cases
# (plain registers, inline constant 1.0, op_sel:[0,0,1], and the three
# neg_lo/neg_hi combinations). The byte sequences here carry 0x43 in the
# third byte where the f16 cases carry 0x42.

# W32: v_wmma_bf16_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x40,0x43,0xcc,0x00,0x11,0x42,0x1c]
# W64: v_wmma_bf16_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x40,0x43,0xcc,0x00,0x11,0x42,0x1c]
0x10,0x40,0x43,0xcc,0x00,0x11,0x42,0x1c

# W32: v_wmma_bf16_16x16x16_bf16 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x40,0x43,0xcc,0x00,0x11,0xca,0x1b]
# W64: v_wmma_bf16_16x16x16_bf16 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x40,0x43,0xcc,0x00,0x11,0xca,0x1b]
0x10,0x40,0x43,0xcc,0x00,0x11,0xca,0x1b

# W32: v_wmma_bf16_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23] op_sel:[0,0,1] ; encoding: [0x10,0x60,0x43,0xcc,0x00,0x11,0x42,0x1c]
# W64: v_wmma_bf16_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] op_sel:[0,0,1] ; encoding: [0x10,0x60,0x43,0xcc,0x00,0x11,0x42,0x1c]
0x10,0x60,0x43,0xcc,0x00,0x11,0x42,0x1c

# W32: v_wmma_bf16_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x41,0x43,0xcc,0x00,0x11,0x42,0x3c]
# W64: v_wmma_bf16_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x10,0x41,0x43,0xcc,0x00,0x11,0x42,0x3c]
0x10,0x41,0x43,0xcc,0x00,0x11,0x42,0x3c

# W32: v_wmma_bf16_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x42,0x43,0xcc,0x00,0x11,0x42,0x5c]
# W64: v_wmma_bf16_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x10,0x42,0x43,0xcc,0x00,0x11,0x42,0x5c]
0x10,0x42,0x43,0xcc,0x00,0x11,0x42,0x5c

# W32: v_wmma_bf16_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23] neg_lo:[1,1,0] neg_hi:[1,1,0] ; encoding: [0x10,0x43,0x43,0xcc,0x00,0x11,0x42,0x7c]
# W64: v_wmma_bf16_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] neg_lo:[1,1,0] neg_hi:[1,1,0] ; encoding: [0x10,0x43,0x43,0xcc,0x00,0x11,0x42,0x7c]
0x10,0x43,0x43,0xcc,0x00,0x11,0x42,0x7c


# Test v_wmma_i32_16x16x16_iu8
#
# Six encodings, in order: plain register operands, the integer inline
# constant 1 as the last source, neg_lo/neg_hi set to [1,0,0], [0,1,0] and
# [1,1,0], and finally the clamp modifier (second byte 0xc0 instead of 0x40).
# Expected destination / last-source range is v[8:15] in wave32 mode and
# v[8:11] in wave64 mode; v[0:3] and v[4:7] are printed in both modes.

# W32: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15]                               ; encoding: [0x08,0x40,0x44,0xcc,0x00,0x09,0x22,0x1c]
# W64: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11]                               ; encoding: [0x08,0x40,0x44,0xcc,0x00,0x09,0x22,0x1c]
0x08,0x40,0x44,0xcc,0x00,0x09,0x22,0x1c

# W32: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1                                      ; encoding: [0x08,0x40,0x44,0xcc,0x00,0x09,0x06,0x1a]
# W64: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], 1                                      ; encoding: [0x08,0x40,0x44,0xcc,0x00,0x09,0x06,0x1a]
0x08,0x40,0x44,0xcc,0x00,0x09,0x06,0x1a

# W32: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x08,0x41,0x44,0xcc,0x00,0x09,0x22,0x3c]
# W64: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x08,0x41,0x44,0xcc,0x00,0x09,0x22,0x3c]
0x08,0x41,0x44,0xcc,0x00,0x09,0x22,0x3c

# W32: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x08,0x42,0x44,0xcc,0x00,0x09,0x22,0x5c]
# W64: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x08,0x42,0x44,0xcc,0x00,0x09,0x22,0x5c]
0x08,0x42,0x44,0xcc,0x00,0x09,0x22,0x5c

# W32: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,1,0] neg_hi:[1,1,0] ; encoding: [0x08,0x43,0x44,0xcc,0x00,0x09,0x22,0x7c]
# W64: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,1,0] neg_hi:[1,1,0] ; encoding: [0x08,0x43,0x44,0xcc,0x00,0x09,0x22,0x7c]
0x08,0x43,0x44,0xcc,0x00,0x09,0x22,0x7c

# W32: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] clamp                         ; encoding: [0x08,0xc0,0x44,0xcc,0x00,0x09,0x22,0x1c]
# W64: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] clamp                         ; encoding: [0x08,0xc0,0x44,0xcc,0x00,0x09,0x22,0x1c]
0x08,0xc0,0x44,0xcc,0x00,0x09,0x22,0x1c


# Test v_wmma_i32_16x16x16_iu4
#
# Same six operand/modifier variants as the v_wmma_i32_16x16x16_iu8 cases
# (plain registers, integer inline constant 1, the three neg_lo/neg_hi
# combinations, and clamp). Expected destination / last-source range is
# v[4:11] in wave32 mode and v[4:7] in wave64 mode; v[0:1] and v[2:3] are
# printed in both modes.

# W32: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11]                             ; encoding: [0x04,0x40,0x45,0xcc,0x00,0x05,0x12,0x1c]
# W64: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7]                               ; encoding: [0x04,0x40,0x45,0xcc,0x00,0x05,0x12,0x1c]
0x04,0x40,0x45,0xcc,0x00,0x05,0x12,0x1c

# W32: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], 1                                     ; encoding: [0x04,0x40,0x45,0xcc,0x00,0x05,0x06,0x1a]
# W64: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], 1                                      ; encoding: [0x04,0x40,0x45,0xcc,0x00,0x05,0x06,0x1a]
0x04,0x40,0x45,0xcc,0x00,0x05,0x06,0x1a

# W32: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x04,0x41,0x45,0xcc,0x00,0x05,0x12,0x3c]
# W64: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0] neg_hi:[1,0,0] ; encoding: [0x04,0x41,0x45,0xcc,0x00,0x05,0x12,0x3c]
0x04,0x41,0x45,0xcc,0x00,0x05,0x12,0x3c

# W32: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x04,0x42,0x45,0xcc,0x00,0x05,0x12,0x5c]
# W64: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0] neg_hi:[0,1,0] ; encoding: [0x04,0x42,0x45,0xcc,0x00,0x05,0x12,0x5c]
0x04,0x42,0x45,0xcc,0x00,0x05,0x12,0x5c

# W32: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,1,0] neg_hi:[1,1,0] ; encoding: [0x04,0x43,0x45,0xcc,0x00,0x05,0x12,0x7c]
# W64: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,1,0] neg_hi:[1,1,0] ; encoding: [0x04,0x43,0x45,0xcc,0x00,0x05,0x12,0x7c]
0x04,0x43,0x45,0xcc,0x00,0x05,0x12,0x7c

# W32: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] clamp                         ; encoding: [0x04,0xc0,0x45,0xcc,0x00,0x05,0x12,0x1c]
# W64: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] clamp                         ; encoding: [0x04,0xc0,0x45,0xcc,0x00,0x05,0x12,0x1c]
0x04,0xc0,0x45,0xcc,0x00,0x05,0x12,0x1c