File: matrix_transpose.spt

package info (click to toggle)
spirv-llvm-translator-15 15.0.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 14,028 kB
  • sloc: cpp: 43,715; lisp: 3,497; sh: 153; python: 43; makefile: 26
file content (174 lines) | stat: -rw-r--r-- 7,808 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
; RUN: llvm-spirv %s -to-binary -o %t.spv
; RUN: spirv-val %t.spv
; RUN: llvm-spirv -r %t.spv -o %t.bc
; RUN: llvm-dis < %t.bc | FileCheck %s --check-prefix=CHECK-LLVM

; CHECK-LLVM: %1 = load [4 x <4 x float>], [4 x <4 x float>]* %mtx4
; CHECK-LLVM: %2 = extractvalue [4 x <4 x float>] %1, 0
; CHECK-LLVM: %3 = extractvalue [4 x <4 x float>] %1, 1
; CHECK-LLVM: %4 = extractvalue [4 x <4 x float>] %1, 2
; CHECK-LLVM: %5 = extractvalue [4 x <4 x float>] %1, 3
; CHECK-LLVM: %6 = shufflevector <4 x float> %2, <4 x float> %3, <2 x i32> <i32 0, i32 4>
; CHECK-LLVM: %7 = shufflevector <4 x float> %4, <4 x float> %5, <2 x i32> <i32 0, i32 4>
; CHECK-LLVM: %8 = shufflevector <2 x float> %6, <2 x float> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-LLVM: %9 = insertvalue [4 x <4 x float>] undef, <4 x float> %8, 0
; CHECK-LLVM: %10 = shufflevector <4 x float> %2, <4 x float> %3, <2 x i32> <i32 1, i32 5>
; CHECK-LLVM: %11 = shufflevector <4 x float> %4, <4 x float> %5, <2 x i32> <i32 1, i32 5>
; CHECK-LLVM: %12 = shufflevector <2 x float> %10, <2 x float> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-LLVM: %13 = insertvalue [4 x <4 x float>] %9, <4 x float> %12, 1
; CHECK-LLVM: %14 = shufflevector <4 x float> %2, <4 x float> %3, <2 x i32> <i32 2, i32 6>
; CHECK-LLVM: %15 = shufflevector <4 x float> %4, <4 x float> %5, <2 x i32> <i32 2, i32 6>
; CHECK-LLVM: %16 = shufflevector <2 x float> %14, <2 x float> %15, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-LLVM: %17 = insertvalue [4 x <4 x float>] %13, <4 x float> %16, 2
; CHECK-LLVM: %18 = shufflevector <4 x float> %2, <4 x float> %3, <2 x i32> <i32 3, i32 7>
; CHECK-LLVM: %19 = shufflevector <4 x float> %4, <4 x float> %5, <2 x i32> <i32 3, i32 7>
; CHECK-LLVM: %20 = shufflevector <2 x float> %18, <2 x float> %19, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-LLVM: %21 = insertvalue [4 x <4 x float>] %17, <4 x float> %20, 3
; CHECK-LLVM: store [4 x <4 x float>] %21, [4 x <4 x float>]* %res4

; CHECK-LLVM: %22 = load [2 x <2 x float>], [2 x <2 x float>]* %mtx2
; CHECK-LLVM: %23 = extractvalue [2 x <2 x float>] %22, 0
; CHECK-LLVM: %24 = extractvalue [2 x <2 x float>] %22, 1
; CHECK-LLVM: %25 = shufflevector <2 x float> %23, <2 x float> %24, <2 x i32> <i32 0, i32 2>
; CHECK-LLVM: %26 = insertvalue [2 x <2 x float>] undef, <2 x float> %25, 0
; CHECK-LLVM: %27 = shufflevector <2 x float> %23, <2 x float> %24, <2 x i32> <i32 1, i32 3>
; CHECK-LLVM: %28 = insertvalue [2 x <2 x float>] %26, <2 x float> %27, 1
; CHECK-LLVM: store [2 x <2 x float>] %28, [2 x <2 x float>]* %res2

; CHECK-LLVM: %29 = load [3 x <3 x float>], [3 x <3 x float>]* %mtx3
; CHECK-LLVM: %30 = extractvalue [3 x <3 x float>] %29, 0
; CHECK-LLVM: %31 = extractvalue [3 x <3 x float>] %29, 1
; CHECK-LLVM: %32 = extractvalue [3 x <3 x float>] %29, 2
; CHECK-LLVM: %33 = extractelement <3 x float> %30, i32 0
; CHECK-LLVM: %34 = insertelement <3 x float> undef, float %33, i64 0
; CHECK-LLVM: %35 = extractelement <3 x float> %31, i32 0
; CHECK-LLVM: %36 = insertelement <3 x float> %34, float %35, i64 1
; CHECK-LLVM: %37 = extractelement <3 x float> %32, i32 0
; CHECK-LLVM: %38 = insertelement <3 x float> %36, float %37, i64 2
; CHECK-LLVM: %39 = insertvalue [3 x <3 x float>] undef, <3 x float> %38, 0
; CHECK-LLVM: %40 = extractelement <3 x float> %30, i32 1
; CHECK-LLVM: %41 = insertelement <3 x float> undef, float %40, i64 0
; CHECK-LLVM: %42 = extractelement <3 x float> %31, i32 1
; CHECK-LLVM: %43 = insertelement <3 x float> %41, float %42, i64 1
; CHECK-LLVM: %44 = extractelement <3 x float> %32, i32 1
; CHECK-LLVM: %45 = insertelement <3 x float> %43, float %44, i64 2
; CHECK-LLVM: %46 = insertvalue [3 x <3 x float>] %39, <3 x float> %45, 1
; CHECK-LLVM: %47 = extractelement <3 x float> %30, i32 2
; CHECK-LLVM: %48 = insertelement <3 x float> undef, float %47, i64 0
; CHECK-LLVM: %49 = extractelement <3 x float> %31, i32 2
; CHECK-LLVM: %50 = insertelement <3 x float> %48, float %49, i64 1
; CHECK-LLVM: %51 = extractelement <3 x float> %32, i32 2
; CHECK-LLVM: %52 = insertelement <3 x float> %50, float %51, i64 2
; CHECK-LLVM: %53 = insertvalue [3 x <3 x float>] %46, <3 x float> %52, 2
; CHECK-LLVM: store [3 x <3 x float>] %53, [3 x <3 x float>]* %res3

; CHECK-LLVM: %54 = load [4 x <3 x float>], [4 x <3 x float>]* %mtx43
; CHECK-LLVM: %55 = extractvalue [4 x <3 x float>] %54, 0
; CHECK-LLVM: %56 = extractvalue [4 x <3 x float>] %54, 1
; CHECK-LLVM: %57 = extractvalue [4 x <3 x float>] %54, 2
; CHECK-LLVM: %58 = extractvalue [4 x <3 x float>] %54, 3
; CHECK-LLVM: %59 = extractelement <3 x float> %55, i32 0
; CHECK-LLVM: %60 = insertelement <4 x float> undef, float %59, i64 0
; CHECK-LLVM: %61 = extractelement <3 x float> %56, i32 0
; CHECK-LLVM: %62 = insertelement <4 x float> %60, float %61, i64 1
; CHECK-LLVM: %63 = extractelement <3 x float> %57, i32 0
; CHECK-LLVM: %64 = insertelement <4 x float> %62, float %63, i64 2
; CHECK-LLVM: %65 = extractelement <3 x float> %58, i32 0
; CHECK-LLVM: %66 = insertelement <4 x float> %64, float %65, i64 3
; CHECK-LLVM: %67 = insertvalue [3 x <4 x float>] undef, <4 x float> %66, 0
; CHECK-LLVM: %68 = extractelement <3 x float> %55, i32 1
; CHECK-LLVM: %69 = insertelement <4 x float> undef, float %68, i64 0
; CHECK-LLVM: %70 = extractelement <3 x float> %56, i32 1
; CHECK-LLVM: %71 = insertelement <4 x float> %69, float %70, i64 1
; CHECK-LLVM: %72 = extractelement <3 x float> %57, i32 1
; CHECK-LLVM: %73 = insertelement <4 x float> %71, float %72, i64 2
; CHECK-LLVM: %74 = extractelement <3 x float> %58, i32 1
; CHECK-LLVM: %75 = insertelement <4 x float> %73, float %74, i64 3
; CHECK-LLVM: %76 = insertvalue [3 x <4 x float>] %67, <4 x float> %75, 1
; CHECK-LLVM: %77 = extractelement <3 x float> %55, i32 2
; CHECK-LLVM: %78 = insertelement <4 x float> undef, float %77, i64 0
; CHECK-LLVM: %79 = extractelement <3 x float> %56, i32 2
; CHECK-LLVM: %80 = insertelement <4 x float> %78, float %79, i64 1
; CHECK-LLVM: %81 = extractelement <3 x float> %57, i32 2
; CHECK-LLVM: %82 = insertelement <4 x float> %80, float %81, i64 2
; CHECK-LLVM: %83 = extractelement <3 x float> %58, i32 2
; CHECK-LLVM: %84 = insertelement <4 x float> %82, float %83, i64 3
; CHECK-LLVM: %85 = insertvalue [3 x <4 x float>] %76, <4 x float> %84, 2
; CHECK-LLVM: store [3 x <4 x float>] %85, [3 x <4 x float>]* %res34
; CHECK-LLVM: ret void

; Module header words: magic number (119734787 = 0x07230203), version
; (65536 = 0x00010000), generator word, id bound, schema.
; The id bound 51 is one above the largest id used in this file (Label 50).
119734787 65536 458752 51 0
; Every instruction line below has the form:
;   <count> <opcode name> <operands...>
; Capabilities declared by the module. Matrix is the capability tied to the
; TypeMatrix / Transpose instructions this test exercises.
; NOTE(review): Float64 is declared although the only float type in this file
; is the 32-bit TypeFloat (id 6) - presumably harmless, confirm if trimming.
2 Capability Addresses
2 Capability Linkage
2 Capability Kernel
2 Capability Float64
2 Capability Matrix
; Addressing model 2 / memory model 2 (Physical64 / OpenCL in the SPIR-V
; specification's enumerations).
3 MemoryModel 2 2
; Exposes function id 40 as the entry point "matrix_transpose"; execution
; model 6 is Kernel in the SPIR-V specification's enumeration.
8 EntryPoint 6 40 "matrix_transpose"
; Source language 3 (OpenCL_C in the SPIR-V enumeration), version 102000.
3 Source 3 102000
; Debug names for the eight function parameters (ids 30-37). The FileCheck
; expectations at the top of this file refer to the pointers by these names
; (%res4, %mtx4, %res2, ...), so they must not be renamed independently.
; NOTE(review): the leading count on these Name lines is 3, while a 4- or
; 5-character string plus terminator would occupy two words in binary form
; (count 4) - presumably the translator recomputes it on encode; confirm.
3 Name 30 "res4"
3 Name 31 "mtx4"
3 Name 32 "res2"
3 Name 33 "mtx2"
3 Name 34 "res3"
3 Name 35 "mtx3"
3 Name 36 "res34"
3 Name 37 "mtx43"

; Type declarations. Matrices are declared as <column type> <column count>;
; the "MatrixCxR" names in the trailing comments read as columns x rows.
; All pointers use storage class 7 (Function in the SPIR-V enumeration).

; Id 5: void (function return type). Id 6: 32-bit float, the element type of
; every vector below.
2 TypeVoid 5
3 TypeFloat 6 32
; Id 7: 4-component float vector. Id 8: matrix of 4 such columns
; ([4 x <4 x float>] in the expected LLVM IR).
4 TypeVector 7 6 4
4 TypeMatrix 8 7 4
4 TypePointer 9 7 8               ; 9  : Pointer to Matrix4x4

; Id 10: 2-component float vector. Id 11: matrix of 2 such columns
; ([2 x <2 x float>] in the expected LLVM IR).
4 TypeVector 10 6 2
4 TypeMatrix 11 10 2
4 TypePointer 12 7 11             ; 12 : Pointer to Matrix2x2

; Id 13: 3-component float vector. Id 14: matrix of 3 such columns
; ([3 x <3 x float>] in the expected LLVM IR).
4 TypeVector 13 6 3
4 TypeMatrix 14 13 3
4 TypePointer 15 7 14             ; 15 : Pointer to Matrix3x3

; Id 17: 4 columns of the 3-component vector (id 13), i.e. the non-square
; source matrix ([4 x <3 x float>] in the expected LLVM IR).
4 TypeMatrix 17 13 4
4 TypePointer 18 7 17             ; 18 : Pointer to Matrix4x3

; Id 20: 3 columns of the 4-component vector (id 7), i.e. the transposed
; shape of id 17 ([3 x <4 x float>] in the expected LLVM IR).
4 TypeMatrix 20 7 3
4 TypePointer 21 7 20             ; 21 : Pointer to Matrix3x4

; Id 29: function type returning void (5) and taking eight pointers, ordered
; as (result, source) pairs: 4x4, 4x4, 2x2, 2x2, 3x3, 3x3, then the 3x4
; result pointer (21) followed by the 4x3 source pointer (18).
11 TypeFunction 29 5 9 9 12 12 15 15 21 18

; Function id 40, the "matrix_transpose" entry point: result type 5 (void),
; function control 0, function type 29.
; For each of four matrix shapes it loads the source matrix, applies
; Transpose, and stores the result through the matching destination pointer.
; The order of the instructions matters: the FileCheck expectations at the top
; of this file depend on the sequential value numbering it produces.
5 Function 5 40 0 29
; Parameters arrive as (destination, source) pairs, one pair per test case.
; Ids 30-37 carry the debug names res4, mtx4, res2, mtx2, res3, mtx3, res34,
; mtx43 respectively.
3 FunctionParameter 9 30
3 FunctionParameter 9 31
3 FunctionParameter 12 32
3 FunctionParameter 12 33
3 FunctionParameter 15 34
3 FunctionParameter 15 35
3 FunctionParameter 21 36
3 FunctionParameter 18 37

; Single basic block; the function has no control flow.
2 Label 50

; 4x4, fastpath: mtx4 (31) -> res4 (30).
; The expected LLVM IR for this case is built from shufflevector.
4 Load 8 41 31
4 Transpose 8 42 41
3 Store 30 42

; 2x2, fastpath: mtx2 (33) -> res2 (32).
; The expected LLVM IR for this case is built from shufflevector.
4 Load 11 43 33
4 Transpose 11 44 43
3 Store 32 44

; 3x3, slowpath: mtx3 (35) -> res3 (34).
; The expected LLVM IR for this case is built element by element with
; extractelement / insertelement.
4 Load 14 45 35
4 Transpose 14 46 45
3 Store 34 46

; 4x3 source -> 3x4 result, slowpath: mtx43 (37) -> res34 (36).
; This is the only non-square case: the Load yields type 17 (4 columns of
; 3 components) while Transpose yields type 20 (3 columns of 4 components).
; The expected LLVM IR is again built with extractelement / insertelement.
4 Load 17 47 37
4 Transpose 20 48 47
3 Store 36 48

; Void return; matches the final "ret void" expectation.
1 Return

1 FunctionEnd