File: expand-isa-compatibility.patch

package info (click to toggle)
rocblas 6.4.4-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,082,920 kB
  • sloc: cpp: 245,009; f90: 50,012; python: 50,003; sh: 24,623; asm: 8,917; makefile: 147; ansic: 107; xml: 36; awk: 14
file content (181 lines) | stat: -rw-r--r-- 6,091 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
From: Cordell Bloor <cgmb@slerp.xyz>
Date: Wed, 31 May 2023 00:04:38 -0600
Subject: expand isa compatibility

This is not an ideal solution, but there are a number of ISAs that are
subsets of gfx900, gfx1010 and gfx1030. The simplest way to get
rocBLAS and Tensile to load the compatible kernels when running on
architectures compatible with those ISAs is to simply report the
GPU as being of the supported type.

There is no way this patch would be accepted upstream as it is expected
that they will implement a better solution... eventually.

Updated by @ckk to support HIP >= 6.

Forwarded: not-needed
Last-Update: 2025-07-06
---
 library/src/handle.cpp                             |  1 +
 library/src/rocblas_auxiliary.cpp                  | 46 ++++++++++++++++++++
 library/src/tensile_host.cpp                       |  1 +
 .../Tensile/Source/lib/source/hip/HipHardware.cpp  | 49 ++++++++++++++++++++++
 4 files changed, 97 insertions(+)

diff --git a/library/src/handle.cpp b/library/src/handle.cpp
index a17a198..e362f7f 100644
--- a/library/src/handle.cpp
+++ b/library/src/handle.cpp
@@ -21,6 +21,7 @@
  * ************************************************************************ */
 #include "handle.hpp"
 #include <cstdarg>
+#include <cstring>
 #include <limits>
 
 #ifdef WIN32
diff --git a/library/src/rocblas_auxiliary.cpp b/library/src/rocblas_auxiliary.cpp
index a5353de..c466dce 100644
--- a/library/src/rocblas_auxiliary.cpp
+++ b/library/src/rocblas_auxiliary.cpp
@@ -24,8 +24,10 @@
 #include "logging.hpp"
 #include "rocblas-auxiliary.h"
 #include <cctype>
+#include <cstring>
 #include <cstdlib>
 #include <memory>
+#include <unordered_set>
 #include <string>
 
 /* ============================================================================================ */
@@ -921,6 +923,50 @@ std::string rocblas_internal_get_arch_name()
     hipGetDevice(&deviceId);
     hipDeviceProp_t deviceProperties;
     hipGetDeviceProperties(&deviceProperties, deviceId);
+
+    // coerce to compatible arch
+#if HIP_VERSION_MAJOR >= 6
+    std::unordered_set<std::string> fallback_arches;
+
+    fallback_arches = {"gfx902", "gfx909", "gfx912"};
+    if (fallback_arches.count(deviceProperties.gcnArchName)) {
+        std::strcpy(deviceProperties.gcnArchName, "gfx900");
+    }
+    fallback_arches = {"gfx1011", "gfx1012", "gfx1013"};
+    if (fallback_arches.count(deviceProperties.gcnArchName)) {
+        std::strcpy(deviceProperties.gcnArchName, "gfx1010");
+    }
+    fallback_arches = {"gfx1031", "gfx1032", "gfx1033", "gfx1034", "gfx1035", "gfx1036"};
+    if (fallback_arches.count(deviceProperties.gcnArchName)) {
+        std::strcpy(deviceProperties.gcnArchName, "gfx1030");
+    }
+#else
+    switch(deviceProperties.gcnArch)
+    {
+    case 902:
+    case 909:
+    case 912:
+        deviceProperties.gcnArch = 900;
+        std::strcpy(deviceProperties.gcnArchName, "gfx900");
+        break;
+    case 1011:
+    case 1012:
+    case 1013:
+        deviceProperties.gcnArch = 1010;
+        std::strcpy(deviceProperties.gcnArchName, "gfx1010");
+        break;
+    case 1031:
+    case 1032:
+    case 1033:
+    case 1034:
+    case 1035:
+    case 1036:
+        deviceProperties.gcnArch = 1030;
+        std::strcpy(deviceProperties.gcnArchName, "gfx1030");
+        break;
+    }
+#endif // HIP_VERSION_MAJOR >= 6
+
     return ArchName<hipDeviceProp_t>{}(deviceProperties);
 }
 
diff --git a/library/src/tensile_host.cpp b/library/src/tensile_host.cpp
index d10670d..d324a57 100644
--- a/library/src/tensile_host.cpp
+++ b/library/src/tensile_host.cpp
@@ -47,6 +47,7 @@
 #include <Tensile/hip/HipUtils.hpp>
 #include <atomic>
 #include <complex>
+#include <cstring>
 #include <exception>
 #include <future>
 #include <iomanip>
diff --git a/tensile/Tensile/Source/lib/source/hip/HipHardware.cpp b/tensile/Tensile/Source/lib/source/hip/HipHardware.cpp
index 411d7c5..d8ac95d 100644
--- a/tensile/Tensile/Source/lib/source/hip/HipHardware.cpp
+++ b/tensile/Tensile/Source/lib/source/hip/HipHardware.cpp
@@ -24,6 +24,9 @@
  *
  *******************************************************************************/
 
+#include <cstring>
+#include <unordered_set>
+
 #include <Tensile/AMDGPU.hpp>
 #include <Tensile/hip/HipHardware.hpp>
 #include <Tensile/hip/HipUtils.hpp>
@@ -57,6 +60,52 @@ namespace Tensile
         {
             hipDeviceProp_t prop;
             HIP_CHECK_EXC(hipGetDeviceProperties(&prop, deviceId));
+
+            // coerce to compatible arch
+#if HIP_VERSION_MAJOR >= 6
+            std::unordered_set<std::string> fallback_arches;
+
+            fallback_arches = {"gfx902", "gfx909", "gfx912"};
+            if (fallback_arches.count(prop.gcnArchName)) {
+                std::strcpy(prop.gcnArchName, "gfx900");
+            }
+
+            fallback_arches = {"gfx1011", "gfx1012", "gfx1013"};
+            if (fallback_arches.count(prop.gcnArchName)) {
+                std::strcpy(prop.gcnArchName, "gfx1010");
+            }
+
+            fallback_arches = {"gfx1031", "gfx1032", "gfx1033", "gfx1034", "gfx1035", "gfx1036"};
+            if (fallback_arches.count(prop.gcnArchName)) {
+                std::strcpy(prop.gcnArchName, "gfx1030");
+            }
+#else
+            switch(prop.gcnArch)
+            {
+            case 902:
+            case 909:
+            case 912:
+                prop.gcnArch = 900;
+                std::strcpy(prop.gcnArchName, "gfx900");
+                break;
+            case 1011:
+            case 1012:
+            case 1013:
+                prop.gcnArch = 1010;
+                std::strcpy(prop.gcnArchName, "gfx1010");
+                break;
+            case 1031:
+            case 1032:
+            case 1033:
+            case 1034:
+            case 1035:
+            case 1036:
+                prop.gcnArch = 1030;
+                std::strcpy(prop.gcnArchName, "gfx1030");
+                break;
+            }
+#endif // HIP_VERSION_MAJOR >= 6
+
 #if HIP_VERSION >= 50220730
             int hip_version;
             HIP_CHECK_EXC(hipRuntimeGetVersion(&hip_version));