File: 0041-inline-bf16-functions.patch

package info (click to toggle)
rocm-hipamd 5.7.1-7
links: PTS, VCS
area: main
in suites: forky, sid
size: 19,836 kB
sloc: cpp: 243,188; ansic: 35,728; perl: 1,227; sh: 902; python: 588; asm: 166; makefile: 59
file content (73 lines) | stat: -rw-r--r-- 3,171 bytes
From: Cordell Bloor <cgmb@debian.org>
Date: Sat, 4 Oct 2025 00:28:49 -0600
Subject: inline bf16 functions

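Mark the non-inline __HOST_DEVICE__ functions defined in amd_hip_bf16.h
as inline. This resolves the multiple-definition errors observed when
building libraries and applications that include the header from more
than one translation unit: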

  ggml: https://github.com/ggml-org/llama.cpp/pull/15296
  mscclpp: https://github.com/microsoft/mscclpp/issues/349

Bug-Debian: https://bugs.debian.org/1116585
---
 hipamd/include/hip/amd_detail/amd_hip_bf16.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/hipamd/include/hip/amd_detail/amd_hip_bf16.h b/hipamd/include/hip/amd_detail/amd_hip_bf16.h
index 50b8a18..fd526cf 100644
--- a/hipamd/include/hip/amd_detail/amd_hip_bf16.h
+++ b/hipamd/include/hip/amd_detail/amd_hip_bf16.h
@@ -134,7 +134,7 @@ __HOST_DEVICE__ inline float __bfloat162float(__hip_bfloat16 a) {
  * \ingroup HIP_INTRINSIC_BFLOAT16_CONV
  * \brief Converts float to bfloat16
  */
-__HOST_DEVICE__ __hip_bfloat16 __float2bfloat16(float f) {
+__HOST_DEVICE__ inline __hip_bfloat16 __float2bfloat16(float f) {
   __hip_bfloat16 ret;
   union {
     float fp32;
@@ -178,7 +178,7 @@ __HOST_DEVICE__ __hip_bfloat16 __float2bfloat16(float f) {
  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
  * \brief Converts and moves bfloat162 to float2
  */
-__HOST_DEVICE__ float2 __bfloat1622float2(const __hip_bfloat162 a) {
+__HOST_DEVICE__ inline float2 __bfloat1622float2(const __hip_bfloat162 a) {
   return float2{__bfloat162float(a.x), __bfloat162float(a.y)};
 }
 
@@ -206,7 +206,7 @@ __device__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h) { ret
  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
  * \brief Convert double to __hip_bfloat16
  */
-__HOST_DEVICE__ __hip_bfloat16 __double2bfloat16(const double a) {
+__HOST_DEVICE__ inline __hip_bfloat16 __double2bfloat16(const double a) {
   return __float2bfloat16((float)a);
 }
 
@@ -214,7 +214,7 @@ __HOST_DEVICE__ __hip_bfloat16 __double2bfloat16(const double a) {
  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
  * \brief Convert float2 to __hip_bfloat162
  */
-__HOST_DEVICE__ __hip_bfloat162 __float22bfloat162_rn(const float2 a) {
+__HOST_DEVICE__ inline __hip_bfloat162 __float22bfloat162_rn(const float2 a) {
   return __hip_bfloat162{__float2bfloat16(a.x), __float2bfloat16(a.y)};
 }
 
@@ -244,7 +244,7 @@ __device__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a) {
  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
  * \brief Converts high 16 bits of __hip_bfloat162 to float and returns the result
  */
-__HOST_DEVICE__ float __high2float(const __hip_bfloat162 a) { return __bfloat162float(a.y); }
+__HOST_DEVICE__ inline float __high2float(const __hip_bfloat162 a) { return __bfloat162float(a.y); }
 
 /**
  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
@@ -272,7 +272,7 @@ __device__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a) {
  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
  * \brief Converts low 16 bits of __hip_bfloat162 to float and returns the result
  */
-__HOST_DEVICE__ float __low2float(const __hip_bfloat162 a) { return __bfloat162float(a.x); }
+__HOST_DEVICE__ inline float __low2float(const __hip_bfloat162 a) { return __bfloat162float(a.x); }
 
 /**
  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV