File: composable_kernel.cmake

package info (click to toggle)
onnxruntime 1.21.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 333,732 kB
  • sloc: cpp: 3,153,079; python: 179,219; ansic: 109,131; asm: 37,791; cs: 34,424; perl: 13,070; java: 11,047; javascript: 6,330; pascal: 4,126; sh: 3,277; xml: 598; objc: 281; makefile: 59
file content (66 lines) | stat: -rw-r--r-- 3,616 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
set(PATCH_CLANG ${PROJECT_SOURCE_DIR}/patches/composable_kernel/Fix_Clang_Build.patch)
set(PATCH_GFX12X ${PROJECT_SOURCE_DIR}/patches/composable_kernel/Add_gfx12x_support.patch)

include(FetchContent)
onnxruntime_fetchcontent_declare(composable_kernel
  URL ${DEP_URL_composable_kernel}
  URL_HASH SHA1=${DEP_SHA1_composable_kernel}
  PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PATCH_CLANG} &&
                ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PATCH_GFX12X}
  EXCLUDE_FROM_ALL
)

FetchContent_GetProperties(composable_kernel)
if(NOT composable_kernel_POPULATED)
  FetchContent_Populate(composable_kernel)
  set(GPU_TARGETS ${CMAKE_HIP_ARCHITECTURES})
  set(BUILD_DEV OFF CACHE BOOL "Disable -Weverything, otherwise, error: 'constexpr' specifier is incompatible with C++98 [-Werror,-Wc++98-compat]" FORCE)
  # Exclude i8 device gemm instances due to excessive long compilation time and not being used
  set(DTYPES fp32 fp16 bf16 fp8)
  set(INSTANCES_ONLY ON)
  add_subdirectory(${composable_kernel_SOURCE_DIR} ${composable_kernel_BINARY_DIR} EXCLUDE_FROM_ALL)

  add_library(onnxruntime_composable_kernel_includes INTERFACE)
  target_include_directories(onnxruntime_composable_kernel_includes INTERFACE
    ${composable_kernel_SOURCE_DIR}/include
    ${composable_kernel_BINARY_DIR}/include
    ${composable_kernel_SOURCE_DIR}/library/include)
  target_compile_definitions(onnxruntime_composable_kernel_includes INTERFACE __fp32__ __fp16__ __bf16__)

  execute_process(
    COMMAND ${Python3_EXECUTABLE} ${composable_kernel_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py
    --list_blobs ${composable_kernel_BINARY_DIR}/blob_list.txt
    COMMAND_ERROR_IS_FATAL ANY
  )
  file(STRINGS ${composable_kernel_BINARY_DIR}/blob_list.txt generated_fmha_srcs)
  add_custom_command(
    OUTPUT ${generated_fmha_srcs}
    COMMAND ${Python3_EXECUTABLE} ${composable_kernel_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py --output_dir ${composable_kernel_BINARY_DIR}
    DEPENDS ${composable_kernel_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py ${composable_kernel_BINARY_DIR}/blob_list.txt
  )
  set_source_files_properties(${generated_fmha_srcs} PROPERTIES LANGUAGE HIP GENERATED TRUE)
  add_custom_target(gen_fmha_srcs DEPENDS ${generated_fmha_srcs})  # dummy target for dependencies
  # code generation complete

  set(fmha_srcs
    ${generated_fmha_srcs}
    ${composable_kernel_SOURCE_DIR}/example/ck_tile/01_fmha/fmha_fwd.cpp
    ${composable_kernel_SOURCE_DIR}/example/ck_tile/01_fmha/fmha_fwd.hpp
    ${composable_kernel_SOURCE_DIR}/example/ck_tile/01_fmha/bias.hpp
    ${composable_kernel_SOURCE_DIR}/example/ck_tile/01_fmha/mask.hpp
  )
  add_library(onnxruntime_composable_kernel_fmha STATIC EXCLUDE_FROM_ALL ${generated_fmha_srcs})
  target_link_libraries(onnxruntime_composable_kernel_fmha PUBLIC onnxruntime_composable_kernel_includes)
  target_include_directories(onnxruntime_composable_kernel_fmha PUBLIC ${composable_kernel_SOURCE_DIR}/example/ck_tile/01_fmha)
  add_dependencies(onnxruntime_composable_kernel_fmha gen_fmha_srcs)

  # ck tile only supports MI200+ GPUs at the moment
  get_target_property(archs onnxruntime_composable_kernel_fmha HIP_ARCHITECTURES)
  string(REPLACE "," ";" archs "${archs}")
  set(original_archs ${archs})
  list(FILTER archs INCLUDE REGEX "(gfx942|gfx90a)")
  if (NOT original_archs EQUAL archs)
    message(WARNING "ck tile only supports archs: ${archs} among the originally specified ${original_archs}")
  endif()
  set_target_properties(onnxruntime_composable_kernel_fmha PROPERTIES HIP_ARCHITECTURES "${archs}")
endif()