package info (click to toggle)
nvidia-cutlass 3.4.1%2Bds-2
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid, trixie
  • size: 48,488 kB
  • sloc: cpp: 206,571; ansic: 69,215; python: 25,487; sh: 16; makefile: 15

Folder: examples

d .. (parent)
d d rwxr-xr-x 59 00_basic_gemm
d d rwxr-xr-x 66 01_cutlass_utilities
d d rwxr-xr-x 63 02_dump_reg_shmem
d d rwxr-xr-x 4,096 03_visualize_layout
d d rwxr-xr-x 62 04_tile_iterator
d d rwxr-xr-x 61 05_batched_gemm
d d rwxr-xr-x 60 06_splitK_gemm
d d rwxr-xr-x 68 07_volta_tensorop_gemm
d d rwxr-xr-x 57 08_turing_tensorop_gemm
d d rwxr-xr-x 64 09_turing_tensorop_conv2dfprop
d d rwxr-xr-x 63 10_planar_complex
d d rwxr-xr-x 69 11_planar_complex_array
d d rwxr-xr-x 63 12_gemm_bias_relu
d d rwxr-xr-x 4,096 13_two_tensor_op_fusion
d d rwxr-xr-x 62 14_ampere_tf32_tensorop_gemm
d d rwxr-xr-x 130 15_ampere_sparse_tensorop_gemm
d d rwxr-xr-x 76 16_ampere_tensorop_conv2dfprop
d d rwxr-xr-x 71 17_fprop_per_channel_bias
d d rwxr-xr-x 82 18_ampere_fp64_tensorop_affine2_gemm
d d rwxr-xr-x 67 19_tensorop_canonical
d d rwxr-xr-x 63 20_simt_canonical
d d rwxr-xr-x 64 21_quaternion_gemm
d d rwxr-xr-x 64 22_quaternion_conv
d d rwxr-xr-x 73 23_ampere_gemm_operand_reduction_fusion
d d rwxr-xr-x 49 24_gemm_grouped
d d rwxr-xr-x 122 25_ampere_fprop_mainloop_fusion
d d rwxr-xr-x 77 26_ampere_wgrad_mainloop_fusion
d d rwxr-xr-x 93 27_ampere_3xtf32_fast_accurate_tensorop_gemm
d d rwxr-xr-x 91 28_ampere_3xtf32_fast_accurate_tensorop_fprop
d d rwxr-xr-x 71 29_ampere_3xtf32_fast_accurate_tensorop_complex_gemm
d d rwxr-xr-x 65 30_wgrad_split_k
d d rwxr-xr-x 59 31_basic_syrk
d d rwxr-xr-x 59 32_basic_trmm
d d rwxr-xr-x 64 33_ampere_3xtf32_tensorop_symm
d d rwxr-xr-x 57 34_transposed_conv2d
d d rwxr-xr-x 130 35_gemm_softmax
d d rwxr-xr-x 70 36_gather_scatter_fusion
d d rwxr-xr-x 134 37_gemm_layernorm_gemm_fusion
d d rwxr-xr-x 62 38_syr2k_grouped
d d rwxr-xr-x 106 39_gemm_permute
d d rwxr-xr-x 117 40_cutlass_py
d d rwxr-xr-x 4,096 41_fused_multi_head_attention
d d rwxr-xr-x 75 42_ampere_tensorop_group_conv
d d rwxr-xr-x 70 43_ell_block_sparse_gemm
d d rwxr-xr-x 131 44_multi_gemm_ir_and_codegen
d d rwxr-xr-x 4,096 45_dual_gemm
d d rwxr-xr-x 75 46_depthwise_simt_conv2dfprop
d d rwxr-xr-x 115 47_ampere_gemm_universal_streamk
d d rwxr-xr-x 68 48_hopper_warp_specialized_gemm
d d rwxr-xr-x 70 49_hopper_gemm_with_collective_builder
d d rwxr-xr-x 85 50_hopper_gemm_with_epilogue_swizzle
d d rwxr-xr-x 89 51_hopper_gett
d d rwxr-xr-x 4,096 52_hopper_gather_scatter_fusion
d d rwxr-xr-x 129 53_hopper_gemm_permute
d d rwxr-xr-x 121 54_hopper_fp8_warp_specialized_gemm
d d rwxr-xr-x 135 55_hopper_mixed_dtype_gemm
d d rwxr-xr-x 81 56_hopper_ptr_array_batched_gemm
d d rwxr-xr-x 59 57_hopper_grouped_gemm
d d rwxr-xr-x 42 60_cutlass_import
d d rwxr-xr-x 29 common
d d rwxr-xr-x 54 cute
d d rwxr-xr-x 4,096 python
- - rw-r--r-- 4,477 CMakeLists.txt