DEBSOURCES
Skip Quicknav
Home
Search
Documentation
Stats
About
sources
/
nvidia-cutlass
/
3.4.1%2Bds-2
/ examples
package info
(click to toggle)
nvidia-cutlass 3.4.1%2Bds-2
links:
PTS
,
VCS
area: contrib
in suites: forky, sid, trixie
size: 48,488 kB
sloc
: cpp: 206,571; ansic: 69,215; python: 25,487; sh: 16; makefile: 15
Folder: examples
.. (parent)
d
rwxr-xr-x
59
00_basic_gemm
d
rwxr-xr-x
66
01_cutlass_utilities
d
rwxr-xr-x
63
02_dump_reg_shmem
d
rwxr-xr-x
4,096
03_visualize_layout
d
rwxr-xr-x
62
04_tile_iterator
d
rwxr-xr-x
61
05_batched_gemm
d
rwxr-xr-x
60
06_splitK_gemm
d
rwxr-xr-x
68
07_volta_tensorop_gemm
d
rwxr-xr-x
57
08_turing_tensorop_gemm
d
rwxr-xr-x
64
09_turing_tensorop_conv2dfprop
d
rwxr-xr-x
63
10_planar_complex
d
rwxr-xr-x
69
11_planar_complex_array
d
rwxr-xr-x
63
12_gemm_bias_relu
d
rwxr-xr-x
4,096
13_two_tensor_op_fusion
d
rwxr-xr-x
62
14_ampere_tf32_tensorop_gemm
d
rwxr-xr-x
130
15_ampere_sparse_tensorop_gemm
d
rwxr-xr-x
76
16_ampere_tensorop_conv2dfprop
d
rwxr-xr-x
71
17_fprop_per_channel_bias
d
rwxr-xr-x
82
18_ampere_fp64_tensorop_affine2_gemm
d
rwxr-xr-x
67
19_tensorop_canonical
d
rwxr-xr-x
63
20_simt_canonical
d
rwxr-xr-x
64
21_quaternion_gemm
d
rwxr-xr-x
64
22_quaternion_conv
d
rwxr-xr-x
73
23_ampere_gemm_operand_reduction_fusion
d
rwxr-xr-x
49
24_gemm_grouped
d
rwxr-xr-x
122
25_ampere_fprop_mainloop_fusion
d
rwxr-xr-x
77
26_ampere_wgrad_mainloop_fusion
d
rwxr-xr-x
93
27_ampere_3xtf32_fast_accurate_tensorop_gemm
d
rwxr-xr-x
91
28_ampere_3xtf32_fast_accurate_tensorop_fprop
d
rwxr-xr-x
71
29_ampere_3xtf32_fast_accurate_tensorop_complex_gemm
d
rwxr-xr-x
65
30_wgrad_split_k
d
rwxr-xr-x
59
31_basic_syrk
d
rwxr-xr-x
59
32_basic_trmm
d
rwxr-xr-x
64
33_ampere_3xtf32_tensorop_symm
d
rwxr-xr-x
57
34_transposed_conv2d
d
rwxr-xr-x
130
35_gemm_softmax
d
rwxr-xr-x
70
36_gather_scatter_fusion
d
rwxr-xr-x
134
37_gemm_layernorm_gemm_fusion
d
rwxr-xr-x
62
38_syr2k_grouped
d
rwxr-xr-x
106
39_gemm_permute
d
rwxr-xr-x
117
40_cutlass_py
d
rwxr-xr-x
4,096
41_fused_multi_head_attention
d
rwxr-xr-x
75
42_ampere_tensorop_group_conv
d
rwxr-xr-x
70
43_ell_block_sparse_gemm
d
rwxr-xr-x
131
44_multi_gemm_ir_and_codegen
d
rwxr-xr-x
4,096
45_dual_gemm
d
rwxr-xr-x
75
46_depthwise_simt_conv2dfprop
d
rwxr-xr-x
115
47_ampere_gemm_universal_streamk
d
rwxr-xr-x
68
48_hopper_warp_specialized_gemm
d
rwxr-xr-x
70
49_hopper_gemm_with_collective_builder
d
rwxr-xr-x
85
50_hopper_gemm_with_epilogue_swizzle
d
rwxr-xr-x
89
51_hopper_gett
d
rwxr-xr-x
4,096
52_hopper_gather_scatter_fusion
d
rwxr-xr-x
129
53_hopper_gemm_permute
d
rwxr-xr-x
121
54_hopper_fp8_warp_specialized_gemm
d
rwxr-xr-x
135
55_hopper_mixed_dtype_gemm
d
rwxr-xr-x
81
56_hopper_ptr_array_batched_gemm
d
rwxr-xr-x
59
57_hopper_grouped_gemm
d
rwxr-xr-x
42
60_cutlass_import
d
rwxr-xr-x
29
common
d
rwxr-xr-x
54
cute
d
rwxr-xr-x
4,096
python
-
rw-r--r--
4,477
CMakeLists.txt