; NASM test source: Intel AMX instruction forms, assembled in 64-bit mode.
bits 64
;-----------------------------------------------------------------------
; amx a, b, c
;
; Emits one instance of every AMX instruction form listed in the body.
;
;   %1  index of the primary tile register (tmm<%1>).  The same value
;       also selects the vector register zmm<%1> and is reused as the
;       immediate third operand of the row instructions.
;   %2  index of the second tile register operand (tmm<%2>)
;   %3  index of the third tile register operand (tmm<%3>)
;
; Side effect: (re)defines the single-line macros treg, treg2, treg3
; and zreg on every invocation.
; Memory operands are built from rax, rdx, rsi and rdi; eax is used as
; the register third operand of the row instructions.
;-----------------------------------------------------------------------
%macro amx 3
    %define treg    tmm %+ %1
    %define treg2   tmm %+ %2
    %define treg3   tmm %+ %3
    %define zreg    zmm %+ %1

        ; ---- AMX-TILE ------------------------------------------------
        ldtilecfg           [rsi]
        sttilecfg           [rdi]

        tilezero            treg

        tileloadd           treg, [rax]
        tileloadd           treg, [rax + rdx]
        tileloadd           treg, [rax + rdx*2]

        tileloaddt1         treg, [rax]
        tileloaddt1         treg, [rax + rdx]
        tileloaddt1         treg, [rax + rdx*2]

        ; ---- AMX-MOVRS -----------------------------------------------
        tileloaddrs         treg, [rax]
        tileloaddrs         treg, [rax + rdx]
        tileloaddrs         treg, [rax + rdx*2]

        tileloaddrst1       treg, [rax]
        tileloaddrst1       treg, [rax + rdx]
        tileloaddrst1       treg, [rax + rdx*2]

        ; ---- AMX-BF16 ------------------------------------------------
        tdpbf16ps           treg, treg2, treg3

        ; ---- AMX-INT8 ------------------------------------------------
        tdpbssd             treg, treg2, treg3
        tdpbusd             treg, treg2, treg3
        tdpbsud             treg, treg2, treg3
        tdpbuud             treg, treg2, treg3

        ; ---- AMX-FP16 ------------------------------------------------
        tdpfp16ps           treg, treg2, treg3

        ; ---- AMX-COMPLEX ---------------------------------------------
        tcmmimfp16ps        treg, treg2, treg3
        tcmmrlfp16ps        treg, treg2, treg3

        ; ---- AMX-TF32 ------------------------------------------------
        tmmultf32ps         treg, treg2, treg3

        ; ---- AMX-FP8 -------------------------------------------------
        tdpbf8ps            treg, treg2, treg3
        tdpbhf8ps           treg, treg2, treg3
        tdphbf8ps           treg, treg2, treg3
        tdphf8ps            treg, treg2, treg3

        ; ---- AMX-AVX512 ----------------------------------------------
        ; Each instruction is emitted twice: once with a register third
        ; operand (eax) and once with an immediate third operand.
        tcvtrowd2ps         zreg, treg, eax
        tcvtrowd2ps         zreg, treg, %1
        tcvtrowps2bf16h     zreg, treg, eax
        tcvtrowps2bf16h     zreg, treg, %1
        tcvtrowps2bf16l     zreg, treg, eax
        tcvtrowps2bf16l     zreg, treg, %1
        tcvtrowps2phh       zreg, treg, eax
        tcvtrowps2phh       zreg, treg, %1
        tcvtrowps2phl       zreg, treg, eax
        tcvtrowps2phl       zreg, treg, %1
        tilemovrow          zreg, treg, eax
        tilemovrow          zreg, treg, %1

        ; ---- AMX-TRANSPOSE (removed) ---------------------------------
        ; Edition 59 of the "Intel Architecture Instruction Set
        ; Extensions and Future Features Programming Reference"
        ; (September 2025, 319433-059) dropped all 16 AMX-TRANSPOSE
        ; instructions.  Like PCOMMIT, they are tagged as 'NEVER', so
        ; the obsolete-removed warning is switched off while they are
        ; assembled and put back to its previous setting afterwards.
[warning -obsolete-removed]
        t2rpntlvwz0         treg, [rax]
        t2rpntlvwz0         treg, [rax + rdx]
        t2rpntlvwz0         treg, [rax + rdx*2]

        t2rpntlvwz0t1       treg, [rax]
        t2rpntlvwz0t1       treg, [rax + rdx]
        t2rpntlvwz0t1       treg, [rax + rdx*2]

        t2rpntlvwz1         treg, [rax]
        t2rpntlvwz1         treg, [rax + rdx]
        t2rpntlvwz1         treg, [rax + rdx*2]

        t2rpntlvwz1t1       treg, [rax]
        t2rpntlvwz1t1       treg, [rax + rdx]
        t2rpntlvwz1t1       treg, [rax + rdx*2]

        ttransposed         treg, treg

        ; AMX-TRANSPOSE + AMX-MOVRS
        t2rpntlvwz0rs       treg, [rax]
        t2rpntlvwz0rs       treg, [rax + rdx]
        t2rpntlvwz0rs       treg, [rax + rdx*2]

        t2rpntlvwz0rst1     treg, [rax]
        t2rpntlvwz0rst1     treg, [rax + rdx]
        t2rpntlvwz0rst1     treg, [rax + rdx*2]

        t2rpntlvwz1rs       treg, [rax]
        t2rpntlvwz1rs       treg, [rax + rdx]
        t2rpntlvwz1rs       treg, [rax + rdx*2]

        t2rpntlvwz1rst1     treg, [rax]
        t2rpntlvwz1rst1     treg, [rax + rdx]
        t2rpntlvwz1rst1     treg, [rax + rdx*2]

        ; AMX-TRANSPOSE + AMX-BF16
        ttdpbf16ps          treg, treg2, treg3

        ; AMX-TRANSPOSE + AMX-FP16
        ttdpfp16ps          treg, treg2, treg3

        ; AMX-TRANSPOSE + AMX-COMPLEX
        ttcmmimfp16ps       treg, treg2, treg3
        ttcmmrlfp16ps       treg, treg2, treg3
        tconjtcmmimfp16ps   treg, treg2, treg3
        tconjtfp16          treg, treg

        ; AMX-TRANSPOSE + AMX-TF32
        ttmmultf32ps        treg, treg2, treg3
[warning *obsolete-removed]

        ; ---- AMX-TILE: stores and release ----------------------------
        ; The indexed stores use the comma-separated base,index operand
        ; form rather than the base+index form used by the loads.
        tilestored          [rax], treg
        tilestored          [rax, rdx], treg
        tilestored          [rax, rdx*2], treg

        tilerelease
%endmacro
; Expand amx eight times with a window of three consecutive tile
; indices taken modulo 8: (0,1,2), (1,2,3), ... (6,7,0), (7,0,1).
%assign n 0
%assign m 1
%assign l 2
%rep 8
        amx     n, m, l
    ; Slide the window by one position: each index takes over the value
    ; of its successor, and the last one advances with wrap-around at 8.
    %assign n m
    %assign m l
    %assign l (l + 1) % 8
%endrep
; End of AMX instruction test.