1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351
|
#mach: crisv32
#output: Basic clock cycles, total @: *\n
#output: Memory source stall cycles: 82\n
#output: Memory read-after-write stall cycles: 0\n
#output: Movem source stall cycles: 6\n
#output: Movem destination stall cycles: 880\n
#output: Movem address stall cycles: 4\n
#output: Multiplication source stall cycles: 18\n
#output: Jump source stall cycles: 6\n
#output: Branch misprediction stall cycles: 0\n
#output: Jump target stall cycles: 0\n
#sim: --cris-cycles=basic
.include "testutils.inc"
; Macros for testing correctness of movem destination stall
; cycles for various insn types. Beware: macro parameters can
; be comma or space-delimited. There are problems (i.e. bugs)
; with using space-delimited operands and operands with
; non-alphanumeric characters, like "[]-." so use comma for
; them. Lots of trouble passing empty parameters and parameters
; with comma. Ugh. FIXME: Report bugs, fix bugs, fix other
; shortcomings, fix that darn old macro-parameter-in-string.
; Common helper behind every test macro below: issue the movem
; under test, then the probed insn, then three nops.  One- and
; two-operand insns are unified here because the comma that
; separates insn operands clashes with the comma that separates
; macro operands; no cleaner way was found.
;   S    - size modifier glued onto the mnemonic, or "none"
;   insn - mnemonic of the insn to probe
;   x    - first (or only) operand
;   y    - second operand; defaults to "none", meaning x is the
;          only operand
.macro t_S_x_y S insn x y=none
 movem [r7],r6
 .ifc \S,none
 ; Plain mnemonic, no size modifier.
 .ifc \y,none
 \insn \x
 .else
 \insn \x,\y
 .endif
 .else
 ; Mnemonic with the size modifier appended.
 .ifc \y,none
 \insn\S \x
 .else
 \insn\S \x,\y
 .endif
 .endif
 nop
 nop
 nop
.endm
; An insn-type that has a single register operand. The register
; may or may not be a source register for the insn.
; The insn is expanded twice through t_S_x_y with no size
; modifier: once with r3 as operand and once with r8.
;   insn - mnemonic of the insn to probe
.macro t_r insn
t_S_x_y none,\insn,r3
t_S_x_y none,\insn,r8
.endm
; An insn-type that jumps to the destination of the register.
;   insn - mnemonic of the jump insn to probe
; r7 is pointed at a table in .rodata (local label 0, aligned to
; 2**5 bytes) holding seven dwords, each the address of local
; label 1; r8 and r9 are loaded with that same address.  Label 1
; is placed right after the t_r expansion, so presumably that is
; where every tested jump lands.
; NOTE(review): r7 is left pointing at this table when the macro
; is done; later code that needs the label 9 area must reload it.
; NOTE(review): the first t_r case presumably already transfers
; control to label 1, which would leave the r8 case assembled but
; never executed -- confirm.
.macro t_r_j insn
move.d 0f,r7
move.d 1f,r8
move.d r8,r9
nop
nop
nop
; Jump-target table read by the movem in t_S_x_y.
.section ".rodata"
.p2align 5
0:
.dword 1f
.dword 1f
.dword 1f
.dword 1f
.dword 1f
.dword 1f
.dword 1f
.previous
t_r \insn
1:
.endm
; Test an insn-type that takes a size modifier and two register
; operands.  Four register pairings are run through t_S_x_y:
; r3,r8 then r8,r3 then (after copying r3 to r9) r4,r3 and r8,r9.
; Macro operands are comma-delimited, as the note at the top of
; the file recommends.
;   S    - size modifier, or "none"
;   insn - mnemonic of the insn to probe
.macro t_xr_r S insn
 t_S_x_y \S,\insn,r3,r8
 t_S_x_y \S,\insn,r8,r3
 move.d r3,r9
 t_S_x_y \S,\insn,r4,r3
 t_S_x_y \S,\insn,r8,r9
.endm
; Test an insn-type that takes two register operands and no size
; modifier: forwards to t_xr_r with "none" as the modifier.
;   insn - mnemonic of the insn to probe
.macro t_r_r insn
 t_xr_r none,\insn
.endm
; A t_r_r insn with a byte or word-size modifier: the t_xr_r
; register-pair tests are run with .b and then with .w.
;   insn - mnemonic, without size modifier
.macro t_wbr_r insn
t_xr_r .b,\insn
t_xr_r .w,\insn
.endm
; A t_r_r insn with a dword, byte or word-size modifier: the
; t_xr_r tests are run with .d, then t_wbr_r covers .b and .w.
;   insn - mnemonic, without size modifier
.macro t_dwbr_r insn
t_xr_r .d,\insn
t_wbr_r \insn
.endm
; Test an insn-type that takes a size modifier, a constant and a
; register operand.  The constant is always 24; it is applied to
; r3 and, after r3 has been copied to r9, to r8.
; Macro operands are comma-delimited, as the note at the top of
; the file recommends.
;   S    - size modifier, or "none"
;   insn - mnemonic of the insn to probe
.macro t_xc_r S insn
 t_S_x_y \S,\insn,24,r3
 move.d r3,r9
 t_S_x_y \S,\insn,24,r8
.endm
; Test an insn-type that takes a constant and a register operand
; and no size modifier: forwards to t_xc_r with "none" as the
; modifier.
;   insn - mnemonic of the insn to probe
.macro t_c_r insn
 t_xc_r none,\insn
.endm
; A t_c_r insn with a byte or word-size modifier: the t_xc_r
; constant tests are run with .b and then with .w.
;   insn - mnemonic, without size modifier
.macro t_wbc_r insn
t_xc_r .b,\insn
t_xc_r .w,\insn
.endm
; A t_c_r insn with a dword, byte or word-size modifier: the
; t_xc_r tests are run with .d, then t_wbc_r covers .b and .w.
;   insn - mnemonic, without size modifier
.macro t_dwbc_r insn
t_xc_r .d,\insn
t_wbc_r \insn
.endm
; An insn-type that has size-modifier, a memory operand and a
; register operand.
;   S    - size modifier, or "none"
;   insn - mnemonic of the insn to probe
; r8 is first loaded with the address of local label 9, which
; must already be defined earlier in the file, so that [r8] is a
; usable memory operand.  Four cases follow: [r4],r3 then [r8],r5
; then [r3],r9 and [r8],r9, with r9 overwritten from r3 and then
; from r5 in between.
.macro t_xm_r S insn
move.d 9b,r8
t_S_x_y \S,\insn,[r4],r3
move.d r3,r9
t_S_x_y \S,\insn,[r8],r5
move.d r5,r9
t_S_x_y \S,\insn,[r3],r9
t_S_x_y \S,\insn,[r8],r9
.endm
; An insn-type that has size-modifier, a register operand and a
; memory operand, in that order (i.e. to memory).
;   S    - size modifier, or "none"
;   insn - mnemonic of the insn to probe
; r8 is first loaded with the address of local label 9, which
; must already be defined earlier in the file.  Four cases
; follow: r3,[r4] then r8,[r3] then r3,[r8] and r9,[r8].
.macro t_xr_m S insn
move.d 9b,r8
t_S_x_y \S,\insn,r3,[r4]
t_S_x_y \S,\insn,r8,[r3]
t_S_x_y \S,\insn,r3,[r8]
t_S_x_y \S,\insn,r9,[r8]
.endm
; Test an insn-type that takes a memory operand and a register
; operand and no size modifier: forwards to t_xm_r with "none" as
; the modifier.
;   insn - mnemonic of the insn to probe
.macro t_m_r insn
 t_xm_r none,\insn
.endm
; A t_m_r insn with a byte or word-size modifier: the t_xm_r
; memory-operand tests are run with .b and then with .w.
;   insn - mnemonic, without size modifier
.macro t_wbm_r insn
t_xm_r .b,\insn
t_xm_r .w,\insn
.endm
; A t_m_r insn with a dword, byte or word-size modifier: the
; t_xm_r tests are run with .d, then t_wbm_r covers .b and .w.
;   insn - mnemonic, without size modifier
.macro t_dwbm_r insn
t_xm_r .d,\insn
t_wbm_r \insn
.endm
; Insn types of the regular type (r, c, m, size d w b).
; Runs, in this order, the register-source, constant-source and
; memory-source tests, each for the .d, .b and .w modifiers.
;   insn - mnemonic, without size modifier
.macro t_dwb insn
t_dwbr_r \insn
t_dwbc_r \insn
t_dwbm_r \insn
.endm
; Insn types of the regular type (r, c, m), sizes w and b only.
; Runs, in this order, the register-source, constant-source and
; memory-source tests, each for the .b and .w modifiers.
;   insn - mnemonic, without size modifier
.macro t_wb insn
t_wbr_r \insn
t_wbc_r \insn
t_wbm_r \insn
.endm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Program start.  startnostack is not defined in this file;
; presumably it comes from testutils.inc.
startnostack
; Initialize registers so they don't contain unknowns.
; r7 and r8 get the address of the movem source area (local
; label 9, defined just below); r9 is cleared.
move.d 9f,r7
move.d r7,r8
moveq 0,r9
; Movem source area. Register contents must be valid
; addresses, aligned on a cache boundary.
; The area is ten dwords in .rodata on a 2**5-byte boundary, each
; dword holding the address of the area itself.
.section ".rodata"
.p2align 5
9:
.dword 9b
.dword 9b
.dword 9b
.dword 9b
.dword 9b
.dword 9b
.dword 9b
.dword 9b
.dword 9b
.dword 9b
.previous
; The actual tests. The numbers in the comments specify the
; number of movem destination stall cycles. Some of them may be
; filed as memory source address stalls, multiplication source
; stalls or jump source stalls, duly marked so.
; NOTE(review): lines carrying no number presumably contribute
; no stall cycles -- confirm against the simulator output.
t_r_r abs ; 3+3
t_dwb add ; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src)
t_r_r addc ; (3+3+3)
t_c_r addc ; 3
t_m_r addc ; (3+3+3) (2 mem src)
t_dwb move ; (3+3)+(3+3+3)*2+3*2+(3+3+3)*3 (6 mem src)
t_xr_m .b move ; 3+3+3 (2 mem src)
t_xr_m .w move ; 3+3+3 (2 mem src)
t_xr_m .d move ; 3+3+3 (2 mem src)
t_S_x_y none addi r3.b r8 ; 3
t_S_x_y none addi r8.w r3 ; 3
t_S_x_y none addi r4.d r3 ; 3
t_S_x_y none addi r8.w r9
; The acr form of addi, and addo, have three-operand syntax, so
; we have to expand (a useful subset of) "t_dwb" by hand.  The
; last two insn operands are quoted so that they are passed as a
; single macro operand.
t_S_x_y none addi r3.b "r8,acr" ; 3
t_S_x_y none addi r8.w "r3,acr" ; 3
t_S_x_y none addi r4.d "r3,acr" ; 3
t_S_x_y none addi r8.w "r9,acr"
t_S_x_y .b addo 42 "r8,acr"
t_S_x_y .w addo 4200 "r3,acr" ; 3
t_S_x_y .d addo 420000 "r3,acr" ; 3
; Reload r8 with the address of the movem source area (label 9)
; before it is used as a memory operand base below.
move.d 9b,r8
t_S_x_y .d,addo,[r4],"r3,acr" ; 3 (1 mem src)
t_S_x_y .b,addo,[r3],"r8,acr" ; 3 (1 mem src)
t_S_x_y .w,addo,[r8],"r3,acr" ; 3
t_S_x_y .w,addo,[r8],"r9,acr"
; Similar for addoq.
t_S_x_y none addoq 42 "r8,acr"
t_S_x_y none addoq 42 "r3,acr" ; 3
t_c_r addq ; 3
t_wb adds ; (3+3+3)*2+3*2+(3+3+3)*2 (4 mem src)
t_wb addu ; (3+3+3)*2+3*2+(3+3+3)*2 (4 mem src)
t_dwb and ; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src)
t_c_r andq ; 3
t_dwbr_r asr ; (3+3+3)*3
t_c_r asrq ; 3
t_dwbr_r bound ; (3+3+3)*3
t_dwbc_r bound ; 3*3
t_r_r btst ; (3+3+3)
t_c_r btstq ; 3
t_dwb cmp ; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src)
t_c_r cmpq ; 3
t_wbc_r cmps ; 3*2
t_wbc_r cmpu ; 3*2
t_wbm_r cmps ; (3+3+3)*2 (4 mem src)
t_wbm_r cmpu ; (3+3+3)*2 (4 mem src)
t_r_r dstep ; (3+3+3)
; FIXME: idxd, fidxi, ftagd, ftagi when supported.
t_r_j jsr ; 3 (2 jump src)
t_r_j jump ; 3 (2 jump src)
t_c_r lapc.d
; The "quick operand" must be in range [. to .+15*2] so we can't
; use t_c_r.
t_S_x_y none lapcq .+4 r3
t_S_x_y none lapcq .+4 r8
t_dwbr_r lsl ; (3+3+3)*3
t_c_r lslq ; 3
t_dwbr_r lsr ; (3+3+3)*3
t_c_r lsrq ; 3
t_r_r lz ; 3+3
t_S_x_y none mcp srp r3 ; 3
t_S_x_y none mcp srp r8
t_c_r moveq
t_S_x_y none move srp r8
t_S_x_y none move srp r3
t_S_x_y none move r8 srp
t_S_x_y none move r3 srp ; 3
; FIXME: move supreg,Rd and move Rs,supreg when supported.
t_wb movs ; (3+3)*2+0+(3+3)*2 (4 mem src)
t_wb movu ; (3+3)*2+0+(3+3)*2 (4 mem src)
t_dwbr_r muls ; (3+3+3)*3 (9 mul src)
t_dwbr_r mulu ; (3+3+3)*3 (9 mul src)
t_dwbr_r neg ; (3+3)*3
t_r not ; 3 cycles.
t_dwb or ; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src)
t_c_r orq ; 3
t_r seq
t_dwb sub ; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src)
t_c_r subq ; 3
t_wb subs ; (3+3+3)*2+3*2+(3+3+3)*2 (4 mem src)
t_wb subu ; (3+3+3)*2+3*2+(3+3+3)*2 (4 mem src)
t_r swapw ; 3 cycles.
t_r swapnwbr ; 3 cycles.
t_r_j jsrc ; 3 (2 jump src)
t_r_r xor ; (3+3+3)
; Point r7 back at the movem source area (label 9); each t_r_j
; expansion above left it pointing at its own jump-target table.
move.d 9b,r7
nop
nop
nop
t_xm_r none movem ; (3+3) (2 mem src, 1+1 movem addr)
; As implied by the comment, all movem destination penalty
; cycles (but one) are accounted for as memory source address
; and movem source penalties. There are also two movem address
; cache-line straddle penalties.
t_xr_m none movem ; (3+3+2+2) (2 mem, 6 movem src, +2 movem addr)
; End of test.
break 15
|