//====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
//
// Cell SPU 64-bit operations
//
//===----------------------------------------------------------------------===//
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// 64-bit comparisons:
//
// 1. The instruction sequences for the vector and scalar cases differ only
//    by an immediate constant: in the scalar case, we only care about the
//    top two 32-bit slots, whereas the vector case requires an exact
//    all-four-slot match.
//
// 2. There are no "immediate" forms, since a 64-bit constant may have to be
//    loaded from the constant pool anyway.
//
// 3. i64 setcc results are i32, which are subsequently converted to a FSM
// mask when used in a select pattern.
//
// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
// [Note: this may be moot, since gb produces v4i32 or r32.]
//
// 5. The code sequences for r64 and v2i64 are probably overly conservative,
// compared to the code that gcc produces.
//
// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// selb instruction definition for i64. Note that the selection mask is
// a vector, produced by various forms of FSM:
def SELBr64_cond:
    SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
             [/* no pattern */]>;
// The generic i64 select pattern, which assumes that the comparison result
// is in a 32-bit register that contains a select mask pattern (i.e., gather
// bits result):
def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
          (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
// select the negative condition:
class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
  Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
      (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;
// setcc the negative condition:
class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
  Pat<(cond R64C:$rA, R64C:$rB),
      (XORIr32 compare.Fragment, -1)>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// The i64 seteq fragment that does the scalar->vector conversion and
// comparison:
def CEQr64compare:
    CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
                                           (COPY_TO_REGCLASS R64C:$rB, VECREG))),
                        0xb)>;
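// How the scalar test works: GB gathers the LSB of each word slot into a
// 4-bit field (slot 0 landing in the most significant of the four bits),
// so if the two high slots both compared equal the gathered value is at
// least 0xc. The "> 0xb" test therefore checks the top two slots while
// ignoring whatever junk occupies slots 2 and 3 (see note 1 above).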
// The v2i64 seteq fragment that does the all-four-slot vector comparison:
def CEQv2i64compare:
    CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>;
// i64 seteq (equality): the setcc result is i32, which is converted to a
// vector FSM mask when used in a select pattern.
//
// v2i64 seteq (equality): the setcc result is v4i32
multiclass CompareEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>;
  def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                                (FSMv4i32 CEQr64compare.Fragment), R32C))>;
  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                                  (FSMv4i32 CEQv2i64compare.Fragment), R32C))>;
}
defm I64EQ: CompareEqual64;
def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;
// i64 setne:
def : I64SETCCNegCond<setne, I64EQr64>;
def : I64SELECTNegCond<setne, I64EQr64>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setugt/setule:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def CLGTr64ugt:
    CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
                        (COPY_TO_REGCLASS R64C:$rB, VECREG))>;

def CLGTr64eq:
    CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
                       (COPY_TO_REGCLASS R64C:$rB, VECREG))>;

def CLGTr64compare:
    CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
                        (XSWDv2i64 CLGTr64ugt.Fragment),
                        CLGTr64eq.Fragment)>;
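// Composing the 64-bit result from 32-bit pieces: XSWD sign-extends the
// low-word (right slot) compare result across each doubleword, and SELB
// substitutes that low-word result wherever the high words compared equal.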
def CLGTv2i64ugt:
    CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;

def CLGTv2i64eq:
    CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;

def CLGTv2i64compare:
    CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
                        (XSWDv2i64 CLGTv2i64ugt.Fragment),
                        CLGTv2i64eq.Fragment)>;
multiclass CompareLogicalGreaterThan64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>;
  def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                                (FSMv4i32 CLGTr64compare.Fragment), R32C))>;
  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                                  (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>;
}
defm I64LGT: CompareLogicalGreaterThan64;
def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
// I64LGTv2i64.Fragment>;
// i64 setule:
def : I64SETCCNegCond<setule, I64LGTr64>;
def : I64SELECTNegCond<setule, I64LGTr64>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setuge/setult:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def CLGEr64compare:
    CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment,
                                          CLGTr64eq.Fragment)), 0xb)>;

def CLGEv2i64compare:
    CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
                                          CLGTv2i64eq.Fragment)), 0xf)>;
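// FIXME: This requires *both* word slots to satisfy uge, which appears to
// give the wrong answer when the high words compare strictly greater but
// the low words compare less (the 64-bit result should still be true).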
multiclass CompareLogicalGreaterEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>;
  def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                                (FSMv4i32 CLGEr64compare.Fragment), R32C))>;
  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                                  (FSMv4i32 CLGEv2i64compare.Fragment), R32C))>;
}
defm I64LGE: CompareLogicalGreaterEqual64;
def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
          I64LGEv2i64.Fragment>;
// i64 setult:
def : I64SETCCNegCond<setult, I64LGEr64>;
def : I64SELECTNegCond<setult, I64LGEr64>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setgt/setle:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def CGTr64sgt:
    CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
                       (COPY_TO_REGCLASS R64C:$rB, VECREG))>;

def CGTr64eq:
    CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
                       (COPY_TO_REGCLASS R64C:$rB, VECREG))>;

def CGTr64compare:
    CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
                        (XSWDv2i64 CGTr64sgt.Fragment),
                        CGTr64eq.Fragment)>;
def CGTv2i64sgt:
    CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;

def CGTv2i64eq:
    CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;

def CGTv2i64compare:
    CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
                        (XSWDv2i64 CGTv2i64sgt.Fragment),
                        CGTv2i64eq.Fragment)>;
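// FIXME: When the high words compare equal, the low words of a signed
// 64-bit comparison should arguably be compared unsigned (the CLGT
// fragments above), not with the signed CGT result reused here.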
multiclass CompareGreaterThan64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>;
  def v2i64: CodeFrag<CGTv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                                (FSMv4i32 CGTr64compare.Fragment), R32C))>;
  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                                  (FSMv4i32 CGTv2i64compare.Fragment), R32C))>;
}
defm I64GT: CompareGreaterThan64;
def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
// I64GTv2i64.Fragment>;
// i64 setle:
def : I64SETCCNegCond<setle, I64GTr64>;
def : I64SELECTNegCond<setle, I64GTr64>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setge/setlt:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def CGEr64compare:
    CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment,
                                          CGTr64eq.Fragment)), 0xb)>;

def CGEv2i64compare:
    CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
                                          CGTv2i64eq.Fragment)), 0xf)>;
multiclass CompareGreaterEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>;
  def v2i64: CodeFrag<CGEv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                                (FSMv4i32 CGEr64compare.Fragment), R32C))>;
  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                                  (FSMv4i32 CGEv2i64compare.Fragment), R32C))>;
}
defm I64GE: CompareGreaterEqual64;
def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
          I64GEv2i64.Fragment>;

// i64 setlt:
def : I64SETCCNegCond<setlt, I64GEr64>;
def : I64SELECTNegCond<setlt, I64GEr64>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 add
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
class v2i64_add_cg<dag lhs, dag rhs>:
    CodeFrag<(CGv4i32 lhs, rhs)>;

class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
    CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;

class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
    v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
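// How the 64-bit add is composed: CG computes a per-word carry-generate
// bit, the SHUFB (driven by the CG mask that the DAG lowering passes as
// the third SPUadd64 operand) routes each low word's carry into its high
// word's slot, and ADDX folds that carry into the word-wise add.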
def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG),
                                      (COPY_TO_REGCLASS R64C:$rB, VECREG),
                                      (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;

def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_add<(v2i64 VECREG:$rA),
                    (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)>.Fragment>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 subtraction
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;

class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
    CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
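// Subtraction mirrors the add sequence: BG computes per-word
// borrow-generate bits, which are shuffled into the high word slots and
// consumed by SFX (subtract from, extended).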
def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (COPY_TO_REGCLASS
              v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG),
                        (COPY_TO_REGCLASS R64C:$rB, VECREG),
                        v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG),
                                     (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment,
                        (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;

def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_sub<(v2i64 VECREG:$rA),
                    (v2i64 VECREG:$rB),
                    v2i64_sub_bg<(v2i64 VECREG:$rA),
                                 (v2i64 VECREG:$rB)>.Fragment,
                    (v4i32 VECREG:$rCGmask)>.Fragment>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 multiply
//
// Note: i64 multiply is simply the vector->scalar conversion of the
// full-on v2i64 multiply, since the entire vector has to be manipulated
// anyway.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
class v2i64_mul_ahi64<dag rA> :
    CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

class v2i64_mul_bhi64<dag rB> :
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

class v2i64_mul_alo64<dag rA> :
    CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

class v2i64_mul_blo64<dag rB> :
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

class v2i64_mul_ashlq2<dag rA>:
    CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;

class v2i64_mul_ashlq4<dag rA>:
    CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;

class v2i64_mul_bshlq2<dag rB> :
    CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;

class v2i64_mul_bshlq4<dag rB> :
    CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
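// Helper semantics: the FSMBI masks select one half of each doubleword
// (0x0f0f zeroes the low word, keeping the high half; 0xf0f0 zeroes the
// high word, keeping the low half), while SHLQBYI shifts the quadword
// left by 2 or 4 bytes to line up the 16-bit partial products.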
class v2i64_highprod<dag rA, dag rB>:
    CodeFrag<(Av4i32
                (Av4i32
                   (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment,     // a1 x b3
                              v2i64_mul_ahi64<rA>.Fragment),
                   (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment,      // a0 x b3
                              v2i64_mul_bshlq4<rB>.Fragment)),
                (Av4i32
                   (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
                              v2i64_mul_ashlq4<rA>.Fragment),
                   (Av4i32
                      (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                                 v2i64_mul_bhi64<rB>.Fragment),
                      (Av4i32
                         (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                                    v2i64_mul_bhi64<rB>.Fragment),
                         (Av4i32
                            (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                                       v2i64_mul_bshlq2<rB>.Fragment),
                            (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                                       v2i64_mul_bshlq2<rB>.Fragment))))))>;
class v2i64_mul_a3_b3<dag rA, dag rB>:
    CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
                        v2i64_mul_blo64<rB>.Fragment)>;

class v2i64_mul_a2_b3<dag rA, dag rB>:
    CodeFrag<(SELBv4i32 (SHLQBYIv4i32
                           (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
                                        v2i64_mul_bshlq2<rB>.Fragment), 0x2),
                        (ILv4i32 0),
                        (FSMBIv4i32 0xc3c3))>;

class v2i64_mul_a3_b2<dag rA, dag rB>:
    CodeFrag<(SELBv4i32 (SHLQBYIv4i32
                           (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
                                        v2i64_mul_ashlq2<rA>.Fragment), 0x2),
                        (ILv4i32 0),
                        (FSMBIv4i32 0xc3c3))>;
class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
    v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
                        v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
              v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;

class v2i64_mul<dag rA, dag rB, dag rCGmask>:
    v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
              (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
                         (ILv4i32 0),
                         (FSMBIv4i32 0x0f0f)),
              rCGmask>;
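// v2i64_mul sums the low-order partial products using the
// carry-propagating 64-bit add above, then folds in the high product,
// masked into the high word slot of each doubleword.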
def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG),
                                      (COPY_TO_REGCLASS R64C:$rB, VECREG),
                                      (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;

def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)>.Fragment>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// f64 comparisons
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// selb instruction definition for f64. Unlike the i64 case above, the
// selection mask here is matched directly as an i32 register:
def SELBf64_cond:
    SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
             [(set R64FP:$rT,
                   (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;