File: PPCScheduleP8.td

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,998,520 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (337 lines) | stat: -rw-r--r-- 16,245 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
//===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the SchedModel for the POWER8 processor.
//
//===----------------------------------------------------------------------===//

// Machine model record for POWER8. The resources, writes and InstRW mappings
// in the `let SchedModel = P8Model` scope of this file are attached to it.
def P8Model : SchedMachineModel {
  // Latency and penalty estimates.
  let LoadLatency = 3;
  let MispredictPenalty = 16;

  // Issue width and micro-op buffer capacities.
  let IssueWidth = 8;
  let MicroOpBufferSize = 64;
  let LoopMicroOpBufferSize = 60;

  // TODO: Due to a limitation of the instruction definitions, non-P8
  // instructions are required to be listed here. Change this once that is
  // fixed.
  let CompleteModel = 0;

  // Subtarget features whose instructions this model does not describe.
  let UnsupportedFeatures = [
    HasSPE, PrefixInstrs, MMA,
    PairedVectorMemops, PCRelativeMemops,
    IsISA3_0, IsISA3_1, IsISAFuture
  ];
}

let SchedModel = P8Model in {
  // Power8 Pipeline Units:

  // Load/store and fixed-point resources. P8_LU_LS_FX is the outermost group;
  // each narrower group names its parent through Super.
  def P8_LU_LS_FX : ProcResource<6>;
  def P8_LU_LS : ProcResource<4> {
    let Super = P8_LU_LS_FX;
  }
  let Super = P8_LU_LS in {
    def P8_LS : ProcResource<2>;
    def P8_LU : ProcResource<2>;
  }
  def P8_FX : ProcResource<2> {
    let Super = P8_LU_LS_FX;
  }

  // Resources that do not belong to any Super group.
  def P8_DFU : ProcResource<1>;
  def P8_BR : ProcResource<1> {
    let BufferSize = 16;
  }
  def P8_CY : ProcResource<1>;
  def P8_CRL : ProcResource<1>;

  // Vector resources: PM, XS and VX are all sub-groups of P8_VMX.
  def P8_VMX : ProcResource<2>;
  let Super = P8_VMX in {
    def P8_PM : ProcResource<2> {
      // This is a workaround so that the scheduler respects the latency of a
      // long permute chain.
      let BufferSize = 1;
    }
    def P8_XS : ProcResource<2>;
    def P8_VX : ProcResource<2>;
  }

  // Floating-point resources, with sub-groups for scalar, 2xDouble and
  // 4xSingle operations.
  def P8_FPU : ProcResource<4>;
  let Super = P8_FPU in {
    def P8_FP_Scal : ProcResource<2>;
    def P8_FP_2x64 : ProcResource<2>;
    def P8_FP_4x32 : ProcResource<2>;
  }

  // Power8 Dispatch Ports:
  // Two ports to do loads or fixed-point operations.
  // Two ports to do stores, fixed-point loads, or fixed-point operations.
  // Two ports for fixed-point operations.
  // Two issue ports shared by 2 FP/2 VSX/2 VMX/1 CY/1 DFP operations.
  // One for branch operations.
  // One for condition register operations.

  // TODO: Model dispatch of cracked instructions.

  // Fixed-point capable dispatch ports, from the widest group to the
  // narrowest. Six ports in total are available for fixed-point operations.
  def P8_PORT_ALLFX : ProcResource<6>;
  // Four ports in total are available for fixed-point load operations.
  def P8_PORT_FXLD : ProcResource<4> {
    let Super = P8_PORT_ALLFX;
  }
  let Super = P8_PORT_FXLD in {
    // Two ports to do loads or fixed-point operations.
    def P8_PORT_LD_FX : ProcResource<2>;
    // Two ports to do stores, fixed-point loads, or fixed-point operations.
    def P8_PORT_ST_FXLD_FX : ProcResource<2>;
  }

  // Two issue ports shared by two floating-point, two VSX, two VMX, one crypto,
  // and one DFP operation.
  def P8_PORT_VMX_FP : ProcResource<2>;
  // One port for branch operations.
  def P8_PORT_BR : ProcResource<1>;
  // One port for condition register operations.
  def P8_PORT_CR : ProcResource<1>;

  // Issue writes: one SchedWriteRes per dispatch-port group. InstRW entries
  // pair one of these with an execution-unit write.
  def P8_ISSUE_FX   : SchedWriteRes<[P8_PORT_ALLFX]>;
  def P8_ISSUE_FXLD : SchedWriteRes<[P8_PORT_FXLD]>;
  def P8_ISSUE_LD   : SchedWriteRes<[P8_PORT_LD_FX]>;
  def P8_ISSUE_ST   : SchedWriteRes<[P8_PORT_ST_FXLD_FX]>;
  def P8_ISSUE_VSX  : SchedWriteRes<[P8_PORT_VMX_FP]>;
  def P8_ISSUE_BR   : SchedWriteRes<[P8_PORT_BR]>;
  def P8_ISSUE_CR   : SchedWriteRes<[P8_PORT_CR]>;

  // Power8 Instruction Latency & Port Groups:
  //
  // Each SchedWriteRes lists the execution resources a write occupies. The
  // *_NONE writes and P8_LS_LU_NONE/P8_LS_FP_NONE do not set Latency, so they
  // keep the class default.

  // Load/store writes.
  def P8_LS_LU_NONE : SchedWriteRes<[P8_LU, P8_LS]>;
  def P8_LS_FP_NONE : SchedWriteRes<[P8_LS, P8_FPU]>;
  let Latency = 3 in {
    def P8_LU_or_LS_3C : SchedWriteRes<[P8_LU_LS]>;
    def P8_LS_FX_3C : SchedWriteRes<[P8_LS, P8_FX]>;
  }
  let Latency = 2 in def P8_LU_or_LS_or_FX_2C : SchedWriteRes<[P8_LU_LS_FX]>;
  let Latency = 3 in def P8_LU_or_LS_FX_3C : SchedWriteRes<[P8_LU_LS, P8_FX]>;

  // Fixed-point writes.
  def P8_FX_NONE : SchedWriteRes<[P8_FX]>;
  let Latency = 1 in def P8_FX_1C : SchedWriteRes<[P8_FX]>;
  let Latency = 2 in def P8_FX_2C : SchedWriteRes<[P8_FX]>;
  let Latency = 3 in def P8_FX_3C : SchedWriteRes<[P8_FX]>;
  let Latency = 5 in def P8_FX_5C : SchedWriteRes<[P8_FX]>;
  let Latency = 10 in def P8_FX_10C : SchedWriteRes<[P8_FX]>;
  let Latency = 23 in def P8_FX_23C : SchedWriteRes<[P8_FX]>;
  let Latency = 15 in def P8_FX_15C : SchedWriteRes<[P8_FX]>;
  let Latency = 41 in def P8_FX_41C : SchedWriteRes<[P8_FX]>;

  // Branch and condition-register writes.
  let Latency = 2 in def P8_BR_2C : SchedWriteRes<[P8_BR]>;
  def P8_CR_NONE : SchedWriteRes<[P8_CRL]>;
  let Latency = 3 in def P8_CR_3C : SchedWriteRes<[P8_CRL]>;
  let Latency = 5 in def P8_CR_5C : SchedWriteRes<[P8_CRL]>;

  // Load/store writes that also involve the FX or FPU resources.
  let Latency = 5 in {
    def P8_LU_5C : SchedWriteRes<[P8_LU]>;
    def P8_LU_FX_5C : SchedWriteRes<[P8_LU, P8_FX]>;
  }
  let Latency = 2 in def P8_LS_FP_FX_2C : SchedWriteRes<[P8_LS, P8_FPU, P8_FX]>;
  let Latency = 3 in {
    def P8_LS_FP_FX_3C : SchedWriteRes<[P8_LS, P8_FPU, P8_FX]>;
    def P8_LS_3C : SchedWriteRes<[P8_LS]>;
    def P8_FP_3C : SchedWriteRes<[P8_FPU]>;
  }

  // Floating-point writes on the scalar, 4x32 and 2x64 sub-groups.
  let Latency = 6 in {
    def P8_FP_Scal_6C : SchedWriteRes<[P8_FP_Scal]>;
    def P8_FP_4x32_6C : SchedWriteRes<[P8_FP_4x32]>;
    def P8_FP_2x64_6C : SchedWriteRes<[P8_FP_2x64]>;
  }
  let Latency = 26 in def P8_FP_26C : SchedWriteRes<[P8_FP_Scal]>;
  let Latency = 28 in def P8_FP_28C : SchedWriteRes<[P8_FP_4x32]>;
  let Latency = 31 in def P8_FP_31C : SchedWriteRes<[P8_FP_Scal]>;
  let Latency = 32 in {
    def P8_FP_Scal_32C : SchedWriteRes<[P8_FP_Scal]>;
    def P8_FP_2x64_32C : SchedWriteRes<[P8_FP_2x64]>;
    def P8_FP_4x32_32C : SchedWriteRes<[P8_FP_4x32]>;
  }
  let Latency = 43 in {
    def P8_FP_Scal_43C : SchedWriteRes<[P8_FP_Scal]>;
    def P8_FP_2x64_43C : SchedWriteRes<[P8_FP_2x64]>;
  }

  // Vector (XS/PM/VX) and crypto writes.
  let Latency = 2 in {
    def P8_XS_2C : SchedWriteRes<[P8_XS]>;
    def P8_PM_2C : SchedWriteRes<[P8_PM]>;
  }
  let Latency = 4 in def P8_XS_4C : SchedWriteRes<[P8_XS]>;
  let Latency = 7 in def P8_VX_7C : SchedWriteRes<[P8_VX]>;
  let Latency = 9 in def P8_XS_9C : SchedWriteRes<[P8_XS]>;
  let Latency = 6 in def P8_CY_6C : SchedWriteRes<[P8_CY]>;

  // Decimal floating-point writes.
  let Latency = 13 in def P8_DFU_13C : SchedWriteRes<[P8_DFU]>;
  let Latency = 15 in def P8_DFU_15C : SchedWriteRes<[P8_DFU]>;
  let Latency = 17 in def P8_DFU_17C : SchedWriteRes<[P8_DFU]>;
  let Latency = 25 in def P8_DFU_25C : SchedWriteRes<[P8_DFU]>;
  let Latency = 32 in def P8_DFU_32C : SchedWriteRes<[P8_DFU]>;
  let Latency = 34 in def P8_DFU_34C : SchedWriteRes<[P8_DFU]>;
  let Latency = 40 in def P8_DFU_40C : SchedWriteRes<[P8_DFU]>;
  let Latency = 90 in def P8_DFU_90C : SchedWriteRes<[P8_DFU]>;
  let Latency = 96 in def P8_DFU_96C : SchedWriteRes<[P8_DFU]>;
  let Latency = 172 in def P8_DFU_172C : SchedWriteRes<[P8_DFU]>;

  // Direct move instructions: a latency-only write with no execution resource.
  let Latency = 5 in def P8_DM_5C : SchedWriteRes<[]>;

  // Instructions of CR pipeline
  //
  // Moves from CR/LR/CTR use the P8_CRL resource and the condition-register
  // issue port; only the latency write differs between the three groups.

  def : InstRW<[P8_CR_NONE, P8_ISSUE_CR],
               (instrs MFCR, MFCR8)>;
  def : InstRW<[P8_CR_3C, P8_ISSUE_CR],
               (instrs MFOCRF, MFOCRF8)>;
  def : InstRW<[P8_CR_5C, P8_ISSUE_CR],
               (instrs MFLR, MFLR8, MFCTR, MFCTR8)>;

  // Instructions of CY pipeline
  //
  // Cipher, polynomial multiply-sum and S-box instructions: 6 cycles on
  // P8_CY, dispatched through the shared VMX/FP issue ports.

  def : InstRW<[P8_CY_6C, P8_ISSUE_VSX],
               (instrs VCIPHER, VCIPHERLAST, VNCIPHER, VNCIPHERLAST,
                       VPMSUMB, VPMSUMD, VPMSUMH, VPMSUMW, VSBOX)>;

  // Instructions of FPU pipeline
  //
  // Every entry pairs an FPU latency write with P8_ISSUE_VSX, i.e. the
  // instruction is dispatched through the shared VMX/FP issue ports.

  // Divides and square roots, grouped by latency write.
  def : InstRW<[P8_FP_26C, P8_ISSUE_VSX], (instrs (instregex "^FDIVS(_rec)?$"), XSDIVSP)>;
  def : InstRW<[P8_FP_28C, P8_ISSUE_VSX], (instrs XVDIVSP)>;
  def : InstRW<[P8_FP_31C, P8_ISSUE_VSX], (instregex "^FSQRTS(_rec)?$")>;
  def : InstRW<[P8_FP_Scal_32C, P8_ISSUE_VSX], (instrs FDIV, FDIV_rec, XSDIVDP)>;
  def : InstRW<[P8_FP_2x64_32C, P8_ISSUE_VSX], (instrs XVDIVDP)>;
  def : InstRW<[P8_FP_4x32_32C, P8_ISSUE_VSX], (instrs XVSQRTSP)>;
  def : InstRW<[P8_FP_Scal_43C, P8_ISSUE_VSX], (instrs FSQRT, FSQRT_rec, XSSQRTDP)>;
  def : InstRW<[P8_FP_2x64_43C, P8_ISSUE_VSX], (instrs XVSQRTDP)>;

  // Moves to the FPSCR: 3 cycles on the top-level P8_FPU resource.
  def : InstRW<[P8_FP_3C, P8_ISSUE_VSX], (instrs
    MTFSFI_rec, MTFSF_rec, MTFSFI, MTFSFIb, MTFSF, MTFSFb, MTFSB0, MTFSB1)>;

  // 6-cycle operations on the scalar sub-group (P8_FP_Scal): fused
  // multiply-add, conversions, rounding and basic arithmetic in both the
  // classic F* and the VSX scalar XS* encodings.
  def : InstRW<[P8_FP_Scal_6C, P8_ISSUE_VSX], (instrs
    (instregex "^F(N)?M(ADD|SUB)(S)?(_rec)?$"),
    (instregex "^XS(N)?M(ADD|SUB)(A|M)(D|S)P$"),
    (instregex "^FC(F|T)I(D|W)(U)?(S|Z)?(_rec)?$"),
    (instregex "^(F|XS)(ABS|CPSGN|ADD|MUL|NABS|RE|NEG|SUB|SEL|RSQRTE)(D|S)?(P)?(s)?(_rec)?$"),
    (instregex "^FRI(M|N|P|Z)(D|S)(_rec)?$"),
    (instregex "^XSCVDP(S|U)X(W|D)S(s)?$"),
    (instregex "^XSCV(S|U)XD(D|S)P$"),
    (instregex "^XSCV(D|S)P(S|D)P(N)?$"),
    (instregex "^XSRDPI(C|M|P|Z)?$"),
    FMR, FRSP, FMR_rec, FRSP_rec, XSRSP)>;

  // 6-cycle operations on the 4 x single-precision sub-group (P8_FP_4x32),
  // covering both VSX (XV*SP) and VMX (V*FP) instructions.
  def : InstRW<[P8_FP_4x32_6C, P8_ISSUE_VSX], (instrs
    (instregex "^XV(N)?M(ADD|SUB)(A|M)SP$"),
    (instregex "^VRFI(M|N|P|Z)$"),
    XVRSQRTESP, XVSUBSP, VADDFP, VEXPTEFP, VLOGEFP, VMADDFP, VNMSUBFP, VREFP,
    VRSQRTEFP, VSUBFP, XVCVSXWSP, XVCVUXWSP, XVMULSP, XVNABSSP, XVNEGSP, XVRESP,
    XVCVDPSP, XVCVSXDSP, XVCVUXDSP, XVABSSP, XVADDSP, XVCPSGNSP)>;

  // 6-cycle operations on the 2 x double-precision sub-group (P8_FP_2x64).
  def : InstRW<[P8_FP_2x64_6C, P8_ISSUE_VSX], (instrs
    (instregex "^XVR(D|S)PI(C|M|P|Z)?$"),
    (instregex "^XVCV(S|U)X(D|W)DP$"),
    (instregex "^XVCV(D|W|S)P(S|U)X(D|W)S$"),
    (instregex "^XV(N)?(M)?(RSQRTE|CPSGN|SUB|ADD|ABS|UL|NEG|RE)(A|M)?DP$"),
    XVCVSPDP)>;

  // Instructions of FX, LU or LS pipeline
  //
  // Fixed-point entries pair a P8_FX latency write with P8_ISSUE_FX, the
  // write that occupies the fixed-point dispatch ports.

  // Traps and moves to CR fields: P8_FX with no explicit latency.
  def : InstRW<[P8_FX_NONE, P8_ISSUE_FX], (instrs TDI, TWI, TD, TW, MTCRF, MTCRF8, MTOCRF, MTOCRF8)>;
  // Non-record-form RLWIMI/RLWIMI8: 1 cycle.
  def : InstRW<[P8_FX_1C, P8_ISSUE_FX], (instregex "^RLWIMI(8)?$")>;
  // TODO: Pipeline of logical instructions might be LS or FX
  // 2-cycle group: logical operations, sign extensions, rotates, shifts, and
  // the overflow-enabled, carrying and record forms of add/subtract/negate.
  def : InstRW<[P8_FX_2C, P8_ISSUE_FX], (instrs
    (instregex "^(N|X)?(EQV|AND|OR)(I)?(S|C)?(8)?(_rec)?$"),
    (instregex "^EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
    (instregex "^RL(D|W)(I)?(NM|C)(L|R)?(8)?(_32)?(_64)?(_rec)?$"),
    (instregex "^S(L|R)(A)?(W|D)(I)?(8)?(_rec|_32)?$"),
    (instregex "^(ADD|SUBF)(M|Z)?(C|E)?(4|8)?O(_rec)?$"),
    (instregex "^(ADD|SUBF)(M|Z)?E(8)?_rec$"),
    (instregex "^(ADD|SUBF|NEG)(4|8)?_rec$"),
    NOP, ADDG6S, ADDG6S8, ADDZE, ADDZE8, ADDIC_rec, NEGO_rec, ADDC, ADDC8, SUBFC, SUBFC8,
    ADDC_rec, ADDC8_rec, SUBFC_rec, SUBFC8_rec, COPY, NEG8O_rec,
    RLDIMI, RLDIMI_rec, RLWIMI8_rec, RLWIMI_rec)>;

  // Count-leading-zeros and population-count instructions: 3 cycles on P8_FX.
  // Like every other fixed-point entry, they also occupy a fixed-point
  // dispatch port (P8_ISSUE_FX); without it the model would not account for
  // their issue-port pressure.
  def : InstRW<[P8_FX_3C, P8_ISSUE_FX], (instregex "^(POP)?CNT(LZ)?(B|W|D)(8)?(_rec)?$")>;
  // Multiplies, compares, isel and moves to LR/CTR: 5 cycles on P8_FX.
  def : InstRW<[P8_FX_5C, P8_ISSUE_FX], (instrs
    (instregex "^MUL(H|L)(I|W|D)(8)?(U|O)?(_rec)?$"),
    CMPDI,CMPWI,CMPD,CMPW,CMPLDI,CMPLWI,CMPLD,CMPLW,
    ISEL, ISEL8, MTLR, MTLR8, MTCTR, MTCTR8, MTCTR8loop, MTCTRloop)>;

  // MFTB/MFTB8: 10 cycles on P8_FX. The latency write uses the fixed-point
  // unit, so the instruction is dispatched through the fixed-point issue ports
  // (P8_ISSUE_FX), not the VMX/FP ports.
  def : InstRW<[P8_FX_10C, P8_ISSUE_FX], (instregex "^MFTB(8)?$")>;
  // DIVW/DIVWU: 15 cycles on P8_FX.
  def : InstRW<[P8_FX_15C, P8_ISSUE_FX], (instregex "^DIVW(U)?$")>;

  // DIVD/DIVDU/DIVWE/DIVWEU: 23 cycles on P8_FX.
  def : InstRW<[P8_FX_23C, P8_ISSUE_FX], (instregex "^DIV(D|WE)(U)?$")>;
  // Overflow-enabled and record-form divides, plus DIVDE/DIVDEU: 41 cycles on
  // P8_FX. As with the other divides, the entry also occupies a fixed-point
  // dispatch port (P8_ISSUE_FX).
  def : InstRW<[P8_FX_41C, P8_ISSUE_FX], (instrs
    (instregex "^DIV(D|W)(E)?(U)?O(_rec)?$"),
    (instregex "^DIV(D|W)(E)?(U)?_rec$"),
    DIVDE, DIVDEU)>;

  // Moves from segment registers: 3 cycles on P8_LS, fixed-point issue port.
  def : InstRW<[P8_LS_3C, P8_ISSUE_FX], (instrs MFSR, MFSRIN)>;

  // Floating-point, VSX and VMX loads: 5 cycles on P8_LU, dispatched through
  // the load/fixed-point ports. Each instruction is listed once (LXVD2X used
  // to appear twice in this set, which was redundant).
  def : InstRW<[P8_LU_5C, P8_ISSUE_LD], (instrs
    LFS, LFSX, LFD, LFDX, LFDXTLS, LFDXTLS_, LXVD2X, LXVW4X, LXVDSX, LVEBX, LVEHX, LVEWX,
    LVX, LVXL, LXSDX, LFIWAX, LFIWZX, LFSXTLS, LFSXTLS_, LXVB16X, LXSIWZX,
    DFLOADf64, XFLOADf64, LIWZX)>;

  // LQ: 3 cycles, occupying both P8_LS and P8_FX.
  def : InstRW<[P8_LS_FX_3C, P8_ISSUE_FXLD], (instrs LQ)>;
  // Floating-point loads with update: 5 cycles, occupying both P8_LU and
  // P8_FX.
  def : InstRW<[P8_LU_FX_5C, P8_ISSUE_LD], (instregex "^LF(D|S)U(X)?$")>;

  // Store entries all dispatch through P8_ISSUE_ST (the store-capable ports).

  // Floating-point and VSX stores: occupy P8_LS and P8_FPU, no explicit
  // latency.
  def : InstRW<[P8_LS_FP_NONE, P8_ISSUE_ST], (instrs
    STXSDX, STXVD2X, STXVW4X, STFIWX, STFS, STFSX, STFD, STFDX,
    STFDEPX, STFDXTLS, STFDXTLS_, STFSXTLS, STFSXTLS_, STXSIWX, STXSSP, STXSSPX)>;

  // VMX stores (2 cycles) and floating-point stores with update (3 cycles):
  // occupy P8_LS, P8_FPU and P8_FX.
  def : InstRW<[P8_LS_FP_FX_2C, P8_ISSUE_ST], (instrs STVEBX, STVEHX, STVEWX, STVX, STVXL)>;
  def : InstRW<[P8_LS_FP_FX_3C, P8_ISSUE_ST], (instregex "^STF(D|S)U(X)?$")>;

  // Integer stores, including byte-reversed, conditional, cache-inhibited,
  // external-PID and multiple/string forms: occupy P8_LU and P8_LS, no
  // explicit latency.
  def : InstRW<[P8_LS_LU_NONE, P8_ISSUE_ST], (instrs
    (instregex "^ST(B|H|W|D)(U)?(X)?(8|TLS)?(_)?(32)?$"),
    STBCIX, STBCX, STBEPX, STDBRX, STDCIX, STDCX, STHBRX, STHCIX, STHCX, STHEPX,
    STMW, STSWI, STWBRX, STWCIX, STWCX, STWEPX)>;

  // Integer loads matched by name pattern: 3 cycles, occupying the combined
  // P8_LU_LS group plus P8_FX, dispatched through the fixed-point-load ports.
  def : InstRW<[P8_LU_or_LS_FX_3C, P8_ISSUE_FXLD],
    (instregex "^L(B|H|W|D)(A|Z)?(U)?(X)?(8|TLS)?(_)?(32)?$")>;

  // Explicitly listed loads (reservation, byte-reversed, cache-inhibited,
  // external-PID, multiple/string, LVSL/LVSR): 3 cycles on the P8_LU_LS group
  // only.
  def : InstRW<[P8_LU_or_LS_3C, P8_ISSUE_FXLD], (instrs
    LBARX, LBARXL, LBEPX, LBZCIX, LDARX, LDARXL, LDBRX, LDCIX, LFDEPX, LHARX, LHARXL, LHBRX, LXSIWAX,
    LHBRX8, LHEPX, LHZCIX, LMW, LSWI, LVSL, LVSR, LWARX, LWARXL, LWBRX, LWBRX8, LWEPX, LWZCIX)>;

  // Add-immediate, load-immediate, plain add/subtract and negate forms:
  // 2 cycles on the widest P8_LU_LS_FX group, fixed-point issue port.
  def : InstRW<[P8_LU_or_LS_or_FX_2C, P8_ISSUE_FX], (instrs
    (instregex "^ADDI(C)?(dtprel|tlsgd|toc)?(L)?(ADDR)?(32|8)?$"),
    (instregex "^ADDIS(dtprel|tlsgd|toc|gotTprel)?(HA)?(32|8)?$"),
    (instregex "^LI(S)?(8)?$"),
    (instregex "^ADD(M)?(E)?(4|8)?(TLS)?(_)?$"),
    (instregex "^SUBF(M|Z)?(E)?(IC)?(4|8)?$"),
    (instregex "^NEG(8)?(O)?$"))>;

  // Instructions of PM pipeline
  //
  // The entries below use the P8_PM and P8_XS sub-groups of P8_VMX and are
  // dispatched through the shared VMX/FP issue ports (P8_ISSUE_VSX).

  // Permute-class operations (pack, unpack, splat, merge, select, shifts by
  // octet/bit across the register): 2 cycles on P8_PM.
  def : InstRW<[P8_PM_2C, P8_ISSUE_VSX], (instrs
    (instregex "^VPK(S|U)(H|W|D)(S|U)(M|S)$"),
    (instregex "^VUPK(H|L)(P|S)(H|B|W|X)$"),
    (instregex "^VSPLT(IS)?(B|H|W)(s)?$"),
    (instregex "^(XX|V)MRG(E|O|H|L)(B|H|W)$"),
    XXPERMDI, XXPERMDIs, XXSEL, XXSLDWI, XXSLDWIs, XXSPLTW, XXSPLTWs, VPERMXOR,
    VPKPX, VPERM, VBPERMQ, VGBBD, VSEL, VSL, VSLDOI, VSLO, VSR, VSRO)>;

  // Simple vector integer arithmetic, logical, compare, min/max, count and
  // VSCR moves: 2 cycles on P8_XS.
  def : InstRW<[P8_XS_2C, P8_ISSUE_VSX], (instrs
    (instregex "^V(ADD|SUB)(S|U)(B|H|W|D)(M|S)$"),
    (instregex "^X(S|V)(MAX|MIN)(D|S)P$"),
    (instregex "^V(S)?(R)?(L)?(A)?(B|D|H|W)$"),
    (instregex "^VAVG(S|U)(B|H|W)$"),
    (instregex "^VM(AX|IN)(S|U)(B|H|W|D)$"),
    (instregex "^(XX|V)(L)?(N)?(X)?(AND|OR|EQV)(C)?$"),
    (instregex "^(X)?VCMP(EQ|GT|GE|B)(F|S|U)?(B|H|W|D|P|S)(P)?(_rec)?$"),
    (instregex "^VCLZ(B|H|W|D)$"),
    (instregex "^VPOPCNT(B|H|W)$"),
    XXLORf, XXLXORdpz, XXLXORspz, XXLXORz, VEQV, VMAXFP, VMINFP,
    VSHASIGMAD, VSHASIGMAW, VSUBCUW, VADDCUW, MFVSCR, MTVSCR)>;

  // Quadword add/subtract and doubleword population count: 4 cycles on P8_XS.
  def : InstRW<[P8_XS_4C, P8_ISSUE_VSX], (instrs
    (instregex "^V(ADD|SUB)(E)?(C)?UQ(M)?$"),
    VPOPCNTD)>;

  // Floating-point compares and test-for-divide/square-root instructions:
  // 9 cycles on P8_XS. P8_XS is a sub-group of P8_VMX, so these instructions
  // are dispatched through the shared VMX/FP issue ports (P8_ISSUE_VSX) like
  // every other XS entry, rather than through the single CR port.
  def : InstRW<[P8_XS_9C, P8_ISSUE_VSX], (instrs
    (instregex "^(F|XS)CMP(O|U)(D|S)(P)?$"),
    (instregex "^(F|XS|XV)T(DIV|SQRT)((D|S)P)?$"))>;

  // Instructions of VX pipeline
  //
  // Vector sum-across, multiply and multiply-add instructions: 7 cycles on
  // P8_VX, dispatched through the shared VMX/FP issue ports.

  def : InstRW<[P8_VX_7C, P8_ISSUE_VSX],
               (instrs (instregex "^V(M)?SUM(2|4)?(M|S|U)(B|H|W)(M|S)$"),
                       (instregex "^VMUL(E|O)?(S|U)(B|H|W)(M)?$"),
                       VMHADDSHS, VMHRADDSHS, VMLADDUHM)>;

  // Instructions of BR pipeline
  //
  // All branch forms matched by the patterns below: 2 cycles on P8_BR,
  // dispatched through the branch issue port.

  def : InstRW<[P8_BR_2C, P8_ISSUE_BR],
               (instrs
                 (instregex "^(g)?B(C)?(C)?(CTR)?(L)?(A)?(R)?(L)?(8)?(_LD|_LWZ)?(always|into_toc|at)?(_RM)?(n)?$"),
                 (instregex "^BD(N)?Z(L)?(R|A)?(L)?(m|p|8)?$"),
                 (instregex "^BL(R|A)?(8)?(_NOP)?(_TLS)?(_)?(RM)?$"))>;

  // Instructions of DFP pipeline
  // DFP operations also use float/vector/crypto issue ports.
  // Every entry below uses the single P8_DFU resource; the groups differ only
  // in their latency write.

  // 13-cycle group: tests, exponent extract/insert, add/subtract, quantize,
  // round, BCD conversions and shifts in their non-quad forms, plus
  // BCDADD/BCDSUB.
  def : InstRW<[P8_DFU_13C, P8_ISSUE_VSX], (instrs
    (instregex "^DTST(D|S)(C|F|G)(Q)?$"),
    (instregex "^D(Q|X)EX(Q)?(_rec)?$"),
    (instregex "^D(ADD|SUB|IEX|QUA|RRND|RINTX|RINTN|CTDP|DEDPD|ENBCD)(_rec)?$"),
    (instregex "^DSC(L|R)I(_rec)?$"),
    BCDADD_rec, BCDSUB_rec, DCMPO, DCMPU, DTSTEX, DQUAI)>;

  // 15-cycle group: the quad-precision (Q-suffixed) counterparts of the
  // compare, round, insert-exponent, quantize-immediate, BCD and shift forms.
  def : InstRW<[P8_DFU_15C, P8_ISSUE_VSX], (instrs
    (instregex "^DRINT(N|X)Q(_rec)?$"),
    DCMPOQ, DCMPUQ, DRRNDQ, DRRNDQ_rec, DIEXQ, DIEXQ_rec, DQUAIQ, DQUAIQ_rec,
    DTSTEXQ, DDEDPDQ, DDEDPDQ_rec, DENBCDQ, DENBCDQ_rec, DSCLIQ, DSCLIQ_rec,
    DSCRIQ, DSCRIQ_rec, DCTQPQ, DCTQPQ_rec)>;

  // Remaining groups, in order of increasing latency.
  def : InstRW<[P8_DFU_17C, P8_ISSUE_VSX], (instregex "^D(ADD|SUB|QUA)Q(_rec)?$")>;
  def : InstRW<[P8_DFU_25C, P8_ISSUE_VSX], (instrs DRSP, DRSP_rec, DCTFIX, DCTFIX_rec)>;
  def : InstRW<[P8_DFU_32C, P8_ISSUE_VSX], (instrs DCFFIX, DCFFIX_rec)>;
  def : InstRW<[P8_DFU_34C, P8_ISSUE_VSX], (instrs DCFFIXQ, DCFFIXQ_rec)>;
  def : InstRW<[P8_DFU_40C, P8_ISSUE_VSX], (instrs DMUL, DMUL_rec)>;
  def : InstRW<[P8_DFU_90C, P8_ISSUE_VSX], (instrs DMULQ, DMULQ_rec)>;
  def : InstRW<[P8_DFU_96C, P8_ISSUE_VSX], (instrs DDIV, DDIV_rec)>;
  def : InstRW<[P8_DFU_172C, P8_ISSUE_VSX], (instrs DDIVQ, DDIVQ_rec)>;

  // Direct move instructions
  //
  // Moves between general-purpose and vector/VSX registers: 5-cycle latency
  // with no execution resource of their own (P8_DM_5C), dispatched through the
  // shared VMX/FP issue ports.

  def : InstRW<[P8_DM_5C, P8_ISSUE_VSX],
               (instrs MFVRD, MFVSRD, MFVRWZ, MFVSRWZ,
                       MTVRD, MTVSRD, MTVRWA, MTVSRWA, MTVRWZ, MTVSRWZ)>;
}