File: llvm-PR22923.patch

package info (click to toggle)
julia 1.0.3%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 49,452 kB
  • sloc: lisp: 236,453; ansic: 55,579; cpp: 25,603; makefile: 1,685; pascal: 1,130; sh: 956; asm: 86; xml: 76
file content (151 lines) | stat: -rw-r--r-- 6,949 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
From e060ffb4b20e294ecb8429bd8a925f9f12b63b17 Mon Sep 17 00:00:00 2001
From: Hal Finkel <hfinkel@anl.gov>
Date: Mon, 29 Aug 2016 22:25:36 +0000
Subject: [PATCH] [PowerPC] Fix i8/i16 atomics for little-Endian targets
 without partword atomics

For little-Endian PowerPC, we generally target only P8 and later by default.
However, generic (older) 64-bit configurations are still an option, and in that
case, partword atomics are not available (e.g. stbcx.). To lower i8/i16 atomics
without true i8/i16 atomic operations, we emulate using i32 atomics in
combination with a bunch of shifting and masking, etc. The amount by which to
shift in little-Endian mode is different from the amount in big-Endian mode (it
is inverted -- meaning we can leave off the xor when computing the amount).

Fixes PR22923.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280022 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/PPCISelLowering.cpp | 18 ++++++++++++------
 test/CodeGen/PowerPC/atomic-2.ll       | 15 ++++++++++++++-
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index e89b6ca..f895b06 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8513,6 +8513,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
   // registers without caring whether they're 32 or 64, but here we're
   // doing actual arithmetic on the addresses.
   bool is64bit = Subtarget.isPPC64();
+  bool isLittleEndian = Subtarget.isLittleEndian();
   unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
 
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -8542,7 +8543,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
                                           : &PPC::GPRCRegClass;
   unsigned PtrReg = RegInfo.createVirtualRegister(RC);
   unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
-  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+  unsigned ShiftReg =
+    isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
   unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
   unsigned MaskReg = RegInfo.createVirtualRegister(RC);
   unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
@@ -8587,8 +8589,9 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
   }
   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
       .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
-  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
-      .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+  if (!isLittleEndian)
+    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
   if (is64bit)
     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
       .addReg(Ptr1Reg).addImm(0).addImm(61);
@@ -9293,6 +9296,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     // since we're actually doing arithmetic on them.  Other registers
     // can be 32-bit.
     bool is64bit = Subtarget.isPPC64();
+    bool isLittleEndian = Subtarget.isLittleEndian();
     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
 
     unsigned dest = MI.getOperand(0).getReg();
@@ -9319,7 +9323,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                             : &PPC::GPRCRegClass;
     unsigned PtrReg = RegInfo.createVirtualRegister(RC);
     unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
-    unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+    unsigned ShiftReg =
+      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
     unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
     unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
     unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
@@ -9374,8 +9379,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     }
     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
         .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
-    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
-        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+    if (!isLittleEndian)
+      BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+          .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
     if (is64bit)
       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
         .addReg(Ptr1Reg).addImm(0).addImm(61);
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index 1857d5d..bafabdb 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=ppc64 | FileCheck %s
+; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
+; RUN: llc < %s -march=ppc64le | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
 ; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s
 ; RUN: llc < %s -march=ppc64 -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-P8U
 
@@ -12,6 +13,8 @@ define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
 
 define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind {
 ; CHECK-LABEL: exchange_and_add8:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
 ; CHECK-P8U: lbarx
   %tmp = atomicrmw add i8* %mem, i8 %val monotonic
 ; CHECK-P8U: stbcx.
@@ -20,6 +23,8 @@ define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind {
 
 define i16 @exchange_and_add16(i16* %mem, i16 %val) nounwind {
 ; CHECK-LABEL: exchange_and_add16:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
 ; CHECK-P8U: lharx
   %tmp = atomicrmw add i16* %mem, i16 %val monotonic
 ; CHECK-P8U: sthcx.
@@ -38,6 +43,8 @@ define i64 @exchange_and_cmp(i64* %mem) nounwind {
 
 define i8 @exchange_and_cmp8(i8* %mem) nounwind {
 ; CHECK-LABEL: exchange_and_cmp8:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
 ; CHECK-P8U: lbarx
   %tmppair = cmpxchg i8* %mem, i8 0, i8 1 monotonic monotonic
   %tmp = extractvalue { i8, i1 } %tmppair, 0
@@ -48,6 +55,8 @@ define i8 @exchange_and_cmp8(i8* %mem) nounwind {
 
 define i16 @exchange_and_cmp16(i16* %mem) nounwind {
 ; CHECK-LABEL: exchange_and_cmp16:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
 ; CHECK-P8U: lharx
   %tmppair = cmpxchg i16* %mem, i16 0, i16 1 monotonic monotonic
   %tmp = extractvalue { i16, i1 } %tmppair, 0
@@ -66,6 +75,8 @@ define i64 @exchange(i64* %mem, i64 %val) nounwind {
 
 define i8 @exchange8(i8* %mem, i8 %val) nounwind {
 ; CHECK-LABEL: exchange8:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
 ; CHECK-P8U: lbarx
   %tmp = atomicrmw xchg i8* %mem, i8 1 monotonic
 ; CHECK-P8U: stbcx.
@@ -74,6 +85,8 @@ define i8 @exchange8(i8* %mem, i8 %val) nounwind {
 
 define i16 @exchange16(i16* %mem, i16 %val) nounwind {
 ; CHECK-LABEL: exchange16:
+; CHECK-BE: xori
+; CHECK-LE-NOT: xori
 ; CHECK-P8U: lharx
   %tmp = atomicrmw xchg i16* %mem, i16 1 monotonic
 ; CHECK-P8U: sthcx.