1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
|
//===-- runtime/copy.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "copy.h"
#include "stack.h"
#include "terminator.h"
#include "type-info.h"
#include "flang/Runtime/allocatable.h"
#include "flang/Runtime/descriptor.h"
#include <cstring>
namespace Fortran::runtime {
namespace {
// Storage type for the temporary descriptors that CopyElement establishes
// for Component::Genre::Data components (see StaticDescriptorsPair).
// NOTE(review): the template arguments presumably select the maximum rank,
// the presence of an addendum, and zero length type parameters -- confirm
// against the StaticDescriptor declaration.
using StaticDescTy = StaticDescriptor<maxRank, true, 0>;
// A structure describing the data copy that needs to be done
// from one descriptor to another. It is a helper structure
// for CopyElement.
struct CopyDescriptor {
// A constructor specifying all members explicitly.
// The toAt and fromAt specify subscript storages that might be
// external to CopyElement, and cannot be modified.
// The copy descriptor only establishes toAtPtr_ and fromAtPtr_
// pointers to point to these storages.
RT_API_ATTRS CopyDescriptor(const Descriptor &to, const SubscriptValue toAt[],
const Descriptor &from, const SubscriptValue fromAt[],
std::size_t elements, bool usesStaticDescriptors = false)
: to_(to), from_(from), elements_(elements),
usesStaticDescriptors_(usesStaticDescriptors) {
toAtPtr_ = toAt;
fromAtPtr_ = fromAt;
}
// The number of elements to copy is initialized from the to descriptor.
// The current element subscripts are initialized from the lower bounds
// of the to and from descriptors.
RT_API_ATTRS CopyDescriptor(const Descriptor &to, const Descriptor &from,
bool usesStaticDescriptors = false)
: to_(to), from_(from), elements_(to.Elements()),
usesStaticDescriptors_(usesStaticDescriptors) {
to.GetLowerBounds(toAt_);
from.GetLowerBounds(fromAt_);
}
// Increment the toAt_ and fromAt_ subscripts to the next
// element.
RT_API_ATTRS void IncrementSubscripts(Terminator &terminator) {
// This method must not be called for copy descriptors
// using external non-modifiable subscript storage.
RUNTIME_CHECK(terminator, toAt_ == toAtPtr_ && fromAt_ == fromAtPtr_);
to_.IncrementSubscripts(toAt_);
from_.IncrementSubscripts(fromAt_);
}
// Descriptor of the destination.
const Descriptor &to_;
// A subscript specifying the current element position to copy to.
SubscriptValue toAt_[maxRank];
// A pointer to the storage of the 'to' subscript.
// It may point to toAt_ or to an external non-modifiable
// subscript storage.
const SubscriptValue *toAtPtr_{toAt_};
// Descriptor of the source.
const Descriptor &from_;
// A subscript specifying the current element position to copy from.
SubscriptValue fromAt_[maxRank];
// A pointer to the storage of the 'from' subscript.
// It may point to fromAt_ or to an external non-modifiable
// subscript storage.
const SubscriptValue *fromAtPtr_{fromAt_};
// Number of elements left to copy.
std::size_t elements_;
// Must be true, if the to and from descriptors are allocated
// by the CopyElement runtime. The allocated memory belongs
// to a separate stack that needs to be popped in correspondence
// with popping such a CopyDescriptor node.
bool usesStaticDescriptors_;
};
// A pair of StaticDescTy elements: the destination and source descriptors
// that CopyElement establishes in place when it descends into a
// Component::Genre::Data component. CopyElement pushes one pair onto its
// descriptors stack for each such nested copy.
struct StaticDescriptorsPair {
// Descriptor storage for the component being copied to.
StaticDescTy to;
// Descriptor storage for the component being copied from.
StaticDescTy from;
};
} // namespace
RT_OFFLOAD_API_GROUP_BEGIN
// Copies the element of 'from' designated by the subscripts 'fromAt' into
// the element of 'to' designated by the subscripts 'toAt'.
//
// The element sizes of both descriptors are asserted to be equal with
// RUNTIME_CHECK. If 'to' has no addendum, the copy is a single memcpy.
// Otherwise the element bytes are copied and then, for derived types that
// do not report noDestructionNeeded():
//  - every Allocatable/Automatic component whose copied descriptor has a
//    non-null base address gets fresh storage allocated in the destination
//    and all of its elements are copied in turn;
//  - every Data component of a derived type that does not report
//    noDestructionNeeded() is walked element by element through a pair of
//    temporary descriptors.
// The nesting is handled with explicit work stacks rather than recursion.
RT_API_ATTRS void CopyElement(const Descriptor &to, const SubscriptValue toAt[],
    const Descriptor &from, const SubscriptValue fromAt[],
    Terminator &terminator) {
  if (!to.Addendum()) {
    // Avoid the overhead of creating the work stacks below
    // for the simple non-derived type cases, because the overhead
    // might be noticeable over the total amount of work that
    // needs to be done for the copy.
    char *toPtr{to.Element<char>(toAt)};
    char *fromPtr{from.Element<char>(fromAt)};
    RUNTIME_CHECK(terminator, to.ElementBytes() == from.ElementBytes());
    std::memcpy(toPtr, fromPtr, to.ElementBytes());
    return;
  }

#if !defined(RT_DEVICE_COMPILATION)
  constexpr unsigned copyStackReserve{16};
  constexpr unsigned descriptorStackReserve{6};
#else
  // Always use dynamic allocation on the device to avoid
  // big stack sizes. This may be tuned as needed.
  constexpr unsigned copyStackReserve{0};
  constexpr unsigned descriptorStackReserve{0};
#endif
  // Keep a stack of CopyDescriptor's to avoid recursive calls.
  Stack<CopyDescriptor, copyStackReserve> copyStack{terminator};
  // Keep a separate stack of StaticDescTy pairs. These descriptors
  // may be used for representing copies of Component::Genre::Data
  // components (since they do not have their descriptors allocated
  // in memory).
  Stack<StaticDescriptorsPair, descriptorStackReserve> descriptorsStack{
      terminator};

  // Seed the work stack with the single element requested by the caller.
  // The caller's subscript arrays are used as is and are never advanced,
  // because exactly one element is copied from them.
  copyStack.emplace(to, toAt, from, fromAt, /*elements=*/std::size_t{1});

  while (!copyStack.empty()) {
    // Must be a reference: the remaining-element count and the subscripts
    // are updated in place in the node that stays on the stack.
    CopyDescriptor &currentCopy{copyStack.top()};
    std::size_t &elements{currentCopy.elements_};
    if (elements == 0) {
      // This copy has been exhausted.
      if (currentCopy.usesStaticDescriptors_) {
        // Pop the static descriptors, if they were used
        // for the current copy.
        descriptorsStack.pop();
      }
      copyStack.pop();
      continue;
    }
    const Descriptor &curTo{currentCopy.to_};
    const SubscriptValue *curToAt{currentCopy.toAtPtr_};
    const Descriptor &curFrom{currentCopy.from_};
    const SubscriptValue *curFromAt{currentCopy.fromAtPtr_};
    char *toPtr{curTo.Element<char>(curToAt)};
    char *fromPtr{curFrom.Element<char>(curFromAt)};
    RUNTIME_CHECK(terminator, curTo.ElementBytes() == curFrom.ElementBytes());
    // TODO: the memcpy can be optimized when both to and from are contiguous.
    // Moreover, if we came here from an Component::Genre::Data component,
    // all the per-element copies are redundant, because the parent
    // has already been copied as a whole.
    std::memcpy(toPtr, fromPtr, curTo.ElementBytes());
    // Account for the element just copied and step to the next one now;
    // toPtr/fromPtr still address the element copied above.
    --elements;
    if (elements != 0) {
      currentCopy.IncrementSubscripts(terminator);
    }

    // Deep copy allocatable and automatic components if any.
    if (const auto *addendum{curTo.Addendum()}) {
      if (const auto *derived{addendum->derivedType()};
          derived && !derived->noDestructionNeeded()) {
        RUNTIME_CHECK(terminator,
            curFrom.Addendum() && derived == curFrom.Addendum()->derivedType());
        const Descriptor &componentDesc{derived->component()};
        const typeInfo::Component *component{
            componentDesc.OffsetElement<typeInfo::Component>()};
        std::size_t nComponents{componentDesc.Elements()};
        for (std::size_t j{0}; j < nComponents; ++j, ++component) {
          if (component->genre() == typeInfo::Component::Genre::Allocatable ||
              component->genre() == typeInfo::Component::Genre::Automatic) {
            // After the memcpy above, the destination's component
            // descriptor is a byte copy of the source's one.
            Descriptor &toDesc{
                *reinterpret_cast<Descriptor *>(toPtr + component->offset())};
            if (toDesc.raw().base_addr != nullptr) {
              // Drop the copied base address and give the destination
              // its own storage before scheduling the element copies.
              toDesc.set_base_addr(nullptr);
              RUNTIME_CHECK(terminator, toDesc.Allocate() == CFI_SUCCESS);
              const Descriptor &fromDesc{*reinterpret_cast<const Descriptor *>(
                  fromPtr + component->offset())};
              copyStack.emplace(toDesc, fromDesc);
            }
          } else if (component->genre() == typeInfo::Component::Genre::Data &&
              component->derivedType() &&
              !component->derivedType()->noDestructionNeeded()) {
            // Compute the extents of the component from its bounds, which
            // are stored as (lower, upper) pairs per dimension. A bound
            // without a value is taken as 0.
            SubscriptValue extents[maxRank];
            const typeInfo::Value *bounds{component->bounds()};
            std::size_t componentElements{1};
            for (int dim{0}; dim < component->rank(); ++dim) {
              typeInfo::TypeParameterValue lb{
                  bounds[2 * dim].GetValue(&curTo).value_or(0)};
              typeInfo::TypeParameterValue ub{
                  bounds[2 * dim + 1].GetValue(&curTo).value_or(0)};
              extents[dim] = ub >= lb ? ub - lb + 1 : 0;
              componentElements *= extents[dim];
            }
            if (componentElements != 0) {
              const typeInfo::DerivedType &compType{*component->derivedType()};
              // Place a pair of static descriptors onto the descriptors stack.
              descriptorsStack.emplace();
              StaticDescriptorsPair &descs{descriptorsStack.top()};
              Descriptor &toCompDesc{descs.to.descriptor()};
              toCompDesc.Establish(compType, toPtr + component->offset(),
                  component->rank(), extents);
              Descriptor &fromCompDesc{descs.from.descriptor()};
              fromCompDesc.Establish(compType, fromPtr + component->offset(),
                  component->rank(), extents);
              copyStack.emplace(toCompDesc, fromCompDesc,
                  /*usesStaticDescriptors=*/true);
            }
          }
        }
      }
    }
  }
}
RT_OFFLOAD_API_GROUP_END
} // namespace Fortran::runtime
|