1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
|
#pragma once
#include <c10/macros/Export.h>
#include <atomic>
namespace c10::impl {
// This TLS controls whether or not we permanently associate PyObject
// with Tensor the first time it is allocated. When hermetic PyObject
// TLS is enabled (state is true), we DO NOT save PyObjects to Tensor,
// meaning you get a distinct PyObject whenever you execute the code in
// question.
struct C10_API HermeticPyObjectTLS {
static void set_state(bool state);
static bool get_state() {
// Hypothetical fastpath if torchdeploy/multipy isn't used. Per
// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2055r0.pdf
// this qualifies relaxed access because it is a single-location data
// structure (only the boolean here).
//
// Forgetting about data races for a moment, is there a logical race?
//
// - Boolean only ever transitions from false to true. So the
// critical situation is when one interpreter is already running
// when a second interpreter switches haveState from false to true.
//
// - The first interpreter is indifferent whether or not it sees
// hasState true/false; obviously false works (this is what the
// interpreter was previously using; more directly, the interpreter
// calls into itself as the handler, so being hermetic is not
// required), and true simply means serviced python operator calls will
// be hermetic; in these cases it is expected to be functionally
// equivalent.
//
// - The second interpreter MUST see hasState true (as its requests will
// be forwarded to the first interpreter), but it is assumed that there
// is a synchronization between the interpreter initialization, and
// when we actually perform operations, so it is guaranteed to see
// hasState true.
//
// QED.
//
// This fastpath is currently disabled so that we can more easily test that
// hermetic mode works correctly even on stock build of PyTorch.
if (false && !haveState_.load(std::memory_order_relaxed))
return false;
return get_tls_state();
}
// Call this from the multipy/torchdeploy top level
static void init_state();
private:
// This only flipped once from false to true during torchdeploy/multipy
// initialization, and never again.
static std::atomic<bool> haveState_;
static bool get_tls_state();
};
} // namespace c10::impl
|