File: queue_ops.cc

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (74 lines) | stat: -rw-r--r-- 2,335 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#include <caffe2/ideep/ideep_utils.h>
#include <caffe2/queue/blobs_queue.h>

using namespace caffe2;

namespace {

class IDEEPCreateBlobsQueueOp final : public IDEEPOperator {
 public:
  USE_IDEEP_DEF_ALIASES();
  USE_IDEEP_OPERATOR_FUNCTIONS();

  IDEEPCreateBlobsQueueOp(const OperatorDef& operator_def, Workspace* ws)
      : IDEEPOperator(operator_def, ws),
        ws_(ws),
        name(operator_def.output().Get(0)) {}

  bool RunOnDevice() override {
    const auto capacity = GetSingleArgument("capacity", 1);
    const auto numBlobs = GetSingleArgument("num_blobs", 1);
    const auto enforceUniqueName =
        GetSingleArgument("enforce_unique_name", false);
    const auto fieldNames =
        OperatorBase::template GetRepeatedArgument<std::string>("field_names");
    CAFFE_ENFORCE_EQ(this->OutputSize(), 1);
    auto queuePtr = OperatorBase::Outputs()[0]
                        ->template GetMutable<std::shared_ptr<BlobsQueue>>();

    CAFFE_ENFORCE(queuePtr);
    *queuePtr = std::make_shared<BlobsQueue>(
        ws_, name, capacity, numBlobs, enforceUniqueName, fieldNames);
    return true;
  }

 private:
  Workspace* ws_{nullptr};
  const std::string name;
};

class IDEEPSafeEnqueueBlobsOp final : public IDEEPOperator {
 public:
  USE_IDEEP_DEF_ALIASES();
  USE_IDEEP_OPERATOR_FUNCTIONS();

  IDEEPSafeEnqueueBlobsOp(const OperatorDef& operator_def, Workspace* ws)
      : IDEEPOperator(operator_def, ws) {}

  bool RunOnDevice() override {
    auto queue =
        OperatorBase::Inputs()[0]->template Get<std::shared_ptr<BlobsQueue>>();
    CAFFE_ENFORCE(queue);
    auto size = queue->getNumBlobs();
    CAFFE_ENFORCE(
        // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
        OutputSize() == size + 1,
        "Expected " + caffe2::to_string(size + 1) + ", " +
            " got: " + caffe2::to_string(size));
    bool status = queue->blockingWrite(OperatorBase::Outputs());

    auto st = OperatorBase::Output<TensorCPU>(1, CPU);
    st->Resize();
    auto stat = st->template mutable_data<bool>();
    stat[0] = !status;
    return true;
  }
};

REGISTER_IDEEP_OPERATOR(CreateBlobsQueue, IDEEPCreateBlobsQueueOp);
SHOULD_NOT_DO_GRADIENT(IDEEPCreateBlobsQueueOp);

REGISTER_IDEEP_OPERATOR(SafeEnqueueBlobs, IDEEPSafeEnqueueBlobsOp);
SHOULD_NOT_DO_GRADIENT(IDEEPSafeEnqueueBlobsOp);

} // namespace