File: CopyBytes.h

package info (click to toggle)
pytorch 1.13.1%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 139,252 kB
  • sloc: cpp: 1,100,274; python: 706,454; ansic: 83,052; asm: 7,618; java: 3,273; sh: 2,841; javascript: 612; makefile: 323; xml: 269; ruby: 185; yacc: 144; objc: 68; lex: 44
file content (44 lines) | stat: -rw-r--r-- 1,229 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#pragma once

#include <c10/core/Device.h>

namespace c10 {

/*
 * Function-pointer type for the copy routines accepted by
 * _CopyBytesFunctionRegisterer.
 *
 * A conforming function takes a byte count, a read-only source pointer with
 * the Device it belongs to, and a writable destination pointer with the
 * Device it belongs to; it returns nothing.
 * NOTE(review): presumably the routine copies `nbytes` bytes from `src` to
 * `dst`; behavior for overlapping ranges or nbytes == 0 is not specified in
 * this header -- confirm against the registered implementations.
 */
using CopyBytesFunction = void (*)(
    size_t nbytes,
    const void* src,
    Device src_device,
    void* dst,
    Device dst_device);

/*
 * Helper type whose constructor receives the copy functions for one
 * (from, to) DeviceType pair. It is the type instantiated by
 * REGISTER_COPY_BYTES_FUNCTION.
 *
 * Only the constructor is declared here; its definition lives elsewhere, so
 * what it does with the arguments is not visible in this header (presumably
 * it records them in a lookup table consulted by CopyBytes -- TODO confirm).
 *
 * NOTE(review): the name starts with an underscore followed by an uppercase
 * letter, which is a reserved identifier form in C++. Renaming would break
 * existing users, so it is left as is.
 */
struct C10_API _CopyBytesFunctionRegisterer {
  /*
   * from       device type of the copy source
   * to         device type of the copy destination
   * func_sync  copy function for the synchronous case (required argument)
   * func_async copy function for the asynchronous case; defaults to nullptr
   *            when the caller does not supply one
   */
  _CopyBytesFunctionRegisterer(
      DeviceType from,
      DeviceType to,
      CopyBytesFunction func_sync,
      CopyBytesFunction func_async = nullptr);
};

/*
 * Defines a file-local _CopyBytesFunctionRegisterer object (inside an
 * anonymous namespace, with a name produced by C10_ANONYMOUS_VARIABLE) and
 * constructs it with (from, to, <remaining arguments>). The remaining
 * arguments are the synchronous copy function and, optionally, the
 * asynchronous one.
 *
 * Use at namespace scope in a source file. The registerer type is spelled
 * with its fully qualified name so that the expansion does not depend on
 * c10 names being visible through unqualified lookup at the point of use.
 */
#define REGISTER_COPY_BYTES_FUNCTION(from, to, ...)                  \
  namespace {                                                        \
  static ::c10::_CopyBytesFunctionRegisterer C10_ANONYMOUS_VARIABLE( \
      g_copy_function)(from, to, __VA_ARGS__);                       \
  }

/*
 * Copy entry point taking a byte count, a source pointer with its Device, a
 * destination pointer with its Device, and an `async` flag.
 *
 *   nbytes      number of bytes to copy
 *   src         read-only source buffer
 *   src_device  device `src` belongs to
 *   dst         writable destination buffer
 *   dst_device  device `dst` belongs to
 *   async       NOTE(review): presumably selects the asynchronous copy
 *               function registered for the device pair instead of the
 *               synchronous one -- confirm against the definition.
 *
 * WARNING: Implementations for this function are currently registered from
 * ATen and caffe2, not yet from c10. Do not use this function unless ATen or
 * caffe2 is present as well.
 * The implementations cannot be moved yet, because the CUDA implementations
 * are not yet unified between ATen and caffe2.
 * The plan is to move the implementations into c10/backend/xxx
 * to make c10 self-contained again.
 */
C10_API void CopyBytes(
    size_t nbytes,
    const void* src,
    Device src_device,
    void* dst,
    Device dst_device,
    bool async);
} // namespace c10