1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193
|
// pthreadpool header from https://github.com/Maratyszcza/pthreadpool
// for NNPACK
#ifndef CAFFE2_UTILS_PTHREADPOOL_H_
#define CAFFE2_UTILS_PTHREADPOOL_H_
#include "ThreadPoolCommon.h"
#include <stddef.h> // for size_t
#include <stdint.h> // for uint32_t
#if defined(USE_PTHREADPOOL) && defined(__cplusplus)
// This is a hack, introduced for the following reasons:
// 1. NNPACK can be compiled to use the internal legacy threadpool
//    implementation, because much of Caffe2 (C2) depends on it.
// 2. When NNPACK is used from PyTorch, which uses the new pthreadpool, a
//    new-style pthreadpool pointer is supplied to NNPACK. That does not work
//    if NNPACK was compiled with the internal legacy threadpool.
// This guard, together with changes in pthreadpool_impl.cc, allows that
// behavior to be overridden, so NNPACK can be used from PyTorch via
// `caffe2::pthreadpool_()`.
//
// The declaration below is C++-only, so it is additionally guarded by
// __cplusplus: the rest of this header wraps its declarations in
// `extern "C"` guards for C consumers, and a C translation unit built with
// USE_PTHREADPOOL defined must not see a namespace/class.
namespace caffe2 {

// Helper object constructed from a single flag and paired with a destructor.
// NOTE(review): presumably a scope guard that switches the legacy_* entry
// points to treat the handle as a new-style pthreadpool while it is alive and
// restores the previous mode on destruction -- the definitions live outside
// this header (see pthreadpool_impl.cc); confirm there.
class WithCastToNewThreadPool {
 public:
  // @param use_new_threadpool  Flag supplied by the caller; its exact effect
  //                            is defined by the out-of-line constructor.
  explicit WithCastToNewThreadPool(bool use_new_threadpool);
  ~WithCastToNewThreadPool();

 private:
  // State kept for the lifetime of the object.
  // NOTE(review): whether this holds the constructor argument or a saved
  // previous setting is not visible from this header.
  bool use_new_threadpool_;
};

} // namespace caffe2
#endif // defined(USE_PTHREADPOOL) && defined(__cplusplus)
/*
 * Handle type for the legacy thread pool. No definition of struct pthreadpool
 * appears in this header, so the handle is opaque to code that includes it.
 */
typedef struct pthreadpool* legacy_pthreadpool_t;

/*
 * Callback signatures taken by the legacy_pthreadpool_compute_* and
 * legacy_pthreadpool_parallelize_1d entry points declared in this header.
 * Every callback returns void and receives a void* first (the `argument`
 * value handed to the entry point), followed by a fixed number of size_t
 * values.
 * NOTE(review): the meaning of the individual size_t values (indices vs.
 * tile extents) is not stated in this header -- confirm against the
 * implementation before relying on it.
 */

/* 1D callback: void* plus one size_t. */
typedef void (*legacy_pthreadpool_function_1d_t)(
    void*,
    size_t);

/* Tiled 1D callback: void* plus two size_t. */
typedef void (*legacy_pthreadpool_function_1d_tiled_t)(
    void*,
    size_t, size_t);

/* 2D callback: void* plus two size_t. */
typedef void (*legacy_pthreadpool_function_2d_t)(
    void*,
    size_t, size_t);

/* Tiled 2D callback: void* plus four size_t. */
typedef void (*legacy_pthreadpool_function_2d_tiled_t)(
    void*,
    size_t, size_t,
    size_t, size_t);

/* Tiled 3D callback: void* plus six size_t. */
typedef void (*legacy_pthreadpool_function_3d_tiled_t)(
    void*,
    size_t, size_t, size_t,
    size_t, size_t, size_t);

/* Tiled 4D callback: void* plus eight size_t. */
typedef void (*legacy_pthreadpool_function_4d_tiled_t)(
    void*,
    size_t, size_t, size_t, size_t,
    size_t, size_t, size_t, size_t);
#ifdef __cplusplus
extern "C" {
#endif
/**
 * Creates a thread pool with the specified number of threads.
 *
 * @param[in] threads_count The number of threads in the thread pool.
 * A value of 0 has special interpretation: it creates a thread for each
 * processor core available in the system.
 *
 * @returns A pointer to an opaque thread pool object (the internal
 * threadpool implementation).
 * On error the function returns NULL and sets errno accordingly.
 *
 * @note The returned pool is released with legacy_pthreadpool_destroy().
 */
legacy_pthreadpool_t legacy_pthreadpool_create(size_t threads_count);
/**
 * Queries the number of threads in a thread pool.
 *
 * @param[in] threadpool The thread pool to query.
 *
 * @returns The number of threads in the thread pool.
 *
 * NOTE(review): the result for a NULL @a threadpool is not specified in this
 * header -- confirm against the implementation before passing NULL.
 */
size_t legacy_pthreadpool_get_threads_count(legacy_pthreadpool_t threadpool);
/**
 * Processes items in parallel using threads from a thread pool.
 *
 * When the call returns, all items have been processed and the thread pool is
 * ready for a new task.
 *
 * @note If multiple threads call this function with the same thread pool, the
 * calls are serialized.
 *
 * @param[in] threadpool The thread pool to use for parallelisation.
 * @param[in] function The function to call for each item.
 * @param[in] argument The first argument passed to the @a function.
 * @param[in] range The number of items to process. The @a function
 * will be called once for each item.
 */
void legacy_pthreadpool_compute_1d(
legacy_pthreadpool_t threadpool,
legacy_pthreadpool_function_1d_t function,
void* argument,
size_t range);
/**
 * 1D entry point that takes the same threadpool / function / argument / range
 * parameters as legacy_pthreadpool_compute_1d(), plus a @a flags word.
 *
 * @param[in] threadpool The thread pool handle.
 * @param[in] function   Callback of type legacy_pthreadpool_function_1d_t.
 * @param[in] argument   Opaque pointer supplied alongside the callback.
 * @param[in] range      Size of the 1D range.
 * @param[in] flags      32-bit flags value.
 *
 * NOTE(review): the meaning of the individual @a flags bits and how this
 * differs at runtime from legacy_pthreadpool_compute_1d() is not visible in
 * this header -- presumably it mirrors upstream pthreadpool_parallelize_1d;
 * confirm against the implementation.
 */
void legacy_pthreadpool_parallelize_1d(
legacy_pthreadpool_t threadpool,
legacy_pthreadpool_function_1d_t function,
void* argument,
size_t range,
uint32_t flags);
/**
 * Tiled 1D entry point.
 *
 * @param[in] threadpool The thread pool handle.
 * @param[in] function   Callback of type
 *                       legacy_pthreadpool_function_1d_tiled_t
 *                       (void* plus two size_t values).
 * @param[in] argument   Opaque pointer supplied alongside the callback.
 * @param[in] range      Size of the 1D range.
 * @param[in] tile       Tile size along the range.
 *
 * NOTE(review): presumably the range is processed in chunks of at most
 * @a tile items, with the callback receiving the chunk start and chunk
 * size -- not stated in this header; confirm against the implementation.
 */
void legacy_pthreadpool_compute_1d_tiled(
legacy_pthreadpool_t threadpool,
legacy_pthreadpool_function_1d_tiled_t function,
void* argument,
size_t range,
size_t tile);
/**
 * 2D entry point.
 *
 * @param[in] threadpool The thread pool handle.
 * @param[in] function   Callback of type legacy_pthreadpool_function_2d_t
 *                       (void* plus two size_t values).
 * @param[in] argument   Opaque pointer supplied alongside the callback.
 * @param[in] range_i    Size of the range along the first dimension.
 * @param[in] range_j    Size of the range along the second dimension.
 *
 * NOTE(review): presumably the callback is invoked once per (i, j) pair of
 * the range_i x range_j grid -- not stated in this header; confirm against
 * the implementation.
 */
void legacy_pthreadpool_compute_2d(
legacy_pthreadpool_t threadpool,
legacy_pthreadpool_function_2d_t function,
void* argument,
size_t range_i,
size_t range_j);
/**
 * Tiled 2D entry point.
 *
 * @param[in] threadpool The thread pool handle.
 * @param[in] function   Callback of type
 *                       legacy_pthreadpool_function_2d_tiled_t
 *                       (void* plus four size_t values).
 * @param[in] argument   Opaque pointer supplied alongside the callback.
 * @param[in] range_i    Size of the range along the first dimension.
 * @param[in] range_j    Size of the range along the second dimension.
 * @param[in] tile_i     Tile size along the first dimension.
 * @param[in] tile_j     Tile size along the second dimension.
 *
 * NOTE(review): presumably the grid is processed tile by tile, with the
 * callback receiving each tile's start indices and extents -- not stated in
 * this header; confirm against the implementation.
 */
void legacy_pthreadpool_compute_2d_tiled(
legacy_pthreadpool_t threadpool,
legacy_pthreadpool_function_2d_tiled_t function,
void* argument,
size_t range_i,
size_t range_j,
size_t tile_i,
size_t tile_j);
/**
 * Tiled 3D entry point.
 *
 * @param[in] threadpool The thread pool handle.
 * @param[in] function   Callback of type
 *                       legacy_pthreadpool_function_3d_tiled_t
 *                       (void* plus six size_t values).
 * @param[in] argument   Opaque pointer supplied alongside the callback.
 * @param[in] range_i    Size of the range along the first dimension.
 * @param[in] range_j    Size of the range along the second dimension.
 * @param[in] range_k    Size of the range along the third dimension.
 * @param[in] tile_i     Tile size along the first dimension.
 * @param[in] tile_j     Tile size along the second dimension.
 * @param[in] tile_k     Tile size along the third dimension.
 *
 * NOTE(review): presumably the volume is processed tile by tile, with the
 * callback receiving each tile's start indices and extents -- not stated in
 * this header; confirm against the implementation.
 */
void legacy_pthreadpool_compute_3d_tiled(
legacy_pthreadpool_t threadpool,
legacy_pthreadpool_function_3d_tiled_t function,
void* argument,
size_t range_i,
size_t range_j,
size_t range_k,
size_t tile_i,
size_t tile_j,
size_t tile_k);
/**
 * Tiled 4D entry point.
 *
 * @param[in] threadpool The thread pool handle.
 * @param[in] function   Callback of type
 *                       legacy_pthreadpool_function_4d_tiled_t
 *                       (void* plus eight size_t values).
 * @param[in] argument   Opaque pointer supplied alongside the callback.
 * @param[in] range_i    Size of the range along the first dimension.
 * @param[in] range_j    Size of the range along the second dimension.
 * @param[in] range_k    Size of the range along the third dimension.
 * @param[in] range_l    Size of the range along the fourth dimension.
 * @param[in] tile_i     Tile size along the first dimension.
 * @param[in] tile_j     Tile size along the second dimension.
 * @param[in] tile_k     Tile size along the third dimension.
 * @param[in] tile_l     Tile size along the fourth dimension.
 *
 * NOTE(review): presumably the 4D range is processed tile by tile, with the
 * callback receiving each tile's start indices and extents -- not stated in
 * this header; confirm against the implementation.
 */
void legacy_pthreadpool_compute_4d_tiled(
legacy_pthreadpool_t threadpool,
legacy_pthreadpool_function_4d_tiled_t function,
void* argument,
size_t range_i,
size_t range_j,
size_t range_k,
size_t range_l,
size_t tile_i,
size_t tile_j,
size_t tile_k,
size_t tile_l);
/**
 * Terminates threads in the thread pool and releases associated resources.
 *
 * @warning Accessing the thread pool after a call to this function constitutes
 * undefined behaviour and may cause data corruption.
 *
 * @param[in,out] threadpool The thread pool to destroy.
 *
 * NOTE(review): whether a NULL @a threadpool is accepted is not stated in
 * this header -- confirm against the implementation before passing NULL.
 */
void legacy_pthreadpool_destroy(legacy_pthreadpool_t threadpool);
/*
 * When USE_INTERNAL_PTHREADPOOL_IMPL is defined, the unprefixed pthreadpool
 * names are aliased to the legacy_ declarations in this header. The aliases
 * are object-like macros, so every later use of one of these identifiers in
 * the including translation unit is rewritten to its legacy_ counterpart.
 */
#ifdef USE_INTERNAL_PTHREADPOOL_IMPL
/* Handle and callback type aliases. */
#define pthreadpool_t legacy_pthreadpool_t
#define pthreadpool_function_1d_t legacy_pthreadpool_function_1d_t
#define pthreadpool_function_1d_tiled_t legacy_pthreadpool_function_1d_tiled_t
#define pthreadpool_function_2d_t legacy_pthreadpool_function_2d_t
#define pthreadpool_function_2d_tiled_t legacy_pthreadpool_function_2d_tiled_t
#define pthreadpool_function_3d_tiled_t legacy_pthreadpool_function_3d_tiled_t
#define pthreadpool_function_4d_tiled_t legacy_pthreadpool_function_4d_tiled_t
/* Pool lifetime and query function aliases. */
#define pthreadpool_create legacy_pthreadpool_create
#define pthreadpool_destroy legacy_pthreadpool_destroy
#define pthreadpool_get_threads_count legacy_pthreadpool_get_threads_count
/* Parallel-compute function aliases. */
#define pthreadpool_compute_1d legacy_pthreadpool_compute_1d
#define pthreadpool_parallelize_1d legacy_pthreadpool_parallelize_1d
#define pthreadpool_compute_1d_tiled legacy_pthreadpool_compute_1d_tiled
#define pthreadpool_compute_2d legacy_pthreadpool_compute_2d
#define pthreadpool_compute_2d_tiled legacy_pthreadpool_compute_2d_tiled
#define pthreadpool_compute_3d_tiled legacy_pthreadpool_compute_3d_tiled
#define pthreadpool_compute_4d_tiled legacy_pthreadpool_compute_4d_tiled
#endif /* USE_INTERNAL_PTHREADPOOL_IMPL */
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif // CAFFE2_UTILS_PTHREADPOOL_H_
|