/*******************************************************************************
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
/// @example getting_started.cpp
/// @copybrief getting_started_cpp
/// > Annotated version: @ref getting_started_cpp
#include <cmath>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <string>
#include <vector>
#include "oneapi/dnnl/dnnl.hpp"
#include "oneapi/dnnl/dnnl_debug.h"
#include "example_utils.hpp"
using namespace dnnl;
// [Prologue]
/// @page getting_started_cpp oneDNN API Basic Workflow Tutorial
///
/// This C++ API example demonstrates the basics of the oneDNN programming model.
///
/// > Example code: @ref getting_started.cpp
///
/// In particular, it shows:
/// - How to create oneDNN memory objects.
/// - How to get data from the user's buffer into a oneDNN memory object.
/// - How a tensor's logical dimensions and memory object formats relate.
/// - How to create oneDNN primitives.
/// - How to execute the primitives.
///
/// The example uses the ReLU operation and comprises the following steps:
/// 1. Creating @ref getting_started_cpp_sub1 to execute a primitive.
/// 2. Performing @ref getting_started_cpp_sub2.
/// 3. @ref getting_started_cpp_sub3 (using different flavors).
/// 4. @ref getting_started_cpp_sub4.
/// 5. @ref getting_started_cpp_sub5.
/// 6. @ref getting_started_cpp_sub6 (checking that the resulting image does
/// not contain negative values).
///
/// These steps are implemented in the @ref getting_started_cpp_tutorial, which
/// in turn is called from @ref getting_started_cpp_main (which is also
/// responsible for error handling).
///
/// @section getting_started_cpp_headers Public headers
///
/// To start using oneDNN we must first include the @ref dnnl.hpp
/// header file in the program. We also include @ref dnnl_debug.h, which
/// contains some debugging facilities like returning a string representation
/// for common oneDNN C types.
// [Prologue]
/// @page getting_started_cpp
/// @section getting_started_cpp_tutorial getting_started_tutorial() function
///
void getting_started_tutorial(engine::kind engine_kind) {
    /// @page getting_started_cpp
    /// @subsection getting_started_cpp_sub1 Engine and stream
    ///
    /// All oneDNN primitives and memory objects are attached to a
    /// particular @ref dnnl::engine, which is an abstraction of a
    /// computational device (see also @ref dev_guide_basic_concepts). The
    /// primitives are created and optimized for the device they are attached
    /// to and the memory objects refer to memory residing on the
    /// corresponding device. In particular, that means neither memory objects
    /// nor primitives that were created for one engine can be used on
    /// another.
    ///
    /// To create an engine, we should specify the @ref dnnl::engine::kind
    /// and the index of the device of the given kind.
    ///
    /// @snippet getting_started.cpp Initialize engine
    // [Initialize engine]
    engine eng(engine_kind, 0);
    // [Initialize engine]

    /// In addition to an engine, all primitives require a @ref dnnl::stream
    /// for the execution. The stream encapsulates an execution context and is
    /// tied to a particular engine.
    ///
    /// The creation is pretty straightforward:
    /// @snippet getting_started.cpp Initialize stream
    // [Initialize stream]
    stream engine_stream(eng);
    // [Initialize stream]

    /// In the simple cases, when a program works with one device only (e.g.
    /// only on CPU), an engine and a stream can be created once and used
    /// throughout the program. Some frameworks create singleton objects that
    /// hold oneDNN engine and stream and use them throughout the code.

    /// @subsection getting_started_cpp_sub2 Data preparation (code outside of oneDNN)
    ///
    /// Now that the preparation work is done, let's create some data to work
    /// with. We will create a 4D tensor in NHWC format, which is quite
    /// popular in many frameworks.
    ///
    /// Note that even though we work with one image only, the image tensor
    /// is still 4D. The extra dimension (here N) corresponds to the
    /// batch, and, in case of a single image, is equal to 1. It is pretty
    /// typical to have the batch dimension even when working with a single
    /// image.
    ///
    /// In oneDNN, all CNN primitives assume that tensors have the batch
    /// dimension, which is always the first logical dimension (see also @ref
    /// dev_guide_conventions).
    ///
    /// @snippet getting_started.cpp Create user's data
    // [Create user's data]
    const int N = 1, H = 13, W = 13, C = 3;

    // Compute physical strides for each dimension
    const int stride_N = H * W * C;
    const int stride_H = W * C;
    const int stride_W = C;
    const int stride_C = 1;

    // An auxiliary function that maps logical index to the physical offset
    auto offset = [=](int n, int h, int w, int c) {
        return n * stride_N + h * stride_H + w * stride_W + c * stride_C;
    };

    // The image size
    const int image_size = N * H * W * C;

    // Allocate a buffer for the image
    std::vector<float> image(image_size);

    // Initialize the image with some values
    for (int n = 0; n < N; ++n)
        for (int h = 0; h < H; ++h)
            for (int w = 0; w < W; ++w)
                for (int c = 0; c < C; ++c) {
                    int off = offset(
                            n, h, w, c); // Get the physical offset of a pixel
                    image[off] = -std::cos(off / 10.f);
                }
    // [Create user's data]

    /// @subsection getting_started_cpp_sub3 Wrapping data into a oneDNN memory object
    ///
    /// Now, having the image ready, let's wrap it in a @ref dnnl::memory
    /// object to be able to pass the data to oneDNN primitives.
    ///
    /// Creating @ref dnnl::memory comprises two steps:
    ///   1. Initializing the @ref dnnl::memory::desc struct (also referred to
    ///      as a memory descriptor), which only describes the tensor data and
    ///      doesn't contain the data itself. Memory descriptors are used to
    ///      create @ref dnnl::memory objects and to initialize primitive
    ///      descriptors (shown later in the example).
    ///   2. Creating the @ref dnnl::memory object itself (also referred to as
    ///      a memory object), based on the memory descriptor initialized in
    ///      step 1, an engine, and, optionally, a handle to data. The
    ///      memory object is used when a primitive is executed.
    ///
    /// Thanks to the
    /// [list initialization](https://en.cppreference.com/w/cpp/language/list_initialization)
    /// introduced in C++11, it is possible to combine these two steps whenever
    /// a memory descriptor is not used anywhere else but in creating a @ref
    /// dnnl::memory object.
    ///
    /// However, for the sake of demonstration, we will show both steps
    /// explicitly.

    /// @subsubsection getting_started_cpp_sub31 Memory descriptor
    ///
    /// To initialize the @ref dnnl::memory::desc, we need to pass:
    ///   1. The tensor's dimensions, **the semantic order** of which is
    ///      defined by **the primitive** that will use this memory
    ///      (descriptor).
    ///
    ///      @warning
    ///         Memory descriptors and objects are not aware of any meaning of
    ///         the data they describe or contain.
    ///   2. The data type for the tensor (@ref dnnl::memory::data_type).
    ///   3. The memory format tag (@ref dnnl::memory::format_tag) that
    ///      describes how the data is going to be laid out in the device's
    ///      memory. The memory format is required for the primitive to
    ///      correctly handle the data.
    ///
    /// The code:
    /// @snippet getting_started.cpp Init src_md
    // [Init src_md]
    auto src_md = memory::desc(
            {N, C, H, W}, // logical dims, the order is defined by a primitive
            memory::data_type::f32, // tensor's data type
            memory::format_tag::nhwc // memory format, NHWC in this case
    );
    // [Init src_md]

    /// The first thing to notice here is that we pass dimensions as `{N, C,
    /// H, W}` while it might seem more natural to pass `{N, H, W, C}`, which
    /// better corresponds to the user's code. This is because oneDNN
    /// CNN primitives like ReLU always expect tensors in the following form:
    ///
    /// | Spatial dim | Tensor dimensions
    /// | :--         | :--
    /// | 0D          | \f$N \times C\f$
    /// | 1D          | \f$N \times C \times W\f$
    /// | 2D          | \f$N \times C \times H \times W\f$
    /// | 3D          | \f$N \times C \times D \times H \times W\f$
    ///
    /// where:
    /// - \f$N\f$ is a batch dimension (discussed above),
    /// - \f$C\f$ is channel (aka feature maps) dimension, and
    /// - \f$D\f$, \f$H\f$, and \f$W\f$ are spatial dimensions.
    ///
    /// Now that the logical order of dimension is defined, we need to specify
    /// the memory format (the third parameter), which describes how logical
    /// indices map to the offset in memory. This is the place where the user's
    /// format NHWC comes into play. oneDNN has different @ref
    /// dnnl::memory::format_tag values that cover the most popular memory
    /// formats like NCHW, NHWC, CHWN, and some others.
    ///
    /// The memory descriptor for the image is called `src_md`. The `src` part
    /// comes from the fact that the image will be a source for the ReLU
    /// primitive (that is, we formulate memory names from the primitive
    /// perspective; hence we will use `dst` to name the output memory). The
    /// `md` is an initialism for Memory Descriptor.

    /// @paragraph getting_started_cpp_sub311 Alternative way to create a memory descriptor
    ///
    /// Before we continue with memory creation, let us show the alternative
    /// way to create the same memory descriptor: instead of using the
    /// @ref dnnl::memory::format_tag, we can directly specify the strides
    /// of each tensor dimension:
    /// @snippet getting_started.cpp Init alt_src_md
    // [Init alt_src_md]
    auto alt_src_md = memory::desc(
            {N, C, H, W}, // logical dims, the order is defined by a primitive
            memory::data_type::f32, // tensor's data type
            {stride_N, stride_C, stride_H, stride_W} // the strides
    );

    // Sanity check: the memory descriptors should be the same
    if (src_md != alt_src_md)
        throw std::logic_error("Memory descriptor initialization mismatch.");
    // [Init alt_src_md]

    /// Just as before, the tensor's dimensions come in the `N, C, H, W` order
    /// as required by CNN primitives. To define the physical memory format,
    /// the strides are passed as the third parameter. Note that the order of
    /// the strides corresponds to the order of the tensor's dimensions.
    ///
    /// @warning
    ///     Using the wrong order might lead to incorrect results or even a
    ///     crash.

    /// @subsubsection getting_started_cpp_sub32 Creating a memory object
    ///
    /// Having a memory descriptor and an engine prepared, let's create
    /// input and output memory objects for a ReLU primitive.
    /// @snippet getting_started.cpp Create memory objects
    // [Create memory objects]
    // src_mem contains a copy of image after write_to_dnnl_memory function
    auto src_mem = memory(src_md, eng);
    write_to_dnnl_memory(image.data(), src_mem);

    // For dst_mem the library allocates buffer
    auto dst_mem = memory(src_md, eng);
    // [Create memory objects]

    /// Both memory objects are created with the constructor that takes only
    /// a memory descriptor and an engine, which instructs the library to
    /// allocate the underlying buffers. The image is then copied into
    /// `src_mem` with the `write_to_dnnl_memory` example helper.
    ///
    /// Alternatively, a memory object can wrap a buffer the user already
    /// owns by means of the
    /// @ref dnnl::memory::memory(const dnnl::memory::desc &, const dnnl::engine &, void *)
    /// constructor that takes a buffer pointer as its last argument.
    ///
    /// The key differences between these two approaches are:
    /// 1. The library owns the memory it allocates (as for `dst_mem`) and
    ///    will deallocate it when the memory object is destroyed. That means
    ///    the memory buffer can be used only while `dst_mem` is alive.
    /// 2. Library-allocated buffers have good alignment, which typically
    ///    results in better performance.
    ///
    /// @note
    ///     Memory allocated outside of the library and passed to oneDNN
    ///     should have good alignment for better performance.
    ///
    /// In the subsequent section we will show how to read the data back
    /// from the `dst_mem` memory object.

    /// @subsection getting_started_cpp_sub4 Creating a ReLU primitive
    ///
    /// Let's now create a ReLU primitive.
    ///
    /// The library implements ReLU primitive as a particular algorithm of a
    /// more general @ref dev_guide_eltwise primitive, which applies a specified
    /// function to each and every element of the source tensor.
    ///
    /// Just as in the case of @ref dnnl::memory, a user should always go
    /// through (at least) two creation steps (which however, can be sometimes
    /// combined thanks to C++11):
    /// 1. Create an operation primitive descriptor (here @ref
    ///    dnnl::eltwise_forward::primitive_desc) that defines operation
    ///    parameters and is a **lightweight** descriptor of the actual
    ///    algorithm that **implements** the given operation.
    ///    The user can query different characteristics of the chosen
    ///    implementation such as memory consumptions and some others that will
    ///    be covered in the next topic (@ref memory_format_propagation_cpp).
    /// 2. Create a primitive (here @ref dnnl::eltwise_forward) that can be
    ///    executed on memory objects to compute the operation.
    ///
    /// oneDNN separates steps 1 and 2 to enable the user to inspect details of
    /// a primitive implementation prior to creating the primitive. Creating
    /// the primitive may be expensive, because, for example, oneDNN generates
    /// the optimized computational code on the fly.
    ///
    ///@note
    ///    Primitive creation might be a very expensive operation, so consider
    ///    creating primitive objects once and executing them multiple times.
    ///
    /// The code:
    /// @snippet getting_started.cpp Create a ReLU primitive
    // [Create a ReLU primitive]
    // ReLU primitive descriptor, which corresponds to a particular
    // implementation in the library
    auto relu_pd = eltwise_forward::primitive_desc(
            eng, // an engine the primitive will be created for
            prop_kind::forward_inference, algorithm::eltwise_relu,
            src_md, // source memory descriptor for an operation to work on
            src_md, // destination memory descriptor for an operation to work on
            0.f, // alpha parameter means negative slope in case of ReLU
            0.f // beta parameter is ignored in case of ReLU
    );

    // ReLU primitive
    auto relu = eltwise_forward(relu_pd); // !!! this can take quite some time
    // [Create a ReLU primitive]

    /// A note about variable names. Similar to the `_md` suffix used for
    /// memory descriptors, we use `_pd` for the primitive descriptors, and no
    /// suffix for primitives themselves.
    ///
    /// It is worth mentioning that we specified the exact tensor and its
    /// memory format when we were initializing the `relu_pd`. That means
    /// `relu` primitive would perform computations with memory objects that
    /// correspond to this description. This is the one and only one way of
    /// creating non-compute-intensive primitives like @ref dev_guide_eltwise,
    /// @ref dev_guide_batch_normalization, and others.
    ///
    /// Compute-intensive primitives (like @ref dev_guide_convolution) have an
    /// ability to define the appropriate memory format on their own. This is
    /// one of the key features of the library and will be discussed in detail
    /// in the next topic: @ref memory_format_propagation_cpp.

    /// @subsection getting_started_cpp_sub5 Executing the ReLU primitive
    ///
    /// Finally, let's execute the primitive and wait for its completion.
    ///
    /// The input and output memory objects are passed to the `execute()`
    /// method using a <tag, memory> map. Each tag specifies what kind of
    /// tensor each memory object represents. All @ref dev_guide_eltwise
    /// primitives require the map to have two elements: a source memory
    /// object (input) and a destination memory (output).
    ///
    /// A primitive is executed in a stream (the first parameter of the
    /// `execute()` method). Depending on a stream kind, an execution might be
    /// blocking or non-blocking. This means that we need to call @ref
    /// dnnl::stream::wait before accessing the results.
    ///
    /// @snippet getting_started.cpp Execute ReLU primitive
    // [Execute ReLU primitive]
    // Execute ReLU (out-of-place)
    relu.execute(engine_stream, // The execution stream
            {
                    // A map with all inputs and outputs
                    {DNNL_ARG_SRC, src_mem}, // Source tag and memory obj
                    {DNNL_ARG_DST, dst_mem}, // Destination tag and memory obj
            });

    // Wait for the stream to complete the execution
    engine_stream.wait();
    // [Execute ReLU primitive]

    /// The @ref dev_guide_eltwise is one of the primitives that support
    /// in-place operations, meaning that the source and destination memory can
    /// be the same. To perform in-place transformation, the user must pass the
    /// same memory object for both the `DNNL_ARG_SRC` and
    /// `DNNL_ARG_DST` tags:
    /// @snippet getting_started.cpp Execute ReLU primitive in-place
    // [Execute ReLU primitive in-place]
    // Execute ReLU (in-place)
    // relu.execute(engine_stream,  {
    //          {DNNL_ARG_SRC, src_mem},
    //          {DNNL_ARG_DST, src_mem},
    //         });
    // [Execute ReLU primitive in-place]

    /// @page getting_started_cpp
    /// @subsection getting_started_cpp_sub6 Obtaining the result and validation
    ///
    /// Now that we have the computed result, let's validate that it is
    /// actually correct. The result is stored in the `dst_mem` memory object.
    /// We copy it into a host-side buffer with the `read_from_dnnl_memory`
    /// example helper instead of dereferencing the raw data handle directly.
    ///
    /// @warning
    ///     The @ref dnnl::memory::get_data_handle() returns a raw handle
    ///     to the buffer, the type of which is engine specific. For the CPU
    ///     engine the buffer is always a pointer to `void`, which can safely
    ///     be used. However, for engines other than CPU the handle might be
    ///     runtime-specific type, such as `cl_mem` in case of GPU/OpenCL.
    ///
    /// @snippet getting_started.cpp Check the results
    // [Check the results]
    // Copy the contents of `dst_mem` into a host-side `float` buffer.
    // Interpreting the data as `float` is safe since we created `dst_mem`
    // as f32 tensor with known memory format.
    std::vector<float> relu_image(image_size);
    read_from_dnnl_memory(relu_image.data(), dst_mem);

    // Check the results. With a zero negative slope, ReLU either keeps a
    // value as is or replaces it with zero, so an exact comparison is used.
    for (int n = 0; n < N; ++n)
        for (int h = 0; h < H; ++h)
            for (int w = 0; w < W; ++w)
                for (int c = 0; c < C; ++c) {
                    int off = offset(
                            n, h, w, c); // get the physical offset of a pixel
                    float expected = image[off] < 0
                            ? 0.f
                            : image[off]; // expected value
                    if (relu_image[off] != expected) {
                        std::cout << "At index(" << n << ", " << c << ", " << h
                                  << ", " << w << ") expect " << expected
                                  << " but got " << relu_image[off]
                                  << std::endl;
                        throw std::logic_error("Accuracy check failed.");
                    }
                }
    // [Check the results]
}
/// @page getting_started_cpp
///
/// @section getting_started_cpp_main main() function
///
/// We now just call everything we prepared earlier.
///
/// Because we are using the oneDNN C++ API, we use exceptions to handle errors
/// (see @ref dev_guide_c_and_cpp_apis).
/// The oneDNN C++ API throws exceptions of type @ref dnnl::error,
/// which contains the error status (of type @ref dnnl_status_t) and a
/// human-readable error message accessible through regular `what()` method.
/// @page getting_started_cpp
/// @snippet getting_started.cpp Main
// [Main]
int main(int argc, char **argv) {
int exit_code = 0;
engine::kind engine_kind = parse_engine_kind(argc, argv);
try {
getting_started_tutorial(engine_kind);
} catch (dnnl::error &e) {
std::cout << "oneDNN error caught: " << std::endl
<< "\tStatus: " << dnnl_status2str(e.status) << std::endl
<< "\tMessage: " << e.what() << std::endl;
exit_code = 1;
} catch (std::string &e) {
std::cout << "Error in the example: " << e << "." << std::endl;
exit_code = 2;
} catch (std::exception &e) {
std::cout << "Error in the example: " << e.what() << "." << std::endl;
exit_code = 3;
}
std::cout << "Example " << (exit_code ? "failed" : "passed") << " on "
<< engine_kind2str_upper(engine_kind) << "." << std::endl;
finalize();
return exit_code;
}
// [Main]
/// @page getting_started_cpp
///
/// <b></b>
///
/// After compiling and running the example, the output should be just the
/// following (the engine name reflects the selected engine kind):
///
/// ~~~
/// Example passed on CPU.
/// ~~~
///
/// Users are encouraged to experiment with the code to familiarize themselves
/// with the concepts. In particular, one of the changes that might be of
/// interest is to spoil some of the library calls to check how error handling
/// happens. For instance, if we replace
///
/// ~~~cpp
/// relu.execute(engine_stream, {
/// {DNNL_ARG_SRC, src_mem},
/// {DNNL_ARG_DST, dst_mem},
/// });
/// ~~~
///
/// with
///
/// ~~~cpp
/// relu.execute(engine_stream, {
/// {DNNL_ARG_SRC, src_mem},
/// // {DNNL_ARG_DST, dst_mem}, // Oops, forgot about this one
/// });
/// ~~~
///
/// we should get the following output:
///
/// ~~~
/// oneDNN error caught:
/// Status: invalid_arguments
/// Message: could not execute a primitive
/// Example failed on CPU.
/// ~~~