File: nppcore.h

package info (click to toggle)
nvidia-cuda-toolkit 12.4.1-2
  • links: PTS, VCS
  • area: non-free
  • in suites: forky, trixie
  • size: 18,505,836 kB
  • sloc: ansic: 203,477; cpp: 64,769; python: 34,699; javascript: 22,006; xml: 13,410; makefile: 3,085; sh: 2,343; perl: 352
file content (192 lines) | stat: -rw-r--r-- 7,426 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
 /* Copyright 2009-2022 NVIDIA CORPORATION & AFFILIATES.  All rights reserved. 
  * 
  * NOTICE TO LICENSEE: 
  * 
  * The source code and/or documentation ("Licensed Deliverables") are 
  * subject to NVIDIA intellectual property rights under U.S. and 
  * international Copyright laws. 
  * 
  * The Licensed Deliverables contained herein are PROPRIETARY and 
  * CONFIDENTIAL to NVIDIA and are being provided under the terms and 
  * conditions of a form of NVIDIA software license agreement by and 
  * between NVIDIA and Licensee ("License Agreement") or electronically 
  * accepted by Licensee.  Notwithstanding any terms or conditions to 
  * the contrary in the License Agreement, reproduction or disclosure 
  * of the Licensed Deliverables to any third party without the express 
  * written consent of NVIDIA is prohibited. 
  * 
  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 
  * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 
  * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  THEY ARE 
  * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 
  * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 
  * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 
  * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 
  * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 
  * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 
  * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 
  * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 
  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
  * OF THESE LICENSED DELIVERABLES. 
  * 
  * U.S. Government End Users.  These Licensed Deliverables are a 
  * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 
  * 1995), consisting of "commercial computer software" and "commercial 
  * computer software documentation" as such terms are used in 48 
  * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government 
  * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and 
  * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 
  * U.S. Government End Users acquire the Licensed Deliverables with 
  * only those rights set forth herein. 
  * 
  * Any use of the Licensed Deliverables in individual and commercial 
  * software must include, in the user documentation and internal 
  * comments to the code, the above Disclaimer and U.S. Government End 
  * Users Notice. 
  */ 
#ifndef NV_NPPCORE_H
#define NV_NPPCORE_H

#include <cuda_runtime_api.h>

/**
 * \file nppcore.h
 * Basic NPP functionality. 
 *  This file contains functions to query the NPP version as well as 
 *  info about the CUDA compute capabilities on a given computer.
 */
 
#include "nppdefs.h"

#ifdef __cplusplus
#ifdef NPP_PLUS
using namespace nppPlusV;
#else
extern "C" {
#endif
#endif
 
/** 
 * \page core_npp NPP Core
 * @defgroup core_npp NPP Core
 * Basic functions for library management, in particular library version
 * and device property query functions.
 * @{
 */

/**
 * Get the NPP library version.
 *
 * \return A struct containing separate values for major and minor revision 
 *      and build number.
 */
const NppLibraryVersion * 
nppGetLibVersion(void);

/**
 * Get the number of Streaming Multiprocessors (SM) on the active CUDA device.
 *
 * \return Number of SMs of the default CUDA device.
 */
int 
nppGetGpuNumSMs(void);

/**
 * Get the maximum number of threads per block on the active CUDA device.
 *
 * \return Maximum number of threads per block on the active CUDA device.
 */
int 
nppGetMaxThreadsPerBlock(void);

/**
 * Get the maximum number of threads per SM for the active GPU
 *
 * \return Maximum number of threads per SM for the active GPU
 */
int 
nppGetMaxThreadsPerSM(void);

/**
 * Get the maximum number of threads per SM, maximum threads per block, and number of SMs for the active GPU
 *
 * \return cudaSuccess for success, -1 for failure
 */
int 
nppGetGpuDeviceProperties(int * pMaxThreadsPerSM, int * pMaxThreadsPerBlock, int * pNumberOfSMs);

/** 
 * Get the name of the active CUDA device.
 *
 * \return Name string of the active graphics-card/compute device in a system.
 */
const char * 
nppGetGpuName(void);

/**
 * Get the NPP CUDA stream.
 * NPP enables concurrent device tasks via a global stream state varible.
 * The NPP stream by default is set to stream 0, i.e. non-concurrent mode.
 * A user can set the NPP stream to any valid CUDA stream. All CUDA commands
 * issued by NPP (e.g. kernels launched by the NPP library) are then
 * issed to that NPP stream.
 */
cudaStream_t
nppGetStream(void);

/**
 * Get the current NPP managed CUDA stream context as set by calls to nppSetStream().
 * NPP enables concurrent device tasks via an NPP maintained global stream state context.
 * The NPP stream by default is set to stream 0, i.e. non-concurrent mode.
 * A user can set the NPP stream to any valid CUDA stream which will update the current NPP managed stream state context 
 * or supply application initialized stream contexts to NPP calls. All CUDA commands
 * issued by NPP (e.g. kernels launched by the NPP library) are then
 * issed to the current NPP managed stream or to application supplied stream contexts depending on whether 
 * the stream context is passed to the NPP function or not.  NPP managed stream context calls (those without stream context parameters) 
 * can be intermixed with application managed stream context calls but any NPP managed stream context calls will always use the most recent 
 * stream set by nppSetStream() or the NULL stream if nppSetStream() has never been called. 
 */
NppStatus
nppGetStreamContext(NppStreamContext * pNppStreamContext);

/**
 * Get the number of SMs on the device associated with the current NPP CUDA stream.
 * NPP enables concurrent device tasks via a global stream state varible.
 * The NPP stream by default is set to stream 0, i.e. non-concurrent mode.
 * A user can set the NPP stream to any valid CUDA stream. All CUDA commands
 * issued by NPP (e.g. kernels launched by the NPP library) are then
 * issed to that NPP stream.  This call avoids a cudaGetDeviceProperties() call.
 */
unsigned int
nppGetStreamNumSMs(void);

/**
 * Get the maximum number of threads per SM on the device associated with the current NPP CUDA stream.
 * NPP enables concurrent device tasks via a global stream state varible.
 * The NPP stream by default is set to stream 0, i.e. non-concurrent mode.
 * A user can set the NPP stream to any valid CUDA stream. All CUDA commands
 * issued by NPP (e.g. kernels launched by the NPP library) are then
 * issed to that NPP stream.  This call avoids a cudaGetDeviceProperties() call.
 */
unsigned int
nppGetStreamMaxThreadsPerSM(void);

/**
 * Set the NPP CUDA stream.  This function now returns an error if a problem occurs with Cuda stream management. 
 *   This function should only be called if a call to nppGetStream() returns a stream number which is different from
 *   the desired stream since unnecessarily flushing the current stream can significantly affect performance.
 * \see nppGetStream()
 */
NppStatus
nppSetStream(cudaStream_t hStream);

/** @} core_npp */ 

#ifdef __cplusplus
#ifndef NPP_PLUS
} /* extern "C" */
#endif
#endif

#endif /* NV_NPPCORE_H */