/*
* Copyright 2007-2023 NVIDIA Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CUDACOREDUMP_H__
#define __CUDACOREDUMP_H__
#include "cuda_stdint.h"
/*
* cudacoredump.h - Public format description of the CUDA coredump
*/
/* ==================== */
/* ===== Overview ===== */
/* ==================== */
/*
* CUDA coredumps are ELF files with the following identifying header fields:
* - abi: ELFOSABI_CUDA (0x33)
* - machine: EM_CUDA (0xbe)
* - type: ET_CORE (4)
*
* Coredump data is stored in separate sections, each of which is described
* below. Note that new fields can be added to each of these sections in new
* driver versions, so any coredump readers need to be careful and check each
* section's element size before accessing the fields that were not present
* in the baseline version of the section. Section descriptions below make it
* clear which fields might not be present.
*
* Coredump sections are named hierarchically. The hierarchy looks like this:
*   - Global memory
*   - CUDA device information
*     - CUDA context information
*       - Loaded modules information
*         - Module's relocated ELF image (cubin)
*         - Module's non-relocated ELF image (cubin)
*     - CUDA grid information
*       - Grid parameter memory
*       - Grid constbank information
*     - SM information
*       - Block (CTA) information
*         - Block shared memory
*         - Warp information
*           - Warp uniform registers
*           - Warp uniform predicates
*           - Thread information
*             - Thread local memory
*             - Thread registers
*             - Thread predicates
*             - Thread call stack
*
* More information about each particular section is given below.
*
* ===== Brief format history =====
*
* CUDA Driver r346:
* - Initial CUDA coredump functionality release
* CUDA Driver r400:
* - Added uniform registers and uniform predicates
* CUDA Driver r525:
* - Added cluster index and cluster dimensions
* - Added number of registers per warp
* CUDA Driver r550:
* - Added constbank information
*/
/* ======================================= */
/* ===== Section-related definitions ===== */
/* ======================================= */
/* Fallback for builds where no other header has defined SHT_LOUSER yet.
 * All CUDA coredump section types below are expressed relative to it.
 */
#if !defined(SHT_LOUSER)
#define SHT_LOUSER 0x80000000
#endif

/* Section header types used by CUDA coredump sections.
 * Each value is SHT_LOUSER plus a small fixed offset; the detailed section
 * descriptions further down in this file say which type belongs to which
 * section.
 */
typedef enum {
    /* Raw memory sections */
    CUDBG_SHT_MANAGED_MEM = SHT_LOUSER + 1,  /* global memory (managed) */
    CUDBG_SHT_GLOBAL_MEM  = SHT_LOUSER + 2,  /* global memory */
    CUDBG_SHT_LOCAL_MEM   = SHT_LOUSER + 3,  /* thread local memory */
    CUDBG_SHT_SHARED_MEM  = SHT_LOUSER + 4,  /* block shared memory */

    /* Thread registers, module images and call stacks */
    CUDBG_SHT_DEV_REGS    = SHT_LOUSER + 5,  /* thread registers */
    CUDBG_SHT_ELF_IMG     = SHT_LOUSER + 6,  /* non-relocated cubin */
    CUDBG_SHT_RELF_IMG    = SHT_LOUSER + 7,  /* relocated cubin */
    CUDBG_SHT_BT          = SHT_LOUSER + 8,  /* thread call stack */

    /* Descriptor tables */
    CUDBG_SHT_DEV_TABLE   = SHT_LOUSER + 9,  /* CUDA device table */
    CUDBG_SHT_CTX_TABLE   = SHT_LOUSER + 10, /* CUDA context table */
    CUDBG_SHT_SM_TABLE    = SHT_LOUSER + 11, /* SM table */
    CUDBG_SHT_GRID_TABLE  = SHT_LOUSER + 12, /* CUDA grid table */
    CUDBG_SHT_CTA_TABLE   = SHT_LOUSER + 13, /* block (CTA) table */
    CUDBG_SHT_WP_TABLE    = SHT_LOUSER + 14, /* warp table */
    CUDBG_SHT_LN_TABLE    = SHT_LOUSER + 15, /* thread (lane) table */
    CUDBG_SHT_MOD_TABLE   = SHT_LOUSER + 16, /* loaded modules table */

    CUDBG_SHT_DEV_PRED    = SHT_LOUSER + 17, /* thread predicates */
    CUDBG_SHT_PARAM_MEM   = SHT_LOUSER + 18, /* grid parameter memory */

    /* Since CUDA Driver r400 */
    CUDBG_SHT_DEV_UREGS   = SHT_LOUSER + 19, /* warp uniform registers */
    CUDBG_SHT_DEV_UPRED   = SHT_LOUSER + 20, /* warp uniform predicates */

    /* Since CUDA Driver r550 */
    CUDBG_SHT_CB_TABLE    = SHT_LOUSER + 21, /* grid constbank table */
} CudbgSectionHeaderTypes;
/* Prefixes of CUDA coredump section names.
 * Most section names are built by appending an index suffix (device, SM,
 * CTA, warp, lane, ...) to one of these prefixes; the exact format of each
 * name is given in the detailed section descriptions further down.
 */
#define CUDBG_SHNAME_GLOBAL    ".cudbg.global"    /* global memory */
#define CUDBG_SHNAME_LOCAL     ".cudbg.local"     /* thread local memory */
#define CUDBG_SHNAME_SHARED    ".cudbg.shared"    /* block shared memory */
#define CUDBG_SHNAME_REGS      ".cudbg.regs"      /* thread registers */
#define CUDBG_SHNAME_PARAM     ".cudbg.param"     /* grid parameter memory */
#define CUDBG_SHNAME_PRED      ".cudbg.pred"      /* thread predicates */
#define CUDBG_SHNAME_DEVTABLE  ".cudbg.devtbl"    /* CUDA device table */
#define CUDBG_SHNAME_CTXTABLE  ".cudbg.ctxtbl"    /* CUDA context table */
#define CUDBG_SHNAME_SMTABLE   ".cudbg.smtbl"     /* SM table */
#define CUDBG_SHNAME_GRIDTABLE ".cudbg.gridtbl"   /* CUDA grid table */
#define CUDBG_SHNAME_CTATABLE  ".cudbg.ctatbl"    /* block (CTA) table */
#define CUDBG_SHNAME_WPTABLE   ".cudbg.wptbl"     /* warp table */
#define CUDBG_SHNAME_LNTABLE   ".cudbg.lntbl"     /* thread (lane) table */
#define CUDBG_SHNAME_BT        ".cudbg.bt"        /* thread call stack */
#define CUDBG_SHNAME_MODTABLE  ".cudbg.modtbl"    /* loaded modules table */
#define CUDBG_SHNAME_ELFIMG    ".cudbg.elfimg"    /* non-relocated cubin */
#define CUDBG_SHNAME_RELFIMG   ".cudbg.relfimg"   /* relocated cubin */

/* Since CUDA Driver r400 */
#define CUDBG_SHNAME_UREGS     ".cudbg.uregs"     /* warp uniform registers */
#define CUDBG_SHNAME_UPRED     ".cudbg.upred"     /* warp uniform predicates */

/* Since CUDA Driver r550 */
#define CUDBG_SHNAME_CBTABLE   ".cudbg.cbankstbl" /* grid constbank table */
/* ========================================= */
/* ===== Detailed section descriptions ===== */
/* ========================================= */
/* Global memory
*
* Contains raw global memory (does not have a separate struct in this file).
* These sections are dumped in order and their names contain a monotonically
* increasing index, referred to as <memIdx> below.
* Not present if dumping memory is disabled.
*
* Section name format: sprintf("%s.%d", CUDBG_SHNAME_GLOBAL, <memIdx>)
* Section header type: CUDBG_SHT_GLOBAL_MEM (or, if managed memory, CUDBG_SHT_MANAGED_MEM)
* Section element type: byte data
* Section addr: global address of the start of this memory block
* Section link: 0
* Section info: 0
*/
/*
* CUDA device table
*
* Contains descriptions of all CUDA devices visible to the application
* at the moment of coredump generation. The device index in this table is used
* in other section names and is referred to as <devIdx> below.
* Only one such section per coredump file.
*
* Section name format: CUDBG_SHNAME_DEVTABLE (no suffix)
* Section header type: CUDBG_SHT_DEV_TABLE
* Section element type: CudbgDeviceTableEntry
* Section link: 0
* Section info: 0
*
* Related sections:
*
* 1. CUDA context table, per device - see below
*
* 2. CUDA grid table, per device - see below
*
* 3. SM information, per device - see below
*/
typedef struct {
    /* Display name of the device, as an index into the string table. */
    uint64_t devName;
    /* Internal name of the device, as an index into the string table. */
    uint64_t devType;
    /* ISA version of the device, as an index into the string table. */
    uint64_t smType;

    /* CUDA device ID. */
    uint32_t devId;
    /* PCI bus ID and PCI device ID of the device. */
    uint32_t pciBusId, pciDevId;

    /* Number of SMs on this device. */
    uint32_t numSMs;
    /* Number of warps in each SM. */
    uint32_t numWarpsPerSM;
    /* Number of lanes in each warp. */
    uint32_t numLanesPerWarp;
    /* Upper bound on the number of registers per lane.
     * The actual per-grid and per-warp counts are stored in
     * CudbgGridTableEntry::numRegs and CudbgWarpTableEntry::numRegs.
     */
    uint32_t numRegsPerLane;
    /* Number of predicates per lane. */
    uint32_t numPredicatesPrLane;

    /* Major and minor version of the SM. */
    uint32_t smMajor, smMinor;
    /* Size of a GPU instruction, in bytes. */
    uint32_t instructionSize;
    /* Device status; holds a CUDBGResult value (see cudadebugger.h). */
    uint32_t status;

    /* ----- Added in CUDA Driver r400 -----
     * The fields below may be missing from older coredumps; check the
     * section's element size before reading them.
     */

    /* Number of uniform registers per warp. */
    uint32_t numUniformRegsPrWarp;
    /* Number of uniform predicates per warp. */
    uint32_t numUniformPredicatesPrWarp;
} CudbgDeviceTableEntry;
/*
* CUDA context table, per device
*
* Contains descriptions of all CUDA contexts for a particular device.
* The context index in this table is used in other section names
* and is referred to as <ctxIdx> below.
*
* Section name format: sprintf("%s.dev%d", CUDBG_SHNAME_CTXTABLE, devIdx)
* Section header type: CUDBG_SHT_CTX_TABLE
* Section element type: CudbgContextTableEntry
* Section link: section header index of the (unique) CUDA device table
* Section info: devIdx
*
* Related sections:
*
* 1. Loaded modules table, per context - see below
*/
typedef struct {
    /* Handle identifying this context. */
    uint64_t contextId;

    /* Global addresses at which the shared, local and global memory
     * windows start, respectively.
     */
    uint64_t sharedWindowBase;
    uint64_t localWindowBase;
    uint64_t globalWindowBase;

    /* CUDA device ID of the device this context belongs to. */
    uint32_t deviceIdx;
    /* Thread ID of the host thread owning this context. */
    uint32_t tid;
} CudbgContextTableEntry;
/*
* Loaded modules table, per context
*
* Contains information about all modules loaded in a particular context.
* The module index in this table is used in other section names
* and is referred to as <modIdx> below.
*
* Section name format: sprintf("%s.dev%d.ctx%d", CUDBG_SHNAME_MODTABLE, devIdx, ctxIdx)
* Section header type: CUDBG_SHT_MOD_TABLE
* Section element type: CudbgModuleTableEntry
* Section link: section header index of the corresponding CUDA context table
* Section info: ctxIdx
*
* Related sections:
*
* 1. Relocated ELF image (cubin) of the module
*
* Contains raw cubin data (does not have a separate struct in this file).
* There can be several sections with the same name since the module index
* is not a part of the section name. They can be distinguished by the
* section header's link or info fields.
*
* Section name format: sprintf("%s.dev%d.ctx%d", CUDBG_SHNAME_RELFIMG, devIdx, ctxIdx)
* Section header type: CUDBG_SHT_RELF_IMG
* Section element type: byte data
* Section link: section header index of the corresponding module table
* Section info: modIdx
*
* 2. Non-relocated ELF image (cubin) of the module
*
* Contains raw cubin data (does not have a separate struct in this file).
* There can be several sections with the same name since the module index
* is not a part of the section name. They can be distinguished by the
* section header's link or info fields.
* Not present if dumping non-relocated cubins is disabled.
*
* Section name format: sprintf("%s.dev%d.ctx%d", CUDBG_SHNAME_ELFIMG, devIdx, ctxIdx)
* Section header type: CUDBG_SHT_ELF_IMG
* Section element type: byte data
* Section link: section header index of the corresponding module table
* Section info: modIdx
*/
typedef struct {
    /* Handle identifying the loaded module. */
    uint64_t moduleHandle;
} CudbgModuleTableEntry;
/*
* CUDA grid table, per device
*
* Contains descriptions of all grids running on the device at the moment of
* coredump generation. The grid index in this table is used in other section
* names and is referred to as <gridIdx> below.
*
* Section name format: sprintf("%s.dev%d", CUDBG_SHNAME_GRIDTABLE, devIdx)
* Section header type: CUDBG_SHT_GRID_TABLE
* Section element type: CudbgGridTableEntry
* Section link: section header index of the (unique) CUDA device table
* Section info: devIdx
*
* Related sections:
*
* 1. CUDA grid param memory, per grid
*
* Contains raw grid parameter memory (does not have a separate struct
* in this file). Not present if dumping memory is disabled.
*
* Section name format: sprintf("%s.dev%d.grid%d", CUDBG_SHNAME_PARAM, devIdx, gridIdx)
* Section header type: CUDBG_SHT_PARAM_MEM
* Section element type: byte data
* Section link: section header index of the corresponding CUDA grid table
* Section info: gridIdx
*
* 2. CUDA constbank information, per grid - see below
*/
typedef struct {
    /* Grid ID; an opaque 64-bit number. */
    uint64_t gridId64;
    /* Handle of the context that contains this grid. */
    uint64_t contextId;
    /* Handle of the kernel executed by this grid. */
    uint64_t function;
    /* Entry address of the kernel executed by this grid. */
    uint64_t functionEntry;
    /* Handle of the module that contains this grid's kernel. */
    uint64_t moduleHandle;
    /* ID of the parent grid (for a device-launched CDP grid). */
    uint64_t parentGridId64;
    /* Offset within constbank 0 at which the parameters start. */
    uint64_t paramsOffset;

    /* Kernel type; holds a CUDBGKernelType value (see cudadebugger.h). */
    uint32_t kernelType;
    /* Where the grid was launched from; holds a CUDBGKernelOrigin value
     * (see cudadebugger.h).
     */
    uint32_t origin;
    /* Grid status when the coredump was generated; holds a CUDBGGridStatus
     * value (see cudadebugger.h).
     */
    uint32_t gridStatus;
    /* Number of registers used by this grid. */
    uint32_t numRegs;

    /* Grid dimensions along X, Y and Z. */
    uint32_t gridDimX, gridDimY, gridDimZ;
    /* Block dimensions along X, Y and Z. */
    uint32_t blockDimX, blockDimY, blockDimZ;

    /* Whether the launch of this grid was blocking (semantically boolean). */
    uint32_t attrLaunchBlocking;
    /* Thread ID of the host thread that launched this grid. */
    uint32_t attrHostTid;

    /* ----- Added in CUDA Driver r525 -----
     * The fields below may be missing from older coredumps; check the
     * section's element size before reading them.
     */

    /* Cluster dimensions along X, Y and Z. */
    uint32_t clusterDimX, clusterDimY, clusterDimZ;
    /* Padding; ignore. */
    uint32_t padding0;
} CudbgGridTableEntry;
/*
* CUDA constbank information, per grid
*
* Since CUDA Driver r550.
*
* Contains information about all constbanks for a given grid.
*
* Section name format: sprintf("%s.dev%u.grid%u", CUDBG_SHNAME_CBTABLE, devIdx, gridIdx)
* Section header type: CUDBG_SHT_CB_TABLE
* Section element type: CudbgConstBankTableEntry
* Section link: section header index of the corresponding CUDA grid table
* Section info: gridIdx
*/
typedef struct {
    /* Global address at which this constbank starts. */
    uint64_t addr;
    /* Size of this constbank, in bytes. */
    uint32_t size;
    /* ID (number) of this constbank. */
    uint32_t bankId;
} CudbgConstBankTableEntry;
/*
* SM information, per device
*
* Contains information about all SMs for a given device. The SM index in this
* table is used in other section names and is referred to as <smIdx> below.
*
* Section name format: sprintf("%s.dev%d", CUDBG_SHNAME_SMTABLE, devIdx)
* Section header type: CUDBG_SHT_SM_TABLE
* Section element type: CudbgSmTableEntry
* Section link: section header index of the (unique) CUDA device table
* Section info: devIdx
*
* Related sections:
*
* 1. Block (CTA) information, per SM - see below
*/
typedef struct {
    /* ID (number) of this SM. */
    uint32_t smId;
    /* Padding; ignore. */
    uint32_t pad;
} CudbgSmTableEntry;
/*
* Block (CTA) information, per SM
*
* Contains information about all blocks for a given SM. The block index in this
* table is used in other section names and is referred to as <ctaIdx> below.
*
* Section name format: sprintf("%s.dev%d.sm%d", CUDBG_SHNAME_CTATABLE, devIdx, smIdx)
* Section header type: CUDBG_SHT_CTA_TABLE
* Section element type: CudbgCTATableEntry
* Section link: section header index of corresponding SM table
* Section info: smIdx
*
* Related sections:
*
* 1. Block shared memory, per block
*
* Contains raw block shared memory (does not have a separate struct
* in this file). Not present if dumping memory is disabled.
*
* Section name format: sprintf("%s.dev%d.sm%d.cta%d", CUDBG_SHNAME_SHARED, devIdx, smIdx, ctaIdx)
* Section header type: CUDBG_SHT_SHARED_MEM
* Section element type: byte data
* Section link: section header index of the corresponding block table
* Section info: ctaIdx
*
* 2. Warp information, per block (CTA) - see below
*/
typedef struct {
    /* Grid ID of the grid this block belongs to. */
    uint64_t gridId64;
    /* Block index along X, Y and Z. */
    uint32_t blockIdxX, blockIdxY, blockIdxZ;
    /* Padding; ignore. */
    uint32_t padding0;

    /* ----- Added in CUDA Driver r525 -----
     * The fields below may be missing from older coredumps; check the
     * section's element size before reading them.
     */

    /* Cluster index along X, Y and Z. */
    uint32_t clusterIdxX, clusterIdxY, clusterIdxZ;
    /* Padding; ignore. */
    uint32_t padding1;
} CudbgCTATableEntry;
/*
* Warp information, per block (CTA)
*
* Contains information about all warps for a given CTA. The warp index in this
* table is used in other section names and is referred to as <warpIdx> below.
*
* Section name format: sprintf("%s.dev%d.sm%d.cta%d", CUDBG_SHNAME_WPTABLE, devIdx, smIdx, ctaIdx)
* Section header type: CUDBG_SHT_WP_TABLE
* Section element type: CudbgWarpTableEntry
* Section link: section header index of corresponding block table
* Section info: ctaIdx
*
* Related sections:
*
* 1. Uniform registers, per warp
*
* Since CUDA Driver r400.
*
* Contains raw uniform registers memory (does not have a separate struct
* in this file). Not present if the device doesn't have uniform registers.
*
* Section name format: sprintf("%s.dev%d.sm%d.cta%d.wp%d", CUDBG_SHNAME_UREGS, devIdx, smIdx, ctaIdx, warpIdx)
* Section header type: CUDBG_SHT_DEV_UREGS
* Section element type: uint32_t data
* Section link: section header index of the corresponding warp table
* Section info: warpIdx
*
* 2. Uniform predicates, per warp
*
* Since CUDA Driver r400.
*
* Contains raw uniform predicates (does not have a separate struct in this
* file). Not present if the device doesn't have uniform predicates.
*
* Section name format: sprintf("%s.dev%d.sm%d.cta%d.wp%d", CUDBG_SHNAME_UPRED, devIdx, smIdx, ctaIdx, warpIdx)
* Section header type: CUDBG_SHT_DEV_UPRED
* Section element type: uint32_t data (each value is semantically boolean)
* Section link: section header index of the corresponding warp table
* Section info: warpIdx
*
* 3. Thread information, per warp - see below
*/
typedef struct {
    /* PC that triggered a warp error.
     * Meaningful only when errorPCValid is non-zero.
     */
    uint64_t errorPC;
    /* ID (number) of this warp. */
    uint32_t warpId;
    /* Bit mask of the valid lanes. */
    uint32_t validLanesMask;
    /* Bit mask of the active (non-diverged) lanes. */
    uint32_t activeLanesMask;
    /* Whether this warp has hit a breakpoint (semantically boolean). */
    uint32_t isWarpBroken;
    /* Whether errorPC holds a valid value (semantically boolean). */
    uint32_t errorPCValid;
    /* Padding; ignore. */
    uint32_t padding0;

    /* ----- Added in CUDA Driver r525 -----
     * The fields below may be missing from older coredumps; check the
     * section's element size before reading them.
     */

    /* Number of registers used by this warp. */
    uint32_t numRegs;
    /* Padding; ignore. */
    uint32_t padding1;
} CudbgWarpTableEntry;
/*
* Thread information, per warp
*
* Contains information about all threads for a given warp. The thread index in this
* table is used in other section names and is referred to as <laneIdx> below.
*
* Section name format: sprintf("%s.dev%d.sm%d.cta%d.wp%d", CUDBG_SHNAME_LNTABLE, devIdx, smIdx, ctaIdx, warpIdx)
* Section header type: CUDBG_SHT_LN_TABLE
* Section element type: CudbgThreadTableEntry
* Section link: section header index of corresponding warp table
* Section info: warpIdx
*
* Related sections:
*
* 1. Thread local memory, per thread
*
* Contains raw thread local memory (does not have a separate struct in this
* file). Not present if dumping memory is disabled.
*
* Section name format: sprintf("%s.dev%d.sm%d.cta%d.wp%d.ln%d", CUDBG_SHNAME_LOCAL, devIdx, smIdx, ctaIdx, warpIdx, laneIdx)
* Section header type: CUDBG_SHT_LOCAL_MEM
* Section element type: byte data
* Section addr: local memory address of the start of the dumped local memory
* Section link: section header index of the corresponding thread table
* Section info: laneIdx
*
* 2. Thread registers
*
* Contains raw registers memory (does not have a separate struct in this file).
*
* Section name format: sprintf("%s.dev%d.sm%d.cta%d.wp%d.ln%d", CUDBG_SHNAME_REGS, devIdx, smIdx, ctaIdx, warpIdx, laneIdx)
* Section header type: CUDBG_SHT_DEV_REGS
* Section element type: uint32_t data
* Section link: section header index of the corresponding thread table
* Section info: laneIdx
*
* 3. Thread predicates
*
* Contains raw predicates memory (does not have a separate struct in this file).
*
* Section name format: sprintf("%s.dev%d.sm%d.cta%d.wp%d.ln%d", CUDBG_SHNAME_PRED, devIdx, smIdx, ctaIdx, warpIdx, laneIdx)
* Section header type: CUDBG_SHT_DEV_PRED
* Section element type: uint32_t data (each value is semantically boolean)
* Section link: section header index of the corresponding thread table
* Section info: laneIdx
*
* 4. Thread call stack, per thread - see below
*/
typedef struct {
    /* PC of this thread.
     * New unified backend: a global address in GPU memory corresponding to
     * the physical PC.
     * Older classic backend: a global address in CPU memory where a copy of
     * the GPU code is stored.
     */
    uint64_t virtualPC;
    /* Offset of this thread's PC from the start of the function that is
     * currently executing.
     */
    uint64_t physPC;

    /* Lane ID (number) of this thread. */
    uint32_t ln;
    /* Thread index along X, Y and Z. */
    uint32_t threadIdxX, threadIdxY, threadIdxZ;

    /* Exception hit by this thread, if any; holds a CUDBGException_t value
     * (see cudadebugger.h).
     */
    uint32_t exception;
    /* Depth of this thread's call stack, counting the frames that are in
     * a syscall as well.
     */
    uint32_t callDepth;
    /* Number of call stack frames that are in a syscall. */
    uint32_t syscallCallDepth;
    /* Special CC register; available on pre-Volta GPUs only. */
    uint32_t ccRegister;
} CudbgThreadTableEntry;
/*
* Thread call stack, per thread
*
* Contains information about the stack frames for a given thread.
*
* Section name format: sprintf("%s.dev%d.sm%d.cta%d.wp%d.ln%d", CUDBG_SHNAME_BT, devIdx, smIdx, ctaIdx, warpIdx, laneIdx)
* Section header type: CUDBG_SHT_BT
* Section element type: CudbgBacktraceTableEntry
* Section link: section header index of corresponding thread table
* Section info: laneIdx
*/
typedef struct {
    /* Offset of the return address from the start of the caller function. */
    uint64_t returnAddress;
    /* Return address of this call stack frame.
     * New unified backend: a global address in GPU memory containing the
     * code to return to.
     * Older classic backend: a global address in CPU memory containing
     * a copy of the GPU code.
     */
    uint64_t virtualReturnAddress;
    /* Level of this stack frame. */
    uint32_t level;
    /* Padding; ignore. */
    uint32_t pad;
} CudbgBacktraceTableEntry;
#endif // __CUDACOREDUMP_H__