File: searchindex.js

package info
nvidia-cuda-toolkit 12.4.1-3
  • area: non-free
  • in suites: forky, sid
  • size: 18,505,836 kB
  • sloc: ansic: 203,477; cpp: 64,769; python: 34,699; javascript: 22,006; xml: 13,410; makefile: 3,085; sh: 2,343; perl: 352
file content (1 line) | stat: -rw-r--r-- 900,778 bytes
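The file assigns a single searchData object: an "envversion" map recording the versions of the Sphinx domains the index was built against, and a "data" array holding one record per indexed page or page section, each with id, doc_id, filename, display_name, type, docname, anchor, priority, content, and keywords fields. As a minimal sketch of how such records can be queried client-side (a hypothetical findSections helper; the shipped repo_docs.ext.enhanced_search client is more involved, with tokenizing, scoring, and keyword matching):

// Hypothetical helper, not the shipped search client: a linear scan that
// matches a query against each record's display_name and content fields
// and returns a link target built from filename and anchor.
function findSections(query) {
  const q = query.toLowerCase();
  return searchData.data
    .filter(rec => rec.display_name.toLowerCase().includes(q) ||
                   rec.content.toLowerCase().includes(q))
    .map(rec => rec.anchor ? rec.filename + "#" + rec.anchor : rec.filename);
}
// Example: findSections("2023.4") matches the Archives section record below
// and yields "Archives/index.html#nvidia-nsight-compute-archives".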
const searchData = {"envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 5, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "repo_docs.ext.toctree": 2, "repo_docs.ext.mermaid": 1, "repo_docs.ext.enhanced_search": 2, "sphinx": 56}, "data": [{"id": 0, "doc_id": 1, "filename": "Archives/index.html", "domain_name": "page", "name": "Archives/index#nvidia-nsight-compute-archives", "display_name": "NVIDIA Nsight Compute Archives", "type": "section", "display_type": "Page section", "docname": "Archives/index", "anchor": "nvidia-nsight-compute-archives", "priority": -1, "content": "Below, you can find the current and past release information for NVIDIA Nsight Compute. Available Documentation Latest version of NVIDIA Nsight Compute NVIDIA Nsight Compute 2023.4 NVIDIA Nsight Compute 2023.3 NVIDIA Nsight Compute 2023.2 NVIDIA Nsight Compute 2023.1 NVIDIA Nsight Compute 2022.4 NVIDIA Nsight Compute 2022.3 NVIDIA Nsight Compute 2022.2 NVIDIA Nsight Compute 2022.1 NVIDIA Nsight Compute 2021.3 NVIDIA Nsight Compute 2021.2 NVIDIA Nsight Compute 2021.1 NVIDIA Nsight Compute 2020.3 NVIDIA Nsight Compute 2020.2 NVIDIA Nsight Compute 2020.1.2 NVIDIA Nsight Compute 2020.1 NVIDIA Nsight Compute 2019.5.1 NVIDIA Nsight Compute 2019.5 NVIDIA Nsight Compute 2019.4 NVIDIA Nsight Compute 2019.3 NVIDIA Nsight Compute 2019.1 Notices Notices ALL NVIDIA DESIGN SPECIFICATIONS, REFERENCE BOARDS, FILES, DRAWINGS, DIAGNOSTICS, LISTS, AND OTHER DOCUMENTS (TOGETHER AND SEPARATELY, \u201cMATERIALS\u201d) ARE BEING PROVIDED \u201cAS IS.\u201d NVIDIA MAKES NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. Information furnished is believed to be accurate and reliable. However, NVIDIA Corporation assumes no responsibility for the consequences of use of such information or for any infringement of patents or other rights of third parties that may result from its use. No license is granted by implication of otherwise under any patent rights of NVIDIA Corporation. Specifications mentioned in this publication are subject to change without notice. This publication supersedes and replaces all other information previously supplied. NVIDIA Corporation products are not authorized as critical components in life support devices or systems without express written approval of NVIDIA Corporation. Trademarks NVIDIA and the NVIDIA logo are trademarks or registered trademarks of NVIDIA Corporation in the U.S. and other countries. Other company and product names may be trademarks of the respective companies with which they are associated.", "keywords": []}, {"id": 1, "doc_id": 1, "filename": "Archives/index.html", "domain_name": "std", "name": "Archives/index", "display_name": "Archives", "type": "doc", "display_type": "Page", "docname": "Archives/index", "anchor": "", "priority": -1, "content": "Nsight Compute Archived Releases. 
Find documentation for previous versions of NVIDIA Nsight Compute.", "keywords": []}, {"id": 2, "doc_id": 18, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#boost", "display_name": "Boost", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "boost", "priority": -1, "content": "Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the \u201cSoftware\u201d) to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED \u201cAS IS\u201d, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.", "keywords": []}, {"id": 3, "doc_id": 18, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#breakpad", "display_name": "Breakpad", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "breakpad", "priority": -1, "content": "Copyright \u00a9 2006, Google Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \u201cAS IS\u201d AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.", "keywords": []}, {"id": 4, "doc_id": 18, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#flatbuffers", "display_name": "Flatbuffers", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "flatbuffers", "priority": -1, "content": "http://google.github.io/flatbuffers Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION Definitions. \u201cLicense\u201d shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. \u201cLicensor\u201d shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. \u201cLegal Entity\u201d shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, \u201ccontrol\u201d means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. \u201cYou\u201d (or \u201cYour\u201d) shall mean an individual or Legal Entity exercising permissions granted by this License. \u201cSource\u201d form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. \u201cObject\u201d form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. \u201cWork\u201d shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). \u201cDerivative Works\u201d shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. \u201cContribution\u201d shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, \u201csubmitted\u201d means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as \u201cNot a Contribution.\u201d \u201cContributor\u201d shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: You must give any other recipients of the Work or Derivative Works a copy of this License; and You must cause any modified files to carry prominent notices stating that You changed the files; and You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and If the Work includes a \u201cNOTICE\u201d text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an \u201cAS IS\u201d BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets \u201c[]\u201d replaced with your own identifying information. 
(Don\u2019t include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same \u201cprinted page\u201d as the copyright notice for easier identification within third-party archives. Copyright \u00a9 2014 Google Inc. Licensed under the Apache License, Version 2.0 (the \u201cLicense\u201d); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0", "keywords": []}, {"id": 5, "doc_id": 18, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#font-cascadia-mono", "display_name": "Font - Cascadia Mono", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "font-cascadia-mono", "priority": -1, "content": "( https://github.com/microsoft/cascadia-code ) Copyright (c) 2019 - Present, Microsoft Corporation, with Reserved Font Name Cascadia Code. This Font Software is licensed under the SIL Open Font License, Version 1.1. This license is copied below, and is also available with a FAQ at: http://scripts.sil.org/OFL SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007 PREAMBLE The goals of the Open Font License (OFL) are to stimulate worldwide development of collaborative font projects, to support the font creation efforts of academic and linguistic communities, and to provide a free and open framework in which fonts may be shared and improved in partnership with others. The OFL allows the licensed fonts to be used, studied, modified and redistributed freely as long as they are not sold by themselves. The fonts, including any derivative works, can be bundled, embedded, redistributed and/or sold with any software provided that any reserved names are not used by derivative works. The fonts and derivatives, however, cannot be released under any other type of license. The requirement for fonts to remain under this license does not apply to any document created using the fonts or their derivatives. DEFINITIONS \u201cFont Software\u201d refers to the set of files released by the Copyright Holder(s) under this license and clearly marked as such. This may include source files, build scripts and documentation. \u201cReserved Font Name\u201d refers to any names specified as such after the copyright statement(s). \u201cOriginal Version\u201d refers to the collection of Font Software components as distributed by the Copyright Holder(s). \u201cModified Version\u201d refers to any derivative made by adding to, deleting, or substituting \u2013 in part or in whole \u2013 any of the components of the Original Version, by changing formats or by porting the Font Software to a new environment. \u201cAuthor\u201d refers to any designer, engineer, programmer, technical writer or other person who contributed to the Font Software. PERMISSION & CONDITIONS Permission is hereby granted, free of charge, to any person obtaining a copy of the Font Software, to use, study, copy, merge, embed, modify, redistribute, and sell modified and unmodified copies of the Font Software, subject to the following conditions: Neither the Font Software nor any of its individual components, in Original or Modified Versions, may be sold by itself. 
Original or Modified Versions of the Font Software may be bundled, redistributed and/or sold with any software, provided that each copy contains the above copyright notice and this license. These can be included either as stand-alone text files, human-readable headers or in the appropriate machine-readable metadata fields within text or binary files as long as those fields can be easily viewed by the user. No Modified Version of the Font Software may use the Reserved Font Name(s) unless explicit written permission is granted by the corresponding Copyright Holder. This restriction only applies to the primary font name as presented to the users. The name(s) of the Copyright Holder(s) or the Author(s) of the Font Software shall not be used to promote, endorse or advertise any Modified Version, except to acknowledge the contribution(s) of the Copyright Holder(s) and the Author(s) or with their explicit written permission. The Font Software, modified or unmodified, in part or in whole, must be distributed entirely under this license, and must not be distributed under any other license. The requirement for fonts to remain under this license does not apply to any document created using the Font Software. TERMINATION This license becomes null and void if any of the above conditions are not met. DISCLAIMER THE FONT SOFTWARE IS PROVIDED \u201cAS IS\u201d, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM OTHER DEALINGS IN THE FONT SOFTWARE.", "keywords": []}, {"id": 6, "doc_id": 18, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#font-open-sans", "display_name": "Font - Open Sans", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "font-open-sans", "priority": -1, "content": "( https://fonts.google.com/specimen/Open+Sans ) Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION Definitions. \u201cLicense\u201d shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. \u201cLicensor\u201d shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. \u201cLegal Entity\u201d shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, \u201ccontrol\u201d means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. \u201cYou\u201d (or \u201cYour\u201d) shall mean an individual or Legal Entity exercising permissions granted by this License. \u201cSource\u201d form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
\u201cObject\u201d form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. \u201cWork\u201d shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). \u201cDerivative Works\u201d shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. \u201cContribution\u201d shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, \u201csubmitted\u201d means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as \u201cNot a Contribution.\u201d \u201cContributor\u201d shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: You must give any other recipients of the Work or Derivative Works a copy of this License; and You must cause any modified files to carry prominent notices stating that You changed the files; and You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and If the Work includes a \u201cNOTICE\u201d text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an \u201cAS IS\u201d BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets \u201c[]\u201d replaced with your own identifying information. (Don\u2019t include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same \u201cprinted page\u201d as the copyright notice for easier identification within third-party archives. Copyright \u00a9 2014 Google Inc. Licensed under the Apache License, Version 2.0 (the \u201cLicense\u201d); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0", "keywords": []}, {"id": 7, "doc_id": 18, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#font-roboto", "display_name": "Font - Roboto", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "font-roboto", "priority": -1, "content": "( https://fonts.google.com/specimen/Roboto ) Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION Definitions. \u201cLicense\u201d shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. \u201cLicensor\u201d shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. \u201cLegal Entity\u201d shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, \u201ccontrol\u201d means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 
\u201cYou\u201d (or \u201cYour\u201d) shall mean an individual or Legal Entity exercising permissions granted by this License. \u201cSource\u201d form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. \u201cObject\u201d form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. \u201cWork\u201d shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). \u201cDerivative Works\u201d shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. \u201cContribution\u201d shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, \u201csubmitted\u201d means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as \u201cNot a Contribution.\u201d \u201cContributor\u201d shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: You must give any other recipients of the Work or Derivative Works a copy of this License; and You must cause any modified files to carry prominent notices stating that You changed the files; and You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and If the Work includes a \u201cNOTICE\u201d text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an \u201cAS IS\u201d BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets \u201c[]\u201d replaced with your own identifying information. (Don\u2019t include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same \u201cprinted page\u201d as the copyright notice for easier identification within third-party archives. Copyright \u00a9 2014 Google Inc. Licensed under the Apache License, Version 2.0 (the \u201cLicense\u201d); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0", "keywords": []}, {"id": 8, "doc_id": 18, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#json-for-modern-c", "display_name": "JSON for Modern C++", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "json-for-modern-c", "priority": -1, "content": "( https://nlohmann.github.io/json/ ) MIT License Copyright (c) 2013-2019 Niels Lohmann Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \u201cSoftware\u201d), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED \u201cAS IS\u201d, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.", "keywords": []}, {"id": 9, "doc_id": 18, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#libbacktrace", "display_name": "libbacktrace", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "libbacktrace", "priority": -1, "content": "Copyright (C) 2012-2016 Free Software Foundation, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR \u201cAS IS\u201d AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.", "keywords": []}, {"id": 10, "doc_id": 18, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#libssh", "display_name": "LIBSSH", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "libssh", "priority": -1, "content": "Copyright \u00a9 2020 The libssh project. GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999 Copyright (C) 1991, 1999 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.] Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. 
You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. We call this license the \"Lesser\" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. 
However, the Lesser license provides advantages in certain special circumstances. For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a \"work based on the library\" and a \"work that uses the library\". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called \"this License\"). Each licensee is addressed as \"you\". A \"library\" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. The \"Library\", below, refers to any such software library or work which has been distributed under these terms. A \"work based on the Library\" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term \"modification\".) \"Source code\" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. 
You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. 
Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a \"work that uses the Library\". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. However, linking a \"work that uses the Library\" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a \"work that uses the library\". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a \"work that uses the Library\" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. As an exception to the Sections above, you may also combine or link a \"work that uses the Library\" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. 
Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable \"work that uses the Library\", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. For an executable, the required form of the \"work that uses the Library\" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. 
However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients&#x27; exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. 
Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and \u201cany later version\u201d, you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY \u201cAS IS\u201d WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. Linking with OpenSSL 17. In addition, as a special exception, we give permission to link the code of our release of libssh with the OpenSSL project\u2019s \u201cOpenSSL\u201d library (or with modified versions of it that use the same license as the \u201cOpenSSL\u201d library), and distribute the linked executables. You must obey the GNU Lesser General Public License in all respects for all of the code used other than \u201cOpenSSL\u201d. If you modify this file, you may extend this exception to your version of the file, but you are not obligated to do so. If you do not wish to do so, delete this exception statement from your version. 
END OF TERMS AND CONDITIONS Copies of libssh source will be made available upon request in accordance with LGPL requirements.", "keywords": []}, {"id": 11, "doc_id": 18, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#microsoft-detours", "display_name": "Microsoft Detours", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "microsoft-detours", "priority": -1, "content": "Microsoft Detours is used under the Professional license ( http://research.microsoft.com/en-us/projects/detours/ ). NVIDIA agrees to include in all copies of the NVIDIA Applications a proprietary rights notice that includes a reference to Microsoft software being included in such applications. NVIDIA shall not remove or obscure, but shall retain in the Software, any copyright, trademark, or patent notices that appear in the Software.", "keywords": []}, {"id": 12, "doc_id": 18, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#nvidia-software-license-agreement", "display_name": "NVIDIA Software License Agreement", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "nvidia-software-license-agreement", "priority": -1, "content": "NVIDIA CORPORATION NVIDIA SOFTWARE LICENSE AGREEMENT IMPORTANT \u2014 READ BEFORE DOWNLOADING, INSTALLING, COPYING OR USING THE LICENSED SOFTWARE This Software License Agreement (\u201cSLA\u201d), made and entered into as of the time and date of click through action (\u201cEffective Date\u201d), is a legal agreement between you and NVIDIA Corporation (\u201cNVIDIA\u201d) and governs the use of the NVIDIA computer software and the documentation made available for use with such NVIDIA software. By downloading, installing, copying, or otherwise using the NVIDIA software and/or documentation, you agree to be bound by the terms of this SLA. If you do not agree to the terms of this SLA, do not download, install, copy or use the NVIDIA software or documentation. IF YOU ARE ENTERING INTO THIS SLA ON BEHALF OF A COMPANY OR OTHER LEGAL ENTITY, YOU REPRESENT THAT YOU HAVE THE LEGAL AUTHORITY TO BIND THE ENTITY TO THIS SLA, IN WHICH CASE \u201cYOU\u201d WILL MEAN THE ENTITY YOU REPRESENT. IF YOU DON\u2019T HAVE SUCH AUTHORITY, OR IF YOU DON\u2019T ACCEPT ALL THE TERMS AND CONDITIONS OF THIS SLA, THEN NVIDIA DOES NOT AGREE TO LICENSE THE LICENSED SOFTWARE TO YOU, AND YOU MAY NOT DOWNLOAD, INSTALL, COPY OR USE IT. LICENSE. 1.1 License Grant. Subject to the terms of the AGREEMENT, NVIDIA hereby grants you a non-exclusive, non-transferable license, without the right to sublicense (except as expressly set forth in a Supplement), during the applicable license term unless earlier terminated as provided below, to have Authorized Users install and use the Software, including modifications (if expressly permitted in a Supplement), in accordance with the Documentation. You are only licensed to activate and use Licensed Software for which you have a valid license, even if during the download or installation you are presented with other product options. No Orders are binding on NVIDIA until accepted by NVIDIA. Your Orders are subject to the AGREEMENT. 
SLA Supplements: Certain Licensed Software licensed under this SLA may be subject to additional terms and conditions that will be presented to you in a Supplement for acceptance prior to the delivery of such Licensed Software under this SLA and the applicable Supplement. Licensed Software will only be delivered to you upon your acceptance of all applicable terms. 1.2 Limited Purpose Licenses. If your license is provided for one of the purposes indicated below, then notwithstanding contrary terms in Section 1.1 or in a Supplement, such licenses are for internal use and do not include any right or license to sublicense and distribute the Licensed Software or its output in any way in any public release, however limited, and/or in any manner that provides third parties with use of or access to the Licensed Software or its functionality or output, including (but not limited to) external alpha or beta testing or development phases. Further: Evaluation License. You may use evaluation licenses solely for your internal evaluation of the Licensed Software for broader adoption within your Enterprise or in connection with a NVIDIA product purchase decision, and such licenses have an expiration date as indicated by NVIDIA in its sole discretion (or ninety days from the date of download if no other duration is indicated). Educational/Academic License. You may use educational/academic licenses solely for educational purposes and all users must be enrolled or employed by an academic institution. If you do not meet NVIDIA\u2019s academic program requirements for educational institutions, you have no rights under this license. Test/Development License. You may use test/development licenses solely for your internal development, testing and/or debugging of your software applications or for interoperability testing with the Licensed Software, and such licenses have an expiration date as indicated by NVIDIA in its sole discretion (or one year from the date of download if no other duration is indicated). NVIDIA Confidential Information under the AGREEMENT includes output from Licensed Software developer tools identified as \u201cPro\u201d versions, where the output reveals functionality or performance data pertinent to NVIDIA hardware or software products. 1.3 Pre-Release Licenses. With respect to alpha, beta, preview, and other pre-release Software and Documentation (\u201cPre-Release Licensed Software\u201d) delivered to you under the AGREEMENT you acknowledge and agree that such Pre-Release Licensed Software (i) may not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, accessibility, availability, and reliability standards relative to commercially provided NVIDIA software and documentation, and (ii) use of such Pre-Release Licensed Software may result in unexpected results, loss of data, project delays or other unpredictable damage or loss. THEREFORE, PRE-RELEASE LICENSED SOFTWARE IS NOT INTENDED FOR USE, AND SHOULD NOT BE USED, IN PRODUCTION OR BUSINESS-CRITICAL SYSTEMS. NVIDIA has no obligation to make available a commercial version of any Pre-Release Licensed Software and NVIDIA has the right to abandon development of Pre-Release Licensed Software at any time without liability. 1.4 Enterprise and Contractor Usage. 
You may allow your Enterprise employees and Contractors to access and use the Licensed Software pursuant to the terms of the AGREEMENT solely to perform work on your behalf, provided further that with respect to Contractors: (i) you obtain a written agreement from each Contractor which contains terms and obligations with respect to access to and use of Licensed Software no less protective of NVIDIA than those set forth in the AGREEMENT, and (ii) such Contractor\u2019s access and use expressly excludes any sublicensing or distribution rights for the Licensed Software. You are responsible for the compliance with the terms and conditions of the AGREEMENT by your Enterprise and Contractors. Any act or omission that, if committed by you, would constitute a breach of the AGREEMENT shall be deemed to constitute a breach of the AGREEMENT if committed by your Enterprise or Contractors. 1.5 Services. Except as expressly indicated in an Order, NVIDIA is under no obligation to provide support for the Licensed Software or to provide any patches, maintenance, updates or upgrades under the AGREEMENT. Unless patches, maintenance, updates or upgrades are provided with their separate governing terms and conditions, they constitute Licensed Software licensed to you under the AGREEMENT. LIMITATIONS. 2.1 License Restrictions. Except as expressly authorized in the AGREEMENT, you agree that you will not (nor authorize third parties to): (i) copy and use Software that was licensed to you for use in one or more NVIDIA hardware products in other unlicensed products (provided that copies solely for backup purposes are allowed); (ii) reverse engineer, decompile, disassemble (except to the extent applicable laws specifically require that such activities be permitted) or attempt to derive the source code, underlying ideas, algorithm or structure of Software provided to you in object code form; (iii) sell, transfer, assign, distribute, rent, loan, lease, sublicense or otherwise make available the Licensed Software or its functionality to third parties (a) as an application services provider or service bureau, (b) by operating hosted/virtual system environments, (c) by hosting, time sharing or providing any other type of services, or (d) otherwise by means of the internet; (iv) modify, translate or otherwise create any derivative works of any Licensed Software; (v) remove, alter, cover or obscure any proprietary notice that appears on or with the Licensed Software or any copies thereof; (vi) use the Licensed Software, or allow its use, transfer, transmission or export in violation of any applicable export control laws, rules or regulations; (vii) distribute, permit access to, or sublicense the Licensed Software as a stand-alone product; (viii) bypass, disable, circumvent or remove any form of copy protection, encryption, security or digital rights management or authentication mechanism used by NVIDIA in connection with the Licensed Software, or use the Licensed Software together with any authorization code, serial number, or other copy protection device not supplied by NVIDIA directly or through an authorized reseller; (ix) use the Licensed Software for the purpose of developing competing products or technologies or assisting a third party in such activities; (x) use the Licensed Software with any system or application where the use or failure of such system or application can reasonably be expected to threaten or result in personal injury, death, or catastrophic loss including, without limitation, use in connection 
with any nuclear, avionics, navigation, military, medical, life support or other life critical application (\u201cCritical Applications\u201d), unless the parties have entered into a Critical Applications agreement; (xi) distribute any modification or derivative work you make to the Licensed Software under or by reference to the same name as used by NVIDIA; or (xii) use the Licensed Software in any manner that would cause the Licensed Software to become subject to an Open Source License. Nothing in the AGREEMENT shall be construed to give you a right to use, or otherwise obtain access to, any source code from which the Software or any portion thereof is compiled or interpreted. You acknowledge that NVIDIA does not design, test, manufacture or certify the Licensed Software for use in the context of a Critical Application and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or damages arising from such use. You agree to defend, indemnify and hold harmless NVIDIA and its Affiliates, and their respective employees, contractors, agents, officers and directors, from and against any and all claims, damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited to attorney\u2019s fees and costs incident to establishing the right of indemnification) arising out of or related to you and your Enterprise, and their respective employees, contractors, agents, distributors, resellers, end users, officers and directors\u2019 use of Licensed Software outside of the scope of the AGREEMENT or any other breach of the terms of the AGREEMENT. 2.2 Third Party License Obligations. You acknowledge and agree that the Licensed Software may include or incorporate third party technology (collectively \u201cThird Party Components\u201d), which is provided for use in or with the Software and not otherwise used separately. If the Licensed Software includes or incorporates Third Party Components, then the third-party pass-through terms and conditions (\u201cThird Party Terms\u201d) for the particular Third Party Component will be bundled with the Software or otherwise made available online as indicated by NVIDIA and will be incorporated by reference into the AGREEMENT. In the event of any conflict between the terms in the AGREEMENT and the Third Party Terms, the Third Party Terms shall govern. Copyright to Third Party Components is held by the copyright holders indicated in the copyright notices indicated in the Third Party Terms. Audio/Video Encoders and Decoders. You acknowledge and agree that it is your sole responsibility to obtain any additional third party licenses required to make, have made, use, have used, sell, import, and offer for sale your products or services that include or incorporate any Third Party Components and content relating to audio and/or video encoders and decoders from, including but not limited to, Microsoft, Thomson, Fraunhofer IIS, Sisvel S.p.A., MPEG-LA, and Coding Technologies as NVIDIA does not grant to you under the AGREEMENT any necessary patent or other rights with respect to audio and/or video encoders and decoders. 2.3 Limited Rights. Your rights in the Licensed Software are limited to those expressly granted under the AGREEMENT and no other licenses are granted whether by implication, estoppel or otherwise. NVIDIA reserves all rights, title and interest in and to the Licensed Software not expressly granted under the AGREEMENT. 3. CONFIDENTIALITY. 
Neither party will use the other party\u2019s Confidential Information, except as necessary for the performance of the AGREEMENT, nor will either party disclose such Confidential Information to any third party, except to personnel of NVIDIA and its Affiliates, you, your Enterprise, your Enterprise Contractors, and each party\u2019s legal and financial advisors that have a need to know such Confidential Information for the performance of the AGREEMENT, provided that each such personnel, employee and Contractor is subject to a written agreement that includes confidentiality obligations consistent with those set forth herein. Each party will use all reasonable efforts to maintain the confidentiality of all of the other party\u2019s Confidential Information in its possession or control, but in no event less than the efforts that it ordinarily uses with respect to its own Confidential Information of similar nature and importance. The foregoing obligations will not restrict either party from disclosing the other party\u2019s Confidential Information or the terms and conditions of the AGREEMENT as required under applicable securities regulations or pursuant to the order or requirement of a court, administrative agency, or other governmental body, provided that the party required to make such disclosure (i) gives reasonable notice to the other party to enable it to contest such order or requirement prior to its disclosure (whether through protective orders or otherwise), (ii) uses reasonable effort to obtain confidential treatment or similar protection to the fullest extent possible to avoid such public disclosure, and (iii) discloses only the minimum amount of information necessary to comply with such requirements. 4. OWNERSHIP. You are not obligated to disclose to NVIDIA any modifications that you, your Enterprise or your Contractors make to the Licensed Software as permitted under the AGREEMENT. As between the parties, all modifications are owned by NVIDIA and licensed to you under the AGREEMENT unless otherwise expressly provided in a Supplement. The Licensed Software and all modifications owned by NVIDIA, and the respective Intellectual Property Rights therein, are and will remain the sole and exclusive property of NVIDIA or its licensors, whether the Licensed Software is separate from or combined with any other products or materials. You shall not engage in any act or omission that would impair NVIDIA\u2019s and/or its licensors\u2019 Intellectual Property Rights in the Licensed Software or any other materials, information, processes or subject matter proprietary to NVIDIA. NVIDIA\u2019s licensors are intended third party beneficiaries with the right to enforce provisions of the AGREEMENT with respect to their Confidential Information and/or Intellectual Property Rights. 5. FEEDBACK. You have no obligation to provide Feedback to NVIDIA. However, NVIDIA and/or its Affiliates may use and include any Feedback that you provide to improve the Licensed Software or other NVIDIA products, technologies or materials. Accordingly, if you provide Feedback, you agree that NVIDIA and/or its Affiliates, at their option, may, and may permit their licensees, to make, have made, use, have used, reproduce, license, distribute and otherwise commercialize the Feedback in the Licensed Software or in other NVIDIA products, technologies or materials without the payment of any royalties or fees to you. 
All Feedback becomes the sole property of NVIDIA and may be used in any manner NVIDIA sees fit, and you hereby assign to NVIDIA all of your right, title and interest in and to any Feedback. NVIDIA has no obligation to respond to Feedback or to incorporate Feedback into the Licensed Software. 6. NO WARRANTIES. THE LICENSED SOFTWARE AND ANY OTHER CONFIDENTIAL INFORMATION AND/OR SERVICES ARE PROVIDED BY NVIDIA \u201cAS IS\u201d AND \u201cWITH ALL FAULTS,\u201d AND NVIDIA EXPRESSLY DISCLAIMS ALL OTHER WARRANTIES OF ANY KIND OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF OPERABILITY, CONDITION, VALUE, ACCURACY OF DATA, OR QUALITY, AS WELL AS ANY WARRANTIES OF MERCHANTABILITY, SYSTEM INTEGRATION, WORKMANSHIP, SUITABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, OR THE ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO WARRANTY IS MADE BY NVIDIA ON THE BASIS OF TRADE USAGE, COURSE OF DEALING OR COURSE OF TRADE. NVIDIA DOES NOT WARRANT THAT THE LICENSED SOFTWARE OR ANY OTHER CONFIDENTIAL INFORMATION AND/OR SERVICES PROVIDED BY NVIDIA UNDER THE AGREEMENT WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. YOU ACKNOWLEDGE THAT NVIDIA\u2019S OBLIGATIONS UNDER THE AGREEMENT ARE FOR THE BENEFIT OF YOU ONLY. Nothing in this warranty section affects any statutory rights of consumers or other recipients to the extent that they cannot be waived or limited by contract under applicable law. 7. LIMITATION OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA OR ITS LICENSORS SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH THE AGREEMENT OR THE USE OR PERFORMANCE OF THE LICENSED SOFTWARE AND ANY OTHER CONFIDENTIAL INFORMATION AND/OR SERVICES PROVIDED BY NVIDIA UNDER THE AGREEMENT, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF LIABILITY. IN NO EVENT WILL NVIDIA\u2019S TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THE AGREEMENT EXCEED THE NET AMOUNTS RECEIVED BY NVIDIA FOR YOUR USE OF THE PARTICULAR LICENSED SOFTWARE DURING THE TWELVE (12) MONTHS BEFORE THE LIABILITY AROSE (or up to US$10.00 if you acquired the Licensed Software for no charge). THE NATURE OF THE LIABILITY, THE NUMBER OF CLAIMS OR SUITS OR THE NUMBER OF PARTIES WITHIN YOUR ENTERPRISE THAT ACCEPTED THE TERMS OF THE AGREEMENT SHALL NOT ENLARGE OR EXTEND THIS LIMIT. THE FOREGOING LIMITATIONS SHALL APPLY REGARDLESS OF WHETHER NVIDIA OR ITS LICENSORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES AND REGARDLESS OF WHETHER ANY REMEDY FAILS ITS ESSENTIAL PURPOSE. The disclaimers, exclusions and limitations of liability set forth in the AGREEMENT form an essential basis of the bargain between the parties, and, absent any such disclaimers, exclusions or limitations of liability, the provisions of the AGREEMENT, including, without limitation, the economic terms, would be substantially different. TERM AND TERMINATION. 8.1 AGREEMENT, Licenses and Services. 
This SLA shall become effective upon the Effective Date, each Supplement upon their acceptance, and both this SLA and Supplements shall continue in effect until your last access or use of the Licensed Software and/or services hereunder, unless earlier terminated as provided in this \u201cTerm and Termination\u201d section. Each Licensed Software license ends at the earlier of (a) the expiration of the applicable license term, or (b) termination of such license or the AGREEMENT. Each service ends at the earlier of (x) the expiration of the applicable service term, (y) termination of such service or the AGREEMENT, or (z) expiration or termination of the associated license and no credit or refund will be provided upon the expiration or termination of the associated license for any service fees paid. 8.2 Termination and Effect of Expiration or Termination. NVIDIA may terminate the AGREEMENT in whole or in part: (i) if you breach any term of the AGREEMENT and fail to cure such breach within thirty (30) days following notice thereof from NVIDIA (or immediately if you violate NVIDIA\u2019s Intellectual Property Rights); (ii) if you become the subject of a voluntary or involuntary petition in bankruptcy or any proceeding relating to insolvency, receivership, liquidation or composition for the benefit of creditors, if that petition or proceeding is not dismissed with prejudice within sixty (60) days after filing, or if you cease to do business; or (iii) if you commence or participate in any legal proceeding against NVIDIA, with respect to the Licensed Software that is the subject of the proceeding during the pendency of such legal proceeding. If you or your authorized NVIDIA reseller fail to pay license fees or service fees when due then NVIDIA may, in its sole discretion, suspend or terminate your license grants, services and any other rights provided under the AGREEMENT for the affected Licensed Software, in addition to any other remedies NVIDIA may have at law or equity. Upon any expiration or termination of the AGREEMENT, a license or a service provided hereunder, (a) any amounts owed to NVIDIA become immediately due and payable, (b) you must promptly discontinue use of the affected Licensed Software and/or service, and (c) you must promptly destroy or return to NVIDIA all copies of the affected Licensed Software and all portions thereof in your possession or control, and each party will promptly destroy or return to the other all of the other party\u2019s Confidential Information within its possession or control. Upon written request, you will certify in writing that you have complied with your obligations under this section. Upon expiration or termination of the AGREEMENT all provisions survive except for the license grant provisions. CONSENT TO COLLECTION AND USE OF INFORMATION. You hereby agree and acknowledge that the Software may access, collect non-personally identifiable information about your Enterprise computer systems in order to properly optimize such systems for use with the Software. To the extent that you use the Software, you hereby consent to all of the foregoing, and represent and warrant that you have the right to grant such consent. 
In addition, you agree that you are solely responsible for maintaining appropriate data backups and system restore points for your Enterprise systems, and that NVIDIA will have no responsibility for any damage or loss to such systems (including loss of data or access) arising from or relating to (a) any changes to the configuration, application settings, environment variables, registry, drivers, BIOS, or other attributes of the systems (or any part of such systems) initiated through the Software; or (b) installation of any Software or third party software patches initiated through the Software. In certain systems you may change your system update preferences by unchecking \u201cAutomatically check for updates\u201d in the \u201cPreferences\u201d tab of the control panel for the Software. In connection with the receipt of the Licensed Software or services you may receive access to links to third party websites and services and the availability of those links does not imply any endorsement by NVIDIA. NVIDIA encourages you to review the privacy statements on those sites and services that you choose to visit so that you can understand how they may collect, use and share personal information of individuals. NVIDIA is not responsible or liable for: (i) the availability or accuracy of such links; or (ii) the products, services or information available on or through such links; or (iii) the privacy statements or practices of sites and services controlled by other companies or organizations. To the extent that you or members of your Enterprise provide to NVIDIA during registration or otherwise personal information, you acknowledge that such information will be collected, used and disclosed by NVIDIA in accordance with NVIDIA\u2019s privacy policy, available at URL http://www.nvidia.com/object/privacy_policy.html . GENERAL. This SLA, any Supplements incorporated hereto, and Orders constitute the entire agreement of the parties with respect to the subject matter hereto and supersede all prior negotiations, conversations, or discussions between the parties relating to the subject matter hereto, oral or written, and all past dealings or industry custom. Any additional and/or conflicting terms and conditions on purchase order(s) or any other documents issued by you are null, void, and invalid. Any amendment or waiver under the AGREEMENT must be in writing and signed by representatives of both parties. The AGREEMENT and the rights and obligations thereunder may not be assigned by you, in whole or in part, including by merger, consolidation, dissolution, operation of law, or any other manner, without written consent of NVIDIA, and any purported assignment in violation of this provision shall be void and of no effect. NVIDIA may assign, delegate or transfer the AGREEMENT and its rights and obligations hereunder, and if to a non-Affiliate you will be notified. Each party acknowledges and agrees that the other is an independent contractor in the performance of the AGREEMENT, and each party is solely responsible for all of its employees, agents, contractors, and labor costs and expenses arising in connection therewith. The parties are not partners, joint ventures or otherwise affiliated, and neither has any authority to make any statements, representations or commitments of any kind to bind the other party without prior written consent. 
Neither party will be responsible for any failure or delay in its performance under the AGREEMENT (except for any payment obligations) to the extent due to causes beyond its reasonable control for so long as such force majeure event continues in effect. The AGREEMENT will be governed by and construed under the laws of the State of Delaware and the United States without regard to the conflicts of law provisions thereof and without regard to the United Nations Convention on Contracts for the International Sale of Goods. The parties consent to the personal jurisdiction of the federal and state courts located in Santa Clara County, California. You acknowledge and agree that a breach of any of your promises or agreements contained in the AGREEMENT may result in irreparable and continuing injury to NVIDIA for which monetary damages may not be an adequate remedy and therefore NVIDIA is entitled to seek injunctive relief as well as such other and further relief as may be appropriate. If any court of competent jurisdiction determines that any provision of the AGREEMENT is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. Unless otherwise specified, remedies are cumulative. The Licensed Software has been developed entirely at private expense and is \u201ccommercial items\u201d consisting of \u201ccommercial computer software\u201d and \u201ccommercial computer software documentation\u201d provided with RESTRICTED RIGHTS. Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the restrictions set forth in the AGREEMENT pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (c)(1) and (2) of the Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is NVIDIA, 2701 San Tomas Expressway, Santa Clara, CA 95050. You acknowledge that the Licensed Software described under the AGREEMENT is subject to export control under the U.S. Export Administration Regulations (EAR) and economic sanctions regulations administered by the U.S. Department of Treasury\u2019s Office of Foreign Assets Control (OFAC). Therefore, you may not export, reexport or transfer in-country the Licensed Software without first obtaining any license or other approval that may be required by BIS and/or OFAC. You are responsible for any violation of the U.S. or other applicable export control or economic sanctions laws, regulations and requirements related to the Licensed Software. By accepting this SLA, you confirm that you are not a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from receiving the Licensed Software. Any notice delivered by NVIDIA to you under the AGREEMENT will be delivered via mail, email or fax. Please direct your legal notices or other correspondence to NVIDIA Corporation, 2701 San Tomas Expressway, Santa Clara, California 95050, United States of America, Attention: Legal Department. GLOSSARY OF TERMS Certain capitalized terms, if not otherwise defined elsewhere in this SLA, shall have the meanings set forth below: \u201cAffiliate\u201d means any legal entity that Owns, is Owned by, or is commonly Owned with a party. \u201cOwn\u201d means having more than 50% ownership or the right to direct the management of the entity. \u201cAGREEMENT\u201d means this SLA and all associated Supplements entered by the parties referencing this SLA. 
\u201cAuthorized Users\u201d means your Enterprise individual employees and any of your Enterprise\u2019s Contractors, subject to the terms of the \u201cEnterprise and Contractor Usage\u201d section. \u201cConfidential Information\u201d means the Licensed Software (unless made publicly available by NVIDIA without confidentiality obligations), and any NVIDIA business, marketing, pricing, research and development, know-how, technical, scientific, financial status, proposed new products or other information disclosed by NVIDIA to you which, at the time of disclosure, is designated in writing as confidential or proprietary (or like written designation), or orally identified as confidential or proprietary or is otherwise reasonably identifiable by parties exercising reasonable business judgment, as confidential. Confidential Information does not and will not include information that: (i) is or becomes generally known to the public through no fault of or breach of the AGREEMENT by the receiving party; (ii) is rightfully known by the receiving party at the time of disclosure without an obligation of confidentiality; (iii) is independently developed by the receiving party without use of the disclosing party\u2019s Confidential Information; or (iv) is rightfully obtained by the receiving party from a third party without restriction on use or disclosure. \u201cContractor\u201d means an individual who works primarily for your Enterprise on a contractor basis from your secure network. \u201cDocumentation\u201d means the NVIDIA documentation made available for use with the Software, including (without limitation) user manuals, datasheets, operations instructions, installation guides, release notes and other materials provided to you under the AGREEMENT. \u201cEnterprise\u201d means you or any company or legal entity for which you accepted the terms of this SLA, and their subsidiaries of which your company or legal entity owns more than fifty percent (50%) of the issued and outstanding equity. \u201cFeedback\u201d means any and all suggestions, feature requests, comments or other feedback regarding the Licensed Software, including possible enhancements or modifications thereto. \u201cIntellectual Property Rights\u201d means all patent, copyright, trademark, trade secret, trade dress, trade names, utility models, mask work, moral rights, rights of attribution or integrity, service marks, master recording and music publishing rights, performance rights, author\u2019s rights, database rights, registered design rights and any applications for the protection or registration of these rights, or other intellectual or industrial property rights or proprietary rights, howsoever arising and in whatever media, whether now known or hereafter devised, whether or not registered, (including all claims and causes of action for infringement, misappropriation or violation and all rights in any registrations and renewals), worldwide and whether existing now or in the future. \u201cLicensed Software\u201d means Software, Documentation and all modifications owned by NVIDIA. \u201cOpen Source License\u201d includes, without limitation, a software license that requires as a condition of use, modification, and/or distribution of such software that the Software be (i) disclosed or distributed in source code form; (ii) be licensed for the purpose of making derivative works; or (iii) be redistributable at no charge. 
\u201cOrder\u201d means a purchase order issued by you, a signed purchase agreement with you, or other ordering document issued by you to NVIDIA or a NVIDIA authorized reseller (including any on-line acceptance process) that references and incorporates the AGREEMENT and is accepted by NVIDIA. \u201cSoftware\u201d means the NVIDIA software programs licensed to you under the AGREEMENT including, without limitation, libraries, sample code, utility programs and programming code. \u201cSupplement\u201d means the additional terms and conditions beyond those stated in this SLA that apply to certain Licensed Software licensed hereunder.", "keywords": []}, {"id": 13, "doc_id": 18, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#openssl", "display_name": "OpenSSL", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "openssl", "priority": -1, "content": "https://www.openssl.org Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION Definitions. \u201cLicense\u201d shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. \u201cLicensor\u201d shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. \u201cLegal Entity\u201d shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, \u201ccontrol\u201d means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. \u201cYou\u201d (or \u201cYour\u201d) shall mean an individual or Legal Entity exercising permissions granted by this License. \u201cSource\u201d form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. \u201cObject\u201d form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. \u201cWork\u201d shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). \u201cDerivative Works\u201d shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. \u201cContribution\u201d shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, \u201csubmitted\u201d means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as \u201cNot a Contribution.\u201d \u201cContributor\u201d shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: You must give any other recipients of the Work or Derivative Works a copy of this License; and You must cause any modified files to carry prominent notices stating that You changed the files; and You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and If the Work includes a \u201cNOTICE\u201d text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an \u201cAS IS\u201d BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets \u201c[]\u201d replaced with your own identifying information. 
(Don\u2019t include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same \u201cprinted page\u201d as the copyright notice for easier identification within third-party archives. Copyright \u00a9 2014 Google Inc. Licensed under the Apache License, Version 2.0 (the \u201cLicense\u201d); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0", "keywords": []}, {"id": 14, "doc_id": 18, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#protocol-buffers", "display_name": "Protocol Buffers", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "protocol-buffers", "priority": -1, "content": "Copyright \u00a9 2014, Google Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \u201cAS IS\u201d AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Code generated by the Protocol Buffer compiler is owned by the owner of the input file used when generating it. This code is not standalone and requires a support library to be linked with it. This support library is itself covered by the above license.", "keywords": []}, {"id": 15, "doc_id": 18, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#the-mesa-3d-graphics-library", "display_name": "The Mesa 3D Graphics Library", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "the-mesa-3d-graphics-library", "priority": -1, "content": "Copyright (C) 1999-2007 Brian Paul All Rights Reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \u201cSoftware\u201d), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED \u201cAS IS\u201d, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.", "keywords": []}, {"id": 16, "doc_id": 18, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#xxhash", "display_name": "xxHASH", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "xxhash", "priority": -1, "content": "Copyright \u00a9 2012-2014, Yann Collet. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \u201cAS IS\u201d AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.", "keywords": []}, {"id": 17, "doc_id": 18, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#yaml-cpp", "display_name": "yaml-cpp", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "yaml-cpp", "priority": -1, "content": "Copyright \u00a9 2008-2015 Jesse Beder. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \u201cSoftware\u201d), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED \u201cAS IS\u201d, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.", "keywords": []}, {"id": 18, "doc_id": 18, "filename": "CopyrightAndLicenses/index.html", "domain_name": "std", "name": "CopyrightAndLicenses/index", "display_name": "Copyright and Licenses", "type": "doc", "display_type": "Page", "docname": "CopyrightAndLicenses/index", "anchor": "", "priority": -1, "content": "Nsight Compute Copyright and Licenses. Information on the NVIDIA Software License Agreement as well as third party software and tools used by Nsight Compute.", "keywords": []}, {"id": 19, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#basic-usage", "display_name": "Basic Usage", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "basic-usage", "priority": -1, "content": "In order to be able to import ncu_report you will either have to navigate to the extras/python directory, or add its absolute path to the PYTHONPATH environment variable. Then, the module can be imported like any Python module: &gt;&gt;&gt; import ncu_report Importing a report Once the module is imported, you can load a report file by calling the load_report function with the path to the file. This function returns an object of type IContext which holds all the information concerning that report. &gt;&gt;&gt; my_context = ncu_report.load_report(&quot;my_report.ncu-rep&quot;) Querying ranges When working with the Python module, kernel profiling results are grouped into ranges which are represented by IRange objects. You can inspect the number of ranges contained in the loaded report by calling the num_ranges() member function of an IContext object and retrieve a range by its index using range_by_idx(index) . &gt;&gt;&gt; my_context.num_ranges() 1 &gt;&gt;&gt; my_range = my_context.range_by_idx(0) Querying actions Inside a range , kernel profiling results are called actions . You can query the number of actions contained in a given range by using the num_actions method of an IRange object. &gt;&gt;&gt; my_range.num_actions() 2 In the same way ranges can be obtained from an IContext object by using the range_by_idx(index) method, individual actions can be obtained from IRange objects by using the action_by_idx(index) method. The resulting actions are represented by the IAction class. &gt;&gt;&gt; my_action = my_range.action_by_idx(0) As mentioned previously, an action represents a single kernel profiling result. 
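Putting the calls above together, the steps can be combined into one short walk over a report. The following is a minimal sketch (assuming a report named my_report.ncu-rep exists in the current directory, and using the name() method described next) that prints the name of every action in every range:

import ncu_report

# Load the report and walk all ranges and their actions.
my_context = ncu_report.load_report("my_report.ncu-rep")
for range_idx in range(my_context.num_ranges()):
    my_range = my_context.range_by_idx(range_idx)
    for action_idx in range(my_range.num_actions()):
        # Each action is a single kernel profiling result.
        print(my_range.action_by_idx(action_idx).name())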
To query the kernel\u2019s name you can use the name() member function of the IAction class. &gt;&gt;&gt; my_action.name() MyKernel Querying metrics To get a tuple of all metric names contained within an action you can use the metric_names() method. It is meant to be combined with the metric_by_name() method which returns an IMetric object. However, for the same task you may also use the [] operator, as explained in the High-Level Interface section below. The metric names displayed here are the same as the ones you can use with the --metrics flag of NVIDIA Nsight Compute. Once you have extracted a metric from an action , you can obtain its value by using one of the following three methods: as_string() to obtain its value as a Python str as_uint64() to obtain its value as a Python int as_double() to obtain its value as a Python float For example, to print the display name of the GPU on which the kernel was profiled you can query the device__attribute_display_name metric. &gt;&gt;&gt; display_name_metric = my_action.metric_by_name(&#x27;device__attribute_display_name&#x27;) &gt;&gt;&gt; display_name_metric.as_string() &#x27;NVIDIA GeForce RTX 3060 Ti&#x27; Note that accessing a metric with the wrong type can lead to unexpected (conversion) results. &gt;&gt;&gt; display_name_metric.as_double() 0.0 Therefore, it is advisable to directly use the High-Level function value() , as explained below.", "keywords": []}, {"id": 20, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#counter-domains", "display_name": "Counter Domains", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "counter-domains", "priority": -1, "content": "PM sampling metrics are composed of one or more raw counter dependencies internally. Each counter is associated with a counter domain , which describes how and where in the hardware the counter is collected. For metrics specified in section files, the automatic domain selection can be overwritten when needed to form more optimal PM sampling metric groups. Metrics { Label: &quot;Short Scoreboard&quot; Name: &quot;pmsampling:smsp__warps_issue_stalled_short_scoreboard.avg&quot; Groups: &quot;sampling_ws4&quot; CtrDomains: &quot;gpu_sm_c&quot; } Note that the CtrDomains field is currently only supported for the section Metrics field, but not for individual Options .", "keywords": []}, {"id": 21, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#derived-metrics", "display_name": "Derived Metrics", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "derived-metrics", "priority": -1, "content": "Derived Metrics allow you to define new metrics composed of constants or existing metrics directly in a section file. The new metrics are computed at collection time and added permanently to the profile result in the report. They can then subsequently be used for any tables, charts, rules, etc. NVIDIA Nsight Compute currently supports the following syntax for defining derived metrics in section files: MetricDefinitions { MetricDefinitions { Name: &quot;derived_metric_name&quot; Expression: &quot;derived_metric_expr&quot; } MetricDefinitions { ... } ... 
} The actual metric expression is defined as follows: derived_metric_expr ::= operand operator operand operator ::= + | - | * | / operand ::= metric | constant metric ::= (an existing metric name) constant ::= double | uint64 double ::= (double-precision number of the form &quot;N.(M)?&quot;, e.g. &quot;5.&quot; or &quot;0.3109&quot;) uint64 ::= (64-bit unsigned integer number of the form &quot;N&quot;, e.g. &quot;2029&quot;) Operators are defined as follows: For op in (+ | - | *): For each element in a metric it is applied to, the left-hand side expression is op-combined with the right-hand side expression. For op in (/): For each element in a metric it is applied to, the left-hand side expression is op-combined with the right-hand side expression. If the right-hand side operand is of integer type and equals 0, the result is the left-hand side value. Since metrics can contain regular values and/or instanced values , elements are combined as below. Constants are treated as metrics with only a regular value. 1. Regular values are operator-combined. a + b 2. If both metrics have no correlation ids, the first N values are operator-combined, where N is the minimum of the number of elements in both metrics. a1 + b1 a2 + b2 a3 a4 3. Else if both metrics have correlation ids, the sets of correlation ids from both metrics are joined and then operator-combined as applicable. a1 + b1 a2 b3 a4 + b4 b5 4. Else if only the left-hand side metric has correlation ids, the right-hand side regular metric value is operator-combined with every element of the left-hand side metric. a1 + b a2 + b a3 + b 5. Else if only the right-hand side metric has correlation ids, the right-hand side element values are operator-combined with the regular metric value of the left-hand side metric. a + b1 a + b2 a + b3 In all operations, the value kind of the left-hand side operand is used. If the right-hand side operand has a different value kind, it is converted. If the left-hand side operand is of string kind, it is returned unchanged. Examples of derived metrics are derived__avg_thread_executed , which provides a hint on the number of threads executed on average at each instruction, and derived__uncoalesced_l2_transactions_global , which indicates the ratio of actual L2 transactions vs. ideal L2 transactions at each applicable instruction. MetricDefinitions { MetricDefinitions { Name: &quot;derived__avg_thread_executed&quot; Expression: &quot;thread_inst_executed_true / inst_executed&quot; } MetricDefinitions { Name: &quot;derived__uncoalesced_l2_transactions_global&quot; Expression: &quot;memory_l2_transactions_global / memory_ideal_l2_transactions_global&quot; } MetricDefinitions { Name: &quot;sm__sass_thread_inst_executed_op_ffma_pred_on_x2&quot; Expression: &quot;sm__sass_thread_inst_executed_op_ffma_pred_on.sum.peak_sustained * 2&quot; } }", "keywords": []}, {"id": 22, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#high-level-interface", "display_name": "High-Level Interface", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "high-level-interface", "priority": -1, "content": "On top of the low-level NvRules API, the Python Report Interface also implements part of the Python object model . By implementing special methods, the Python Report Interface\u2019s exposed classes can be used with built-in Python mechanisms such as iteration, string formatting and length querying.
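As a sketch of what this object-model support enables (the exact items yielded by iteration and the value returned by len() are assumptions here; consult help(ncu_report.IAction) for the authoritative behavior), an action could be inspected with built-in functions alone:

import ncu_report

# Obtain an action as in the Basic Usage section above.
my_action = ncu_report.load_report("my_report.ncu-rep").range_by_idx(0).action_by_idx(0)

# Length querying; assumed to return the number of metrics in the action.
num_metrics = len(my_action)
for metric_name in my_action.metric_names():  # documented in Querying metrics
    metric = my_action[metric_name]           # item access via self[key], described next
    print(f"{metric_name} = {metric.value()}")  # value() is the convenience accessor shown below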
This allows you to access metrics objects via the self[key] instance method of the IAction class: &gt;&gt;&gt; display_name_metric = my_action[&quot;device__attribute_display_name&quot;] There is also a convenience method IMetric.value() which allows you to query the value of a metric object without knowledge of its type: &gt;&gt;&gt; display_name_metric.value() &#x27;NVIDIA GeForce RTX 3060 Ti&#x27; All the available methods of a class, as well as their associated Python docstrings, can be looked up interactively via &gt;&gt;&gt; help(ncu_report.IMetric) or similarly for other classes and methods. In your code, you can access the docstrings via the __doc__ attribute, i.e. ncu_report.IMetric.value.__doc__ .", "keywords": []}, {"id": 23, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#integration", "display_name": "Integration", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "integration", "priority": -1, "content": "The rule system is integrated into NVIDIA Nsight Compute as part of the profile report view. When you profile a kernel, available rules will be shown in the report\u2019s Details page. You can either select to apply all available rules at once by clicking Apply Rules at the top of the page, or apply rules individually. Once applied, the rule results will be added to the current report. By default, all rules are applied automatically. Section with a single Bottleneck rule available. The same section with the Bottleneck rule applied. It added a single message to the report. The section Rule has two associated rules, Basic Template Rule and Advanced Template Rule. The latter is not yet applied. Rules can add various UI elements, including warning and error messages as well as charts and tables. Some rules are applied independently from sections. They are shown under Independent Rules.", "keywords": []}, {"id": 24, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#introduction", "display_name": "Introduction", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "introduction", "priority": -1, "content": "The goal of NVIDIA Nsight Compute is to design a profiling tool that can be easily extended and customized by expert users. While we provide useful defaults, this allows adapting the reports to a specific use case or to design new ways to investigate collected data. All the following is data driven and does not require the tools to be recompiled. While working with section files or rules files it is recommended to open the Metric Selection tool window from the Profile menu item. This tool window lists all sections and rules that were loaded. Rules are grouped as children of their associated section or grouped in the [Independent Rules] entry. For files that failed to load, the table shows the error message. 
Use the Reload button to reload rule files from disk.", "keywords": []}, {"id": 25, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#metric-attributes", "display_name": "Metric attributes", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "metric-attributes", "priority": -1, "content": "Apart from the possibility to query the name() and value() of an IMetric object, you can also query the following additional metric attributes: metric_type() metric_subtype() rollup_operation() unit() description() The first method metric_type() returns one out of three enum values ( IMetric.MetricType_COUNTER , IMetric.MetricType_RATIO , IMetric.MetricType_THROUGHPUT ) if the metric is a hardware metric, or IMetric.MetricType_OTHER otherwise (e.g. for launch or device attributes). The method metric_subtype() returns an enum value representing the subtype of a metric (e.g. IMetric.MetricSubtype_PEAK_SUSTAINED , IMetric.MetricSubtype_PER_CYCLE_ACTIVE ). In case a metric does not have a subtype, None is returned. All available values (without the necessary IMetric.MetricSubtype_ prefix) may be found in the NvRules API documentation, or may be looked up interactively by executing help(ncu_report.IMetric) . IMetric.rollup_operation() returns the operation which is used to accumulate different values of the same metric and can be one of IMetric.RollupOperation_AVG , IMetric.RollupOperation_MAX , IMetric.RollupOperation_MIN or IMetric.RollupOperation_SUM for averaging, maximum, minimum or summation, respectively. If the metric in question does not specify a rollup operation None will be returned. Lastly, unit() and description() return a (possibly empty) string of the metric\u2019s unit and a short textual description for hardware metrics, respectively. The above methods can be combined to filter through all metrics of a report, given certain criteria: for metric in metrics: if metric.metric_type() == IMetric.MetricType_COUNTER and \\ metric.metric_subtype() == IMetric.MetricSubtype_PER_SECOND and \\ metric.rollup_operation() == IMetric.RollupOperation_AVG: print(f&quot;{metric.name()}: {metric.value()} {metric.unit()}&quot;)", "keywords": []}, {"id": 26, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#metric-options-and-filters", "display_name": "Metric Options and Filters", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "metric-options-and-filters", "priority": -1, "content": "Sections allow the user to specify alternative options for metrics that have a different metric name on different GPU architectures. Metric options use a min-arch/max-arch range filter , replacing the base metric with the first metric option for which the current GPU architecture matches the filter. While not strictly enforced, options for a base metric are expected to share the same meaning and subsequently unit, etc., with the base metric. In addition to its options, the base metric can be filtered by the same criteria. This is useful for metrics that are only available for certain architectures or in limited collection scopes. See ProfilerMetricOptions.proto for which filter options are available. In the below example, the metric dram__cycles_elapsed.avg.per_second is collected on SM 7.0 and SM 7.5-8.6, but not on any in between. It uses the same metric name on these architectures. 
Metrics { Label: &quot;DRAM Frequency&quot; Name: &quot;dram__cycles_elapsed.avg.per_second&quot; Filter { MaxArch: CC_70 } Options { Name: &quot;dram__cycles_elapsed.avg.per_second&quot; Filter { MinArch: CC_75 MaxArch: CC_86 } } } In the next example, the metric in the section header is only collected for launch-based collection scopes (i.e. kernel- and application replay for CUDA kernels or CUDA Graph nodes), but not in range-based scopes. Header { Metrics { Label: &quot;Theoretical Occupancy&quot; Name: &quot;sm__maximum_warps_per_active_cycle_pct&quot; Filter { CollectionFilter { CollectionScopes: CollectionScope_Launch } } } } Similarly, CollectionFilter s can be used to set the Importance of a metric, which specifies an expectation on its availability during data collection. Required metrics, for instance, are expected to be collectable and would generate an error in case they are not available, whereas Optional metrics would only generate a warning. Here is a minimal example, illustrating the functionality: Metrics { Label: &quot;Compute (SM) Throughput&quot; Name: &quot;sm__throughput.avg.pct_of_peak_sustained_elapsed&quot; Filter { CollectionFilter { Importance: Required } } } Filters can be applied to an entire section instead of or in addition to being set for individual metrics. If both types of filters are specified, they are combined, such that Metrics -scope filters take precedence over section-scope filters.", "keywords": []}, {"id": 27, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#metric-sections", "display_name": "Metric Sections", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "metric-sections", "priority": -1, "content": "The Details page consists of sections that focus on a specific part of the kernel analysis each. Every section is defined by a corresponding section file that specifies the data to be collected as well as the visualization used in the UI or CLI output for this data. Simply modify a deployed section file to add or modify what is collected.", "keywords": []}, {"id": 28, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#missing-sections", "display_name": "Missing Sections", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "missing-sections", "priority": -1, "content": "If new or updated section files are not used by NVIDIA Nsight Compute, it is most commonly one of two reasons: The file is not found: Section files must have the .section extension. They must also be on the section search path. The default search path is the sections directory within the installation directory. In NVIDIA Nsight Compute CLI, the search paths can be overwritten using the --section-folder and --section-folder-recursive options. In NVIDIA Nsight Compute, the search path can be configured in the Profile options. Syntax errors: If the file is found but has syntax errors, it will not be available for metric collection. However, error messages are reported for easier debugging. In NVIDIA Nsight Compute CLI, use the --list-sections option to get a list of error messages, if any. 
In NVIDIA Nsight Compute, error messages are reported in the Metric Selection tool window.", "keywords": []}, {"id": 29, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#nvrules-api", "display_name": "NvRules API", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "nvrules-api", "priority": -1, "content": "The NvRules API is defined as a C/C++ style interface, which is converted to the NvRules.py Python module to be consumable by the rules. As such, C++ class interfaces are directly converted to Python classes and functions. See the NvRules API documentation for the classes and functions available in this interface.", "keywords": []}, {"id": 30, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#nvtx-support", "display_name": "NVTX Support", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "nvtx-support", "priority": -1, "content": "The ncu_report module has support for the NVIDIA Tools Extension (NVTX). This comes through the INvtxState object which represents the NVTX state of a profiled kernel. An INvtxState object can be obtained from an action by using its nvtx_state() method. It exposes the domains() method which returns a tuple of integers representing the domains this kernel has state in. These integers can be used with the domain_by_id(id) method to get an INvtxDomainInfo object which represents the state of a domain. The INvtxDomainInfo can be used to obtain a tuple of Push-Pop or Start-End ranges using the push_pop_ranges() and start_end_ranges() methods. There is also an actions_by_nvtx member function in the IRange class which allows you to get a tuple of actions matching the NVTX state described in its parameter. The parameters for the actions_by_nvtx function are two lists of strings representing the state for which we want to query the actions. The first parameter describes the NVTX states to include while the second one describes the NVTX states to exclude. These strings are in the same format as the ones used with the --nvtx-include and --nvtx-exclude options.", "keywords": []}, {"id": 31, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#python-report-interface", "display_name": "Python Report Interface", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "python-report-interface", "priority": -1, "content": "NVIDIA Nsight Compute features a Python-based interface to interact with exported report files. The module is called ncu_report and works on any Python version from 3.4 1 . It can be found in the extras/python directory of your NVIDIA Nsight Compute package. In order to use the Python module, you need a report file generated by NVIDIA Nsight Compute. You can obtain such a file by saving it from the graphical interface or by using the --export flag of the command line tool. The types and functions in the ncu_report module are a subset of the ones available in the NvRules API. The documentation in this section serves as a tutorial. For a more formal description of the exposed API, please refer to the NvRules API documentation.
1 On Linux machines you will also need a GNU-compatible libc and libgcc_s.so .", "keywords": []}, {"id": 32, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#report-file-format", "display_name": "Report File Format", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "report-file-format", "priority": -1, "content": "This section documents the internals of the profiler report files (reports in the following) as created by NVIDIA Nsight Compute. The file format is subject to change in future releases without prior notice.", "keywords": []}, {"id": 33, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#rule-examples", "display_name": "Rule Examples", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "rule-examples", "priority": -1, "content": "The following example rule determines on which major GPU architecture a kernel was running. import NvRules def get_identifier(): return &quot;GpuArch&quot; def apply(handle): ctx = NvRules.get_context(handle) action = ctx.range_by_idx(0).action_by_idx(0) ccMajor = action.metric_by_name(&quot;device__attribute_compute_capability_major&quot;).as_uint64() ctx.frontend().message(&quot;Running on major compute capability &quot; + str(ccMajor))", "keywords": []}, {"id": 34, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#rule-file-api", "display_name": "Rule File API", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "rule-file-api", "priority": -1, "content": "The Rule File API is the implicit contract between the rule Python file and the tool. It defines which functions (syntactically and semantically) the Python file must provide to properly work as a rule. Mandatory Functions get_identifier() : Return the unique rule identifier string. apply(handle) : Apply this rule to the rule context provided by handle. Use NvRules.get_context(handle) to obtain the Context interface from handle. get_name() : Return the user-consumable display name of this rule. get_description() : Return the user-consumable description of this rule. Optional Functions get_section_identifier() : Return the unique section identifier that maps this rule to a section. Section-mapped rules will only be available if the corresponding section was collected. They implicitly assume that the metrics requested by the section are collected when the rule is applied. evaluate(handle) : Declare required metrics and rules that are necessary for this rule to be applied. Use NvRules.require_metrics(handle, [...]) to declare the list of metrics that must be collected prior to applying this rule. Use e.g. NvRules.require_rules(handle, [...]) to declare the list of other rules that must be available before applying this rule. Those are the only rules that can be safely proposed by the Controller interface.", "keywords": []}, {"id": 35, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#rule-system", "display_name": "Rule System", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "rule-system", "priority": -1, "content": "NVIDIA Nsight Compute features a new Python-based rule system. 
It is designed as the successor to the Expert System (un)guided analysis in NVIDIA Visual Profiler, but meant to be more flexible and more easily extensible to different use cases and APIs.", "keywords": []}, {"id": 36, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#rule-system-architecture", "display_name": "Rule System Architecture", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "rule-system-architecture", "priority": -1, "content": "The rule system consists of the Python interpreter, the NvRules C++ interface , the NvRules Python interface (NvRules.py) and a set of rule files. Each rule file is valid Python code that imports the NvRules.py module, adheres to certain standards defined by the Rule File API and is called from the tool. When applying a rule, a handle to the rule Context is provided to its apply function. This context captures most of the functionality that is available to rules as part of the NvRules API . In addition, some functionality is provided directly by the NvRules module, e.g. for global error reporting. Finally, since rules are valid Python code, they can use regular libraries and language functionality that ship with Python as well. From the rule Context , multiple further objects can be accessed, e.g. the Frontend , Ranges and Actions . It should be noted that those are only interfaces, i.e. the actual implementation can vary between the tools that implement this functionality. Naming of these interfaces is chosen to be as API-independent as possible, i.e. not to imply CUDA-specific semantics. However, since many compute and graphics APIs map to similar concepts, it can easily be mapped to CUDA terminology, too. A Range refers to a CUDA stream, an Action refers to a single CUDA kernel instance. Each action references several Metrics that have been collected during profiling (e.g. instructions executed ) or are statically available (e.g. the launch configuration). Metrics are accessed via their names from the Action . Each CUDA stream can contain any number of kernel (or other device activity) instances and so each Range can reference one or more Actions . However, currently only a single Action per Range will be available, as only a single CUDA kernel can be profiled at once. The Frontend provides an interface to manipulate the tool UI by adding messages, graphical elements such as line and bar charts or tables, as well as speedup estimations, focus metrics and source markers. The most common use case is for a rule to show at least one message, stating the result to the user, as illustrated in extras/RuleTemplates/BasicRuleTemplate.py . This could be as simple as \u201cNo issues have been detected,\u201d or contain direct hints as to how the user could improve the code, e.g. \u201cMemory is more heavily utilized than Compute. Consider whether it is possible for the kernel to do more compute work.\u201d For more advanced use cases, such as adding speedup estimates, key performance indicators (a.k.a.
focus metrics) or source markers to annotate individual lines of code to your rule, see the templates in extras/RuleTemplates .", "keywords": []}, {"id": 37, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#sample-script", "display_name": "Sample Script", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "sample-script", "priority": -1, "content": "NVTX Push-Pop range filtering This is a sample script which loads a report and prints the names of all the profiled kernels which were wrapped inside BottomRange and TopRange  Push-Pop ranges of the default NVTX domain. #!/usr/bin/env python3 import sys import ncu_report if len(sys.argv) != 2: print(&quot;usage: {} report_file&quot;.format(sys.argv[0]), file=sys.stderr) sys.exit(1) report = ncu_report.load_report(sys.argv[1]) for range_idx in range(report.num_ranges()): current_range = report.range_by_idx(range_idx) for action_idx in current_range.actions_by_nvtx([&quot;BottomRange/*/TopRange&quot;], []): action = current_range.action_by_idx(action_idx) print(action.name())", "keywords": []}, {"id": 38, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#section-definition", "display_name": "Section Definition", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "section-definition", "priority": -1, "content": "Protocol buffer definitions are in the NVIDIA Nsight Compute installation directory under extras/FileFormat . To understand section files, start with the definitions and documentation in ProfilerSection.proto . To see the list of available PerfWorks metrics for any device or chip, use the --query-metrics option of the command line .", "keywords": []}, {"id": 39, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#section-files", "display_name": "Section Files", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "section-files", "priority": -1, "content": "The section files delivered with the tool are stored in the sections sub-folder of the NVIDIA Nsight Compute install directory. Each section is defined in a separate file with the .section file extension. At runtime, the installed stock sections (and rules) are deployed to a user-writable directory. This can be disabled with an environment variable . Section files from the deployment directory are loaded automatically at the time the UI connects to a target application or the command line profiler is launched. This way, any changes to section files become immediately available in the next profile run. A section file is a text representation of a Google Protocol Buffer message. The full definition of all available fields of a section message is given in Section Definition . In short, each section consists of a unique Identifier (no spaces allowed), a Display Name , an optional Order value (for sorting the sections in the Details page ), an optional Description providing guidance to the user, an optional header table, an optional list of metrics to be collected but not displayed, optional bodies with additional UI elements, and other elements. See ProfilerSection.proto for the exact list of available elements. 
A small example of a very simple section is: Identifier: &quot;SampleSection&quot; DisplayName: &quot;Sample Section&quot; Description: &quot;This sample section shows information on active warps and cycles.&quot; Header { Metrics { Label: &quot;Active Warps&quot; Name: &quot;smsp__active_warps_avg&quot; } Metrics { Label: &quot;Active Cycles&quot; Name: &quot;smsp__active_cycles_avg&quot; } } On data collection, this section will cause the two PerfWorks metrics smsp__active_warps_avg and smsp__active_cycles_avg to be collected. The section as shown on the Details page By default, when not available, metrics specified in section files will only generate a warning during data collection, and would then show up as \u201cN/A\u201d in the UI or CLI. This is in contrast to metrics requested via --metrics which would cause an error when not available. How to specify metrics as required for data collection is described in Metric Options and Filters . More advanced elements can be used in the body of a section. See the ProfilerSection.proto file for which elements are available. The following example shows how to use these in a slightly more complex example. The usage of regexes is allowed in tables and charts in the section Body only and follows the format regex: followed by the actual regex to match PerfWorks metric names. The supported list of metrics that can be used in sections can be queried using the command line interface with the --query-metrics option. Each of these metrics can be used in any section and will be automatically collected if they appear in any enabled section. Note that even if a metric is used in multiple sections, it will only be collected once. Look at all the shipped sections to see how they are implemented. Identifier: &quot;SampleSection&quot; DisplayName: &quot;Sample Section&quot; Description: &quot;This sample section shows various metrics.&quot; Header { Metrics { Label: &quot;Active Warps&quot; Name: &quot;smsp__active_warps_avg&quot; } Metrics { Label: &quot;Active Cycles&quot; Name: &quot;smsp__active_cycles_avg&quot; } } Body { Items { Table { Label: &quot;Example Table&quot; Rows: 2 Columns: 1 Metrics { Label: &quot;Avg. Issued Instructions Per Scheduler&quot; Name: &quot;smsp__inst_issued_avg&quot; } Metrics { Label: &quot;Avg. Executed Instructions Per Scheduler&quot; Name: &quot;smsp__inst_executed_avg&quot; } } } Items { Table { Label: &quot;Metrics Table&quot; Columns: 2 Order: ColumnMajor Metrics { Name: &quot;regex:.*__elapsed_cycles_sum&quot; } } } Items { BarChart { Label: &quot;Metrics Chart&quot; CategoryAxis { Label: &quot;Units&quot; } ValueAxis { Label: &quot;Cycles&quot; } Metrics { Name: &quot;regex:.*__elapsed_cycles_sum&quot; } } } } The output of this section would look similar to this screenshot in the UI", "keywords": []}, {"id": 40, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#source-counters", "display_name": "Source Counters", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "source-counters", "priority": -1, "content": "The Source page provides correlation of various metrics with CUDA-C, PTX and SASS source of the application, depending on availability. Which Source Counter metrics are collected and the order in which they are displayed in this page is controlled using section files, specifically using the ProfilerSectionMetrics message type. 
Each ProfilerSectionMetrics defines one ordered group of metrics, and can be assigned an optional Order value. This value defines the ordering among those groups in the Source page. This allows, for example, you to define a group of memory-related source counters in one and a group of instruction-related counters in another section file. Identifier: &quot;SourceMetrics&quot; DisplayName: &quot;Custom Source Metrics&quot; Metrics { Order: 2 Metrics { Label: &quot;Instructions Executed&quot; Name: &quot;inst_executed&quot; } Metrics { Label: &quot;&quot; Name: &quot;collected_but_not_shown&quot; } } If a Source Counter metric is given an empty label attribute in the section file, it will be collected but not shown on the page.", "keywords": []}, {"id": 41, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#version-7-format", "display_name": "Version 7 Format", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "version-7-format", "priority": -1, "content": "Reports of version 7 are a combination of raw binary data and serialized Google Protocol Buffer version 2 messages (proto). All binary entries are stored as little endian. Protocol buffer definitions are in the NVIDIA Nsight Compute installation directory under extras/FileFormat . Offset [bytes] Entry Type Value 0 Magic Number Binary NVR\\0 4 Integer Binary sizeof(File Header) 8 File Header Proto Report version 8 + sizeof(File Header) Block 0 Mixed CUDA CUBIN source, profile results, session information 8 + sizeof(File Header) + sizeof(Block 0) Block 1 Mixed CUDA CUBIN source, profile results, session information \u2026 \u2026 \u2026 \u2026 Offset [bytes] Entry Type Value 0 Integer Binary sizeof(Block Header) 4 Block Header Proto Number of entries per payload type, payload size 4 + sizeof(Block Header) Block Payload Mixed Payload (CUDA CUBIN sources, profile results, session information, string table) Offset [bytes] Entry Type Value 0 Integer Binary sizeof(Payload type 1, entry 1) 4 Payload type 1, entry 1 Proto 4 + sizeof(Payload type 1, entry 1) Integer Binary sizeof(Payload type 1, entry 2) 8 + sizeof(Payload type 1, entry 1) Payload type 1, entry 2 Proto \u2026 \u2026 \u2026 \u2026 \u2026 Integer Binary sizeof(Payload type 2, entry 1) \u2026 Payload type 2, entry 1 Proto \u2026 \u2026 \u2026 \u2026 Notices Notices ALL NVIDIA DESIGN SPECIFICATIONS, REFERENCE BOARDS, FILES, DRAWINGS, DIAGNOSTICS, LISTS, AND OTHER DOCUMENTS (TOGETHER AND SEPARATELY, \u201cMATERIALS\u201d) ARE BEING PROVIDED \u201cAS IS.\u201d NVIDIA MAKES NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. Information furnished is believed to be accurate and reliable. However, NVIDIA Corporation assumes no responsibility for the consequences of use of such information or for any infringement of patents or other rights of third parties that may result from its use. No license is granted by implication of otherwise under any patent rights of NVIDIA Corporation. Specifications mentioned in this publication are subject to change without notice. This publication supersedes and replaces all other information previously supplied. NVIDIA Corporation products are not authorized as critical components in life support devices or systems without express written approval of NVIDIA Corporation. 
Trademarks NVIDIA and the NVIDIA logo are trademarks or registered trademarks of NVIDIA Corporation in the U.S. and other countries. Other company and product names may be trademarks of the respective companies with which they are associated.", "keywords": []}, {"id": 42, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "page", "name": "CustomizationGuide/index#writing-rules", "display_name": "Writing Rules", "type": "section", "display_type": "Page section", "docname": "CustomizationGuide/index", "anchor": "writing-rules", "priority": -1, "content": "To create a new rule, you need to create a new text file with the extension .py and place it at some location that is detectable by the tool (see Nsight Compute Integration on how to specify the search path for rules). At a minimum, the rule file must implement two functions, get_identifier and apply . See Rule File API for a description of all functions supported in rule files. See NvRules for details on the interface available in the rule\u2019s apply function.", "keywords": []}, {"id": 43, "doc_id": 43, "filename": "CustomizationGuide/index.html", "domain_name": "std", "name": "CustomizationGuide/index", "display_name": "Customization Guide", "type": "doc", "display_type": "Page", "docname": "CustomizationGuide/index", "anchor": "", "priority": -1, "content": "Nsight Compute Customization Guide. User manual on customizing NVIDIA Nsight Compute tools or integrating them with custom workflows. Information on writing section files, rules for automatic result analysis and scripting access to report files.", "keywords": []}, {"id": 44, "doc_id": 44, "filename": "Notices/notices.html", "domain_name": "std", "name": "Notices/notices", "display_name": "<no title>", "type": "doc", "display_type": "Page", "docname": "Notices/notices", "anchor": "", "priority": -1, "content": "Notices Notices ALL NVIDIA DESIGN SPECIFICATIONS, REFERENCE BOARDS, FILES, DRAWINGS, DIAGNOSTICS, LISTS, AND OTHER DOCUMENTS (TOGETHER AND SEPARATELY, \u201cMATERIALS\u201d) ARE BEING PROVIDED \u201cAS IS.\u201d NVIDIA MAKES NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. Information furnished is believed to be accurate and reliable. However, NVIDIA Corporation assumes no responsibility for the consequences of use of such information or for any infringement of patents or other rights of third parties that may result from its use. No license is granted by implication of otherwise under any patent rights of NVIDIA Corporation. Specifications mentioned in this publication are subject to change without notice. This publication supersedes and replaces all other information previously supplied. NVIDIA Corporation products are not authorized as critical components in life support devices or systems without express written approval of NVIDIA Corporation. Trademarks NVIDIA and the NVIDIA logo are trademarks or registered trademarks of NVIDIA Corporation in the U.S. and other countries. 
Other company and product names may be trademarks of the respective companies with which they are associated.", "keywords": []}, {"id": 45, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#acceleration-structure-viewer", "display_name": "Acceleration Structure Viewer", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "acceleration-structure-viewer", "priority": -1, "content": "The Acceleration Structure Viewer allows inspection of acceleration structures built using the OptiX API. In modern ray tracing APIs like OptiX, acceleration structures are data structures describing the rendered scene\u2019s geometries that will be intersected when performing ray tracing operations. More information concerning acceleration structures can be found in the OptiX programming guide . It is the responsibility of the user to set these up and pass them to the OptiX API which translates them to internal data structures that perform well on modern GPUs. The description created by the user can be very error-prone and it is sometimes hard to understand why the rendered result is not as expected. The Acceleration Structure Viewer is a component allowing OptiX users to inspect the acceleration structures they build before launching a ray tracing pipeline. The Acceleration Structure Viewer is opened through a button in the Resources window. The button will only be available when the currently viewed resource is OptiX: TraversableHandles . It opens the currently selected handle. The viewer is multi-paned: it shows a hierarchical view of the acceleration structure on the left, a graphical view of the acceleration structure in the middle, and controls and options on the right. In the hierarchical tree view on the left of the viewer, the instance acceleration structures (IAS) , geometry acceleration structures (GAS) , child instances and child geometries are shown. In addition to this, some general properties for each of them are shown, such as their primitive count, surface area and size on the device. In the hierarchical view on the left of the Acceleration Structure Viewer , the following information is displayed where applicable. Column Description Name An identifier for each row in the hierarchy. Click on the check box next to the name to show or hide the selected geometry or hierarchy. Double-click on this entry to jump to the item in the rendering view. # Prims The number of primitives that make up this acceleration structure. Surface Area A calculation of the total surface area for the AABB that bounds the particular entry. Size The size of the output buffer on the device holding this acceleration structure . Performance analysis tools are accessible in the bottom left corner of the main view. These tools help identify potential performance problems that are outlined in the RTX Ray Tracing Best Practices Guide . These analysis tools aim to give a broad picture of acceleration structures that may exhibit sub-optimal performance. To find the optimal solution, profiling and experimentation are recommended, but these tools may paint a better picture as to why one structure performs poorly compared to another. Action Description Instance Overlaps Identifies instance AABBs that overlap with other instances in 3D. Consider merging GASes when instance world-space AABBs overlap significantly to potentially increase performance.
Instance Heatmap This allows you to set the threshold used by the AABB heatmap rendered in the visualizer.", "keywords": []}, {"id": 46, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#analysis", "display_name": "Analysis", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "analysis", "priority": -1, "content": "Guided Analysis All trace-based analysis is now covered by NVIDIA Nsight Systems . This means that NVIDIA Nsight Compute does not include analysis regarding concurrent CUDA streams or (for example) UVM events. For per-kernel analysis, NVIDIA Nsight Compute provides recommendations based on collected performance data on the Details Page . These rules currently require you to collect the required metrics via their sections up front, and do not support partial on-demand profiling. To use the rule-based recommendations, enable the respective rules in the Metric Selection . Before profiling, enable Apply Rules in the Profile Options , or click the Apply Rules button in the report afterward. Unguided Analysis All trace-based analysis is now covered by Nsight Systems. For per-kernel analysis, Python-based rules provide analysis and recommendations. See Guided Analysis above for more details. PC Sampling View Source-correlated PC sampling information can now be viewed in the Source Page . Aggregated warp states are shown on the Details Page in the Warp State Statistics section. Memory Statistics Memory Statistics are located on the Details Page . Enable the Memory Workload Analysis sections to collect the respective information. NVLink View NVLink topology diagram and NVLink property table are located on the Details Page . Enable the NVLink Topology and NVLink Table sections to collect the respective information. Refer to the Known Issues section for the limitations related to NVLink. Source-Disassembly View Source correlated with PTX and SASS disassembly is shown on the Source Page . Which information is available depends on your application\u2019s compilation/JIT flags. GPU Details View NVIDIA Nsight Compute does not automatically collect data for each executed kernel, and it does not collect any data for device-side memory copies. Summary information for all profiled kernel launches is shown on the Summary Page . Comprehensive information on all collected metrics for all profiled kernel launches is shown on the Raw Page . CPU Details View CPU callstack sampling is now covered by NVIDIA Nsight Systems . OpenACC Details View OpenACC performance analysis with NVIDIA Nsight Compute is available to limited extent. OpenACC parallel regions are not explicitly recognized, but CUDA kernels generated by the OpenACC compiler can be profiled as regular CUDA kernels. See the NVIDIA Nsight Systems release notes to check its latest support status. OpenMP Details View OpenMP performance analysis is not supported by NVIDIA Nsight Compute. See the NVIDIA Nsight Systems release notes to check its latest support status. Properties View NVIDIA Nsight Compute does not collect CUDA API and GPU activities and their properties. Performance data for profiled kernel launches is reported (for example) on the Details Page . Console View NVIDIA Nsight Compute does not currently collect stdout/stderr application output. Settings View Application launch settings are specified in the Connection Dialog . For reports collected from the UI, launch settings can be inspected on the Session Page after profiling. 
CPU Source View Source for CPU-only APIs is not available. Source for profiled GPU kernel launches is shown on the Source Page .", "keywords": []}, {"id": 47, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#api-statistics", "display_name": "API Statistics", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "api-statistics", "priority": -1, "content": "The API Statistics window is available when NVIDIA Nsight Compute is connected to a target application. It opens by default as soon as the connection is established. It can be re-opened using Debug &gt; API Statistics from the main menu. Whenever the target application is suspended, it shows a summary of tracked API calls with some statistical information, such as the number of calls, their total, average, minimum and maximum duration. Note that this view cannot be used as a replacement for Nsight Systems when trying to optimize CPU performance of your application. The Reset button deletes all statistics collected to the current point and starts a new collection. Use the Export to CSV button to export the current statistics to a CSV file.", "keywords": []}, {"id": 48, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#api-stream", "display_name": "API Stream", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "api-stream", "priority": -1, "content": "The API Stream window is available when NVIDIA Nsight Compute is connected to a target application. It opens by default as soon as the connection is established. It can be re-opened using Debug &gt; API Stream from the main menu. Whenever the target application is suspended, the window shows the history of API calls and traced kernel launches. The currently suspended API call or kernel launch (activity) is marked with a yellow arrow. If the suspension is at a subcall, the parent call is marked with a green arrow. The API call or kernel is suspended before being executed. For each activity, further information is shown such as the kernel name or the function parameters ( Func Parameters ) and return value ( Func Return ). Note that the function return value will only become available once you step out or over the API call. Use the Current Thread dropdown to switch between the active threads. The dropdown shows the thread ID followed by the current API name. One of several options can be chosen in the trigger dropdown, which are executed by the adjacent &gt;&gt; button. Run to Next Kernel resumes execution until the next kernel launch is found in any enabled thread. Run to Next API Call resumes execution until the next API call matching Next Trigger is found in any enabled thread. Run to Next Range Start resumes execution until the next start of an active profiler range is found. Profiler ranges are defined by using the cu(da)ProfilerStart/Stop API calls. Run to Next Range Stop resumes execution until the next stop of an active profiler range is found. The API Level dropdown changes which API levels are shown in the stream. 
The Export to CSV button exports the currently visible stream to a CSV file.", "keywords": []}, {"id": 49, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#as-viewer-nav", "display_name": "Navigation", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "as-viewer-nav", "priority": -1, "content": "The Acceleration Structure Viewer supports multiple navigation modes. The navigation mode can be changed using the combo box in the camera controls pane, to the right of the rendering pane. The keyboard and mouse bindings for each mode are as follows (Fly Camera / Dolly Camera / Orbit Camera): WASD/Arrow Keys: Move forward, backward, left, right / Move forward, backward, left, right / Track (Move up, down, left, right). E/Q: Move up/down / Move up/down / n/a. Z/C: Increase/decrease field of view in all modes. Shift/Ctrl: Move faster/slower in all modes. Mousewheel: Zoom in/out in all modes. LMB + Drag: Rotate in place / Rotate left/right, move forward/backward / Rotate around the geometry. RMB + Drag: Zoom in/out / Rotate in place / Zoom in/out. MMB + Drag: Track (Move up, down, left, right) in all modes. Alt: Temporarily switch to Orbit Camera / Temporarily switch to Orbit Camera / n/a. F/Double Click: Focus on the selected geometry in all modes. Based on the coordinate system of the input geometry, you may need to change the Up Direction setting to Z-Axis or the Coordinates setting to RHS. To reset the camera to its original location, click Reset Camera . There is also a selection of Camera Controls for fast and precise navigation. To save a position, use the bookmarks controls. Each node within the acceleration structure hierarchy can also be double-clicked to quickly navigate to that location.", "keywords": []}, {"id": 50, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#baselines", "display_name": "Baselines", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "baselines", "priority": -1, "content": "The Baselines tool window can be opened by clicking the Baselines entry in the Profile menu. It provides a centralized place from which to manage configured baselines. (Refer to Baselines for information on how to create baselines from profile results.) The baseline visibility can be controlled by clicking on the check box in a table row. When the check box is checked, the baseline will be visible in the summary header as well as all graphs in all sections. When unchecked, the baseline will be hidden and will not contribute to metric difference calculations. The baseline color can be changed by double-clicking on the color swatch in the table row. The color dialog which is opened provides the ability to choose an arbitrary color, as well as a palette of predefined colors associated with the stock baseline color rotation. The baseline name can be changed by double-clicking on the Name column in the table row. The name must not be empty and must be less than the Maximum Baseline Name Length as specified in the options dialog. The z-order of a selected baseline can be changed by clicking the Move Baseline Up and Move Baseline Down buttons in the tool bar. 
When a baseline is moved up or down, its new position will be reflected in the report header as well as in each graph. Currently, only one baseline may be moved at a time. The selected baselines may be removed by clicking on the Clear Selected Baselines button in the tool bar. All baselines can be removed at once by clicking on the Clear All Baselines button, from either the global tool bar or the tool window tool bar. The configured baselines can be saved to a file by clicking on the Save Baselines button in the tool bar. By default, baseline files use the .ncu-bln extension. Baseline files can be opened locally and/or shared with other users. Baseline information can be loaded by clicking on the Load Baselines button in the tool bar. When a baseline file is loaded, currently configured baselines will be replaced. A dialog will be presented to the user to confirm this operation when necessary. Differences between the current result and the baselines can be visualized with graphical bars for metrics in Details page section headers. Use the Difference Bars drop down to select the visualization mode. Bars extend from left to right and have a fixed maximum.", "keywords": []}, {"id": 51, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#body", "display_name": "Body", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "body", "priority": -1, "content": "The body of this tool window displays a table with sub-launch-specific metrics. This table has four columns: Metric Name : the name of the metric Metric Unit : the unit for metric values Instance Value : the value of this metric for the selected sub-launch Aggregate Value : the aggregate value for this metric over all sub-launches in the selected result", "keywords": []}, {"id": 52, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#call-stack-nvtx-page", "display_name": "Call Stack / NVTX Page", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "call-stack-nvtx-page", "priority": -1, "content": "The CPU Call Stack section of this report page shows the CPU call stack for the executing CPU thread at the time the kernel was launched. For this information to show up in the profiler report, the option to collect CPU call stacks must be enabled in the Connection Dialog or using the corresponding NVIDIA Nsight Compute CLI command line parameter. The NVTX State section of this report page shows the NVTX context when the kernel was launched. All thread-specific information is with respect to the thread of the kernel\u2019s launch API call. Note that NVTX information is only collected if the profiler is started with NVTX support enabled, either in the Connection Dialog or using the NVIDIA Nsight Compute CLI command line parameter.", "keywords": []}, {"id": 53, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#command-line-arguments", "display_name": "Command Line Arguments", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "command-line-arguments", "priority": -1, "content": "Please execute ncu-ui with the -h parameter within a shell window to see the currently supported command line arguments for the NVIDIA Nsight Compute UI. 
To open a collected profile report with ncu-ui, simply pass the path to the report file as a parameter to the shell command.", "keywords": []}, {"id": 54, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#comments-page", "display_name": "Comments Page", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "comments-page", "priority": -1, "content": "The Comments page aggregates all section comments in a single view and allows the user to edit those comments on any launch instance or section, as well as on the overall report. Comments are persisted with the report. If a section comment is added, the comment icon of the respective section in the Details Page will be highlighted.", "keywords": []}, {"id": 55, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#connection", "display_name": "Connection", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "connection", "priority": -1, "content": "Connection properties are grouped into Target Connection Options and Host Connection Properties .", "keywords": []}, {"id": 56, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#connection-dialog", "display_name": "Connection Dialog", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "connection-dialog", "priority": -1, "content": "Use the Connection Dialog to launch and attach to applications on your local and remote platforms. Start by selecting the Target Platform for profiling. By default (and if supported) your local platform will be selected. Select the platform on which you would like to start the target application or connect to a running process. When using a remote platform, you will be asked to select or create a Connection in the top drop down. To create a new connection, select + and enter your connection details. When using the local platform, localhost will be selected as the default and no further connection settings are required. You can still create or select a remote connection, if profiling will be on a remote system of the same platform. Depending on your target platform, select either Launch or Remote Launch to launch an application for profiling on the target. Note that Remote Launch will only be available if supported on the target platform. Fill in the following launch details for the application: Application Executable: Specifies the root application to launch. Note that this may not be the final application that you wish to profile. It can be a script or launcher that creates other processes. Working Directory: The directory in which the application will be launched. Command Line Arguments: Specify the arguments to pass to the application executable. Environment: The environment variables to set for the launched application. Select Attach to attach the profiler to an application already running on the target platform. This application must have been started using another NVIDIA Nsight Compute CLI instance. The list will show all application processes running on the target system which can be attached. Select the refresh button to re-create this list. Finally, select the Activity to be run on the target for the launched or attached application. Note that not all activities are necessarily compatible with all targets and connection options. 
Currently, the following activities exist: Interactive Profile Activity Profile Activity System Trace Activity Occupancy Calculator", "keywords": []}, {"id": 57, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#details-page", "display_name": "Details Page", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "details-page", "priority": -1, "content": "Overview The Details page is the main page for all metric data collected during a kernel launch. The page is split into individual sections. Each section consists of a header table and an optional body that can be expanded. The sections are completely user defined and can be changed easily by updating their respective files. For more information on customizing sections, see the Customization Guide . For a list of sections shipped with NVIDIA Nsight Compute, see Sections and Rules . By default, once a new profile result is collected, all applicable rules are applied. Any rule results will be shown as Recommendations on this page. Most rule results will contain optimization advice along with an estimate of the improvement that could be achieved when successfully implementing this advice. Other rule results will be purely informative or have a warning icon to indicate a problem that occurred during execution (e.g., an optional metric that could not be collected). Results with error icons typically indicate an error while applying the rule. Estimates of potential improvement are shown below the rule result\u2019s name and exist in two types. Global estimates (\u201cEst. Speedup\u201d) are an approximation of the decrease in workload runtime, whereas local estimates (\u201cEst. Local Speedup\u201d) are an approximation of the increase in efficiency of the hardware utilization of the particular performance problem the rule addresses. Rule results often point out performance problems and guide you through the analysis process. If a rule result references another report section, it will appear as a link in the recommendation. Select the link to scroll to the respective section. If the section was not collected in the same profile result, enable it in the Metric Selection tool window. You can add or edit comments in each section of the Details view by clicking on the comment button (speech bubble). The comment icon will be highlighted in sections that contain a comment. Comments are persisted in the report and are summarized in the Comments Page . Use the Comments button to annotate sections. Besides their header, sections typically have one or more bodies with additional charts or tables. Click the triangle Expander icon in the top-left corner of each section to show or hide those. If a section has multiple bodies, a dropdown in their top-right corner allows you to switch between them. Sections with multiple bodies have a dropdown to switch between them. Memory If enabled, the Memory Workload Analysis section contains a Memory chart that visualizes data transfers, cache hit rates, instructions and memory requests. More information on how to use and read this chart can be found in the Kernel Profiling Guide . Occupancy You can open the Occupancy Calculator by clicking on the calculator button in the report header or in the header of the Occupancy Section . 
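As background for the Occupancy section, the quantity reported by the calculator follows the standard definition, stated here for orientation only (refer to the Occupancy Calculator documentation for the exact inputs):

.. math:: \text{occupancy} = \frac{\text{active warps per SM}}{\text{maximum warps per SM}}

where the number of active warps is limited by the block size and the per-SM register and shared memory budgets.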
Range Replay Note that for Range Replay results, some UI elements, analysis rules, metrics or section body items such as charts or tables might not be available, as they only apply to kernel launch-based results. The filters can be checked in the corresponding section files. Rooflines If enabled, the GPU Speed Of Light Roofline Chart section contains a Roofline chart that is particularly helpful for visualizing kernel performance at a glance. (To enable roofline charts in the report, ensure that the section is enabled when profiling.) More information on how to use and read this chart can be found in Roofline Charts . NVIDIA Nsight Compute ships with several different definitions for roofline charts, including hierarchical rooflines. These additional rooflines are defined in different section files. While not part of the full section set, a new section set called roofline was added to collect and show all rooflines in one report. The idea of hierarchical rooflines is that they define multiple ceilings that represent the limiters of a hardware hierarchy. For example, a hierarchical roofline focusing on the memory hierarchy could have ceilings for the throughputs of the L1 cache, L2 cache and device memory. If the achieved performance of a kernel is limited by one of the ceilings of a hierarchical roofline, it can indicate that the corresponding unit of the hierarchy is a potential bottleneck. In the standard roofline model, the attainable performance at a given arithmetic intensity is bounded by the minimum of the peak compute throughput and the product of arithmetic intensity and peak memory bandwidth; each ceiling corresponds to one such bound. Sample roofline chart. The roofline chart can be zoomed and panned for more effective data analysis, using the following controls. Zoom In: click the Zoom In button in the top right corner of the chart; click the left mouse button and drag to create a rectangle that bounds the area of interest; press the plus (+) key; or use Ctrl + MouseWheel (Windows and Linux only). Zoom Out: click the Zoom Out button in the top right corner of the chart; click the right mouse button; press the minus (-) key; or use Ctrl + MouseWheel (Windows and Linux only). Zoom Reset: click the Zoom Reset button in the top right corner of the chart, or press the Escape (Esc) key. Pan: use Ctrl (Command on Mac) + LeftMouseButton to grab the chart, then move the mouse, or use the cursor keys. Source Sections such as Source Counters can contain source hot spot tables. These tables indicate the N highest or lowest values of one or more metrics in your kernel source code. Select the location links to navigate directly to this location in the Source Page . Hover the mouse over a value to see which metrics contribute to it. Hot spot tables point out performance problems in your source. Timelines When collecting metrics with PM sampling , they can be viewed in a timeline . The timeline shows metrics selected in the respective section file or on the command line with their labels/names and their values over time. Different metrics may be collected in different passes (replays) of the workload, as only a limited number of them can be sampled in the same pass. Context switch trace is used to filter the collected data to only include samples from the profiled contexts and to align it in the timeline. You can hover the mouse over a metric row label to see further information on the metrics in the row. Hovering over a sample on the timeline shows the metric values at that timestamp within the current row. With the Metric Details tool window open, click to select a value on the timeline and show the metric and all its raw values, correlated with their timestamps (absolute and relative), in the tool window. 
You can also use the Metric Details tool window to inspect profiler metrics generated during PM sampling. These provide information about the sampling intervals used, buffer sizes, dropped samples, and other properties for each collection pass. A detailed list can be found in the metrics reference . The timeline has a context menu for further actions regarding copying content and zooming. In addition, the Enable/Disable Context Switch Filter option can be used to enable or disable the filtering of the timeline data with context switch information, if it is available. When the context switch filter is enabled (the default), samples from each pass group are only shown for the active contexts. When the context switch filter is disabled, the raw collected sampling data is shown along with a separate row for each pass group\u2019s context switch trace. When the context menu option is not available, the report does not include context switch trace data. In this case, the option Enable/Disable Trim Filter is shown instead, which, when enabled, tries to align the data based on the first non-zero value in any sampling metric in this pass group. However, this fallback does not take into account actual context switches.", "keywords": []}, {"id": 58, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#environment", "display_name": "Environment", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "environment", "priority": -1, "content": "Name Description Values Color Theme The currently selected UI color theme. Dark (Default) Light Mixed DPI Scaling Disable Mixed DPI Scaling if unwanted artifacts are detected when using monitors with different DPIs. Auto (Default) Off Default Document Folder Directory where documents unassociated with a project will be saved. At Startup What to do when NVIDIA Nsight Compute is launched. Show welcome page (Default) Show quick launch dialog Load last project Show empty environment Show version update notifications Show notifications when a new version of this product is available. Yes (Default) No", "keywords": []}, {"id": 59, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#exporting", "display_name": "Exporting", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "exporting", "priority": -1, "content": "The data displayed in the acceleration structure viewer document can be saved to file. Exporting an Acceleration Structure Viewer document allows for persisting the data you have collected beyond the immediate analysis session. This capability is particularly valuable for comparing different revisions of your geometry or sharing with others. Bookmarks are persisted as well.", "keywords": []}, {"id": 60, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#filtering-and-highlighting", "display_name": "Filtering and Highlighting", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "filtering-and-highlighting", "priority": -1, "content": "The acceleration structure view supports acceleration structure filtering as well as highlighting of data matching particular characteristics. The checkboxes next to each geometry allow users to toggle the rendering of each traversable. Geometry instances can also be selected by clicking on them in the main graphical view. 
Additionally, right-clicking in the main graphical view gives options to hide or show all geometry, hide the selected geometry, or hide all but the selected geometry. Beyond filtering, the view also supports highlight-based identification of geometry specified with particular flags. Checking each highlight option will identify those resources matching that flag, colorizing them for easy identification. Clicking an entry in this section will dim all geometry that does not meet the filter criteria, allowing items that match the filter to stand out. When multiple filters are selected, geometry must meet all of them to pass (i.e., AND logic). Additionally, the heading text will be updated to reflect the number of items that meet the filter criteria.", "keywords": []}, {"id": 61, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#gpu-data", "display_name": "GPU Data", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "gpu-data", "priority": -1, "content": "The GPU Data shows the properties of all supported devices. GPU Data", "keywords": []}, {"id": 62, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#graphs", "display_name": "Graphs", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "graphs", "priority": -1, "content": "The graphs show the occupancy for your chosen block size as a blue circle, and for all other possible block sizes as a line graph. Graphs", "keywords": []}, {"id": 63, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#graphviz-dot-and-svg-exports", "display_name": "Graphviz DOT and SVG exports", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "graphviz-dot-and-svg-exports", "priority": -1, "content": "Some of the shown Resources can also be exported to GraphViz DOT or SVG files using the Export to GraphViz or Export to SVG buttons. When exporting OptiX traversable handles , the traversable graph node types will be encoded using shapes and colors as described in the following table. Node Type Shape Color IAS Hexagon #8DD3C7 Triangle GAS Box #FFFFB3 AABB GAS Box #FCCDE5 Curve GAS Box #CCEBC5 Sphere GAS Box #BEBADA Static Transform Diamond #FB8072 SRT Transform Diamond #FDB462 Matrix Motion Transform Diamond #80B1D3 Error Parallelogram #D9D9D9", "keywords": []}, {"id": 64, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#header", "display_name": "Header", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "header", "priority": -1, "content": "On the left side of its header, this tool window displays the selected result\u2019s name and the number of sub-launches it comprises. The right side contains a combo box that allows selection of the sub-launch the body should represent. 
Each element of the combo box contains an index for the sub-launch as well as the name of the function that it launched, if available.", "keywords": []}, {"id": 65, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#host-connection-properties", "display_name": "Host Connection Properties", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "host-connection-properties", "priority": -1, "content": "The Host Connection Properties determine how the command line profiler will connect to the host application during a Profile Activity . This connection is used to transfer profile information to the host during the profile session. Name Description Values Base Port Base port used to establish a connection from the command line profiler to the host application during a Profile activity (both local and remote). 1-65535 (Default: 50152) Maximum Ports Maximum number of ports to try (starting from Base Port ) when attempting to connect to the host application. 1-100 (Default: 10)", "keywords": []}, {"id": 66, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#id2", "display_name": "Interactive Profile Activity", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "id2", "priority": -1, "content": "The Interactive Profile activity allows you to initiate a session that controls the execution of the target application, similar to a debugger. You can step API calls and workloads (CUDA kernels), pause and resume, and interactively select the kernels of interest and which metrics to collect. This activity currently does not support profiling or attaching to child processes. Enable CPU Call Stack Collect the CPU-side Call Stack at the location of each profiled kernel launch. Enable NVTX Support Collect NVTX information provided by the application or its libraries. Required to support stepping to specific NVTX contexts. Disable Profiling Start/Stop Ignore calls to cu(da)ProfilerStart or cu(da)ProfilerStop made by the application. Enable Profiling From Start Enables profiling from the application start. Disabling this is useful if the application calls cu(da)ProfilerStart and kernels launched before the first call to this API should not be profiled. Note that disabling this does not prevent you from manually profiling kernels. Cache Control Control the behavior of the GPU caches during profiling. Allowed values: For Flush All , all GPU caches are flushed before each kernel replay iteration during profiling. While metric values in the execution environment of the application might be slightly different without invalidating the caches, this mode offers the most reproducible metric results across the replay passes and also across multiple runs of the target application. For Flush None , no GPU caches are flushed during profiling. This can improve performance and better replicates the application behavior if only a single kernel replay pass is necessary for metric collection. However, some metric results will vary depending on prior GPU work, and between replay iterations. This can lead to inconsistent and out-of-bounds metric values. Clock Control Control the behavior of the GPU clocks during profiling. Allowed values: For Base , GPC and memory clocks are locked to their respective base frequency during profiling. This has no impact on thermal throttling. For None , no GPC or memory frequencies are changed during profiling. 
Import Source Enables permanently importing available source files into the report. Missing source files are searched for in Source Lookup folders. Source information must be embedded in the executable, e.g. via the -lineinfo compiler option. Imported files are used in the CUDA-C view on the Source Page . Graph Profiling Set if CUDA graphs should be stepped and profiled as individual Nodes or as complete Graphs . See the Kernel Profiling Guide for more information on this mode.", "keywords": []}, {"id": 67, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#id3", "display_name": "Main Menu", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "id3", "priority": -1, "content": "File New Project Create new profiling Projects with the New Project Dialog . Open Project Open an existing profiling project. Recent Projects Open an existing profiling project from the list of recently used projects. Save Project Save the current profiling project. Save Project As Save the current profiling project with a new filename. Close Project Close the current profiling project. New File Create a new file. Open File Open an existing file. Open Remote File Download an existing file from a remote host and open it locally. The opened file will only exist in memory and will not be written to the local machine\u2019s disk unless the user explicitly saves it. For more information concerning the selection of a remote host to download the file from, see the section about Remote Connections . Only a subset of file types that are supported locally can be opened from a remote target. The following table lists file types that can be opened remotely. Extensions Description Supported ncu-rep Nsight Compute Profiler Report Yes ncu-occ Occupancy Calculator File Yes ncu-bvh OptiX AS Viewer File Yes (except on MacOSX) section Section Description No cubin Cubin File No cuh,h,hpp Header File No c,cpp,cu Source File No txt Text file No nsight-cuprof-report Nsight Compute Profiler Report (legacy) Yes Save Save the current file. Save As Save a copy of the current file with a different name or type, or in a different location. Save All Files Save all open files. Close Close the current file. Close All Files Close all open files. Recent Files Open an existing file from the list of recently used files. Exit Exit Nsight Compute. Connection Connect Open the Connection Dialog to launch or attach to a target application. Disabled when already connected. Disconnect Disconnect from the current target application, allowing the application to continue normally and potentially re-attach. Terminate Disconnect from and terminate the current target application immediately. Debug Pause Pause the target application at the next intercepted API call or launch. Resume Resume the target application. Step In Step into the current API call or launch to the next nested call, if any, or to the subsequent API call otherwise. Step Over Step over the current API call or launch and suspend at the next, non-nested API call or launch. Step Out Step out of the current nested API call or launch to the next, non-parent API call or launch one level above. Freeze API When disabled, all CPU threads are enabled and continue to run during stepping or resume, and all threads stop as soon as at least one thread arrives at the next API call or launch. 
This also means that during stepping or resume, the currently selected thread might change as the old selected thread makes no forward progress and the API Stream automatically switches to the thread with a new API call or launch. When enabled, only the currently selected CPU thread is enabled. All other threads are disabled and blocked. Stepping now completes if the current thread arrives at the next API call or launch. The selected thread never changes. However, if the selected thread does not call any further API calls or waits at a barrier for another thread to make progress, stepping may not complete and may hang indefinitely. In this case, pause, select another thread, and continue stepping until the original thread is unblocked. In this mode, only the selected thread will ever make forward progress. Break On API Error When enabled, during resume or stepping, execution is suspended as soon as an API call returns an error code. Run to Next Kernel See API Stream tool window. Run to Next API Call See API Stream tool window. Run to Next Range Start See API Stream tool window. Run to Next Range End See API Stream tool window. API Statistics Opens the API Statistics tool window. API Stream Opens the API Stream tool window. Resources Opens the Resources tool window. NVTX Opens the NVTX tool window. Profile Profile Kernel When suspended at a kernel launch, profile the kernel using the current configuration. Profile Series When suspended at a kernel launch, open the Profile Series configuration dialog to set up and collect a series of profile results. Auto Profile Enable or disable auto profiling. If enabled, each kernel matching the current kernel filter (if any) will be profiled using the current section configuration. Baselines Opens the Baselines tool window. Clear Baselines Clear all current baselines. Import Source Permanently import resolved source files into the report. Existing content may be overwritten. Section/Rules Info Opens the Metric Selection tool window. Tools Project Explorer Opens the Project Explorer tool window. Output Messages Opens the Output Messages tool window. Options Opens the Options dialog. Window Save Window Layout Allows you to specify a name for the current layout. The layouts are saved to a Layouts folder in the documents directory as \u201c.nvlayout\u201d files. Apply Window Layout Once you have saved a layout, you can restore it by using the \u201cApply Window Layout\u201d menu entry. Simply select the entry you want to apply from the sub-menu. Manage Window Layout Allows you to delete or rename old layouts. Restore Default Layout Restore views to their original size and position. Show Welcome Page Opens the Welcome Page . Help Documentation Opens the latest documentation for NVIDIA Nsight Compute online. Documentation (local) Opens the local HTML documentation for NVIDIA Nsight Compute that has shipped with the tool. Check For Updates Checks online if a newer version of NVIDIA Nsight Compute is available for download. Reset Application Data Reset all NVIDIA Nsight Compute configuration data saved on disk, including option settings, default paths, recent project references, etc. This will not delete saved reports. Send Feedback Opens a dialog that allows you to send bug reports and suggestions for features. Optionally, the feedback includes basic system information, screenshots, or additional files (such as profile reports). 
About Opens the About dialog with information about the version of NVIDIA Nsight Compute.", "keywords": []}, {"id": 68, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#id8", "display_name": "Baselines", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "id8", "priority": -1, "content": "NVIDIA Nsight Compute supports diffing collected results across one or multiple reports using Baselines. Each result in any report can be promoted to a baseline. This causes metric values from all results in all reports to show the difference to the baseline. If multiple baselines are selected simultaneously, metric values are compared to the average across all current baselines. Baselines are not stored with a report and are only available as long as the same NVIDIA Nsight Compute instance is open, unless they are saved to an .ncu-bln file from the Baselines tool window . Profiler report with one baseline Select Add Baseline to promote the current result in focus to become a baseline. If a baseline is set, most metrics on the Details Page , Raw Page and Summary Page show two values: the current value of the result in focus, and the corresponding value of the baseline or the percentage of change from the corresponding baseline value. (Note that an infinite percentage gain, inf% , may be displayed when the baseline value for the metric is zero, while the focus value is not.) If multiple baselines are selected, each metric will show the following notation: &lt;focus value&gt; (&lt;difference to baselines average [%]&gt;, z=&lt;standard score&gt;@&lt;number of values&gt;) The standard score is the difference between the current value and the average across all baselines, normalized by the standard deviation. If the number of metric values contributing to the standard score equals the number of results (current and all baselines), the @&lt;number of values&gt; notation is omitted. Profiler report with multiple baselines Double-clicking on a baseline name allows the user to edit the displayed name. Edits are committed by pressing Enter/Return or upon loss of focus, and abandoned by pressing Esc . Hovering over the baseline color icon allows the user to remove this specific baseline from the list. Use the Clear Baselines entry from the dropdown button, the Profile menu, or the corresponding toolbar button to remove all baselines. Baseline changes can also be made in the Baselines tool window .", "keywords": []}, {"id": 69, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#interactive-profile-activity", "display_name": "Interactive Profile Activity", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "interactive-profile-activity", "priority": -1, "content": "Launch the target application from NVIDIA Nsight Compute When starting NVIDIA Nsight Compute, the Welcome Page will appear. Click on Quick Launch to open the Connection dialog. If the Connection dialog doesn\u2019t appear, you can open it using the Connect button from the main toolbar, as long as you are not currently connected. Select your target platform on the left-hand side and your connection target (machine) from the Connection drop down. If you have your local target platform selected, localhost will become available as a connection. Use the + button to add a new connection target. Then, continue by filling in the details in the Launch tab. 
In the Activity panel, select the Interactive Profile activity to initiate a session that allows controlling the execution of the target application and selecting the kernels of interest interactively. Press Launch to start the session. Launch the target application with tools instrumentation from the command line The ncu command line tool can act as a simple wrapper that forces the target application to load the necessary libraries for tools instrumentation. The parameter --mode=launch specifies that the target application should be launched and suspended before the first instrumented API call. That way, the application waits until we connect with the UI. $ ncu --mode=launch CuVectorAddDrv.exe Launch NVIDIA Nsight Compute and connect to target application Select the target machine at the top of the dialog to connect and update the list of attachable applications. By default, localhost is pre-selected if the target matches your current local platform. Select the Attach tab and the target application of interest and press Attach . Once connected, the layout of NVIDIA Nsight Compute changes into stepping mode, which allows you to control the execution of any calls into the instrumented API. When connected, the API Stream window indicates that the target application waits before the very first API call. Control application execution Use the API Stream window to step the calls into the instrumented API. The dropdown at the top allows switching between different CPU threads of the application. Step In (F11), Step Over (F10), and Step Out (Shift + F11) are available from the Debug menu or the corresponding toolbar buttons. While stepping, function return values and function parameters are captured. Use Resume (F5) and Pause to allow the program to run freely. Freeze control is available to define the behavior of threads currently not in focus, i.e., not selected in the thread drop down. By default, the API Stream stops on any API call that returns an error code. This can be toggled in the Debug menu by Break On API Error . Isolate a kernel launch To quickly isolate a kernel launch for profiling, use the Run to Next Kernel button in the toolbar of the API Stream window to jump to the next kernel launch. The execution will stop before the kernel launch is executed. Profile a kernel launch Once the execution of the target application is suspended at a kernel launch, additional actions become available in the UI. These actions are either available from the menu or from the toolbar. Please note that the actions are disabled if the API stream is not at a qualifying state (not at a kernel launch or launching on an unsupported GPU). To profile, press Profile Kernel and wait until the result is shown in the Profiler Report . Profiling progress is reported in the lower right corner status bar. Instead of manually selecting Profile , it is also possible to enable Auto Profile from the Profile menu. If enabled, each kernel matching the current kernel filter (if any) will be profiled using the current section configuration. This is especially useful if an application is to be profiled unattended, or the number of kernel launches to be profiled is very large. Sections can be enabled or disabled using the Metric Selection tool window. Profile Series allows you to configure the collection of a set of profile results at once. Each result in the set is profiled with varying parameters. 
Series are useful for investigating the behavior of a kernel across a large set of parameters without the need to recompile and rerun the application many times. For a detailed description of the options available in this activity, see Interactive Profile Activity .", "keywords": []}, {"id": 70, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#introduction", "display_name": "Introduction", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "introduction", "priority": -1, "content": "For users migrating from Visual Profiler to NVIDIA Nsight Compute, please see the Visual Profiler Transition Guide for a comparison of features and workflows.", "keywords": []}, {"id": 71, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#launch-details", "display_name": "Launch Details", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "launch-details", "priority": -1, "content": "The Launch Details tool window can be opened using the Launch Details entry in the Profile menu or the respective tool bar button. When a result containing multiple sub-launches is selected and this tool window is open, it will display information about each sub-launch contained in the result. This tool window is split into two sections: a header displaying information applying to the result as a whole, and a body displaying information specific to the viewed sub-launch", "keywords": []}, {"id": 72, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#limitations", "display_name": "Limitations", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "limitations", "priority": -1, "content": "Range Replay When using Range Replay mode, instruction-level source metrics are not available. Graph Profiling When profiling complete CUDA graphs, instruction-level source metrics are not available.", "keywords": []}, {"id": 73, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#main-menu-and-toolbar", "display_name": "Main Menu and Toolbar", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "main-menu-and-toolbar", "priority": -1, "content": "Information on the main menu and toolbar.", "keywords": []}, {"id": 74, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#main-toolbar", "display_name": "Main Toolbar", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "main-toolbar", "priority": -1, "content": "The main toolbar shows commonly used operations from the main menu. See Main Menu for their description.", "keywords": []}, {"id": 75, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#memory-allocations", "display_name": "Memory Allocations", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "memory-allocations", "priority": -1, "content": "When using the asynchronous malloc/free APIs, the resource view for Memory Allocation will also include the memory objects created in this manner. These memory objects have a non-zero memory pool handle. The Mode column will indicate which code path was taken during the allocation of the corresponding object. 
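For reference, such pool-backed objects are created with the stream-ordered allocation APIs. A minimal sketch, assuming CUDA 11.2 or later (the size and stream usage are illustrative):

.. code:: cuda

   #include <cuda_runtime.h>

   int main() {
       cudaStream_t stream;
       cudaStreamCreate(&stream);
       void* ptr = nullptr;
       // Allocation and free are ordered on 'stream'; memory released by
       // cudaFreeAsync may be reused by later allocations, which is what
       // the Mode column classifies.
       cudaMallocAsync(&ptr, 1 << 20, stream);
       cudaFreeAsync(ptr, stream);
       cudaStreamSynchronize(stream);
       cudaStreamDestroy(stream);
       return 0;
   }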
The modes are: REUSE_STREAM_SUBPOOL: The memory object was allocated in memory that was previously freed. The memory was backed by the memory pool set as current for the stream on which the allocation was made. USE_EXISTING_POOL_MEMORY: The memory object was allocated in memory that was previously freed. The memory is backed by the default memory pool of the stream on which the allocation was made. REUSE_EVENT_DEPENDENCIES: The memory object was allocated in memory that was previously freed in another stream of the same context. A stream ordering dependency of the allocating stream on the free action existed. CUDA events and null stream interactions can create the required stream-ordered dependencies. REUSE_OPPORTUNISTIC: The memory object was allocated in memory that was previously freed in another stream of the same context. However, no dependency between the free and the allocation existed. This mode requires that the free be already committed at the time the allocation is requested. Changes in execution behavior might result in different modes for multiple runs of the application. REUSE_INTERNAL_DEPENDENCIES: The memory object was allocated in memory that was previously freed in another stream of the same context. New internal stream dependencies may have been added in order to establish the stream ordering required to reuse a piece of memory previously released. REQUEST_NEW_ALLOCATION: New memory had to be allocated for this memory object as no viable reusable pool memory was found. The allocation performance is comparable to using the non-asynchronous malloc/free APIs.", "keywords": []}, {"id": 76, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#metric-details", "display_name": "Metric Details", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "metric-details", "priority": -1, "content": "The Metric Details tool window can be opened using the Metric Details entry in the Profile menu or the respective tool bar button. When a report and the tool window are open, a metric can be selected in the report to display additional information in the tool window. It also contains a search bar to look up metrics in the focused report. Report metrics can be selected in the Details Page or the Raw Page . The window will show basic information (name, unit and raw value of the metric) as well as additional information, such as its extended description. The search bar can be used to open metrics in the focused report. It shows available matches as you type. The entered string must match from the start of the metric name. By default, selecting or searching for a new metric updates the current Default Tab . You can click the Pin Tab button to create a copy of the default tab, unless the same metric is already pinned. This makes it possible to save multiple tabs and quickly switch between them to compare values. Some metrics contain Instance Values . When available, they are listed in the tool window. Instance values can have a Correlation ID that allows correlating the individual value with its associated entity, e.g. a function address or instruction name. For metrics collected with PM sampling , the correlation ID is the GPU timestamp in nanoseconds. 
It is shown as an absolute value and relative to the first timestamp for this metric.", "keywords": []}, {"id": 77, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#metric-selection", "display_name": "Metric Selection", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "metric-selection", "priority": -1, "content": "The Metric Selection window can be opened from the main menu using Profile &gt; Metric Selection . It tracks all metric sets, sections and rules currently loaded in NVIDIA Nsight Compute, independent of a specific connection or report. The directory to load those files from can be configured in the Profile options dialog. It is used to inspect available sets, sections and rules, as well as to configure which should be collected, and which rules should be applied. You can also specify a comma-separated list of individual metrics that should be collected. The window has two views, which can be selected using the dropdown in its header. The Metric Sets view shows all available metric sets. Each set is associated with a number of metric sections. You can choose a set appropriate to the level of detail for which you want to collect performance metrics. Sets which collect more detailed information normally incur higher runtime overhead during profiling. When enabling a set in this view, the associated metric sections are enabled in the Metric Sections/Rules view. When disabling a set in this view, the associated sections in the Metric Sections/Rules view are disabled. If no set is enabled, or if sections are manually enabled/disabled in the Metric Sections/Rules view, the &lt; custom &gt; entry is marked active to represent that no section set is currently enabled. Note that the basic set is enabled by default. Whenever a kernel is profiled manually, or when auto-profiling is enabled, only sections enabled in the Metric Sections/Rules view and individual metrics specified in the input box are collected. Similarly, whenever rules are applied, only rules enabled in this view are active. The enabled states of sections and rules are persisted across NVIDIA Nsight Compute launches. The Reload button reloads all sections and rules from disk. If a new section or rule is found, it will be enabled if possible. If any errors occur while loading a rule, they will be listed in an extra entry with a warning icon and a description of the error. Use the Enable All and Disable All checkboxes to enable or disable all sections and rules at once. The Filter text box can be used to filter what is currently shown in the view. It does not alter activation of any entry. The table shows sections and rules with their activation status, their relationship and further parameters, such as associated metrics or the original file on disk. Rules associated with a section are shown as children of their section entry. Rules independent of any section are shown under an additional Independent Rules entry. Double-clicking an entry in the table\u2019s Filename column opens this file as a document. It can be edited and saved directly in NVIDIA Nsight Compute. After editing the file, Reload must be selected to apply those changes. When a section or rule file is modified, the entry in the State column will show User Modified to reflect that it has been modified from its default state. When a User Modified row is selected, the Restore button will be enabled. 
Clicking the Restore button will restore the entry to its default state and automatically Reload the sections and rules. Similarly, when a stock section or rule file is removed from the configured Sections Directory (specified in the Profile options dialog), the State column will show User Deleted . User Deleted files can also be restored using the Restore button. Section and rule files that are created by the user (and not shipped with NVIDIA Nsight Compute) will show up as User Created in the State column. See Sections and Rules for the list of default sections for NVIDIA Nsight Compute.", "keywords": []}, {"id": 78, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#metrics", "display_name": "Metrics", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "metrics", "priority": -1, "content": "Metrics Correlation The page is most useful when inspecting performance information and metrics correlated with your code. Metrics are shown in columns, which can be enabled or disabled using the Column Chooser, accessible via the column header right-click menu. To keep columns from moving out of view when scrolling horizontally, they can be fixed. By default, the Source column is fixed to the left, enabling easy inspection of all metrics correlated to a source line. To change fixing of columns, right-click the column header and select Freeze or Unfreeze , respectively. The heatmap on the right-hand side of each view can be used to quickly identify locations with high values of the metric currently selected in the dropdown. The heatmap uses a black-body radiation color scale where black denotes the lowest mapped value and white the highest, respectively. The current scale is shown when clicking and holding the heatmap with the right mouse button. By default, applicable metrics are shown as percentage values relative to their sum across the launch. A bar fills from left to right to indicate the value at a specific source location relative to this metric\u2019s maximum within the launch. The [%] and [+-] buttons can be used to switch the display from relative to absolute and from abbreviated absolute to full-precision absolute, respectively. For relative values and bars, the [circle/pie] button can be used to switch the display between relative to global (launch) and relative to local (function/file) scope. This button is disabled when the view is collapsed, as percentages are always relative to the global launch scope in this case. Pre-Defined Source Metrics Live Registers Number of registers that need to be kept valid by the compiler. A high value indicates that many registers are required at this code location, potentially increasing the register pressure and the maximum number of registers required by the kernel. The total number of registers reported as launch__registers_per_thread may be significantly higher than the maximum live registers. The compiler may need to allocate specific registers, which can create holes in the allocation, thereby affecting launch__registers_per_thread , even if the maximum number of live registers is smaller. This may happen due to ABI restrictions, or restrictions enforced by particular hardware instructions. The compiler may not have a complete picture of which registers may be used in either callee or caller and has to obey ABI conventions, thereby allocating different registers even if some registers could theoretically have been re-used. 
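Outside the profiler, the compiler's static per-thread register allocation (closely related to launch__registers_per_thread ) can also be queried at runtime. A minimal sketch using the CUDA runtime API (the kernel is illustrative):

.. code:: cuda

   #include <cstdio>
   #include <cuda_runtime.h>

   __global__ void kernelOfInterest(float* data) {
       int i = blockIdx.x * blockDim.x + threadIdx.x;
       data[i] *= 2.0f;
   }

   int main() {
       cudaFuncAttributes attr;
       cudaFuncGetAttributes(&attr, kernelOfInterest);
       // numRegs is the compiler's static register allocation per thread
       printf("registers per thread: %d\n", attr.numRegs);
       return 0;
   }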
Warp Stall Sampling (All Samples)  1 The number of samples from the Statistical Sampler at this program location. Warp Stall Sampling (Not-issued Samples)  2 The number of samples from the Statistical Sampler at this program location on cycles the warp scheduler issued no instructions. Note that (Not Issued) samples may be taken on a different profiling pass than (All) samples mentioned above, so their values do not strictly correlate. This metric is only available on devices with compute capability 7.0 or higher. Instructions Executed Number of times the source (instruction) was executed per individual warp, independent of the number of participating threads within each warp. Thread Instructions Executed Number of times the source (instruction) was executed by any thread, regardless of predicate presence or evaluation. Predicated-On Thread Instructions Executed Number of times the source (instruction) was executed by any active, predicated-on thread. For instructions that are executed unconditionally (i.e. without predicate), this is the number of active threads in the warp, multiplied by the respective Instructions Executed value. Avg. Threads Executed Average number of thread-level executed instructions per warp, regardless of their predicate. Avg. Predicated-On Threads Executed Average number of predicated-on thread-level executed instructions per warp. Divergent Branches Number of divergent branch targets, including fallthrough. Incremented only when there are two or more active threads with divergent targets. Divergent branches can lead to warp stalls due to resolving the branch or instruction cache misses. Information on Memory Operations Label Name Description Address Space memory_type The accessed address space (global/local/shared). Access Operation memory_access_type The type of memory access (e.g. load or store). Access Size memory_access_size_type The size of the memory access, in bits. L1 Tag Requests Global memory_l1_tag_requests_global Number of L1 tag requests generated by global memory instructions. L1 Conflicts Shared N-Way derived__memory_l1_conflicts_shared_nway Average N-way conflict in L1 per shared memory instruction. A 1-way access has no conflicts and resolves in a single pass. Note: This is a derived metric which cannot be collected directly. L1 Wavefronts Shared Excessive derived__memory_l1_wavefronts_shared_excessive Excessive number of wavefronts in L1 from shared memory instructions, because not all not predicated-off threads performed the operation. Note: This is a derived metric which cannot be collected directly. L1 Wavefronts Shared memory_l1_wavefronts_shared Number of wavefronts in L1 from shared memory instructions. L1 Wavefronts Shared Ideal memory_l1_wavefronts_shared_ideal Ideal number of wavefronts in L1 from shared memory instructions, assuming each not predicated-off thread performed the operation. L2 Theoretical Sectors Global Excessive derived__memory_l2_theoretical_sectors_global_excessive Excessive theoretical number of sectors requested in L2 from global memory instructions, because not all not predicated-off threads performed the operation. Note: This is a derived metric which cannot be collected directly. L2 Theoretical Sectors Global memory_l2_theoretical_sectors_global Theoretical number of sectors requested in L2 from global memory instructions. 
L2 Theoretical Sectors Global Ideal memory_l2_theoretical_sectors_global_ideal Ideal number of sectors requested in L2 from global memory instructions, assuming each not-predicated-off thread performed the operation. L2 Theoretical Sectors Local memory_l2_theoretical_sectors_local Theoretical number of sectors requested in L2 from local memory instructions. All L1/L2 Sectors/Wavefronts/Requests metrics give the number of achieved (actually required), ideal, and excessive (achieved - ideal) sectors/wavefronts/requests. Ideal metrics indicate the number that would be needed if each not-predicated-off thread performed the operation of the given width. Excessive metrics indicate the required surplus over the ideal case. Reducing divergence between threads can reduce the excess amount and result in less work for the respective HW units. Several of the above metrics on memory operations were renamed in version 2021.2 as follows: Old name New name memory_l2_sectors_global memory_l2_theoretical_sectors_global memory_l2_sectors_global_ideal memory_l2_theoretical_sectors_global_ideal memory_l2_sectors_local memory_l2_theoretical_sectors_local memory_l1_sectors_global memory_l1_tag_requests_global memory_l1_sectors_shared memory_l1_wavefronts_shared memory_l1_sectors_shared_ideal memory_l1_wavefronts_shared_ideal L2 Explicit Evict Policy Metrics Starting with the NVIDIA Ampere architecture, the eviction policy of the L2 cache can be tuned to match the kernel\u2019s access pattern. The eviction policy can either be set implicitly for a memory window (for more details see CUaccessProperty ) or set explicitly per executed memory instruction. If set explicitly, the desired eviction behaviors for the cases of an L2 cache hit or miss are passed as input to the instruction. For more details refer to CUDA\u2019s Cache Eviction Priority Hints . Label Name Description L2 Explicit Evict Policies smsp__inst_executed_memdesc_explicit_evict_type Comma-separated list of configured explicit eviction policies. As the policies can be set dynamically at runtime, this list includes all policies that were part of any executed instruction. L2 Explicit Hit Policy Evict First smsp__inst_executed_memdesc_explicit_hitprop_evict_first Number of times a memory instruction was executed by any warp which had the evict_first policy set in case the access leads to a cache hit in L2. Data cached with this policy will be first in the eviction priority order and will likely be evicted when cache eviction is required. This policy is suitable for streaming data. L2 Explicit Hit Policy Evict Last smsp__inst_executed_memdesc_explicit_hitprop_evict_last Number of times a memory instruction was executed by any warp which had the evict_last policy set in case the access leads to a cache hit in L2. Data cached with this policy will be last in the eviction priority order and will likely be evicted only after other data with evict_normal or evict_first eviction policy is already evicted. This policy is suitable for data that should remain persistent in cache. L2 Explicit Hit Policy Evict Normal smsp__inst_executed_memdesc_explicit_hitprop_evict_normal Number of times a memory instruction was executed by any warp which had the evict_normal (default) policy set in case the access leads to a cache hit in L2.
L2 Explicit Hit Policy Evict Normal Demote smsp__inst_executed_memdesc_explicit_hitprop_evict_normal_demote Number of times a memory instruction was executed by any warp which had the evict_normal_demote policy set in case the access leads to a cache hit in L2. L2 Explicit Miss Policy Evict First smsp__inst_executed_memdesc_explicit_missprop_evict_first Number of times a memory instruction was executed by any warp which had the evict_first policy set in case the access leads to a cache miss in L2. Data cached with this policy will be first in the eviction priority order and will likely be evicted when cache eviction is required. This policy is suitable for streaming data. L2 Explicit Miss Policy Evict Normal smsp__inst_executed_memdesc_explicit_missprop_evict_normal Number of times a memory instruction was executed by any warp which had the evict_normal (default) policy set in case the access leads to a cache miss in L2. Individual Warp Stall Sampling Metrics All stall_* metrics individually show the information that is combined in Warp Stall Sampling . See Statistical Sampler for their descriptions. See the Customization Guide on how to add additional metrics for this view and the Metrics Reference for further information on available metrics. Register Dependencies Dependencies between registers are displayed in the SASS view. When a register is read, all the potential addresses where it could have been written are found. The links between these lines are drawn in the view. All dependencies for registers, predicates, uniform registers and uniform predicates are shown in their respective columns. The picture above shows some dependencies for a simple CUDA kernel. On the first row, which is line 9 of the SASS code, we can see writes on registers R2 and R3, represented by filled triangles pointing to the left . These registers are then read on lines 17, 20 and 23, and this is represented by regular triangles pointing to the right . There are also some lines where both types of triangles are on the same line, which means that a read and a write occurred for the same register. Dependencies across source files and functions are not tracked. The Register Dependencies Tracking feature is enabled by default, but can be disabled completely in Tools &gt; Options &gt; Profile &gt; Report Source Page &gt; Enable Register Dependencies . 1 This metric was previously called Sampling Data (All). 2 This metric was previously called Sampling Data (Not Issued).", "keywords": []}, {"id": 79, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#metrics-and-units", "display_name": "Metrics and Units", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "metrics-and-units", "priority": -1, "content": "Numeric metric values are shown in various places in the report, including the header and tables and charts on most pages. NVIDIA Nsight Compute supports various ways to display those metrics and their values. When available and applicable to the UI component, metrics are shown along with their unit. This is to make it apparent if a metric represents cycles, threads, bytes/s, and so on. The unit will normally be shown in rectangular brackets, e.g. Metric Name [bytes] 128 . By default, units are scaled automatically so that metric values are shown with a reasonable order of magnitude. Units are scaled using their SI-factors, i.e. byte-based units are scaled using a factor of 1000 and the prefixes K, M, G, etc.
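As a worked example of this scaling (the value is illustrative only), a raw metric value of 1,250,000 bytes is displayed as 1.25 MBytes, since two factor-1000 steps are applied.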
Time-based units are also scaled using a factor of 1000, with the prefixes n, u and m. This scaling can be disabled in the Profile options. Metrics which could not be collected are shown as n/a and assigned a warning icon. If the metric floating point value is out of the regular range (i.e. nan (Not a number) or inf (infinite)), it is also assigned a warning icon. The exceptions are metrics for which these values are expected and which are allow-listed internally.", "keywords": []}, {"id": 80, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#navigate-the-report", "display_name": "Navigate the Report", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "navigate-the-report", "priority": -1, "content": "Navigate the report The profile report comes up by default on the Details page. You can switch between different Report Pages of the report with the dropdown labeled Page on the top-left of the report. You can also use the Ctrl + Shift + N and Ctrl + Shift + P shortcut keys or the corresponding toolbar buttons to navigate to the next and previous pages, respectively. A report can contain any number of results from kernel launches. The Result dropdown allows switching between the different results in a report. Diffing multiple results On the Details page, press the Add Baseline button to make the current result the baseline that all other results from this report, and from any other report opened in the same instance of NVIDIA Nsight Compute, are compared to. If a baseline is set, every element on the Details page shows two values: the current value of the result in focus and the corresponding value of the baseline, or the percentage of change from the corresponding baseline value. Use the Clear Baselines entry from the dropdown button, the Profile menu or the corresponding toolbar button to remove all baselines. For more information see Baselines . Executing rules On the Details page, some sections may provide rules. Press the Apply button to execute an individual rule. The Apply Rules button on the top executes all available rules for the current result in focus. Rules can be user-defined too. For more information see the Customization Guide .", "keywords": []}, {"id": 81, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#navigation", "display_name": "Navigation", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "navigation", "priority": -1, "content": "The View dropdown can be used to select different code (correlation) options: SASS, PTX and Source (CUDA-C, Fortran, Python, \u2026). In side-by-side views, when selecting a line on the left-hand or right-hand side, any correlated lines in the opposite view are highlighted. However, when the Show Single File For Multi-File Sources option is set to Yes , the target file or source object must already be selected in the respective view for those correlated lines to be shown. The Source dropdown allows you to switch between the files or functions that provide the content in the view. When a different source entry is selected, the view scrolls to the start of this file or function. If a view contains multiple source files or functions, [+] and [-] buttons are shown. These can be used to expand or collapse the view, thereby showing or hiding the file or function content except for its header.
If collapsed, all metrics are shown aggregated to provide a quick overview. You can use the Find (source code) line edit to search the Source column of each view. Enter the text to search and use the associated buttons to find the next or previous occurrence in this column. While the line edit is selected, you can also use the Enter or Shift + Enter keys to search for the next or previous occurrence, respectively. The SASS view is filtered to only show functions that were executed in the launch. You can toggle the Show Only Executed Functions option to change this, but the performance of this page may be negatively affected for large binaries. It is possible that some SASS instructions are shown as N/A . Those instructions are not currently exposed publicly. Only filenames are shown in the view, together with a File Not Found error, if the source files cannot be found in their original location. This can occur, for example, if the report was moved to a different system. Select a filename and click the Resolve button above to specify where this source can be found on the local filesystem. However, the view always shows the source files if the import source option was selected during profiling, and the files were available at that time. If a file is found in its original or any source lookup location, but its attributes don\u2019t match, a File Mismatch error is shown. See the Source Lookup options for changing file lookup behavior. If the report was collected using remote profiling, and automatic resolution of remote files is enabled in the Profile options, NVIDIA Nsight Compute will attempt to load the source from the remote target. If the connection credentials are not yet available in the current NVIDIA Nsight Compute instance, you are prompted for them in a dialog. Loading from a remote target is currently only available for Linux x86_64 targets and Linux and Windows hosts.", "keywords": []}, {"id": 82, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#non-interactive-profile-activity", "display_name": "Non-Interactive Profile Activity", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "non-interactive-profile-activity", "priority": -1, "content": "Launch the target application from NVIDIA Nsight Compute When starting NVIDIA Nsight Compute, the Welcome Page will appear. Click on Quick Launch to open the Connection dialog. If the Connection dialog doesn\u2019t appear, you can open it using the Connect button from the main toolbar, as long as you are not currently connected. Select your target platform on the left-hand side and your localhost from the Connection dropdown. Then, fill in the launch details. In the Activity panel, select the Profile activity to initiate a session that pre-configures the profile session and launches the command line profiler to collect the data. Provide the Output File name to enable starting the session with the Launch button. Additional Launch Options For more details on these options, see Command Line Options . The options are grouped into tabs: The Filter tab exposes the options to specify which kernels should be profiled. Options include the kernel regex filter, the number of launches to skip, and the total number of launches to profile. The Sections tab allows you to select which sections should be collected for each kernel launch. Hover over a section to see its description as a tool-tip.
To change the sections that are enabled by default, use the Metric Selection tool window. The Sampling tab allows you to configure sampling options for each kernel launch. The Other tab includes the option to collect NVTX information or custom metrics via the --metrics option. For a detailed description of the options available in this activity, see Profile Activity .", "keywords": []}, {"id": 83, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#nvtx", "display_name": "NVTX", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "nvtx", "priority": -1, "content": "The NVTX window is available when NVIDIA Nsight Compute is connected to a target application. If closed, it can be re-opened using Debug &gt; NVTX from the main menu. Whenever the target application is suspended, the window shows the state of all active NVTX domains and ranges in the currently selected thread. Note that NVTX information is only tracked if the launching command line profiler instance was started with --nvtx or NVTX was enabled in the NVIDIA Nsight Compute launch dialog. Use the Current Thread dropdown in the API Stream window to change the currently selected thread. NVIDIA Nsight Compute supports NVTX named resources, such as threads, CUDA devices, CUDA contexts, etc. If a resource is named using NVTX, the appropriate UI elements will be updated.", "keywords": []}, {"id": 84, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#occupancy-calculator", "display_name": "Occupancy Calculator", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "occupancy-calculator", "priority": -1, "content": "NVIDIA Nsight Compute provides an Occupancy Calculator that allows you to compute the multiprocessor occupancy of a GPU for a given CUDA kernel. It offers feature parity with the CUDA Occupancy Calculator spreadsheet . The Occupancy Calculator can be opened directly from a profile report or as a new activity. The occupancy calculator data can be saved to a file using File &gt; Save . By default, the file uses the .ncu-occ extension. The occupancy calculator file can be opened using File &gt; Open File . Launching from the Connection Dialog Select the Occupancy Calculator activity from the connection dialog. You can optionally specify an occupancy calculator data file, which is used to initialize the calculator with the data from the saved file. Click the Launch button to open the Occupancy Calculator. Launching from the Profiler Report The Occupancy Calculator can be opened from the Profiler Report using the calculator button located in the report header or in the header of the Occupancy section on the Details Page . Details page header Occupancy section header The user interface consists of an input section as well as tables and graphs that display information about GPU occupancy. To use the calculator, change the input values in the input section, click the Apply button, and examine the tables and graphs.", "keywords": []}, {"id": 85, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#options", "display_name": "Options", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "options", "priority": -1, "content": "NVIDIA Nsight Compute options can be accessed via the main menu under Tools &gt; Options .
All options are persisted on disk and available the next time NVIDIA Nsight Compute is launched. When an option is changed from its default setting, its label will become bold. You can use the Restore Defaults button to restore all options to their default values. Profile options", "keywords": []}, {"id": 86, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#overview", "display_name": "Overview", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "overview", "priority": -1, "content": "This document is a user guide to the next-generation NVIDIA Nsight Compute profiling tools. NVIDIA Nsight Compute is an interactive kernel profiler for CUDA applications. It provides detailed performance metrics and API debugging via a user interface and command line tool. In addition, its baseline feature allows users to compare results within the tool. NVIDIA Nsight Compute provides a customizable and data-driven user interface and metric collection and can be extended with analysis scripts for post-processing results. Important Features Interactive kernel profiler and API debugger Graphical profile report Result comparison across one or multiple reports within the tool Fast Data Collection UI and Command Line interface Fully customizable reports and analysis rules", "keywords": []}, {"id": 87, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#profile", "display_name": "Profile", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "profile", "priority": -1, "content": "Name Description Values Sections Directory Directory from which to import section files and rules. Relative paths are with respect to the NVIDIA Nsight Compute installation directory. Include Sub-Directories Recursively include section files and rules from sub-directories. Yes (Default)/No Apply Applicable Rules Automatically Automatically apply active and applicable rules. Yes (Default)/No Reload Rules Before Applying Force a rule reload before applying the rule to ensure changes in the rule script are recognized. Yes/No (Default) Default Report Page The report page to show when a report is generated or opened. Auto lets the tool decide the best page to show when opening a report. Session Summary Details Source Comments Call Stack/NVTX Raw Auto (default) Function Name Mode Determines how function/kernel names are shown. Auto (default): each component uses its preferred mode Demangled: kernel names are shown demangled with all parameters Function: kernel names are shown with their demangled function name without parameters Mangled: kernel names are shown with their mangled name, if applicable NVTX Rename Mode Determines how NVTX information is used for renaming. Range replay results are always renamed when possible. None: no renaming Kernel: kernel names are renamed using the most recent enclosing push/pop range Resources (default): resources like CPU threads or CUDA contexts and streams are renamed All: Kernel and Resources Maximum Baseline Name Length The maximum length of baseline names. 1..N (Default: 40) Number of Full Baselines to Display Number of baselines to display in the report header with all details in addition to the current result. 0..N (Default: 2) Auto-Convert Metric Units Auto-adjust displayed metric units and values (e.g. Bytes to KBytes). 
Yes (Default)/No Show Instanced Metric Values Show the individual values of instanced metrics in tables. Yes/No (Default) Show Metrics As Floating Point Show all numeric metrics as floating-point numbers. Yes/No (Default) Show Knowledge Base Information Show information from the knowledge base in (metric) tooltips to explain terminology. Note: Nsight Compute needs to be restarted for this option to take effect. Yes (Default)/No Metrics/Properties List of metrics and properties to show on the summary page. Comma-separated list of metric entries. Each entry has the format {Label:MetricName}. Delay Load \u2018Source\u2019 Page Delays loading the content of the report page until the page becomes visible. Avoids processing costs and memory overhead until the report page is opened. Yes/No (Default) Show Single File For Multi-File Sources Shows a single file in each Source page view, even for multi-file sources. Yes/No (Default) Show Only Executed Functions Shows only executed functions in the source page views. Disabling this can impact performance. Yes (Default)/No Auto-Resolve Remote Source Files Automatically try to resolve remote source files on the source page (e.g. via SSH) if the connection is still registered. Yes/No (Default) Enable Register Dependencies Track dependencies between SASS registers/predicates and display them in the SASS view. Yes (Default)/No Kernel Analysis Size Threshold (KB) Enable SASS flow graph analysis for functions below this threshold. SASS analysis is required for Live Register and Register Dependency information. Set to -1 to enable analysis for all functions. -1..N (Default: 1024) Enable ELF Verification Enable ELF (cubin) verification to run every time before SASS analysis. This should only be enabled when working with applications compiled before CUDA 11.0 or when encountering source page issues. Yes/No (Default) API Call History Number of recent API calls shown in API Stream View. 1..N (Default: 100)", "keywords": []}, {"id": 88, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#profile-activity", "display_name": "Profile Activity", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "profile-activity", "priority": -1, "content": "The Profile activity provides a traditional, pre-configurable profiler. After configuring which kernels to profile, which metrics to collect, etc., the application is run under the profiler without interactive control. The activity completes once the application terminates. For applications that normally do not terminate on their own, e.g. interactive user interfaces, you can cancel the activity once all expected kernels are profiled. This activity does not support attaching to processes previously launched via NVIDIA Nsight Compute. These processes will be shown grayed out in the Attach tab. Output File Path to the report file where the collected profile should be stored. If not present, the report extension .ncu-rep is added automatically. The placeholder %i is supported for the filename component. It is replaced by a sequentially increasing number to create a unique filename. This maps to the --export command line option. Force Overwrite If set, existing report files are overwritten. This maps to the --force-overwrite command line option. Target Processes Select the processes you want to profile. In mode Application Only , only the root application process is profiled.
In mode all , the root application process and all its child processes are profiled. This maps to the --target-processes command line option. Replay Mode Select the method for replaying kernel launches multiple times. In mode Kernel , individual kernel launches are replayed transparently during the single execution of the target application. In mode Application , the entire target application is relaunched multiple times. In each iteration, additional data for the target kernel launches is collected. Application replay requires the program execution to be deterministic. This maps to the --replay-mode command line option. See the Kernel Profiling Guide for more details on the replay modes. Graph Profiling Set whether CUDA graphs should be profiled as individual Nodes or as complete Graphs . Additional Options All remaining options map to their command line profiler equivalents. See the Command Line Options for details.", "keywords": []}, {"id": 89, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#profiler-report", "display_name": "Profiler Report", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "profiler-report", "priority": -1, "content": "The profiler report contains all the information collected during profiling for each kernel launch. In the user interface, it consists of a header with general information, as well as controls to switch between report pages or individual collected launches.", "keywords": []}, {"id": 90, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#profiler-report-header", "display_name": "Header", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "profiler-report-header", "priority": -1, "content": "The Page dropdown can be used to switch between the available report pages, which are explained in detail in the next section . The Result dropdown can be used to switch between all collected kernel launches. The information displayed in each page commonly represents the selected launch instance. On some pages (e.g. Raw ), information for all launches is shown and the selected instance is highlighted. You can type in this dropdown to quickly filter and find a kernel launch. The Apply Filters button opens the filter dialog. You can use more than one filter to narrow down your results. On the filter dialog, enter your filter parameters and press the OK button. The Launch dropdown, Summary Page table, and Raw Page table will be filtered accordingly. Select the arrow dropdown to access the Clear Filters button, which removes all filters. Filter Dialog The Add Baseline button promotes the current result in focus to become the baseline of all other results from this report and any other report opened in the same instance of NVIDIA Nsight Compute. Select the arrow dropdown to access the Clear Baselines button, which removes all currently active baselines. The Apply Rules button applies all rules available for this report. If rules had been applied previously, those results will be replaced. By default, rules are applied immediately once the kernel launch has been profiled. This can be changed in the options under Tools &gt; Options &gt; Profile &gt; Report UI &gt; Apply Applicable Rules Automatically . The Occupancy Calculator button navigates to the Occupancy Calculator , which is opened in a new document.
The Source Comparison button navigates to the Source Comparison document when at least two profile results are available for comparison. A button on the right-hand side offers multiple operations that may be performed on the page. Available operations include: Copy as Image - Copies the contents of the page to the clipboard as an image. Save as Image - Saves the contents of the page to a file as an image. Save as PDF - Saves the contents of the page to a file as a PDF. Export to CSV - Exports the contents of the page to CSV format. Reset to Default - Resets the page to a default state by removing any persisted settings. Note that not all functions are available on all pages. Below the buttons described above, a table is shown with information about the selected profile result (as Current ) and potentially additional baselines. For many values in this table, tooltips provide additional information or data, e.g., the tooltip of the Attributes column provides additional information about the context type and resources used for the launch. The [+] and [-] buttons can be used to show or hide the section body content. The visibility of the output of the rules can be toggled with the R button. The info toggle button i changes the section description\u2019s visibility.", "keywords": []}, {"id": 91, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#profiles", "display_name": "Profiles", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "profiles", "priority": -1, "content": "The icon next to the View dropdown can be used to manage Source View Profiles . This button opens a dialog that shows you the list of saved source view profiles. Such profiles can be created using the Create button in the dialog. Profiles let you store the column properties of all views in the report to a file. Such properties include column visibility, freeze state, width, order and the selected navigation metric. A saved profile can be applied to any opened report using the Apply button. This updates the column properties mentioned above from the selected profile in all views. Profiles are useful for configuring views to your preferences, or for a certain use case. Start by choosing metric columns from the Column Chooser . Next, configure other properties like freezing columns, changing width or order, and setting a heatmap metric in the Navigation dropdown before creating the profile. Once a profile is created, you can always use this profile on any opened report to hide all non-required columns or to restore your configured properties. Simply select the profile from the source view profiles dialog and click the Apply button. Note that the column properties are stored separately for each View in the profile and, when applied, only those views which are present in the selected profile will be updated. You will not see the metric columns that are not available in your report, even if they were configured to be visible in the source view profile you applied.", "keywords": []}, {"id": 92, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#project-dialogs", "display_name": "Project Dialogs", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "project-dialogs", "priority": -1, "content": "New Project Creates a new project. The project must be given a name, which will also be used for the project file.
You can select the location where the project file should be saved on disk. Select whether a new directory with the project name should be created in that location.", "keywords": []}, {"id": 93, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#project-explorer", "display_name": "Project Explorer", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "project-explorer", "priority": -1, "content": "The Project Explorer window allows you to inspect and manage the current project. It shows the project name as well as all Items (profile reports and other files) associated with it. Right-click on any entry to see further actions, such as adding, removing or grouping items. Type in the Search project toolbar at the top to filter the currently shown entries. Project Explorer", "keywords": []}, {"id": 94, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#projects", "display_name": "Projects", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "projects", "priority": -1, "content": "NVIDIA Nsight Compute uses Project Files to group and organize profiling reports. At any given time, only one project can be open in NVIDIA Nsight Compute. Collected reports are automatically assigned to the current project. Reports stored on disk can be assigned to a project at any time. In addition to profiling reports, related files such as notes or source code can be associated with the project for future reference. Note that only references to reports or other files are saved in the project file. Those references can become invalid, for example when associated files are deleted or removed, when they are not available on the current system, or when the project file itself was moved. NVIDIA Nsight Compute uses the ncu-proj file extension for project files. When no custom project is open, a default project is used to store, e.g., the current Connection Dialog entries. To remove all information from the default project, you must close NVIDIA Nsight Compute and then delete the file from disk. On Windows, the file is located at &lt;USER&gt;\\AppData\\Local\\NVIDIA Corporation\\NVIDIA Nsight Compute\\ On Linux, the file is located at &lt;USER&gt;/.local/share/NVIDIA Corporation/NVIDIA Nsight Compute/ On MacOSX, the file is located at &lt;USER&gt;/Library/Application Support/NVIDIA Corporation/NVIDIA Nsight Compute/", "keywords": []}, {"id": 95, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#quickstart", "display_name": "Quickstart", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "quickstart", "priority": -1, "content": "The following sections provide brief step-by-step guides on how to set up and run NVIDIA Nsight Compute to collect profile information. All directories are relative to the base directory of NVIDIA Nsight Compute, unless specified otherwise. The UI executable is called ncu-ui. A shortcut with this name is located in the base directory of the NVIDIA Nsight Compute installation. The actual executable is located in the folder host\\windows-desktop-win7-x64 on Windows or host/linux-desktop-glibc_2_11_3-x64 on Linux. By default, when installing from a Linux .run file, NVIDIA Nsight Compute is located in /usr/local/cuda-&lt;cuda-version&gt;/nsight-compute-&lt;version&gt; .
When installing from a .deb or .rpm package, it is located in /opt/nvidia/nsight-compute/&lt;version&gt; to be consistent with Nsight Systems . On Windows, the default path is C:\\Program Files\\NVIDIA Corporation\\Nsight Compute &lt;version&gt; . After starting NVIDIA Nsight Compute, by default the Welcome Page is opened. The Start section allows the user to start a new activity, open an existing report, create a new project or load an existing project. The Continue section provides links to recently opened reports and projects. The Explore section provides information about what is new in the latest release, as well as links to additional training. See Environment on how to change the start-up action. Welcome Page", "keywords": []}, {"id": 96, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#raw-page", "display_name": "Raw Page", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "raw-page", "priority": -1, "content": "The Raw page shows a list of all collected metrics with their units per profiled kernel launch. It can be exported, for example, to CSV format for further analysis. The page features a filter edit to quickly find specific metrics. You can transpose the table of kernels and metrics by using the Transpose button. If a metric has multiple instance values, the number of instances is shown after the standard value. This metric, for example, has ten instance values: 35.48 {10} . In the Profile options dialog, you can select that all instance values should be shown individually, or you can inspect the metric result in the Metric Details tool window.", "keywords": []}, {"id": 97, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#remote-connections", "display_name": "Remote Connections", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "remote-connections", "priority": -1, "content": "Remote devices that support SSH can also be configured as a target in the Connection Dialog . To configure a remote device, ensure an SSH-capable Target Platform is selected, then press the + button. The following configuration dialog will be presented. NVIDIA Nsight Compute supports both password and private key authentication methods. In this dialog, select the authentication method and enter the following information: Password IP/Host Name: The IP address or host name of the target device. User Name: The user name to be used for the SSH connection. Password: The user password to be used for the SSH connection. Port: The port to be used for the SSH connection. (The default value is 22) Deployment Directory: The directory to use on the target device to deploy supporting files. The specified user must have write permissions to this location. Connection Name: The name of the remote connection that will show up in the Connection Dialog . If not set, it will default to &lt;User&gt;@&lt;Host&gt;:&lt;Port&gt;. Private Key IP/Host Name: The IP address or host name of the target device. User Name: The user name to be used for the SSH connection. SSH Private Key: The private key that is used to authenticate to the SSH server. SSH Key Passphrase: The passphrase for your private key. Port: The port to be used for the SSH connection. (The default value is 22) Deployment Directory: The directory to use on the target device to deploy supporting files. The specified user must have write permissions to this location.
Connection Name: The name of the remote connection that will show up in the Connection Dialog . If not set, it will default to &lt;User&gt;@&lt;Host&gt;:&lt;Port&gt;. In addition to keyfiles specified by path and plain password authentication, NVIDIA Nsight Compute supports keyboard-interactive authentication, standard keyfile path searching and SSH agents. When all information is entered, click the Add button to make use of this new connection. When a remote connection is selected in the Connection Dialog , the Application Executable file browser will browse the remote file system using the configured SSH connection, allowing the user to select the target application on the remote device. When an activity is launched on a remote device, the following steps are taken: The command line profiler and supporting files are copied into the Deployment Directory on the remote device. (Only files that do not exist or are out of date are copied.) Communication channels are opened to prepare for the traffic between the UI and the Application Executable . For Interactive Profile activities, a SOCKS proxy is started on the host machine. For Non-Interactive Profile activities, a remote forwarding channel is opened on the target machine to tunnel profiling information back to the host. The Application Executable is executed on the remote device. For Interactive Profile activities, a connection is established to the remote application and the profiling session begins. For Non-Interactive Profile activities, the remote application is executed under the command line profiler and the specified report file is generated. For non-interactive profiling activities, the generated report file is copied back to the host, and opened. The progress of each of these steps is presented in the Progress Log . Progress Log Note that once either activity type has been launched remotely, the tools necessary for further profiling sessions can be found in the Deployment Directory on the remote device. On Linux and Mac host platforms, NVIDIA Nsight Compute supports SSH remote profiling on target machines which are not directly addressable from the machine the UI is running on through the ProxyJump and ProxyCommand SSH options. These options can be used to specify intermediate hosts to connect to or actual commands to run to obtain a socket connected to the SSH server on the target host, and can be added to your SSH configuration file. Note that for both options, NVIDIA Nsight Compute runs external commands and does not implement any mechanism to authenticate to the intermediate hosts using the credentials entered in the Connection Dialog . These credentials will only be used to authenticate to the final target in the chain of machines. When using the ProxyJump option, NVIDIA Nsight Compute uses the OpenSSH client to establish the connection to the intermediate hosts. This means that in order to use ProxyJump or ProxyCommand , a version of OpenSSH supporting these options must be installed on the host machine. A common way to authenticate to the intermediate hosts in this case is to use an SSH agent and have it hold the private keys used for authentication. Since the OpenSSH SSH client is used, you can also use the SSH askpass mechanism to handle these authentications in an interactive manner. On slow networks, connections used for remote profiling through SSH might time out. If this is the case, the ConnectTimeout option can be used to set the desired timeout value.
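As an illustrative sketch only (the host names, user name and timeout value below are hypothetical, not part of the product documentation), a corresponding entry in the SSH configuration file could look like this:

    Host target-gpu
        # Hypothetical target reached via an intermediate host
        HostName 192.0.2.50
        User profiler
        ProxyJump jumphost.example.com
        ConnectTimeout 30

With such an entry, connecting to target-gpu would let the OpenSSH client handle the intermediate hop and the timeout, while the credentials entered in the Connection Dialog are only used for the final target.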
A known limitation of remote profiling through SSH is that problems may arise if NVIDIA Nsight Compute connects through SSH to the same machine it is running on. In this case, the workaround is to do local profiling through localhost . For more information about available options for the OpenSSH client and the ecosystem of tools it can be used with for authentication, refer to the official manual pages .", "keywords": []}, {"id": 98, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#rendering-options", "display_name": "Rendering Options", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "rendering-options", "priority": -1, "content": "Under the highlight controls, additional rendering options are available. These include methods to control the geometry colors and the ability to toggle the drawing of wireframes for meshes and AABBs.", "keywords": []}, {"id": 99, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#report-pages", "display_name": "Report Pages", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "report-pages", "priority": -1, "content": "Use the Page dropdown in the header to switch between the report pages. By default, when opening a report with a single profile result, the Details Page is shown. When opening a report with multiple results, the Summary Page is selected instead. You can change the default report page in the Profile options.", "keywords": []}, {"id": 100, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#reset", "display_name": "Reset", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "reset", "priority": -1, "content": "Entries in the connection dialog are saved as part of the current project . When working in a custom project, simply close the project to reset the dialog. When not working in a custom project, entries are stored as part of the default project . You can delete all information from the default project by closing NVIDIA Nsight Compute and then deleting the project file from disk .", "keywords": []}, {"id": 101, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#resources", "display_name": "Resources", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "resources", "priority": -1, "content": "The Resources window is available when NVIDIA Nsight Compute is connected to a target application. It shows information about the currently known resources, such as CUDA devices, CUDA streams or kernels. The window is updated every time the target application is suspended. If closed, it can be re-opened using Debug &gt; Resources from the main menu. Using the dropdown on the top, different views can be selected, where each view is specific to one kind of resource (context, stream, kernel, \u2026). The Filter edit allows you to create filter expressions using the column headers of the currently selected resource. The resource table shows all information for each resource instance. Each instance has a unique ID, the API Call ID when this resource was created, its handle, associated handles, and further parameters.
When a resource is destroyed, it is removed from its table.", "keywords": []}, {"id": 102, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#send-feedback", "display_name": "Send Feedback", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "send-feedback", "priority": -1, "content": "Name Description Values Collect Usage and Platform Data Choose whether or not you wish to allow NVIDIA Nsight Compute to collect usage and platform data. Yes No (Default)", "keywords": []}, {"id": 103, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#session-page", "display_name": "Session Page", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "session-page", "priority": -1, "content": "The Session page contains basic information about the report and the machine, as well as device attributes of all devices for which launches were profiled. When switching between launch instances, the respective device attributes are highlighted.", "keywords": []}, {"id": 104, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#sessions", "display_name": "Sessions", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "sessions", "priority": -1, "content": "Instead of sessions, NVIDIA Nsight Compute uses Projects to launch and gather connection details and collected reports. Executable and Import Sessions Use the Project Explorer or the Main Menu to create a new project. Reports collected from the command line, i.e. using NVIDIA Nsight Compute CLI, can be opened directly using the main menu. In addition, you can use the Project Explorer to associate existing reports as well as any other artifacts such as executables, notes, etc., with the project. Note that those associations are only references; in other words, moving or deleting the project file on disk will not update its artifacts. nvprof or command-line profiler output files, as well as Visual Profiler sessions, cannot be imported into NVIDIA Nsight Compute.", "keywords": []}, {"id": 105, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#source-comparison", "display_name": "Source Comparison", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "source-comparison", "priority": -1, "content": "Source comparison provides a way to see the source files of two profile results side by side. It enables you to quickly identify source differences and understand changes in metric values. To compare two results side by side, add one result as a baseline, navigate to the other result, and then click the Source Comparison button located in the report header. For example, if you want to compare kernel XYZ from report R1 with kernel XYZ from report R2, first open report R1, add the profile result for kernel XYZ as a baseline, open report R2, choose kernel XYZ, and then click the Source Comparison button. Source comparison will be shown only with the first added baseline result. Source Comparison Button Source Comparison Currently, only the high-level Source (CUDA-C) view and the SASS view are supported for comparison. Navigation to the previous or next difference is supported using the navigation buttons or the keyboard shortcuts Ctrl + 1 and Ctrl + 2 .
Source Comparison Navigation Buttons", "keywords": []}, {"id": 106, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#source-lookup", "display_name": "Source Lookup", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "source-lookup", "priority": -1, "content": "Name Description Values Program Source Locations Set program source search paths. These paths are used to resolve CUDA-C source files on the Source page if the respective file cannot be found in its original location. Files which cannot be found are marked with a File Not Found error. See the Ignore File Properties option for files that are found but don\u2019t match. Ignore File Properties Ignore file properties (e.g. timestamp, size) for source resolution. If this is disabled, all file properties like modification timestamp and file size are checked against the information stored by the compiler in the application during compilation. If a file with the same name exists on a source lookup path, but not all properties match, it won\u2019t be used for resolution (and a File Mismatch error will be shown). Yes/No (Default)", "keywords": []}, {"id": 107, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#source-page", "display_name": "Source Page", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "source-page", "priority": -1, "content": "The Source page correlates assembly (SASS) with high-level code such as CUDA-C or PTX. In addition, it displays instruction-correlated metrics to help pinpoint performance problems in your code. The page can be switched between different Views to focus on a specific source layer or see two layers side-by-side. This includes SASS, PTX and Source (CUDA-C, Fortran, Python, \u2026), as well as their combinations. Which options are available depends on the source information embedded into the executable. The high-level Source (CUDA-C) view is available if the application was built with the -lineinfo or --generate-line-info nvcc flag to correlate SASS and source. When using separate linking at the ELF level, there is no PTX available in the ELF that would correspond to the final SASS. As such, NVIDIA Nsight Compute does not show any PTX even though it would be available statically in the executable and could be shown with cuobjdump -all -lptx . However, this is a pre-linked version of the PTX and cannot be reliably used for correlation. The code in the different Views can also contain warnings, errors or just notifications that are displayed as Source Markers in the left header, as shown below. These can be generated from multiple systems, but as of now only NvRules are supported.", "keywords": []}, {"id": 108, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#standalone-source-viewer", "display_name": "Standalone Source Viewer", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "standalone-source-viewer", "priority": -1, "content": "NVIDIA Nsight Compute includes a standalone source viewer for cubin files. This view is identical to the Source Page , except that it won\u2019t include any performance metrics. Cubin files can be opened from the File &gt; Open main menu command. The SM Selection dialog will be shown before opening the standalone source view. 
If available, the SM version present in the file name is pre-selected. For example, if your file name is mergeSort.sm_80.cubin , then SM 8.0 will be pre-selected in the dialog. Choose the appropriate SM version from the dropdown menu if it\u2019s not included in the file name. SM Selection Dialog Click the Ok button to open the Standalone Source Viewer . Standalone Source Viewer", "keywords": []}, {"id": 109, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#status-banners", "display_name": "Status Banners", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "status-banners", "priority": -1, "content": "Status banners are used to display important messages, such as profiler errors. The message can be dismissed by clicking the \u2018X\u2019 button. The number of banners shown at the same time is limited and old messages can get dismissed automatically if new ones appear. Use the Output Messages window to see the complete message history.", "keywords": []}, {"id": 110, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#summary-page", "display_name": "Summary Page", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "summary-page", "priority": -1, "content": "The Summary page shows a table of all collected results in the report, as well as a list of the most important rule outputs ( Prioritized Rules ), which are ordered by the estimated speedup that could potentially be obtained by following their guidance. Prioritized Rules are shown by default and can be toggled with the [R] button on the upper right of the page. Summary page with Summary Table and Prioritized Rules. The Summary Table gives you a quick comparison overview across all profiled workloads. It contains a number of important, pre-selected metrics which can be customized as explained below. Its columns can be sorted by clicking the column header. You can transpose the table with the Transpose button. The aggregate of all results for each counter metric is shown in the table header along with the column name. You can change the aggregated values by selecting the desired results for multiple metrics simultaneously. When selecting any entry by single-click, a list of its Prioritized Rules will be shown below the table. Double-click any entry to make the result the currently active one and switch to the Details Page to inspect its performance data. You can configure the list of metrics included in this table in the Profile options dialog. If a metric has multiple instance values, the number of instances is shown after its standard value. A metric with ten instance values could, for example, look like this: 35.48 {10} . In the Profile options dialog, you can select that all instance values should be shown individually. You can also inspect the instance values of a metric result in the Metric Details tool window. In addition to metrics, you can also configure the table to include any of the following properties: Properties property__api_call_id ID of the API call associated with this profile result. property__block_size Block Size. property__creation_time Local collection time. property__demangled_name Kernel demangled name. property__device_name GPU device name. property__estimated_speedup Maximal relative speedup achievable for this profile result as estimated by the guided analysis rules. property__function_name Kernel function name or range name.
property__grid_dimensions Grid Dimensions. property__grid_offset Grid Offset. property__grid_size Grid Size. property__issues_detected Number of issues detected by guided analysis rules for this profile result. property__kernel_id Kernel ID. property__mangled_name Kernel mangled name. property__process_name Process name. property__runtime_improvement Runtime improvement corresponding to the estimated speedup. property__series_id ID of the profile series. property__series_parameters Profile series parameters. property__thread_id CPU thread ID. For Range Replay reports, a smaller set of columns is shown by default, as not all apply to such results. For the currently selected metric result, the Prioritized Rules show the most impactful rule results with respect to the estimated potential speedup. Clicking on any of the rule names on the left allows you to easily navigate to the containing section on the details page. With the downward-facing arrow on the right, a table with the relevant key performance indicators can be toggled. This table contains the metrics which should be tracked when optimizing performance according to the rule guidance. Prioritized Rules with key performance indicators table.", "keywords": []}, {"id": 111, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#system-trace-activity", "display_name": "System Trace Activity", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "system-trace-activity", "priority": -1, "content": "Launch the target application from NVIDIA Nsight Compute When starting NVIDIA Nsight Compute, the Welcome Page will appear. Click on Quick Launch to open the Connection dialog. If the Connection dialog doesn\u2019t appear, you can open it using the Connect button from the main toolbar, as long as you are not currently connected. Select your local target platform on the left-hand side and your localhost from the Connection dropdown. Then, fill in the launch details. In the Activity panel, select the System Trace activity to initiate a session with pre-configured settings. Press Launch to start the session. Additional Launch Options For more details on these options, see System-Wide Profiling Options . Once the session is completed, the Nsight Systems report is opened in a new document. By default, the timeline view is shown. It provides detailed information about the activity of the CPU and GPUs and helps in understanding the overall behavior and performance of the application. Once a CUDA kernel is identified as being on the critical path and not meeting performance expectations, right-click the kernel launch on the timeline and select Profile Kernel from the context menu. A new Connection Dialog opens up that is already preconfigured to profile the selected kernel launch. Proceed with optimizing the selected kernel using the Non-Interactive Profile Activity .", "keywords": []}, {"id": 112, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#tables", "display_name": "Tables", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "tables", "priority": -1, "content": "The tables show the occupancy, as well as the number of active threads, warps, and thread blocks per multiprocessor, and the maximum number of active blocks on the GPU.
Tables", "keywords": []}, {"id": 113, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#target-connection-properties", "display_name": "Target Connection Properties", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "target-connection-properties", "priority": -1, "content": "The Target Connection Properties determine how the host connects to the target application during an Interactive Profile Activity . This connection is used to transfer profile information to the host during the profile session. Name Description Values Base Port Base port used to establish a connection from the host to the target application during an Interactive Profile activity (both local and remote). 1-65535 (Default: 49152) Maximum Ports Maximum number of ports to try (starting from Base Port ) when attempting to connect to the target application. 2-65534 (Default: 64)", "keywords": []}, {"id": 114, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#timeline", "display_name": "Timeline", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "timeline", "priority": -1, "content": "Since trace analysis is now covered by Nsight Systems, NVIDIA Nsight Compute does not provide views of the application timeline. The API Stream tool window does show a per-thread stream of the last captured CUDA API calls. However, those are serialized and do not maintain runtime concurrency or provide accurate timing information.", "keywords": []}, {"id": 115, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#trace", "display_name": "Trace", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "trace", "priority": -1, "content": "NVIDIA Nsight Compute does not support tracing GPU or API activities on an accurate timeline. This functionality is covered by NVIDIA Nsight Systems . In the Interactive Profile Activity , the API Stream tool window provides a stream of recent API calls on each thread. However, since all tracked API calls are serialized by default, it does not collect accurate timestamps.", "keywords": []}, {"id": 116, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#visual-profiler-transition-guide", "display_name": "Visual Profiler Transition Guide", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "visual-profiler-transition-guide", "priority": -1, "content": "This guide provides tips for moving from Visual Profiler to NVIDIA Nsight Compute. 
NVIDIA Nsight Compute tries to provide as much parity as possible with Visual Profiler\u2019s kernel profiling features, but some functionality is now covered by different tools.", "keywords": []}, {"id": 117, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#visual-studio-integration-guide", "display_name": "Visual Studio Integration Guide", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "visual-studio-integration-guide", "priority": -1, "content": "This guide provides information on using NVIDIA Nsight Compute within Microsoft Visual Studio, using the NVIDIA Nsight Integration Visual Studio extension, allowing for a seamless development workflow.", "keywords": []}, {"id": 118, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "page", "name": "NsightCompute/index#visual-studio-integration-overview", "display_name": "Visual Studio Integration Overview", "type": "section", "display_type": "Page section", "docname": "NsightCompute/index", "anchor": "visual-studio-integration-overview", "priority": -1, "content": "NVIDIA Nsight Integration is a Visual Studio extension that allows you to access the power of NVIDIA Nsight Compute from within Visual Studio. When NVIDIA Nsight Compute is installed along with NVIDIA Nsight Integration, NVIDIA Nsight Compute activities will appear under the NVIDIA \u2018Nsight\u2019 menu in the Visual Studio menu bar. These activities launch NVIDIA Nsight Compute with the current project settings and executable. For more information about using NVIDIA Nsight Compute from within Visual Studio, please visit NVIDIA Nsight Integration Overview NVIDIA Nsight Integration User Guide 
", "keywords": []}, {"id": 119, "doc_id": 119, "filename": "NsightCompute/index.html", "domain_name": "std", "name": "NsightCompute/index", "display_name": "Nsight Compute", "type": "doc", "display_type": "Page", "docname": "NsightCompute/index", "anchor": "", "priority": -1, "content": "The User Guide for Nsight Compute.", "keywords": []}, {"id": 120, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#attach", "display_name": "Attach", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "attach", "priority": -1, "content": "Option Description Default hostname Set the hostname or IP address for connecting to the machine on which the target application is running. When attaching to a local target application, use 127.0.0.1. 127.0.0.1", "keywords": []}, {"id": 121, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#command-line-options", "display_name": "Command Line Options", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "command-line-options", "priority": -1, "content": "For long command line options, passing a unique initial substring can be sufficient.", "keywords": []}, {"id": 122, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#config-file", "display_name": "Config File", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "config-file", "priority": -1, "content": "Using the --config-file on/off option, parsing parameters from the config file can be enabled or disabled. Using the --config-file-path &lt;path&gt; option, the default path and name of the config file can be overridden. By default, a config file named config.ncu-cfg is searched for in the current working directory, in $HOME/.config/NVIDIA Corporation on Linux and in %APPDATA%\\NVIDIA Corporation\\ on Windows. If a valid config file is found, ncu parses the file and initializes any command line parameters to the values set in the file. If the same command line parameter is also set explicitly during the current invocation, the latter takes precedence. Parameters can be set under various general modes, and the ncu command line parameters are used to determine which general mode needs to be parsed from the config file. See the table below for more details. Command line parameters General Mode ncu --mode launch-and-attach CuVectorAddMulti.exe Launch-and-attach ncu --mode launch CuVectorAddMulti.exe Launch ncu --mode attach Attach ncu --list-sets, ncu --list-sections, ncu --list-rules and ncu --list-metrics List ncu --query-metrics Query ncu -i &lt;MyReport.ncu-rep&gt; Import These general modes should be defined in the config file using INI-like syntax as: [&lt;general-mode&gt;] &lt;parameter&gt;=&lt;value&gt; ;&lt;comments&gt; Sample usage [Launch-and-attach] -c = 1 --section = LaunchStats, Occupancy [Import] --open-in-ui -c = 1 --section = LaunchStats, Occupancy From this configuration, ncu will parse the parameters set under the [Launch-and-attach] block whenever an application is profiled in launch-and-attach mode. In the same manner, the parameters set under the [Import] block will be parsed whenever a report is imported. 
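For illustration, assuming the sample configuration above: a plain launch-and-attach invocation such as ncu CuVectorAddMulti.exe would then behave like ncu -c 1 --section LaunchStats --section Occupancy CuVectorAddMulti.exe , and an explicitly passed -c 2 would take precedence over the -c value from the config file. 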
Different modes can be combined if there exists a set of parameters which is common to each mode. The sample shown above can be rewritten by combining both modes as: [Launch-and-attach, import] -c = 1 --section = LaunchStats, Occupancy [Import] --open-in-ui Additional points Options like --open-in-ui do not expect any value to be set. These options should not be passed any value. Options like --section can be passed multiple times in the command line. These options should be written only once under a general-mode with all required values separated by commas as shown below. Explicitly setting values for these options will not overwrite the config file values. Instead, all values will be composed together and set to the option. [&lt;general-mode&gt;] &lt;parameter&gt;=&lt;value1&gt;,&lt;value2&gt;,...", "keywords": []}, {"id": 123, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#console-output", "display_name": "Console Output", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "console-output", "priority": -1, "content": "Option Description Default csv Use comma-separated values as console output. Implies --print-units base by default. page Select the report page to print console output for. Available pages are: details Show results grouped as sections, including rule results. Some metrics that are collected by default (e.g. device attributes) are omitted if not specified explicitly in any section or using --metrics . raw Show all collected metrics by kernel launch. source Show source. See --print-source to select the source view. session Show launch settings, session info, process info and device attributes. details . If no --page option is given and --export is set, no results are printed to the console output. print-source Select the source view: sass Show SASS (assembly) instructions for each kernel launch. ptx Show the PTX source of every cubin for which at least one kernel is profiled. cuda Show the entire CUDA-C source file containing the kernel code for each kernel launch. The CLI shows CUDA source only if the file exists on the host machine. cuda,sass Show SASS and CUDA-C source correlation for each kernel launch. The CLI shows CUDA source only if the file exists on the host machine. Metric correlation with source is available in the sass and cuda,sass source views. Metrics specified with --metrics and with the section files specified with --section are correlated. Consider restricting the number of selected metrics such that values fit into a single output row. sass resolve-source-file Resolve CUDA source files in the --page source output. Provide the comma-separated full paths of the files. print-details Select which part of a section should be shown in the details page output: header Show all metrics from the header of the section. body Show all metrics from the body of the section. all Show all metrics from the section. Replaces the deprecated option --details-all . header print-metric-name Select one of the options to show in the Metric Name column: label Show metric label. name Show metric name. label-name Show both metric label and metric name. label print-units Select the mode for scaling of metric units. Available modes are: auto Show all metrics automatically scaled to the most fitting order of magnitude. base Show all metrics in their base unit. auto print-fp Show all numeric metrics in the console output as floating point numbers. false print-kernel-base Set the basis for kernel name output. 
See --kernel-regex-base for options. demangled print-metric-instances Set output mode for metrics with instance values: none Only show GPU aggregate value. values Show GPU aggregate followed by all instance values. none print-nvtx-rename Select how NVTX should be used for renaming: none Don\u2019t use NVTX for renaming. kernel Rename kernels with the most recent enclosing NVTX push/pop range. none print-rule-details Print additional details for rule results, such as the triggering metrics. Currently has no effect in CSV mode. false print-summary Select the summary output mode. Available modes are: none No summary. per-gpu Summary for each GPU. per-kernel Summary for each kernel type. per-nvtx Summary for each NVTX context. none", "keywords": []}, {"id": 124, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#environment-variables", "display_name": "Environment Variables", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "environment-variables", "priority": -1, "content": "The following environment variables can be set before launching NVIDIA Nsight Compute CLI, or the UI, respectively. Name Description Default/Values NV_COMPUTE_PROFILER_DISABLE_STOCK_FILE_DEPLOYMENT Disable file deployment to the versioned Sections directory, using section and rule files from the stock directory within the installation instead. By default, the versioned directory from the user\u2019s documents folder is used to ensure that any potential user updates are taken into account. Only supported in the NVIDIA Nsight Compute CLI. Default: unset Set to \u201c1\u201d to disable deployment. NV_COMPUTE_PROFILER_LOCAL_CONNECTION_OVERRIDE Override the default local connection mechanism between frontend and profiled target processes. The default mechanism is platform-dependent. This should only be used if there are connection problems between frontend and target processes in a local launch. Default: unset (use default mechanism) Set to \u201cuds\u201d to use Unix Domain Socket connections (available on POSIX platforms only). Set to \u201ctcp\u201d to use TCP (available on all platforms). Set to \u201cnamed-pipes\u201d to use Windows Named Pipes (available on Windows only). NV_COMPUTE_PROFILER_DISABLE_SW_PRE_PASS Disable the instruction-level software (SW) metric pre-pass. When collecting SW-patched metrics, such as inst_executed , the pre-pass is used to determine which functions are executed as part of the kernel and should be patched. This requires a separate replay pass, and if only instruction-level SW metrics are to be collected, prevents single-pass data collection. Disabling the pre-pass can improve performance if memory save-and-restore is undesirable and application replay is not possible. Default: unset (use pre-pass when applicable) Set to \u201c1\u201d to disable the pre-pass.", "keywords": []}, {"id": 125, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#event-comparison", "display_name": "Event Comparison", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "event-comparison", "priority": -1, "content": "For nvprof events, the following table lists the equivalent metrics in NVIDIA Nsight Compute, if available. For a detailed explanation of the structuring of PerfWorks metrics, see Metrics Structure . Metrics starting with sm__ are collected per-SM. 
Metrics starting with smsp__ are collected per-SM subpartition. However, all corresponding nvprof events are collected per-SM, only. Check the Metrics Guide for more details on these terms. nvprof Event PerfWorks Metric or Formula (&gt;= SM 7.0) active_cycles sm__cycles_active.sum active_cycles_pm sm__cycles_active.sum active_cycles_sys sys__cycles_active.sum active_warps sm__warps_active.sum active_warps_pm sm__warps_active.sum atom_count smsp__inst_executed_op_generic_atom_dot_alu.sum elapsed_cycles_pm sm__cycles_elapsed.sum elapsed_cycles_sm sm__cycles_elapsed.sum elapsed_cycles_sys sys__cycles_elapsed.sum fb_subp0_read_sectors dram__sectors_read.sum fb_subp1_read_sectors dram__sectors_read.sum fb_subp0_write_sectors dram__sectors_write.sum fb_subp1_write_sectors dram__sectors_write.sum global_atom_cas smsp__inst_executed_op_generic_atom_dot_cas.sum gred_count smsp__inst_executed_op_global_red.sum inst_executed sm__inst_executed.sum inst_executed_fma_pipe_s0 smsp__inst_executed_pipe_fma.sum inst_executed_fma_pipe_s1 smsp__inst_executed_pipe_fma.sum inst_executed_fma_pipe_s2 smsp__inst_executed_pipe_fma.sum inst_executed_fma_pipe_s3 smsp__inst_executed_pipe_fma.sum inst_executed_fp16_pipe_s0 smsp__inst_executed_pipe_fp16.sum inst_executed_fp16_pipe_s1 smsp__inst_executed_pipe_fp16.sum inst_executed_fp16_pipe_s2 smsp__inst_executed_pipe_fp16.sum inst_executed_fp16_pipe_s3 smsp__inst_executed_pipe_fp16.sum inst_executed_fp64_pipe_s0 smsp__inst_executed_pipe_fp64.sum inst_executed_fp64_pipe_s1 smsp__inst_executed_pipe_fp64.sum inst_executed_fp64_pipe_s2 smsp__inst_executed_pipe_fp64.sum inst_executed_fp64_pipe_s3 smsp__inst_executed_pipe_fp64.sum inst_issued1 sm__inst_issued.sum l2_subp0_read_sector_misses lts__t_sectors_op_read_lookup_miss.sum l2_subp1_read_sector_misses lts__t_sectors_op_read_lookup_miss.sum l2_subp0_read_sysmem_sector_queries lts__t_sectors_aperture_sysmem_op_read.sum l2_subp1_read_sysmem_sector_queries lts__t_sectors_aperture_sysmem_op_read.sum l2_subp0_read_tex_hit_sectors lts__t_sectors_srcunit_tex_op_read_lookup_hit.sum l2_subp1_read_tex_hit_sectors lts__t_sectors_srcunit_tex_op_read_lookup_hit.sum l2_subp0_read_tex_sector_queries lts__t_sectors_srcunit_tex_op_read.sum l2_subp1_read_tex_sector_queries lts__t_sectors_srcunit_tex_op_read.sum l2_subp0_total_read_sector_queries lts__t_sectors_op_read.sum + lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum l2_subp1_total_read_sector_queries lts__t_sectors_op_read.sum + lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum l2_subp0_total_write_sector_queries lts__t_sectors_op_write.sum + lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum l2_subp1_total_write_sector_queries lts__t_sectors_op_write.sum + lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum l2_subp0_write_sector_misses lts__t_sectors_op_write_lookup_miss.sum l2_subp1_write_sector_misses lts__t_sectors_op_write_lookup_miss.sum l2_subp0_write_sysmem_sector_queries lts__t_sectors_aperture_sysmem_op_write.sum l2_subp1_write_sysmem_sector_queries lts__t_sectors_aperture_sysmem_op_write.sum l2_subp0_write_tex_hit_sectors lts__t_sectors_srcunit_tex_op_write_lookup_hit.sum l2_subp1_write_tex_hit_sectors lts__t_sectors_srcunit_tex_op_write_lookup_hit.sum l2_subp0_write_tex_sector_queries lts__t_sectors_srcunit_tex_op_write.sum l2_subp1_write_tex_sector_queries lts__t_sectors_srcunit_tex_op_write.sum not_predicated_off_thread_inst_executed smsp__thread_inst_executed_pred_on.sum pcie_rx_active_pulse n/a pcie_tx_active_pulse n/a prof_trigger_00 n/a 
prof_trigger_01 n/a prof_trigger_02 n/a prof_trigger_03 n/a prof_trigger_04 n/a prof_trigger_05 n/a prof_trigger_06 n/a prof_trigger_07 n/a inst_issued0 smsp__issue_inst0.sum sm_cta_launched sm__ctas_launched.sum shared_load smsp__inst_executed_op_shared_ld.sum shared_store smsp__inst_executed_op_shared_st.sum generic_load smsp__inst_executed_op_generic_ld.sum generic_store smsp__inst_executed_op_generic_st.sum global_load smsp__inst_executed_op_global_ld.sum global_store smsp__inst_executed_op_global_st.sum local_load smsp__inst_executed_op_local_ld.sum local_store smsp__inst_executed_op_local_st.sum shared_atom smsp__inst_executed_op_shared_atom.sum shared_atom_cas smsp__inst_executed_op_shared_atom_dot_cas.sum shared_ld_bank_conflict l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum shared_st_bank_conflict l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum shared_ld_transactions l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum shared_st_transactions l1tex__data_pipe_lsu_wavefronts_mem_shared_op_st.sum tensor_pipe_active_cycles_s0 smsp__pipe_tensor_cycles_active.sum tensor_pipe_active_cycles_s1 smsp__pipe_tensor_cycles_active.sum tensor_pipe_active_cycles_s2 smsp__pipe_tensor_cycles_active.sum tensor_pipe_active_cycles_s3 smsp__pipe_tensor_cycles_active.sum thread_inst_executed smsp__thread_inst_executed.sum warps_launched smsp__warps_launched.sum", "keywords": []}, {"id": 126, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#file", "display_name": "File", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "file", "priority": -1, "content": "Option Description Default log-file Send all tool output to the specified file, or one of the standard channels. The file will be overwritten. If the file doesn\u2019t exist, a new one will be created. \u201cstdout\u201d as the whole file name indicates the standard output channel (stdout). \u201cstderr\u201d as the whole file name indicates the standard error channel (stderr). If --log-file is not set, profile results will be printed on the console. o,export Set the output file for writing the profile report. If not set, a temporary file will be used which is removed afterwards. The specified name supports macro expansion. See File Macros for more details. If --export is set and no --page option is given, no profile results will be printed on the console. f,force-overwrite Force overwriting all output files. By default, the profiler won\u2019t overwrite existing output files and shows an error instead. i,import Set the input file for reading the profile results. open-in-ui Open the report in the UI instead of showing the result on the terminal. (Only available on host platforms) section-folder-restore Restores stock files to the default section folder or the folder specified by an accompanying --section-folder option. If the operation will overwrite modified files, then the --force-overwrite option is required.", "keywords": []}, {"id": 127, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#file-macros", "display_name": "File Macros", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "file-macros", "priority": -1, "content": "The file name specified with option -o or --export supports the following macro expansions. Occurrences of these macros in the report file name are replaced by the corresponding character sequence. 
If not specified otherwise, the macros cannot be used as part of the file path. Macro Description %h Expands to the host name of the machine on which the command line profiler is running. %q{ENV_NAME} Expands to the content of the variable with the given name ENV_NAME from the environment of the command line profiler. %p Expands to the process ID of the command line profiler. %i Expands to the lowest unused positive integer number that guarantees the resulting file name is not yet used. This macro can only be used once in the output file name. %% Expands to a single % character in the output file name. This macro can be used in the file path and the file name.", "keywords": []}, {"id": 128, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#filtering", "display_name": "Filtering", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "filtering", "priority": -1, "content": "Filtering by kernel name Both nvprof and NVIDIA Nsight Compute CLI support filtering which kernels\u2019 data should be collected. In nvprof, the option is --kernels and applies to the following metric collection options. In NVIDIA Nsight Compute CLI, the option is named --kernel-regex and applies to the complete application execution. In other words, NVIDIA Nsight Compute CLI does not currently support collecting different metrics for different kernels, unless they execute on different GPU architectures. Filtering by kernel ID Nvprof allows users to specify which kernels to profile using a kernel ID description, using the same --kernels option. In NVIDIA Nsight Compute CLI, the syntax for this kernel ID is identical, but the option is named --kernel-id . Filtering by device Both nvprof and NVIDIA Nsight Compute CLI use --devices to filter which devices to profile. In contrast to nvprof, in NVIDIA Nsight Compute CLI the option applies globally, not only to the following options.", "keywords": []}, {"id": 129, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#general", "display_name": "General", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "general", "priority": -1, "content": "Option Description Default h,help Show help message v,version Show version information mode Select the mode of interaction with the target application launch-and-attach: Launch the target application and immediately attach for profiling. launch: Launch the target application and suspend it in the first intercepted API call, waiting for a tool to attach. attach: Attach to a previously launched application to which no other tool is attached. launch-and-attach p,port Base port used for connecting to target applications for --mode launch/attach 49152 max-connections Maximum number of ports for connecting to target applications 64 config-file Use the config.ncu-cfg config file to set parameters. Searches in the current working directory, in \u201c$HOME/.config/NVIDIA Corporation\u201d on Linux and in \u201c%APPDATA%\\NVIDIA Corporation\\\u201d on Windows. 
on config-file-path Override the default path for the config file.", "keywords": []}, {"id": 130, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#introduction", "display_name": "Introduction", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "introduction", "priority": -1, "content": "NVIDIA Nsight Compute CLI (ncu) provides a non-interactive way to profile applications from the command line. It can print the results directly on the command line or store them in a report file. It can also be used to simply launch the target application (see General for details) and later attach with NVIDIA Nsight Compute or another ncu instance. For users migrating from nvprof to NVIDIA Nsight Compute, please additionally see the Nvprof Transition Guide for a comparison of features and workflows.", "keywords": []}, {"id": 131, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#launch", "display_name": "Launch", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "launch", "priority": -1, "content": "Option Description Default check-exit-code Check the application exit code and print an error if it is different from 0. If set, --replay-mode application will stop after the first pass if the exit code is not 0. yes injection-path-64 Override the default path for the injection libraries. The injection libraries are used by the tools to intercept relevant APIs (like CUDA or NVTX). preload-library Prepend a shared library to be loaded by the application before the injection libraries. This option can be given multiple times and the libraries will be loaded in the order they were specified. call-stack Enable CPU Call Stack collection. false nvtx Enable NVTX support for tools. false target-processes Select the processes you want to profile. Available modes are: application-only Profile only the root application process. all Profile the application and all its child processes. all target-processes-filter Set the comma-separated expressions to filter which processes are profiled. &lt;process name&gt; Set the exact process name to include for profiling. regex:&lt;expression&gt; Set the regex to filter matching process names for profiling. On shells that recognize regular expression symbols as special characters (e.g. Linux bash), the expression needs to be escaped with quotes, e.g. --target-processes-filter regex:&quot;.*Process&quot; . When using regex: , the expression must not include any commas. exclude:&lt;process name&gt; Set the exact process name to exclude for profiling. exclude-tree:&lt;process name&gt; Set the exact process name to exclude for profiling and further process tracking. None of its child processes will be profiled, even if they match a positive filter. This option is not available on Windows. The executable name part of the process will be considered in the match. Processing of filters stops at the first match. If any positive filter is specified, no process that is not matching a positive filter is profiled. Examples --target-processes-filter MatrixMul Filter all processes having the executable name exactly \u201cMatrixMul\u201d. --target-processes-filter regex:Matrix Filter all processes that include the string \u201cMatrix\u201d in their executable name, e.g. \u201cMatrixMul\u201d and \u201cMatrixAdd\u201d. 
--target-processes-filter MatrixMul,MatrixAdd Filter all processes having the executable name exactly \u201cMatrixMul\u201d or \u201cMatrixAdd\u201d. --target-processes-filter exclude:MatrixMul.exe Exclude only \u201cMatrixMul.exe\u201d. --target-processes-filter exclude-tree:ChildLauncher,ParentProcess Exclude \u201cChildLauncher\u201d and all its sub-processes. Include (only) \u201cParentProcess\u201d, but not if it\u2019s a child of \u201cChildLauncher\u201d. support-32bit Support profiling processes launched from 32-bit applications. This option is only available on x86_64 Linux. On Windows, tracking 32-bit applications is enabled by default. no null-stdin Launch the application with \u2018/dev/null\u2019 as its standard input. This avoids applications reading from standard input being stopped by SIGTTIN signals and hanging when running as backgrounded processes. false", "keywords": []}, {"id": 132, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#launch-and-attach", "display_name": "Launch and Attach", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "launch-and-attach", "priority": -1, "content": "Launching a process for profiling In nvprof, the application to profile is passed to the tool as a command line argument. The application must be a local executable. Alternatively, you can choose to use the tool in a daemon mode and profile all applicable processes on the local machine (nvprof option --profile-all-processes ). In nvprof, the decision to profile the complete process tree or only the root process is made via the --profile-child-processes flag. In NVIDIA Nsight Compute CLI, the --target-processes option is used for this. NVIDIA Nsight Compute CLI has several modes to determine which application to collect data for. By default, the executable passed via the command line to the tool is started, connected to, and profiled. This mode is called launch-and-attach . Launching a process for attach In contrast to nvprof, you can choose to only launch a local executable. In this mode ( --mode launch ), the process is started and connected to, but then suspended at the first CUDA API call. Subsequently, there is a third mode ( --mode attach ) to attach to any process launched using the aforementioned mode. In this case, all profiling and output options would be passed to the attaching instance of NVIDIA Nsight Compute CLI. Remote profiling Finally, using launch and attach , you can connect to a launched process on a remote machine, which could even run a different operating system than the local host. Use --hostname to select which remote host to connect to.", "keywords": []}, {"id": 133, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#metric-collection", "display_name": "Metric Collection", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "metric-collection", "priority": -1, "content": "Finding available metrics For nvprof, you can use --query-metrics to see the list of metrics available for the current devices on your machine. You can also use --devices to filter which local devices to query. For NVIDIA Nsight Compute CLI, this functionality is the same. However, in addition, you can combine --query-metrics with --chip [chipname] to query the available metrics for any chip, not only the ones in your present CUDA devices. Note that metric names have changed between nvprof and NVIDIA Nsight Compute CLI, and metric names also differ between chips after (and including) GV100 and those before. See Metric Comparison for a comparison of nvprof and NVIDIA Nsight Compute metric names. On Volta and newer GPUs, most metrics are named using a base name and various suffixes, e.g. sm__throughput.avg.pct_of_peak_sustained_elapsed . The base name is sm__throughput and the suffix is avg.pct_of_peak_sustained_elapsed . This is because most metrics follow the same structure and have the same set of suffixes. You need to pass the full name to NVIDIA Nsight Compute when selecting a metric for profiling. To reduce the number of metrics shown for Volta and newer GPUs when using --query-metrics , by default only the base names are shown. Use --query-metrics-mode suffix --metrics &lt;metrics list&gt; to see the full names for the chosen metrics. Use --query-metrics-mode all to see all metrics with their full name directly. Selecting which metrics to collect In both nvprof and NVIDIA Nsight Compute CLI, you can specify a comma-separated list of metric names to the --metrics option. While nvprof would allow you to collect either a list or all metrics, in NVIDIA Nsight Compute CLI you can use regular expressions to select a more fine-grained subset of all available metrics. For example, you can use --metrics &quot;regex:.*&quot; to collect all metrics, or --metrics &quot;regex:smsp__cycles_elapsed&quot; to collect all \u201csmsp__cycles_elapsed\u201d metrics. Selecting which events to collect You cannot collect any events in NVIDIA Nsight Compute CLI. Selecting which section to collect In nvprof, you can either collect individual metrics or events, or a pre-configured set (all, analysis-metrics). NVIDIA Nsight Compute CLI adds the concept of a section. A section is a file that describes which metrics to collect for which GPU architecture, or architecture range. Furthermore, it defines how those metrics will be shown in both the command line output and the user interface. This includes structuring in tables, charts, histograms, etc. 
NVIDIA Nsight Compute CLI comes with a set of pre-defined sections, located in the sections directory. You can inspect, modify or extend those, as well as add new ones, e.g. to easily collect recurring metric sets. Each section specifies a unique section identifier , and there must not be two sections with the same identifier in the search path. By default, the sections associated with the basic section set are collected. You can select one or more individual sections using the --section [section identifier] option one or more times. If no --section option is given, but --metrics is used, no sections will be collected. Selecting which section set to collect In nvprof, you can either collect individual metrics or events, or a pre-configured set (all, analysis-metrics). NVIDIA Nsight Compute CLI adds the concept of section sets . A section set defines a group of sections to collect together, in order to achieve different profiling overheads, depending on the required analysis level of detail. If no other options are selected, the basic section set is collected. You can select one or more sets using the --set [set identifier] option one or more times. If no --set option is given, but --section or --metrics is used, no sets will be collected.", "keywords": []}, {"id": 134, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#metric-comparison", "display_name": "Metric Comparison", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "metric-comparison", "priority": -1, "content": "NVIDIA Nsight Compute uses two groups of metrics, depending on which GPU architecture is profiled. For nvprof metrics, the following table lists the equivalent metrics in NVIDIA Nsight Compute, if available. For a detailed explanation of the structuring of PerfWorks metrics, see Metrics Structure . Metrics starting with sm__ are collected per-SM. Metrics starting with smsp__ are collected per-SM subpartition. However, all corresponding nvprof events are collected per-SM only. Check the Metrics Guide for more details on these terms. 
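As a quick, hedged illustration of reading the table below (./myApp stands in for your application): the nvprof command nvprof --metrics achieved_occupancy ./myApp corresponds to ncu --metrics sm__warps_active.avg.pct_of_peak_sustained_active ./myApp on SM 7.0 and later. 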
nvprof Metric PerfWorks Metric or Formula (&gt;= SM 7.0) achieved_occupancy sm__warps_active.avg.pct_of_peak_sustained_active atomic_transactions l1tex__t_set_accesses_pipe_lsu_mem_global_op_atom.sum + l1tex__t_set_accesses_pipe_lsu_mem_global_op_red.sum atomic_transactions_per_request (l1tex__t_sectors_pipe_lsu_mem_global_op_atom.sum + l1tex__t_sectors_pipe_lsu_mem_global_op_red.sum) / (l1tex__t_requests_pipe_lsu_mem_global_op_atom.sum + l1tex__t_requests_pipe_lsu_mem_global_op_red.sum) branch_efficiency smsp__sass_average_branch_targets_threads_uniform.pct cf_executed smsp__inst_executed_pipe_cbu.sum + smsp__inst_executed_pipe_adu.sum cf_fu_utilization n/a cf_issued n/a double_precision_fu_utilization smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active dram_read_bytes dram__bytes_read.sum dram_read_throughput dram__bytes_read.sum.per_second dram_read_transactions dram__sectors_read.sum dram_utilization dram__throughput.avg.pct_of_peak_sustained_elapsed dram_write_bytes dram__bytes_write.sum dram_write_throughput dram__bytes_write.sum.per_second dram_write_transactions dram__sectors_write.sum eligible_warps_per_cycle smsp__warps_eligible.sum.per_cycle_active flop_count_dp smsp__sass_thread_inst_executed_op_dadd_pred_on.sum + smsp__sass_thread_inst_executed_op_dmul_pred_on.sum + smsp__sass_thread_inst_executed_op_dfma_pred_on.sum * 2 flop_count_dp_add smsp__sass_thread_inst_executed_op_dadd_pred_on.sum flop_count_dp_fma smsp__sass_thread_inst_executed_op_dfma_pred_on.sum flop_count_dp_mul smsp__sass_thread_inst_executed_op_dmul_pred_on.sum flop_count_hp smsp__sass_thread_inst_executed_op_hadd_pred_on.sum + smsp__sass_thread_inst_executed_op_hmul_pred_on.sum + smsp__sass_thread_inst_executed_op_hfma_pred_on.sum * 2 flop_count_hp_add smsp__sass_thread_inst_executed_op_hadd_pred_on.sum flop_count_hp_fma smsp__sass_thread_inst_executed_op_hfma_pred_on.sum flop_count_hp_mul smsp__sass_thread_inst_executed_op_hmul_pred_on.sum flop_count_sp smsp__sass_thread_inst_executed_op_fadd_pred_on.sum + smsp__sass_thread_inst_executed_op_fmul_pred_on.sum + smsp__sass_thread_inst_executed_op_ffma_pred_on.sum * 2 flop_count_sp_add smsp__sass_thread_inst_executed_op_fadd_pred_on.sum flop_count_sp_fma smsp__sass_thread_inst_executed_op_ffma_pred_on.sum flop_count_sp_mul smsp__sass_thread_inst_executed_op_fmul_pred_on.sum flop_count_sp_special n/a flop_dp_efficiency smsp__sass_thread_inst_executed_ops_dadd_dmul_dfma_pred_on.avg.pct_of_peak_sustained_elapsed flop_hp_efficiency smsp__sass_thread_inst_executed_ops_hadd_hmul_hfma_pred_on.avg.pct_of_peak_sustained_elapsed flop_sp_efficiency smsp__sass_thread_inst_executed_ops_fadd_fmul_ffma_pred_on.avg.pct_of_peak_sustained_elapsed gld_efficiency smsp__sass_average_data_bytes_per_sector_mem_global_op_ld.pct gld_requested_throughput n/a gld_throughput l1tex__t_bytes_pipe_lsu_mem_global_op_ld.sum.per_second gld_transactions l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum gld_transactions_per_request l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio global_atomic_requests l1tex__t_requests_pipe_lsu_mem_global_op_atom.sum global_hit_rate (l1tex__t_sectors_pipe_lsu_mem_global_op_ld_lookup_hit.sum + l1tex__t_sectors_pipe_lsu_mem_global_op_st_lookup_hit.sum + l1tex__t_sectors_pipe_lsu_mem_global_op_red_lookup_hit.sum + l1tex__t_sectors_pipe_lsu_mem_global_op_atom_lookup_hit.sum) / (l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum + l1tex__t_sectors_pipe_lsu_mem_global_op_st.sum + l1tex__t_sectors_pipe_lsu_mem_global_op_red.sum + 
l1tex__t_sectors_pipe_lsu_mem_global_op_atom.sum) global_load_requests l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum global_reduction_requests l1tex__t_requests_pipe_lsu_mem_global_op_red.sum global_store_requests l1tex__t_requests_pipe_lsu_mem_global_op_st.sum gst_efficiency smsp__sass_average_data_bytes_per_sector_mem_global_op_st.pct gst_requested_throughput n/a gst_throughput l1tex__t_bytes_pipe_lsu_mem_global_op_st.sum.per_second gst_transactions l1tex__t_sectors_pipe_lsu_mem_global_op_st.sum gst_transactions_per_request l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio half_precision_fu_utilization smsp__inst_executed_pipe_fp16.avg.pct_of_peak_sustained_active inst_bit_convert smsp__sass_thread_inst_executed_op_conversion_pred_on.sum inst_compute_ld_st smsp__sass_thread_inst_executed_op_memory_pred_on.sum inst_control smsp__sass_thread_inst_executed_op_control_pred_on.sum inst_executed smsp__inst_executed.sum inst_executed_global_atomics smsp__sass_inst_executed_op_global_atom.sum inst_executed_global_loads smsp__inst_executed_op_global_ld.sum inst_executed_global_reductions smsp__inst_executed_op_global_red.sum inst_executed_global_stores smsp__inst_executed_op_global_st.sum inst_executed_local_loads smsp__inst_executed_op_local_ld.sum inst_executed_local_stores smsp__inst_executed_op_local_st.sum inst_executed_shared_atomics smsp__inst_executed_op_shared_atom.sum + smsp__inst_executed_op_shared_atom_dot_alu.sum + smsp__inst_executed_op_shared_atom_dot_cas.sum inst_executed_shared_loads smsp__inst_executed_op_shared_ld.sum inst_executed_shared_stores smsp__inst_executed_op_shared_st.sum inst_executed_surface_atomics smsp__inst_executed_op_surface_atom.sum inst_executed_surface_loads smsp__inst_executed_op_surface_ld.sum + smsp__inst_executed_op_shared_atom_dot_alu.sum + smsp__inst_executed_op_shared_atom_dot_cas.sum inst_executed_surface_reductions smsp__inst_executed_op_surface_red.sum inst_executed_surface_stores smsp__inst_executed_op_surface_st.sum inst_executed_tex_ops smsp__inst_executed_op_texture.sum inst_fp_16 smsp__sass_thread_inst_executed_op_fp16_pred_on.sum inst_fp_32 smsp__sass_thread_inst_executed_op_fp32_pred_on.sum inst_fp_64 smsp__sass_thread_inst_executed_op_fp64_pred_on.sum inst_integer smsp__sass_thread_inst_executed_op_integer_pred_on.sum inst_inter_thread_communication smsp__sass_thread_inst_executed_op_inter_thread_communication_pred_on.sum inst_issued smsp__inst_issued.sum inst_misc smsp__sass_thread_inst_executed_op_misc_pred_on.sum inst_per_warp smsp__average_inst_executed_per_warp.ratio inst_replay_overhead n/a ipc smsp__inst_executed.avg.per_cycle_active issue_slot_utilization smsp__issue_active.avg.pct_of_peak_sustained_active issue_slots smsp__inst_issued.sum issued_ipc smsp__inst_issued.avg.per_cycle_active l2_atomic_throughput 2 * ( lts__t_sectors_op_atom.sum.per_second + lts__t_sectors_op_red.sum.per_second ) l2_atomic_transactions 2 * ( lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum ) l2_global_atomic_store_bytes lts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_global_op_atom.sum l2_global_load_bytes lts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_global_op_ld.sum l2_local_global_store_bytes lts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_local_op_st.sum + lts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_global_op_st.sum l2_local_load_bytes lts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_local_op_ld.sum l2_read_throughput lts__t_sectors_op_read.sum.per_second + lts__t_sectors_op_atom.sum.per_second + lts__t_sectors_op_red.sum.per_second 
2 l2_read_transactions lts__t_sectors_op_read.sum + lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum 2 l2_surface_load_bytes lts__t_bytes_equiv_l1sectormiss_pipe_tex_mem_surface_op_ld.sum l2_surface_store_bytes lts__t_bytes_equiv_l1sectormiss_pipe_tex_mem_surface_op_st.sum l2_tex_hit_rate lts__t_sector_hit_rate.pct l2_tex_read_hit_rate lts__t_sector_op_read_hit_rate.pct l2_tex_read_throughput lts__t_sectors_srcunit_tex_op_read.sum.per_second l2_tex_read_transactions lts__t_sectors_srcunit_tex_op_read.sum l2_tex_write_hit_rate lts__t_sector_op_write_hit_rate.pct l2_tex_write_throughput lts__t_sectors_srcunit_tex_op_write.sum.per_second l2_tex_write_transactions lts__t_sectors_srcunit_tex_op_write.sum l2_utilization lts__t_sectors.avg.pct_of_peak_sustained_elapsed l2_write_throughput lts__t_sectors_op_write.sum.per_second + lts__t_sectors_op_atom.sum.per_second + lts__t_sectors_op_red.sum.per_second l2_write_transactions lts__t_sectors_op_write.sum + lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum ldst_executed n/a ldst_fu_utilization smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active ldst_issued n/a local_hit_rate n/a local_load_requests l1tex__t_requests_pipe_lsu_mem_local_op_ld.sum local_load_throughput l1tex__t_bytes_pipe_lsu_mem_local_op_ld.sum.per_second local_load_transactions l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum local_load_transactions_per_request l1tex__average_t_sectors_per_request_pipe_lsu_mem_local_op_ld.ratio local_memory_overhead n/a local_store_requests l1tex__t_requests_pipe_lsu_mem_local_op_st.sum local_store_throughput l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum.per_second local_store_transactions l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum local_store_transactions_per_request l1tex__average_t_sectors_per_request_pipe_lsu_mem_local_op_st.ratio nvlink_data_receive_efficiency n/a nvlink_data_transmission_efficiency n/a nvlink_overhead_data_received (nvlrx__bytes_data_protocol.sum / nvlrx__bytes.sum) * 100 nvlink_overhead_data_transmitted (nvltx__bytes_data_protocol.sum / nvltx__bytes.sum) * 100 nvlink_receive_throughput nvlrx__bytes.sum.per_second nvlink_total_data_received nvlrx__bytes.sum nvlink_total_data_transmitted nvltx__bytes.sum nvlink_total_nratom_data_transmitted n/a nvlink_total_ratom_data_transmitted n/a nvlink_total_response_data_received n/a nvlink_total_write_data_transmitted n/a nvlink_transmit_throughput nvltx__bytes.sum.per_second nvlink_user_data_received nvlrx__bytes_data_user.sum nvlink_user_data_transmitted nvltx__bytes_data_user.sum nvlink_user_nratom_data_transmitted n/a nvlink_user_ratom_data_transmitted n/a nvlink_user_response_data_received n/a nvlink_user_write_data_transmitted n/a pcie_total_data_received pcie__read_bytes.sum pcie_total_data_transmitted pcie__write_bytes.sum shared_efficiency smsp__sass_average_data_bytes_per_wavefront_mem_shared.pct shared_load_throughput l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum.per_second shared_load_transactions l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum shared_load_transactions_per_request n/a shared_store_throughput l1tex__data_pipe_lsu_wavefronts_mem_shared_op_st.sum.per_second shared_store_transactions l1tex__data_pipe_lsu_wavefronts_mem_shared_op_st.sum shared_store_transactions_per_request n/a shared_utilization l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed single_precision_fu_utilization smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active sm_efficiency smsp__cycles_active.avg.pct_of_peak_sustained_elapsed 
sm_tex_utilization l1tex__texin_sm2tex_req_cycles_active.avg.pct_of_peak_sustained_elapsed special_fu_utilization smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active stall_constant_memory_dependency smsp__warp_issue_stalled_imc_miss_per_warp_active.pct stall_exec_dependency smsp__warp_issue_stalled_short_scoreboard_per_warp_active.pct + smsp__warp_issue_stalled_wait_per_warp_active.pct stall_inst_fetch smsp__warp_issue_stalled_no_instruction_per_warp_active.pct stall_memory_dependency smsp__warp_issue_stalled_long_scoreboard_per_warp_active.pct stall_memory_throttle smsp__warp_issue_stalled_drain_per_warp_active.pct + smsp__warp_issue_stalled_lg_throttle_per_warp_active.pct stall_not_selected smsp__warp_issue_stalled_not_selected_per_warp_active.pct stall_other smsp__warp_issue_stalled_dispatch_stall_per_warp_active.pct + smsp__warp_issue_stalled_misc_per_warp_active.pct stall_pipe_busy smsp__warp_issue_stalled_math_pipe_throttle_per_warp_active.pct + smsp__warp_issue_stalled_mio_throttle_per_warp_active.pct stall_sleeping smsp__warp_issue_stalled_sleeping_per_warp_active.pct stall_sync smsp__warp_issue_stalled_barrier_per_warp_active.pct + smsp__warp_issue_stalled_membar_per_warp_active.pct stall_texture smsp__warp_issue_stalled_tex_throttle_per_warp_active.pct surface_atomic_requests l1tex__t_requests_pipe_tex_mem_surface_op_atom.sum surface_load_requests l1tex__t_requests_pipe_tex_mem_surface_op_ld.sum surface_reduction_requests l1tex__t_requests_pipe_tex_mem_surface_op_red.sum surface_store_requests l1tex__t_requests_pipe_tex_mem_surface_op_st.sum sysmem_read_bytes lts__t_sectors_aperture_sysmem_op_read * 32 sysmem_read_throughput lts__t_sectors_aperture_sysmem_op_read.sum.per_second sysmem_read_transactions lts__t_sectors_aperture_sysmem_op_read.sum sysmem_read_utilization n/a sysmem_utilization n/a sysmem_write_bytes lts__t_sectors_aperture_sysmem_op_write * 32 sysmem_write_throughput lts__t_sectors_aperture_sysmem_op_write.sum.per_second sysmem_write_transactions lts__t_sectors_aperture_sysmem_op_write.sum sysmem_write_utilization n/a tensor_precision_fu_utilization sm__pipe_tensor_op_hmma_cycles_active.avg.pct_of_peak_sustained_active tensor_precision_int_utilization sm__pipe_tensor_op_imma_cycles_active.avg.pct_of_peak_sustained_active (SM 7.2+) tex_cache_hit_rate l1tex__t_sector_hit_rate.pct tex_cache_throughput n/a tex_cache_transactions l1tex__lsu_writeback_active.avg.pct_of_peak_sustained_active + l1tex__tex_writeback_active.avg.pct_of_peak_sustained_active tex_fu_utilization smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active tex_sm_tex_utilization l1tex__f_tex2sm_cycles_active.avg.pct_of_peak_sustained_elapsed tex_sm_utilization sm__mio2rf_writeback_active.avg.pct_of_peak_sustained_elapsed tex_utilization n/a texture_load_requests l1tex__t_requests_pipe_tex_mem_texture.sum warp_execution_efficiency smsp__thread_inst_executed_per_inst_executed.ratio warp_nonpred_execution_efficiency smsp__thread_inst_executed_per_inst_executed.pct 2 Sector reads from reductions are added here only for compatibility to the current definition of the metric in nvprof. 
Reductions do not cause data to be communicated from L2 back to L1.", "keywords": []}, {"id": 135, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#metrics-and-units", "display_name": "Metrics and Units", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "metrics-and-units", "priority": -1, "content": "When available and applicable, metrics are shown along with their unit. This is to make it apparent if a metric represents cycles, threads, bytes/s, and so on. By default, units are scaled automatically so that metric values are shown with a reasonable order of magnitude. Units are scaled using their SI-factors, i.e. byte-based units are scaled using a factor of 1000 and the prefixes K, M, G, etc. Time-based units are also scaled using a factor of 1000, with the prefixes n, u and m. This scaling can be changed using a command line option, see Console Output options for details.", "keywords": []}, {"id": 136, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#modes", "display_name": "Modes", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "modes", "priority": -1, "content": "Modes change the fundamental behavior of the command line profiler. Depending on which mode is chosen, different Command Line Options become available. For example, Launch is invalid if the Attach mode is selected. Launch-and-attach: The target application is launched on the local system with the tool\u2019s injection libraries. Depending on which profiling options are chosen, selected kernels in the application are profiled and the results printed to the console or stored in a report file. The tool exits once the target application finishes or crashes, and once all results are processed. This is the default, and the only mode that supports profiling of child processes on selected platforms. Launch: The target application is launched on the local system with the tool\u2019s injection libraries. As soon as the first intercepted API call is reached (commonly cuInit() ), all application threads are suspended. The application now expects a tool to attach for profiling. You can attach using NVIDIA Nsight Compute or using the command line profiler\u2019s Attach mode. Attach: The tool tries to connect to a target application previously launched using NVIDIA Nsight Compute or using the command line profiler\u2019s Launch mode. The tool can attach to a target on the local system or using a remote connection.", "keywords": []}, {"id": 137, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#multi-process-support", "display_name": "Multi-Process Support", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "multi-process-support", "priority": -1, "content": "NVIDIA Nsight Compute CLI supports profiling multi-process applications on the following platforms: x86_64 Windows, x86_64 Linux, DRIVE OS Linux, DRIVE OS QNX, PowerPC. See the Launch options on how to enable this feature. On x86_64 Windows, NVIDIA Nsight Compute CLI supports profiling 64-bit processes launched from 32-bit applications by default . On x86_64 Linux, launching from 32-bit applications requires you to enable the support-32bit option, and the required 32-bit libraries must be installed on your system. 
On DRIVE OS Linux, DRIVE OS QNX and PowerPC, tracking of 32-bit applications is not supported. Profiling of 32-bit processes is not supported on any platform. Profiling MPI applications is a special case of multi-process profiling. NVIDIA Nsight Compute CLI can be used to profile applications launched with the mpirun command. To profile all ranks on a node and store all the profiling data in a single report file: ncu --target-processes all -o &lt;report-name&gt; mpirun [mpi arguments] &lt;app&gt; [app arguments] To profile multi-node submissions, one instance of NVIDIA Nsight Compute CLI can be used per node. Ensure that you specify unique report files per rank. mpirun [mpi arguments] ncu -o report_%q{OMPI_COMM_WORLD_RANK} &lt;app&gt; [app arguments] To profile a single rank, one can use a wrapper script. The following script (called \u201cwrap.sh\u201d) profiles rank 0 only: #!/bin/bash if [[ $OMPI_COMM_WORLD_RANK == 0 ]]; then ncu -o report_${OMPI_COMM_WORLD_RANK} --target-processes all &quot;$@&quot; else &quot;$@&quot; fi and then execute: mpirun [mpi arguments] ./wrap.sh &lt;app&gt; [app arguments]", "keywords": []}, {"id": 138, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#nvprof-transition-guide", "display_name": "Nvprof Transition Guide", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "nvprof-transition-guide", "priority": -1, "content": "This guide provides tips for moving from nvprof to NVIDIA Nsight Compute CLI. NVIDIA Nsight Compute CLI tries to provide as much feature and usage parity as possible with nvprof, but some features are now covered by different tools and some command line options have changed their name or meaning.", "keywords": []}, {"id": 139, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#nvtx-filtering", "display_name": "NVTX Filtering", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "nvtx-filtering", "priority": -1, "content": "--nvtx-include &lt;configuration&gt; --nvtx-exclude &lt;configuration&gt; These options are used to profile only those kernels which satisfy the conditions mentioned in the configuration. Through these options, you can choose which kernel falls into a specific range or collection of ranges. You can use both options multiple times, mentioning all the --nvtx-include configurations followed by all --nvtx-exclude configurations. NVTX filtering requires the --nvtx option. NVTX ranges are of two types: NvtxRangeStart/End and NvtxRangePush/Pop. The configuration syntax for both types is briefly described below. Both range and domain names can contain whitespace. Note that \u201cDomain\u201d and \u201crange\u201d in the examples below are for illustration purposes only and are not required to mark domain or range names. Push-Pop Ranges Quantifier Description Example / Delimiter between range names. When only a single range name is given, the delimiter must be appended to indicate that this refers to a push/pop range. A_range/ A_range/B range A_range/\\*/B range [ Range is at the bottom of the stack [A_range [A_range/+/Range Z ] Range is at the top of the stack Range Z] Range C/\\*/Range Z] + Only one range between the two other ranges B range/+/Range D * Zero or more range(s) between the two other ranges B range/\\*/Range Z @ Specify domain name. 
If not mentioned, the <default domain> is assumed. Domain-A@A_range Domain B@A_range/\\*/Range Z] Include kernels wrapped inside push/pop range \u2018A_range\u2019 of \u2018<default domain>\u2019: ncu --nvtx --nvtx-include \"A_range/\" CuNvtx.exe Include kernels wrapped inside push/pop range \u2018A_range\u2019 of \u2018Domain-A\u2019: ncu --nvtx --nvtx-include \"Domain-A@A_range/\" CuNvtx.exe Include kernels wrapped inside push/pop range \u2018A_range\u2019 of \u2018<default domain>\u2019, where \u2018A_range\u2019 is at the bottom of the stack: ncu --nvtx --nvtx-include \"[A_range\" CuNvtx.exe Include kernels wrapped inside push/pop ranges \u2018A_range\u2019 and \u2018B range\u2019 of \u2018<default domain>\u2019, with zero or many ranges between them: ncu --nvtx --nvtx-include \"A_range/*/B range\" CuNvtx.exe Exclude kernels wrapped inside push/pop ranges \u2018A_range\u2019 and \u2018B range\u2019 of \u2018<default domain>\u2019, with zero or many ranges between them: ncu --nvtx --nvtx-exclude \"A_range/*/B range\" CuNvtx.exe Include kernels wrapped inside only push/pop range \u2018A_range\u2019 of \u2018<default domain>\u2019 but not inside \u2018B range\u2019 at the top of the stack: ncu --nvtx --nvtx-include \"A_range/\" --nvtx-exclude \"B range]\" CuNvtx.exe Start-End Ranges Quantifier Description Example , Delimiter between range names A_range,B range B range,A_range,Range C @ Specify domain name. If not mentioned, the <default domain> is assumed. Domain-A@A_range Domain B@B range,Range Z Include kernels wrapped inside start/end range \u2018A_range\u2019 of \u2018Domain-A\u2019: ncu --nvtx --nvtx-include \"Domain-A@A_range\" CuNvtx.exe Include kernels wrapped inside both start/end ranges, \u2018A_range\u2019 and \u2018B range\u2019 of \u2018<default domain>\u2019: ncu --nvtx --nvtx-include \"A_range,B range\" CuNvtx.exe Include kernels wrapped inside start/end ranges, \u2018A_range\u2019 or \u2018B range\u2019 of \u2018<default domain>\u2019: ncu --nvtx --nvtx-include \"A_range\" --nvtx-include \"B range\" CuNvtx.exe Include all kernels, except those which are wrapped inside start/end range \u2018A_range\u2019 of \u2018<default domain>\u2019: ncu --nvtx --nvtx-exclude \"A_range\" CuNvtx.exe Include kernels wrapped inside only start/end \u2018B range\u2019 and not \u2018A_range\u2019 of \u2018<default domain>\u2019: ncu --nvtx --nvtx-include \"B range\" --nvtx-exclude \"A_range\" CuNvtx.exe Regular Expression Support The configuration syntax for both range types, NvtxRangeStart/End and NvtxRangePush/Pop, is the same. Additionally, to use regular expressions, use the following syntax. Provide the prefix \u2018regex:\u2019 to treat the NVTX config as a regular expression. ncu --nvtx --nvtx-include \"regex:Domain[A-Z]@Range[0-9]/\" CuNvtx.exe The kernels wrapped inside push/pop ranges matching the regex \u2018Range[0-9]\u2019 of domains matching the regex \u2018Domain[A-Z]\u2019 are profiled. Provide \u2018/\u2019 as a prefix to \u201c[\u201d or \u201c]\u201d only for the range part of the config if \u201c[\u201d or \u201c]\u201d is at the start or at the end of the range part, respectively. This is needed so that NCU can distinguish if \u201c[\u201d or \u201c]\u201d is part of the regex or represents the top/bottom of the stack.
ncu --nvtx --nvtx-include \"regex:[0-9]domainA@/[0-9]rangeA,RangeC[0-9/]\" CuNvtx.exe The kernels wrapped inside start/end ranges matching the regex \u2018[0-9]rangeA\u2019 and \u2018RangeC[0-9]\u2019 of a domain matching the regex \u2018[0-9]domainA\u2019 are profiled. If any quantifier is part of the domain/range name, you need to use \u2018\\\\\u2019 or \u2018\\\u2019 as a prefix. For the \u201c$\u201d quantifier, only the \u2018\\\\\u2019 prefix is valid. Additional Information --nvtx-include DomainA@RangeA,DomainB@RangeB //Not a valid config In a single NVTX configuration, multiple ranges with regard to a single domain can be specified. Mentioning ranges from different domains inside a single NVTX config is not supported. --nvtx-include \"A_range\\[i\\]\" Quantifiers \u2018@\u2019 \u2018,\u2019 \u2018[\u2019 \u2018]\u2019 \u2018/\u2019 \u2018*\u2019 \u2018+\u2019 can be used in range names using the prefix \u2018\\\u2019. The kernels wrapped inside \u2018A_range[i]\u2019 of \u2018<default domain>\u2019 in the application are profiled. --nvtx-include \"A_range\" //Start/End configuration --nvtx-include \"A_range/\" //Push/Pop configuration --nvtx-include \"A_range]\" //Push/Pop configuration If the domain/range name contains \u2018\\\u2019, you need to provide \u2018\\\\\\\\\u2019 in the config. Do not use \u2018\\\\\\\\\u2019 before any quantifier. To include/exclude only a single range in a Push/Pop configuration without specifying the stack frame position \u2018[\u2019 or \u2018]\u2019, use the \u2018/\u2019 quantifier at the end. --nvtx-include \"A_range/*/B range\" The order in which you mention Push/Pop configurations is important. In the above example, \u2018A_range\u2019 should be below \u2018B range\u2019 in the stack of ranges so that the kernel is profiled. NVTX filtering honors cudaProfilerStart() and cudaProfilerStop(). There is no support for ranges with no name.", "keywords": []}, {"id": 140, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#output", "display_name": "Output", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "output", "priority": -1, "content": "API trace and summary NVIDIA Nsight Compute CLI does not support any form of API-usage related output. No API data is captured during profiling. Dependency analysis NVIDIA Nsight Compute CLI does not support any dependency analysis. No API data is captured during profiling. GPU trace NVIDIA Nsight Compute CLI does not support any GPU trace output. Due to kernel replay during profiling, kernel executions are serialized, and start and end timestamps do not necessarily match those during application execution. In addition, no records for memory activities are collected. Print summary While nvprof has several command line options to specify which summary information to print, NVIDIA Nsight Compute CLI uses further arguments to the --print-summary option. Profiling data can be summarized per-gpu, per-kernel or per-nvtx context. Kernel name demangling Nvprof allows users to decide between name demangling on or off using the --demangling option. NVIDIA Nsight Compute CLI currently always demangles kernel names in the output. In addition, the option --kernel-regex-base can be used to decide which name format should be used when matching kernel names during filtering. Pages Nvprof has no concept of output pages, all data is shown as a list or summarized.
NVIDIA Nsight Compute CLI uses pages to define how data should be structured and printed. Those correspond to the report pages used in the GUI variant. The option --page can be used to select which page to show, and details is selected by default. All pages also support printing in CSV format for easier post-processing, using the --csv option.", "keywords": []}, {"id": 141, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#output-pages", "display_name": "Output Pages", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "output-pages", "priority": -1, "content": "The command line profiler supports printing results to the console using various pages. Each page has an equivalent in NVIDIA Nsight Compute\u2019s Profiler Report. In the command line profiler, they are slightly adapted to fit console output. To select a page, use the --page option. By default, the details page is used. Note that if --page is not used but --export is, no results will be printed to the console. Details: This page represents NVIDIA Nsight Compute\u2019s Details page. For every profiled kernel launch, each collected section is printed as a three-column table, followed by any rule results applied to this section. Rule results not associated with any section are printed after the kernel\u2019s sections. The first section table column shows the metric name. If the metric was given a label in the section, it is used instead. The second column shows the metric unit, if available. The third column shows the metric value. Both metric unit and value are automatically adjusted to the most fitting order of magnitude. By default, only metrics defined in section headers are shown. This can be changed by passing the --details-all option on the command line. Some metrics will show multiple values, separated by \u201c;\u201d, e.g. memory_l2_transactions_global Kbytes 240; 240; 240; 240; 240. Those are instanced metrics, which have one value per represented instance. An instance can be a streaming multiprocessor, an assembly source line, etc. Raw: This page represents NVIDIA Nsight Compute\u2019s Raw page. For every profiled kernel launch, each collected metric is printed as a three-column table. Besides metrics from sections, this includes automatically collected metrics such as device attributes and kernel launch information. The first column shows the metric name. The second and third columns show the metric unit and value, respectively. Both metric unit and value are automatically adjusted to the most fitting order of magnitude. No unresolved regex:, group:, or breakdown: metrics are included.", "keywords": []}, {"id": 142, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#pm-sampling", "display_name": "PM Sampling", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "pm-sampling", "priority": -1, "content": "These options apply to PM sampling. See here for options used in warp state sampling. Option Description Default pm-sampling-interval Set the PM sampling interval in cycles or ns (depending on the architecture), or determine dynamically when 0. 0 (auto) pm-sampling-buffer-size Set the size of the device-sided allocation for PM sampling in bytes, or determine dynamically when 0. 0 (auto) pm-sampling-max-passes Set the maximum number of passes used for PM sampling, or determine dynamically when 0.
0 (auto)", "keywords": []}, {"id": 143, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#profile", "display_name": "Profile", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "profile", "priority": -1, "content": "Option Description Default/Examples devices List the GPU devices to enable profiling on, separated by comma. 1 All devices Examples --devices 0,2 filter-mode Set the filtering mode for kernel launches. Available modes: global: Apply provided launch filters on kernel launches collectively. per-gpu: Apply provided launch filters on kernel launches separately on each device. Effective launch filters for this mode are --launch-count and --launch-skip per-launch-config: Apply kernel filters and launch filters on kernel launches separately for each GPU launch parameter i.e. Grid Size, Block Size and Shared Memory. global kernel-id Set the identifier to use for matching kernels. If the kernel does not match the identifier, it will be ignored for profiling. The identifier must be of the following format: context-id:stream-id:[name-operator:]kernel-name:invocation-nr context-id is the CUDA context ID or regular expression to match the NVTX name. stream-id is the CUDA stream ID or regular expression to match the NVTX name. name-operator is an optional operator to kernel-name . Currently, regex is the only supported operator. kernel-name is the expression to match the kernel name. By default, this is a full, literal match to what is specified by --kernel-name-base . When specifying the optional regex name operator, this is a partial regular expression match to what is specified by --kernel-name-base . invocation-nr is the N\u2019th invocation of this kernel function. Multiple invocations can also be specified using regular expressions. If the context/stream ID is a positive number, it will be strictly matched against the CUDA context/stream ID. Otherwise it will be treated as a regular expression and matched against the context/stream name specified using the NVTX library. 1 Examples --kernel-id ::foo:2 For kernel \u201cfoo\u201d, match the second invocation. --kernel-id :::&quot;.*5|3&quot; For all kernels, match the third invocation, and all for which the invocation number ends in \u201c5\u201d. --kernel-id ::regex:^.*foo$: Match all kernels ending in \u201cfoo\u201d. --kernel-id ::regex:^(?!foo): Match all kernels except those starting with \u201cfoo\u201d. Note that depending on your OS and shell, ` you might need to quote the expression, e.g. using single quotes in Linux bash : --kernel-id ::regex:&#x27;^(?!foo)&#x27;: --kernel-id 1:2::7 Match all seventh kernel invocations on context 1, stream 2. k,kernel-name Set the expression to use when matching kernel names. &lt;kernel name&gt; Set the kernel name for an exact match. regex:&lt;expression&gt; Set the regex to use for matching the kernel name. On shells that recognize regular expression symbols as special characters (e.g. Linux bash), the expression needs to be escaped with quotes, e.g. --kernel-name regex:&quot;.*Foo&quot; . If the kernel name or the provided expression do not match, it will be ignored for profiling. 1 Examples -k foo Match all kernels named exactly \u201cfoo\u201d. -k regex:foo Match all kernels that include the string \u201cfoo\u201d, e.g. \u201cfoo\u201d and \u201cfooBar\u201d. -k regex:&quot;foo|bar&quot; Match all kernels including the strings \u201cfoo\u201d or \u201cbar\u201d, e.g. 
\u201cfoo\u201d, \u201cfoobar\u201d, \u201c_bar2\u201d. kernel-name-base Set the basis for --kernel-name and --kernel-id kernel-name. 1 Options are: function: Function name without parameters, templates etc. e.g. dmatrixmul demangled: Demangled function name, including parameters, templates, etc. e.g. dmatrixmul(float*,int,int) mangled: Mangled function name. e.g. _Z10dmatrixmulPfiiS_iiS_ function c,launch-count Limit the number of profiled kernel launches. The count is only incremented for launches that match the kernel filters. 1 s,launch-skip Set the number of kernel launches to skip before starting to profile kernels. The number takes into account only launches that match the kernel filters. 1 0 launch-skip-before-match Set the number of kernel launches to skip before starting to profile. The count is incremented for all launches, regardless of the kernel filters. 1 0 range-filter Filter to profile specified instance(s) of matching NVTX ranges or start/stop ranges created through cu(da)ProfilerStart/Stop APIs. Specify in the format [yes/no/on/off]:[start/stop range instance(s)]:[NVTX range instance(s)] [yes/no/on/off]: default is \u2018no/off\u2019. If set to \u2018yes/on\u2019, then NVTX range numbering starts from 1 inside every start/stop range. Provide numbers in regex form, e.g. [2-4] or 2|3|4, to profile the 2nd, 3rd and 4th instance of the matching range. NVTX range numbers will be counted for the matching range provided using --nvtx-include. Examples --range-filter :2:3 --nvtx-include A/ Match the 2nd start/stop range and also the 3rd NVTX push/pop range A in the app. --range-filter yes:2:3 --nvtx-include A/ Match the 3rd NVTX push/pop range A from the 2nd start/stop range. kill Terminate the target application when the requested --launch-count was profiled. Allowed values: on/off yes/no no replay-mode Mechanism used for replaying a kernel launch multiple times to collect all requested profiling data: kernel: Replay individual kernel launches \u201ctransparently\u201d during the execution of the application. See Kernel Replay for more details. application: Relaunch the entire application multiple times. Requires deterministic program execution. See Application Replay for more details. range: Replay ranges of CUDA API calls and kernel launches \u201ctransparently\u201d during the execution of the application. Ranges must be defined using cu(da)ProfilerStart/Stop API pairs or NVTX expressions. See Range Replay for more details. app-range: Profile ranges without API capture by relaunching the entire application multiple times. Requires deterministic program execution. Ranges must be defined using cu(da)ProfilerStart/Stop API pairs or NVTX expressions. See Application Range Replay for more details. kernel app-replay-buffer Application replay buffer location. file: Replay pass data is buffered in a temporary file. The report is created after profiling completes. This mode is more scalable, as the amount of required memory does not scale with the number of profiled kernels. memory: Replay pass data is buffered in memory, and the report is created while profiling. This mode can result in better performance if the filesystem is slow, but the amount of required memory scales with the number of profiled kernels. file app-replay-match Application replay kernel matching strategy. For all options, kernels are matched on a per-process and per-device (GPU) basis.
The options below are used to configure the applied strategy in more detail. name: Kernels are matched in the following order: 1. (mangled) name, 2. order of execution grid: Kernels are matched in the following order: 1. (mangled) name, 2. CUDA grid/block size, 3. order of execution all: Kernels are matched in the following order: 1. (mangled) name, 2. CUDA grid/block size, 3. CUDA context ID, 4. CUDA stream ID, 5. order of execution grid app-replay-mode Application replay kernel matching mode: strict: Requires all kernels to match across all replay passes. relaxed: Produces results only for kernels that could be matched across replay passes. strict range-replay-options Range replay options, separated by comma. The following options are supported: enable-greedy-sync Insert ctx sync for applicable deferred APIs during capture. disable-host-restore Disable restoring device-written host allocations. none graph-profiling CUDA graph profiling mode: node Profile individual kernel nodes as regular CUDA kernels. graph Profile entire graphs as one workload (but disable profiling of individual graph kernel nodes). See the Kernel Profiling Guide for more information on this mode. node list-sections List all sections found in the searched section folders and exit. section Add a section identifier to collect in one of the following ways: <section identifier> Set the section identifier for an exact match. regex:<expression> Regular expression allows matching the full section identifier. For example, .*Stats matches all sections ending with \u2018Stats\u2019. On shells that recognize regular expression symbols as special characters (e.g. Linux bash), the expression needs to be escaped with quotes, e.g. --section \"regex:.*Stats\". This option is ignored when used with --import and --page raw or --page source. 1 If no --section options are given, the sections associated with the basic set are collected. If no sets are found, all sections are collected. section-folder Add a non-recursive search path for .section files. Section files in this folder will be made available to the --section option. If no --section-folder options are given, the sections folder is added by default. section-folder-recursive Add a recursive search path for .section files. Section files in this folder and all folders below will be made available to the --section option. If no --section-folder options are given, the sections folder is added by default. list-rules List all rules found in the searched section folders and exit. apply-rules Apply active and applicable rules to each profiling result. Use --rule to limit which rules to apply. Allowed values: on/off yes/no yes rule Add a rule identifier to apply. Implies --apply-rules yes. If no --rule options are given, all applicable rules in the sections folder are applied. import-source If available from -lineinfo, correlated CUDA source files are permanently imported into the report. Allowed values: on/off yes/no Use the --source-folders option to provide missing source files. no source-folders Add comma-separated recursive search paths for missing CUDA source files to import into the report. list-metrics List all metrics collected from active sections. If the list of active sections is restricted using the --section option, only metrics from those sections will be listed. query-metrics Query available metrics for the devices on the system. Use --devices and --chips to filter which devices to query.
Note that by default, listed metric names need to be appended with a valid suffix in order for them to become valid metrics. See --query-metrics-mode for how to get the list of valid suffixes, or check the Kernel Profiling Guide. query-metrics-mode Set the mode for querying metrics. Implies --query-metrics. Available modes: base: Only the base names of the metrics. suffix: Suffix names for the base metrics. This gives the list of all metrics derived from the base metrics. Use --metrics to specify the base metrics to query. all: Full names for all metrics. This gives the list of all base metrics and their suffix metrics. base query-metrics-collection Set which metric collection kind to query. Implies --query-metrics. Available collections: profiling: Query metrics available for profiling. pmsampling: Query metrics available for PM sampling. profiling metrics Specify all metrics to be profiled, separated by comma. If no --section options are given, only the temporary section containing all metrics listed using this option is collected. If --section options are given in addition to --metrics, all metrics from those sections and from --metrics are collected. Names passed to this option support the following prefixes: regex:<expression> expands to all metrics that partially match the expression. Enclose the regular expression in ^\u2026$ to force a full match. group:<name> lists all metrics of the metric group with that name. See section files for valid group names. breakdown:<metric> expands to the input metrics of the high-level throughput metric. pmsampling:<metric> collects the metric using PM sampling. Only single-pass metrics that don\u2019t require SASS-patching (_sass_) are supported. Using this prefix adds a timeline element to the report\u2019s details page. Combining multiple prefixes is not supported. If a metric requires a suffix to be valid, and neither regex: nor group: are used, this option automatically expands the name to all available first-level sub-metrics. When importing a report, group: and breakdown: are not supported. When using regex:, the expression must not include any commas. 1 disable-extra-suffixes Disable the collection of extra suffixes (avg, min, max, sum) for all metrics. Only collect what is explicitly specified. list-chips List all supported chips that can be used with --chips. chips Specify the chips for querying metrics, separated by comma. Examples --chips gv100,tu102 profile-from-start Set whether the application should be profiled from its start. Allowed values: on/off yes/no yes disable-profiler-start-stop Disable profiler start/stop. When enabled, cu(da)ProfilerStart/Stop API calls are ignored. quiet Suppress all profiling output. verbose Make profiler output more verbose. cache-control Control the behavior of the GPU caches during profiling. Allowed values: all: All GPU caches are flushed before each kernel replay iteration during profiling. While metric values in the execution environment of the application might be slightly different without invalidating the caches, this mode offers the most reproducible metric results across the replay passes and also across multiple runs of the target application. none: No GPU caches are flushed during profiling. This can improve performance and better replicates the application behavior if only a single kernel replay pass is necessary for metric collection. However, some metric results will vary depending on prior GPU work, and between replay iterations.
This can lead to inconsistent and out-of-bounds metric values. all clock-control Control the behavior of the GPU clocks during profiling. Allowed values: base: GPC and memory clocks are locked to their respective base frequency during profiling. This has no impact on thermal throttling. Note that actual clocks might still vary, depending on the level of driver support for this feature. As an alternative, use nvidia-smi to lock the clocks externally and set this option to none. none: No GPC or memory frequencies are changed during profiling. reset: Reset GPC and memory clocks for all or the selected devices and exit. Use if a previous, killed execution of ncu left the GPU clocks in a locked state. base nvtx-include Adds an include statement to the NVTX filter, which allows selecting kernels to profile based on NVTX ranges. 1 nvtx-exclude Adds an exclude statement to the NVTX filter, which allows selecting kernels to profile based on NVTX ranges. 1 1 This filtering option is available when using --import.", "keywords": []}, {"id": 144, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#profile-import", "display_name": "Profile Import", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "profile-import", "priority": -1, "content": "Using the --import option, saved reports can be imported into the command line profiler. When using this flag, most other options are not available, except for certain result filtering options. They are marked as such in the Profile options table.", "keywords": []}, {"id": 145, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#quickstart", "display_name": "Quickstart", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "quickstart", "priority": -1, "content": "Launch the target application with the command line profiler The command line profiler launches the target application, instruments the target API, and collects profile results for the specified kernels. The CLI executable is called ncu. A shortcut with this name is located in the base directory of the NVIDIA Nsight Compute installation. The actual executable is located in the folder target\\windows-desktop-win7-x64 on Windows or target/linux-desktop-glibc_2_11_3-x64 on Linux. By default, NVIDIA Nsight Compute is installed in /usr/local/cuda-<cuda-version>/NsightCompute-<version> on Linux and in C:\\Program Files\\NVIDIA Corporation\\Nsight Compute <version> on Windows. To collect the basic set for all kernel launches in the target application, launch: $ ncu -o profile CuVectorAddMulti.exe The application runs in instrumented mode and for each kernel launch, a profile result is created. The results are written by default to profile.ncu-rep. Each output from the compute profiler starts with ==PROF==. The other lines are output from the application itself. For each profiled kernel, the name of the kernel function and the progress of data collection is shown. To collect all requested profile information, it may be required to replay the kernels multiple times. The total number of replay passes per kernel is shown after profiling has completed. A typical transcript looks like this:
[Vector addition of 1144477 elements] ==PROF== Connected to process 5268 Copy input data from the host memory to the CUDA device CUDA kernel launch A with 4471 blocks of 256 threads ==PROF== Profiling \"vectorAdd_A\" - 0: 0%....50%....100% - 46 passes CUDA kernel launch B with 4471 blocks of 256 threads ==PROF== Profiling \"vectorAdd_B\" - 1: 0%....50%....100% - 46 passes Copy output data from the CUDA device to the host memory Done ==PROF== Disconnected from process 5268 ==PROF== Report: profile.ncu-rep Customizing data collection Options are available to specify for which kernels data should be collected. -c limits the number of kernel launches collected. -s skips the given number of kernels before data collection starts. -k allows you to filter the kernels by a regex match of their names. --kernel-id allows you to filter kernels by context, stream, name and invocation, similar to nvprof. To limit what should be collected for each kernel launch, specify the exact *.section file(s) by their identifier using --section. Each section file defines a set of metrics to be collected, grouped logically to solve a specific performance question. By default, the sections associated with the basic set are collected. Use --list-sets to see the list of currently available sets. Use --list-sections to see the list of currently available sections. The default search directory and location of pre-defined section files is also called sections/. See the Profiling Guide for more details. Alternatively, you can collect a set of individual metrics using --metrics. The available metrics can be queried using --query-metrics. For an explanation of the naming conventions and structuring of metrics, see Metrics Structure. Most metrics in NVIDIA Nsight Compute are named using a base name and various suffixes, e.g. sm__throughput.avg.pct_of_peak_sustained_elapsed. The base name is sm__throughput and the suffix is avg.pct_of_peak_sustained_elapsed. This is because most metrics follow the same structure and have the same set of suffixes. You need to pass the base or full name to NVIDIA Nsight Compute when selecting a metric for profiling. Use --query-metrics-mode suffix --metrics <metrics list> to see the full names for the chosen metrics. Some additional metrics do not follow this structured naming. They are documented in the Metrics Reference. Changing command line output By default, a temporary file is used to store profiling results, and data is printed to the command line. To permanently store the profiler report, use -o to specify the output filename. Besides storing results in a report file, the command line profiler can print results using different pages. Those pages correspond to the respective pages in the UI\u2019s report. By default, the Details page is printed, if no explicit output file is specified. To select a different page, or to print in addition to storing in an explicit file, use the --page=<Page> option. Currently, the following pages are supported: details, raw, source. Use --csv to make any output comma separated and easier to process further. See Console Output for further options, e.g. summary views. Open the report in the UI The UI executable is called ncu-ui. A shortcut with this name is located in the base directory of the NVIDIA Nsight Compute installation. The actual executable is located in the folder host\\windows-desktop-win7-x64 on Windows or host/linux-desktop-glibc_2_11_3-x64 on Linux.
In the UI window, close the Connection dialog and open the report file through File > Open, or by dragging the report file into NVIDIA Nsight Compute. You can also specify the report file as a command line parameter to the executable, i.e. as ncu-ui <MyReport.ncu-rep>. Alternatively, when using NVIDIA Nsight Compute CLI on a platform with host support, --open-in-ui can be used directly with ncu to open a collected report in the user interface. The report opens in a new document window. For more information about the report, see the Profiler Report documentation for NVIDIA Nsight Compute.", "keywords": []}, {"id": 146, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#response-file", "display_name": "Response File", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "response-file", "priority": -1, "content": "Response files can be specified by adding @FileName to the command line. The file name must immediately follow the @ character. The content of each response file is inserted in place of the corresponding response file option.", "keywords": []}, {"id": 147, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#trace", "display_name": "Trace", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "trace", "priority": -1, "content": "GPU and API trace NVIDIA Nsight Compute CLI does not support any form of tracing GPU or API activities. This functionality is covered by NVIDIA Nsight Systems.", "keywords": []}, {"id": 148, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "page", "name": "NsightComputeCli/index#warp-sampling", "display_name": "Warp Sampling", "type": "section", "display_type": "Page section", "docname": "NsightComputeCli/index", "anchor": "warp-sampling", "priority": -1, "content": "Option Description Default warp-sampling-interval Set the sampling period in the range of [0..31]. The actual frequency is 2 ^ (5 + value) cycles. If set to \u2018auto\u2019, the profiler tries to automatically determine a high sampling frequency without skipping samples or overflowing the output buffer. auto warp-sampling-max-passes Set the maximum number of passes used for sampling (see the Kernel Profiling Guide for more details on profiling overhead). 5 warp-sampling-buffer-size Set the size of the device-sided allocation for samples in bytes.
32*1024*1024", "keywords": []}, {"id": 149, "doc_id": 149, "filename": "NsightComputeCli/index.html", "domain_name": "std", "name": "NsightComputeCli/index", "display_name": "Nsight Compute CLI", "type": "doc", "display_type": "Page", "docname": "NsightComputeCli/index", "anchor": "", "priority": -1, "content": "The User Guide for Nsight Compute CLI.", "keywords": []}, {"id": 150, "doc_id": 150, "filename": "NvRulesAPI/index.html", "domain_name": "std", "name": "NvRulesAPI/index", "display_name": "NvRules API", "type": "doc", "display_type": "Page", "docname": "NvRulesAPI/index", "anchor": "", "priority": -1, "content": "Modules Data Structures Namespaces Notices Notices ALL NVIDIA DESIGN SPECIFICATIONS, REFERENCE BOARDS, FILES, DRAWINGS, DIAGNOSTICS, LISTS, AND OTHER DOCUMENTS (TOGETHER AND SEPARATELY, \u201cMATERIALS\u201d) ARE BEING PROVIDED \u201cAS IS.\u201d NVIDIA MAKES NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. Information furnished is believed to be accurate and reliable. However, NVIDIA Corporation assumes no responsibility for the consequences of use of such information or for any infringement of patents or other rights of third parties that may result from its use. No license is granted by implication of otherwise under any patent rights of NVIDIA Corporation. Specifications mentioned in this publication are subject to change without notice. This publication supersedes and replaces all other information previously supplied. NVIDIA Corporation products are not authorized as critical components in life support devices or systems without express written approval of NVIDIA Corporation. Trademarks NVIDIA and the NVIDIA logo are trademarks or registered trademarks of NVIDIA Corporation in the U.S. and other countries. Other company and product names may be trademarks of the respective companies with which they are associated.", "keywords": []}, {"id": 151, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#analysis", "display_name": "Analysis", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "analysis", "priority": -1, "content": "The roofline chart can be very helpful in guiding performance optimization efforts for a particular kernel. Roofline anaysis. As shown here, the ridge point partitions the roofline chart into two regions. The area shaded in blue under the sloped Memory Bandwidth Boundary is the Memory Bound region, while the area shaded in green under the Peak Performance Boundary is the Compute Bound region. The region in which the achieved value falls, determines the current limiting factor of kernel performance. The distance from the achieved value to the respective roofline boundary (shown in this figure as a dotted white line), represents the opportunity for performance improvement. The closer the achieved value is to the roofline boundary, the more optimal is its performance. An achieved value that lies on the Memory Bandwidth Boundary but is not yet at the height of the ridge point would indicate that any further improvements in overall FLOP/s are only possible if the Arithmetic Intensity is increased at the same time. 
Using the baseline feature in combination with roofline charts is a good way to track optimization progress over a number of kernel executions.", "keywords": []}, {"id": 152, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#application-range-replay", "display_name": "Application Range Replay", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "application-range-replay", "priority": -1, "content": "In Application Range Replay, all requested metrics in NVIDIA Nsight Compute are grouped into one or more passes. Similar to Range Replay, metrics are not associated with individual kernels but with the entire selected range. This allows the tool to execute workloads (kernels, CUDA graphs, \u2026) without serialization and thereby supports profiling workloads that must be run concurrently for correctness or performance reasons. In contrast to Range Replay, the range is not explicitly captured and executed directly for each pass, but instead the entire application is re-run multiple times, with one pass collected for each range in every application execution. This has the benefit that no application state must be observed and captured for each range, and API calls within the range do not need to be supported explicitly, as correct execution of the range is handled by the application itself. Defining ranges to profile is identical to Range Replay. The CUDA context for which the range should be profiled must be current to the thread defining the start of the range and must be active for the entire range. Execution with Application Range Replay. A range of workloads is replayed by re-running the entire application without modifying interactions or saving and restoring memory.", "keywords": []}, {"id": 153, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#application-replay", "display_name": "Application Replay", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "application-replay", "priority": -1, "content": "In Application Replay, all metrics requested for a specific kernel launch in NVIDIA Nsight Compute are grouped into one or more passes. In contrast to Kernel Replay, the complete application is run multiple times, so that in each run one of those passes can be collected per kernel. For correctly identifying and combining performance counters collected from multiple application replay passes of a single kernel launch into one result, the application needs to be deterministic with respect to its kernel activities and their assignment to GPUs, contexts, streams, and potentially NVTX ranges. Normally, this also implies that the application needs to be deterministic with respect to its overall execution. Application replay has the benefit that memory accessed by the kernel does not need to be saved and restored via the tool, as each kernel launch executes only once during the lifetime of the application process. Besides avoiding memory save-and-restore overhead, application replay also allows disabling Cache Control. This is especially useful if other GPU activities preceding a specific kernel launch are used by the application to set caches to some expected state. In addition, application replay can support profiling kernels that have interdependencies to the host during execution.
With kernel replay, this class of kernels typically hangs when being profiled, because the necessary responses from the host are missing in all but the first pass. In contrast, application replay ensures the correct behavior of the program execution in each pass. In contrast to kernel replay, multiple passes collected via application replay imply that all host-side activities of the application are duplicated, too. If the application requires significant time for e.g. setup or file-system access, the overhead will increase accordingly. Regular Application Execution Execution with Application Replay. No memory is saved or restored, but the cost of running the application itself is duplicated. Across application replay passes, NVIDIA Nsight Compute matches metric data for the individual, selected kernel launches. The matching strategy can be selected using the --app-replay-match option. For matching, only kernels within the same process and running on the same device are considered. By default, the grid strategy is used, which matches launches according to their kernel name and grid size. When multiple launches have the same attributes (e.g. name and grid size), they are matched in execution order. Kernel matching during application replay using the grid strategy.", "keywords": []}, {"id": 154, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#cache-control", "display_name": "Cache Control", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "cache-control", "priority": -1, "content": "As explained in Kernel Replay, the kernel might need to be replayed multiple times to collect all requested metrics. While NVIDIA Nsight Compute can save and restore the contents of GPU device memory accessed by the kernel for each pass, it cannot do the same for the contents of HW caches, such as the L1 and L2 cache. This can have the effect that later replay passes might have better or worse performance than e.g. the first pass, as the caches could already be primed with the data last accessed by the kernel. Similarly, the values of HW performance counters collected by the first pass might depend on which kernels, if any, were executed prior to the measured kernel launch. In order to make HW performance counter values more deterministic, NVIDIA Nsight Compute by default flushes all GPU caches before each replay pass. As a result, in each pass, the kernel will access a clean cache and the behavior will be as if the kernel was executed in complete isolation. This behavior might be undesirable for performance analysis, especially if the measurement focuses on a kernel within a larger application execution, and if the collected data targets cache-centric metrics. In this case, you can use --cache-control none to disable flushing of any HW cache by the tool.", "keywords": []}, {"id": 155, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#clock-control", "display_name": "Clock Control", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "clock-control", "priority": -1, "content": "For many metrics, their value is directly influenced by the current GPU SM and memory clock frequencies.
For example, if a kernel instance is profiled that has prior kernel executions in the application, the GPU might already be in a higher clocked state and the measured kernel duration, along with other metrics, will be affected. Likewise, if a kernel instance is the first kernel to be launched in the application, GPU clocks will regularly be lower. In addition, due to kernel replay, the metric value might depend on which replay pass it is collected in, as later passes would result in higher clock states. To mitigate this non-determinism, NVIDIA Nsight Compute attempts to limit GPU clock frequencies to their base value. As a result, metric values are less impacted by the location of the kernel in the application, or by the number of the specific replay pass. However, this behavior might be undesirable for analysis of the kernel, e.g. in cases where an external tool is used to fix clock frequencies, or where the behavior of the kernel within the application is analyzed. To solve this, users can adjust the --clock-control option to specify if any clock frequencies should be fixed by the tool. Factors affecting Clock Control: Note that thermal throttling directed by the driver cannot be controlled by the tool and always overrides any selected options. On mobile targets, e.g. L4T or QNX, there may be variations in profiling results due to the tool\u2019s inability to lock clocks. Using Nsight Compute\u2019s --clock-control to set the GPU clocks will fail or will be silently ignored when profiling on a GPU partition. On L4T, you can use the jetson_clocks script to lock the clocks at their maximums during profiling. See the Special Configurations section for MIG and vGPU clock control.", "keywords": []}, {"id": 156, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#compatibility", "display_name": "Compatibility", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "compatibility", "priority": -1, "content": "The set of available replay modes and metrics depends on the type of GPU workload to profile. Workload Type Replay Mode Metric Groups Kernel Application Range Application-Range Hardware Counters / SMSP Unit-Level Source Instruction-Level Source Launch PM Sampling Kernel Yes Yes Yes 2 Yes 2 Yes Yes Yes Yes Yes Range No No Yes Yes Yes No No Some Yes Cmdlist Yes No No Yes 2 Yes Yes Yes Some Yes Graph 1 Yes No No Yes 2 Yes No No Some Yes Footnotes 1 Limitations also apply to kernels profiled outside of graphs. 2 Workload type is supported as part of the profiled range, but not separated in the result. Metric support matches that of Range workloads.", "keywords": []}, {"id": 157, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#defining-ranges", "display_name": "Defining Ranges", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "defining-ranges", "priority": -1, "content": "Range replay requires you to specify the range for profiling in the application. A range is defined by a start and an end marker and includes all CUDA API calls and kernels launched between these markers from any CPU thread. The application is responsible for inserting appropriate synchronization between threads to ensure that the anticipated set of API calls is captured.
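As a minimal sketch, assuming an application MyRangeApp (a hypothetical name) whose workload of interest is bracketed by range markers as described below, such a range could be profiled with: ncu --replay-mode range -o range_report MyRangeApp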
Range markers can be set using one of the following options: Profiler Start/Stop API Set the start marker using cu(da)ProfilerStart and the end marker using cu(da)ProfilerStop. Note: The CUDA driver API variants of this API require including cudaProfiler.h. The CUDA runtime variants require including cuda_profiler_api.h. This is the default for NVIDIA Nsight Compute. NVTX Ranges Define the range using an NVTX Include expression. The range capture starts with the first CUDA API call and ends at the last API call for which the expression is matched, respectively. If multiple expressions are specified, a range is defined as soon as any of them matches. Hence, multiple expressions can be used to conveniently capture and profile multiple ranges for the same application execution. The application must have been instrumented with the NVTX API for any expressions to match. This mode is enabled by passing --nvtx --nvtx-include <expression> [--nvtx-include <expression>] to the NVIDIA Nsight Compute CLI. Ranges must fulfill several requirements: It must be possible to synchronize all active CUDA contexts at the start of the range. Ranges must not include unsupported CUDA API calls. See Supported APIs for the list of currently supported APIs. In addition, there are several recommendations that ranges should comply with to guarantee a correct capture and replay: Set ranges as narrow as possible for capturing a specific set of CUDA kernel launches. The more API calls are included, the higher the potential overhead from capturing and replaying them. Avoid freeing host allocations written from device memory during the range. This includes both heap and stack allocations. NVIDIA Nsight Compute does not intercept creation or destruction of generic host (CPU)-based allocations. However, to guarantee correct program execution after any replay of the range, the tool attempts to restore host allocations that were written from device memory during the capture. If these host addresses are invalid or re-assigned, the program behavior is undefined and potentially unstable. In cases where avoiding freeing such allocations is not possible, you should limit profiling to one range using --launch-count 1, set the disable-host-restore range replay option and optionally use --kill yes to terminate the process after this range. When defining the range markers using cu(da)ProfilerStart/Stop, prefer the CUDA driver API calls cuProfilerStart/Stop. Internally, NVIDIA Nsight Compute only intercepts the CUDA driver API variants and the CUDA runtime API may not trigger these if no CUDA context is active on the calling thread.", "keywords": []}, {"id": 158, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#device-memory", "display_name": "Device Memory", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "device-memory", "priority": -1, "content": "Example Device Memory table, collected on an RTX 2080 Ti Columns Sectors For each access type, the total number of sectors requested from device memory. % Peak Percentage of peak device memory utilization. Higher values imply a higher utilization of the unit and can show potential bottlenecks, as it does not necessarily indicate efficient usage. Bytes Total number of bytes transferred between L2 Cache and device memory. Throughput Achieved device memory throughput in bytes per second. High values indicate high utilization of the unit.
Rows (Access Types) Device memory loads and stores. Total The aggregate for all access types in the same column. Metrics Metrics from this table can be collected on the command line using --set full, --section MemoryWorkloadAnalysis_Tables or --metrics group:memory__dram_table.", "keywords": []}, {"id": 159, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#faq", "display_name": "FAQ", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "faq", "priority": -1, "content": "n/a metric values n/a means that the metric value is \u201cnot available\u201d. The most common reason is that the requested metric does not exist. This can either be the result of a typo, or a missing suffix. Verify the metric name against the output of the --query-metrics NVIDIA Nsight Compute CLI option. If the metric name was copied (e.g. from an old version of this documentation), make sure that it does not contain zero-width unicode characters. Finally, the metric might simply not exist for the targeted GPU architecture. For example, the IMMA pipeline metric sm__inst_executed_pipe_tensor_op_imma.avg.pct_of_peak_sustained_active is not available on GV100 chips. Metric values outside the expected logical range This includes e.g. percentages exceeding 100% or metrics reporting negative values. For further details, see Range and Precision. ERR_NVGPUCTRPERM - The user does not have permission to access NVIDIA GPU Performance Counters on the target device. By default, NVIDIA drivers require elevated permissions to access GPU performance counters. On mobile platforms, profile as root/using sudo. On other platforms, you can either start profiling as root/using sudo, or by enabling non-admin profiling. For further details, see https://developer.nvidia.com/ERR_NVGPUCTRPERM . On Windows Subsystem for Linux (WSL), access to NVIDIA GPU Performance Counters must be enabled in the NVIDIA Control Panel of the Windows host. Unsupported GPU This indicates that the GPU, on which the current kernel is launched, is not supported. See the Release Notes for a list of devices supported by your version of NVIDIA Nsight Compute. It can also indicate that the current GPU configuration is not supported. For example, NVIDIA Nsight Compute might not be able to profile GPUs in SLI configuration. Connection error detected communicating with target application. The inter-process connection to the profiled application unexpectedly dropped. This happens if the application is killed or signals an exception (e.g. segmentation fault). Failed to connect. The target process may have exited. This occurs if: the application does not call any CUDA API calls before it exits; the application terminates early because it was started from the wrong working directory, or with the wrong arguments (in this case, check the details in the Connection Dialog); the application crashes before calling any CUDA API calls; or the application launches child processes which use CUDA (in this case, launch with the --target-processes all option). The profiler returned an error code: (number) For the non-interactive Profile activity, the NVIDIA Nsight Compute CLI is started to generate the report. If either the application exited with a non-zero return code, or the NVIDIA Nsight Compute CLI encountered an error itself, the resulting return code will be shown in this message.
For example, if the application hit a segmentation fault (SIGSEGV) on Linux, it will likely return error code 11. All non-zero return codes are considered errors, so the message is also shown if the application exits with return code 1 during regular execution. To debug this issue, it can help to run the data collection directly from the command line using ncu in order to observe the application\u2019s and the profiler\u2019s command line output, e.g. ==ERROR== The application returned an error code (11) Failed to open/create lock file (path). Please check that this process has write permissions on this file. NVIDIA Nsight Compute failed to create or open the file (path) with write permissions. This file is used for inter-process serialization . NVIDIA Nsight Compute does not remove this file after profiling by design. The error occurs if the file was created by a profiling process with permissions that prevent the current process from writing to this file, or if the current user can\u2019t acquire this file for other reasons (e.g. certain Linux kernel security settings). The file is in the current temporary directory, i.e. TMPDIR/nsight-compute-lock . On Windows, TMPDIR is the path returned by the Windows GetTempPath API function. On other platforms, it is the path supplied by the first environment variable in the list TMPDIR, TMP, TEMP, TEMPDIR . If none of these is found, it\u2019s /var/nvidia on QNX and /tmp otherwise. Older versions of NVIDIA Nsight Compute did not set write permissions for all users on this file by default. As a result, running the tool on the same system with a different user might cause this error. This has been resolved since version 2020.2.1. The following workarounds can be used to solve this problem: If it is otherwise ensured that no concurrent NVIDIA Nsight Compute instances are active on the same system, set TMPDIR to a different directory for which the current user has write permissions. Ask the user owning the file, or a system administrator, to remove it or add write permissions for all potential users. On Linux systems setting fs.protected_regular=1 , root or other users may not be able to access this file, even though the owner can, if the sticky bit is set on the temporary directory. Either disable this setting using sudo sysctl fs.protected_regular=0 , use a different temporary directory (see above), or enable access to hardware performance counters for non-root users and profile as the same user who owns the file (see https://developer.nvidia.com/ERR_NVGPUCTRPERM on how to change this setting). Profiling failed because a driver resource was unavailable. The error indicates that a required CUDA driver resource was unavailable during profiling. Most commonly, this means that NVIDIA Nsight Compute could not reserve the driver\u2019s performance monitor, which is necessary for collecting most metrics. This can happen if another application has a concurrent reservation on this resource. Such applications can be e.g. DCGM , a client of CUPTI\u2019s Profiling API , Nsight Graphics , or another instance of NVIDIA Nsight Compute without access to the same file system (see serialization for how this is prevented within the same file system). If you expect the problem to be caused by DCGM, consider using dcgmi profile --pause to stop its monitoring while profiling with NVIDIA Nsight Compute. Could not deploy stock * files to * Could not determine user home directory for section deployment. An error occurred while trying to deploy stock section or rule files. 
By default, NVIDIA Nsight Compute tries to deploy these to a versioned directory in the user\u2019s home directory (as identified by the HOME environment variable on Linux), e.g. /home/user/Documents/NVIDIA Nsight Compute/&lt;version&gt;/Sections. If the directory cannot be determined (e.g. because this environment variable is not pointing to a valid directory), or if there is an error while deploying the files (e.g. because the current process does not have write permissions on it), warning messages are shown and NVIDIA Nsight Compute falls back to using stock sections and rules from the installation directory. If you are in an environment where you consistently don\u2019t have write access to the user\u2019s home directory, consider populating this directory upfront using ncu --section-folder-restore, or making /home/user/Documents/NVIDIA Nsight Compute/&lt;version&gt; a symlink to a writable directory. ProxyJump SSH option is not working NVIDIA Nsight Compute does not manage authentication or interactive prompts with the OpenSSH client launched when using the ProxyJump option. Therefore, to connect through an intermediate host for the first time, you will not be able to accept the intermediate host\u2019s key. A simple way to pinpoint the cause of failures in this case is to open a terminal and use the OpenSSH client to connect to the remote target. Once that connection succeeds, NVIDIA Nsight Compute should be able to connect to the target, too. SSH connection fails without trying to connect If the connection fails without trying to connect, there may be a problem with the settings you entered into the connection dialog. Please make sure that the IP/Host Name, User Name and Port fields are correctly set. SSH connections are still not working The problem might come from NVIDIA Nsight Compute\u2019s SSH client not finding a suitable host key algorithm to use which is supported by the remote server. You can force NVIDIA Nsight Compute to use a specific set of host key algorithms by setting the HostKeyAlgorithms option for the problematic host in your SSH configuration file. To list the supported host key algorithms for a remote target, you can use the ssh-keyscan utility which comes with the OpenSSH client. Removing host keys from known hosts files When connecting to a target machine, NVIDIA Nsight Compute tries to verify the target\u2019s host key against the same local database as the OpenSSH client. If NVIDIA Nsight Compute finds the host key is incorrect, it will inform you through a failure dialog. If you trust the key hash shown in the dialog, you can remove the previously saved key for that host by manually editing your known hosts database or using the ssh-keygen -R &lt;host&gt; command. Qt initialization failed Failed to load Qt platform plugin See System Requirements for Linux.
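As a hedged illustration of the command-line debugging approach mentioned above (the application name ./myapp and the report name are placeholders), running the collection directly with ncu makes both the application output and the profiler output visible:

ncu --target-processes all -o report ./myapp

If the application itself fails, the profiler forwards the failure, e.g. ==ERROR== The application returned an error code (11).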
", "keywords": []}, {"id": 160, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#graph-profiling", "display_name": "Graph Profiling", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "graph-profiling", "priority": -1, "content": "In multiple replay modes, NVIDIA Nsight Compute can profile CUDA graphs as single workload entities, rather than profiling individual kernel nodes. The behavior can be toggled in the respective command line or UI options. The primary use cases for enabling this mode are: Profile graphs that include mandatory concurrent kernel nodes. Profile graphs that include device-sided graph launches. Profile graph behavior more accurately across multiple kernel node launches, as caches are not purged in between nodes. Note that when graph profiling is enabled, certain metrics such as instruction-level source metrics are not available. This then also applies to kernels profiled outside of graphs.", "keywords": []}, {"id": 161, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#hardware-model", "display_name": "Hardware Model", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "hardware-model", "priority": -1, "content": "Compute Model All NVIDIA GPUs are designed to support a general purpose heterogeneous parallel programming model, commonly known as Compute. This model decouples the GPU from the traditional graphics pipeline and exposes it as a general purpose parallel multi-processor. A heterogeneous computing model implies the existence of a host and a device, which in this case are the CPU and GPU, respectively. At a high level, the host (CPU) manages resources between itself and the device and will send work off to the device to be executed in parallel. Central to the compute model is the Grid, Block, Thread hierarchy, which defines how compute work is organized on the GPU. The hierarchy from top to bottom is as follows: A Grid is a 1D, 2D or 3D array of thread blocks. A Block is a 1D, 2D or 3D array of threads, also known as a Cooperative Thread Array (CTA). A Thread is a single thread which runs on one of the GPU\u2019s SM units. The purpose of the Grid, Block, Thread hierarchy is to expose a notion of locality amongst a group of threads, i.e. a Cooperative Thread Array (CTA). In CUDA, CTAs are referred to as Thread Blocks. The architecture can exploit this locality by providing fast shared memory and barriers between the threads within a single CTA. When a Grid is launched, the architecture guarantees that all threads within a CTA will run concurrently on the same SM. Information on the grids and blocks can be found in the Launch Statistics section.
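As a minimal sketch of this hierarchy (kernel and variable names are hypothetical, not taken from this guide), a CUDA kernel that maps one thread to each array element:

__global__ void scale(float *data, float factor, int n)
{
    // Grid &gt; Block (CTA) &gt; Thread: derive a unique element index from
    // the block index, the block size, and the thread index.
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i &lt; n)
        data[i] *= factor;
}

// Launch a 1D grid of 256-thread CTAs covering n elements:
// scale&lt;&lt;&lt;(n + 255) / 256, 256&gt;&gt;&gt;(d_data, 2.0f, n);

Each CTA of such a launch runs entirely on a single SM, while different CTAs may be scheduled on any SM in any order.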
The number of CTAs that fit on each SM depends on the physical resources required by the CTA. These resource limiters include the number of threads and registers, shared memory utilization, and hardware barriers. The number of CTAs per SM is referred to as the CTA occupancy, and these physical resources limit this occupancy. Details on the kernel\u2019s occupancy are collected by the Occupancy section. Each CTA can be scheduled on any of the available SMs, with no guarantee in the order of execution. As such, CTAs must be entirely independent, which means it is not possible for one CTA to wait on the result of another CTA. As CTAs are independent, the host (CPU) can launch a large Grid that will not fit on the hardware all at once; however, any GPU will still be able to run it and produce the correct results. CTAs are further divided into groups of 32 threads called Warps. If the number of threads in a CTA is not divisible by 32, the last warp will contain the remaining number of threads. The total number of CTAs that can run concurrently on a given GPU is referred to as a Wave. Consequently, the size of a Wave scales with the number of available SMs of a GPU, but also with the occupancy of the kernel. Streaming Multiprocessor The Streaming Multiprocessor (SM) is the core processing unit in the GPU. The SM is optimized for a wide diversity of workloads, including general-purpose computations, deep learning, ray tracing, as well as lighting and shading. The SM is designed to simultaneously execute multiple CTAs. CTAs can be from different grid launches. The SM implements an execution model called Single Instruction Multiple Threads (SIMT), which allows individual threads to have unique control flow while still executing as part of a warp. The Turing SM inherits the Volta SM\u2019s independent thread scheduling model. The SM maintains execution state per thread, including a program counter (PC) and call stack. The independent thread scheduling allows the GPU to yield execution of any thread, either to make better use of execution resources or to allow a thread to wait for data produced by another thread possibly in the same warp. Collecting the Source Counters section allows you to inspect instruction execution and predication details on the Source Page, along with Sampling information. Each SM is partitioned into four processing blocks, called SM sub partitions. The SM sub partitions are the primary processing elements on the SM. Each sub partition contains the following units: Warp Scheduler Register File Execution Units/Pipelines/Cores Integer Execution units Floating Point Execution units Memory Load/Store units Special Function unit Tensor Cores Shared within an SM across the four SM sub partitions are: Unified L1 Data Cache / Shared Memory Texture units RT Cores, if available A warp is allocated to a sub partition and resides on the sub partition from launch to completion. A warp is referred to as active or resident when it is mapped to a sub partition. A sub partition manages a fixed size pool of warps. On Volta architectures, the size of the pool is 16 warps. On Turing architectures, the size of the pool is 8 warps. Active warps can be in the eligible state if the warp is ready to issue an instruction. This requires the warp to have a decoded instruction, all input dependencies resolved, and the function unit to be available. Statistics on active, eligible and issuing warps can be collected with the Scheduler Statistics section.
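The occupancy limits described above can also be estimated programmatically. A minimal sketch using the CUDA occupancy API, assuming the hypothetical scale kernel from the earlier example:

int numBlocks = 0; // resident CTAs per SM for this kernel and configuration
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&amp;numBlocks, scale, 256 /* block size */, 0 /* dynamic shared memory */);
// numBlocks * (256 / 32) is the number of resident warps per SM; together with
// the number of SMs it determines the size of a full Wave for this kernel.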
A warp is stalled when the warp is waiting on an instruction fetch, a memory dependency (result of memory instruction), an execution dependency (result of previous instruction), or a synchronization barrier. See Warp Scheduler States for the list of stall reasons that can be profiled and the Warp State Statistics section for a summary of warp states found in the kernel execution. The most important resource under the compiler\u2019s control is the number of registers used by a kernel. Each sub partition has a set of 32-bit registers, which are allocated by the HW in fixed-size chunks. The Launch Statistics section shows the kernel\u2019s register usage. Memory Global memory is a 49-bit virtual address space that is mapped to physical memory on the device, pinned system memory, or peer memory. Global memory is visible to all threads in the GPU. Global memory is accessed through the SM L1 and GPU L2. Local memory is private storage for an executing thread and is not visible outside of that thread. It is intended for thread-local data like thread stacks and register spills. Local memory addresses are translated to global virtual addresses by the AGU unit. Local memory has the same latency as global memory. One difference between global and local memory is that local memory is arranged such that consecutive 32-bit words are accessed by consecutive thread IDs. Accesses are therefore fully coalesced as long as all threads in a warp access the same relative address (e.g., same index in an array variable, same member in a structure variable, etc.). Shared memory is located on chip, so it has much higher bandwidth and much lower latency than either local or global memory. Shared memory can be shared across a compute CTA. Compute CTAs attempting to share data across threads via shared memory must use synchronization operations (such as __syncthreads()) between stores and loads to ensure data written by any one thread is visible to other threads in the CTA. Similarly, threads that need to share data via global memory must use a more heavyweight global memory barrier. Shared memory has 32 banks that are organized such that successive 32-bit words map to successive banks that can be accessed simultaneously. Any 32-bit memory read or write request made of 32 addresses that fall in 32 distinct memory banks can therefore be serviced simultaneously, yielding an overall bandwidth that is 32 times as high as the bandwidth of a single request. However, if two addresses of a memory request fall in the same memory bank, there is a bank conflict and the access has to be serialized. A shared memory request for a warp does not generate a bank conflict between two threads that access any address within the same 32-bit word (even though the two addresses fall in the same bank). When multiple threads make the same read access, one thread receives the data and then broadcasts it to the other threads. When multiple threads write to the same location, only one thread succeeds in the write; which thread succeeds is undefined. Detailed memory metrics are collected by the Memory Workload Analysis section. Caches All GPU units communicate with main memory through the Level 2 cache, also known as the L2. The L2 cache sits between on-chip memory clients and the framebuffer. L2 works in physical-address space. In addition to providing caching functionality, L2 also includes hardware to perform compression and global atomics. Model of the L2 cache.
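To make the shared memory bank-conflict behavior described above concrete, here is a sketch of the common tile-padding idiom (a hypothetical 32x32 transpose of a square matrix whose dimension width is a multiple of 32, launched with 32x32 thread blocks):

__global__ void transposeTile(const float *in, float *out, int width)
{
    // 32 banks of 4-byte words: without the +1 padding column, all elements
    // of one tile column would map to the same bank, and the column-wise
    // reads below would serialize as a 32-way bank conflict.
    __shared__ float tile[32][33];
    int x = blockIdx.x * 32 + threadIdx.x;
    int y = blockIdx.y * 32 + threadIdx.y;
    tile[threadIdx.y][threadIdx.x] = in[y * width + x];
    __syncthreads(); // make all writes visible before the transposed reads
    x = blockIdx.y * 32 + threadIdx.x;
    y = blockIdx.x * 32 + threadIdx.y;
    out[y * width + x] = tile[threadIdx.x][threadIdx.y];
}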
The Level 1 Data Cache, or L1, plays a key role in handling global, local, shared, texture, and surface memory reads and writes, as well as reduction and atomic operations. On Volta and Turing architectures, there are two L1 caches per TPC, one for each SM. For more information on how L1 fits into the texturing pipeline, see the TEX unit description. Also note that while this section often uses the name \u201cL1\u201d, it should be understood that the L1 data cache, shared data, and the Texture data cache are one and the same. L1 receives requests from two units: the SM and TEX. L1 receives global and local memory requests from the SM and receives texture and surface requests from TEX. These operations access memory in the global memory space, which L1 sends through a secondary cache, the L2. Cache hit and miss rates as well as data transfers are reported in the Memory Workload Analysis section. Model of Load/Store and Texture pipelines for the L1TEX cache. Texture/Surface The TEX unit performs texture fetching and filtering. Beyond plain texture memory access, TEX is responsible for the addressing, LOD, wrap, filter, and format conversion operations necessary to convert a texture read request into a result. TEX receives two general categories of requests from the SM via its input interface: texture requests and surface load/store operations. Texture and surface memory reside in device memory and are cached in L1. Texture and surface memory are allocated as block-linear surfaces (e.g. 2D, 2D Array, 3D). Such surfaces provide a cache-friendly layout of data such that neighboring points on a 2D surface are also located close to each other in memory, which improves access locality. Surface accesses are bounds-checked by the TEX unit prior to accessing memory, which can be used for implementing different texture wrapping modes. The L1 cache is optimized for 2D spatial locality, so threads of the same warp that read texture or surface addresses that are close together in 2D space will achieve optimal performance. The L1 cache is also designed for streaming fetches with constant latency; a cache hit reduces DRAM bandwidth demand but not fetch latency. Reading device memory through texture or surface memory presents some benefits that can make it an advantageous alternative to reading memory from global or constant memory. Information on texture and surface memory can be found in the Memory Workload Analysis section.", "keywords": []}, {"id": 162, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#introduction", "display_name": "Introduction", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "introduction", "priority": -1, "content": "This guide describes various profiling topics related to NVIDIA Nsight Compute and NVIDIA Nsight Compute CLI. Most of these apply to both the UI and the CLI version of the tool.
To use the tools effectively, it is recommended to read this guide, as well as at least the following chapters of the CUDA Programming Guide : Programming Model Hardware Implementation Performance Guidelines Afterwards, it should be enough to read the Quickstart chapter of the NVIDIA Nsight Compute or NVIDIA Nsight Compute CLI documentation, respectively, to start using the tools.", "keywords": []}, {"id": 163, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#kernel-replay", "display_name": "Kernel Replay", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "kernel-replay", "priority": -1, "content": "In Kernel Replay , all metrics requested for a specific kernel instance in NVIDIA Nsight Compute are grouped into one or more passes. For the first pass, all GPU memory that can be accessed by the kernel is saved. After the first pass, the subset of memory that is written by the kernel is determined. Before each pass (except the first one), this subset is restored in its original location to have the kernel access the same memory contents in each replay pass. NVIDIA Nsight Compute attempts to use the fastest available storage location for this save-and-restore strategy. For example, if data is allocated in device memory, and there is still enough device memory available, it is stored there directly. If it runs out of device memory, the data is transferred to the CPU host memory. Likewise, if an allocation originates from CPU host memory, the tool first attempts to save it into the same memory location, if possible. As explained in Overhead , the time needed for this increases the more memory is accessed, especially written, by a kernel. If NVIDIA Nsight Compute determines that only a single replay pass is necessary to collect the requested metrics, no save-and-restore is performed at all to reduce overhead. Regular Application Execution Execution with Kernel Replay. All memory is saved, and memory written by the kernel is restored in-between replay passes.", "keywords": []}, {"id": 164, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#l1-tex-cache", "display_name": "L1/TEX Cache", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "l1-tex-cache", "priority": -1, "content": "Example L1/TEX Cache memory table, collected on an RTX 2080 Ti Model of the Global Load Pipeline for the L1TEX cache on GA100, mapped to the memory table. Columns Instructions For each access type, the total number of all actually executed assembly (SASS) instructions per warp. Predicated-off instructions are not included. E.g., the instruction LDG would be counted towards Global Loads . Requests The total number of all requests to L1, generated for each instruction type. On SM 7.0 (Volta) and newer architectures, each instruction generates exactly one request for LSU traffic (global, local, \u2026). For texture (TEX) traffic, more than one request may be generated. In the example, each of the 65536 global load instructions generates exactly one request. Wavefronts Number of wavefronts required to service the requested memory operation. Wavefronts are serialized and processed on different cycles. Wavefront % Peak Percentage of peak utilization for the units processing wavefronts . High numbers can imply that the processing pipelines are saturated and can become a bottleneck. 
Sectors The total number of all sector accesses sent to L1. Each load or store request accesses one or more sectors in the L1 cache. Atomics and reductions are passed through to the L2 cache. Sectors/Req The average ratio of sectors to requests for the L1 cache. For the same number of active threads in a warp, smaller numbers imply a more efficient memory access pattern. For warps with 32 active threads, the optimal ratios per access size are: 32-bit: 4, 64-bit: 8, 128-bit: 16. Smaller ratios indicate some degree of uniformity or overlapped loads within a cache line. Higher numbers can imply uncoalesced memory accesses and will result in increased memory traffic. In the example, the average ratio for global loads is 32 sectors per request, which implies that each thread needs to access a different sector. Ideally, for warps with 32 active threads, with each thread accessing a single, aligned 32-bit value, the ratio would be 4, as every 8 consecutive threads access the same sector. Hit Rate Sector hit rate (percentage of requested sectors that do not miss) in the L1 cache. Sectors that miss need to be requested from L2, thereby contributing to Sector Misses to L2. Higher hit rates imply better performance due to lower access latencies, as the request can be served by L1 instead of a later stage. Not to be confused with Tag Hit Rate (not shown). Bytes Total number of bytes requested from L1. This is identical to the number of sectors multiplied by 32 bytes, since the minimum access size in L1 is one sector. Sector Misses to L2 Total number of sectors that miss in L1 and generate subsequent requests in the L2 Cache. In this example, the 262144 sector misses for global and local loads can be computed as the miss-rate of 12.5%, multiplied by the total of 2097152 sectors. % Peak to L2 Percentage of peak utilization of the L1-to-XBAR interface, used to send L2 cache requests. If this number is high, the workload is likely dominated by scattered {writes, atomics, reductions}, which can increase the latency and cause warp stalls. Returns to SM Number of return packets sent from the L1 cache back to the SM. Larger request access sizes result in a higher number of returned packets. % Peak to SM Percentage of peak utilization of the XBAR-to-L1 return path (compare Returns to SM). If this number is high, the workload is likely dominated by scattered reads, thereby causing warp stalls. Improving read-coalescing or the L1 hit rate could reduce this utilization. Rows (Access Types) The various access types, e.g. loads from global memory or reduction operations on surface memory. Loads The aggregate of all load access types in the same column. Stores The aggregate of all store access types in the same column. Total The aggregate of all load and store access types in the same column. Metrics Metrics from this table can be collected on the command line using --set full, --section MemoryWorkloadAnalysis_Tables or --metrics group:memory__first_level_cache_table.
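As a hedged sketch of how the Sectors/Req ratio follows from the access pattern (kernel name hypothetical): with 32 active threads and 4-byte loads, a unit-stride access needs the optimal 4 sectors per request, while a stride of 8 or more floats places every thread in its own sector:

__global__ void gather(const float *in, float *out, int stride)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    // stride == 1: 8 consecutive threads share one 32-byte sector, so each
    //              32-thread request needs 4 sectors (coalesced).
    // stride &gt;= 8: every thread touches a different sector, raising the
    //              ratio to 32 sectors per request (uncoalesced).
    out[i] = in[i * stride];
}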
", "keywords": []}, {"id": 165, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#l2-cache", "display_name": "L2 Cache", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "l2-cache", "priority": -1, "content": "Example L2 Cache memory table, collected on an RTX 2080 Ti Model of the L2 cache on GA100, mapped to the memory table. Columns Requests For each access type, the total number of requests made to the L2 cache. This correlates with the Sector Misses to L2 for the L1 cache. Each request accesses up to four sectors from a single 128 byte cache line. Sectors For each access type, the total number of sectors requested from the L2 cache. Each request accesses between one and four sectors. Sectors/Req The average ratio of sectors to requests for the L2 cache. For the same number of active threads in a warp, smaller numbers imply a more efficient memory access pattern. Smaller ratios indicate some degree of uniformity or overlapped loads within a cache line. Higher numbers can imply uncoalesced memory accesses and will result in increased memory traffic. % Peak Percentage of peak sustained number of sectors. The \u201cwork package\u201d in the L2 cache is a sector. Higher values imply a higher utilization of the unit and can show potential bottlenecks, although high utilization does not necessarily indicate efficient usage. Hit Rate Hit rate (percentage of requested sectors that do not miss) in the L2 cache. Sectors that miss need to be requested from a later stage, thereby contributing to one of Sector Misses to Device, Sector Misses to System, or Sector Misses to Peer. Higher hit rates imply better performance due to lower access latencies, as the request can be served by L2 instead of a later stage. Bytes Total number of bytes requested from L2. This is identical to the number of sectors multiplied by 32 bytes, since the minimum access size in L2 is one sector. Throughput Achieved L2 cache throughput in bytes per second. High values indicate high utilization of the unit. Sector Misses to Device Total number of sectors that miss in L2 and generate subsequent requests in device memory. Sector Misses to System Total number of sectors that miss in L2 and generate subsequent requests in system memory. Sector Misses to Peer Total number of sectors that miss in L2 and generate subsequent requests in peer memory. Rows (Access Types) The various access types, e.g. loads or reductions originating from L1 cache. L1/TEX Total Total for all operations originating from the L1 cache. ECC Total Total for all operations caused by ECC (Error Correction Code). If ECC is enabled, L2 write requests that partially modify a sector cause a corresponding sector load from DRAM. These additional load operations increase the sector misses of L2. L2 Fabric Total Total for all operations across the L2 fabric connecting the two L2 partitions. This row is only shown for kernel launches on CUDA devices with L2 fabric. GPU Total Total for all operations across all clients of the L2 cache, independent of whether they are split out separately in this table. Metrics Metrics from this table can be collected on the command line using --set full, --section MemoryWorkloadAnalysis_Tables or --metrics group:memory__l2_cache_table.", "keywords": []}, {"id": 166, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#l2-cache-eviction-policies", "display_name": "L2 Cache Eviction Policies", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "l2-cache-eviction-policies", "priority": -1, "content": "Example L2 Cache Eviction Policies memory table, collected on an A100 GPU Columns First Number of sectors accessed in the L2 cache using the evict_first policy. Data cached with this policy will be first in the eviction priority order and will likely be evicted when cache eviction is required. This policy is suitable for streaming data.
Hit Rate Cache hit rate for sector accesses in the L2 cache using the evict_first policy. Last Number of sectors accessed in the L2 cache using the evict_last policy. Data cached with this policy will be last in the eviction priority order and will likely be evicted only after other data with evict_normal or evict_first eviction policy is already evicted. This policy is suitable for data that should remain persistent in cache. Hit Rate Cache hit rate for sector accesses in the L2 cache using the evict_last policy. Normal Number of sectors accessed in the L2 cache using the evict_normal policy. This is the default policy. Hit Rate Cache hit rate for sector accesses in the L2 cache using the evict_normal policy. Normal Demote Number of sectors accessed in the L2 cache using the evict_normal_demote policy. Hit Rate Cache hit rate for sector accesses in the L2 cache using the evict_normal_demote policy. Rows (Access Types) The various access types, e.g. loads or reductions, originating from L1 cache. L1/TEX Total Total for all operations originating from the L1 cache. L2 Fabric Total Total for all operations across the L2 fabric connecting the two L2 partitions. This row is only shown for kernel launches on CUDA devices with L2 fabric. GPU Total Total for all operations across all clients of the L2 cache, independent of whether they are split out separately in this table. Metrics Metrics from this table can be collected on the command line using --set full, --section MemoryWorkloadAnalysis_Tables or --metrics group:memory__l2_cache_evict_policy_table. Note that this table is only available on GA100 or newer GPUs.
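From CUDA code, these policies can be influenced with an access policy window. A minimal sketch, assuming a device allocation d_persist of num_bytes bytes that should stay resident in L2 (the stream and sizes are placeholders):

// Optionally reserve a portion of L2 for persisting accesses first.
cudaDeviceSetLimit(cudaLimitPersistingL2CacheSize, num_bytes);
cudaStreamAttrValue attr = {};
attr.accessPolicyWindow.base_ptr = d_persist;
attr.accessPolicyWindow.num_bytes = num_bytes;
attr.accessPolicyWindow.hitRatio = 1.0f; // fraction of accesses in the window treated with hitProp
attr.accessPolicyWindow.hitProp = cudaAccessPropertyPersisting; // roughly corresponds to evict_last
attr.accessPolicyWindow.missProp = cudaAccessPropertyStreaming; // roughly corresponds to evict_first
cudaStreamSetAttribute(stream, cudaStreamAttributeAccessPolicyWindow, &amp;attr);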
", "keywords": []}, {"id": 167, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#memory-chart", "display_name": "Memory Chart", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "memory-chart", "priority": -1, "content": "The Memory Chart shows a graphical, logical representation of performance data for memory subunits on and off the GPU. Performance data includes transfer sizes, hit rates, number of instructions or requests, etc.", "keywords": []}, {"id": 168, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#memory-chart-overview", "display_name": "Overview", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "memory-chart-overview", "priority": -1, "content": "Memory chart for an NVIDIA A100 GPU Logical Units (green) Logical units are shown in green color. Kernel: The CUDA kernel executing on the GPU\u2019s Streaming Multiprocessors Global: CUDA global memory Local: CUDA local memory Texture: CUDA texture memory Surface: CUDA surface memory Shared: CUDA shared memory Load Global Store Shared: Instructions loading directly from global into shared memory without intermediate register file access Physical Units (blue) Physical units are shown in blue color. L1/TEX Cache: The L1/Texture cache. The underlying physical memory is split between this cache and the user-managed Shared Memory. Shared Memory: CUDA\u2019s user-managed shared memory. The underlying physical memory is split between this and the L1/TEX Cache. L2 Cache: The L2 cache L2 Compression: The memory compression unit of the L2 Cache System Memory: Off-chip system (CPU) memory Device Memory: On-chip device (GPU) memory of the CUDA device that executes the kernel Peer Memory: On-chip device (GPU) memory of other CUDA devices Depending on the GPU architecture, the exact set of shown units can vary, as not all GPUs have all units. Links Links between Kernel and other logical units represent the number of executed instructions ( Inst ) targeting the respective unit. For example, the link between Kernel and Global represents the instructions loading from or storing to the global memory space. Instructions using the NVIDIA A100\u2019s Load Global Store Shared paradigm are shown separately, as their register or cache access behavior can be different from regular global loads or shared stores. Links between logical units and blue, physical units represent the number of requests ( Req ) issued as a result of their respective instructions. For example, the link going from L1/TEX Cache to Global shows the number of requests generated due to global load instructions. The color of each link represents the percentage of peak utilization of the corresponding communication path. The color legend to the right of the chart shows the applied color gradient from unused (0%) to operating at peak performance (100%). Triangle markers to the left of the legend correspond to the links in the chart. The markers offer a more accurate value estimate for the achieved peak performances than the color gradient alone. A unit often shares a common data port for incoming and outgoing traffic. While the links sharing a port might operate well below their individual peak performances, the unit\u2019s data port may have already reached its peak. Port utilization is shown in the chart by colored rectangles inside the units located at the incoming and outgoing links. Ports use the same color gradient as the data links and also have a corresponding marker to the left of the legend. An example of the correlation between the peak values reported in the memory tables and the ports in the memory chart is shown below. Mapping of peak values between memory tables and memory chart Metrics Metrics from this chart can be collected on the command line using --set full, --section MemoryWorkloadAnalysis_Chart or --metrics group:memory__chart.", "keywords": []}, {"id": 169, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#memory-tables", "display_name": "Memory Tables", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "memory-tables", "priority": -1, "content": "The Memory Tables show detailed metrics for the various memory HW units, such as shared memory, the caches, and device memory. For most table entries, you can hover over them to see the underlying metric name and description. Some entries are generated as derivatives from other cells, and do not show a metric name on their own, but the respective calculation. If a certain metric does not contribute to the generic derivative calculation, it is shown as UNUSED in the tooltip.
You can hover over row or column headers to see a description of this part of the table.", "keywords": []}, {"id": 170, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#metric-collection", "display_name": "Metric Collection", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "metric-collection", "priority": -1, "content": "Collection of performance metrics is the key feature of NVIDIA Nsight Compute. Since there is a huge list of metrics available, it is often easier to use some of the tool\u2019s pre-defined sets or sections to collect a commonly used subset. Users are free to adjust which metrics are collected for which kernels as needed, but it is important to keep in mind the Overhead associated with data collection.", "keywords": []}, {"id": 171, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#metrics-decoder", "display_name": "Metrics Decoder", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "metrics-decoder", "priority": -1, "content": "The following explains terms found in NVIDIA Nsight Compute metric names, as introduced in Metrics Structure. Units dram Device (main) memory, where the GPU\u2019s global and local memory resides. fbpa The FrameBuffer Partition is a memory controller which sits between the level 2 cache (LTC) and the DRAM. The number of FBPAs varies across GPUs. fe The Frontend unit is responsible for the overall flow of workloads sent by the driver. FE also facilitates a number of synchronization operations. gpc The General Processing Cluster contains SM, Texture and L1 in the form of TPC(s). It is replicated several times across a chip. gpu The entire Graphics Processing Unit. gr Graphics Engine is responsible for all 2D and 3D graphics, compute work, and synchronous graphics copying work. idc The InDexed Constant Cache is a subunit of the SM responsible for caching constants that are indexed with a register. l1tex The Level 1 (L1)/Texture Cache is located within the GPC. It can be used as direct-mapped shared memory and/or store global, local and texture data in its cache portion. l1tex__t refers to its Tag stage. l1tex__m refers to its Miss stage. l1tex__d refers to its Data stage. ltc The Level 2 cache. ltcfabric The LTC fabric is the communication fabric for the L2 cache partitions. lts A Level 2 (L2) Cache Slice is a sub-partition of the Level 2 cache. lts__t refers to its Tag stage. lts__m refers to its Miss stage. lts__d refers to its Data stage. mcc Memory controller channel of MSS. The Memory Subsystem (MSS) provides access to local DRAM, SysRAM, and provides a SyncPoint Interface for interprocessor signaling. MCC includes the row sorter/arbiter and DRAM controllers. pm Performance monitor. sm The Streaming Multiprocessor handles execution of a kernel as groups of 32 threads, called warps. Warps are further grouped into cooperative thread arrays (CTA), called blocks in CUDA. All warps of a CTA execute on the same SM. CTAs share various resources across their threads, e.g. the shared memory. smsp Each SM is partitioned into four processing blocks, called SM sub partitions. The SM sub partitions are the primary processing elements on the SM. A sub partition manages a fixed size pool of warps. sys Logical grouping of several units. tpc Thread Processing Clusters are units in the GPC.
They contain one or more SM, Texture and L1 units, the Instruction Cache (ICC) and the Indexed Constant Cache (IDC). Subunits aperture_device Memory interface to local device memory (dram) aperture_peer Memory interface to remote device memory aperture_sysmem Memory interface to system memory global Global memory is a 49-bit virtual address space that is mapped to physical memory on the device, pinned system memory, or peer memory. Global memory is visible to all threads in the GPU. Global memory is accessed through the SM L1 and GPU L2. lg Local/Global memory local Local memory is private storage for an executing thread and is not visible outside of that thread. It is intended for thread-local data like thread stacks and register spills. Local memory has the same latency as global memory. lsu Load/Store unit lsuin Load/Store input mio Memory input/output mioc Memory input/output control shared Shared memory is located on chip, so it has much higher bandwidth and much lower latency than either local or global memory. Shared memory can be shared across a compute CTA. surface Surface memory texin TEXIN texture Texture memory xbar The Crossbar (XBAR) is responsible for carrying packets from a given source unit to a specific destination unit. Pipelines adu Address Divergence Unit. The ADU is responsible for address divergence handling for branches/jumps. It also provides support for constant loads and block-level barrier instructions. alu Arithmetic Logic Unit. The ALU is responsible for execution of most bit manipulation and logic instructions. It also executes integer instructions, excluding IMAD and IMUL. On NVIDIA Ampere architecture chips, the ALU pipeline performs fast FP32-to-FP16 conversion. cbu Convergence Barrier Unit. The CBU is responsible for warp-level convergence, barrier, and branch instructions. fma Fused Multiply Add/Accumulate. The FMA pipeline processes most FP32 arithmetic (FADD, FMUL, FMAD). It also performs integer multiplication operations (IMUL, IMAD), as well as integer dot products. On GA10x, FMA is a logical pipeline that indicates peak FP32 and FP16x2 performance. It is composed of the FMAHeavy and FMALite physical pipelines. fmaheavy Fused Multiply Add/Accumulate Heavy. FMAHeavy performs FP32 arithmetic (FADD, FMUL, FMAD), FP16 arithmetic (HADD2, HMUL2, HFMA2), integer multiplication operations (IMUL, IMAD), and integer dot products. fmalite Fused Multiply Add/Accumulate Lite. FMALite performs FP32 arithmetic (FADD, FMUL, FMA) and FP16 arithmetic (HADD2, HMUL2, HFMA2). fp16 Half-precision floating-point. On Volta, Turing and NVIDIA GA100, the FP16 pipeline performs paired FP16 instructions (FP16x2). It also contains a fast FP32-to-FP16 and FP16-to-FP32 converter. Starting with GA10x chips, this functionality is part of the FMA pipeline. fp64 Double-precision floating-point. The implementation of FP64 varies greatly per chip. lsu Load Store Unit. The LSU pipeline issues load, store, atomic, and reduction instructions to the L1TEX unit for global, local, and shared memory. It also issues special register reads (S2R), shuffles, and CTA-level arrive/wait barrier instructions to the L1TEX unit. tex Texture Unit. The SM texture pipeline forwards texture and surface instructions to the L1TEX unit\u2019s TEXIN stage. On GPUs where FP64 or Tensor pipelines are decoupled, the texture pipeline forwards those types of instructions, too. tma Tensor Memory Access Unit. 
Provides efficient data transfer mechanisms between global and shared memories with the ability to understand and traverse multidimensional data layouts. uniform Uniform Data Path. This scalar unit executes instructions where all threads use the same input and generate the same output. xu Transcendental and Data Type Conversion Unit. The XU pipeline is responsible for special functions such as sin, cos, and reciprocal square root. It is also responsible for int-to-float, and float-to-int type conversions. Quantities instruction An assembly (SASS) instruction. Each executed instruction may generate zero or more requests. request A command into a HW unit to perform some action, e.g. load data from some memory location. Each request accesses one or more sectors. sector Aligned 32-byte chunk of memory in a cache line or device memory. An L1 or L2 cache line is four sectors, i.e. 128 bytes. Sector accesses are classified as hits if the tag is present and the sector-data is present within the cache line. Tag-misses and tag-hit-data-misses are all classified as misses. tag Unique key to a cache line. A request may look up multiple tags, if the thread addresses do not all fall within a single cache line-aligned region. The L1 and L2 both have 128 byte cache lines. Tag accesses may be classified as hits or misses. wavefront Unique \u201cwork package\u201d generated at the end of the processing stage for requests. All work items of a wavefront are processed in parallel, while work items of different wavefronts are serialized and processed on different cycles. At least one wavefront is generated for each request. A simplified model for the processing in L1TEX for Volta and newer architectures can be described as follows: When an SM executes a global or local memory instruction for a warp, a single request is sent to L1TEX. This request communicates the information for all participating threads of this warp (up to 32). For local and global memory, based on the access pattern and the participating threads, the request needs to access a number of cache lines, and sectors within these cache lines. The L1TEX unit has internally multiple processing stages operating in a pipeline. A wavefront is the maximum unit that can pass through that pipeline stage per cycle. If not all cache lines or sectors can be accessed in a single wavefront, multiple wavefronts are created and sent for processing one by one, i.e. in a serialized manner. Limitations of the work within a wavefront may include the need for a consistent memory space, a maximum number of cache lines that can be accessed, as well as various other reasons. Each wavefront then flows through the L1TEX pipeline and fetches the sectors handled in that wavefront. The relationships between the three key values in this model are: requests:sectors is 1:N, wavefronts:sectors is 1:N, and requests:wavefronts is 1:N. A wavefront is described as a (work) package that can be processed at once, i.e. there is a notion of processing one wavefront per cycle in L1TEX. Wavefronts therefore represent the number of cycles required to process the requests, while the number of sectors per request is a property of the access pattern of the memory instruction for all participating threads. For example, it is possible to have a memory instruction that requires 4 sectors per request in 1 wavefront.
However, you can also have a memory instruction having 4 sectors per request, but requiring 2 or more wavefronts.", "keywords": []}, {"id": 172, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#metrics-reference", "display_name": "Metrics Reference", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "metrics-reference", "priority": -1, "content": "Overview Most metrics in NVIDIA Nsight Compute can be queried using the ncu command line interface\u2019s --query-metrics option. The following metrics can be collected explicitly, but are not listed by --query-metrics, and do not follow the naming scheme explained in Metrics Structure. They should be used as-is instead. launch__* metrics are collected per kernel launch, and do not require an additional replay pass. They are available as part of the kernel launch parameters (such as grid size, block size, \u2026) or are computed using the CUDA Occupancy Calculator. Launch Metrics launch__block_dim_x Number of threads for the kernel launch in X dimension. launch__block_dim_y Number of threads for the kernel launch in Y dimension. launch__block_dim_z Number of threads for the kernel launch in Z dimension. launch__block_size Total number of threads per block for the kernel launch. launch__cluster_dim_x Number of clusters for the kernel launch in X dimension. launch__cluster_dim_y Number of clusters for the kernel launch in Y dimension. launch__cluster_dim_z Number of clusters for the kernel launch in Z dimension. launch__cluster_max_active Maximum number of clusters that can co-exist on the target device. The runtime environment may affect how the hardware schedules the clusters, so the calculated occupancy is not guaranteed to be achievable. launch__cluster_max_potential_size Largest valid cluster size for the kernel function and launch configuration. launch__cluster_scheduling_policy Cluster scheduling policy. launch__context_id CUDA context id for the kernel launch (id of the primary context if launch was on a green context). launch__device_id CUDA device id for the kernel launch. launch__func_cache_config On devices where the L1 cache and shared memory use the same hardware resources, this is the preferred cache configuration for the CUDA function. The runtime will use the requested configuration if possible, but it is free to choose a different configuration if required. launch__function_pcs Kernel function entry PCs. launch__graph_contains_device_launch Set to 1 if any node in the profiled graph can launch a CUDA device graph. launch__graph_is_device_launchable Set to 1 if the profiled graph was device-launchable. launch__green_context_id CUDA context id of the green context for the kernel launch (if applicable). launch__grid_dim_x Number of blocks for the kernel launch in X dimension. launch__grid_dim_y Number of blocks for the kernel launch in Y dimension. launch__grid_dim_z Number of blocks for the kernel launch in Z dimension. launch__grid_size Total number of blocks for the kernel launch. launch__occupancy_cluster_gpu_pct Overall GPU occupancy due to clusters. launch__occupancy_cluster_pct The ratio of active blocks to the max possible active blocks due to clusters. launch__occupancy_limit_blocks Occupancy limit due to maximum number of blocks manageable per SM. launch__occupancy_limit_registers Occupancy limit due to register usage. launch__occupancy_limit_shared_mem Occupancy limit due to shared memory usage.
launch__occupancy_limit_warps Occupancy limit due to block size. launch__occupancy_per_block_size Number of active warps for given block size. Instance values map from number of warps (uint64) to value (uint64). launch__occupancy_per_cluster_size Number of active clusters for given cluster size. Instance values map from number of clusters (uint64) to value (uint64). launch__occupancy_per_register_count Number of active warps for given register count. Instance values map from number of warps (uint64) to value (uint64). launch__occupancy_per_shared_mem_size Number of active warps for given shared memory size. Instance values map from number of warps (uint64) to value (uint64). launch__registers_per_thread Number of registers allocated per thread. launch__registers_per_thread_allocated Number of registers allocated per thread. launch__shared_mem_config_size Shared memory size configured for the kernel launch. The size depends on the static, dynamic, and driver shared memory requirements as well as the specified or platform-determined configuration size. launch__shared_mem_per_block_allocated Allocated shared memory size per block. launch__shared_mem_per_block_driver Shared memory size per block, allocated for the CUDA driver. launch__shared_mem_per_block_dynamic Dynamic shared memory size per block, allocated for the kernel. launch__shared_mem_per_block_static Static shared memory size per block, allocated for the kernel. launch__sm_count Number of SMs utilized in the launch. launch__stream_id CUDA stream id for the kernel launch. launch__sub_launch_name Name of each sub-launch for range-like results. launch__thread_count Total number of threads across all blocks for the kernel launch. launch__uses_cdp Set to 1 if any function object in the launched workload can use CUDA dynamic parallelism. launch__uses_green_context Set to 1 if launch was on a green context. launch__waves_per_multiprocessor Number of waves per SM. Partial waves can lead to tail effects where some SMs become idle while others still have pending work to complete. NVLink Topology Metrics nvlink__bandwidth Link bandwidth in bytes/s. Instance values map from logical nvlink ID (uint64) to value (double). nvlink__count_logical Total number of logical NVLinks. nvlink__count_physical Total number of physical links. Instance values map from physical nvlink device ID (uint64) to value (uint64). nvlink__destination_ports Destination port numbers (as strings). Instance values map from logical nvlink ID (uint64) to comma-separated list of port numbers (string). nvlink__dev0Id ID of the first connected device. Instance values map from logical nvlink ID (uint64) to value (uint64). nvlink__dev0type Type of the first connected device. Instance values map from logical nvlink ID (uint64) to values [1=GPU, 2=CPU] (uint64). nvlink__dev1Id ID of the second connected device. Instance values map from logical nvlink ID (uint64) to value (uint64). nvlink__dev1type Type of the second connected device. Instance values map from logical nvlink ID (uint64) to values [1=GPU, 2=CPU] (uint64). nvlink__dev_display_name_all Device display name. Instance values map from logical nvlink device ID (uint64) to value (string). nvlink__enabled_mask NVLink enablement mask, per device. Instance values map from physical nvlink device ID (uint64) to value (uint64). nvlink__is_direct_link Indicates, per NVLink, if the link is direct. Instance values map from logical nvlink ID (uint64) to value (uint64). nvlink__is_nvswitch_connected Indicates if NVSwitch is connected. 
nvlink__max_count Maximum number of NVLinks. Instance values map from physical nvlink device ID (uint64) to value (uint64). nvlink__peer_access Indicates if peer access is supported. Instance values map from logical nvlink ID (uint64) to value (uint64). nvlink__peer_atomic Indicates if peer atomics are supported. Instance values map from logical nvlink ID (uint64) to value (uint64). nvlink__source_ports Source port numbers (as strings). Instance values map from logical nvlink ID (uint64) to comma-separated list of port numbers (string). nvlink__system_access Indicates if system access is supported. Instance values map from logical nvlink ID (uint64) to value (uint64). nvlink__system_atomic Indicates if system atomics are supported. Instance values map from logical nvlink ID (uint64) to value (uint64). NUMA Topology Metrics numa__cpu_affinity CPU affinity for each device. Instance values map from device ID (uint64) to comma-separated values (string). numa__dev_display_name_all Device display names for all devices. Instance values map from device ID (uint64) to comma-separated values (string). numa__id_cpu NUMA ID of the nearest CPU for each device. Instance values map from device ID (uint64) to comma-separated values (string). numa__id_memory NUMA ID of the nearest memory for each device. Instance values map from device ID (uint64) to comma-separated values (string). Device Attributes device__attribute_* metrics represent CUDA device attributes. Collecting them does not require an additional kernel replay pass, as their value is available from the CUDA driver for each CUDA device. See below for custom device__attribute_* metrics. device__attribute_architecture Chip architecture of the CUDA device. device__attribute_confidential_computing_mode Confidential computing mode. device__attribute_device_index Device index. device__attribute_display_name Product name of the CUDA device. device__attribute_fb_bus_width Frame buffer bus width. device__attribute_fbp_count Total number of frame buffer partitions. device__attribute_implementation Chip implementation of the CUDA device. device__attribute_l2s_count Total number of Level 2 cache slices. device__attribute_limits_max_cta_per_sm Maximum number of CTA per SM. device__attribute_max_gpu_frequency_khz Maximum GPU frequency in kilohertz. device__attribute_max_ipc_per_multiprocessor Maximum number of instructions per clock per multiprocessor. device__attribute_max_ipc_per_scheduler Maximum number of instructions per clock per scheduler. device__attribute_max_mem_frequency_khz Peak memory frequency in kilohertz. device__attribute_max_registers_per_thread Maximum number of registers available per thread. device__attribute_max_warps_per_multiprocessor Maximum number of warps per multiprocessor. device__attribute_max_warps_per_scheduler Maximum number of warps per scheduler. device__attribute_num_l2s_per_fbp Number of Level 2 cache slices per frame buffer partition. device__attribute_num_schedulers_per_multiprocessor Number of schedulers per multiprocessor. device__attribute_num_tex_per_multiprocessor Number of TEX units per multiprocessor. device__attribute_sass_level SASS level. Warp Stall Reasons Collected using warp scheduler state sampling. They are incremented regardless of whether the scheduler issued an instruction in the same cycle. These metrics have instance values mapping from the function address (uint64) to the number of samples (uint64).
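A hedged command-line sketch for collecting individual stall reasons from the list that follows (the application name ./myapp is a placeholder):

ncu --metrics smsp__pcsamp_warps_issue_stalled_barrier,smsp__pcsamp_warps_issue_stalled_long_scoreboard ./myapp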
smsp__pcsamp_warps_issue_stalled_barrier Warp was stalled waiting for sibling warps at a CTA barrier. A high number of warps waiting at a barrier is commonly caused by diverging code paths before a barrier. This causes some warps to wait a long time until other warps reach the synchronization point. Whenever possible, try to divide up the work into blocks of uniform workloads. If the block size is 512 threads or greater, consider splitting it into smaller groups. This can increase eligible warps without affecting occupancy, unless shared memory becomes a new occupancy limiter. Also, try to identify which barrier instruction causes the most stalls, and optimize the code executed before that synchronization point first. smsp__pcsamp_warps_issue_stalled_branch_resolving Warp was stalled waiting for a branch target to be computed, and the warp program counter to be updated. To reduce the number of stalled cycles, consider using fewer jump/branch operations and reduce control flow divergence, e.g. by reducing or coalescing conditionals in your code. See also the related No Instructions state. smsp__pcsamp_warps_issue_stalled_dispatch_stall Warp was stalled waiting on a dispatch stall. A warp stalled during dispatch has an instruction ready to issue, but the dispatcher holds back issuing the warp due to other conflicts or events. smsp__pcsamp_warps_issue_stalled_drain Warp was stalled after EXIT waiting for all outstanding memory operations to complete so that warp\u2019s resources can be freed. A high number of stalls due to draining warps typically occurs when a lot of data is written to memory towards the end of a kernel. Make sure the memory access patterns of these store operations are optimal for the target architecture and consider parallelized data reduction, if applicable. smsp__pcsamp_warps_issue_stalled_imc_miss Warp was stalled waiting for an immediate constant cache (IMC) miss. A read from constant memory costs one memory read from device memory only on a cache miss; otherwise, it just costs one read from the constant cache. Immediate constants are encoded into the SASS instruction as \u2018c[bank][offset]\u2019. Accesses to different addresses by threads within a warp are serialized, thus the cost scales linearly with the number of unique addresses read by all threads within a warp. As such, the constant cache is best when threads in the same warp access only a few distinct locations. If all threads of a warp access the same location, then constant memory can be as fast as a register access. smsp__pcsamp_warps_issue_stalled_lg_throttle Warp was stalled waiting for the L1 instruction queue for local and global (LG) memory operations to be not full. Typically, this stall occurs only when executing local or global memory instructions extremely frequently. Avoid redundant global memory accesses. Try to avoid using thread-local memory by checking if dynamically indexed arrays are declared in local scope, or if the kernel has excessive register pressure causing spills. If applicable, consider combining multiple lower-width memory operations into fewer wider memory operations and try interleaving memory operations and math instructions. smsp__pcsamp_warps_issue_stalled_long_scoreboard Warp was stalled waiting for a scoreboard dependency on a L1TEX (local, global, surface, texture) operation. Find the instruction producing the data being waited upon to identify the culprit.
To reduce the number of cycles waiting on L1TEX data accesses, verify that the memory access patterns are optimal for the target architecture, attempt to increase cache hit rates by increasing data locality (coalescing) or by changing the cache configuration. Consider moving frequently used data to shared memory. smsp__pcsamp_warps_issue_stalled_math_pipe_throttle Warp was stalled waiting for the execution pipe to be available. This stall occurs when all active warps execute their next instruction on a specific, oversubscribed math pipeline. Try to increase the number of active warps to hide the existing latency or try changing the instruction mix to utilize all available pipelines in a more balanced way. smsp__pcsamp_warps_issue_stalled_membar Warp was stalled waiting on a memory barrier. Avoid executing any unnecessary memory barriers and ensure that any outstanding memory operations are fully optimized for the target architecture. smsp__pcsamp_warps_issue_stalled_mio_throttle Warp was stalled waiting for the MIO (memory input/output) instruction queue to be not full. This stall reason is high in cases of extreme utilization of the MIO pipelines, which include special math instructions, dynamic branches, as well as shared memory instructions. When caused by shared memory accesses, trying to use fewer but wider loads can reduce pipeline pressure. smsp__pcsamp_warps_issue_stalled_misc Warp was stalled for a miscellaneous hardware reason. smsp__pcsamp_warps_issue_stalled_no_instructions Warp was stalled waiting to be selected to fetch an instruction or waiting on an instruction cache miss. A high number of warps not having an instruction fetched is typical for very short kernels with less than one full wave of work in the grid. Excessively jumping across large blocks of assembly code can also lead to more warps stalled for this reason, if this causes misses in the instruction cache. See also the related Branch Resolving state. smsp__pcsamp_warps_issue_stalled_not_selected Warp was stalled waiting for the micro scheduler to select the warp to issue. Not selected warps are eligible warps that were not picked by the scheduler to issue that cycle as another warp was selected. A high number of not selected warps typically means you have sufficient warps to cover warp latencies and you may consider reducing the number of active warps to possibly increase cache coherence and data locality. smsp__pcsamp_warps_issue_stalled_selected Warp was selected by the micro scheduler and issued an instruction. smsp__pcsamp_warps_issue_stalled_short_scoreboard Warp was stalled waiting for a scoreboard dependency on a MIO (memory input/output) operation (not to L1TEX). The primary reason for a high number of stalls due to short scoreboards is typically memory operations to shared memory. Other reasons include frequent execution of special math instructions (e.g. MUFU) or dynamic branching (e.g. BRX, JMX). Consult the Memory Workload Analysis section to verify if there are shared memory operations and reduce bank conflicts, if reported. Assigning frequently accessed values to variables can assist the compiler in using low-latency registers instead of direct memory accesses. smsp__pcsamp_warps_issue_stalled_sleeping Warp was stalled due to all threads in the warp being in the blocked, yielded, or sleep state. Reduce the number of executed NANOSLEEP instructions, lower the specified time delay, and attempt to group threads in a way that multiple threads in a warp sleep at the same time.
smsp__pcsamp_warps_issue_stalled_tex_throttle Warp was stalled waiting for the L1 instruction queue for texture operations to be not full. This stall reason is high in cases of extreme utilization of the L1TEX pipeline. Try issuing fewer texture fetches, surface loads, surface stores, or decoupled math operations. If applicable, consider combining multiple lower-width memory operations into fewer wider memory operations and try interleaving memory operations and math instructions. Consider converting texture lookups or surface loads into global memory lookups. Texture can accept four threads\u2019 requests per cycle, whereas global accepts 32 threads. smsp__pcsamp_warps_issue_stalled_wait Warp was stalled waiting on a fixed latency execution dependency. Typically, this stall reason should be very low and only shows up as a top contributor in already highly optimized kernels. Try to hide the corresponding instruction latencies by increasing the number of active warps, restructuring the code or unrolling loops. Furthermore, consider switching to lower-latency instructions, e.g. by making use of fast math compiler options. Warp Stall Reasons (Not Issued) Collected using warp scheduler state sampling. They are incremented only on cycles in which the warp scheduler issued no instruction. These metrics have instance values mapping from the function address (uint64) to the number of samples (uint64). smsp__pcsamp_warps_issue_stalled_barrier_not_issued Warp was stalled waiting for sibling warps at a CTA barrier. A high number of warps waiting at a barrier is commonly caused by diverging code paths before a barrier. This causes some warps to wait a long time until other warps reach the synchronization point. Whenever possible, try to divide up the work into blocks of uniform workloads. If the block size is 512 threads or greater, consider splitting it into smaller groups. This can increase eligible warps without affecting occupancy, unless shared memory becomes a new occupancy limiter. Also, try to identify which barrier instruction causes the most stalls, and optimize the code executed before that synchronization point first. smsp__pcsamp_warps_issue_stalled_branch_resolving_not_issued Warp was stalled waiting for a branch target to be computed, and the warp program counter to be updated. To reduce the number of stalled cycles, consider using fewer jump/branch operations and reduce control flow divergence, e.g. by reducing or coalescing conditionals in your code. See also the related No Instructions state. smsp__pcsamp_warps_issue_stalled_dispatch_stall_not_issued Warp was stalled waiting on a dispatch stall. A warp stalled during dispatch has an instruction ready to issue, but the dispatcher holds back issuing the warp due to other conflicts or events. smsp__pcsamp_warps_issue_stalled_drain_not_issued Warp was stalled after EXIT waiting for all memory operations to complete so that warp resources can be freed. A high number of stalls due to draining warps typically occurs when a lot of data is written to memory towards the end of a kernel. Make sure the memory access patterns of these store operations are optimal for the target architecture and consider parallelized data reduction, if applicable. smsp__pcsamp_warps_issue_stalled_imc_miss_not_issued Warp was stalled waiting for an immediate constant cache (IMC) miss. A read from constant memory costs one memory read from device memory only on a cache miss; otherwise, it just costs one read from the constant cache. 
Accesses to different addresses by threads within a warp are serialized, thus the cost scales linearly with the number of unique addresses read by all threads within a warp. As such, the constant cache is best when threads in the same warp access only a few distinct locations. If all threads of a warp access the same location, then constant memory can be as fast as a register access. smsp__pcsamp_warps_issue_stalled_lg_throttle_not_issued Warp was stalled waiting for the L1 instruction queue for local and global (LG) memory operations to be not full. Typically, this stall occurs only when executing local or global memory instructions extremely frequently. Avoid redundant global memory accesses. Try to avoid using thread-local memory by checking if dynamically indexed arrays are declared in local scope, or if the kernel has excessive register pressure causing spills. If applicable, consider combining multiple lower-width memory operations into fewer wider memory operations and try interleaving memory operations and math instructions. smsp__pcsamp_warps_issue_stalled_long_scoreboard_not_issued Warp was stalled waiting for a scoreboard dependency on a L1TEX (local, global, surface, texture) operation. Find the instruction producing the data being waited upon to identify the culprit. To reduce the number of cycles waiting on L1TEX data accesses, verify that the memory access patterns are optimal for the target architecture, attempt to increase cache hit rates by increasing data locality (coalescing) or by changing the cache configuration. Consider moving frequently used data to shared memory. smsp__pcsamp_warps_issue_stalled_math_pipe_throttle_not_issued Warp was stalled waiting for the execution pipe to be available. This stall occurs when all active warps execute their next instruction on a specific, oversubscribed math pipeline. Try to increase the number of active warps to hide the existing latency or try changing the instruction mix to utilize all available pipelines in a more balanced way. smsp__pcsamp_warps_issue_stalled_membar_not_issued Warp was stalled waiting on a memory barrier. Avoid executing any unnecessary memory barriers and ensure that any outstanding memory operations are fully optimized for the target architecture. smsp__pcsamp_warps_issue_stalled_mio_throttle_not_issued Warp was stalled waiting for the MIO (memory input/output) instruction queue to be not full. This stall reason is high in cases of extreme utilization of the MIO pipelines, which include special math instructions, dynamic branches, as well as shared memory instructions. When caused by shared memory accesses, trying to use fewer but wider loads can reduce pipeline pressure. smsp__pcsamp_warps_issue_stalled_misc_not_issued Warp was stalled for a miscellaneous hardware reason. smsp__pcsamp_warps_issue_stalled_no_instructions_not_issued Warp was stalled waiting to be selected to fetch an instruction or waiting on an instruction cache miss. A high number of warps not having an instruction fetched is typical for very short kernels with less than one full wave of work in the grid. Excessively jumping across large blocks of assembly code can also lead to more warps stalled for this reason, if this causes misses in the instruction cache. See also the related Branch Resolving state. smsp__pcsamp_warps_issue_stalled_not_selected_not_issued Warp was stalled waiting for the micro scheduler to select the warp to issue.
Not selected warps are eligible warps that were not picked by the scheduler to issue that cycle as another warp was selected. A high number of not selected warps typically means you have sufficient warps to cover warp latencies and you may consider reducing the number of active warps to possibly increase cache coherence and data locality. smsp__pcsamp_warps_issue_stalled_selected_not_issued Warp was selected by the micro scheduler and issued an instruction. smsp__pcsamp_warps_issue_stalled_short_scoreboard_not_issued Warp was stalled waiting for a scoreboard dependency on a MIO (memory input/output) operation (not to L1TEX). The primary reason for a high number of stalls due to short scoreboards is typically memory operations to shared memory. Other reasons include frequent execution of special math instructions (e.g. MUFU) or dynamic branching (e.g. BRX, JMX). Consult the Memory Workload Analysis section to verify if there are shared memory operations and reduce bank conflicts, if reported. Assigning frequently accessed values to variables can assist the compiler in using low-latency registers instead of direct memory accesses. smsp__pcsamp_warps_issue_stalled_sleeping_not_issued Warp was stalled due to all threads in the warp being in the blocked, yielded, or sleep state. Reduce the number of executed NANOSLEEP instructions, lower the specified time delay, and attempt to group threads in a way that multiple threads in a warp sleep at the same time. smsp__pcsamp_warps_issue_stalled_tex_throttle_not_issued Warp was stalled waiting for the L1 instruction queue for texture operations to be not full. This stall reason is high in cases of extreme utilization of the L1TEX pipeline. Try issuing fewer texture fetches, surface loads, surface stores, or decoupled math operations. If applicable, consider combining multiple lower-width memory operations into fewer wider memory operations and try interleaving memory operations and math instructions. Consider converting texture lookups or surface loads into global memory lookups. Texture can accept four threads\u2019 requests per cycle, whereas global accepts 32 threads. smsp__pcsamp_warps_issue_stalled_wait_not_issued Warp was stalled waiting on a fixed latency execution dependency. Typically, this stall reason should be very low and only shows up as a top contributor in already highly optimized kernels. Try to hide the corresponding instruction latencies by increasing the number of active warps, restructuring the code or unrolling loops. Furthermore, consider switching to lower-latency instructions, e.g. by making use of fast math compiler options. Source Metrics Most are collected using SASS-patching. These metrics have instance values mapping from function address (uint64) to associated values (uint64). Metrics memory_[access_]type map to string values. branch_inst_executed Number of unique branch targets assigned to the instruction, including both divergent and uniform branches. derived__avg_thread_executed Average number of thread-level executed instructions per warp (regardless of their predicate). Computed as: thread_inst_executed / inst_executed derived__avg_thread_executed_true Average number of predicated-on thread-level executed instructions per warp. Computed as: thread_inst_executed_true / inst_executed derived__memory_l1_conflicts_shared_nway Average N-way conflict in L1 per shared memory instruction. A 1-way access has no conflicts and resolves in a single pass. 
Computed as: memory_l1_wavefronts_shared / inst_executed derived__memory_l1_wavefronts_shared_excessive Excessive number of wavefronts in L1 from shared memory instructions, because not all threads that were not predicated off performed the operation. derived__memory_l2_theoretical_sectors_global_excessive Excessive theoretical number of sectors requested in L2 from global memory instructions, because not all threads that were not predicated off performed the operation. inst_executed Number of warp-level executed instructions, ignoring instruction predicates. Warp-level means the value is increased by one per individual warp executing the instruction, independent of the number of participating threads within each warp. memory_access_size_type The size of the memory access, in bits. memory_access_type The type of memory access (e.g. load or store). memory_l1_tag_requests_global Number of L1 tag requests generated by global memory instructions. memory_l1_wavefronts_shared Number of wavefronts in L1 from shared memory instructions. memory_l1_wavefronts_shared_ideal Ideal number of wavefronts in L1 from shared memory instructions, assuming each thread that was not predicated off performed the operation. memory_l2_theoretical_sectors_global Theoretical number of sectors requested in L2 from global memory instructions. memory_l2_theoretical_sectors_global_ideal Ideal number of sectors requested in L2 from global memory instructions, assuming each thread that was not predicated off performed the operation. memory_l2_theoretical_sectors_local Theoretical number of sectors requested in L2 from local memory instructions. memory_type The accessed address space (global/local/shared). smsp__branch_targets_threads_divergent Number of divergent branch targets, including fallthrough. Incremented only when there are two or more active threads with divergent targets. smsp__branch_targets_threads_uniform Number of uniform branch executions, including fallthrough, where all active threads selected the same branch target. smsp__pcsamp_sample_count Number of collected warp state samples per program counter. This metric is collected using warp sampling. thread_inst_executed Number of thread-level executed instructions, regardless of predicate presence or evaluation. thread_inst_executed_true Number of thread-level executed instructions, where the instruction predicate evaluated to true, or no predicate was given. L2 Cache Eviction Metrics smsp__sass_inst_executed_memdesc_explicit_evict_type L2 cache eviction policy types. smsp__sass_inst_executed_memdesc_explicit_hitprop_evict_first Number of warp-level executed instructions with L2 cache eviction hit property \u2018first\u2019. smsp__sass_inst_executed_memdesc_explicit_hitprop_evict_last Number of warp-level executed instructions with L2 cache eviction hit property \u2018last\u2019. smsp__sass_inst_executed_memdesc_explicit_hitprop_evict_normal Number of warp-level executed instructions with L2 cache eviction hit property \u2018normal\u2019. smsp__sass_inst_executed_memdesc_explicit_hitprop_evict_normal_demote Number of warp-level executed instructions with L2 cache eviction hit property \u2018normal demote\u2019. smsp__sass_inst_executed_memdesc_explicit_missprop_evict_first Number of warp-level executed instructions with L2 cache eviction miss property \u2018first\u2019. smsp__sass_inst_executed_memdesc_explicit_missprop_evict_normal Number of warp-level executed instructions with L2 cache eviction miss property \u2018normal\u2019. Instructions Per Opcode Metrics Collected using SASS-patching.
These metrics have instance values mapping from the SASS opcode (string) to the number of executions (uint64). sass__inst_executed_per_opcode Number of warp-level executed instructions, instanced by basic SASS opcode. sass__inst_executed_per_opcode_with_modifier_all Number of warp-level executed instructions, instanced by all SASS opcode modifiers. sass__inst_executed_per_opcode_with_modifier_selective Number of warp-level executed instructions, instanced by selective SASS opcode modifiers. sass__thread_inst_executed_true_per_opcode Number of thread-level executed instructions, instanced by basic SASS opcode. sass__thread_inst_executed_true_per_opcode_with_modifier_all Number of thread-level executed instructions, instanced by all SASS opcode modifiers. sass__thread_inst_executed_true_per_opcode_with_modifier_selective Number of thread-level executed instructions, instanced by selective SASS opcode modifiers. Metric Groups group:memory__chart Group of metrics for the workload analysis chart. group:memory__dram_table Group of metrics for the device memory workload analysis table. group:memory__first_level_cache_table Group of metrics for the L1/TEX cache workload analysis table. group:memory__l2_cache_evict_policy_table Group of metrics for the L2 cache eviction policies table. group:memory__l2_cache_table Group of metrics for the L2 cache workload analysis table. group:memory__shared_table Group of metrics for the shared memory workload analysis table. group:smsp__pcsamp_warp_stall_reasons Group of metrics for the number of samples from the warp sampler per program location. group:smsp__pcsamp_warp_stall_reasons_not_issued Group of metrics for the number of samples from the warp sampler per program location on cycles the warp scheduler issued no instructions. Profiler Metrics Metrics generated by the tool itself to inform about statistics or problems during profiling. profiler__perfworks_session_reuse Indicates if the PerfWorks session was reused between results. profiler__pmsampler_buffer_size_bytes Buffer size in bytes per pass group used for PM sampling. Instance values map from pass group to bytes. profiler__pmsampler_ctxsw_* GPU context switch states over time during PM sampling for a specific pass group. Instance values map from timestamp to context state (1 - enabled, 0 - disabled). profiler__pmsampler_dropped_samples Number of samples dropped per pass group during PM sampling due to insufficient buffer size. Instance values map from pass group to samples. profiler__pmsampler_interval_cycles Sampling interval in cycles per pass group used for PM sampling, or zero if time-based interval was used. Instance values map from pass group to cycles. profiler__pmsampler_interval_time Sampling interval in nanoseconds per pass group used for PM sampling, or zero if cycle-based interval was used. Instance values map from pass group to nanoseconds. profiler__pmsampler_merged_samples Number of samples merged per pass group during PM sampling due to HW back pressure while streaming results. Instance values map from pass group to samples. profiler__pmsampler_pass_groups Number of pass groups used for PM sampling. Instance values map from pass group to comma-separated list of metrics collected in this pass. profiler__replayer_passes Number of passes the result was replayed for profiling across all experiments. profiler__replayer_passes_type_warmup Number of passes the result was replayed to warm up the GPU for profiling.
smsp__pcsamp_aggregated_passes Number of passes required for statistical warp stall sampling. smsp__pcsamp_buffer_size_bytes Buffer size in bytes for statistical warp stall sampling. smsp__pcsamp_dropped_bytes Bytes dropped during statistical warp stall sampling due to insufficient buffer size. smsp__pcsamp_interval Interval number for warp stall sampling. smsp__pcsamp_interval_cycles Interval cycles for statistical warp stall sampling.", "keywords": []}, {"id": 173, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#metrics-structure", "display_name": "Metrics Structure", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "metrics-structure", "priority": -1, "content": "Metrics Overview NVIDIA Nsight Compute uses an advanced metrics calculation system, designed to help you determine what happened (counters and metrics), and how close the program came to peak GPU performance (throughputs as a percentage). Every counter has associated peak rates in the database, to allow computing its throughput as a percentage. Throughput metrics return the maximum percentage value of their constituent counters. These constituents have been carefully selected to represent the sections of the GPU pipeline that govern peak performance. While all counters can be converted to a %-of-peak, not all counters are suitable for peak-performance analysis; examples of unsuitable counters include qualified subsets of activity and workload residency counters. Using throughput metrics ensures meaningful and actionable analysis. Two types of peak rates are available for every counter: burst and sustained. Burst rate is the maximum rate reportable in a single clock cycle. Sustained rate is the maximum rate achievable over an infinitely long measurement period, for \u201ctypical\u201d operations. For many counters, burst equals sustained. Since the burst rate cannot be exceeded, percentages of burst rate will always be less than 100%. Percentages of sustained rate can occasionally exceed 100% in edge cases. Metrics Entities While in NVIDIA Nsight Compute all performance counters are named metrics, they can be split further into groups with specific properties. For metrics collected via the PerfWorks measurement library, the following entities exist: Counters may be either a raw counter from the GPU, or a calculated counter value. Every counter has four sub-metrics under it, which are also called roll-ups: .sum The sum of counter values across all unit instances. .avg The average counter value across all unit instances. .min The minimum counter value across all unit instances. .max The maximum counter value across all unit instances.
Counter roll-ups have the following calculated quantities as built-in sub-metrics: .peak_sustained the peak sustained rate .peak_sustained_active the peak sustained rate during unit active cycles .peak_sustained_active.per_second the peak sustained rate during unit active cycles, per second * .peak_sustained_elapsed the peak sustained rate during unit elapsed cycles .peak_sustained_elapsed.per_second the peak sustained rate during unit elapsed cycles, per second * .per_second the number of operations per second .per_cycle_active the number of operations per unit active cycle .per_cycle_elapsed the number of operations per unit elapsed cycle .pct_of_peak_sustained_active % of peak sustained rate achieved during unit active cycles .pct_of_peak_sustained_elapsed % of peak sustained rate achieved during unit elapsed cycles * sub-metrics added in NVIDIA Nsight Compute 2022.2.0. Example: ncu --query-metrics-mode suffix --metrics sm__inst_executed --chip ga100 Ratios have three sub-metrics: .pct The value expressed as a percentage. .ratio The value expressed as a ratio. .max_rate The ratio\u2019s maximum value. Example: ncu --query-metrics-mode suffix --metrics smsp__average_warp_latency --chip ga100 Throughputs indicate how close a portion of the GPU came to its peak rate. Every throughput has the following sub-metrics: .pct_of_peak_sustained_active % of peak sustained rate achieved during unit active cycles .pct_of_peak_sustained_elapsed % of peak sustained rate achieved during unit elapsed cycles Example: ncu --query-metrics-mode suffix --metrics sm__throughput --chip ga100 Throughputs have a breakdown of underlying metrics from which the throughput value is computed. You can collect breakdown:<throughput-metric> to collect a throughput\u2019s breakdown metrics.
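For example, following the pattern of the query examples above, the constituent metrics of the SM throughput could be collected directly as follows (a minimal sketch; my_app stands in for a hypothetical application binary):
ncu --metrics breakdown:sm__throughput.avg.pct_of_peak_sustained_elapsed my_app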
Deprecated counter sub-metrics: The following sub-metrics were removed, due to not being useful for performance optimization: .peak_burst the peak burst rate .pct_of_peak_burst_active % of peak burst rate achieved during unit active cycles .pct_of_peak_burst_elapsed % of peak burst rate achieved during unit elapsed cycles .pct_of_peak_burst_region % of peak burst rate achieved over a user-specified \u201crange\u201d .pct_of_peak_burst_frame % of peak burst rate achieved over a user-specified \u201cframe\u201d .pct_of_peak_sustained_region % of peak sustained rate achieved over a user-specified \u201crange\u201d time .pct_of_peak_sustained_frame % of peak sustained rate achieved over a user-specified \u201cframe\u201d time .per_cycle_in_region the number of operations per user-specified \u201crange\u201d cycle .per_cycle_in_frame the number of operations per user-specified \u201cframe\u201d cycle .peak_sustained_region the peak sustained rate over a user-specified \u201crange\u201d .peak_sustained_region.per_second the peak sustained rate over a user-specified \u201crange\u201d, per second * .peak_sustained_frame the peak sustained rate over a user-specified \u201cframe\u201d .peak_sustained_frame.per_second the peak sustained rate over a user-specified \u201cframe\u201d, per second * Deprecated throughput sub-metrics: The following sub-metrics were removed, due to not being useful for performance optimization: .pct_of_peak_burst_active % of peak burst rate achieved during unit active cycles .pct_of_peak_burst_elapsed % of peak burst rate achieved during unit elapsed cycles .pct_of_peak_burst_region % of peak burst rate achieved over a user-specified \u201crange\u201d time .pct_of_peak_burst_frame % of peak burst rate achieved over a user-specified \u201cframe\u201d time .pct_of_peak_sustained_region % of peak sustained rate achieved over a user-specified \u201crange\u201d .pct_of_peak_sustained_frame % of peak sustained rate achieved over a user-specified \u201cframe\u201d In addition to PerfWorks metrics, NVIDIA Nsight Compute uses several other measurement providers that each generate their own metrics. These are explained in the Metrics Reference . Metrics Examples ## non-metric names -- *not* directly evaluable sm__inst_executed # counter smsp__average_warp_latency # ratio sm__throughput # throughput ## a counter's four first-level sub-metrics -- all evaluable sm__inst_executed.sum sm__inst_executed.avg sm__inst_executed.min sm__inst_executed.max ## all names below are metrics -- all evaluable l1tex__data_bank_conflicts_pipe_lsu.sum l1tex__data_bank_conflicts_pipe_lsu.sum.peak_sustained l1tex__data_bank_conflicts_pipe_lsu.sum.peak_sustained_active l1tex__data_bank_conflicts_pipe_lsu.sum.peak_sustained_active.per_second l1tex__data_bank_conflicts_pipe_lsu.sum.peak_sustained_elapsed l1tex__data_bank_conflicts_pipe_lsu.sum.peak_sustained_elapsed.per_second l1tex__data_bank_conflicts_pipe_lsu.sum.per_cycle_active l1tex__data_bank_conflicts_pipe_lsu.sum.per_cycle_elapsed l1tex__data_bank_conflicts_pipe_lsu.sum.per_second l1tex__data_bank_conflicts_pipe_lsu.sum.pct_of_peak_sustained_active l1tex__data_bank_conflicts_pipe_lsu.sum.pct_of_peak_sustained_elapsed ... Metrics Naming Conventions Counters and metrics generally obey the naming scheme: Unit-Level Counter : unit__(subunit?)_(pipestage?)_quantity_(qualifiers?) Interface Counter : unit__(subunit?)_(pipestage?)_(interface)_quantity_(qualifiers?)
Unit Metric : (counter_name).(rollup_metric) Sub-Metric : (counter_name).(rollup_metric).(submetric) where unit: A logical or physical unit of the GPU subunit: The subunit within the unit where the counter was measured. Sometimes this is a pipeline mode instead. pipestage: The pipeline stage within the subunit where the counter was measured. quantity: What is being measured. Generally matches the dimensional units . qualifiers: Any additional predicates or filters applied to the counter. Often, an unqualified counter can be broken down into several qualified sub-components. interface: Of the form sender2receiver , where sender is the source-unit and receiver is the destination-unit. rollup_metric: One of sum, avg, min, max. submetric: refer to the Metrics Entities section. Components are not always present. Most top-level counters have no qualifiers. Subunit and pipestage may be absent where irrelevant, or there may be many subunit specifiers for detailed counters. Cycle Metrics Counters using the term cycles in the name report the number of cycles in the unit\u2019s clock domain. Unit-level cycle metrics include: unit__cycles_elapsed : The number of cycles within a range. The cycles\u2019 DimUnits are specific to the unit\u2019s clock domain. unit__cycles_active : The number of cycles where the unit was processing data. unit__cycles_stalled : The number of cycles where the unit was unable to process new data because its output interface was blocked. unit__cycles_idle : The number of cycles where the unit was idle. Interface-level cycle counters are often (not always) available in the following variations: unit__(interface)_active : Cycles where data was transferred from source-unit to destination-unit. unit__(interface)_stalled : Cycles where the source-unit had data, but the destination-unit was unable to accept data. Instanced Metrics Metrics collected with NVIDIA Nsight Compute can have a single (aggregate) value, multiple instance values, or both. Instances allow the metric to have multiple sub-values, e.g. representing the value of a source metric at each instruction offset. If a metric has instance values, it often also has a correlation ID for each instance. Correlation IDs and values form a mapping that allows the tool to correlate the values within a context. For source metrics, that context is commonly the address ranges of the functions executed as part of the workload. You can find which metrics have instance values in the Metrics Reference . In the UI, the Metric Details tool window can be used to conveniently view correlation IDs and instance values for each metric. Also, both the UI and the command line interface provide options to show instance values in addition to a metric aggregate where applicable.
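As an illustrative reading of the naming scheme above (the decomposition is an annotation for this example, not part of the metric database), the counter sm__inst_executed_pipe_lsu can be parsed as unit sm, quantity inst_executed, and qualifier pipe_lsu, with roll-ups such as sm__inst_executed_pipe_lsu.sum aggregating across all SM instances. Its available sub-metrics can be listed with the query pattern shown earlier:
ncu --query-metrics-mode suffix --metrics sm__inst_executed_pipe_lsu --chip ga100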
Second, each GPU Instance can be further partitioned into one or more Compute Instances. Each Compute Instance has exclusive ownership of its assigned SMs of the GPU Instance. However, all Compute Instances within a GPU Instance share the GPU Instance\u2019s memory and memory bandwidth. Every Compute Instance acts and operates as a CUDA device with a unique device ID. See the driver release notes as well as the documentation for the nvidia-smi CLI tool for more information on how to configure MIG instances. For profiling, a Compute Instance can be one of two types: isolated or shared . An isolated Compute Instance owns all of its assigned resources and does not share any GPU unit with another Compute Instance. In other words, the Compute Instance is the same size as its parent GPU Instance and consequently does not have any other sibling Compute Instances. Profiling works as usual for isolated Compute Instances. A shared Compute Instance uses GPU resources that can potentially also be accessed by other Compute Instances in the same GPU Instance. Due to this resource sharing, collecting profiling data from those shared units is not permitted. Attempts to collect metrics from a shared unit fail with an error message of ==ERROR== Failed to access the following metrics. When profiling on a MIG instance, it is not possible to collect metrics from GPU units that are shared with other MIG instances, followed by the list of failing metrics. Collecting only metrics from GPU units that are exclusively owned by a shared Compute Instance is still possible. Locking Clocks NVIDIA Nsight Compute is not able to set the clock frequency on any Compute Instance for profiling. You can continue analyzing kernels without fixed clock frequencies (using --clock-control none ; see here for more details). If you have sufficient permissions, nvidia-smi can be used to configure a fixed frequency for the whole GPU by calling nvidia-smi --lock-gpu-clocks=tdp,tdp . This sets the GPU clocks to the base TDP frequency until you reset the clocks by calling nvidia-smi --reset-gpu-clocks . MIG on Baremetal (non-vGPU) All Compute Instances on a GPU share the same clock frequencies. MIG on NVIDIA vGPU Enabling profiling for a VM gives the VM access to the GPU\u2019s global performance counters, which may include activity from other VMs executing on the same GPU. Enabling profiling for a VM also allows the VM to lock clocks on the GPU, which impacts all other VMs executing on the same GPU, including MIG Compute Instances.
If more metrics are requested, the kernel launch is replayed multiple times, with its accessible memory being saved and restored between subsequent passes to guarantee deterministic execution. Therefore, collecting more metrics can significantly increase overhead by requiring more replay passes and increasing the total amount of memory that needs to be restored during replay. The collected section set Since each set specifies a group of sections to be collected, choosing a less comprehensive set can reduce profiling overhead. See the --set command in the NVIDIA Nsight Compute CLI documentation. Number of collected sections Since each section specifies a number of metrics to be collected, selecting fewer sections can reduce profiling overhead. See the --section command in the NVIDIA Nsight Compute CLI documentation. Number of profiled kernels By default, all selected metrics are collected for all launched kernels. To reduce the impact on the application, you can try to limit performance data collection to as few kernel functions and instances as makes sense for your analysis. See the filtering commands in the NVIDIA Nsight Compute CLI documentation. There is a relatively high one-time overhead for the first profiled kernel in each context to generate the metric configuration. This overhead does not occur for subsequent kernels in the same context, if the list of collected metrics remains unchanged. GPU Architecture For some metrics, the overhead can vary depending on the exact chip they are collected on, e.g. due to the varying number of units on the chip. Similarly, the overhead for resetting the L2 cache in-between kernel replay passes depends on the size of that cache.", "keywords": []}, {"id": 176, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#persistence-mode", "display_name": "Persistence Mode", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "persistence-mode", "priority": -1, "content": "The NVIDIA kernel mode driver must be running and connected to a target GPU device before any user interactions with that device can take place. The driver behavior differs depending on the OS. Generally, on Linux, if the kernel mode driver is not already running or connected to a target GPU, the invocation of any program that attempts to interact with that GPU will transparently cause the driver to load and/or initialize the GPU. When all GPU clients terminate, the driver will then deinitialize the GPU. If persistence mode is not enabled (as part of the OS, or by the user), applications triggering GPU initialization may incur a short startup cost. In addition, on some configurations, there may also be a shutdown cost when the GPU is de-initialized at the end of the application. It is recommended to enable persistence mode on applicable operating systems before profiling with NVIDIA Nsight Compute for more consistent application behavior.", "keywords": []}, {"id": 177, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#pm-sampling", "display_name": "PM Sampling", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "pm-sampling", "priority": -1, "content": "NVIDIA Nsight Compute supports collecting many metrics by sampling the GPU\u2019s performance monitors (PM) periodically at fixed intervals.
The resulting metrics are instanced , with each sample being composed of its value and the (GPU) timestamp when it was collected. This allows the tool to visualize the data on a timeline that helps you understand how the behavior of the profiled workload changes during its runtime. Metrics collected with PM sampling have instance values mapping from their sample timestamp (in ns) to their sample value. When logically possible, the non-instanced value of the metric represents the aggregate across all instances. The aggregation operation (e.g. sum, average) depends on the metric structure. A metric is collected using PM sampling in the following cases: The metric name has the pmsampling: prefix. The metric name includes a valid Triage group. The metric is requested in a section\u2019s Timeline field. Prefixing the metric with pmsampling: is still recommended in this case to avoid conflicts with profiler metrics of the same name collected e.g. by other sections. Architecture support and sampling intervals: Volta and earlier: not supported. TU10x-GA100: supported, with sampling intervals >= 20000 cycles. GA10x and later: supported, with sampling intervals >= 1000 ns. PM sampling is supported on all platforms except vGPU. See below for further limitations that apply to context switch trace . You can query the list of metrics available to PM sampling using the --query-metrics-collection pmsampling option. Note though that while all listed metrics are available to the PM sampler, only those requiring a single pass can be collected. Context Switch Trace Since this data collection samples across the entire GPU device, the tool concurrently collects a context switch trace . The trace is stored as a separate, instanced metric . It tracks when the context of interest was active and can be used to filter the sampling metric to only relevant instances and to better align metrics from multiple passes on the timeline. While it\u2019s generally preferable to have this trace collected, it can be disabled using an environment variable . Note that context switch trace is not supported on Windows Subsystem for Linux (WSL), Multi-Instance GPU (MIG), within containers or on mobile platforms. Counter Domains PM sampling metrics are composed of one or more raw counter dependencies internally. If metrics in the same pass share such a dependency, it is only collected once. Each counter is associated with a counter domain , which describes how and where in the hardware the counter is collected. There is a limited number of counters in each domain that can be collected concurrently in the same pass, and the number may vary, depending on the selected counters. Selecting counters from different domains increases the chance that more metric dependencies fit into the same pass. Furthermore, some counters can be collected through different domains, and the domain may be chosen by the tool or the user. When querying the PM sampling metric collection, the required and optional domains for a metric\u2019s counter dependencies are shown. E.g., for l1tex__throughput gpu_sm_a,[gpu_sm_b,gpu_sm_c], the domain gpu_sm_a is required and one of the optional domains [gpu_sm_b,gpu_sm_c] must be chosen for this metric to be collectable. Counter domains can only be selected explicitly in section files , using one or more instances of the CtrDomains: "<domain>" field for PM sampling metrics.
Note that most users should be able to rely on the tool\u2019s automatic selection of counter domains, or the pre-configured domains in section files.", "keywords": []}, {"id": 178, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#profile-series", "display_name": "Profile Series", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "profile-series", "priority": -1, "content": "The performance of a kernel is highly dependent on the launch parameters used. Small changes to the launch parameters can have a significant effect on the runtime behavior of the kernel. However, identifying the best parameter set for a kernel by manually testing a lot of combinations can be a tedious process. To make this workflow faster and more convenient, Profile Series provide the ability to automatically profile a single kernel multiple times with changing parameters. The parameters to be modified and values to be tested can be independently enabled and configured. For each combination of selected parameter values, a unique profile result is collected, and the modified parameter values are tracked in the description of the series results. By comparing the results of a profile series, the kernel\u2019s behavior across the changing parameters can be seen, and the optimal parameter set can be identified quickly. Profile Series action. Profile Series dialog.", "keywords": []}, {"id": 179, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#profiling-applications", "display_name": "Profiling Applications", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "profiling-applications", "priority": -1, "content": "During regular execution, a CUDA application process will be launched by the user. It communicates directly with the CUDA user-mode driver, and potentially with the CUDA runtime library. Regular Application Execution When profiling an application with NVIDIA Nsight Compute, the behavior is different. The user launches the NVIDIA Nsight Compute frontend (either the UI or the CLI) on the host system, which in turn starts the actual application as a new process on the target system. While host and target are often the same machine, the target can also be a remote system with a potentially different operating system. The tool inserts its measurement libraries into the application process, which allow the profiler to intercept communication with the CUDA user-mode driver. In addition, when a kernel launch is detected, the libraries can collect the requested performance metrics from the GPU. The results are then transferred back to the frontend. Profiled Application Execution", "keywords": []}, {"id": 180, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#range-and-precision", "display_name": "Range and Precision", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "range-and-precision", "priority": -1, "content": "Overview In general, measurement values that lie outside the expected logical range of a metric can be attributed to one or more of the below root-causes. If values exceed that range, the tool deliberately does not clamp them to their expected values, to ensure that the rest of the profiler report remains self-consistent.
Asynchronous GPU activity GPU engines other than the one measured by a metric (display, copy engine, video encoder, video decoder, etc.) potentially access shared resources during profiling. Such chip-global shared resources include L2, DRAM, PCIe, and NVLINK. If the kernel launch is small, the other engine(s) can cause significant confusion in e.g. the DRAM results, since it is not possible to isolate the DRAM traffic of the SM. To reduce the impact of such asynchronous units, consider profiling on a GPU without active display and without other processes that can access the GPU at the time. Multi-pass data collection Out-of-range metrics often occur when the profiler replays the kernel launch to collect metrics, and work distribution is significantly different across replay passes. A metric such as hit rate (hits / queries) can have significant error if hits and queries are collected on different passes and the kernel does not saturate the GPU to reach a steady state (generally > 20 \u00b5s). Similarly, it can show unexpected values when the workload is inherently variable, as e.g. in the case of spin loops. To mitigate the issue, try to increase the measured workload, when applicable, to allow the GPU to reach a steady state for each launch. Reducing the number of metrics collected at the same time can also improve precision by increasing the likelihood that counters contributing to one metric are collected in a single pass. Tool issue If you still observe metric issues after following the guidelines above, please reach out to us and describe your issue.", "keywords": []}, {"id": 181, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#range-replay", "display_name": "Range Replay", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "range-replay", "priority": -1, "content": "In Range Replay , all requested metrics in NVIDIA Nsight Compute are grouped into one or more passes. In contrast to Kernel Replay and Application Replay , Range Replay captures and replays complete ranges of CUDA API calls and kernel launches within the profiled application. Metrics are then not associated with individual kernels but with the entire range. This allows the tool to execute kernels without serialization and thereby supports profiling kernels that should be run concurrently for correctness or performance reasons. Execution with Range Replay. An entire range of API calls and kernel launches is captured and replayed. Host and device memory is saved and restored as necessary.", "keywords": []}, {"id": 182, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#replay", "display_name": "Replay", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "replay", "priority": -1, "content": "Depending on which metrics are to be collected, kernels might need to be replayed one or more times, since not all metrics can be collected in a single pass . For example, the number of metrics originating from hardware (HW) performance counters that the GPU can collect at the same time is limited.
In addition, patch-based software (SW) performance counters can have a high impact on kernel runtime and would skew results for HW counters.", "keywords": []}, {"id": 183, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#reproducibility", "display_name": "Reproducibility", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "reproducibility", "priority": -1, "content": "In order to provide actionable and deterministic results across application runs, NVIDIA Nsight Compute applies various methods to adjust how metrics are collected. This includes serializing kernel launches, purging GPU caches before each kernel replay or adjusting GPU clocks .", "keywords": []}, {"id": 184, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#roofline-charts", "display_name": "Roofline Charts", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "roofline-charts", "priority": -1, "content": "Roofline charts provide a very helpful way to visualize achieved performance on complex processing units, like GPUs. This section introduces the Roofline charts that are presented within a profile report.", "keywords": []}, {"id": 185, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#roofline-overview", "display_name": "Overview", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "roofline-overview", "priority": -1, "content": "Kernel performance is not only dependent on the operational speed of the GPU. Since a kernel requires data to work on, performance is also dependent on the rate at which the GPU can feed data to the kernel. A typical roofline chart combines the peak performance and memory bandwidth of the GPU, with a metric called Arithmetic Intensity (a ratio between Work and Memory Traffic ), into a single chart, to more realistically represent the achieved performance of the profiled kernel. A simple roofline chart might look like the following: Roofline overview. This chart actually shows two different rooflines. However, the following components can be identified for each: Vertical Axis - The vertical axis represents Floating Point Operations per Second (FLOPS). For GPUs this number can get quite large and so the numbers on this axis can be scaled for easier reading (as shown here). In order to better accommodate the range, this axis is rendered using a logarithmic scale. Horizontal Axis - The horizontal axis represents Arithmetic Intensity , which is the ratio between Work (expressed in floating point operations per second) and Memory Traffic (expressed in bytes per second). The resulting unit is in floating point operations per byte. This axis is also shown using a logarithmic scale. Memory Bandwidth Boundary - The memory bandwidth boundary is the sloped part of the roofline. By default, this slope is determined entirely by the memory transfer rate of the GPU but can be customized inside the SpeedOfLight_RooflineChart.section file if desired. Peak Performance Boundary - The peak performance boundary is the flat part of the roofline. By default, this value is determined entirely by the peak performance of the GPU but can be customized inside the SpeedOfLight_RooflineChart.section file if desired.
Ridge Point - The ridge point is the point at which the memory bandwidth boundary meets the peak performance boundary. This point is a useful reference when analyzing kernel performance. Achieved Value - The achieved value represents the performance of the profiled kernel. If baselines are being used, the roofline chart will also contain an achieved value for each baseline. The outline color of the plotted achieved value point can be used to determine from which baseline the point came.", "keywords": []}, {"id": 186, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#sampling", "display_name": "Sampling", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "sampling", "priority": -1, "content": "NVIDIA Nsight Compute can collect certain performance data via sampling at fixed intervals.", "keywords": []}, {"id": 187, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#sections-and-rules", "display_name": "Sections and Rules", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "sections-and-rules", "priority": -1, "content": "Identifier and Filename Description ComputeWorkloadAnalysis (Compute Workload Analysis) Detailed analysis of the compute resources of the streaming multiprocessors (SM), including the achieved instructions per clock (IPC) and the utilization of each available pipeline. Pipelines with very high utilization might limit the overall performance. InstructionStats (Instruction Statistics) Statistics of the executed low-level assembly instructions (SASS). The instruction mix provides insight into the types and frequency of the executed instructions. A narrow mix of instruction types implies a dependency on few instruction pipelines, while others remain unused. Using multiple pipelines allows hiding latencies and enables parallel execution. LaunchStats (Launch Statistics) Summary of the configuration used to launch the kernel. The launch configuration defines the size of the kernel grid, the division of the grid into blocks, and the GPU resources needed to execute the kernel. Choosing an efficient launch configuration maximizes device utilization. MemoryWorkloadAnalysis (Memory Workload Analysis) Detailed analysis of the memory resources of the GPU. Memory can become a limiting factor for the overall kernel performance when fully utilizing the involved hardware units (Mem Busy), exhausting the available communication bandwidth between those units (Max Bandwidth), or by reaching the maximum throughput of issuing memory instructions (Mem Pipes Busy). Depending on the limiting factor, the memory chart and tables allow you to identify the exact bottleneck in the memory system. NUMA Affinity (NumaAffinity) Non-uniform memory access (NUMA) affinities based on compute and memory distances for all GPUs. Nvlink (Nvlink) High-level summary of NVLink utilization. It shows the total received and transmitted (sent) memory, as well as the overall link peak utilization. Nvlink_Tables (Nvlink_Tables) Detailed tables with properties for each NVLink. Nvlink_Topology (Nvlink_Topology) The NVLink Topology diagram shows logical NVLink connections with transmit/receive throughput. Occupancy (Occupancy) Occupancy is the ratio of the number of active warps per multiprocessor to the maximum number of possible active warps.
Another way to view occupancy is the percentage of the hardware\u2019s ability to process warps that is actively in use. Higher occupancy does not always result in higher performance; however, low occupancy always reduces the ability to hide latencies, resulting in overall performance degradation. Large discrepancies between the theoretical and the achieved occupancy during execution typically indicate highly imbalanced workloads. PM Sampling (PmSampling) Timeline view of metrics sampled periodically over the workload duration. Data is collected across multiple passes. Use this section to understand how workload behavior changes over its runtime. PM Sampling: Warp States (PmSampling_WarpStates) Warp states sampled periodically over the workload duration. Metrics in different groups come from different passes. SchedulerStats (Scheduler Statistics) Summary of the activity of the schedulers issuing instructions. Each scheduler maintains a pool of warps that it can issue instructions for. The upper bound of warps in the pool (Theoretical Warps) is limited by the launch configuration. On every cycle each scheduler checks the state of the allocated warps in the pool (Active Warps). Active warps that are not stalled (Eligible Warps) are ready to issue their next instruction. From the set of eligible warps, the scheduler selects a single warp from which to issue one or more instructions (Issued Warp). On cycles with no eligible warps, the issue slot is skipped and no instruction is issued. Having many skipped issue slots indicates poor latency hiding. SourceCounters (Source Counters) Source metrics, including branch efficiency and sampled warp stall reasons. Warp Stall Sampling metrics are periodically sampled over the kernel runtime. They indicate when warps were stalled and couldn\u2019t be scheduled. See the documentation for a description of all stall reasons. Only focus on stalls if the schedulers fail to issue every cycle. SpeedOfLight (GPU Speed Of Light Throughput) High-level overview of the throughput for compute and memory resources of the GPU. For each unit, the throughput reports the achieved percentage of utilization with respect to the theoretical maximum. Breakdowns show the throughput for each individual sub-metric of Compute and Memory to clearly identify the highest contributor. WarpStateStats (Warp State Statistics) Analysis of the states in which all warps spent cycles during the kernel execution. The warp states describe a warp\u2019s readiness or inability to issue its next instruction. The warp cycles per instruction define the latency between two consecutive instructions. The higher the value, the more warp parallelism is required to hide this latency. For each warp state, the chart shows the average number of cycles spent in that state per issued instruction. Stalls do not always impact the overall performance, nor are they completely avoidable. Only focus on stall reasons if the schedulers fail to issue every cycle.
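For example, individual sections from the table above can be selected by their identifier on the command line (a sketch; my_app is a hypothetical application binary):
ncu --section SpeedOfLight --section Occupancy my_app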
Serialization across processes is necessary because some GPU and driver objects required for the collection of HW performance metrics can only be acquired by a single process at a time. To achieve this, the lock file TMPDIR/nsight-compute-lock is used. On Windows, TMPDIR is the path returned by the Windows GetTempPath API function. On other platforms, it is the path supplied by the first environment variable in the list TMPDIR, TMP, TEMP, TEMPDIR . If none of these is found, it\u2019s /var/nvidia on QNX and /tmp otherwise. Serialization within the process is required for most metrics to be mapped to the proper kernel. In addition, without serialization, performance metric values might vary widely if kernels execute concurrently on the same device. It is currently not possible to disable this tool behavior. Refer to the FAQ entry on possible workarounds.", "keywords": []}, {"id": 189, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#sets-and-sections", "display_name": "Sets and Sections", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "sets-and-sections", "priority": -1, "content": "NVIDIA Nsight Compute uses Section Sets (short sets ) to decide, on a very high level, the number of metrics to be collected. Each set includes one or more Sections , with each section specifying several logically associated metrics. For example, one section might include only high-level SM and memory utilization metrics, while another could include metrics associated with the memory units, or the HW scheduler. The number and type of metrics specified by a section have a significant impact on the overhead during profiling. To allow you to quickly choose between a fast, less detailed profile and a slower, more comprehensive analysis, you can select the respective section set. See Overhead for more information on profiling overhead. By default, a relatively small number of metrics is collected. Those mostly include high-level utilization information as well as static launch and occupancy data. The latter two are regularly available without replaying the kernel launch. The basic set is collected when no --set , --section , or --metrics options are passed on the command line. The full set of sections can be collected with --set full . Use --list-sets to see the list of currently available sets. Use --list-sections to see the list of currently available sections. The default search directory, which is also the location of the pre-defined section files, is called sections/ . All related command line options can be found in the NVIDIA Nsight Compute CLI documentation.", "keywords": []}, {"id": 190, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#shared-memory", "display_name": "Shared Memory", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "shared-memory", "priority": -1, "content": "Example Shared Memory table, collected on an RTX 2080 Ti Columns Instructions For each access type, the total number of all actually executed assembly (SASS) instructions per warp. Predicated-off instructions are not included. E.g., the instruction STS would be counted towards Shared Store . Requests The total number of all requests to shared memory. On SM 7.0 (Volta) and newer architectures, each shared memory instruction generates exactly one request. Wavefronts Number of wavefronts required to service the requested shared memory data.
Wavefronts are serialized and processed on different cycles. % Peak Percentage of peak utilization. Higher values imply a higher utilization of the unit and can show potential bottlenecks, but they do not necessarily indicate efficient usage. Bank Conflicts If multiple threads\u2019 requested addresses map to different offsets in the same memory bank, the accesses are serialized. The hardware splits a conflicting memory request into as many separate conflict-free requests as necessary, decreasing the effective bandwidth by a factor equal to the number of colliding memory requests. Rows (Access Types) Shared memory access operations. Total The aggregate for all access types in the same column. Metrics Metrics from this table can be collected on the command line using --set full , --section MemoryWorkloadAnalysis_Tables or --metrics group:memory__shared_table .", "keywords": []}, {"id": 191, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#supported-apis", "display_name": "Supported APIs", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "supported-apis", "priority": -1, "content": "Range replay supports a subset of the CUDA API for capture and replay. This page lists the supported functions as well as any further API-specific limitations that may apply. If an unsupported API call is detected in the captured range, an error is reported and the range cannot be profiled. The groups listed below match the ones found in the CUDA Driver API documentation . Generally, range replay only captures and replays CUDA Driver API calls. CUDA Runtime API calls can be captured when they generate only supported CUDA Driver API calls internally. Deprecated APIs are not supported. Error Handling All supported. Initialization Not supported. Version Management All supported. Device Management All supported, except: cuDeviceSetMemPool Primary Context Management cuDevicePrimaryCtxGetState Context Management All supported, except: cuCtxSetCacheConfig cuCtxSetSharedMemConfig Module Management cuModuleGetFunction cuModuleGetGlobal cuModuleGetSurfRef cuModuleGetTexRef cuModuleLoad cuModuleLoadData cuModuleLoadDataEx cuModuleLoadFatBinary cuModuleUnload Library Management All supported, except: cuKernelSetAttribute cuKernelSetCacheConfig Memory Management cuArray* cuDeviceGetByPCIBusId cuDeviceGetPCIBusId cuMemAlloc cuMemAllocHost cuMemAllocPitch cuMemcpy* cuMemFree cuMemFreeHost cuMemGetAddressRange cuMemGetInfo cuMemHostAlloc cuMemHostGetDevicePointer cuMemHostGetFlags cuMemHostRegister cuMemHostUnregister cuMemset* cuMipmapped* Virtual Memory Management Not supported. Stream Ordered Memory Allocator Not supported. Unified Addressing Not supported. Stream Management cuStreamCreate* cuStreamDestroy cuStreamGet* cuStreamQuery cuStreamSetAttribute cuStreamSynchronize cuStreamWaitEvent Event Management All supported. External Resource interoperability Not supported. Stream Memory Operations Not supported. Execution Control cuFuncGetAttribute cuFuncGetModule cuFuncSetAttribute cuFuncSetCacheConfig cuLaunchCooperativeKernel cuLaunchHostFunc cuLaunchKernel Graph Management Not supported. Occupancy All supported. Texture/Surface Reference Management Not supported. Texture Object Management All supported. Surface Object Management All supported. Peer Context Memory Access Not supported. Graphics Interoperability Not supported. Driver Entry Point Access All supported.
OpenGL Interoperability Not supported. VDPAU Interoperability Not supported. EGL Interoperability Not supported. Green Contexts cuCtxFromGreenCtx cuGreenCtxCreate cuGreenCtxDestroy cuGreenCtxRecordEvent cuGreenCtxWaitEvent cuStreamGetGreenCtx", "keywords": []}, {"id": 192, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "page", "name": "ProfilingGuide/index#warp-sampling", "display_name": "Warp Sampling", "type": "section", "display_type": "Page section", "docname": "ProfilingGuide/index", "anchor": "warp-sampling", "priority": -1, "content": "NVIDIA Nsight Compute supports periodic sampling of the warp program counter and warp scheduler state. At a fixed interval of cycles, the sampler in each streaming multiprocessor selects an active warp and outputs the program counter and the warp scheduler state. The tool selects the minimum interval for the device. On small devices, this can be every 32 cycles. On larger chips with more multiprocessors, this may be 2048 cycles. The sampler selects a random active warp. On the same cycle the scheduler may select a different warp to issue. The resulting metrics are correlated with the individual executed instructions but don\u2019t have any time resolution. See the Warp Stall Reasons tables in the Metrics Reference for a description of the individual warp scheduler states.", "keywords": []}, {"id": 193, "doc_id": 193, "filename": "ProfilingGuide/index.html", "domain_name": "std", "name": "ProfilingGuide/index", "display_name": "Kernel Profiling Guide", "type": "doc", "display_type": "Page", "docname": "ProfilingGuide/index", "anchor": "", "priority": -1, "content": "Nsight Compute profiling guide. Kernel Profiling Guide with metric types and meaning, data collection modes and FAQ for common problems.", "keywords": []}, {"id": 194, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#gpu-support", "display_name": "GPU Support", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "gpu-support", "priority": -1, "content": "Architecture Support Kepler No Maxwell No Pascal No Volta GV100 Yes Volta GV11b Yes Turing TU1xx Yes NVIDIA GA100 Yes NVIDIA GA10x Yes NVIDIA GA10b Yes NVIDIA GH100 Yes Most metrics used in NVIDIA Nsight Compute are identical to those of the PerfWorks Metrics API and follow the documented Metrics Structure . A comparison between the metrics used in nvprof and their equivalent in NVIDIA Nsight Compute can be found in the NVIDIA Nsight Compute CLI User Manual .", "keywords": []}, {"id": 195, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#known-issues", "display_name": "Known Issues", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "known-issues", "priority": -1, "content": "Installation The installer might not show all patch-level version numbers during installation. Some command line options listed in the help of a .run installer of NVIDIA Nsight Compute affect only the archive extraction, not the installation stage. To pass command line options to the embedded installer script, specify those options after -- in the form of -- -&lt;option&gt; . The available options for the installer script are: -help : Print help message -targetpath=&lt;PATH&gt; : Specify install path -noprompt : No prompts.
Implies acceptance of the EULA. For example, specifying only option --quiet extracts the installer archive without any output to the console, but still prompts for user interaction during the installation. To install NVIDIA Nsight Compute without any console output or user interaction, please specify --quiet -- -noprompt . After using the SDK Manager to install the NVIDIA Nsight Compute tools, their binary path needs to be manually added to your PATH environment variable. See also the System Requirements for more installation instructions. Launch and Connection Launching applications on remote targets/platforms is not supported for several combinations. See Platform Support for details. Manually launch the application using command line ncu --mode=launch on the remote system and connect using the UI or CLI afterwards. In the NVIDIA Nsight Compute connection dialog, a remote system can only be specified for one target platform. Remove a connection from its current target platform in order to be able to add it to another. Loading of CUDA sources via SSH requires that the remote connection is configured, and that the hostname/IP address of the connection matches the target (as seen in the report session details). For example, prefer my-machine.my-domain.com, instead of my-machine, even though the latter resolves to the same address. Other issues concerning remote connections are discussed in the documentation for remote connections . Local connections between NVIDIA Nsight Compute and the launched target application might not work on some ppc64le or aarch64 (sbsa) systems configured to only support IPv6. On these platforms, the NV_COMPUTE_PROFILER_LOCAL_CONNECTION_OVERRIDE=uds environment variable can be set to use Unix Domain Sockets instead of TCP for local connections to work around the problem. On x86_64 Linux, Unix Domain Sockets are used by default, but local TCP connections can be forced using NV_COMPUTE_PROFILER_LOCAL_CONNECTION_OVERRIDE=tcp . Profiling and Metrics Profiling of 32-bit processes is not supported. Profiling kernels executed on a device that is part of an SLI group is not supported. An \u201cUnsupported GPU\u201d error is shown in this case. Profiling a kernel while other contexts are active on the same device (e.g. X server, or secondary CUDA or graphics application) can result in varying metric values for L2/FB (Device Memory) related metrics. Specifically, L2/FB traffic from non-profiled contexts cannot be excluded from the metric results. To completely avoid this issue, profile the application on a GPU without secondary contexts accessing the same device (e.g. no X server on Linux). In the current release, profiling a kernel while any other GPU work is executing on the same MIG compute instance can result in varying metric values for all units. NVIDIA Nsight Compute enforces serialization of the CUDA launches within the target application to ensure those kernels do not influence each other. See Serialization for more details. However, GPU work issued through other APIs in the target process or workloads created by non-target processes running simultaneously in the same MIG compute instance will influence the collected metrics. Note that it is acceptable to run CUDA processes in other MIG compute instances as they will not influence the profiled MIG compute instance. On Linux kernels with fs.protected_regular=1 set (e.g. some Ubuntu 20.04 cloud service provider instances), root users may not be able to access the inter-process lock file . See the FAQ for workarounds.
Profiling only supports up to 32 device instances, including instances of MIG partitions. Profiling the 33rd or higher device instance will result in indeterminate data. Enabling certain metrics can cause GPU kernels to run longer than the driver\u2019s watchdog time-out limit. In these cases, the driver will terminate the GPU kernel, resulting in an application error, and profiling data will not be available. Please disable the driver watchdog timeout before profiling such long-running CUDA kernels. On Linux, setting the X Config option Interactive to false is recommended. For Windows, detailed information on disabling the Windows TDR is available at https://docs.microsoft.com/en-us/windows-hardware/drivers/display/timeout-detection-and-recovery Collecting device-level metrics, such as the NVLink metrics ( nvl* ), is not supported on NVIDIA virtual GPUs (vGPUs). As of the CUDA 11.4 and R470 TRD1 driver release, NVIDIA Nsight Compute is supported in a vGPU environment, which requires a vGPU license. If the license is not obtained after 20 minutes, the reported performance metrics data from the GPU will be inaccurate. This is because of a feature in the vGPU environment that reduces performance but retains functionality as specified here . Profiling on NVIDIA live-migrated virtual machines is not supported and can result in undefined behavior. Profiling with the multi-process service (MPS) enabled can result in undefined behavior. When profiling using Range Replay or Application Range Replay with multiple CUDA Green Contexts active that belong to the same device context, the range result will contain counter values aggregated across all Green Contexts. The NVLink Topology section is not supported for a configuration using NVSwitch. NVIDIA Nsight Compute does not support per-NVLink metrics. NVIDIA Nsight Compute does not support the Logical NVLink Throughput table. Setting a reduced NVLink Bandwidth mode does not impact the reported peak values for NVLink metrics. All peak values and corresponding percentages are calculated from the non-reduced NVLink bandwidth. Reconfiguring the NVLink Bandwidth mode using nvidia-smi while profiling may lead to undefined tool behavior. Profiling CUDA graph kernel nodes that can launch device graphs or are part of device-launchable graphs is not supported. Use Graph Profiling mode instead. Profiling in Graph Profiling mode is performed on the context that is specified by the stream handle for the graph launch. Consequently, only memory allocated on this context is saved and restored during graph replay and only kernel nodes executed on this context are profiled. On CUDA drivers older than 530.x, profiling on Windows Subsystem for Linux (WSL) is not supported if the system has multiple physical NVIDIA GPUs. This is not affected by setting CUDA_VISIBLE_DEVICES . Collecting software counters through PerfWorks currently forces all functions in the module of the profiled kernel to be loaded. This increases the host and device memory footprint of the target application for the remainder of the process lifetime. PM Sampling is not supported when collecting a Profile Series. Compatibility Applications calling blocking functions on standard input/output streams can cause the profiler to stall until the blocking function call is resolved. NVIDIA Nsight Compute can hang on applications using RAPIDS in versions 0.6 and 0.7, due to an issue in cuDF. Profiling child processes launched via clone() is not supported.
Profiling child processes launched from Python using os.system() is not supported on ppc64le. Profiling of Cooperative Groups kernels launched with cuLaunchCooperativeKernelMultiDevice is not yet supported. On Linux systems, when profiling bsd-csh scripts, the original application output will not be printed. As a workaround, use a different C-shell, e.g. tcsh . Attempting to use the --clock-control option to set the GPU clocks will fail when profiling on a GPU partition. Please use nvidia-smi (installed with the NVIDIA display driver) to control the clocks for the entire GPU. This will require administrative privileges when the GPU is partitioned. On Linux aarch64, NVIDIA Nsight Compute does not work if the HOME environment variable is not set. NVIDIA Nsight Compute versions 2020.1.0 to 2020.2.1 are not compatible with CUDA driver version 460+ if the application launches Cooperative Groups kernels. Profiling will fail with error \u201cUnknownError\u201d. Collecting CPU call stack information on Windows Server 2016 can hang NVIDIA Nsight Compute in some cases. Currently, the only workaround is to skip CPU call stack collection on such systems by not specifying the option --call-stack . When profiling a script, --target-processes all may target utility executables such as xargs , uname or ls . To avoid profiling these, use the --target-processes-filter option accordingly. On mobile platforms, the --kill option is not supported with application replay mode. NVIDIA Nsight Compute might show invalid characters for Unicode names and paths on Windows 10. As a workaround, use a third-party terminal emulator, e.g. Git bash. User Interface The API Statistics filter in NVIDIA Nsight Compute does not support units. File size is the only property considered when resolving source files. Timestamps are currently ignored. Terminating or disconnecting an application in the Interactive Profiling activity while the API Stream View is being updated can lead to a crash. See the OptiX library support section for limitations concerning the Acceleration Structure Viewer . After updating from a previous version of NVIDIA Nsight Compute on Linux, the file load dialog may not allow column resizing and sorting. As a workaround, the ~/.config/QtProject.conf file can be edited to remove the treeViewHeader entry from the [FileDialog] section.", "keywords": []}, {"id": 196, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#library-support", "display_name": "Library Support", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "library-support", "priority": -1, "content": "NVIDIA Nsight Compute can be used to profile CUDA applications, as well as applications that use CUDA via NVIDIA or third-party libraries. For most such libraries, the behavior is expected to be identical to applications using CUDA directly. However, for certain libraries, NVIDIA Nsight Compute has certain restrictions or alternate behavior, or requires non-default setup steps prior to profiling.", "keywords": []}, {"id": 197, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#optix", "display_name": "OptiX", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "optix", "priority": -1, "content": "NVIDIA Nsight Compute supports profiling of OptiX applications, but with certain restrictions.
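The "User Kernels" entry below refers to OptiX programs such as the following minimal device-code sketch. It is an assumption-laden illustration, not from the Nsight Compute documentation: the Params struct and the "rg" program suffix are hypothetical, while the __raygen__ naming convention and optixGetLaunchIndex() come from the OptiX 7.x device API.

```cpp
// Minimal sketch of a user-defined OptiX 7.x ray-generation program.
// The Params struct and the "rg" suffix are hypothetical; __raygen__*
// naming and optixGetLaunchIndex() are part of the OptiX device API.
#include <optix.h>

struct Params {
    uchar4*      image;  // output pixel buffer (illustrative)
    unsigned int width;
};

extern "C" __constant__ Params params;

extern "C" __global__ void __raygen__rg() {
    const uint3 idx = optixGetLaunchIndex();
    // A real program would call optixTrace() here; the sketch just
    // writes a constant pixel so the kernel contains user-defined code.
    params.image[idx.y * params.width + idx.x] = make_uchar4(255, 0, 0, 255);
}
```

Kernels like this appear on the API Stream under their ray-generation name, and their CUDA-C, PTX and SASS are visible on the Source page, as detailed in the next entry.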
Internal Kernels Kernels launched by OptiX that contain no user-defined code are given the generic name NVIDIA internal . These kernels show up on the API Stream in the NVIDIA Nsight Compute UI, and can be profiled in both the UI and the NVIDIA Nsight Compute CLI. However, no CUDA-C source, PTX or SASS is available for them. User Kernels Kernels launched by OptiX can contain user-defined code. OptiX identifies these kernels in the API Stream with a custom name. This name starts with raygen__ (for \u201cray generation\u201d). These kernels show up on the API Stream and can be profiled in the UI as well as the NVIDIA Nsight Compute CLI. The Source page displays CUDA-C source, PTX and SASS defined by the user. Certain parts of the kernel, including device functions that contain OptiX-internal code, will not be available in the Source page. SASS When SASS information is available in the profile report, certain instructions might not be available in the Source page and are shown as N/A . The Acceleration Structure Viewer for OptiX traversable handles currently has the following limitations: The Acceleration Structure Viewer is not supported on MacOSX. Viewing instance acceleration structures using multi-level instancing is not supported. Applying motion traversables to acceleration structures is not supported. The following feature set is supported per OptiX API version: OptiX API Version Kernel Profiling API Interception Resource Tracking 6.x Yes No No 7.0 - 8.0 Yes Yes Yes", "keywords": []}, {"id": 198, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#platform-support", "display_name": "Platform Support", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "platform-support", "priority": -1, "content": "Host denotes that the UI can run on that platform. Target means that we can instrument applications on that platform for data collection. Applications launched with instrumentation on a target system can be connected to from most host platforms. The reports collected on one system can be opened on any other system. Host Targets Windows Yes Windows*, Linux (x86_64) Windows Subsystem for Linux (WSL2) Yes Windows Subsystem for Linux (WSL2) as part of the Linux (x86_64) package. Linux (x86_64) Yes Windows*, Linux (x86_64), Linux (ppc64le), Linux (aarch64 sbsa) Linux (ppc64le) No Linux (ppc64le) Linux (aarch64 sbsa) Yes Linux (aarch64 sbsa) Linux (x86_64) (Drive SDK) Yes Windows*, Linux (x86_64), Linux (aarch64), QNX MacOSX 11+ Yes Windows*, Linux (x86_64), Linux (ppc64le) Linux (aarch64) No Linux (aarch64) QNX No QNX Target platforms marked with * do not support remote launch from the respective host. Remote launch means that the application can be launched on the target system from the host UI. Instead, the application must be launched from the target system.
Profiling of 32-bit processes is not supported.", "keywords": []}, {"id": 199, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#support", "display_name": "Support", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "support", "priority": -1, "content": "Information on supported platforms and GPUs.", "keywords": []}, {"id": 200, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#system-requirements", "display_name": "System Requirements", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "system-requirements", "priority": -1, "content": "Linux and WSL On all Linux platforms, NVIDIA Nsight Compute CLI requires GLIBC version 2.15 or higher. On x86_64, the NVIDIA Nsight Compute UI requires GLIBC version 2.17 or higher. On Linux (aarch64 sbsa), the NVIDIA Nsight Compute UI requires GLIBC version 2.26 or higher. The NVIDIA Nsight Compute UI requires several packages to be installed to enable Qt. Please refer to the Qt for X11 Requirements . When executing ncu-ui with missing dependencies, an error message with information on the missing packages is shown. Note that only one package will be shown at a time, even though multiple may be missing from your system. For selected operating systems, the following commands install needed packages for NVIDIA Nsight Compute on X11: Ubuntu 18.04 apt install libopengl0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-render-util0 libxcb-xkb1 libxkbcommon-x11-0 Ubuntu 20.04 apt install libopengl0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-render-util0 libxcb-xinerama0 libxcb-xkb1 libxkbcommon-x11-0 RHEL 8.6 yum install libglvnd-opengl libxcb libxkbcommon-x11 Profiling on Windows Subsystem for Linux (WSL) is only supported with WSL version 2. Profiling is supported on Windows 10 WSL with OS build version 19044 and greater, and NVIDIA display driver version 545 or higher. It is not supported on Windows 10 WSL for systems that exceed 1 TB of system memory. Profiling is supported on Windows 11 WSL with NVIDIA display driver version 525 or higher. The Linux (x86_64) NVIDIA Nsight Compute package can be used and should be installed directly within WSL2. Remote profiling to and from WSL2 works equivalently to regular Linux (x86_64) hosts and targets, as long as it\u2019s accessible via SSH. Access to NVIDIA GPU Performance Counters must be enabled in the NVIDIA Control Panel of the Windows host. See also the CUDA on WSL User Guide . Windows Only Windows 10 and 11 are supported as host and target. The Visual Studio 2017 redistributable is not automatically installed by the NVIDIA Nsight Compute installer. The workaround is to install the x64 version of the \u2018Microsoft Visual C++ Redistributable for Visual Studio 2017\u2019 manually. The installer is linked on the main download page for Visual Studio at https://www.visualstudio.com/downloads/ or download directly from https://go.microsoft.com/fwlink/?LinkId=746572 .
", "keywords": []}, {"id": 201, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2019-1", "display_name": "Updates in 2019.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2019-1", "priority": -1, "content": "General Support for CUDA 10.1 Improved performance Bug fixes Profiling on Volta GPUs now uses the same metric names as on Turing GPUs Section files support descriptions The default sections and rules directory has been renamed to sections NVIDIA Nsight Compute Added new profiling options to the options dialog Details page shows rule result icons in the section headers Section descriptions are shown in the details page and in the sections tool window Source page supports collapsing multiple source files or functions to show aggregated results Source page heatmap color scale has changed Invalid metric results are highlighted in the profiler report Loaded section and rule files can be opened from the sections tool window NVIDIA Nsight Compute CLI Support for profiling child processes on Linux and Windows x86_64 targets NVIDIA Nsight Compute CLI uses a temporary file if no output file is specified Support for new --quiet option Support for setting the GPU clock control mode using new --clock-control option Details page output shows the NVTX context when --nvtx is enabled Support for filtering kernel launches for profiling based on their NVTX context using new --nvtx-include and --nvtx-exclude options Added new --summary options for aggregating profiling results Added option --open-in-ui to open reports collected with NVIDIA Nsight Compute CLI directly in NVIDIA Nsight Compute Resolved Issues Installation directory scripts use absolute paths OpenACC kernel names are correctly demangled Profile activity report file supports a relative path Source view can resolve all applicable files at once UI font colors are improved Details page layout and label elision issues are resolved Turing metrics are properly reported on the Summary page All byte-based metrics use a factor of 1000 when scaling units to follow SI standards CSV exports properly align columns with empty entries Fixed the metric computation for double_precision_fu_utilization on GV11b Fixed incorrect \u2018selected\u2019 PC sampling counter values The SpeedOfLight section uses \u2018max\u2019 instead of \u2018avg\u2019 cycles metrics for Elapsed Cycles", "keywords": []}, {"id": 202, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name":
"ReleaseNotes/index#updates-in-2019-2", "display_name": "Updates in 2019.2", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2019-2", "priority": -1, "content": "General Improved performance Bug fixes Kernel launch context and stream are reported as metrics PC sampling configuration options are reported as metrics The default base port for connections to the target changed Section files support multiple, named Body fields NvRules allows users to query metrics using any convertible data type NVIDIA Nsight Compute Support for filtering kernel launches using their NVTX context Support for new options to select the connection port range The Profile activity supports configuring PC sampling parameters Sections on the Details page support selecting individual bodies NVIDIA Nsight Compute CLI Support for stepping to kernel launches from specific NVTX contexts Support for new --port and --max-connections options Support for new --sampling-* options to configure PC sampling parameters Section file errors are reported with --list-sections A warning is shown if some section files could not be loaded Resolved Issues Using the \u2013summary option works for reports that include invalid metrics The full process executable filename is reported for QNX targets The project system now properly stores the state of opened reports Fixed PTX syntax highlighting Fixed an issue when switching between manual and auto profiling in NVIDIA Nsight Compute The source page in NVIDIA Nsight Compute now works with results from multiple processes Charts on the NVIDIA Nsight Compute details page uses proper localization for numbers NVIDIA Nsight Compute no longer requires the system locale to be set to English", "keywords": []}, {"id": 203, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2019-3", "display_name": "Updates in 2019.3", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2019-3", "priority": -1, "content": "General Improved performance Bug fixes Kernel launch context and stream are reported as metrics PC sampling configuration options are reported as metrics The default base port for connections to the target changed Section files support multiple, named Body fields NvRules allows users to query metrics using any convertible data type NVIDIA Nsight Compute Support for filtering kernel launches using their NVTX context Support for new options to select the connection port range The Profile activity supports configuring PC sampling parameters Sections on the Details page support selecting individual bodies NVIDIA Nsight Compute CLI Support for stepping to kernel launches from specific NVTX contexts Support for new --port and --max-connections options Support for new --sampling-* options to configure PC sampling parameters Section file errors are reported with --list-sections A warning is shown if some section files could not be loaded Resolved Issues Using the \u2013summary option works for reports that include invalid metrics The full process executable filename is reported for QNX targets The project system now properly stores the state of opened reports Fixed PTX syntax highlighting Fixed an issue when switching between manual and auto profiling in NVIDIA Nsight Compute The source page in NVIDIA Nsight Compute now works with results from multiple processes Charts on the NVIDIA Nsight Compute details page uses proper localization for 
numbers NVIDIA Nsight Compute no longer requires the system locale to be set to English", "keywords": []}, {"id": 204, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2019-3-1", "display_name": "Updates in 2019.3.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2019-3-1", "priority": -1, "content": "NVIDIA Nsight Compute Added ability to send bug reports and suggestions for features using Send Feedback in the Help menu Resolved Issues Fixed calculation of theoretical occupancy for grids with blocks that are not a multiple of 32 threads Fixed intercepting child processes launched through Python\u2019s subprocess.Popen class Fixed issue of NVTX push/pop ranges not showing up for child threads in NVIDIA Nsight Compute CLI Fixed performance regression for metric lookups on the Source page Fixed description in rule covering the IMC stall reason Fixed cases where baseline values were not correctly calculated in the Memory tables when comparing reports of different architectures Fixed incorrect calculation of baseline values in the Executed Instruction Mix chart Fixed accessing instanced metrics in the NvRules API Fixed a bug that could cause the collection of unnecessary metrics in the Interactive Profile activity Fixed potential crash on exit of the profiled target application Switched underlying metric for SOL FB in the GPU Speed Of Light section to be driven by dram__throughput.avg.pct_of_peak_sustained_elapsed instead of fbpa__throughput.avg.pct_of_peak_sustained_elapsed", "keywords": []}, {"id": 205, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2019-4", "display_name": "Updates in 2019.4", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2019-4", "priority": -1, "content": "General Added support for the Linux PowerPC target platform Reduced the profiling overhead, especially if no source metrics are collected Reduced the overhead for non-profiled kernels Improved the deployment performance during remote launches Trying to profile on an unsupported GPU now shows an \u201cUnsupported GPU\u201d error message Added support for the %i sequential number placeholder to generate unique report file names Added support for smsp__sass_* metrics on Volta and newer GPUs The launch__occupancy_limit_shared_mem metric now reports the device block limit if no shared memory is used by the kernel NVIDIA Nsight Compute The Profile activity shows the command line used to launch ncu The heatmap on the Source page now shows the represented metric in its tooltip The Memory Workload Analysis Chart on the Details page now supports baselines When applying rules, a message displaying the number of new rule results is shown in the status bar The Visual Profiler Transition Guide was added to the documentation Connection dialog activity options were added to the documentation A warning dialog is shown if the application is resumed without Auto-Profile enabled Pausing the application now has immediate feedback in the toolbar controls Added a Close All command to the File menu NVIDIA Nsight Compute CLI The --query-metrics option now shows only metric base names for faster metric queries. The new option --query-metrics-mode can be used to display the valid suffixes for each base metric.
Added support for passing response files using the @ operator to specify command line options through a file Resolved Issues Fixed an issue that reported the wrong executable name in the Session page when attaching Fixed issues where chart labels were shown elided on the Details page Fixed an issue that caused the cache hit rates to be shown incorrectly when baselines were added Fixed an illegal memory access when collecting sass__*_histogram metrics for applications using PyTorch on Pascal GPUs Fixed an issue when attempting to collect all smsp__* metrics on Volta and newer GPUs Fixed an issue when profiling multi-context applications Fixed that profiling start/stop settings from the connection dialog weren\u2019t properly passed to the interactive profile activity Fixed that certain smsp__warp_cycles_per_issue_stall* metrics returned negative values on Pascal GPUs Fixed that metric names were truncated in the --page details non-CSV command line output Fixed that the target application could crash if a connection port was used by another application with higher privileges", "keywords": []}, {"id": 206, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2019-5", "display_name": "Updates in 2019.5", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2019-5", "priority": -1, "content": "General Added section sets to reduce the default overhead and make it easier to configure metric sets for profiling Reduced the size of the installation Added support for CUDA Graphs Recapture API The NvRules API now supports accessing correlation IDs for instanced metrics Added breakdown tables for SOL SM and SOL Memory in the Speed Of Light section for Volta+ GPUs NVIDIA Nsight Compute Added a snap-select feature to the Source page heatmap to help navigate large files Added support for loading remote CUDA-C source files via SSH on demand for Linux x86_64 targets Charts on the Details page provide better help in tool tips when hovering over metric names Improved the performance of the Source page when scrolling or collapsing The charts for Warp States and Compute pipelines are now sorted by value NVIDIA Nsight Compute CLI Added support for GPU cache control, see --cache-control Added support for setting the kernel name base in command line output, see --kernel-base Added support for listing the available names for --chips , see --list-chips Improved the stability on Windows when using --target-processes all Reduced the profiling overhead for small metric sets in applications with many kernels Resolved Issues Reduced the overhead caused by demangling kernel names multiple times Fixed an issue that kernel names were not demangled in the CUDA Graph Nodes resources window The connection dialog better disables unsupported combinations or warns of invalid entries Fixed metric thread_inst_executed_true to derive from smsp__not_predicated_off_thread_inst_executed on Volta+ GPUs Fixed an issue with computing the theoretical occupancy on GV100 Selecting an entry on the Source page heatmap no longer selects the respective source line, to avoid losing the current selection Fixed the current view indicator of the Source page heatmap to be line-accurate Fixed an issue when comparing metrics from Pascal and later architectures on the Summary page Fixed an issue that metrics representing constant values on Volta+ couldn\u2019t be collected without non-constant metrics", "keywords": []}, {"id": 207, "doc_id": 247,
"filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2019-5-1", "display_name": "Updates in 2019.5.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2019-5-1", "priority": -1, "content": "General Added support for Nsight Compute Visual Studio Integration", "keywords": []}, {"id": 208, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2019-5-2", "display_name": "Updates in 2019.5.2", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2019-5-2", "priority": -1, "content": "General Bug fixes", "keywords": []}, {"id": 209, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2019-5-3", "display_name": "Updates in 2019.5.3", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2019-5-3", "priority": -1, "content": "General More C++ kernel names can be properly demangled", "keywords": []}, {"id": 210, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2020-1", "display_name": "Updates in 2020.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2020-1", "priority": -1, "content": "General Added support for the NVIDIA GA100/SM 8.x GPU architecture Removed support for the Pascal SM 6.x GPU architecture Windows 7 is not a supported host or target platform anymore Added a rule for reporting uncoalesced memory accesses as part of the Source Counters section Added support for report name placeholders %p, %q, %i and %h The Kernel Profiling Guide was added to the documentation NVIDIA Nsight Compute The UI command was renamed from nv-nsight-cu to ncu-ui . Old names remain for backwards compatibility. Added support for roofline analysis charts Added linked hot spot tables in section bodies to indicate performance problems in the source code Added section navigation links in rule results to quickly jump to the referenced section Added a new option to select how kernel names are shown in the UI Added new memory tables for the L1/TEX cache and the L2 cache. The old tables are still available for backwards compatibility and moved to a new section containing deprecated UI elements. Memory tables now show the metric name as a tooltip Source resolution now takes into account file properties when selecting a file from disk Results in the profile report can now be filtered by NVTX range The Source page now supports collapsing views even for single files The UI shows profiler error messages as dismissible banners for increased visibility Improved the baseline name control in the profiler report header NVIDIA Nsight Compute CLI The CLI command was renamed from nv-nsight-cu-cli to ncu . Old names remain for backwards compatibility. Queried metrics on GV100 and newer chips are sorted alphabetically Multiple instances of NVIDIA Nsight Compute CLI can now run concurrently on the same system, e.g. for profiling individual MPI ranks. Profiled kernels are serialized across all processes using a system-wide file lock. 
Resolved Issues More C++ kernel names can be properly demangled Fixed a free(): invalid pointer error when profiling applications using pytorch &gt; 19.07 Fixed profiling IBM Spectrum MPI applications that require PAMI GPU hooks ( --smpiargs=&quot;-gpu&quot; ) Fixed that the first kernel instruction was missed when computing sass__inst_executed_per_opcode Reduced surplus DRAM write traffic created from flushing caches during kernel replay The Compute Workload Analysis section shows the IMMA pipeline on GV11b GPUs Profile reports now scroll properly on MacOS when using a trackpad Relative output filenames for the Profile activity now use the document directory, instead of the current working directory Fixed path expansion of ~ on Windows Memory access information is now shown properly for RED assembly instructions on the Source page Fixed that user PYTHONHOME and PYTHONPATH environment variables would be picked up by NVIDIA Nsight Compute, resulting in locale encoding issues.", "keywords": []}, {"id": 211, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2020-1-1", "display_name": "Updates in 2020.1.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2020-1-1", "priority": -1, "content": "General Added support for the NVIDIA GA100/SM 8.x GPU architecture Metrics passed to --metrics on the NVIDIA Nsight Compute CLI or in the respective Profile activity option are automatically expanded to all first-level sub-metrics if required. See the documentation on --metrics for more details. Added new rules for detecting inefficiencies of using the sparse data compression on the NVIDIA Ampere architecture. The version of the NVIDIA Nsight Compute target collecting the results is shown in the Session page. Added new launch__grid_dim_[x,y,z] and launch__block_dim_[x,y,z] metrics. NVIDIA Nsight Compute The Break on API Error functionality has been improved when auto profiling. NVIDIA Nsight Compute CLI The full path to the report output file is printed after profiling. Added and corrected metrics in the nvprof Metric Comparison table. Resolved Issues Documented the breakdown: metrics prefix. Fixed handling of escaped domain delimiters in NVTX filter expressions. Fixed issues with the occupancy charts for small block sizes. Fixed an issue when choosing a default report page in the options dialog. Fixed that the scroll bar could overlap the content when exporting the report page as an image.", "keywords": []}, {"id": 212, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2020-1-2", "display_name": "Updates in 2020.1.2", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2020-1-2", "priority": -1, "content": "General The NVIDIA Nsight Compute installer for Mac is now code-signed and notarized. Disabled the creation of the Python cache when executing rules to avoid permission issues and signing conflicts. Resolved Issues Fixed the launcher script of the NVIDIA Nsight Compute CLI to no longer fail if uname -p is not available. 
Fixed the API parameter capture for function cuDeviceGetLuid .", "keywords": []}, {"id": 213, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2020-2", "display_name": "Updates in 2020.2", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2020-2", "priority": -1, "content": "General Added support for the NVIDIA Ampere GPUs with compute capability 8.6 and CUDA toolkit 11.1. Added support for application replay to collect metric results across multiple application runs, instead of replaying individual kernels. Added new launch__device_id metric. Added support for NVLink ( nvl* ) metrics for GPUs with compute capabilities 7.0, 7.5 and 8.0 Added documentation for memory charts and tables in the Profiling Guide . NVIDIA Nsight Compute Updated menu and toolbar layout. Added support for zoom and pan on roofline charts. The Resources tool window shows the current CUDA stream attributes. The memory chart shows a heatmap for link and port utilization. The hot-spot tables in the Source Counters section now show values as percentages, too. On-demand resolve of remote CUDA-C source is now available for MacOS hosts. Metric columns in the Summary and Raw pages are now sortable. Added a new option to set the number of recent API calls shown in the API Stream tool window. NVIDIA Nsight Compute CLI CLI output now shows NVTX payload information. CSV output now shows NVTX states. Added a new --replay-mode option to select the mechanism used for replaying a kernel launch multiple times. Added a new --kill option to terminate the application once all requested kernels were profiled. Added a new --log-file option to decide the output stream for printing tool output. Added a new --check-exit-code option to decide if the child application exit code should be checked. Resolved Issues The profiling progress dialog is not dismissed automatically anymore after an error. The inter-process lock is now automatically given write permissions for all users. All project extensions are enabled in the default dialog filter. Fixed handling of targets using tcsh during remote profiling. Fixed handling of quoted application arguments on Windows.", "keywords": []}, {"id": 214, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2020-2-1", "display_name": "Updates in 2020.2.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2020-2-1", "priority": -1, "content": "Resolved Issues Fixed several issues related to auto-profiling in the UI. Fixed a metric collection issue when profiling kernels on different GPU architectures with application replay. Fixed a performance problem related to profiling large process trees. Fixed that occupancy charts would not render correctly when comparing against baselines. Fixed that no memory metrics were shown on the Source page for LDGSTS instructions. Fixed the automatic sorting on the Summary and Raw pages. Fixed an issue that would cause the NVIDIA Nsight Compute CLI to consume too much memory when importing or printing reports. Long kernel names are now elided in the Details page source hot spot tables. 
Fixed that function names in the Resources tool window were demangled differently.", "keywords": []}, {"id": 215, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2020-3", "display_name": "Updates in 2020.3", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2020-3", "priority": -1, "content": "General Added support for derived metrics in section files. Derived metrics can be used to create new metrics based on existing metrics and constants. See the Customization Guide for details. Added a new Import Source ( --import-source ) option to the UI and command line to permanently import source files into the report, when available. Added a new section that shows selected NVLink metrics on supported systems. Added a new launch__func_cache_config metric to the Launch Statistics section. Added new branch efficiency metrics to the Source Counters section, including smsp__sass_average_branch_targets_threads_uniform.pct to replace nvprof\u2019s branch_efficiency , as well as instruction-level metrics smsp__branch_targets_threads_divergent , smsp__branch_targets_threads_uniform and branch_inst_executed . A warning is shown if kernel replay starts staging GPU memory to CPU memory or the file system. Section and rule files are deployed to a versioned directory in the user\u2019s home directory to allow easier editing of those files, and to prevent modifying the base installation. Removed support for NVLink ( nvl* ) metrics due to a potential application hang during data collection. The metrics will be added back in a future version of the driver/tool. NVIDIA Nsight Compute Added support for Profile Series . Series allow you to profile a kernel with a range of configurable parameters to analyze the performance of each combination. Added a new Allocations view to the Resources tool window which shows the state of all current memory allocations. Added a new Memory Pools view to the Resources tool window which shows the state of all current memory pools. Added coverage of peer memory to the Memory Chart . The Source page now shows the number of excessive sectors requested from L1 or L2, e.g. due to uncoalesced memory accesses. The Source column on the Source page can now be scrolled horizontally. The kernel duration gpu__time_duration.sum was added as a column on the Summary page. Improved the performance of application replay when not all kernels in the application are profiled. NVIDIA Nsight Compute CLI Added a new --app-replay-match option to select the mechanism used for matching kernel instances across application replay passes. An error is shown if --nvtx-include/exclude are used without --nvtx . Resolved Issues The Grid Size column on the Raw page now shows the CUDA grid size like the Launch Statistics section, rather than the combined grid and block sizes. The Branch Resolving warp stall reason was added to the PC sampling metric groups and the Warp State Statistics section. The API Stream tool window shows kernel names according to the selected Function Name Mode. Fixed that an incorrect line could be shown after a heatmap selection on the Source page. Fixed incorrect metric usage for system memory in the Memory Chart .
Previously, all requested memory of L2 from system memory was reported instead of only the portion that missed in L2.", "keywords": []}, {"id": 216, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2020-3-1", "display_name": "Updates in 2020.3.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2020-3-1", "priority": -1, "content": "General Added support for LDSM instruction-level metrics. NVIDIA Nsight Compute LDSM instruction-level metrics are shown in the Source page and memory tables. Improved reporting and documentation for collecting Profile Series . Frozen columns in the Source page are automatically scrolled into view. Resolved Issues Fixed an issue when profiling multi-threaded applications. Fixed an issue that NVIDIA Nsight Compute would not automatically restart when using Reset Application Data . Fixed issues with target applications using libstdc++. Fixed an issue when collecting single-pass metrics in multiple Nsight Compute instances. Fixed an issue when using Kernel ID and setting Launch Capture Count as non-zero in the UI\u2019s Profile activity. Fixed an issue that prevented different users on the same Linux system from using NVIDIA Nsight Compute in shared instance mode. Fixed an issue that prevented resources from being properly renamed using NVTX information in the UI.", "keywords": []}, {"id": 217, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2021-1", "display_name": "Updates in 2021.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2021-1", "priority": -1, "content": "General Added support for the CUDA toolkit 11.3. Added support for the OptiX 7 API . GpuArch enumeration values used for filtering in section files were renamed from architecture names to compute capabilities. NVTX states can now be accessed via the NvRules API . Added a rule for the Occupancy section. NVIDIA Nsight Compute Added support for new CUDA asynchronous allocator attributes in the Memory Pools resources view. Added a topology chart and link properties table in the NVLink section. The selected metric column is scrolled into view on the Source page when a new metric is selected. Users can choose the Source heatmap color scale in the Options dialog. NVIDIA Nsight Compute CLI Added file-based application replay as the new default application replay mode. File-based replay uses a temporary file for keeping replay data, instead of allocating it in memory. This keeps the required memory footprint close to constant, independent of the number of profiled kernels. Users can switch between buffer modes using the --app-replay-buffer option. CLI output now shows NVTX color and message information. --kernel-regex and --kernel-regex-base options are deprecated and replaced by --kernel-name and --kernel-name-base , respectively. All options that support regex matching require the regex: prefix before the argument, e.g. &lt;option&gt; &lt;regex:expression&gt; Resolved Issues Fixed that baselines were not updated properly on the Comments page. Fixed that NVTX ranges named using their payloads can be used in NVTX filtering expressions. Fixed crashes in MacOSX hosts when terminating the target application.
The NVLINK( nvl* ) metrics are now added back.", "keywords": []}, {"id": 218, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2021-1-1", "display_name": "Updates in 2021.1.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2021-1-1", "priority": -1, "content": "General Updated OpenSSL library to version 1.1.1k. NVIDIA Nsight Compute Remote source resolution can now use the IP address, in addition to the hostname, to find the necessary SSH target. NVIDIA Nsight Compute CLI Added support for the existing command line options for kernel filtering while importing data from an existing report file using --import . Option -k is no longer treated as the deprecated option --kernel-regex . Resolved Issues Fixed failure to profile kernels from applications that use the CUDA graphics interop APIs to share semaphores. Fixed wavefront metric in the L1TEX table for writes to shared memory on GA10x chips. Fixed an issue resulting in incomplete data collection for the interactive profile activity after switching from single-pass mode to collecting multiple passes in the same session. Fixed values shown in the minimap of the Source page when all functions are collapsed. Fixed an issue causing names set by the NVTX naming APIs of one application to be applied to all subsequent sessions of the same instance of NVIDIA Nsight Compute. Fixed behavior of horizontal scroll bars when clicking in the source views on the Source page. Fixed appearance of multi-line entries in column chooser on the Source page. Fixed enablement state of the reset button on the Connection dialog. Fixed potential crash of NVIDIA Nsight Compute when the window size becomes small while on the Source page. Fixed potential crash of NVIDIA Nsight Compute when relative paths for section/rules files could not be found. Fixed potential crash of NVIDIA Nsight Compute after removing baselines.", "keywords": []}, {"id": 219, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2021-2", "display_name": "Updates in 2021.2", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2021-2", "priority": -1, "content": "General Added support for the CUDA toolkit 11.4. Added support for OptiX version 7.3. Added support for profiling on NVIDIA virtual GPUs (vGPUs) on an upcoming GRID/vGPU release. Added a new Python-based report interface for interacting with report files from Python scripts. Added a new rule to warn users when sampling metrics were selected, but no sampling data was collected. Renamed SOL to Throughput in the Speed of Light section. Renamed several memory_* metrics used on the Source page, to better reflect the measured value. See the Source page documentation for more details. NVIDIA Nsight Compute Added support for opening cubin files in a Standalone Source Viewer without profiling the application. Moved the output of all rules so that it is visible even if a section\u2019s body is collapsed. Visibility of the rules\u2019 output can be toggled by a new button in the report header. The profiler report header now shows the report name for each baseline when ambiguous. Rules can define Focused Metrics that are most important for triggering their result output. Metrics are provided per result message with additional information, such as the underlying conditions and thresholds. 
Memory tables show tooltips for cells with derived metric calculations. Added a knowledge base service to show more comprehensive background information on metric names and descriptions in their tooltips. Following a link in the Source Counters hot spot tables automatically selects the corresponding metric in the Source page. Added new columns for visualizing register dependencies in the SASS view of the Source page . Functions in the SASS view are now sorted by name. Added support for OptiX 7.x resource tracking in the interactive profile activity. The Resources tool window will show information on instantiated optixDeviceContexts, optixModules, optixProgramGroups, optixPipelines and optixDenoiser objects. Added support for new CUDA graph memory allocation APIs. Improved consistency between command line parameters and the Next Trigger filter in the API Stream window for handling of regex inputs. The Next Trigger filter now considers the kernel/API name as a regular expression only if the string has the regex: prefix. Added ability to select font settings in the options dialog. Added ability to configure the metrics shown on the Summary page via the options dialog. The selected heatmap color scale now also applies to the Memory chart . The ncu-ui script now checks for missing library dependencies, such as OpenGL or Qt . NVIDIA Nsight Compute CLI Added environment variable NV_COMPUTE_PROFILER_DISABLE_STOCK_FILE_DEPLOYMENT=1 to skip deployment of section and rule files. Resolved Issues Fixed a performance issue in the NVIDIA Nsight Compute CLI when using --page raw --csv --units auto . The SSH key passphrase is no longer persisted in the project file. Fixed state of restore button in connection dialog. The button now supports restoring the default settings if the current settings differ from the default. Fixed that the complete GPU name could not be shown in the NVLINK topology diagram on MacOS. Fixed that collapsing the Source view reset the selected metrics. Fixed that correlated lines could differ between filtered and unfiltered views of the executed functions. Fixed that two application icons were shown in the MacOS dock. Improved HiDPI awareness.", "keywords": []}, {"id": 220, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2021-2-1", "display_name": "Updates in 2021.2.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2021-2-1", "priority": -1, "content": "General Reduced the memory overhead when loading reports in the Python Report Interface . Resolved Issues Fixed that links in the Memory Allocations Resource view were not working correctly. Fixed that NVTX state might not be correctly reset between interactive profiling activities. Fixed that the UI could crash when opening baselines from different GPU architectures.", "keywords": []}, {"id": 221, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2021-2-2", "display_name": "Updates in 2021.2.2", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2021-2-2", "priority": -1, "content": "General Changes for profiling support on NVIDIA virtual GPUs (vGPUs) for an upcoming GRID/vGPU release. 
Resolved Issues Fixed a hang issue on QNX when using the --target-processes all option while profiling shell scripts.", "keywords": []}, {"id": 222, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2021-2-3", "display_name": "Updates in 2021.2.3", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2021-2-3", "priority": -1, "content": "General Added support for the NVIDIA GA10b chip. Resolved Issues Improved error message on QNX for failure to deploy stock section and rules files.", "keywords": []}, {"id": 223, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2021-2-4", "display_name": "Updates in 2021.2.4", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2021-2-4", "priority": -1, "content": "Resolved Issues Fixed an issue that prevented remote interactive profiling of kernels on NVIDIA GA10b chips.", "keywords": []}, {"id": 224, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2021-2-5", "display_name": "Updates in 2021.2.5", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2021-2-5", "priority": -1, "content": "Resolved Issues Improved the handling of the performance monitor reservation on mobile target GPUs.", "keywords": []}, {"id": 225, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2021-2-6", "display_name": "Updates in 2021.2.6", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2021-2-6", "priority": -1, "content": "Resolved Issues Fixed an issue causing a hang on QNX after pressing ctrl+c while profiling a multi-process application.", "keywords": []}, {"id": 226, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2021-2-7", "display_name": "Updates in 2021.2.7", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2021-2-7", "priority": -1, "content": "General Enabled stack canaries with random canary values for L4T builds.", "keywords": []}, {"id": 227, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2021-2-8", "display_name": "Updates in 2021.2.8", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2021-2-8", "priority": -1, "content": "General Updated Python libraries to version 3.10.5.", "keywords": []}, {"id": 228, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2021-2-9", "display_name": "Updates in 2021.2.9", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2021-2-9", "priority": -1, "content": "NVIDIA Nsight Compute Clarified when not all metrics for the roofline chart could be collected on the current chip.", "keywords": []}, {"id": 229, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2021-3", "display_name": "Updates in 2021.3", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2021-3", "priority": -1, 
"content": "General Added support for the CUDA toolkit 11.5. Added a new rule for detecting inefficient memory access patterns in the L1TEX cache and L2 cache. Added a new rule for detecting high usage of system or peer memory. Added new IAction::sass_by_pc function to the the NvRules API . The Python-based report interface is now available for Windows and MacOS hosts, too. Added Hierarchical Roofline section files in a new \u201croofline\u201d section set. Added support for collecting CPU call stack information. NVIDIA Nsight Compute Added support for new remote profiling SSH connection and authentication options as well as local SSH configuration files. Added an Occupancy Calculator which can be opened directly from a profile report or as a new activity. It offers feature parity to the CUDA Occupancy Calculator spreadsheet . Added new Baselines tool window to manage (hide, update, re-order, save/load) baseline selections. The Source page views now support multi-line/cell selection and copy/paste. Different colors are used for highlighting selections and correlated lines. The search edit on the Source page now supports Shift+Enter to search in reverse direction. The Memory Workload Analysis Chart can be configured to show throughput values instead of transferred bytes. The Profile activity now supports the --devices option. The NVLink Topology diagram displays per NVLink metrics. Added a new tool window showing the CPU call stack at the location where the current thread was suspended during interactive profiling activities. If enabled, the Call Stack / NVTX page of the profile report shows the captured CPU call stack for the selected kernel launch. NVIDIA Nsight Compute CLI Added support for printing source/metric content with the new --page source and --print-source  command line options . Added new option --call-stack to enable collecting the CPU call stack for every profiled kernel launch. Resolved Issues Fixed that memory_* metrics could not be collected with the --metrics option. Fixed that selection and copy/paste was not supported for section header tables on the Details page. Fixed issues with the Source page when collapsing the content. Fixed that the UI could crash when applying rules to a new profile result. Fixed that PC Sampling metrics were not available for Profile Series . Fixed that local profiling did not work if no non-loopback address was configured for the system. Fixed termination of remote-launched applications. On QNX, terminating an application profiled via Remote Launch is now supported. Canceling remote-launched Profile activities is now supported.", "keywords": []}, {"id": 230, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2021-3-1", "display_name": "Updates in 2021.3.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2021-3-1", "priority": -1, "content": "Resolved Issues Fixed that kernels with the same name and launch configuration were in some scenarios associated with the wrong profiling results during application replay. Fixed an issue with binary forward compatibility of the report format. Fixed an issue with applications calling into the CUDA API during process teardown. Fixed an issue profiling application using pre-CUDA API 3.1 contexts. Fixed a crash when resolving files on the Source page. Fixed that opening reports with large embedded CUBINs would hang the UI. 
Fixed an issue with remote profiling on a target where the UI is already launched.", "keywords": []}, {"id": 231, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2022-1", "display_name": "Updates in 2022.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2022-1", "priority": -1, "content": "General Added support for the CUDA toolkit 11.6. Added support for GA103 chips. Added a new Range Replay mode to profile ranges of multiple, concurrent kernels. Range replay is available in the NVIDIA Nsight Compute CLI and the non-interactive Profile activity. Added a new rule to detect non-fused floating-point instructions. The Uncoalesced Memory access rules now show results in a dynamic table. Unix Domain Sockets and Windows Named Pipes are used for local connection between the host and target processes on x86_64 Linux and Windows, respectively. The NvRules API now supports querying action names using different function name bases (e.g. demangled). NVIDIA Nsight Compute The default report page is now chosen automatically when opening a report. Added coverage for ECC (Error Correction Code) operations in the L2 Cache table of the Memory Analysis section. Added a new L2 Evict Policies table to the Memory Analysis section. The Occupancy Calculator now updates automatically when the input changes. Added new metric Thread Instructions Executed to the Source page. Added tooltips to the Register Dependency columns in the Source page to identify the associated register more conveniently. Improved the selection of Sections and Sets in the Profile activity connection dialog. NVLink utilization is shown in the NVLink Tables section. NVLink links are colored according to the measured throughput. NVIDIA Nsight Compute CLI --kernel-regex and --kernel-regex-base options are no longer supported. Alternate options are --kernel-name and --kernel-name-base respectively, added in 2021.1.0. Added support to resolve CUDA source files in the --page source output with the new --resolve-source-file  command line option . Added new option --target-processes-filter to filter the processes being profiled by name. The CPU Stack Trace is shown in the NVIDIA Nsight Compute CLI output. Resolved Issues Fixed the calculation of aggregated average instruction execution metrics in non-SASS views on the Source page. Fixed that atomic instructions were counted as both loads and stores in the Memory Analysis tables.", "keywords": []}, {"id": 232, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2022-1-1", "display_name": "Updates in 2022.1.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2022-1-1", "priority": -1, "content": "General Filtering kernel launches or profile results based on NVTX domains/ranges now takes registered strings in the payload field into account, if the range name is empty. Added support for the suffix .max_rate for ratio metrics. Resolved Issues Fixed a crash during the disassembly of the kernel\u2019s SASS code for the Source page. Fixed a crash on exit of the NVIDIA Nsight Compute UI. Fixed a hang during profiling when CPU call stack collection is enabled. Fixed a missing flush of UVM buffers before taking memory checkpoints during Range Replay . Fixed tracking of memory during Range Replay , if the CUDA context has any device mapped memory allocations. 
Fixed the maximum available shared memory sizes in the Occupancy Calculator for NVIDIA Ampere GPUs. Fixed that the shared memory usage of the kernel was incorrectly initialized when opening the Occupancy Calculator from a profile report.", "keywords": []}, {"id": 233, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2022-2", "display_name": "Updates in 2022.2", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2022-2", "priority": -1, "content": "General Added support for the CUDA toolkit 11.7. Improved performance for profiling and metric querying. Added Linux (aarch64 sbsa) as a supported host platform . The NVIDIA Nsight Compute CLI stores the command line arguments, which can be viewed in the Session report page. Added an API to query the version of the Python Report and NvRules interfaces. Added an API to query the PTX in the Python Report and NvRules interfaces. NVIDIA Nsight Compute The Acceleration Structure Viewer allows inspection of acceleration structures built using the OptiX API for debugging and performance optimization. The Source page column chooser now supports enabling or disabling groups of metrics. Note that, to make the view easier to use, not all metrics are enabled by default anymore. The Resources tool window now links to the exact target resource instances for CUDA resource types. The Resources tool window now shows the instanced nodes for CUDA graphs. The Resources tool window now shows the loading state and number of loaded functions for CUDA Modules . The Resources tool window now shows the graph node enablement state for applicable instanced graph nodes. The Resources tool window now shows the graph node priorities for instanced kernel graph nodes. Added regex support in the Next Trigger filter for NVTX based filtering. The Next Trigger filter now considers the NVTX config as a regular expression if the regex: prefix is specified. Added regex support in the report\u2019s Filter Results dialog. Added keyboard shortcuts to navigate between the pages in a report. The behavior for selecting sets and sections is now consistent between the Sections/Rules Info window and the non-interactive profile activity . Reports can now be opened directly from the welcome dialog. NVIDIA Nsight Compute CLI Added support for collecting sampling-based warp stalls in range replay mode. Added regex support in NVTX filtering . The metric type is shown when querying metrics. Resolved Issues Reduced overhead of connecting to the host UI for non-interactive remote profiling sessions. Fixed issues with persisting the Source page state when collapsing or switching between results. Fixed an issue where locked GPU clocks were not reset when terminating the NVIDIA Nsight Compute CLI while profiling a kernel. Fixed issues with selecting and copying text from the Details page tables. Fixed an issue with opening report files in the UI on MacOSX. Fixed an issue with the Freeze API option.", "keywords": []}, {"id": 234, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2022-2-1", "display_name": "Updates in 2022.2.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2022-2-1", "priority": -1, "content": "Resolved Issues Fixed an issue that caused some tooltips to not show up for the charts on the Details page. 
Fixed the incorrect reporting of the accessed bytes for LDGSTS (access) traffic in the L1TEX memory table. Fixed an issue that resulted in an empty view on the Source page after resolving multiple source files at once. Fixed a failure to connect to remote machines over SSH due to a mismatch in the configuration of data compression. Fixed a potential failure to profile kernels on multiple devices of the same type of chip. The failure occurred when attempting to profile on the second device.", "keywords": []}, {"id": 235, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2022-3", "display_name": "Updates in 2022.3", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2022-3", "priority": -1, "content": "General Added support for the CUDA toolkit 11.8. Added support for the Ada GPU architecture. Added support for the Hopper GPU architecture. Added support for OptiX 7.6 . Added an uncoalescedGlobalAccesses sample CUDA application and document to show how the NVIDIA Nsight Compute profiler can be used to analyze and identify uncoalesced memory accesses that result in inefficient DRAM accesses. Refer to the README, sample code and document under extras/samples/uncoalescedGlobalAccesses . Added a Metrics Reference in the documentation that lists metrics not available through --query-metrics . Reduced the overhead of collecting SASS-patching based metrics. On Multi-Instance GPU (MIG) configurations, NVIDIA Nsight Compute cannot lock clocks anymore. Users are expected to lock clocks externally using nvidia-smi. NVIDIA Nsight Compute Wrapper script nv-nsight-cu is deprecated in favor of ncu-ui and will be removed in a future release. The Source page now supports range replay results. Added a second chart on the Compute Workload Analysis section to avoid mixing metrics with different meanings. NVIDIA Nsight Compute now tracks traversable handles created with optixAccelRelocate . NVIDIA Nsight Compute now tracks traversable handles created as updates from others. The Acceleration Structure viewer now reports unsupported inputs. The Acceleration Structure viewer now supports opening multiple traversable handles. The Acceleration Structure viewer now uses OptiX naming for displayed elements. NVIDIA Nsight Compute CLI Wrapper script nv-nsight-cu-cli is deprecated in favor of ncu and will be removed in a future release. Added new option --filter-mode per-gpu to enable filtering of kernel launches on each GPU separately. Added new option --app-replay-mode relaxed to produce profiling results for valid kernels even if the number of kernel launches is inconsistent across application replay passes. Added a documentation section on supported environment variables . Improved the performance when loading existing reports on the command line. Resolved Issues Fixed an issue when resolving files on the Source page. Fixed an issue when profiling OptiX applications. Fixed an issue in the OptiX traversable handle management caused by clashing handle values. 
Fixed an issue in the Acceleration Structure viewer causing the display of invalid memory when viewing AABB buffers.", "keywords": []}, {"id": 236, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2022-4", "display_name": "Updates in 2022.4", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2022-4", "priority": -1, "content": "General Added support for the CUDA toolkit 12.0. Added support for profiling CUDA graphs as complete workloads instead of as single kernel nodes. Enable this using the Graph Profiling option in the activities . Similarly to range replay results, selected metrics are not available when profiling graphs. Added support for profiling on Windows Subsystem for Linux (WSL2). See the System Requirements for more details. Deprecated nv-nsight-cu and nv-nsight-cu-cli aliases are removed in favor of ncu-ui and ncu . NVIDIA Nsight Compute The Source page now loads disassembly and static analysis results asynchronously in the background. Added a new Metric Details tool window to inspect metric information such as raw value, unit, description or instance values. Open the tool window and select a metric on the Details or Raw page or look up any metric in the focused report directly in the tool window\u2019s search bar. In the Source page PTX view, the source name will be shown as a list of comma-separated files. Added flexibility with NVTX based filtering in the Next Trigger filter, similar to the command line. Filters can now use nvtx-include and nvtx-exclude expressions by adding the nvtx-include: or nvtx-exclude: prefix. NVTX views now show the payload type. Simplified the command line generated by the Profile activity . Reduced the number of steps required to re-run the Profile activity . Improved in-place renaming of Baselines . The Resources tool window now shows the CUDA Dynamic Parallelism state for CUDA functions and modules. OptiX traversable handles can now be exported as Graphviz DOT or SVG files for visualization from the Resources tool window. All OptiX build, instance and geometry flags can be viewed in the Acceleration Structure Viewer . Added OptiX-specific highlight filters to the Acceleration Structure Viewer. Added support for user-specified index strides to the Acceleration Structure Viewer. NVIDIA Nsight Compute CLI Added new option --graph-profiling graph to enable profiling of complete CUDA graphs as single workloads (see the example below). Added new option --filter-mode per-launch-config to enable filtering of kernel launches for each GPU launch parameter separately. Added support to print section body item metrics on the Details page with the new --print-details  command line option . Added support to select what to show in the Metric Name column on the Details page with the new --print-metric-name  command line option . Removed deprecated options: --units , --fp , --summary and --kernel-base . Added support to print launch, session, process and device attributes on the Session page with the new --page session option. Added --kill yes support for application replay mode. Resolved Issues Fixed an issue where NVIDIA Nsight Compute could crash when continuing profiling after transposing the Raw page table. Fixed an issue that caused closing a report document to be delayed by pending source analysis. Fixed support for profiling applications with older OptiX versions. 
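As an example of the graph profiling option mentioned above (a hypothetical invocation; the application name is a placeholder): ncu --graph-profiling graph ./my_graph_app profiles each complete CUDA graph as a single workload instead of profiling its kernel nodes individually.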
Fixed display of OptiX module inputs for IR and built-in modules.", "keywords": []}, {"id": 237, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2022-4-1", "display_name": "Updates in 2022.4.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2022-4-1", "priority": -1, "content": "General Improved the documentation for the NvRules API. The Python report interface links libstdc++ statically. Resolved Issues Fixed an issue that enabled profiling on CUDA Graph uploads. Fixed formatting issues during unit conversion of metric instances. Fixed an issue that could lead to a crash during application replay. Fixed an issue that could lead to a crash in the Python report interface. Fixed typos in the metrics reference documentation and descriptions.", "keywords": []}, {"id": 238, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2023-1", "display_name": "Updates in 2023.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2023-1", "priority": -1, "content": "General Added support for the CUDA toolkit 12.1. Added a new app-range replay mode to profile ranges without API capture by relaunching the entire application multiple times. Added a sharedBankConflicts sample CUDA application and document to show how NVIDIA Nsight Compute can be used to analyze and identify shared memory bank conflicts that result in inefficient shared memory accesses. Refer to the README.TXT file, sample code and document under extras/samples/sharedBankConflicts . Jupyter notebook samples are available in the Nsight training GitHub repository . The equivalent of the high-level Python report interface is now available in rule files. NVIDIA Nsight Compute Added support for profiling individual metrics in the Interactive Profile activity . A new input field for metrics was added in the Metric Selection tool window. Files on remote systems can be opened directly from the menu . Metric- and section-related entries in the menu, Profile activity and Metric Selection tool window were renamed to make them clearer. CPU and GPU NUMA topology metrics can be collected on applicable systems. Topology information is shown in a new NUMA Affinity section . Added content-aware suggestions to the Details page , based on the selected profiling options. Added support for re-resolving source files on the Source page. Not-issued warp stall reasons are removed from the Source Counters section tables and hidden by default on the Source page. Users should focus on regular warp stall reasons by default and only inspect not-issued samples if this distinction is needed. Added support to search missing CUDA source files to permanently import into the report using Source Lookup options in the Interactive Profile activity . The Source page now shows metric values as percentages by default. New buttons are added to support switching between different value modes. NVIDIA Nsight Compute CLI Added support for config files in the current working or user directory to set default ncu parameters. See the General options for more details. Added the --range-filter  command line option , which allows selecting a subset of enabled profile ranges. Added the new --source-folders  command line option , which allows recursively searching for missing CUDA source files to permanently import into the report (see the example below). 
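For illustration, a hypothetical invocation might look as follows (the paths, report name, and application are placeholders, and combining --source-folders with the -o output option is an assumption): ncu --source-folders /home/user/cuda-src -o my_report ./my_app recursively searches /home/user/cuda-src for CUDA source files missing from the collected report and imports them permanently.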
Resolved Issues Fixed performance issues on the Summary and Raw pages for large reports. Improved support for non-ASCII characters in filenames. Fixed an issue with delayed updates of assembly analysis information on the Source page\u2019s Source and PTX views. Fixed potential crashes when using the Python report interface.", "keywords": []}, {"id": 239, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2023-1-1", "display_name": "Updates in 2023.1.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2023-1-1", "priority": -1, "content": "NVIDIA Nsight Compute Added new configuration options to set the default view mode and precision for the Source page. Resolved Issues Added support for the DT_RUNPATH attribute when intercepting calls to dlopen . Fixed an issue where applications or libraries relying on DT_RUNPATH did not find all dynamic libraries when launched by NVIDIA Nsight Compute. Improved interaction between custom additional metrics and the selected metric set. Adding custom metrics no longer forces switching to the custom metric set. Added ability to gracefully skip folders with insufficient access permissions while importing source code. Fixed the calculation of the peak values for the L1 and L2 cache bandwidths in the hierarchical roofline charts. Fixed an issue that prevented modules loaded with the function optixModuleCreateFromPTX from showing up in the Optix: Modules table of the Resources tool window. Fixed handling of deprecated functions when querying function pointers from the OptiX interception library. Fixed that sometimes sections or rules couldn\u2019t be easily selected in the tool window. Fixed an issue with Reset Application Data that prevented some settings from correctly resetting. Fixed potential crash of NVIDIA Nsight Compute when Reset Application Data was executed multiple times in a row. Fixed a crash when saving or loading baselines for non-kernel results. Fixed that memory written while executing a CUDA graph was not properly restored in single-pass graph profiling. Fixed potential memory leak while collecting SW counters for modules with unpatched kernel functions.", "keywords": []}, {"id": 240, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2023-2", "display_name": "Updates in 2023.2", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2023-2", "priority": -1, "content": "General Extended the rules system to show estimates of the potential speedup that can be achieved by addressing the corresponding performance bottleneck. These speedups allow prioritizing applicable rules and help focus first on optimization strategies with the highest potential performance gain. Added support for rules to highlight individual source lines. Lines with global/local memory access with high excessive sector counts and shared accesses with many bank conflicts are automatically detected and highlighted. Added the ability to query metric attributes in the NvRules API. Added support for creating instanced metrics through the NvRules API. For Orin+ mobile chips on the Linux aarch64 platform, added metric ( mcc__* ) support for the memory controller channel (MC Channel) unit, which connects to the DRAM. NVIDIA Nsight Compute Added hyperlinks to the SASS View of the Source Page for instructions that reference others by address or offset. 
This enables quickly jumping to the target instruction of a branch. Improved the search bar in the Metric Details tool window. The search string now matches any part of the metric names, and the matching results are shown in a sorted order. Added a visual indication of the scale of metric value changes when baselines are used. The background bars in the table cells of the Details Page make it easy to quickly identify which metric values increased or decreased the most. The color scheme can be configured in the Baselines tool window . Added a rules toggle button on the Summary Page , which allows hiding the bottom pane with the rules output for the selected kernel launch. Added support for configuring the properties shown on the Summary Page using the Metrics/Properties profile option . Added percentage bars on the Summary Page . NVIDIA Nsight Compute CLI Added support for tracking child processes launched with posix_spawn(p) when using --target-processes all . Added support for tracking child processes launched with system() on Windows and Linux (aarch64, x86_64) when using --target-processes all . Resolved Issues Fixed table alignment in the output of the NVIDIA Nsight Compute CLI on Windows when printing Unicode characters. Fixed view corruption in the Source Page after switching from the collapsed view to the expanded view. Fixed missing tooltip descriptions for some SASS instructions. Fixed potential crash when copying from the Resources tool window using CTRL+C. Fixed possible crash when restoring sections in the Sections tool window.", "keywords": []}, {"id": 241, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2023-2-1", "display_name": "Updates in 2023.2.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2023-2-1", "priority": -1, "content": "Resolved Issues Fixed a crash during application replay when the temporary directory is located on a network file system (NFS). Improved the detection mechanism for the C2C interface. Added caching of the detected configuration to reduce overhead.", "keywords": []}, {"id": 242, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2023-2-2", "display_name": "Updates in 2023.2.2", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2023-2-2", "priority": -1, "content": "Resolved Issues Fixed possible crash when profiling CUDA graphs on multiple GPUs. Fixed the detection mechanism of the C2C interface, i.e. the metric c2clink__present . The fix requires the display driver shipping with this release or any newer driver.", "keywords": []}, {"id": 243, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2023-3", "display_name": "Updates in 2023.3", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2023-3", "priority": -1, "content": "General NVIDIA Nsight Compute now supports collecting many metrics by sampling the GPU\u2019s performance monitors (PM) periodically at fixed intervals. The results can be visualized on a timeline . Added WSL profiling support on Windows 10 WSL with OS build version 19044 and greater. WSL profiling is not supported on Windows 10 WSL for systems that exceed 1 TB of system memory. Rule outputs are prioritized to improve the accuracy of estimated speedups. 
The Summary page now shows the most actionable optimization advice when a result row is selected. Improved the handling and reporting of unavailable metrics during collection and when applying rules. Added an instructionMix sample CUDA application and document to show how to use NVIDIA Nsight Compute to analyze and identify the performance bottleneck due to an imbalanced instruction mix. Refer to the README.TXT file, sample code, and document under extras/samples/instructionMix . NVIDIA Nsight Compute Added support for viewing the source files of two profile results side by side using Source Comparison . This allows you to quickly identify source differences and understand changes in metric values. The Summary page is now the default page when a report is opened. The previous behavior can be enabled in the options dialog. On the Summary and Raw pages, values from all/selected rows are automatically aggregated in the column header for applicable metrics. Selected individual cells are aggregated in the bottom status bar. Added Launch Name and Device options in the filter dialog launched by the Apply Filters button in the report header . Added support for source view profiles that persist the Source page configuration and allow you to re-apply it to other reports. The Metric Details tool window now supports querying metrics beyond the current report by using the chip:&lt;chipname&gt; tag in the search. Added support for CUDA Graph Edge Data (such as port and dependency type) and CUDA Graph Conditional Handles in the Resources tool window. The Acceleration Structure Viewer and Resources tool window now support OptiX Opacity Micromaps. NVIDIA Nsight Compute CLI Tracking and profiling all child processes ( --target-processes all ) is now the default for ncu. Improved reporting of requested but unavailable metrics. Metrics requested in section files are by default considered optional and only cause a warning to be shown. Resolved Issues Support for tracking child processes launched with system() is available on Linux ppc64le. Improved the behavior of following SASS navigation links on the Source page. Fixed issues with profiling CUDA graphs in graph-profiling mode when nodes are associated with a non-current CUDA context. Fixed an issue in L2 bandwidth calculations in the hierarchical roofline sections.", "keywords": []}, {"id": 244, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2023-3-1", "display_name": "Updates in 2023.3.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2023-3-1", "priority": -1, "content": "General Switched to using OpenSSL version 1.1.1w. Improved the speedup estimates for rule IssueSlotUtilization as well as its child rules. Updated report files and documentation for the samples located at extras/samples/ . Resolved Issues Fixed collection of context switch data during PM Sampling when using Range Replay . Fixed potential crash of NVIDIA Nsight Compute when an invalid regular expression was provided as a requested metric. Improved the performance of NVIDIA Nsight Compute in cases where only a single process is being profiled and --target-processes all was specified. Fixed an issue where too-high register counts were reported on the Source Page. Fixed a bug that could cause a GPU fault while collecting SW counters through PerfWorks. 
Fixed that incorrect baseline values were shown for the Runtime Improvement values on the Summary Page.", "keywords": []}, {"id": 245, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2024-1", "display_name": "Updates in 2024.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2024-1", "priority": -1, "content": "General Switched to using OpenSSL version 3.0.10. Added new metrics available when profiling on CUDA Green Contexts. Reduced the number of passes required for collecting PM sampling sections. Counter domains can now be specified for PM sampling metrics in section files. PM sampling metrics can now be queried in the command line and Metric Details window by specifying the respective collection option. Added a new optional PmSampling_WarpStates section for understanding warp stall reasons over the workload duration. Added a new rule for detecting load imbalances. Improved the performance of graph-level profiling on new drivers. Updated the metrics compatibility table for OptiX cmdlists and instruction-level SASS metrics. NVIDIA Nsight Compute Added SASS view and Source Markers support in Source Comparison . Improved Source Comparison diff visualization by adding empty lines on the other side of inserted/deleted lines. The Source page column chooser can now be opened directly from the Navigation drop-down. Added a Launch Details tool window for showing information about individual launches within larger workloads like OptiX command lists. Added support for CUDA Green Contexts in the Resources tool window, the Launch Statistics section and the report header. NVIDIA Nsight Compute CLI Improved documentation on NVTX expressions and command line output when a potentially incorrect expression led to no workloads being profiled. Improved checking for invalid expressions when using the --target-processes-filter option. Resolved Issues Fixed that the L1 cache achieved roofline value was missing when profiling on GH100. Fixed several \u201cLaunch Failed\u201d errors when collecting instruction-level SASS metrics. Fixed that Live Register values would be too high for some workloads. Fixed a scrolling issue on the Source page when collapsing a multi-file view. Fixed an issue where no PM sampling data would be shown in the timeline when context switch trace was not available. Fixed a display issue in the memory chart when adding baselines. Fixed a crash when adding baselines. Fixed a crash in timeline views when not all configured data was available. Fixed that the application history was not always deleted when selecting Reset Application Data. Fixed an error in the metric compatibility documentation.", "keywords": []}, {"id": 246, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#updates-in-2024-1-1", "display_name": "Updates in 2024.1.1", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "updates-in-2024-1-1", "priority": -1, "content": "General Added clarification that for profiling a range with multiple, active CUDA Green Contexts, counter values that are not attributable to SMs will be aggregated over all these Green Contexts. Resolved Issues Changed the way the PerfWorks library is loaded into the target application\u2019s process space. This addresses possible connection errors in case the library search path includes other directories with PerfWorks libraries. 
Fixed an issue that caused PM sampling data to be missing from the results of a Profile Series. Fixed the incorrect calculation of the percentage values in the Inline Function table. Fixed a potential crash of the NVIDIA Nsight Compute UI when PM sampling data was requested, but no sample was collected.", "keywords": []}, {"id": 247, "doc_id": 247, "filename": "ReleaseNotes/index.html", "domain_name": "std", "name": "ReleaseNotes/index", "display_name": "Release Notes", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/index", "anchor": "", "priority": -1, "content": "Nsight Compute Release Notes. Release notes, including new features and important bug fixes. Supported platforms and GPUs. List of known issues for the current release.", "keywords": []}, {"id": 248, "doc_id": 248, "filename": "ReleaseNotes/topics/gpu-support.html", "domain_name": "std", "name": "ReleaseNotes/topics/gpu-support", "display_name": "GPU Support", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/gpu-support", "anchor": "", "priority": -1, "content": "Architecture Support Kepler No Maxwell No Pascal No Volta GV100 Yes Volta GV11b Yes Turing TU1xx Yes NVIDIA GA100 Yes NVIDIA GA10x Yes NVIDIA GA10b Yes NVIDIA GH100 Yes Most metrics used in NVIDIA Nsight Compute are identical to those of the PerfWorks Metrics API and follow the documented Metrics Structure . A comparison between the metrics used in nvprof and their equivalent in NVIDIA Nsight Compute can be found in the NVIDIA Nsight Compute CLI User Manual .", "keywords": []}, {"id": 249, "doc_id": 249, "filename": "ReleaseNotes/topics/known-issues.html", "domain_name": "std", "name": "ReleaseNotes/topics/known-issues", "display_name": "Known Issues", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/known-issues", "anchor": "", "priority": -1, "content": "Installation The installer might not show all patch-level version numbers during installation. Some command line options listed in the help of a .run installer of NVIDIA Nsight Compute affect only the archive extraction, not the installation stage. To pass command line options to the embedded installer script, specify those options after -- in the form of -- -&lt;option&gt; . The available options for the installer script are: -help : Print help message -targetpath=&lt;PATH&gt; : Specify install path -noprompt : No prompts. Implies acceptance of the EULA For example, specifying only the option --quiet extracts the installer archive without any output to the console, but still prompts for user interaction during the installation. To install NVIDIA Nsight Compute without any console output or user interaction, please specify --quiet -- -noprompt . After using the SDK Manager to install the NVIDIA Nsight Compute tools, their binary path needs to be manually added to your PATH environment variable. See also the System Requirements for more installation instructions. Launch and Connection Launching applications on remote targets/platforms is not supported for several combinations. See Platform Support for details. As a workaround, manually launch the application using the command line ncu --mode=launch on the remote system and connect using the UI or CLI afterwards (see the example below). In the NVIDIA Nsight Compute connection dialog, a remote system can only be specified for one target platform. Remove a connection from its current target platform in order to be able to add it to another. 
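As an example of the manual launch workaround mentioned above (the host and application names are placeholders, and the attach invocation is an assumption based on the documented launch/attach modes): run ncu --mode=launch ./my_app on the remote system, then connect from the host via the UI connection dialog or with ncu --mode=attach --hostname my-target.my-domain.com .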
Loading of CUDA sources via SSH requires that the remote connection is configured, and that the hostname/IP address of the connection matches the target (as seen in the report session details). For example, prefer my-machine.my-domain.com, instead of my-machine, even though the latter may resolve to the same address. Other issues concerning remote connections are discussed in the documentation for remote connections . Local connections between NVIDIA Nsight Compute and the launched target application might not work on some ppc64le or aarch64 (sbsa) systems configured to only support IPv6. On these platforms, the NV_COMPUTE_PROFILER_LOCAL_CONNECTION_OVERRIDE=uds environment variable can be set to use Unix Domain Sockets instead of TCP for local connections to work around the problem. On x86_64 Linux, Unix Domain Sockets are used by default, but local TCP connections can be forced using NV_COMPUTE_PROFILER_LOCAL_CONNECTION_OVERRIDE=tcp . Profiling and Metrics Profiling of 32-bit processes is not supported. Profiling kernels executed on a device that is part of an SLI group is not supported. An \u201cUnsupported GPU\u201d error is shown in this case. Profiling a kernel while other contexts are active on the same device (e.g. an X server, or a secondary CUDA or graphics application) can result in varying metric values for L2/FB (Device Memory) related metrics. Specifically, L2/FB traffic from non-profiled contexts cannot be excluded from the metric results. To completely avoid this issue, profile the application on a GPU without secondary contexts accessing the same device (e.g. no X server on Linux). In the current release, profiling a kernel while any other GPU work is executing on the same MIG compute instance can result in varying metric values for all units. NVIDIA Nsight Compute enforces serialization of the CUDA launches within the target application to ensure those kernels do not influence each other. See Serialization for more details. However, GPU work issued through other APIs in the target process or workloads created by non-target processes running simultaneously in the same MIG compute instance will influence the collected metrics. Note that it is acceptable to run CUDA processes in other MIG compute instances as they will not influence the profiled MIG compute instance. On Linux kernels with the setting fs.protected_regular=1 (e.g. some Ubuntu 20.04 cloud service provider instances), root users may not be able to access the inter-process lock file . See the FAQ for workarounds. Profiling only supports up to 32 device instances, including instances of MIG partitions. Profiling the 33rd or higher device instance will result in indeterminate data. Enabling certain metrics can cause GPU kernels to run longer than the driver\u2019s watchdog time-out limit. In these cases, the driver will terminate the GPU kernel, resulting in an application error, and profiling data will not be available. Please disable the driver watchdog time-out before profiling such long-running CUDA kernels. On Linux, setting the X Config option Interactive to false is recommended. For Windows, detailed information on disabling the Windows TDR is available at https://docs.microsoft.com/en-us/windows-hardware/drivers/display/timeout-detection-and-recovery Collecting device-level metrics, such as the NVLink metrics ( nvl* ), is not supported on NVIDIA virtual GPUs (vGPUs). As of the CUDA 11.4 and R470 TRD1 driver release, NVIDIA Nsight Compute is supported in a vGPU environment which requires a vGPU license. 
If the license is not obtained after 20 minutes, the reported performance metrics data from the GPU will be inaccurate. This is because of a feature in the vGPU environment which reduces performance but retains functionality as specified here . Profiling on NVIDIA live-migrated virtual machines is not supported and can result in undefined behavior. Profiling with the multi-process service (MPS) enabled can result in undefined behavior. When profiling using Range Replay or Application Range Replay with multiple CUDA Green Contexts active that belong to the same device context, the range result will contain counter values aggregated over all Green Contexts. The NVLink Topology section is not supported for a configuration using NVSwitch. NVIDIA Nsight Compute does not support per-NVLink metrics. NVIDIA Nsight Compute does not support the Logical NVLink Throughput table. Setting a reduced NVLink Bandwidth mode does not impact the reported peak values for NVLink metrics. All peak values and corresponding percentages are calculated off the non-reduced NVLink bandwidth. Reconfiguring the NVLink Bandwidth mode using nvidia-smi while profiling may lead to undefined tool behavior. Profiling CUDA graph kernel nodes that can launch device graphs or are part of device-launchable graphs is not supported. Use Graph Profiling mode instead. Profiling in Graph Profiling mode is performed on the context that is specified by the stream handle for the graph launch. Consequently, only memory allocated on this context is saved and restored during graph replay and only kernel nodes executed on this context are profiled. On CUDA drivers older than 530.x, profiling on Windows Subsystem for Linux (WSL) is not supported if the system has multiple physical NVIDIA GPUs. This is not affected by setting CUDA_VISIBLE_DEVICES . Collecting software counters through PerfWorks currently forces all functions in the module of the profiled kernel to be loaded. This increases the host and device memory footprint of the target application for the remainder of the process lifetime. PM Sampling is not supported when collecting a Profile Series. Compatibility Applications calling blocking functions on standard input/output streams can cause the profiler to stall until the blocking function call is resolved. NVIDIA Nsight Compute can hang on applications using RAPIDS in versions 0.6 and 0.7, due to an issue in cuDF. Profiling child processes launched via clone() is not supported. Profiling child processes launched from Python using os.system() is not supported on ppc64le. Profiling of Cooperative Groups kernels launched with cuLaunchCooperativeKernelMultiDevice is not yet supported. On Linux systems, when profiling bsd-csh scripts, the original application output will not be printed. As a workaround, use a different C-shell, e.g. tcsh . Attempting to use the --clock-control option to set the GPU clocks will fail when profiling on a GPU partition. Please use nvidia-smi (installed with the NVIDIA display driver) to control the clocks for the entire GPU. This will require administrative privileges when the GPU is partitioned. On Linux aarch64, NVIDIA Nsight Compute does not work if the HOME environment variable is not set. NVIDIA Nsight Compute versions 2020.1.0 to 2020.2.1 are not compatible with CUDA driver version 460+ if the application launches Cooperative Groups kernels. Profiling will fail with the error \u201cUnknownError\u201d. Collecting CPU call stack information on Windows Server 2016 can hang NVIDIA Nsight Compute in some cases. 
Currently, the only workaround is to skip CPU call stack collection on such systems by not specifying the option --call-stack . When profiling a script, --target-processes all may target utility executables such as xargs , uname or ls . To avoid profiling these, use the --target-processes-filter option accordingly. On mobile platforms, the --kill option is not supported with application replay mode. NVIDIA Nsight Compute might show invalid characters for Unicode names and paths on Windows 10. As a workaround, use a third-party terminal emulator, e.g. Git bash. User Interface The API Statistics filter in NVIDIA Nsight Compute does not support units. File size is the only property considered when resolving source files. Timestamps are currently ignored. Terminating or disconnecting an application in the Interactive Profiling activity while the API Stream View is being updated can lead to a crash. See the OptiX library support section for limitations concerning the Acceleration Structure Viewer . After updating from a previous version of NVIDIA Nsight Compute on Linux, the file load dialog may not allow column resizing and sorting. As a workaround, the ~/.config/QtProject.conf file can be edited to remove the treeViewHeader entry from the [FileDialog] section.", "keywords": []}, {"id": 250, "doc_id": 250, "filename": "ReleaseNotes/topics/library-support.html", "domain_name": "std", "name": "ReleaseNotes/topics/library-support", "display_name": "Library Support", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/library-support", "anchor": "", "priority": -1, "content": "NVIDIA Nsight Compute can be used to profile CUDA applications, as well as applications that use CUDA via NVIDIA or third-party libraries. For most such libraries, the behavior is expected to be identical to applications using CUDA directly. However, for certain libraries, NVIDIA Nsight Compute has certain restrictions or alternate behavior, or requires non-default setup steps prior to profiling.", "keywords": []}, {"id": 251, "doc_id": 251, "filename": "ReleaseNotes/topics/library-support-optix.html", "domain_name": "std", "name": "ReleaseNotes/topics/library-support-optix", "display_name": "OptiX", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/library-support-optix", "anchor": "", "priority": -1, "content": "NVIDIA Nsight Compute supports profiling of OptiX applications, but with certain restrictions. Internal Kernels Kernels launched by OptiX that contain no user-defined code are given the generic name NVIDIA internal . These kernels show up on the API Stream in the NVIDIA Nsight Compute UI, and can be profiled in both the UI and the NVIDIA Nsight Compute CLI. However, no CUDA-C source, PTX or SASS is available for them. User Kernels Kernels launched by OptiX can contain user-defined code. OptiX identifies these kernels in the API Stream with a custom name. This name starts with raygen__ (for \u201cray generation\u201d). These kernels show up on the API Stream and can be profiled in the UI as well as the NVIDIA Nsight Compute CLI. The Source page displays CUDA-C source, PTX and SASS defined by the user. Certain parts of the kernel, including device functions that contain OptiX-internal code, will not be available in the Source page. SASS When SASS information is available in the profile report, certain instructions might not be available in the Source page and are shown as N/A . 
The Acceleration Structure Viewer for OptiX traversable handles currently has the following limitations: The Acceleration Structure Viewer is not supported on MacOSX. Viewing instance acceleration structures using multi-level instancing is not supported. Applying motion traversables to acceleration structures is not supported. The following feature set is supported per OptiX API version: OptiX API Version Kernel Profiling API Interception Resource Tracking 6.x Yes No No 7.0 - 8.0 Yes Yes Yes", "keywords": []}, {"id": 252, "doc_id": 252, "filename": "ReleaseNotes/topics/platform-support.html", "domain_name": "std", "name": "ReleaseNotes/topics/platform-support", "display_name": "Platform Support", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/platform-support", "anchor": "", "priority": -1, "content": "Host denotes that the UI can run on that platform. Target means that applications on that platform can be instrumented for data collection. Applications launched with instrumentation on a target system can be connected to from most host platforms. The reports collected on one system can be opened on any other system. Host Targets Windows Yes Windows*, Linux (x86_64) Windows Subsystem for Linux (WSL2) Yes Windows Subsystem for Linux (WSL2) as part of the Linux (x86_64) package. Linux (x86_64) Yes Windows*, Linux (x86_64), Linux (ppc64le), Linux (aarch64 sbsa) Linux (ppc64le) No Linux (ppc64le) Linux (aarch64 sbsa) Yes Linux (aarch64 sbsa) Linux (x86_64) (Drive SDK) Yes Windows*, Linux (x86_64), Linux (aarch64), QNX MacOSX 11+ Yes Windows*, Linux (x86_64), Linux (ppc64le) Linux (aarch64) No Linux (aarch64) QNX No QNX Target platforms marked with * do not support remote launch from the respective host. Remote launch means that the application can be launched on the target system from the host UI. For these platforms, the application must instead be launched from the target system. Profiling of 32-bit processes is not supported.", "keywords": []}, {"id": 253, "doc_id": 253, "filename": "ReleaseNotes/topics/release-notes.html", "domain_name": "std", "name": "ReleaseNotes/topics/release-notes", "display_name": "Release Notes", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/release-notes", "anchor": "", "priority": -1, "content": "", "keywords": []}, {"id": 254, "doc_id": 254, "filename": "ReleaseNotes/topics/release-notes-older-versions.html", "domain_name": "std", "name": "ReleaseNotes/topics/release-notes-older-versions", "display_name": "Older Versions", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/release-notes-older-versions", "anchor": "", "priority": -1, "content": "", "keywords": []}, {"id": 255, "doc_id": 255, "filename": "ReleaseNotes/topics/support.html", "domain_name": "std", "name": "ReleaseNotes/topics/support", "display_name": "Support", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/support", "anchor": "", "priority": -1, "content": "Information on supported platforms and GPUs.", "keywords": []}, {"id": 256, "doc_id": 256, "filename": "ReleaseNotes/topics/system-requirements.html", "domain_name": "std", "name": "ReleaseNotes/topics/system-requirements", "display_name": "System Requirements", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/system-requirements", "anchor": "", "priority": -1, "content": "Linux and WSL On all Linux platforms, NVIDIA Nsight Compute CLI requires GLIBC version 2.15 or higher. On x86_64, the NVIDIA Nsight Compute UI requires GLIBC version 2.17 or higher. 
On Linux (aarch64 sbsa), the NVIDIA Nsight Compute UI requires GLIBC version 2.26 or higher. The NVIDIA Nsight Compute UI requires several packages to be installed to enable Qt. Please refer to the Qt for X11 Requirements . When executing ncu-ui with missing dependencies, an error message with information on the missing packages is shown. Note that only one package will be shown at a time, even though multiple may be missing from your system. For selected operating systems, the following commands install the needed packages for NVIDIA Nsight Compute on X11: Ubuntu 18.04 apt install libopengl0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-render-util0 libxcb-xkb1 libxkbcommon-x11-0 Ubuntu 20.04 apt install libopengl0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-render-util0 libxcb-xinerama0 libxcb-xkb1 libxkbcommon-x11-0 RHEL 8.6 yum install libglvnd-opengl libxcb libxkbcommon-x11 Profiling on Windows Subsystem for Linux (WSL) is only supported with WSL version 2. Profiling is supported on Windows 10 WSL with OS build version 19044 and greater, and NVIDIA display driver version 545 or higher. It is not supported on Windows 10 WSL for systems that exceed 1 TB of system memory. Profiling is supported on Windows 11 WSL with NVIDIA display driver version 525 or higher. The Linux (x86_64) NVIDIA Nsight Compute package is the one to use and should be installed directly within WSL2. Remote profiling to and from WSL2 works equivalently to regular Linux (x86_64) hosts and targets, as long as the WSL2 instance is accessible via SSH. Access to NVIDIA GPU Performance Counters must be enabled in the NVIDIA Control Panel of the Windows host. See also the CUDA on WSL User Guide . Windows Only Windows 10 and 11 are supported as host and target. The Visual Studio 2017 redistributable is not automatically installed by the NVIDIA Nsight Compute installer. The workaround is to install the x64 version of the \u2018Microsoft Visual C++ Redistributable for Visual Studio 2017\u2019 manually. 
The installer is linked on the main Visual Studio download page at https://www.visualstudio.com/downloads/ and can also be downloaded directly from https://go.microsoft.com/fwlink/?LinkId=746572 .", "keywords": []}, {"id": 257, "doc_id": 257, "filename": "ReleaseNotes/topics/updates-2019-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2019-1", "display_name": "Updates in 2019.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2019-1", "anchor": "", "priority": -1, "content": "General Support for CUDA 10.1 Improved performance Bug fixes Profiling on Volta GPUs now uses the same metric names as on Turing GPUs Section files support descriptions The default sections and rules directory has been renamed to sections NVIDIA Nsight Compute Added new profiling options to the options dialog Details page shows rule result icons in the section headers Section descriptions are shown in the details page and in the sections tool window Source page supports collapsing multiple source files or functions to show aggregated results Source page heatmap color scale has changed Invalid metric results are highlighted in the profiler report Loaded section and rule files can be opened from the sections tool window NVIDIA Nsight Compute CLI Support for profiling child processes on Linux and Windows x86_64 targets NVIDIA Nsight Compute CLI uses a temporary file if no output file is specified Support for new --quiet option Support for setting the GPU clock control mode using new --clock-control option Details page output shows the NVTX context when --nvtx is enabled Support for filtering kernel launches for profiling based on their NVTX context using new --nvtx-include and --nvtx-exclude options Added new --summary options for aggregating profiling results Added option --open-in-ui to open reports collected with NVIDIA Nsight Compute CLI directly in NVIDIA Nsight Compute Resolved Issues Installation directory scripts use absolute paths OpenACC kernel names are correctly demangled Profile activity report file supports a relative path Source view can resolve all applicable files at once UI font colors are improved Details page layout and label elision issues are resolved Turing metrics are properly reported on the Summary page All byte-based metrics use a factor of 1000 when scaling units to follow SI standards CSV exports properly align columns with empty entries Fixed the metric computation for double_precision_fu_utilization on GV11b Fixed incorrect \u2018selected\u2019 PC sampling counter values The SpeedOfLight section uses \u2018max\u2019 instead of \u2018avg\u2019 cycles metrics for Elapsed Cycles", "keywords": []}, {"id": 258, "doc_id": 258, "filename": "ReleaseNotes/topics/updates-2019-2.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2019-2", "display_name": "Updates in 2019.2", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2019-2", "anchor": "", "priority": -1, "content": "General Improved performance Bug fixes Kernel launch context and stream are reported as metrics PC sampling configuration options are reported as metrics The default base port for connections to the target changed Section files support multiple, named Body fields NvRules allows users to query metrics using any convertible data type NVIDIA Nsight Compute Support for filtering kernel launches using their NVTX context Support for new options to select the connection port range The Profile activity supports configuring PC sampling parameters Sections on 
the Details page support selecting individual bodies NVIDIA Nsight Compute CLI Support for stepping to kernel launches from specific NVTX contexts Support for new --port and --max-connections options Support for new --sampling-* options to configure PC sampling parameters Section file errors are reported with --list-sections A warning is shown if some section files could not be loaded Resolved Issues Using the --summary option works for reports that include invalid metrics The full process executable filename is reported for QNX targets The project system now properly stores the state of opened reports Fixed PTX syntax highlighting Fixed an issue when switching between manual and auto profiling in NVIDIA Nsight Compute The Source page in NVIDIA Nsight Compute now works with results from multiple processes Charts on the NVIDIA Nsight Compute details page use proper localization for numbers NVIDIA Nsight Compute no longer requires the system locale to be set to English", "keywords": []}, {"id": 259, "doc_id": 259, "filename": "ReleaseNotes/topics/updates-2019-3.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2019-3", "display_name": "Updates in 2019.3", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2019-3", "anchor": "", "priority": -1, "content": "General Improved performance Bug fixes Kernel launch context and stream are reported as metrics PC sampling configuration options are reported as metrics The default base port for connections to the target changed Section files support multiple, named Body fields NvRules allows users to query metrics using any convertible data type NVIDIA Nsight Compute Support for filtering kernel launches using their NVTX context Support for new options to select the connection port range The Profile activity supports configuring PC sampling parameters Sections on the Details page support selecting individual bodies NVIDIA Nsight Compute CLI Support for stepping to kernel launches from specific NVTX contexts Support for new --port and --max-connections options Support for new --sampling-* options to configure PC sampling parameters Section file errors are reported with --list-sections A warning is shown if some section files could not be loaded Resolved Issues Using the --summary option works for reports that include invalid metrics The full process executable filename is reported for QNX targets The project system now properly stores the state of opened reports Fixed PTX syntax highlighting Fixed an issue when switching between manual and auto profiling in NVIDIA Nsight Compute The Source page in NVIDIA Nsight Compute now works with results from multiple processes Charts on the NVIDIA Nsight Compute details page use proper localization for numbers NVIDIA Nsight Compute no longer requires the system locale to be set to English", "keywords": []}, {"id": 260, "doc_id": 260, "filename": "ReleaseNotes/topics/updates-2019-3-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2019-3-1", "display_name": "Updates in 2019.3.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2019-3-1", "anchor": "", "priority": -1, "content": "NVIDIA Nsight Compute Added ability to send bug reports and suggestions for features using Send Feedback in the Help menu Resolved Issues Fixed calculation of theoretical occupancy for grids with blocks that are not a multiple of 32 threads Fixed intercepting child processes launched through Python\u2019s subprocess.Popen class Fixed issue of 
NVTX push/pop ranges not showing up for child threads in NVIDIA Nsight Compute CLI Fixed performance regression for metric lookups on the Source page Fixed description in rule covering the IMC stall reason Fixed cases where baseline values were not correctly calculated in the Memory tables when comparing reports of different architectures Fixed incorrect calculation of baseline values in the Executed Instruction Mix chart Fixed accessing instanced metrics in the NvRules API Fixed a bug that could cause the collection of unnecessary metrics in the Interactive Profile activity Fixed potential crash on exit of the profiled target application Switched underlying metric for SOL FB in the GPU Speed Of Light section to be driven by dram__throughput.avg.pct_of_peak_sustained_elapsed instead of fbpa__throughput.avg.pct_of_peak_sustained_elapsed", "keywords": []}, {"id": 261, "doc_id": 261, "filename": "ReleaseNotes/topics/updates-2019-4.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2019-4", "display_name": "Updates in 2019.4", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2019-4", "anchor": "", "priority": -1, "content": "General Added support for the Linux PowerPC target platform Reduced the profiling overhead, especially if no source metrics are collected Reduced the overhead for non-profiled kernels Improved the deployment performance during remote launches Trying to profile on an unsupported GPU now shows an \u201cUnsupported GPU\u201d error message Added support for the %i sequential number placeholder to generate unique report file names Added support for smsp__sass_* metrics on Volta and newer GPUs The launch__occupancy_limit_shared_mem metric now reports the device block limit if no shared memory is used by the kernel NVIDIA Nsight Compute The Profile activity shows the command line used to launch ncu The heatmap on the Source page now shows the represented metric in its tooltip The Memory Workload Analysis Chart on the Details page now supports baselines When applying rules, a message displaying the number of new rule results is shown in the status bar The Visual Profiler Transition Guide was added to the documentation Connection dialog activity options were added to the documentation A warning dialog is shown if the application is resumed without Auto-Profile enabled Pausing the application now has immediate feedback in the toolbar controls Added a Close All command to the File menu NVIDIA Nsight Compute CLI The --query-metrics option now shows only metric base names for faster metric queries. The new option --query-metrics-mode can be used to display the valid suffixes for each base metric. 
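A hedged sketch of the metric query workflow described above (the metric base name is only an example): ncu --query-metrics lists the available base names, and ncu --query-metrics-mode suffix --metrics sm__inst_executed then displays the valid suffixes for the given base metric. This two-step flow is assembled from the option descriptions above, not quoted from the tool documentation. 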
Added support for passing response files using the @ operator to specify command line options through a file Resolved Issues Fixed an issue that reported the wrong executable name in the Session page when attaching Fixed issues where chart labels were shown elided on the Details page Fixed an issue that caused the cache hit rates to be shown incorrectly when baselines were added Fixed an illegal memory access when collecting sass__*_histogram metrics for applications using PyTorch on Pascal GPUs Fixed an issue when attempting to collect all smsp__* metrics on Volta and newer GPUs Fixed an issue when profiling multi-context applications Fixed that profiling start/stop settings from the connection dialog weren\u2019t properly passed to the interactive profile activity Fixed that certain smsp__warp_cycles_per_issue_stall* metrics returned negative values on Pascal GPUs Fixed that metric names were truncated in the --page details non-CSV command line output Fixed that the target application could crash if a connection port was used by another application with higher privileges", "keywords": []}, {"id": 262, "doc_id": 262, "filename": "ReleaseNotes/topics/updates-2019-5.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2019-5", "display_name": "Updates in 2019.5", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2019-5", "anchor": "", "priority": -1, "content": "General Added section sets to reduce the default overhead and make it easier to configure metric sets for profiling Reduced the size of the installation Added support for the CUDA Graphs Recapture API The NvRules API now supports accessing correlation IDs for instanced metrics Added breakdown tables for SOL SM and SOL Memory in the Speed Of Light section for Volta+ GPUs NVIDIA Nsight Compute Added a snap-select feature to the Source page heatmap to help navigate large files Added support for loading remote CUDA-C source files via SSH on demand for Linux x86_64 targets Charts on the Details page provide better help in tool tips when hovering metric names Improved the performance of the Source page when scrolling or collapsing The charts for Warp States and Compute pipelines are now sorted by value NVIDIA Nsight Compute CLI Added support for GPU cache control, see --cache-control Added support for setting the kernel name base in command line output, see --kernel-base Added support for listing the available names for --chips , see --list-chips Improved the stability on Windows when using --target-processes all Reduced the profiling overhead for small metric sets in applications with many kernels Resolved Issues Reduced the overhead caused by demangling kernel names multiple times Fixed an issue that kernel names were not demangled in the CUDA Graph Nodes resources window The connection dialog better disables unsupported combinations or warns of invalid entries Fixed metric thread_inst_executed_true to derive from smsp__not_predicated_off_thread_inst_executed on Volta+ GPUs Fixed an issue with computing the theoretical occupancy on GV100 Selecting an entry on the Source page heatmap no longer selects the respective source line, to avoid losing the current selection Fixed the current view indicator of the Source page heatmap to be line-accurate Fixed an issue when comparing metrics from Pascal and later architectures on the Summary page Fixed an issue that metrics representing constant values on Volta+ couldn\u2019t be collected without non-constant metrics", "keywords": []}, {"id": 263, "doc_id": 263, 
"filename": "ReleaseNotes/topics/updates-2019-5-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2019-5-1", "display_name": "Updates in 2019.5.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2019-5-1", "anchor": "", "priority": -1, "content": "General Added support for Nsight Compute Visual Studio Integration", "keywords": []}, {"id": 264, "doc_id": 264, "filename": "ReleaseNotes/topics/updates-2019-5-2.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2019-5-2", "display_name": "Updates in 2019.5.2", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2019-5-2", "anchor": "", "priority": -1, "content": "General Bug fixes", "keywords": []}, {"id": 265, "doc_id": 265, "filename": "ReleaseNotes/topics/updates-2019-5-3.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2019-5-3", "display_name": "Updates in 2019.5.3", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2019-5-3", "anchor": "", "priority": -1, "content": "General More C++ kernel names can be properly demangled", "keywords": []}, {"id": 266, "doc_id": 266, "filename": "ReleaseNotes/topics/updates-2020-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2020-1", "display_name": "Updates in 2020.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2020-1", "anchor": "", "priority": -1, "content": "General Added support for the NVIDIA GA100/SM 8.x GPU architecture Removed support for the Pascal SM 6.x GPU architecture Windows 7 is not a supported host or target platform anymore Added a rule for reporting uncoalesced memory accesses as part of the Source Counters section Added support for report name placeholders %p, %q, %i and %h The Kernel Profiling Guide was added to the documentation NVIDIA Nsight Compute The UI command was renamed from nv-nsight-cu to ncu-ui . Old names remain for backwards compatibility. Added support for roofline analysis charts Added linked hot spot tables in section bodies to indicate performance problems in the source code Added section navigation links in rule results to quickly jump to the referenced section Added a new option to select how kernel names are shown in the UI Added new memory tables for the L1/TEX cache and the L2 cache. The old tables are still available for backwards compatibility and moved to a new section containing deprecated UI elements. Memory tables now show the metric name as a tooltip Source resolution now takes into account file properties when selecting a file from disk Results in the profile report can now be filtered by NVTX range The Source page now supports collapsing views even for single files The UI shows profiler error messages as dismissible banners for increased visibility Improved the baseline name control in the profiler report header NVIDIA Nsight Compute CLI The CLI command was renamed from nv-nsight-cu-cli to ncu . Old names remain for backwards compatibility. Queried metrics on GV100 and newer chips are sorted alphabetically Multiple instances of NVIDIA Nsight Compute CLI can now run concurrently on the same system, e.g. for profiling individual MPI ranks. Profiled kernels are serialized across all processes using a system-wide file lock. 
Resolved Issues More C++ kernel names can be properly demangled Fixed a free(): invalid pointer error when profiling applications using pytorch > 19.07 Fixed profiling IBM Spectrum MPI applications that require PAMI GPU hooks ( --smpiargs=\"-gpu\" ) Fixed that the first kernel instruction was missed when computing sass__inst_executed_per_opcode Reduced surplus DRAM write traffic created from flushing caches during kernel replay The Compute Workload Analysis section shows the IMMA pipeline on GV11b GPUs Profile reports now scroll properly on MacOS when using a trackpad Relative output filenames for the Profile activity now use the document directory, instead of the current working directory Fixed path expansion of ~ on Windows Memory access information is now shown properly for RED assembly instructions on the Source page Fixed that user PYTHONHOME and PYTHONPATH environment variables would be picked up by NVIDIA Nsight Compute, resulting in locale encoding issues.", "keywords": []}, {"id": 267, "doc_id": 267, "filename": "ReleaseNotes/topics/updates-2020-1-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2020-1-1", "display_name": "Updates in 2020.1.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2020-1-1", "anchor": "", "priority": -1, "content": "General Added support for the NVIDIA GA100/SM 8.x GPU architecture Metrics passed to --metrics on the NVIDIA Nsight Compute CLI or in the respective Profile activity option are automatically expanded to all first-level sub-metrics if required. See the documentation on --metrics for more details. Added new rules for detecting inefficiencies of using the sparse data compression on the NVIDIA Ampere architecture. The version of the NVIDIA Nsight Compute target collecting the results is shown in the Session page. Added new launch__grid_dim_[x,y,z] and launch__block_dim_[x,y,z] metrics. NVIDIA Nsight Compute The Break on API Error functionality has been improved when auto profiling. NVIDIA Nsight Compute CLI The full path to the report output file is printed after profiling. Added and corrected metrics in the nvprof Metric Comparison table. Resolved Issues Documented the breakdown: metrics prefix. Fixed handling of escaped domain delimiters in NVTX filter expressions. Fixed issues with the occupancy charts for small block sizes. Fixed an issue when choosing a default report page in the options dialog. Fixed that the scroll bar could overlap the content when exporting the report page as an image.", "keywords": []}, {"id": 268, "doc_id": 268, "filename": "ReleaseNotes/topics/updates-2020-1-2.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2020-1-2", "display_name": "Updates in 2020.1.2", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2020-1-2", "anchor": "", "priority": -1, "content": "General The NVIDIA Nsight Compute installer for Mac is now code-signed and notarized. Disabled the creation of the Python cache when executing rules to avoid permission issues and signing conflicts. Resolved Issues Fixed the launcher script of the NVIDIA Nsight Compute CLI to no longer fail if uname -p is not available. 
Fixed the API parameter capture for function cuDeviceGetLuid .", "keywords": []}, {"id": 269, "doc_id": 269, "filename": "ReleaseNotes/topics/updates-2020-2.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2020-2", "display_name": "Updates in 2020.2", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2020-2", "anchor": "", "priority": -1, "content": "General Added support for the NVIDIA Ampere GPUs with compute capability 8.6 and CUDA toolkit 11.1. Added support for application replay to collect metric results across multiple application runs, instead of replaying individual kernels. Added new launch__device_id metric. Added support for NVLink ( nvl* ) metrics for GPUs with compute capabilities 7.0, 7.5 and 8.0 Added documentation for memory charts and tables in the Profiling Guide . NVIDIA Nsight Compute Updated menu and toolbar layout. Added support for zoom and pan on roofline charts. The Resources tool window shows the current CUDA stream attributes. The memory chart shows a heatmap for link and port utilization. The hot-spot tables in the Source Counters section now show values as percentages, too. On-demand resolve of remote CUDA-C source is now available for MacOS hosts. Metric columns in the Summary and Raw pages are now sortable. Added a new option to set the number of recent API calls shown in the API Stream tool window. NVIDIA Nsight Compute CLI CLI output now shows NVTX payload information. CSV output now shows NVTX states. Added a new --replay-mode option to select the mechanism used for replaying a kernel launch multiple times. Added a new --kill option to terminate the application once all requested kernels were profiled. Added a new --log-file option to decide the output stream for printing tool output. Added a new --check-exit-code option to decide if the child application exit code should be checked. Resolved Issues The profiling progress dialog is not dismissed automatically anymore after an error. The inter-process lock is now automatically given write permissions for all users. All project extensions are enabled in the default dialog filter. Fixed handling of targets using tcsh during remote profiling. Fixed handling of quoted application arguments on Windows.", "keywords": []}, {"id": 270, "doc_id": 270, "filename": "ReleaseNotes/topics/updates-2020-2-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2020-2-1", "display_name": "Updates in 2020.2.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2020-2-1", "anchor": "", "priority": -1, "content": "Resolved Issues Fixed several issues related to auto-profiling in the UI. Fixed a metric collection issue when profiling kernels on different GPU architectures with application replay. Fixed a performance problem related to profiling large process trees. Fixed that occupancy charts would not render correctly when comparing against baselines. Fixed that no memory metrics were shown on the Source page for LDGSTS instructions. Fixed the automatic sorting on the Summary and Raw pages. Fixed an issue that would cause the NVIDIA Nsight Compute CLI to consume too much memory when importing or printing reports. Long kernel names are now elided in the Details page source hot spot tables. 
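Referring back to the 2020.2 CLI additions above, a hypothetical combined invocation might look like: ncu --replay-mode application --check-exit-code no --log-file ncu.log -o report ./app . This is a sketch assembled from the option descriptions, not a documented example; ./app and ncu.log are placeholders. 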
Fixed that function names in the Resources tool window were demangled differently.", "keywords": []}, {"id": 271, "doc_id": 271, "filename": "ReleaseNotes/topics/updates-2020-3.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2020-3", "display_name": "Updates in 2020.3", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2020-3", "anchor": "", "priority": -1, "content": "General Added support for derived metrics in section files. Derived metrics can be used to create new metrics based on existing metrics and constants. See the Customization Guide for details. Added a new Import Source ( --import-source ) option to the UI and command line to permanently import source files into the report, when available. Added a new section that shows selected NVLink metrics on supported systems. Added a new launch__func_cache_config metric to the Launch Statistics section. Added new branch efficiency metrics to the Source Counters section, including smsp__sass_average_branch_targets_threads_uniform.pct to replace nvprof\u2019s branch_efficiency , as well as instruction-level metrics smsp__branch_targets_threads_divergent , smsp__branch_targets_threads_uniform and branch_inst_executed . A warning is shown if kernel replay starts staging GPU memory to CPU memory or the file system. Section and rule files are deployed to a versioned directory in the user\u2019s home directory to allow easier editing of those files, and to prevent modifying the base installation. Removed support for NVLink ( nvl* ) metrics due to a potential application hang during data collection. The metrics will be added back in a future version of the driver/tool. NVIDIA Nsight Compute Added support for Profile Series . Series allow you to profile a kernel with a range of configurable parameters to analyze the performance of each combination. Added a new Allocations view to the Resources tool window which shows the state of all current memory allocations. Added a new Memory Pools view to the Resources tool window which shows the state of all current memory pools. Added coverage of peer memory to the Memory Chart . The Source page now shows the number of excessive sectors requested from L1 or L2, e.g. due to uncoalesced memory accesses. The Source column on the Source page can now be scrolled horizontally. The kernel duration gpu__time_duration.sum was added as a column on the Summary page. Improved the performance of application replay when not all kernels in the application are profiled. NVIDIA Nsight Compute CLI Added a new --app-replay-match option to select the mechanism used for matching kernel instances across application replay passes. An error is shown if --nvtx-include/exclude are used without --nvtx . Resolved Issues The Grid Size column on the Raw page now shows the CUDA grid size like the Launch Statistics section, rather than the combined grid and block sizes. The Branch Resolving warp stall reason was added to the PC sampling metric groups and the Warp State Statistics section. The API Stream tool window shows kernel names according to the selected Function Name Mode. Fixed that an incorrect line could be shown after a heatmap selection on the Source page. Fixed incorrect metric usage for system memory in the Memory Chart . 
Previously, all memory requested by L2 from system memory was reported, instead of only the portion that missed in L2.", "keywords": []}, {"id": 272, "doc_id": 272, "filename": "ReleaseNotes/topics/updates-2020-3-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2020-3-1", "display_name": "Updates in 2020.3.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2020-3-1", "anchor": "", "priority": -1, "content": "General Added support for LDSM instruction-level metrics. NVIDIA Nsight Compute LDSM instruction-level metrics are shown in the Source page and memory tables. Improved reporting and documentation for collecting Profile Series . Frozen columns in the Source page are automatically scrolled into view. Resolved Issues Fixed an issue when profiling multi-threaded applications. Fixed an issue that NVIDIA Nsight Compute would not automatically restart when using Reset Application Data . Fixed issues with target applications using libstdc++. Fixed an issue when collecting single-pass metrics in multiple Nsight Compute instances. Fixed an issue when using Kernel ID and setting Launch Capture Count as non-zero in the UI\u2019s Profile activity. Fixed an issue that prevented different users on the same Linux system from using NVIDIA Nsight Compute in shared instance mode. Fixed an issue that prevented resources from being properly renamed using NVTX information in the UI.", "keywords": []}, {"id": 273, "doc_id": 273, "filename": "ReleaseNotes/topics/updates-2021-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2021-1", "display_name": "Updates in 2021.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2021-1", "anchor": "", "priority": -1, "content": "General Added support for the CUDA toolkit 11.3. Added support for the OptiX 7 API . GpuArch enumeration values used for filtering in section files were renamed from architecture names to compute capabilities. NVTX states can now be accessed via the NvRules API . Added a rule for the Occupancy section. NVIDIA Nsight Compute Added support for new CUDA asynchronous allocator attributes in the Memory Pools resources view. Added a topology chart and link properties table in the NVLink section. The selected metric column is scrolled into view on the Source page when a new metric is selected. Users can choose the Source heatmap color scale in the Options dialog. NVIDIA Nsight Compute CLI Added file-based application replay as the new default application replay mode. File-based replay uses a temporary file for keeping replay data, instead of allocating it in memory. This keeps the required memory footprint close to constant, independent of the number of profiled kernels. Users can switch between buffer modes using the --app-replay-buffer option. CLI output now shows NVTX color and message information. --kernel-regex and --kernel-regex-base options are deprecated and replaced by --kernel-name and --kernel-name-base , respectively. All options which support regex require the regex: prefix before the argument to match it as a regular expression, e.g. <option> <regex:expression> Resolved Issues Fixed that baselines were not updated properly on the Comments page. NVTX ranges named using their payloads can now be used in NVTX filtering expressions. Fixed crashes in MacOSX hosts when terminating the target application. 
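To illustrate the regex: prefix convention introduced above (the kernel name pattern and binary are placeholders): ncu --kernel-name regex:gemm ./app profiles only kernels whose names match the regular expression gemm. This example is a sketch based on the option description, not an excerpt from the official documentation. 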
The NVLink ( nvl* ) metrics are now added back.", "keywords": []}, {"id": 274, "doc_id": 274, "filename": "ReleaseNotes/topics/updates-2021-1-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2021-1-1", "display_name": "Updates in 2021.1.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2021-1-1", "anchor": "", "priority": -1, "content": "General Updated OpenSSL library to version 1.1.1k. NVIDIA Nsight Compute Remote source resolution can now use the IP address, in addition to the hostname, to find the necessary SSH target. NVIDIA Nsight Compute CLI Added support for the existing command line options for kernel filtering while importing data from an existing report file using --import . Option -k no longer maps to the deprecated option --kernel-regex . Resolved Issues Fixed failure to profile kernels from applications that use the CUDA graphics interop APIs to share semaphores. Fixed the wavefront metric in the L1TEX table for writes to shared memory on GA10x chips. Fixed an issue resulting in incomplete data collection for the interactive profile activity after switching from single-pass mode to collecting multiple passes in the same session. Fixed values shown in the minimap of the Source page when all functions are collapsed. Fixed an issue causing names set by the NVTX naming APIs of one application to be applied to all subsequent sessions of the same instance of NVIDIA Nsight Compute. Fixed behavior of horizontal scroll bars when clicking in the source views on the Source page. Fixed appearance of multi-line entries in column chooser on the Source page. Fixed enablement state of the reset button on the Connection dialog. Fixed potential crash of NVIDIA Nsight Compute when the window size becomes small while on the Source page. Fixed potential crash of NVIDIA Nsight Compute when relative paths for section/rules files could not be found. Fixed potential crash of NVIDIA Nsight Compute after removing baselines.", "keywords": []}, {"id": 275, "doc_id": 275, "filename": "ReleaseNotes/topics/updates-2021-2.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2021-2", "display_name": "Updates in 2021.2", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2021-2", "anchor": "", "priority": -1, "content": "General Added support for the CUDA toolkit 11.4. Added support for OptiX version 7.3. Added support for profiling on NVIDIA virtual GPUs (vGPUs) on an upcoming GRID/vGPU release. Added a new Python-based report interface for interacting with report files from Python scripts. Added a new rule to warn users when sampling metrics were selected, but no sampling data was collected. Renamed SOL to Throughput in the Speed of Light section. Renamed several memory_* metrics used on the Source page, to better reflect the measured value. See the Source page documentation for more details. NVIDIA Nsight Compute Added support for opening cubin files in a Standalone Source Viewer without profiling the application. Moved the output of all rules so that it is visible even if a section\u2019s body is collapsed. Visibility of the rules\u2019 output can be toggled by a new button in the report header. The profiler report header now shows the report name for each baseline when ambiguous. Rules can define Focused Metrics that are most important for triggering their result output. Metrics are provided per result message with additional information, such as the underlying conditions and thresholds. 
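For the Python-based report interface introduced in the 2021.2 notes above, a minimal sketch follows. The module and call names are assumptions based on the ncu_report / NvRules-style interface referenced elsewhere in these notes, and the report path is a placeholder:
import ncu_report
# load a report previously written with ncu -o (path is a placeholder)
ctx = ncu_report.load_report(\"profile.ncu-rep\")
for r in range(ctx.num_ranges()):
    rng = ctx.range_by_idx(r)
    for a in range(rng.num_actions()):
        action = rng.action_by_idx(a)
        # print each profiled kernel launch and its duration; gpu__time_duration.sum
        # is the kernel duration metric mentioned in the 2020.3 notes above
        print(action.name(), action.metric_by_name(\"gpu__time_duration.sum\").as_double())
This is a sketch under the stated assumptions, not an excerpt from the shipped interface documentation. 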
Memory tables show tooltips for cells with derived metric calculations. Added a knowledge base service to show more comprehensive background information on metric names and descriptions in their tooltips. Following a link in the Source Counters hot spot tables automatically selects the corresponding metric in the Source page. Added new columns for visualizing register dependencies in the SASS view of the Source page . Functions in the SASS view are now sorted by name. Added support for OptiX 7.x resource tracking in the interactive profile activity. The Resources tool window will show information on instantiated optixDeviceContexts, optixModules, optixProgramGroups, optixPipelines and optixDenoiser objects. Added support for new CUDA graph memory allocation APIs. Improved consistency between command line parameters and the Next Trigger filter in the API Stream window for handling of regex inputs. The Next Trigger filter now considers the kernel/API name as a regular expression only if the string has the regex: prefix. Added ability to select font settings in the options dialog. Added ability to configure the metrics shown on the summary page via the options dialog. The selected heatmap color scale now also applies to the Memory chart . The ncu-ui script now checks for missing library dependencies, such as OpenGL or Qt . NVIDIA Nsight Compute CLI Added environment variable NV_COMPUTE_PROFILER_DISABLE_STOCK_FILE_DEPLOYMENT=1 to skip deployment of section and rule files. Resolved Issues Fixed a performance issue in the NVIDIA Nsight Compute CLI when using --page raw --csv --units auto . The SSH key passphrase is no longer persisted in the project file. Fixed the state of the restore button in the connection dialog. The button now supports restoring the default settings if the current settings differ from the default. The complete GPU name can now be shown in the NVLink topology diagram on MacOS. Fixed that collapsing the Source view reset the selected metrics. Fixed that correlated lines could differ between filtered and unfiltered views of the executed functions. Fixed that two application icons were shown in the MacOS dock. Improved HiDPI awareness.", "keywords": []}, {"id": 276, "doc_id": 276, "filename": "ReleaseNotes/topics/updates-2021-2-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2021-2-1", "display_name": "Updates in 2021.2.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2021-2-1", "anchor": "", "priority": -1, "content": "General Reduced the memory overhead when loading reports in the Python Report Interface . Resolved Issues Fixed that links in the Memory Allocations Resource view were not working correctly. Fixed that NVTX state might not be correctly reset between interactive profiling activities. Fixed that the UI could crash when opening baselines from different GPU architectures.", "keywords": []}, {"id": 277, "doc_id": 277, "filename": "ReleaseNotes/topics/updates-2021-2-2.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2021-2-2", "display_name": "Updates in 2021.2.2", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2021-2-2", "anchor": "", "priority": -1, "content": "General Changes for profiling support on NVIDIA virtual GPUs (vGPUs) for an upcoming GRID/vGPU release. 
Resolved Issues Fixed a hang issue on QNX when using the --target-processes all option while profiling shell scripts.", "keywords": []}, {"id": 278, "doc_id": 278, "filename": "ReleaseNotes/topics/updates-2021-2-3.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2021-2-3", "display_name": "Updates in 2021.2.3", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2021-2-3", "anchor": "", "priority": -1, "content": "General Added support for the NVIDIA GA10b chip. Resolved Issues Improved the error message on QNX for failure to deploy stock section and rules files.", "keywords": []}, {"id": 279, "doc_id": 279, "filename": "ReleaseNotes/topics/updates-2021-2-4.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2021-2-4", "display_name": "Updates in 2021.2.4", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2021-2-4", "anchor": "", "priority": -1, "content": "Resolved Issues Fixed an issue that prevented remote interactive profiling of kernels on NVIDIA GA10b chips.", "keywords": []}, {"id": 280, "doc_id": 280, "filename": "ReleaseNotes/topics/updates-2021-2-5.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2021-2-5", "display_name": "Updates in 2021.2.5", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2021-2-5", "anchor": "", "priority": -1, "content": "Resolved Issues Improved the handling of the performance monitor reservation on mobile target GPUs.", "keywords": []}, {"id": 281, "doc_id": 281, "filename": "ReleaseNotes/topics/updates-2021-2-6.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2021-2-6", "display_name": "Updates in 2021.2.6", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2021-2-6", "anchor": "", "priority": -1, "content": "Resolved Issues Fixed an issue causing a hang on QNX after pressing ctrl+c while profiling a multi-process application.", "keywords": []}, {"id": 282, "doc_id": 282, "filename": "ReleaseNotes/topics/updates-2021-2-7.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2021-2-7", "display_name": "Updates in 2021.2.7", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2021-2-7", "anchor": "", "priority": -1, "content": "General Enabled stack canaries with random canary values for L4T builds.", "keywords": []}, {"id": 283, "doc_id": 283, "filename": "ReleaseNotes/topics/updates-2021-2-8.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2021-2-8", "display_name": "Updates in 2021.2.8", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2021-2-8", "anchor": "", "priority": -1, "content": "General Updated Python libraries to version 3.10.5.", "keywords": []}, {"id": 284, "doc_id": 284, "filename": "ReleaseNotes/topics/updates-2021-2-9.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2021-2-9", "display_name": "Updates in 2021.2.9", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2021-2-9", "anchor": "", "priority": -1, "content": "NVIDIA Nsight Compute Clarified when not all metrics for the roofline chart could be collected on the current chip.", "keywords": []}, {"id": 285, "doc_id": 285, "filename": "ReleaseNotes/topics/updates-2021-3.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2021-3", "display_name": "Updates in 2021.3", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2021-3", "anchor": "", 
"priority": -1, "content": "General Added support for the CUDA toolkit 11.5. Added a new rule for detecting inefficient memory access patterns in the L1TEX cache and L2 cache. Added a new rule for detecting high usage of system or peer memory. Added new IAction::sass_by_pc function to the the NvRules API . The Python-based report interface is now available for Windows and MacOS hosts, too. Added Hierarchical Roofline section files in a new \u201croofline\u201d section set. Added support for collecting CPU call stack information. NVIDIA Nsight Compute Added support for new remote profiling SSH connection and authentication options as well as local SSH configuration files. Added an Occupancy Calculator which can be opened directly from a profile report or as a new activity. It offers feature parity to the CUDA Occupancy Calculator spreadsheet . Added new Baselines tool window to manage (hide, update, re-order, save/load) baseline selections. The Source page views now support multi-line/cell selection and copy/paste. Different colors are used for highlighting selections and correlated lines. The search edit on the Source page now supports Shift+Enter to search in reverse direction. The Memory Workload Analysis Chart can be configured to show throughput values instead of transferred bytes. The Profile activity now supports the --devices option. The NVLink Topology diagram displays per NVLink metrics. Added a new tool window showing the CPU call stack at the location where the current thread was suspended during interactive profiling activities. If enabled, the Call Stack / NVTX page of the profile report shows the captured CPU call stack for the selected kernel launch. NVIDIA Nsight Compute CLI Added support for printing source/metric content with the new --page source and --print-source  command line options . Added new option --call-stack to enable collecting the CPU call stack for every profiled kernel launch. Resolved Issues Fixed that memory_* metrics could not be collected with the --metrics option. Fixed that selection and copy/paste was not supported for section header tables on the Details page. Fixed issues with the Source page when collapsing the content. Fixed that the UI could crash when applying rules to a new profile result. Fixed that PC Sampling metrics were not available for Profile Series . Fixed that local profiling did not work if no non-loopback address was configured for the system. Fixed termination of remote-launched applications. On QNX, terminating an application profiled via Remote Launch is now supported. Canceling remote-launched Profile activities is now supported.", "keywords": []}, {"id": 286, "doc_id": 286, "filename": "ReleaseNotes/topics/updates-2021-3-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2021-3-1", "display_name": "Updates in 2021.3.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2021-3-1", "anchor": "", "priority": -1, "content": "Resolved Issues Fixed that kernels with the same name and launch configuration were in some scenarios associated with the wrong profiling results during application replay. Fixed an issue with binary forward compatibility of the report format. Fixed an issue with applications calling into the CUDA API during process teardown. Fixed an issue profiling application using pre-CUDA API 3.1 contexts. Fixed a crash when resolving files on the Source page. Fixed that opening reports with large embedded CUBINs would hang the UI. 
Fixed an issue with remote profiling on a target where the UI is already launched.", "keywords": []}, {"id": 287, "doc_id": 287, "filename": "ReleaseNotes/topics/updates-2022-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2022-1", "display_name": "Updates in 2022.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2022-1", "anchor": "", "priority": -1, "content": "General Added support for the CUDA toolkit 11.6. Added support for GA103 chips. Added a new Range Replay mode to profile ranges of multiple, concurrent kernels. Range replay is available in the NVIDIA Nsight Compute CLI and the non-interactive Profile activity. Added a new rule to detect non-fused floating-point instructions. The Uncoalesced Memory Access rules now show results in a dynamic table. Unix Domain Sockets and Windows Named Pipes are used for local connection between the host and target processes on x86_64 Linux and Windows, respectively. The NvRules API now supports querying action names using different function name bases (e.g. demangled). NVIDIA Nsight Compute The default report page is now chosen automatically when opening a report. Added coverage for ECC (Error Correction Code) operations in the L2 Cache table of the Memory Analysis section. Added a new L2 Evict Policies table to the Memory Analysis section. The Occupancy Calculator now updates automatically when the input changes. Added a new metric Thread Instructions Executed to the Source page. Added tooltips to the Register Dependency columns in the Source page to identify the associated register more conveniently. Improved the selection of Sections and Sets in the Profile activity connection dialog. NVLink utilization is shown in the NVLink Tables section. NVLink links are colored according to the measured throughput. NVIDIA Nsight Compute CLI --kernel-regex and --kernel-regex-base options are no longer supported. Alternate options are --kernel-name and --kernel-name-base respectively, added in 2021.1.0. Added support to resolve CUDA source files in the --page source output with the new --resolve-source-file command line option . Added new option --target-processes-filter to filter the processes being profiled by name. The CPU Stack Trace is shown in the NVIDIA Nsight Compute CLI output. Resolved Issues Fixed the calculation of aggregated average instruction execution metrics in non-SASS views on the Source page. Fixed that atomic instructions are counted as both loads and stores in the Memory Analysis tables.", "keywords": []}, {"id": 288, "doc_id": 288, "filename": "ReleaseNotes/topics/updates-2022-1-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2022-1-1", "display_name": "Updates in 2022.1.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2022-1-1", "anchor": "", "priority": -1, "content": "General Filtering kernel launches or profile results based on NVTX domains/ranges now takes registered strings in the payload field into account, if the range name is empty. Added support for the suffix .max_rate for ratio metrics. Resolved Issues Fixed a crash during the disassembly of the kernel\u2019s SASS code for the Source page. Fixed a crash on exit of the NVIDIA Nsight Compute UI. Fixed a hang during profiling when CPU call stack collection is enabled. Fixed a failure to flush UVM buffers before taking memory checkpoints during Range Replay . Fixed tracking of memory during Range Replay when the CUDA context has any device-mapped memory allocations. 
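Picking up the --target-processes-filter option added in 2022.1 above, a hypothetical invocation (process and script names are placeholders): ncu --target-processes all --target-processes-filter my_app -o report ./launcher.sh profiles only those child processes whose executable name matches my_app. A sketch based on the option description, not a verbatim documentation example. 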
Fixed the maximum available shared memory sizes in the Occupancy Calculator for NVIDIA Ampere GPUs. Fixed that the shared memory usage of the kernel was incorrectly initialized when opening the Occupancy Calculator from a profile report.", "keywords": []}, {"id": 289, "doc_id": 289, "filename": "ReleaseNotes/topics/updates-2022-2.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2022-2", "display_name": "Updates in 2022.2", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2022-2", "anchor": "", "priority": -1, "content": "General Added support for the CUDA toolkit 11.7. Improved performance for profiling and metric queries. Added Linux (aarch64 sbsa) as a supported host platform . The NVIDIA Nsight Compute CLI stores the command line arguments, which can be viewed in the Session report page. Added an API to query the version of the Python Report and NvRules interfaces. Added an API to query the PTX in the Python Report and NvRules interfaces. NVIDIA Nsight Compute The Acceleration Structure Viewer allows inspection of acceleration structures built using the OptiX API for debugging and performance optimization. The Source page column chooser now supports enabling or disabling groups of metrics. Note that not all metrics are enabled by default anymore, to make the view easier to use. The Resources tool window now links to the exact target resource instances for CUDA resource types. The Resources tool window now shows the instanced nodes for CUDA graphs. The Resources tool window now shows the loading state and number of loaded functions for CUDA Modules . The Resources tool window now shows the graph node enablement state for applicable instanced graph nodes. The Resources tool window now shows the graph node priorities for instanced kernel graph nodes. Added regex support in the Next Trigger filter for NVTX based filtering. The Next Trigger filter now considers the NVTX config as a regular expression if the regex: prefix is specified. Added regex support in the report\u2019s Filter Results dialog. Added keyboard shortcuts to navigate between the pages in a report. The behavior for selecting sets and sections is now consistent between the Sections/Rules Info window and the non-interactive profile activity . Reports can now be opened directly from the welcome dialog. NVIDIA Nsight Compute CLI Added support for collecting sampling-based warp stalls in range replay mode. Added regex support in NVTX filtering . The metric type is shown when querying metrics. Resolved Issues Reduced overhead of connecting to the host UI for non-interactive remote profiling sessions. Fixed issues with persisting the Source page state when collapsing or switching between results. Fixed an issue where locked GPU clocks were not reset when terminating the NVIDIA Nsight Compute CLI while profiling a kernel. Fixed issues with selecting and copying text from the Details page tables. Fixed an issue with opening report files in the UI on MacOSX. Fixed an issue with the Freeze API option.", "keywords": []}, {"id": 290, "doc_id": 290, "filename": "ReleaseNotes/topics/updates-2022-2-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2022-2-1", "display_name": "Updates in 2022.2.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2022-2-1", "anchor": "", "priority": -1, "content": "Resolved Issues Fixed an issue that caused some tooltips to not show up for the charts on the Details page. 
Fixed the incorrect reporting of the accessed bytes for LDGSTS (access) traffic in the L1TEX memory table. Fixed an issue that resulted in an empty view on the Source page after resolving multiple source files at once. Fixed a failure to connect to remote machines over SSH due to a mismatch in the configuration of data compression. Fixed a potential failure to profile kernels on multiple devices of the same type of chip. The failure occurred when attempting to profile on the second device.", "keywords": []}, {"id": 291, "doc_id": 291, "filename": "ReleaseNotes/topics/updates-2022-3.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2022-3", "display_name": "Updates in 2022.3", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2022-3", "anchor": "", "priority": -1, "content": "General Added support for the CUDA toolkit 11.8. Added support for the Ada GPU architecture. Added support for the Hopper GPU architecture. Added support for OptiX 7.6 . Added the uncoalescedGlobalAccesses sample CUDA application and a document showing how NVIDIA Nsight Compute can be used to identify uncoalesced memory accesses that result in inefficient DRAM accesses. Refer to the README, sample code and document under extras/samples/uncoalescedGlobalAccesses . Added a Metrics Reference to the documentation that lists metrics not available through --query-metrics . Reduced the overhead of collecting SASS-patching based metrics. On Multi-Instance GPU (MIG) configurations, NVIDIA Nsight Compute cannot lock clocks anymore. Users are expected to lock clocks externally using nvidia-smi. NVIDIA Nsight Compute The wrapper script nv-nsight-cu is deprecated in favor of ncu-ui and will be removed in a future release. The Source page supports range replay results. Added a second chart on the Compute Workload Analysis section to avoid mixing metrics with different meanings. NVIDIA Nsight Compute now tracks traversable handles created with optixAccelRelocate . NVIDIA Nsight Compute now tracks traversable handles created as updates from others. The Acceleration Structure viewer now reports unsupported inputs. The Acceleration Structure viewer now supports opening multiple traversable handles. The Acceleration Structure viewer now uses OptiX naming for displayed elements. NVIDIA Nsight Compute CLI The wrapper script nv-nsight-cu-cli is deprecated in favor of ncu and will be removed in a future release. Added new option --filter-mode per-gpu to enable filtering of kernel launches on each GPU separately. Added new option --app-replay-mode relaxed to produce profiling results for valid kernels even if the number of kernel launches is inconsistent across application replay passes. Added a documentation section on supported environment variables . Improved the performance when loading existing reports on the command line. Resolved Issues Fixed an issue when resolving files on the Source page. Fixed an issue when profiling OptiX applications. Fixed an issue in the OptiX traversable handle management caused by clashing handle values. 
Fixed an issue in the Acceleration Structure viewer causing the display of invalid memory when viewing AABB buffers.", "keywords": []}, {"id": 292, "doc_id": 292, "filename": "ReleaseNotes/topics/updates-2022-4.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2022-4", "display_name": "Updates in 2022.4", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2022-4", "anchor": "", "priority": -1, "content": "General Added support for the CUDA toolkit 12.0. Added support for profiling CUDA graphs as complete workloads instead of as single kernel nodes. Enable this using the Graph Profiling option in the activities . Similar to range replay results, selected metrics are not available when profiling graphs. Added support for profiling on Windows Subsystem for Linux (WSL2). See the System Requirements for more details. The deprecated nv-nsight-cu and nv-nsight-cu-cli aliases have been removed in favor of ncu-ui and ncu . NVIDIA Nsight Compute The Source page now loads disassembly and static analysis results asynchronously in the background. Added a new Metric Details tool window to inspect metric information such as raw value, unit, description or instance values. Open the tool window and select a metric on the Details or Raw page or look up any metric in the focused report directly in the tool window\u2019s search bar. In the Source page PTX view, the source name will be shown as a list of comma-separated files. Added flexibility with NVTX based filtering in the Next Trigger filter, similar to the command line. Filters can now use nvtx-include and nvtx-exclude expressions by adding the nvtx-include: or nvtx-exclude: prefix. NVTX views now show the payload type. Simplified the command line generated by the Profile activity . Reduced the number of steps required to re-run the Profile activity . Renaming Baselines in-place has been improved. The Resources tool window now shows the CUDA Dynamic Parallelism state for CUDA functions and modules. OptiX traversable handles can now be exported as Graphviz DOT or SVG files for visualization from the Resources tool window. All OptiX build, instance and geometry flags can be viewed in the Acceleration Structure Viewer . Added OptiX-specific highlight filters to the Acceleration Structure Viewer. Added support for user-specified index strides to the Acceleration Structure Viewer. NVIDIA Nsight Compute CLI Added new option --graph-profiling graph to enable profiling of complete CUDA graphs as single workloads. Added new option --filter-mode per-launch-config to enable filtering of kernel launches for each GPU launch parameter separately. Added support for printing section body item metrics on the details page with the new --print-details  command line option . Added support for selecting what to show in the Metric Name column on the details page with the new --print-metric-name  command line option . Removed deprecated options: --units , --fp , --summary and --kernel-base . Added support for printing launch, session, process and device attributes on the session page with the new --page session option. Added --kill yes support for application replay mode. Resolved Issues Fixed an issue where NVIDIA Nsight Compute could crash when continuing profiling after transposing the Raw page table. Fixed an issue that caused closing a report document to be delayed by pending source analysis. Fixed support for profiling applications with older OptiX versions.
Fixed display of OptiX module inputs for IR and built-in modules.", "keywords": []}, {"id": 293, "doc_id": 293, "filename": "ReleaseNotes/topics/updates-2022-4-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2022-4-1", "display_name": "Updates in 2022.4.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2022-4-1", "anchor": "", "priority": -1, "content": "General Improved the documentation for the NvRules API. The Python report interface now links libstdc++ statically. Resolved Issues Fixed an issue that enabled profiling on CUDA Graph uploads. Fixed formatting issues during unit conversion of metric instances. Fixed an issue that could lead to a crash during application replay. Fixed an issue that could lead to a crash in the Python report interface. Fixed typos in the metrics reference documentation and descriptions.", "keywords": []}, {"id": 294, "doc_id": 294, "filename": "ReleaseNotes/topics/updates-2023-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2023-1", "display_name": "Updates in 2023.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2023-1", "anchor": "", "priority": -1, "content": "General Added support for the CUDA toolkit 12.1. Added a new app-range replay mode to profile ranges without API capture by relaunching the entire application multiple times. Added the sharedBankConflicts sample CUDA application and document to show how NVIDIA Nsight Compute can be used to analyze and identify shared memory bank conflicts that result in inefficient shared memory accesses. Refer to the README.TXT file, sample code and document under extras/samples/sharedBankConflicts . Jupyter notebook samples are available in the Nsight training GitHub repository . The equivalent of the high-level Python report interface is now available in rule files. NVIDIA Nsight Compute Added support for profiling individual metrics in the Interactive Profile activity . A new input field for metrics was added in the Metric Selection tool window. Files on remote systems can be opened directly from the menu . Metric- and section-related entries in the menu, Profile activity and Metric Selection tool window were renamed to make them clearer. CPU and GPU NUMA topology metrics can be collected on applicable systems. Topology information is shown in a new NUMA Affinity section . Added content-aware suggestions to the Details page, based on the selected profiling options. Added support for re-resolving source files on the Source page. Not-issued warp stall reasons are removed from the Source Counters section tables and hidden by default on the Source page. Users should focus on regular warp stall reasons by default and only inspect not-issued samples if this distinction is needed. Added support for searching for missing CUDA source files to permanently import into the report using Source Lookup options in the Interactive Profile activity . The Source page now shows metric values as percentages by default. New buttons are added to support switching between different value modes. NVIDIA Nsight Compute CLI Added support for config files in the current working or user directory to set default ncu parameters. See the General options for more details. Added the --range-filter  command line option which allows selecting a subset of enabled profile ranges. Added the new --source-folders  command line option that allows recursively searching for missing CUDA source files to permanently import into the report.
Resolved Issues Fixed performance issues on the Summary and Raw pages for large reports. Improved support for non-ASCII characters in filenames. Fixed an issue with delayed updates of assembly analysis information on the Source page\u2019s Source and PTX views. Fixed potential crashes when using the Python report interface.", "keywords": []}, {"id": 295, "doc_id": 295, "filename": "ReleaseNotes/topics/updates-2023-1-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2023-1-1", "display_name": "Updates in 2023.1.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2023-1-1", "anchor": "", "priority": -1, "content": "NVIDIA Nsight Compute Added new configuration options to set the default view mode and precision for the Source page. Resolved Issues Added support for the DT_RUNPATH attribute when intercepting calls to dlopen . Fixed an issue where applications or libraries relying on DT_RUNPATH did not find all dynamic libraries when launched by NVIDIA Nsight Compute. Improved interaction between custom additional metrics and the selected metric set. Adding custom metrics no longer forces switching to the custom metric set. Added the ability to gracefully skip folders with insufficient access permissions while importing source code. Fixed the calculation of the peak values for the L1 and L2 cache bandwidths in the hierarchical roofline charts. Fixed an issue that prevented modules loaded with the function optixModuleCreateFromPTX from showing up in the Optix: Modules table of the Resources tool window. Fixed handling of deprecated functions when querying function pointers from the OptiX interception library. Fixed an issue where sections or rules sometimes could not be easily selected in the tool window. Fixed an issue with Reset Application Data that prevented some settings from resetting correctly. Fixed potential crash of NVIDIA Nsight Compute when Reset Application Data was executed multiple times in a row. Fixed a crash when saving or loading baselines for non-kernel results. Fixed an issue where memory written while executing a CUDA graph was not properly restored in single-pass graph profiling. Fixed potential memory leak while collecting SW counters for modules with unpatched kernel functions.", "keywords": []}, {"id": 296, "doc_id": 296, "filename": "ReleaseNotes/topics/updates-2023-2.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2023-2", "display_name": "Updates in 2023.2", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2023-2", "anchor": "", "priority": -1, "content": "General Extended the rules system to show estimates of the potential speedup that can be achieved by addressing the corresponding performance bottleneck. These speedups allow prioritizing applicable rules and help focus first on optimization strategies with the highest potential performance gain. Added support for rules to highlight individual source lines. Lines with global/local memory access with high excessive sector counts and shared accesses with many bank conflicts are automatically detected and highlighted. Added the ability to query metric attributes in the NvRules API. Added support for creating instanced metrics through the NvRules API. For Orin+ mobile chips on the Linux aarch64 platform, added metrics ( mcc__* ) support for the memory controller channel (MC Channel) unit, which connects to the DRAM. NVIDIA Nsight Compute Added hyperlinks to the SASS View of the Source Page for instructions that reference others by address or offset.
This makes it possible to quickly jump to the target instruction of a branch. Improved the search bar in the Metric Details tool window. The search string now matches any part of the metric names, and the matching results are shown in a sorted order. Added a visual indication of the scale of metric value changes when baselines are used. The background bars in the table cells of the Details Page make it easy to identify which metric values increased or decreased the most. The color scheme can be configured in the Baselines tool window . Added a rules toggle button on the Summary Page. It allows hiding the bottom pane with the rules output for the selected kernel launch. Added support for configuring properties on the Summary Page using the Metrics/Properties profile option . Added percentage bars on the Summary Page . NVIDIA Nsight Compute CLI Added support for tracking child processes launched with posix_spawn(p) when using --target-processes all . Added support for tracking child processes launched with system() on Windows and Linux (aarch64, x86_64) when using --target-processes all . Resolved Issues Fixed table alignment in the output of the NVIDIA Nsight Compute CLI on Windows when printing Unicode characters. Fixed view corruption in the Source Page after switching from the collapsed view to the expanded view. Fixed missing tooltip descriptions for some SASS instructions. Fixed potential crash when copying from the Resources tool window using CTRL+C. Fixed possible crash when restoring sections in the Sections tool window.", "keywords": []}, {"id": 297, "doc_id": 297, "filename": "ReleaseNotes/topics/updates-2023-2-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2023-2-1", "display_name": "Updates in 2023.2.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2023-2-1", "anchor": "", "priority": -1, "content": "Resolved Issues Fixed a crash during application replay when the temporary directory was located on a network file system (NFS). Improved the detection mechanism for the C2C interface. Added caching of the detected configuration to reduce overhead.", "keywords": []}, {"id": 298, "doc_id": 298, "filename": "ReleaseNotes/topics/updates-2023-2-2.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2023-2-2", "display_name": "Updates in 2023.2.2", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2023-2-2", "anchor": "", "priority": -1, "content": "Resolved Issues Fixed possible crash when profiling CUDA graphs on multiple GPUs. Fixed the detection mechanism of the C2C interface, i.e. metric c2clink__present . The fix requires the display driver shipping with this release or any newer driver.", "keywords": []}, {"id": 299, "doc_id": 299, "filename": "ReleaseNotes/topics/updates-2023-3.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2023-3", "display_name": "Updates in 2023.3", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2023-3", "anchor": "", "priority": -1, "content": "General NVIDIA Nsight Compute now supports collecting many metrics by sampling the GPU\u2019s performance monitors (PM) periodically at fixed intervals. The results can be visualized on a timeline . Added WSL profiling support on Windows 10 WSL with OS build version 19044 and greater. WSL profiling is not supported on Windows 10 WSL for systems that exceed 1 TB of system memory. Rule outputs are prioritized to improve the accuracy of estimated speedups.
The Summary page now shows the most actionable optimization advice when a result row is selected. Improved the handling and reporting for unavailable metrics during collection and when applying rules. Added the instructionMix sample CUDA application and document to show how to use NVIDIA Nsight Compute to analyze and identify the performance bottleneck due to an imbalanced instruction mix. Refer to the README.TXT file, sample code, and document under extras/samples/instructionMix . NVIDIA Nsight Compute Added support for viewing the source files of two profile results side by side using Source Comparison . This allows you to quickly identify source differences and understand changes in metric values. The Summary page is now the default page when a report is opened. Previous behavior can be enabled in the options dialog. On the Summary and Raw pages, values from all/selected rows are automatically aggregated in the column header for applicable metrics. Selected individual cells are aggregated in the bottom status bar. Added Launch Name and Device options in the filter dialog launched by the Apply Filters button in the report header . Added support for source view profiles that persist the Source page configuration and allow you to re-apply it to other reports. The Metric Details tool window now supports querying metrics beyond the current report by using the chip:<chipname> tag in the search. Added support for CUDA Graph Edge Data (such as port and dependency type) and CUDA Graph Conditional Handles in the Resources tool window. The Acceleration Structure Viewer and Resources tool window now support OptiX Opacity Micromaps. NVIDIA Nsight Compute CLI Tracking and profiling all child processes ( --target-processes all ) is now the default for ncu. Improved reporting of requested but unavailable metrics. Metrics requested in section files are by default considered optional and only cause a warning to be shown. Resolved Issues Support for tracking child processes launched with system() is available on Linux ppc64le. Improved the behavior of following SASS navigation links on the Source page. Fixed issues with profiling CUDA graphs in graph-profiling mode when nodes are associated with a non-current CUDA context. Fixed an issue in L2 bandwidth calculations in the hierarchical roofline sections.", "keywords": []}, {"id": 300, "doc_id": 300, "filename": "ReleaseNotes/topics/updates-2023-3-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2023-3-1", "display_name": "Updates in 2023.3.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2023-3-1", "anchor": "", "priority": -1, "content": "General Switched to using OpenSSL version 1.1.1w. Improved the speedup estimates for rule IssueSlotUtilization as well as its child rules. Updated report files and documentation for the samples located at extras/samples/ . Resolved Issues Fixed collection of context switch data during PM Sampling when using Range Replay . Fixed potential crash of NVIDIA Nsight Compute when an invalid regular expression was provided as a requested metric. Improved the performance of NVIDIA Nsight Compute in cases where only a single process is being profiled and --target-processes all was specified. Fixed an issue where too-high register counts were reported on the Source Page. Fixed a bug that could cause a GPU fault while collecting SW counters through PerfWorks.
Fixed incorrect baseline values shown for the Runtime Improvement values on the Summary Page.", "keywords": []}, {"id": 301, "doc_id": 301, "filename": "ReleaseNotes/topics/updates-2024-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2024-1", "display_name": "Updates in 2024.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2024-1", "anchor": "", "priority": -1, "content": "General Switched to using OpenSSL version 3.0.10. Added new metrics available when profiling on CUDA Green Contexts. Reduced the number of passes required for collecting PM sampling sections. Counter domains can now be specified for PM sampling metrics in section files. PM sampling metrics can now be queried in the command line and Metric Details window by specifying the respective collection option. Added a new optional PmSampling_WarpStates section for understanding warp stall reasons over the workload duration. Added a new rule for detecting load imbalances. Improved the performance of graph-level profiling on new drivers. Updated the metrics compatibility table for OptiX cmdlists and instruction-level SASS metrics. NVIDIA Nsight Compute Added SASS view and Source Markers support in Source Comparison . Improved Source Comparison diff visualization by adding empty lines on the other side of inserted/deleted lines. The Source page column chooser can now be opened directly from the Navigation drop-down. Added a Launch Details tool window for showing information about individual launches within larger workloads like OptiX command lists. Added support for CUDA Green Contexts in the Resources tool window, the Launch Statistics section and the report header. NVIDIA Nsight Compute CLI Improved documentation on NVTX expressions and command line output when a potentially incorrect expression led to no workloads being profiled. Improved checking for invalid expressions when using the --target-processes-filter option. Resolved Issues Fixed an issue where the L1 cache achieved roofline value was missing when profiling on GH100. Fixed several \u201cLaunch Failed\u201d errors when collecting instruction-level SASS metrics. Fixed an issue where Live Register values would be too high for some workloads. Fixed a scrolling issue on the Source page when collapsing a multi-file view. Fixed an issue where no PM sampling data would be shown in the timeline when context switch trace was not available. Fixed a display issue in the memory chart when adding baselines. Fixed a crash when adding baselines. Fixed a crash in timeline views when not all configured data was available. Fixed an issue where the application history was not always deleted when selecting Reset Application Data. Fixed an error in the metric compatibility documentation.", "keywords": []}, {"id": 302, "doc_id": 302, "filename": "ReleaseNotes/topics/updates-2024-1-1.html", "domain_name": "std", "name": "ReleaseNotes/topics/updates-2024-1-1", "display_name": "Updates in 2024.1.1", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/topics/updates-2024-1-1", "anchor": "", "priority": -1, "content": "General Added clarification that for profiling a range with multiple, active CUDA Green Contexts, counter values that are not attributable to SMs will be aggregated over all these Green Contexts. Resolved Issues Changed the way the PerfWorks library is loaded into the target application\u2019s process space. This addresses possible connection errors in case the library search path includes other directories with PerfWorks libraries.
Fixed an issue that caused PM sampling data to be missing from the results of a Profile Series. Fixed the incorrect calculation of the percentage values in the Inline Function table. Fixed a potential crash of the NVIDIA Nsight Compute UI when PM sampling data was requested, but no sample was collected.", "keywords": []}, {"id": 303, "doc_id": 305, "filename": "Training/index.html", "domain_name": "page", "name": "Training/index#external-resources", "display_name": "External Resources", "type": "section", "display_type": "Page section", "docname": "Training/index", "anchor": "external-resources", "priority": -1, "content": "Forum DevTalk Nsight Compute Forum Blogs Check the current list of blog posts Videos Check the current list of training videos Code Examples Have a look at our coding examples on GitHub", "keywords": []}, {"id": 304, "doc_id": 305, "filename": "Training/index.html", "domain_name": "page", "name": "Training/index#filter-options", "display_name": "Filter Options", "type": "section", "display_type": "Page section", "docname": "Training/index", "anchor": "filter-options", "priority": -1, "content": "Note that examples will use the term workload to refer to either kernels, graphs, ranges or cmdlists unless stated otherwise. Profile first two workloads --launch-count 2 Profile first two workloads launched on device with device ID 1 --device 1 --launch-count 2 Profile 2nd workload on each GPU --launch-skip 1 --launch-count 1 --filter-mode per-gpu Skip first 2 workloads of each launch configuration before profiling --launch-skip 2 --filter-mode per-launch-config Profile \u201cBar\u201d kernel --kernel-name Bar Profile kernels which have \u201cBar\u201d in function name --kernel-name regex:Bar Profile only 2nd invocation of kernel \u201cFoo\u201d --kernel-id ::Foo:2 Profile only 2nd invocation of all kernels which have \u201cBar\u201d in name --kernel-id ::regex:Bar:2 Skip first 2 workloads before matching \u201cFoo\u201d or \u201cBar\u201d in kernel names --launch-skip-before-match 2 --kernel-name regex:\"Foo|Bar\" Profile all 7th kernel invocations with mangled name \u201c_FooBar\u201d on CUDA context ID 1 and stream ID 2 --kernel-id 1:2:_FooBar:7 --kernel-name-base mangled Profile all workloads launched in first 3 cu(da)ProfilerStart/Stop APIs created ranges --range-filter :[1-3]: Profile all workloads launched in 2nd NVTX Push/Pop range A --range-filter ::2 --nvtx --nvtx-include A/ Profile all workloads launched in NVTX Push/Pop range A except the ones in NVTX Push/Pop range B --nvtx --nvtx-include A/ --nvtx-exclude B/ Profile all \u201cFoo\u201d kernels except those launched in NVTX Push/Pop range B --nvtx --nvtx-exclude B/ --kernel-name Foo Profile all workloads launched in 2nd NVTX Start/End range A inside 2nd cu(da)ProfilerStart/Stop APIs created range --range-filter yes:2:2 --nvtx --nvtx-include A Profile all workloads launched in 1st NVTX Push/Pop range A inside both 1st and 2nd cu(da)ProfilerStart/Stop APIs created range --range-filter yes:[1-2]:1 --nvtx --nvtx-include A/ Profile all workloads launched in 1st cu(da)ProfilerStart/Stop APIs created range with 2nd NVTX Push/Pop range A and domain D --range-filter no:1:2 --nvtx --nvtx-include D@A/
", "keywords": []}, {"id": 305, "doc_id": 305, "filename": "Training/index.html", "domain_name": "std", "name": "Training/index", "display_name": "Training", "type": "doc", "display_type": "Page", "docname": "Training/index", "anchor": "", "priority": -1, "content": "Nsight Compute training content. NVIDIA Nsight Compute Training resources.", "keywords": []}, {"id": 306, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction", "display_name": "NV::Rules::IAction", "type": "class", "display_type": "C++ class", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IActionE", "priority": 1, "content": "class  IAction Action interface. Represents an activity such as a CUDA kernel on a single range, for which zero or more metrics were collected Public Types", "keywords": []}, {"id": 307, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::NameBase", "display_name": "NV::Rules::IAction::NameBase", "type": "enum", "display_type": "C++ enum", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction8NameBaseE", "priority": 1, "content": "enum  class  NameBase Basis for action names. The basis determines how the action name is represented when calling name . FUNCTION is the minimal base name without parameters or return value. DEMANGLED is the demangled name, including parameters and return value. MANGLED is the mangled name, including parameters and return value.
Values:", "keywords": []}, {"id": 308, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::NameBase::DEMANGLED", "display_name": "NV::Rules::IAction::NameBase::DEMANGLED", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction8NameBase9DEMANGLEDE", "priority": 1, "content": "enumerator  DEMANGLED", "keywords": []}, {"id": 309, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::NameBase::FUNCTION", "display_name": "NV::Rules::IAction::NameBase::FUNCTION", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction8NameBase8FUNCTIONE", "priority": 1, "content": "enumerator  FUNCTION", "keywords": []}, {"id": 310, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::NameBase::MANGLED", "display_name": "NV::Rules::IAction::NameBase::MANGLED", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction8NameBase7MANGLEDE", "priority": 1, "content": "enumerator  MANGLED Public Functions", "keywords": []}, {"id": 311, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::add_double_metric", "display_name": "NV::Rules::IAction::add_double_metric", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction17add_double_metricEPKcN7IMetric9ValueKindEd", "priority": 1, "content": "virtual  IMetric  * add_double_metric const  char  * value_name IMetric :: ValueKind  value_kind double  value  =  0 Add a single floating point metric to this action. Add a single floating point metric with name value_name and kind value_kind to this action Returns the new IMetric", "keywords": []}, {"id": 312, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::add_double_metric::value", "display_name": "NV::Rules::IAction::add_double_metric::value", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction17add_double_metricEPKcN7IMetric9ValueKindEd", "priority": 1, "content": "virtual  IMetric  * add_double_metric const  char  * value_name IMetric :: ValueKind  value_kind double  value  =  0 Add a single floating point metric to this action. Add a single floating point metric with name value_name and kind value_kind to this action Returns the new IMetric", "keywords": []}, {"id": 313, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::add_double_metric::value_kind", "display_name": "NV::Rules::IAction::add_double_metric::value_kind", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction17add_double_metricEPKcN7IMetric9ValueKindEd", "priority": 1, "content": "virtual  IMetric  * add_double_metric const  char  * value_name IMetric :: ValueKind  value_kind double  value  =  0 Add a single floating point metric to this action. 
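As an illustration of the metric-creation functions above, here is a minimal sketch of a rule script using the Python NvRules bindings, which expose the same method names as the C++ interface. The rule boilerplate (get_identifier, apply, NvRules.get_context) follows the standard rule structure; the metric name derived__example_ratio is hypothetical, and the ValueKind_DOUBLE spelling assumes the usual flattening of the C++ IMetric::ValueKind enum in the bindings.

```python
import NvRules

def get_identifier():
    return "AddDoubleMetricExample"

def apply(handle):
    ctx = NvRules.get_context(handle)
    # One action per profiled workload; index 0 is the first result.
    action = ctx.range_by_idx(0).action_by_idx(0)
    # Add a floating point metric with an explicit kind and initial value.
    action.add_double_metric(
        "derived__example_ratio",          # hypothetical metric name
        NvRules.IMetric.ValueKind_DOUBLE,  # assumed enum spelling
        0.5,
    )
```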
Add a single floating point metric with name value_name and kind value_kind to this action Returns the new IMetric", "keywords": []}, {"id": 314, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::add_double_metric::value_name", "display_name": "NV::Rules::IAction::add_double_metric::value_name", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction17add_double_metricEPKcN7IMetric9ValueKindEd", "priority": 1, "content": "virtual  IMetric  * add_double_metric const  char  * value_name IMetric :: ValueKind  value_kind double  value  =  0 Add a single floating point metric to this action. Add a single floating point metric with name value_name and kind value_kind to this action Returns the new IMetric", "keywords": []}, {"id": 315, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::add_integer_metric", "display_name": "NV::Rules::IAction::add_integer_metric", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction18add_integer_metricEPKcN7IMetric9ValueKindE8uint64_t", "priority": 1, "content": "virtual  IMetric  * add_integer_metric const  char  * value_name IMetric :: ValueKind  value_kind uint64_t  value  =  0 Add a single integer metric to this action. Add a single integer metric with name value_name and kind value_kind to this action. Returns the new IMetric", "keywords": []}, {"id": 316, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::add_integer_metric::value", "display_name": "NV::Rules::IAction::add_integer_metric::value", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction18add_integer_metricEPKcN7IMetric9ValueKindE8uint64_t", "priority": 1, "content": "virtual  IMetric  * add_integer_metric const  char  * value_name IMetric :: ValueKind  value_kind uint64_t  value  =  0 Add a single integer metric to this action. Add a single integer metric with name value_name and kind value_kind to this action. Returns the new IMetric", "keywords": []}, {"id": 317, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::add_integer_metric::value_kind", "display_name": "NV::Rules::IAction::add_integer_metric::value_kind", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction18add_integer_metricEPKcN7IMetric9ValueKindE8uint64_t", "priority": 1, "content": "virtual  IMetric  * add_integer_metric const  char  * value_name IMetric :: ValueKind  value_kind uint64_t  value  =  0 Add a single integer metric to this action. Add a single integer metric with name value_name and kind value_kind to this action.
Returns the new IMetric", "keywords": []}, {"id": 318, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::add_integer_metric::value_name", "display_name": "NV::Rules::IAction::add_integer_metric::value_name", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction18add_integer_metricEPKcN7IMetric9ValueKindE8uint64_t", "priority": 1, "content": "virtual  IMetric  * add_integer_metric const  char  * value_name IMetric :: ValueKind  value_kind uint64_t  value  =  0 Add a single integer metric to this action. Add a single integer metric with name value_name and kind value_kind to this action. Returns the new IMetric", "keywords": []}, {"id": 319, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::add_metric", "display_name": "NV::Rules::IAction::add_metric", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction10add_metricEPKc", "priority": 1, "content": "virtual  IMutableMetric  * add_metric const  char  * value_name  =  0 Add a single mutable metric to this action. Add a single, mutable metric with name value_name to this action. The metric is not initialized with any kind or value and needs to be assigned proper values and/or instances afterwards. IMutableMetric provides the following interface: bool set_double(IMetric::ValueKind value_kind, double value) Return true if the value was set, false otherwise. Assign the metric a floating-point value of value_kind Acceptable kinds are FLOAT, DOUBLE or ANY (for which the implementation chooses the kind internally) bool set_double(size_t instance, IMetric::ValueKind value_kind, double value) Return true if the instance value was set, false otherwise. Assign the metric an instance floating-point value of value_kind Acceptable kinds are FLOAT, DOUBLE or ANY (for which the implementation chooses the kind internally) bool set_uint64(IMetric::ValueKind value_kind, uint64_t value) Return true if the value was set, false otherwise. Assign the metric an integer value of value_kind Acceptable kinds are UINT32, UINT64 or ANY (for which the implementation chooses the kind internally) bool set_uint64(size_t instance, IMetric::ValueKind value_kind, uint64_t value) Return true if the instance value was set, false otherwise. Assign the metric an instance integer value of value_kind Acceptable kinds are UINT32, UINT64 or ANY (for which the implementation chooses the kind internally) bool set_string(IMetric::ValueKind value_kind, const char* value) Return true if the value was set, false otherwise. Assign the metric a string value of value_kind Acceptable kinds are STRING or ANY (for which the implementation chooses the kind internally) bool set_string(size_t instance, IMetric::ValueKind value_kind, const char* value) Return true if the instance value was set, false otherwise. Assign the metric an instance string value of value_kind Acceptable kinds are STRING or ANY (for which the implementation chooses the kind internally) IMutableMetric *mutable_correlation_ids() Returns a new, mutable metric object representing the correlation IDs for the metric\u2019s instance values. Correlation IDs are used to associate instance values with the \u201cinstance\u201d their value represents.
In the returned new metric object, the correlation IDs are that object\u2019s instance values. Returns the new IMutableMetric", "keywords": []}, {"id": 320, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::add_metric::value_name", "display_name": "NV::Rules::IAction::add_metric::value_name", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction10add_metricEPKc", "priority": 1, "content": "virtual  IMutableMetric  * add_metric const  char  * value_name  =  0 Add a single mutable metric to this action. Add a single, mutable metric with name value_name to this action. The metric is not initialized with any kind or value and needs to be assigned proper values and/or instances afterwards. IMutableMetric provides the following interface: bool set_double(IMetric::ValueKind value_kind, double value) Return true if the value was set, false otherwise. Assign the metric a floating-point value of value_kind Acceptable kinds are FLOAT, DOUBLE or ANY (for which the implementation chooses the kind internally) bool set_double(size_t instance, IMetric::ValueKind value_kind, double value) Return true if the instance value was set, false otherwise. Assign the metric an instance floating-point value of value_kind Acceptable kinds are FLOAT, DOUBLE or ANY (for which the implementation chooses the kind internally) bool set_uint64(IMetric::ValueKind value_kind, uint64_t value) Return true if the value was set, false otherwise. Assign the metric an integer value of value_kind Acceptable kinds are UINT32, UINT64 or ANY (for which the implementation chooses the kind internally) bool set_uint64(size_t instance, IMetric::ValueKind value_kind, uint64_t value) Return true if the instance value was set, false otherwise. Assign the metric an instance integer value of value_kind Acceptable kinds are UINT32, UINT64 or ANY (for which the implementation chooses the kind internally) bool set_string(IMetric::ValueKind value_kind, const char* value) Return true if the value was set, false otherwise. Assign the metric a string value of value_kind Acceptable kinds are STRING or ANY (for which the implementation chooses the kind internally) bool set_string(size_t instance, IMetric::ValueKind value_kind, const char* value) Return true if the instance value was set, false otherwise. Assign the metric an instance string value of value_kind Acceptable kinds are STRING or ANY (for which the implementation chooses the kind internally) IMutableMetric *mutable_correlation_ids() Returns a new, mutable metric object representing the correlation IDs for the metric\u2019s instance values. Correlation IDs are used to associate instance values with the \u201cinstance\u201d their value represents. In the returned new metric object, the correlation IDs are that object\u2019s instance values.
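To make the IMutableMetric workflow above concrete, the following sketch adds an uninitialized metric, assigns an overall value, per-instance values, and correlation IDs. The method names mirror the interface listed above; the metric name and the ValueKind_* spellings are assumptions about the Python bindings.

```python
import NvRules

def get_identifier():
    return "InstancedMetricExample"

def apply(handle):
    ctx = NvRules.get_context(handle)
    action = ctx.range_by_idx(0).action_by_idx(0)

    # The new metric starts without any kind or value.
    metric = action.add_metric("derived__per_sm_count")  # hypothetical name
    # Overall value; ANY lets the implementation pick the concrete kind.
    metric.set_uint64(NvRules.IMetric.ValueKind_ANY, 40)
    for sm in range(4):
        # Instance values, e.g. one per SM ...
        metric.set_uint64(sm, NvRules.IMetric.ValueKind_UINT64, 10)
    # ... and correlation IDs mapping each instance value to its "instance".
    ids = metric.mutable_correlation_ids()
    for sm in range(4):
        ids.set_uint64(sm, NvRules.IMetric.ValueKind_UINT64, sm)
```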
Returns the new IMutableMetric", "keywords": []}, {"id": 321, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::add_string_metric", "display_name": "NV::Rules::IAction::add_string_metric", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction17add_string_metricEPKcN7IMetric9ValueKindEPKc", "priority": 1, "content": "virtual  IMetric  * add_string_metric const  char  * value_name IMetric :: ValueKind  value_kind const  char  * value  =  0 Add a single string metric to this action. Add a single string metric with name value_name and kind value_kind to this action Returns the new IMetric", "keywords": []}, {"id": 322, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::add_string_metric::value", "display_name": "NV::Rules::IAction::add_string_metric::value", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction17add_string_metricEPKcN7IMetric9ValueKindEPKc", "priority": 1, "content": "virtual  IMetric  * add_string_metric const  char  * value_name IMetric :: ValueKind  value_kind const  char  * value  =  0 Add a single string metric to this action. Add a single string metric with name value_name and kind value_kind to this action Returns the new IMetric", "keywords": []}, {"id": 323, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::add_string_metric::value_kind", "display_name": "NV::Rules::IAction::add_string_metric::value_kind", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction17add_string_metricEPKcN7IMetric9ValueKindEPKc", "priority": 1, "content": "virtual  IMetric  * add_string_metric const  char  * value_name IMetric :: ValueKind  value_kind const  char  * value  =  0 Add a single string metric to this action. Add a single string metric with name value_name and kind value_kind to this action Returns the new IMetric", "keywords": []}, {"id": 324, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::add_string_metric::value_name", "display_name": "NV::Rules::IAction::add_string_metric::value_name", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction17add_string_metricEPKcN7IMetric9ValueKindEPKc", "priority": 1, "content": "virtual  IMetric  * add_string_metric const  char  * value_name IMetric :: ValueKind  value_kind const  char  * value  =  0 Add a single string metric to this action. Add a single string metric with name value_name and kind value_kind to this action Returns the new IMetric", "keywords": []}, {"id": 325, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::metric_by_name", "display_name": "NV::Rules::IAction::metric_by_name", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction14metric_by_nameEPKc", "priority": 1, "content": "virtual  IMetric  * metric_by_name const  char  * metric_name  =  0 Get a single metric by name. 
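A typical read-side use of metric_by_name in a rule, guarded by metric_names so the rule degrades gracefully when a metric was not collected. The as_double accessor and the plain-string frontend message are assumptions about the Python bindings; dram__bytes_sum is just an example metric name.

```python
import NvRules

def apply(handle):
    ctx = NvRules.get_context(handle)
    action = ctx.range_by_idx(0).action_by_idx(0)
    # Guard against metrics that were not collected for this result.
    if "dram__bytes_sum" in action.metric_names():
        dram_bytes = action.metric_by_name("dram__bytes_sum").as_double()
        ctx.frontend().message("DRAM bytes: {}".format(dram_bytes))
```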
Get a single IMetric by metric_name", "keywords": []}, {"id": 326, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::metric_by_name::metric_name", "display_name": "NV::Rules::IAction::metric_by_name::metric_name", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction14metric_by_nameEPKc", "priority": 1, "content": "virtual  IMetric  * metric_by_name const  char  * metric_name  =  0 Get a single metric by name. Get a single IMetric by metric_name", "keywords": []}, {"id": 327, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::metric_names", "display_name": "NV::Rules::IAction::metric_names", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction12metric_namesEv", "priority": 1, "content": "virtual  std :: set < std :: string >  metric_names  =  0 Get the set of metric names. Get the set of metrics available for this action", "keywords": []}, {"id": 328, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::name", "display_name": "NV::Rules::IAction::name", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction4nameE8NameBase", "priority": 1, "content": "virtual  const  char  * name NameBase  base  =  NameBase :: FUNCTION  =  0 Get the action name. Get the action name", "keywords": []}, {"id": 329, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::name::base", "display_name": "NV::Rules::IAction::name::base", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction4nameE8NameBase", "priority": 1, "content": "virtual  const  char  * name NameBase  base  =  NameBase :: FUNCTION  =  0 Get the action name. Get the action name", "keywords": []}, {"id": 330, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::nvtx_state", "display_name": "NV::Rules::IAction::nvtx_state", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction10nvtx_stateEv", "priority": 1, "content": "virtual  INvtxState  * nvtx_state  =  0 Get the NVTX state associated with this action. Returns a new INvtxState object if any NVTX state is available for this action.", "keywords": []}, {"id": 331, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::ptx_by_pc", "display_name": "NV::Rules::IAction::ptx_by_pc", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction9ptx_by_pcE8uint64_t", "priority": 1, "content": "virtual  std :: string  ptx_by_pc uint64_t  address  =  0 Get the PTX for a function address within this action. Get the PTX for a function address within this action. The address is expected to be absolute, i.e. as obtained from the correlation IDs of source-correlated metrics. Returns PTX string.
An empty string is returned if no PTX is associated with this address", "keywords": []}, {"id": 332, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::ptx_by_pc::address", "display_name": "NV::Rules::IAction::ptx_by_pc::address", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction9ptx_by_pcE8uint64_t", "priority": 1, "content": "virtual  std :: string  ptx_by_pc uint64_t  address  =  0 Get the PTX for a function address within this action. Get the PTX for a function address within this action. The address is expected to be absolute, i.e. as obtained from the correlation IDs of source-correlated metrics. Returns PTX string. An empty string is returned if no PTX is associated with this address", "keywords": []}, {"id": 333, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::sass_by_pc", "display_name": "NV::Rules::IAction::sass_by_pc", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction10sass_by_pcE8uint64_t", "priority": 1, "content": "virtual  std :: string  sass_by_pc uint64_t  address  =  0 Get the disassembled SASS for a function address within this action. Get the disassembled SASS for a function address within this action. The address is expected to be absolute, i.e. as obtained from the correlation IDs of source-correlated metrics. The SASS may include formatting whitespace as generated by the disassembler. Returns SASS string. An empty string is returned if no SASS is associated with this address", "keywords": []}, {"id": 334, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::sass_by_pc::address", "display_name": "NV::Rules::IAction::sass_by_pc::address", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction10sass_by_pcE8uint64_t", "priority": 1, "content": "virtual  std :: string  sass_by_pc uint64_t  address  =  0 Get the disassembled SASS for a function address within this action. Get the disassembled SASS for a function address within this action. The address is expected to be absolute, i.e. as obtained from the correlation IDs of source-correlated metrics. The SASS may include formatting whitespace as generated by the disassembler. Returns SASS string. An empty string is returned if no SASS is associated with this address", "keywords": []}, {"id": 335, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::source_files", "display_name": "NV::Rules::IAction::source_files", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction12source_filesEv", "priority": 1, "content": "virtual  std :: map < std :: string ,  std :: string >  source_files  =  0 Get the source files associated with an action. Get the map of source files associated with this action, along with their content. If content is not available for a file (e.g. because it hadn\u2019t been imported into the report), the file name will map to an empty string.
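Combining the address-based lookups above, a rule can walk the instance values of a source-correlated metric and disassemble the instruction behind each address. This is a sketch: the metric name is hypothetical, and correlation_ids, num_instances, and as_uint64(instance) are assumed accessors of the IMetric interface.

```python
import NvRules

def apply(handle):
    ctx = NvRules.get_context(handle)
    action = ctx.range_by_idx(0).action_by_idx(0)
    metric = action.metric_by_name("derived__per_instruction_count")  # hypothetical
    ids = metric.correlation_ids()           # assumed IMetric accessor
    for i in range(metric.num_instances()):  # assumed IMetric accessor
        address = ids.as_uint64(i)           # absolute function address
        sass = action.sass_by_pc(address)    # "" if no SASS at this address
        ptx = action.ptx_by_pc(address)      # "" if no PTX at this address
```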
Returns a map from file name to file content", "keywords": []}, {"id": 336, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::source_info", "display_name": "NV::Rules::IAction::source_info", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction11source_infoE8uint64_t", "priority": 1, "content": "virtual  ISourceInfo  * source_info uint64_t  address  =  0 Get the source info for a function address within this action. Get the source info for address within this action. Addresses are commonly obtained as correlation ids of source-correlated metrics. Returns a new ISourceInfo object if it is available", "keywords": []}, {"id": 337, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::source_info::address", "display_name": "NV::Rules::IAction::source_info::address", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IAction11source_infoE8uint64_t", "priority": 1, "content": "virtual  ISourceInfo  * source_info uint64_t  address  =  0 Get the source info for a function address within this action. Get the source info for address within this action. Addresses are commonly obtained as correlation ids of source-correlated metrics. Returns a new ISourceInfo object if it is available", "keywords": []}, {"id": 338, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "cpp", "name": "NV::Rules::IAction::~IAction", "display_name": "NV::Rules::IAction::~IAction", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "_CPPv4N2NV5Rules7IActionD0Ev", "priority": 1, "content": "inline  virtual  ~IAction", "keywords": []}, {"id": 339, "doc_id": 339, "filename": "api/classNV_1_1Rules_1_1IAction.html", "domain_name": "std", "name": "api/classNV_1_1Rules_1_1IAction", "display_name": "NV::Rules::IAction", "type": "doc", "display_type": "Page", "docname": "api/classNV_1_1Rules_1_1IAction", "anchor": "", "priority": -1, "content": "", "keywords": []}, {"id": 340, "doc_id": 345, "filename": "api/classNV_1_1Rules_1_1IBaseContext.html", "domain_name": "cpp", "name": "NV::Rules::IBaseContext", "display_name": "NV::Rules::IBaseContext", "type": "class", "display_type": "C++ class", "docname": "api/classNV_1_1Rules_1_1IBaseContext", "anchor": "_CPPv4N2NV5Rules12IBaseContextE", "priority": 1, "content": "class  IBaseContext Subclassed by NV::Rules::IContext , NV::Rules::IEvaluator Public Functions", "keywords": []}, {"id": 341, "doc_id": 345, "filename": "api/classNV_1_1Rules_1_1IBaseContext.html", "domain_name": "cpp", "name": "NV::Rules::IBaseContext::get_version", "display_name": "NV::Rules::IBaseContext::get_version", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IBaseContext", "anchor": "_CPPv4N2NV5Rules12IBaseContext11get_versionEv", "priority": 1, "content": "virtual  std :: string  get_version  =  0 Get version number of this interface. 
Returns the version number of this interface as a string of the form <year>.<major>.<minor>. It matches the Nsight Compute version this interface originates from.", "keywords": []}, {"id": 342, "doc_id": 345, "filename": "api/classNV_1_1Rules_1_1IBaseContext.html", "domain_name": "cpp", "name": "NV::Rules::IBaseContext::handle_exception", "display_name": "NV::Rules::IBaseContext::handle_exception", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IBaseContext", "anchor": "_CPPv4N2NV5Rules12IBaseContext16handle_exceptionEPKc", "priority": 1, "content": "virtual  bool  handle_exception const  char  * msg  =  0 Context exception handler. Should return true if a Python exception should be set, false otherwise", "keywords": []}, {"id": 343, "doc_id": 345, "filename": "api/classNV_1_1Rules_1_1IBaseContext.html", "domain_name": "cpp", "name": "NV::Rules::IBaseContext::handle_exception::msg", "display_name": "NV::Rules::IBaseContext::handle_exception::msg", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IBaseContext", "anchor": "_CPPv4N2NV5Rules12IBaseContext16handle_exceptionEPKc", "priority": 1, "content": "virtual  bool  handle_exception const  char  * msg  =  0 Context exception handler. Should return true if a Python exception should be set, false otherwise", "keywords": []}, {"id": 344, "doc_id": 345, "filename": "api/classNV_1_1Rules_1_1IBaseContext.html", "domain_name": "cpp", "name": "NV::Rules::IBaseContext::~IBaseContext", "display_name": "NV::Rules::IBaseContext::~IBaseContext", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IBaseContext", "anchor": "_CPPv4N2NV5Rules12IBaseContextD0Ev", "priority": 1, "content": "inline  virtual  ~IBaseContext", "keywords": []}, {"id": 345, "doc_id": 345, "filename": "api/classNV_1_1Rules_1_1IBaseContext.html", "domain_name": "std", "name": "api/classNV_1_1Rules_1_1IBaseContext", "display_name": "NV::Rules::IBaseContext", "type": "doc", "display_type": "Page", "docname": "api/classNV_1_1Rules_1_1IBaseContext", "anchor": "", "priority": -1, "content": "", "keywords": []}, {"id": 346, "doc_id": 356, "filename": "api/classNV_1_1Rules_1_1IContext.html", "domain_name": "cpp", "name": "NV::Rules::IContext", "display_name": "NV::Rules::IContext", "type": "class", "display_type": "C++ class", "docname": "api/classNV_1_1Rules_1_1IContext", "anchor": "_CPPv4N2NV5Rules8IContextE", "priority": 1, "content": "class  IContext  :  public  NV :: Rules :: IBaseContext Context interface. The rule context provides the rule with access to all collected data, properties and means to relay results back to the caller. Public Functions", "keywords": []}, {"id": 347, "doc_id": 356, "filename": "api/classNV_1_1Rules_1_1IContext.html", "domain_name": "cpp", "name": "NV::Rules::IContext::controller", "display_name": "NV::Rules::IContext::controller", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IContext", "anchor": "_CPPv4N2NV5Rules8IContext10controllerEv", "priority": 1, "content": "virtual  IController  * controller  =  0 Get the controller object.
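Because the interface version string has the form <year>.<major>.<minor>, a rule can gate optional functionality on it; a minimal sketch:

```python
import NvRules

def apply(handle):
    ctx = NvRules.get_context(handle)
    year, major, minor = ctx.get_version().split(".")
    # Only use newer interface features when the tool is recent enough.
    if int(year) >= 2023:
        pass  # e.g. create instanced metrics, query metric attributes, ...
```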
Get the IController object", "keywords": []}, {"id": 348, "doc_id": 356, "filename": "api/classNV_1_1Rules_1_1IContext.html", "domain_name": "cpp", "name": "NV::Rules::IContext::frontend", "display_name": "NV::Rules::IContext::frontend", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IContext", "anchor": "_CPPv4N2NV5Rules8IContext8frontendEv", "priority": 1, "content": "virtual  IFrontend  * frontend  =  0 Get the frontend object. Get the IFrontend object", "keywords": []}, {"id": 349, "doc_id": 356, "filename": "api/classNV_1_1Rules_1_1IContext.html", "domain_name": "cpp", "name": "NV::Rules::IContext::get_version", "display_name": "NV::Rules::IContext::get_version", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IContext", "anchor": "_CPPv4N2NV5Rules8IContext11get_versionEv", "priority": 1, "content": "virtual  std :: string  get_version  =  0 Get version number of this interface. Returns the version number of this interface as a string of the form <year>.<major>.<minor>. It matches the Nsight Compute version this interface originates from.", "keywords": []}, {"id": 350, "doc_id": 356, "filename": "api/classNV_1_1Rules_1_1IContext.html", "domain_name": "cpp", "name": "NV::Rules::IContext::handle_exception", "display_name": "NV::Rules::IContext::handle_exception", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IContext", "anchor": "_CPPv4N2NV5Rules8IContext16handle_exceptionEPKc", "priority": 1, "content": "virtual  bool  handle_exception const  char  * msg  =  0 Context exception handler. Should return true if a Python exception should be set, false otherwise", "keywords": []}, {"id": 351, "doc_id": 356, "filename": "api/classNV_1_1Rules_1_1IContext.html", "domain_name": "cpp", "name": "NV::Rules::IContext::handle_exception::msg", "display_name": "NV::Rules::IContext::handle_exception::msg", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IContext", "anchor": "_CPPv4N2NV5Rules8IContext16handle_exceptionEPKc", "priority": 1, "content": "virtual  bool  handle_exception const  char  * msg  =  0 Context exception handler. Should return true if a Python exception should be set, false otherwise", "keywords": []}, {"id": 352, "doc_id": 356, "filename": "api/classNV_1_1Rules_1_1IContext.html", "domain_name": "cpp", "name": "NV::Rules::IContext::num_ranges", "display_name": "NV::Rules::IContext::num_ranges", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IContext", "anchor": "_CPPv4N2NV5Rules8IContext10num_rangesEv", "priority": 1, "content": "virtual  size_t  num_ranges  =  0 Get the number of available ranges. Get the number of available ranges", "keywords": []}, {"id": 353, "doc_id": 356, "filename": "api/classNV_1_1Rules_1_1IContext.html", "domain_name": "cpp", "name": "NV::Rules::IContext::range_by_idx", "display_name": "NV::Rules::IContext::range_by_idx", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IContext", "anchor": "_CPPv4N2NV5Rules8IContext12range_by_idxE6size_t", "priority": 1, "content": "virtual  IRange  * range_by_idx size_t  idx  =  0 Get a single range by index.
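Putting the IContext accessors together with the frontend, a rule typically iterates all ranges and the actions within each range. num_actions and action_by_idx on IRange are assumed to mirror the context's index-based accessors shown here.

```python
import NvRules

def apply(handle):
    ctx = NvRules.get_context(handle)
    fe = ctx.frontend()
    for r in range(ctx.num_ranges()):
        rng = ctx.range_by_idx(r)
        for a in range(rng.num_actions()):  # assumed IRange accessor
            action = rng.action_by_idx(a)   # assumed IRange accessor
            # name() defaults to NameBase::FUNCTION.
            fe.message("Profiled kernel: " + action.name())
```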
Get a single IRange by index", "keywords": []}, {"id": 354, "doc_id": 356, "filename": "api/classNV_1_1Rules_1_1IContext.html", "domain_name": "cpp", "name": "NV::Rules::IContext::range_by_idx::idx", "display_name": "NV::Rules::IContext::range_by_idx::idx", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IContext", "anchor": "_CPPv4N2NV5Rules8IContext12range_by_idxE6size_t", "priority": 1, "content": "virtual  IRange  * range_by_idx size_t  idx  =  0 Get a single range by index. Get a single IRange by index", "keywords": []}, {"id": 355, "doc_id": 356, "filename": "api/classNV_1_1Rules_1_1IContext.html", "domain_name": "cpp", "name": "NV::Rules::IContext::~IContext", "display_name": "NV::Rules::IContext::~IContext", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IContext", "anchor": "_CPPv4N2NV5Rules8IContextD0Ev", "priority": 1, "content": "inline  virtual  ~IContext", "keywords": []}, {"id": 356, "doc_id": 356, "filename": "api/classNV_1_1Rules_1_1IContext.html", "domain_name": "std", "name": "api/classNV_1_1Rules_1_1IContext", "display_name": "NV::Rules::IContext", "type": "doc", "display_type": "Page", "docname": "api/classNV_1_1Rules_1_1IContext", "anchor": "", "priority": -1, "content": "", "keywords": []}, {"id": 357, "doc_id": 362, "filename": "api/classNV_1_1Rules_1_1IController.html", "domain_name": "cpp", "name": "NV::Rules::IController", "display_name": "NV::Rules::IController", "type": "class", "display_type": "C++ class", "docname": "api/classNV_1_1Rules_1_1IController", "anchor": "_CPPv4N2NV5Rules11IControllerE", "priority": 1, "content": "class  IController Controller interface. The controller can be used to interact with the tool runtime, e.g. to signal the tools to propose a follow-up rule. 
Public Functions", "keywords": []}, {"id": 358, "doc_id": 362, "filename": "api/classNV_1_1Rules_1_1IController.html", "domain_name": "cpp", "name": "NV::Rules::IController::get_message_vault", "display_name": "NV::Rules::IController::get_message_vault", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IController", "anchor": "_CPPv4N2NV5Rules11IController17get_message_vaultEv", "priority": 1, "content": "virtual  std :: shared_ptr &lt; IMessageVault &gt;  get_message_vault  =  0 Get an IMessageVault object that can be used for message passing between rules.", "keywords": []}, {"id": 359, "doc_id": 362, "filename": "api/classNV_1_1Rules_1_1IController.html", "domain_name": "cpp", "name": "NV::Rules::IController::propose_rule", "display_name": "NV::Rules::IController::propose_rule", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IController", "anchor": "_CPPv4N2NV5Rules11IController12propose_ruleEPKc", "priority": 1, "content": "virtual  void  propose_rule const  char  * rule  =  0 Propose the specified rule in the current context.", "keywords": []}, {"id": 360, "doc_id": 362, "filename": "api/classNV_1_1Rules_1_1IController.html", "domain_name": "cpp", "name": "NV::Rules::IController::propose_rule::rule", "display_name": "NV::Rules::IController::propose_rule::rule", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IController", "anchor": "_CPPv4N2NV5Rules11IController12propose_ruleEPKc", "priority": 1, "content": "virtual  void  propose_rule const  char  * rule  =  0 Propose the specified rule in the current context.", "keywords": []}, {"id": 361, "doc_id": 362, "filename": "api/classNV_1_1Rules_1_1IController.html", "domain_name": "cpp", "name": "NV::Rules::IController::~IController", "display_name": "NV::Rules::IController::~IController", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IController", "anchor": "_CPPv4N2NV5Rules11IControllerD0Ev", "priority": 1, "content": "inline  virtual  ~IController", "keywords": []}, {"id": 362, "doc_id": 362, "filename": "api/classNV_1_1Rules_1_1IController.html", "domain_name": "std", "name": "api/classNV_1_1Rules_1_1IController", "display_name": "NV::Rules::IController", "type": "doc", "display_type": "Page", "docname": "api/classNV_1_1Rules_1_1IController", "anchor": "", "priority": -1, "content": "", "keywords": []}, {"id": 363, "doc_id": 372, "filename": "api/classNV_1_1Rules_1_1IEvaluator.html", "domain_name": "cpp", "name": "NV::Rules::IEvaluator", "display_name": "NV::Rules::IEvaluator", "type": "class", "display_type": "C++ class", "docname": "api/classNV_1_1Rules_1_1IEvaluator", "anchor": "_CPPv4N2NV5Rules10IEvaluatorE", "priority": 1, "content": "class  IEvaluator  :  public  NV :: Rules :: IBaseContext Evaluator interface. The evaluator is used during rule setup to pass information about rule dependencies to the tool. For most cases, its Python wrapper functions can be used instead for convenience. 
Public Functions", "keywords": []}, {"id": 364, "doc_id": 372, "filename": "api/classNV_1_1Rules_1_1IEvaluator.html", "domain_name": "cpp", "name": "NV::Rules::IEvaluator::get_version", "display_name": "NV::Rules::IEvaluator::get_version", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IEvaluator", "anchor": "_CPPv4N2NV5Rules10IEvaluator11get_versionEv", "priority": 1, "content": "virtual  std :: string  get_version  =  0 Get version number of this interface. Returns the version number of this interface as a string of the form &lt;year&gt;.&lt;major&gt;.&lt;minor&gt; It matches the Nsight Compute version this interface originates from.", "keywords": []}, {"id": 365, "doc_id": 372, "filename": "api/classNV_1_1Rules_1_1IEvaluator.html", "domain_name": "cpp", "name": "NV::Rules::IEvaluator::handle_exception", "display_name": "NV::Rules::IEvaluator::handle_exception", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IEvaluator", "anchor": "_CPPv4N2NV5Rules10IEvaluator16handle_exceptionEPKc", "priority": 1, "content": "virtual  bool  handle_exception const  char  * msg  =  0 Context exception handler. Should return true if a python exception should be set, false otherwise", "keywords": []}, {"id": 366, "doc_id": 372, "filename": "api/classNV_1_1Rules_1_1IEvaluator.html", "domain_name": "cpp", "name": "NV::Rules::IEvaluator::handle_exception::msg", "display_name": "NV::Rules::IEvaluator::handle_exception::msg", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IEvaluator", "anchor": "_CPPv4N2NV5Rules10IEvaluator16handle_exceptionEPKc", "priority": 1, "content": "virtual  bool  handle_exception const  char  * msg  =  0 Context exception handler. 
Should return true if a python exception should be set, false otherwise", "keywords": []}, {"id": 367, "doc_id": 372, "filename": "api/classNV_1_1Rules_1_1IEvaluator.html", "domain_name": "cpp", "name": "NV::Rules::IEvaluator::require_metric", "display_name": "NV::Rules::IEvaluator::require_metric", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IEvaluator", "anchor": "_CPPv4N2NV5Rules10IEvaluator14require_metricEPKc", "priority": 1, "content": "virtual  void  require_metric const  char  * metric  =  0 Define that the specified metric must have been collected in order for the calling rule to be applied.", "keywords": []}, {"id": 368, "doc_id": 372, "filename": "api/classNV_1_1Rules_1_1IEvaluator.html", "domain_name": "cpp", "name": "NV::Rules::IEvaluator::require_metric::metric", "display_name": "NV::Rules::IEvaluator::require_metric::metric", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IEvaluator", "anchor": "_CPPv4N2NV5Rules10IEvaluator14require_metricEPKc", "priority": 1, "content": "virtual  void  require_metric const  char  * metric  =  0 Define that the specified metric must have been collected in order for the calling rule to be applied.", "keywords": []}, {"id": 369, "doc_id": 372, "filename": "api/classNV_1_1Rules_1_1IEvaluator.html", "domain_name": "cpp", "name": "NV::Rules::IEvaluator::require_rule", "display_name": "NV::Rules::IEvaluator::require_rule", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IEvaluator", "anchor": "_CPPv4N2NV5Rules10IEvaluator12require_ruleEPKc", "priority": 1, "content": "virtual  void  require_rule const  char  * rule  =  0 Define that the specified rule must be available and ready to be applied in order for the calling rule to be applied itself.", "keywords": []}, {"id": 370, "doc_id": 372, "filename": "api/classNV_1_1Rules_1_1IEvaluator.html", "domain_name": "cpp", "name": "NV::Rules::IEvaluator::require_rule::rule", "display_name": "NV::Rules::IEvaluator::require_rule::rule", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IEvaluator", "anchor": "_CPPv4N2NV5Rules10IEvaluator12require_ruleEPKc", "priority": 1, "content": "virtual  void  require_rule const  char  * rule  =  0 Define that the specified rule must be available and ready to be applied in order for the calling rule to be applied itself.", "keywords": []}, {"id": 371, "doc_id": 372, "filename": "api/classNV_1_1Rules_1_1IEvaluator.html", "domain_name": "cpp", "name": "NV::Rules::IEvaluator::~IEvaluator", "display_name": "NV::Rules::IEvaluator::~IEvaluator", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IEvaluator", "anchor": "_CPPv4N2NV5Rules10IEvaluatorD0Ev", "priority": 1, "content": "inline  virtual  ~IEvaluator", "keywords": []}, {"id": 372, "doc_id": 372, "filename": "api/classNV_1_1Rules_1_1IEvaluator.html", "domain_name": "std", "name": "api/classNV_1_1Rules_1_1IEvaluator", "display_name": "NV::Rules::IEvaluator", "type": "doc", "display_type": "Page", "docname": "api/classNV_1_1Rules_1_1IEvaluator", "anchor": "", "priority": -1, "content": "", "keywords": []}, {"id": 373, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend", "display_name": "NV::Rules::IFrontend", "type": "class", "display_type": "C++ class", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": 
"_CPPv4N2NV5Rules9IFrontendE", "priority": 1, "content": "class  IFrontend Frontend interface. The frontend is responsible for relaying messages and results to the caller via user interfaces, logs or output files. Public Types", "keywords": []}, {"id": 374, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::Dict", "display_name": "NV::Rules::IFrontend::Dict", "type": "type", "display_type": "C++ type", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend4DictE", "priority": 1, "content": "using  Dict  =  std :: map &lt; std :: string ,  double &gt;", "keywords": []}, {"id": 375, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::MarkerKind", "display_name": "NV::Rules::IFrontend::MarkerKind", "type": "enum", "display_type": "C++ enum", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend10MarkerKindE", "priority": 1, "content": "enum  class  MarkerKind Marker Kind. The kind of marker that is created in the Source Page with source_marker . SASS The marker will be associated with a SASS instruction. SOURCE The marker will be associated with a Source line. Values:", "keywords": []}, {"id": 376, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::MarkerKind::SASS", "display_name": "NV::Rules::IFrontend::MarkerKind::SASS", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend10MarkerKind4SASSE", "priority": 1, "content": "enumerator  SASS", "keywords": []}, {"id": 377, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::MarkerKind::SOURCE", "display_name": "NV::Rules::IFrontend::MarkerKind::SOURCE", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend10MarkerKind6SOURCEE", "priority": 1, "content": "enumerator  SOURCE", "keywords": []}, {"id": 378, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::MsgType", "display_name": "NV::Rules::IFrontend::MsgType", "type": "enum", "display_type": "C++ enum", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend7MsgTypeE", "priority": 1, "content": "enum  class  MsgType Message type. The type a message to the frontend represents when calling message . MSG_NONE No specific type for this message. MSG_OK The message is informative. MSG_OPTIMIZATION The message represents a suggestion for performance optimization. MSG_WARNING The message represents a warning or fixable issue. MSG_ERROR The message represents an error, potentially in executing the rule. 
Values:", "keywords": []}, {"id": 379, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::MsgType::MSG_ERROR", "display_name": "NV::Rules::IFrontend::MsgType::MSG_ERROR", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend7MsgType9MSG_ERRORE", "priority": 1, "content": "enumerator  MSG_ERROR", "keywords": []}, {"id": 380, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::MsgType::MSG_NONE", "display_name": "NV::Rules::IFrontend::MsgType::MSG_NONE", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend7MsgType8MSG_NONEE", "priority": 1, "content": "enumerator  MSG_NONE", "keywords": []}, {"id": 381, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::MsgType::MSG_OK", "display_name": "NV::Rules::IFrontend::MsgType::MSG_OK", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend7MsgType6MSG_OKE", "priority": 1, "content": "enumerator  MSG_OK", "keywords": []}, {"id": 382, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::MsgType::MSG_OPTIMIZATION", "display_name": "NV::Rules::IFrontend::MsgType::MSG_OPTIMIZATION", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend7MsgType16MSG_OPTIMIZATIONE", "priority": 1, "content": "enumerator  MSG_OPTIMIZATION", "keywords": []}, {"id": 383, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::MsgType::MSG_WARNING", "display_name": "NV::Rules::IFrontend::MsgType::MSG_WARNING", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend7MsgType11MSG_WARNINGE", "priority": 1, "content": "enumerator  MSG_WARNING", "keywords": []}, {"id": 384, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::RuleId", "display_name": "NV::Rules::IFrontend::RuleId", "type": "type", "display_type": "C++ type", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend6RuleIdE", "priority": 1, "content": "using  RuleId  =  std :: string", "keywords": []}, {"id": 385, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::Severity", "display_name": "NV::Rules::IFrontend::Severity", "type": "enum", "display_type": "C++ enum", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend8SeverityE", "priority": 1, "content": "enum  class  Severity Focus metric severity. The severity a focus metric represents when calling focus_metric . The severity can be used by the rule system client when processing the focus metric. SEVERITY_DEFAULT The default severity. SEVERITY_LOW Severity if low. SEVERITY_HIGH Severity if high. 
Values:", "keywords": []}, {"id": 386, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::Severity::SEVERITY_DEFAULT", "display_name": "NV::Rules::IFrontend::Severity::SEVERITY_DEFAULT", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend8Severity16SEVERITY_DEFAULTE", "priority": 1, "content": "enumerator  SEVERITY_DEFAULT", "keywords": []}, {"id": 387, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::Severity::SEVERITY_HIGH", "display_name": "NV::Rules::IFrontend::Severity::SEVERITY_HIGH", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend8Severity13SEVERITY_HIGHE", "priority": 1, "content": "enumerator  SEVERITY_HIGH", "keywords": []}, {"id": 388, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::Severity::SEVERITY_LOW", "display_name": "NV::Rules::IFrontend::Severity::SEVERITY_LOW", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend8Severity12SEVERITY_LOWE", "priority": 1, "content": "enumerator  SEVERITY_LOW", "keywords": []}, {"id": 389, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::SpeedupType", "display_name": "NV::Rules::IFrontend::SpeedupType", "type": "enum", "display_type": "C++ enum", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend11SpeedupTypeE", "priority": 1, "content": "enum  class  SpeedupType Speedup estimation type. The type of speedup estimation used when calling speedup . LOCAL The proportional increase in efficiency of the hardware usage when viewing the performance problem in isolation. GLOBAL The proportional reduction in runtime of the entire workload. 
Values:", "keywords": []}, {"id": 390, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::SpeedupType::GLOBAL", "display_name": "NV::Rules::IFrontend::SpeedupType::GLOBAL", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend11SpeedupType6GLOBALE", "priority": 1, "content": "enumerator  GLOBAL Public Functions", "keywords": []}, {"id": 391, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::SpeedupType::LOCAL", "display_name": "NV::Rules::IFrontend::SpeedupType::LOCAL", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend11SpeedupType5LOCALE", "priority": 1, "content": "enumerator  LOCAL", "keywords": []}, {"id": 392, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::focus_metric", "display_name": "NV::Rules::IFrontend::focus_metric", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend12focus_metricEiPKcd8SeverityPKc", "priority": 1, "content": "virtual  bool  focus_metric int  message_id const  char  * metric_name double  metric_value Severity  severity const  char  * info  =  0 Rule focus metric message. Issues a focus metric message to the frontend, e.g. to indicate a key metric that triggered the rule output. Can be associated with a prior message using its message_id . The severity can be used to indicate this metric\u2019s impact/severity on the result. The info field can be set to a descriptive string for further information, e.g. the calculation leading to this metric being focused. Returns whether the focus_metric could be set successfully for the message_id .", "keywords": []}, {"id": 393, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::focus_metric::info", "display_name": "NV::Rules::IFrontend::focus_metric::info", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend12focus_metricEiPKcd8SeverityPKc", "priority": 1, "content": "virtual  bool  focus_metric int  message_id const  char  * metric_name double  metric_value Severity  severity const  char  * info  =  0 Rule focus metric message. Issues a focus metric message to the frontend, e.g. to indicate a key metric that triggered the rule output. Can be associated with a prior message using its message_id . The severity can be used to indicate this metric\u2019s impact/severity on the result. The info field can be set to a descriptive string for further information, e.g. the calculation leading to this metric being focused. 
Returns whether the focus_metric could be set successfully for the message_id .", "keywords": []}, {"id": 394, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::focus_metric::message_id", "display_name": "NV::Rules::IFrontend::focus_metric::message_id", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend12focus_metricEiPKcd8SeverityPKc", "priority": 1, "content": "virtual  bool  focus_metric int  message_id const  char  * metric_name double  metric_value Severity  severity const  char  * info  =  0 Rule focus metric message. Issues a focus metric message to the frontend, e.g. to indicate a key metric that triggered the rule output. Can be associated with a prior message using its message_id . The severity can be used to indicate this metric\u2019s impact/severity on the result. The info field can be set to a descriptive string for further information, e.g. the calculation leading to this metric being focused. Returns whether the focus_metric could be set successfully for the message_id .", "keywords": []}, {"id": 395, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::focus_metric::metric_name", "display_name": "NV::Rules::IFrontend::focus_metric::metric_name", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend12focus_metricEiPKcd8SeverityPKc", "priority": 1, "content": "virtual  bool  focus_metric int  message_id const  char  * metric_name double  metric_value Severity  severity const  char  * info  =  0 Rule focus metric message. Issues a focus metric message to the frontend, e.g. to indicate a key metric that triggered the rule output. Can be associated with a prior message using its message_id . The severity can be used to indicate this metric\u2019s impact/severity on the result. The info field can be set to a descriptive string for further information, e.g. the calculation leading to this metric being focused. Returns whether the focus_metric could be set successfully for the message_id .", "keywords": []}, {"id": 396, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::focus_metric::metric_value", "display_name": "NV::Rules::IFrontend::focus_metric::metric_value", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend12focus_metricEiPKcd8SeverityPKc", "priority": 1, "content": "virtual  bool  focus_metric int  message_id const  char  * metric_name double  metric_value Severity  severity const  char  * info  =  0 Rule focus metric message. Issues a focus metric message to the frontend, e.g. to indicate a key metric that triggered the rule output. Can be associated with a prior message using its message_id . The severity can be used to indicate this metric\u2019s impact/severity on the result. The info field can be set to a descriptive string for further information, e.g. the calculation leading to this metric being focused. 
Returns whether the focus_metric could be set successfully for the message_id .", "keywords": []}, {"id": 397, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::focus_metric::severity", "display_name": "NV::Rules::IFrontend::focus_metric::severity", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend12focus_metricEiPKcd8SeverityPKc", "priority": 1, "content": "virtual  bool  focus_metric int  message_id const  char  * metric_name double  metric_value Severity  severity const  char  * info  =  0 Rule focus metric message. Issues a focus metric message to the frontend, e.g. to indicate a key metric that triggered the rule output. Can be associated with a prior message using its message_id . The severity can be used to indicate this metric\u2019s impact/severity on the result. The info field can be set to a descriptive string for further information, e.g. the calculation leading to this metric being focused. Returns whether the focus_metric could be set successfully for the message_id .", "keywords": []}, {"id": 398, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::load_chart_from_file", "display_name": "NV::Rules::IFrontend::load_chart_from_file", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend20load_chart_from_fileEPKc", "priority": 1, "content": "virtual  void  load_chart_from_file const  char  * filename  =  0 Load a ProfilerSection Google protocol buffer chart from filename .", "keywords": []}, {"id": 399, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::load_chart_from_file::filename", "display_name": "NV::Rules::IFrontend::load_chart_from_file::filename", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend20load_chart_from_fileEPKc", "priority": 1, "content": "virtual  void  load_chart_from_file const  char  * filename  =  0 Load a ProfilerSection Google protocol buffer chart from filename .", "keywords": []}, {"id": 400, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::message", "display_name": "NV::Rules::IFrontend::message", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend7messageE7MsgTypePKcPKc", "priority": 1, "content": "virtual  int  message MsgType  type const  char  * str const  char  * name  =  nullptr  =  0 Rule result message. Issues a message str with a specific message type and optional name name to the frontend. Returns a message ID that is unique in this rule invocation.", "keywords": []}, {"id": 401, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::message", "display_name": "NV::Rules::IFrontend::message", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend7messageEPKcPKc", "priority": 1, "content": "virtual  int  message const  char  * str const  char  * name  =  nullptr  =  0 Rule result message. 
Issues a message str to the frontend with the default MsgType and optional name name . Returns a message ID that is unique in this rule invocation.", "keywords": []}, {"id": 402, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::message::name", "display_name": "NV::Rules::IFrontend::message::name", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend7messageE7MsgTypePKcPKc", "priority": 1, "content": "virtual  int  message MsgType  type const  char  * str const  char  * name  =  nullptr  =  0 Rule result message. Issues a message str with a specific message type and optional name name to the frontend. Returns a message ID that is unique in this rule invocation.", "keywords": []}, {"id": 403, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::message::name", "display_name": "NV::Rules::IFrontend::message::name", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend7messageEPKcPKc", "priority": 1, "content": "virtual  int  message const  char  * str const  char  * name  =  nullptr  =  0 Rule result message. Issues a message str to the frontend with the default MsgType and optional name name . Returns a message ID that is unique in this rule invocation.", "keywords": []}, {"id": 404, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::message::str", "display_name": "NV::Rules::IFrontend::message::str", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend7messageE7MsgTypePKcPKc", "priority": 1, "content": "virtual  int  message MsgType  type const  char  * str const  char  * name  =  nullptr  =  0 Rule result message. Issues a message str with a specific message type and optional name name to the frontend. Returns a message ID that is unique in this rule invocation.", "keywords": []}, {"id": 405, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::message::str", "display_name": "NV::Rules::IFrontend::message::str", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend7messageEPKcPKc", "priority": 1, "content": "virtual  int  message const  char  * str const  char  * name  =  nullptr  =  0 Rule result message. Issues a message str to the frontend with the default MsgType and optional name name . Returns a message ID that is unique in this rule invocation.", "keywords": []}, {"id": 406, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::message::type", "display_name": "NV::Rules::IFrontend::message::type", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend7messageE7MsgTypePKcPKc", "priority": 1, "content": "virtual  int  message MsgType  type const  char  * str const  char  * name  =  nullptr  =  0 Rule result message. Issues a message str with a specific message type and optional name name to the frontend. 
Returns a message ID that is unique in this rule invocation.", "keywords": []}, {"id": 407, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::receive_dict_from_parent", "display_name": "NV::Rules::IFrontend::receive_dict_from_parent", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend24receive_dict_from_parentERK6RuleId", "priority": 1, "content": "virtual  Dict  receive_dict_from_parent const  RuleId  &amp; parent_id  =  0 Receive a dictionary from parent rule parent_id . Receive a dictionary of type dict[str,float] sent using send_dict_to_children . In case parent_id does not represent a pre-specified parent rule of this rule, or in case the parent rule has not been executed, an empty dict will be returned.", "keywords": []}, {"id": 408, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::receive_dict_from_parent::parent_id", "display_name": "NV::Rules::IFrontend::receive_dict_from_parent::parent_id", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend24receive_dict_from_parentERK6RuleId", "priority": 1, "content": "virtual  Dict  receive_dict_from_parent const  RuleId  &amp; parent_id  =  0 Receive a dictionary from parent rule parent_id . Receive a dictionary of type dict[str,float] sent using send_dict_to_children . In case parent_id does not represent a pre-specified parent rule of this rule, or in case the parent rule has not been executed, an empty dict will be returned.", "keywords": []}, {"id": 409, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::send_dict_to_children", "display_name": "NV::Rules::IFrontend::send_dict_to_children", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend21send_dict_to_childrenERK4Dict", "priority": 1, "content": "virtual  void  send_dict_to_children const  Dict  &amp; dict  =  0 Send a dictionary dict to all child rules. Sends a Python dictionary of type dict[str,float] to all rules that specify this rule as a parent rule. Child rules can retrieve the message using receive_dict_from_parent . In case this function is called repeatedly, the dict is updated accordingly, thereby adding new key-value pairs, and overwriting values of pre-existing keys.", "keywords": []}, {"id": 410, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::send_dict_to_children::dict", "display_name": "NV::Rules::IFrontend::send_dict_to_children::dict", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend21send_dict_to_childrenERK4Dict", "priority": 1, "content": "virtual  void  send_dict_to_children const  Dict  &amp; dict  =  0 Send a dictionary dict to all child rules. Sends a Python dictionary of type dict[str,float] to all rules that specify this rule as a parent rule. Child rules can retrieve the message using receive_dict_from_parent . 
In case this function is called repeatedly, the dict is updated accordingly, thereby adding new key-value pairs, and overwriting values of pre-existing keys.", "keywords": []}, {"id": 411, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::source_marker", "display_name": "NV::Rules::IFrontend::source_marker", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend13source_markerEPKc8uint64_t10MarkerKind7MsgType", "priority": 1, "content": "virtual  void  source_marker const  char  * str uint64_t  address_or_line MarkerKind  kind MsgType  type  =  MsgType :: MSG_NONE  =  0 Rule source marker. Creates a source marker with a message str at the source location address_or_line in the appropriate kind of source. The type denotes the urgency associated with this marker and is optional.", "keywords": []}, {"id": 412, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::source_marker", "display_name": "NV::Rules::IFrontend::source_marker", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend13source_markerEPKc8uint64_t10MarkerKindPKc7MsgType", "priority": 1, "content": "virtual  void  source_marker const  char  * str uint64_t  address_or_line MarkerKind  kind const  char  * file_name MsgType  type  =  MsgType :: MSG_NONE  =  0 Rule source marker. Creates a source marker with a message str at the source location address_or_line in the appropriate kind of source. This function supports multi-file programs, which means a file_name has to be specified. The type denotes the urgency associated with this marker and is optional.", "keywords": []}, {"id": 413, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::source_marker::address_or_line", "display_name": "NV::Rules::IFrontend::source_marker::address_or_line", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend13source_markerEPKc8uint64_t10MarkerKind7MsgType", "priority": 1, "content": "virtual  void  source_marker const  char  * str uint64_t  address_or_line MarkerKind  kind MsgType  type  =  MsgType :: MSG_NONE  =  0 Rule source marker. Creates a source marker with a message str at the source location address_or_line in the appropriate kind of source. The type denotes the urgency associated with this marker and is optional.", "keywords": []}, {"id": 414, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::source_marker::address_or_line", "display_name": "NV::Rules::IFrontend::source_marker::address_or_line", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend13source_markerEPKc8uint64_t10MarkerKindPKc7MsgType", "priority": 1, "content": "virtual  void  source_marker const  char  * str uint64_t  address_or_line MarkerKind  kind const  char  * file_name MsgType  type  =  MsgType :: MSG_NONE  =  0 Rule source marker. Creates a source marker with a message str at the source location address_or_line in the appropriate kind of source. 
This function supports multi-file programs, which means a file_name has to be specified. The type denotes the urgency associated with this marker and is optional.", "keywords": []}, {"id": 415, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::source_marker::file_name", "display_name": "NV::Rules::IFrontend::source_marker::file_name", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend13source_markerEPKc8uint64_t10MarkerKindPKc7MsgType", "priority": 1, "content": "virtual  void  source_marker const  char  * str uint64_t  address_or_line MarkerKind  kind const  char  * file_name MsgType  type  =  MsgType :: MSG_NONE  =  0 Rule source marker. Creates a source marker with a message str at the source location address_or_line in the appropriate kind of source. This function supports multi-file programs, which means a file_name has to be specified. The type denotes the urgency associated with this marker and is optional.", "keywords": []}, {"id": 416, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::source_marker::kind", "display_name": "NV::Rules::IFrontend::source_marker::kind", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend13source_markerEPKc8uint64_t10MarkerKind7MsgType", "priority": 1, "content": "virtual  void  source_marker const  char  * str uint64_t  address_or_line MarkerKind  kind MsgType  type  =  MsgType :: MSG_NONE  =  0 Rule source marker. Creates a source marker with a message str at the source location address_or_line in the appropriate kind of source. The type denotes the urgency associated with this marker and is optional.", "keywords": []}, {"id": 417, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::source_marker::kind", "display_name": "NV::Rules::IFrontend::source_marker::kind", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend13source_markerEPKc8uint64_t10MarkerKindPKc7MsgType", "priority": 1, "content": "virtual  void  source_marker const  char  * str uint64_t  address_or_line MarkerKind  kind const  char  * file_name MsgType  type  =  MsgType :: MSG_NONE  =  0 Rule source marker. Creates a source marker with a message str at the source location address_or_line in the appropriate kind of source. This function supports multi-file programs, which means a file_name has to be specified. The type denotes the urgency associated with this marker and is optional.", "keywords": []}, {"id": 418, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::source_marker::str", "display_name": "NV::Rules::IFrontend::source_marker::str", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend13source_markerEPKc8uint64_t10MarkerKind7MsgType", "priority": 1, "content": "virtual  void  source_marker const  char  * str uint64_t  address_or_line MarkerKind  kind MsgType  type  =  MsgType :: MSG_NONE  =  0 Rule source marker. 
Creates a source marker with a message str at the source location address_or_line in the appropriate kind of source. The type denotes the urgency associated with this marker and is optional.", "keywords": []}, {"id": 419, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::source_marker::str", "display_name": "NV::Rules::IFrontend::source_marker::str", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend13source_markerEPKc8uint64_t10MarkerKindPKc7MsgType", "priority": 1, "content": "virtual  void  source_marker const  char  * str uint64_t  address_or_line MarkerKind  kind const  char  * file_name MsgType  type  =  MsgType :: MSG_NONE  =  0 Rule source marker. Creates a source marker with a message str at the source location address_or_line in the appropriate kind of source. This function supports multi-file programs, which means a file_name has to be specified. The type denotes the urgency associated with this marker and is optional.", "keywords": []}, {"id": 420, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::source_marker::type", "display_name": "NV::Rules::IFrontend::source_marker::type", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend13source_markerEPKc8uint64_t10MarkerKind7MsgType", "priority": 1, "content": "virtual  void  source_marker const  char  * str uint64_t  address_or_line MarkerKind  kind MsgType  type  =  MsgType :: MSG_NONE  =  0 Rule source marker. Creates a source marker with a message str at the source location address_or_line in the appropriate kind of source. The type denotes the urgency associated with this marker and is optional.", "keywords": []}, {"id": 421, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::source_marker::type", "display_name": "NV::Rules::IFrontend::source_marker::type", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend13source_markerEPKc8uint64_t10MarkerKindPKc7MsgType", "priority": 1, "content": "virtual  void  source_marker const  char  * str uint64_t  address_or_line MarkerKind  kind const  char  * file_name MsgType  type  =  MsgType :: MSG_NONE  =  0 Rule source marker. Creates a source marker with a message str at the source location address_or_line in the appropriate kind of source. This function supports multi-file programs, which means a file_name has to be specified. The type denotes the urgency associated with this marker and is optional.", "keywords": []}, {"id": 422, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::speedup", "display_name": "NV::Rules::IFrontend::speedup", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend7speedupEi11SpeedupTyped", "priority": 1, "content": "virtual  bool  speedup int  message_id SpeedupType  type double  estimated_speedup  =  0 Rule estimated speedup message. Issues an estimated speedup to the frontend associated to the message with ID message_id . 
When type is SpeedupType::GLOBAL , it indicates what proportional decrease in workload runtime could potentially be achieved, when following the guidelines of the rule. When type is SpeedupType::LOCAL , it indicates what increase in the efficiency of the hardware usage within the context of the performance problem could be achieved. Returns whether the speedup could be set successfully for the message_id .", "keywords": []}, {"id": 423, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::speedup::estimated_speedup", "display_name": "NV::Rules::IFrontend::speedup::estimated_speedup", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend7speedupEi11SpeedupTyped", "priority": 1, "content": "virtual  bool  speedup int  message_id SpeedupType  type double  estimated_speedup  =  0 Rule estimated speedup message. Issues an estimated speedup to the frontend associated to the message with ID message_id . When type is SpeedupType::GLOBAL , it indicates what proportional decrease in workload runtime could potentially be achieved, when following the guidelines of the rule. When type is SpeedupType::LOCAL , it indicates what increase in the efficiency of the hardware usage within the context of the performance problem could be achieved. Returns whether the speedup could be set successfully for the message_id .", "keywords": []}, {"id": 424, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::speedup::message_id", "display_name": "NV::Rules::IFrontend::speedup::message_id", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend7speedupEi11SpeedupTyped", "priority": 1, "content": "virtual  bool  speedup int  message_id SpeedupType  type double  estimated_speedup  =  0 Rule estimated speedup message. Issues an estimated speedup to the frontend associated to the message with ID message_id . When type is SpeedupType::GLOBAL , it indicates what proportional decrease in workload runtime could potentially be achieved, when following the guidelines of the rule. When type is SpeedupType::LOCAL , it indicates what increase in the efficiency of the hardware usage within the context of the performance problem could be achieved. Returns whether the speedup could be set successfully for the message_id .", "keywords": []}, {"id": 425, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::speedup::type", "display_name": "NV::Rules::IFrontend::speedup::type", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontend7speedupEi11SpeedupTyped", "priority": 1, "content": "virtual  bool  speedup int  message_id SpeedupType  type double  estimated_speedup  =  0 Rule estimated speedup message. Issues an estimated speedup to the frontend associated to the message with ID message_id . When type is SpeedupType::GLOBAL , it indicates what proportional decrease in workload runtime could potentially be achieved, when following the guidelines of the rule. When type is SpeedupType::LOCAL , it indicates what increase in the efficiency of the hardware usage within the context of the performance problem could be achieved. 
Returns whether the speedup could be set successfully for the message_id .", "keywords": []}, {"id": 426, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "cpp", "name": "NV::Rules::IFrontend::~IFrontend", "display_name": "NV::Rules::IFrontend::~IFrontend", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "_CPPv4N2NV5Rules9IFrontendD0Ev", "priority": 1, "content": "inline  virtual  ~IFrontend", "keywords": []}, {"id": 427, "doc_id": 427, "filename": "api/classNV_1_1Rules_1_1IFrontend.html", "domain_name": "std", "name": "api/classNV_1_1Rules_1_1IFrontend", "display_name": "NV::Rules::IFrontend", "type": "doc", "display_type": "Page", "docname": "api/classNV_1_1Rules_1_1IFrontend", "anchor": "", "priority": -1, "content": "", "keywords": []}, {"id": 428, "doc_id": 437, "filename": "api/classNV_1_1Rules_1_1IMessageVault.html", "domain_name": "cpp", "name": "NV::Rules::IMessageVault", "display_name": "NV::Rules::IMessageVault", "type": "class", "display_type": "C++ class", "docname": "api/classNV_1_1Rules_1_1IMessageVault", "anchor": "_CPPv4N2NV5Rules13IMessageVaultE", "priority": 1, "content": "class  IMessageVault Public Types", "keywords": []}, {"id": 429, "doc_id": 437, "filename": "api/classNV_1_1Rules_1_1IMessageVault.html", "domain_name": "cpp", "name": "NV::Rules::IMessageVault::Dict", "display_name": "NV::Rules::IMessageVault::Dict", "type": "type", "display_type": "C++ type", "docname": "api/classNV_1_1Rules_1_1IMessageVault", "anchor": "_CPPv4N2NV5Rules13IMessageVault4DictE", "priority": 1, "content": "using  Dict  =  IFrontend :: Dict", "keywords": []}, {"id": 430, "doc_id": 437, "filename": "api/classNV_1_1Rules_1_1IMessageVault.html", "domain_name": "cpp", "name": "NV::Rules::IMessageVault::Get", "display_name": "NV::Rules::IMessageVault::Get", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMessageVault", "anchor": "_CPPv4NK2NV5Rules13IMessageVault3GetERK6RuleId", "priority": 1, "content": "virtual  Dict  Get const  RuleId  &amp; ruleId  const  =  0 Retrieve the message associated with ruleId from the vault. In case ruleId is unknown, an empty message is returned.", "keywords": []}, {"id": 431, "doc_id": 437, "filename": "api/classNV_1_1Rules_1_1IMessageVault.html", "domain_name": "cpp", "name": "NV::Rules::IMessageVault::Get::ruleId", "display_name": "NV::Rules::IMessageVault::Get::ruleId", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IMessageVault", "anchor": "_CPPv4NK2NV5Rules13IMessageVault3GetERK6RuleId", "priority": 1, "content": "virtual  Dict  Get const  RuleId  &amp; ruleId  const  =  0 Retrieve the message associated with ruleId from the vault. In case ruleId is unknown, an empty message is returned.", "keywords": []}, {"id": 432, "doc_id": 437, "filename": "api/classNV_1_1Rules_1_1IMessageVault.html", "domain_name": "cpp", "name": "NV::Rules::IMessageVault::Put", "display_name": "NV::Rules::IMessageVault::Put", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMessageVault", "anchor": "_CPPv4N2NV5Rules13IMessageVault3PutERK6RuleIdRK4Dict", "priority": 1, "content": "virtual  void  Put const  RuleId  &amp; ruleId const  Dict  &amp; message  =  0 Commit a message message associated with ruleId to the vault. 
In case multiple messages associated with the same ruleId are committed, the messages are merged, in such a way that new key-value pairs are added, and values of pre-existing keys are updated.", "keywords": []}, {"id": 433, "doc_id": 437, "filename": "api/classNV_1_1Rules_1_1IMessageVault.html", "domain_name": "cpp", "name": "NV::Rules::IMessageVault::Put::message", "display_name": "NV::Rules::IMessageVault::Put::message", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IMessageVault", "anchor": "_CPPv4N2NV5Rules13IMessageVault3PutERK6RuleIdRK4Dict", "priority": 1, "content": "virtual  void  Put const  RuleId  &amp; ruleId const  Dict  &amp; message  =  0 Commit a message message associated with ruleId to the vault. In case multiple messages associated with the same ruleId are committed, the messages are merged, in such a way that new key-value pairs are added, and values of pre-existing keys are updated.", "keywords": []}, {"id": 434, "doc_id": 437, "filename": "api/classNV_1_1Rules_1_1IMessageVault.html", "domain_name": "cpp", "name": "NV::Rules::IMessageVault::Put::ruleId", "display_name": "NV::Rules::IMessageVault::Put::ruleId", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IMessageVault", "anchor": "_CPPv4N2NV5Rules13IMessageVault3PutERK6RuleIdRK4Dict", "priority": 1, "content": "virtual  void  Put const  RuleId  &amp; ruleId const  Dict  &amp; message  =  0 Commit a message message associated with ruleId to the vault. In case multiple messages associated with the same ruleId are committed, the messages are merged, in such a way that new key-value pairs are added, and values of pre-existing keys are updated.", "keywords": []}, {"id": 435, "doc_id": 437, "filename": "api/classNV_1_1Rules_1_1IMessageVault.html", "domain_name": "cpp", "name": "NV::Rules::IMessageVault::RuleId", "display_name": "NV::Rules::IMessageVault::RuleId", "type": "type", "display_type": "C++ type", "docname": "api/classNV_1_1Rules_1_1IMessageVault", "anchor": "_CPPv4N2NV5Rules13IMessageVault6RuleIdE", "priority": 1, "content": "using  RuleId  =  IFrontend :: RuleId Public Functions", "keywords": []}, {"id": 436, "doc_id": 437, "filename": "api/classNV_1_1Rules_1_1IMessageVault.html", "domain_name": "cpp", "name": "NV::Rules::IMessageVault::~IMessageVault", "display_name": "NV::Rules::IMessageVault::~IMessageVault", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMessageVault", "anchor": "_CPPv4N2NV5Rules13IMessageVaultD0Ev", "priority": 1, "content": "virtual  ~IMessageVault  =  default", "keywords": []}, {"id": 437, "doc_id": 437, "filename": "api/classNV_1_1Rules_1_1IMessageVault.html", "domain_name": "std", "name": "api/classNV_1_1Rules_1_1IMessageVault", "display_name": "NV::Rules::IMessageVault", "type": "doc", "display_type": "Page", "docname": "api/classNV_1_1Rules_1_1IMessageVault", "anchor": "", "priority": -1, "content": "", "keywords": []}, {"id": 438, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric", "display_name": "NV::Rules::IMetric", "type": "class", "display_type": "C++ class", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetricE", "priority": 1, "content": "class  IMetric Metric interface. Represents a single, named metric. A metric can carry one value or multiple, if it is an instanced metric. 
Public Types", "keywords": []}, {"id": 439, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::Description", "display_name": "NV::Rules::IMetric::Description", "type": "type", "display_type": "C++ type", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric11DescriptionE", "priority": 1, "content": "using  Description  =  std :: string", "keywords": []}, {"id": 440, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype", "display_name": "NV::Rules::IMetric::MetricSubtype", "type": "enum", "display_type": "C++ enum", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtypeE", "priority": 1, "content": "enum  class  MetricSubtype Metric subtype. Indicates the subtype of the metric. Subtypes only apply to hardware metrics. Other metric types have subtype None in Python. Values:", "keywords": []}, {"id": 441, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::MAX_RATE", "display_name": "NV::Rules::IMetric::MetricSubtype::MAX_RATE", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype8MAX_RATEE", "priority": 1, "content": "enumerator  MAX_RATE", "keywords": []}, {"id": 442, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::NONE", "display_name": "NV::Rules::IMetric::MetricSubtype::NONE", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype4NONEE", "priority": 1, "content": "enumerator  NONE", "keywords": []}, {"id": 443, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PCT", "display_name": "NV::Rules::IMetric::MetricSubtype::PCT", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype3PCTE", "priority": 1, "content": "enumerator  PCT", "keywords": []}, {"id": 444, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PCT_OF_PEAK_SUSTAINED_ACTIVE", "display_name": "NV::Rules::IMetric::MetricSubtype::PCT_OF_PEAK_SUSTAINED_ACTIVE", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype28PCT_OF_PEAK_SUSTAINED_ACTIVEE", "priority": 1, "content": "enumerator  PCT_OF_PEAK_SUSTAINED_ACTIVE", "keywords": []}, {"id": 445, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PCT_OF_PEAK_SUSTAINED_ELAPSED", "display_name": "NV::Rules::IMetric::MetricSubtype::PCT_OF_PEAK_SUSTAINED_ELAPSED", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype29PCT_OF_PEAK_SUSTAINED_ELAPSEDE", "priority": 1, "content": "enumerator  PCT_OF_PEAK_SUSTAINED_ELAPSED", "keywords": []}, {"id": 446, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": 
"NV::Rules::IMetric::MetricSubtype::PCT_OF_PEAK_SUSTAINED_FRAME", "display_name": "NV::Rules::IMetric::MetricSubtype::PCT_OF_PEAK_SUSTAINED_FRAME", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype27PCT_OF_PEAK_SUSTAINED_FRAMEE", "priority": 1, "content": "enumerator  PCT_OF_PEAK_SUSTAINED_FRAME", "keywords": []}, {"id": 447, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PCT_OF_PEAK_SUSTAINED_REGION", "display_name": "NV::Rules::IMetric::MetricSubtype::PCT_OF_PEAK_SUSTAINED_REGION", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype28PCT_OF_PEAK_SUSTAINED_REGIONE", "priority": 1, "content": "enumerator  PCT_OF_PEAK_SUSTAINED_REGION", "keywords": []}, {"id": 448, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED", "display_name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype14PEAK_SUSTAINEDE", "priority": 1, "content": "enumerator  PEAK_SUSTAINED", "keywords": []}, {"id": 449, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED_ACTIVE", "display_name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED_ACTIVE", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype21PEAK_SUSTAINED_ACTIVEE", "priority": 1, "content": "enumerator  PEAK_SUSTAINED_ACTIVE", "keywords": []}, {"id": 450, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED_ACTIVE_PER_SECOND", "display_name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED_ACTIVE_PER_SECOND", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype32PEAK_SUSTAINED_ACTIVE_PER_SECONDE", "priority": 1, "content": "enumerator  PEAK_SUSTAINED_ACTIVE_PER_SECOND", "keywords": []}, {"id": 451, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED_ELAPSED", "display_name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED_ELAPSED", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype22PEAK_SUSTAINED_ELAPSEDE", "priority": 1, "content": "enumerator  PEAK_SUSTAINED_ELAPSED", "keywords": []}, {"id": 452, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED_ELAPSED_PER_SECOND", "display_name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED_ELAPSED_PER_SECOND", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype33PEAK_SUSTAINED_ELAPSED_PER_SECONDE", "priority": 1, "content": "enumerator  PEAK_SUSTAINED_ELAPSED_PER_SECOND", "keywords": []}, {"id": 453, 
"doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED_FRAME", "display_name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED_FRAME", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype20PEAK_SUSTAINED_FRAMEE", "priority": 1, "content": "enumerator  PEAK_SUSTAINED_FRAME", "keywords": []}, {"id": 454, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED_FRAME_PER_SECOND", "display_name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED_FRAME_PER_SECOND", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype31PEAK_SUSTAINED_FRAME_PER_SECONDE", "priority": 1, "content": "enumerator  PEAK_SUSTAINED_FRAME_PER_SECOND", "keywords": []}, {"id": 455, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED_REGION", "display_name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED_REGION", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype21PEAK_SUSTAINED_REGIONE", "priority": 1, "content": "enumerator  PEAK_SUSTAINED_REGION", "keywords": []}, {"id": 456, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED_REGION_PER_SECOND", "display_name": "NV::Rules::IMetric::MetricSubtype::PEAK_SUSTAINED_REGION_PER_SECOND", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype32PEAK_SUSTAINED_REGION_PER_SECONDE", "priority": 1, "content": "enumerator  PEAK_SUSTAINED_REGION_PER_SECOND", "keywords": []}, {"id": 457, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PER_CYCLE_ACTIVE", "display_name": "NV::Rules::IMetric::MetricSubtype::PER_CYCLE_ACTIVE", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype16PER_CYCLE_ACTIVEE", "priority": 1, "content": "enumerator  PER_CYCLE_ACTIVE", "keywords": []}, {"id": 458, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PER_CYCLE_ELAPSED", "display_name": "NV::Rules::IMetric::MetricSubtype::PER_CYCLE_ELAPSED", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype17PER_CYCLE_ELAPSEDE", "priority": 1, "content": "enumerator  PER_CYCLE_ELAPSED", "keywords": []}, {"id": 459, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PER_CYCLE_IN_FRAME", "display_name": "NV::Rules::IMetric::MetricSubtype::PER_CYCLE_IN_FRAME", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype18PER_CYCLE_IN_FRAMEE", "priority": 1, "content": "enumerator  PER_CYCLE_IN_FRAME", "keywords": []}, {"id": 
460, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PER_CYCLE_IN_REGION", "display_name": "NV::Rules::IMetric::MetricSubtype::PER_CYCLE_IN_REGION", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype19PER_CYCLE_IN_REGIONE", "priority": 1, "content": "enumerator  PER_CYCLE_IN_REGION", "keywords": []}, {"id": 461, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::PER_SECOND", "display_name": "NV::Rules::IMetric::MetricSubtype::PER_SECOND", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype10PER_SECONDE", "priority": 1, "content": "enumerator  PER_SECOND", "keywords": []}, {"id": 462, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricSubtype::RATIO", "display_name": "NV::Rules::IMetric::MetricSubtype::RATIO", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13MetricSubtype5RATIOE", "priority": 1, "content": "enumerator  RATIO", "keywords": []}, {"id": 463, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricType", "display_name": "NV::Rules::IMetric::MetricType", "type": "enum", "display_type": "C++ enum", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric10MetricTypeE", "priority": 1, "content": "enum  class  MetricType Metric type. Indicates the type of the metric. May take values COUNTER , RATIO or THROUGHPUT for hardware metrics, and OTHER for all other types (e.g. launch metrics, device attributes). 
Values:", "keywords": []}, {"id": 464, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricType::COUNTER", "display_name": "NV::Rules::IMetric::MetricType::COUNTER", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric10MetricType7COUNTERE", "priority": 1, "content": "enumerator  COUNTER", "keywords": []}, {"id": 465, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricType::OTHER", "display_name": "NV::Rules::IMetric::MetricType::OTHER", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric10MetricType5OTHERE", "priority": 1, "content": "enumerator  OTHER", "keywords": []}, {"id": 466, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricType::RATIO", "display_name": "NV::Rules::IMetric::MetricType::RATIO", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric10MetricType5RATIOE", "priority": 1, "content": "enumerator  RATIO", "keywords": []}, {"id": 467, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::MetricType::THROUGHPUT", "display_name": "NV::Rules::IMetric::MetricType::THROUGHPUT", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric10MetricType10THROUGHPUTE", "priority": 1, "content": "enumerator  THROUGHPUT", "keywords": []}, {"id": 468, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::RollupOperation", "display_name": "NV::Rules::IMetric::RollupOperation", "type": "enum", "display_type": "C++ enum", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric15RollupOperationE", "priority": 1, "content": "enum  class  RollupOperation Rollup Operation. Indicates the type of operation that is used to accumulate multiple values of a given metric. May take values AVG , MAX , MIN and SUM , as well as None , if no such operation is specified. 
Values:", "keywords": []}, {"id": 469, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::RollupOperation::AVG", "display_name": "NV::Rules::IMetric::RollupOperation::AVG", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric15RollupOperation3AVGE", "priority": 1, "content": "enumerator  AVG", "keywords": []}, {"id": 470, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::RollupOperation::MAX", "display_name": "NV::Rules::IMetric::RollupOperation::MAX", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric15RollupOperation3MAXE", "priority": 1, "content": "enumerator  MAX", "keywords": []}, {"id": 471, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::RollupOperation::MIN", "display_name": "NV::Rules::IMetric::RollupOperation::MIN", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric15RollupOperation3MINE", "priority": 1, "content": "enumerator  MIN", "keywords": []}, {"id": 472, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::RollupOperation::NONE", "display_name": "NV::Rules::IMetric::RollupOperation::NONE", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric15RollupOperation4NONEE", "priority": 1, "content": "enumerator  NONE", "keywords": []}, {"id": 473, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::RollupOperation::SUM", "display_name": "NV::Rules::IMetric::RollupOperation::SUM", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric15RollupOperation3SUME", "priority": 1, "content": "enumerator  SUM", "keywords": []}, {"id": 474, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::Unit", "display_name": "NV::Rules::IMetric::Unit", "type": "type", "display_type": "C++ type", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric4UnitE", "priority": 1, "content": "using  Unit  =  std :: string", "keywords": []}, {"id": 475, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::ValueKind", "display_name": "NV::Rules::IMetric::ValueKind", "type": "enum", "display_type": "C++ enum", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9ValueKindE", "priority": 1, "content": "enum  class  ValueKind Metric value kind. Indicates the kind (or type) of a metric value, metric instance value or correlation ID. 
Values:", "keywords": []}, {"id": 476, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::ValueKind::ANY", "display_name": "NV::Rules::IMetric::ValueKind::ANY", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9ValueKind3ANYE", "priority": 1, "content": "enumerator  ANY", "keywords": []}, {"id": 477, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::ValueKind::DOUBLE", "display_name": "NV::Rules::IMetric::ValueKind::DOUBLE", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9ValueKind6DOUBLEE", "priority": 1, "content": "enumerator  DOUBLE", "keywords": []}, {"id": 478, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::ValueKind::FLOAT", "display_name": "NV::Rules::IMetric::ValueKind::FLOAT", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9ValueKind5FLOATE", "priority": 1, "content": "enumerator  FLOAT", "keywords": []}, {"id": 479, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::ValueKind::STRING", "display_name": "NV::Rules::IMetric::ValueKind::STRING", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9ValueKind6STRINGE", "priority": 1, "content": "enumerator  STRING", "keywords": []}, {"id": 480, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::ValueKind::UINT32", "display_name": "NV::Rules::IMetric::ValueKind::UINT32", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9ValueKind6UINT32E", "priority": 1, "content": "enumerator  UINT32", "keywords": []}, {"id": 481, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::ValueKind::UINT64", "display_name": "NV::Rules::IMetric::ValueKind::UINT64", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9ValueKind6UINT64E", "priority": 1, "content": "enumerator  UINT64 Public Functions", "keywords": []}, {"id": 482, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::ValueKind::UNKNOWN", "display_name": "NV::Rules::IMetric::ValueKind::UNKNOWN", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9ValueKind7UNKNOWNE", "priority": 1, "content": "enumerator  UNKNOWN", "keywords": []}, {"id": 483, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::as_double", "display_name": "NV::Rules::IMetric::as_double", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9as_doubleE6size_t", "priority": 1, "content": "virtual  double  as_double size_t  instance  =  0 Get a value from the metric instances as double. 
Get the value of metric instance instance as double", "keywords": []}, {"id": 484, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::as_double", "display_name": "NV::Rules::IMetric::as_double", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9as_doubleEv", "priority": 1, "content": "virtual  double  as_double  =  0 Get the metric value as double. Get the metric value as double", "keywords": []}, {"id": 485, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::as_double::instance", "display_name": "NV::Rules::IMetric::as_double::instance", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9as_doubleE6size_t", "priority": 1, "content": "virtual  double  as_double size_t  instance  =  0 Get a value from the metric instances as double. Get the value of metric instance instance as double", "keywords": []}, {"id": 486, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::as_string", "display_name": "NV::Rules::IMetric::as_string", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9as_stringE6size_t", "priority": 1, "content": "virtual  const  char  * as_string size_t  instance  =  0 Get a value from the metric instances as string. Get the value of metric instance instance as string", "keywords": []}, {"id": 487, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::as_string", "display_name": "NV::Rules::IMetric::as_string", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9as_stringEv", "priority": 1, "content": "virtual  const  char  * as_string  =  0 Get the metric value as string. Get the metric value as string", "keywords": []}, {"id": 488, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::as_string::instance", "display_name": "NV::Rules::IMetric::as_string::instance", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9as_stringE6size_t", "priority": 1, "content": "virtual  const  char  * as_string size_t  instance  =  0 Get a value from the metric instances as string. Get the value of metric instance instance as string", "keywords": []}, {"id": 489, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::as_uint64", "display_name": "NV::Rules::IMetric::as_uint64", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9as_uint64E6size_t", "priority": 1, "content": "virtual  uint64_t  as_uint64 size_t  instance  =  0 Get a value from the metric instances as 64-bit integer. 
Get the value of metric instance instance as 64-bit integer", "keywords": []}, {"id": 490, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::as_uint64", "display_name": "NV::Rules::IMetric::as_uint64", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9as_uint64Ev", "priority": 1, "content": "virtual  uint64_t  as_uint64  =  0 Get the metric value as 64-bit integer. Get the metric value as 64-bit integer", "keywords": []}, {"id": 491, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::as_uint64::instance", "display_name": "NV::Rules::IMetric::as_uint64::instance", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9as_uint64E6size_t", "priority": 1, "content": "virtual  uint64_t  as_uint64 size_t  instance  =  0 Get a value from the metric instances as 64-bit integer. Get the value of metric instance instance as 64-bit integer", "keywords": []}, {"id": 492, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::correlation_ids", "display_name": "NV::Rules::IMetric::correlation_ids", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric15correlation_idsEv", "priority": 1, "content": "virtual  IMetric  * correlation_ids  =  0 Get a metric object for this metric\u2019s instance values\u2019 correlation IDs. Returns a new metric object representing the correlation IDs for the metric\u2019s instance values. Use has_correlation_ids to check if this metric has correlation IDs for its instance values. Correlation IDs are used to associate instance values with the \u201cinstance\u201d their value represents. In the returned new metric object, the correlation IDs are that object\u2019s instance values.", "keywords": []}, {"id": 493, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::description", "display_name": "NV::Rules::IMetric::description", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4NK2NV5Rules7IMetric11descriptionEv", "priority": 1, "content": "virtual  Description  description  const  =  0 Get the metric description. Get a textual description of the metric", "keywords": []}, {"id": 494, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::has_correlation_ids", "display_name": "NV::Rules::IMetric::has_correlation_ids", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric19has_correlation_idsEv", "priority": 1, "content": "virtual  bool  has_correlation_ids  =  0 Check if the metric has correlation IDs. 
If true, the metric has correlation IDs matching its instance values, which can be accessed using correlation_ids .", "keywords": []}, {"id": 495, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::has_value", "display_name": "NV::Rules::IMetric::has_value", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9has_valueE9ValueKind", "priority": 1, "content": "virtual  bool  has_value ValueKind  value_kind  =  0 Check if the metric has a non-instanced value of a specific kind. Check if the metric has a value of a specific value_kind . If true, the metric may still have instance values, too.", "keywords": []}, {"id": 496, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::has_value", "display_name": "NV::Rules::IMetric::has_value", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9has_valueEv", "priority": 1, "content": "virtual  bool  has_value  =  0 Check if the metric has a non-instanced value. Check if the metric has a non-instanced value. If true, the metric may still have instance values, too.", "keywords": []}, {"id": 497, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::has_value::value_kind", "display_name": "NV::Rules::IMetric::has_value::value_kind", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric9has_valueE9ValueKind", "priority": 1, "content": "virtual  bool  has_value ValueKind  value_kind  =  0 Check if the metric has a non-instanced value of a specific kind. Check if the metric has a value of a specific value_kind . If true, the metric may still have instance values, too.", "keywords": []}, {"id": 498, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::kind", "display_name": "NV::Rules::IMetric::kind", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric4kindE6size_t", "priority": 1, "content": "virtual  ValueKind  kind size_t  instance  =  0 Get the metric value kind for an instance value. Get the metric value kind for its instance value instance . num_instances can be used to check the total number of instance values.", "keywords": []}, {"id": 499, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::kind", "display_name": "NV::Rules::IMetric::kind", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric4kindEv", "priority": 1, "content": "virtual  ValueKind  kind  =  0 Get the metric value kind. Get the metric value kind", "keywords": []}, {"id": 500, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::kind::instance", "display_name": "NV::Rules::IMetric::kind::instance", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric4kindE6size_t", "priority": 1, "content": "virtual  ValueKind  kind size_t  instance  =  0 Get the metric value kind for an instance value. 
Get the metric value kind for its instance value instance . num_instances can be used to check the total number of instance values.", "keywords": []}, {"id": 501, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::metric_subtype", "display_name": "NV::Rules::IMetric::metric_subtype", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4NK2NV5Rules7IMetric14metric_subtypeEv", "priority": 1, "content": "virtual  MetricSubtype  metric_subtype  const  =  0 Get the metric subtype. Get the metric subtype", "keywords": []}, {"id": 502, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::metric_type", "display_name": "NV::Rules::IMetric::metric_type", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4NK2NV5Rules7IMetric11metric_typeEv", "priority": 1, "content": "virtual  MetricType  metric_type  const  =  0 Get the metric type. Get the metric type", "keywords": []}, {"id": 503, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::name", "display_name": "NV::Rules::IMetric::name", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric4nameEv", "priority": 1, "content": "virtual  const  char  * name  =  0 Get the metric name. Get the metric name", "keywords": []}, {"id": 504, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::num_instances", "display_name": "NV::Rules::IMetric::num_instances", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetric13num_instancesEv", "priority": 1, "content": "virtual  size_t  num_instances  =  0 Get the number of instance values for this metric. Get the number of instance values for this metric. Not all metrics have instance values. If a metric has instance values, it may also have correlation_ids matching these instance values.", "keywords": []}, {"id": 505, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::rollup_operation", "display_name": "NV::Rules::IMetric::rollup_operation", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4NK2NV5Rules7IMetric16rollup_operationEv", "priority": 1, "content": "virtual  RollupOperation  rollup_operation  const  =  0 Get the type of rollup operation. Get the type of rollup operation", "keywords": []}, {"id": 506, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::unit", "display_name": "NV::Rules::IMetric::unit", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4NK2NV5Rules7IMetric4unitEv", "priority": 1, "content": "virtual  Unit  unit  const  =  0 Get the metric unit. 
Get the metric unit", "keywords": []}, {"id": 507, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "cpp", "name": "NV::Rules::IMetric::~IMetric", "display_name": "NV::Rules::IMetric::~IMetric", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "_CPPv4N2NV5Rules7IMetricD0Ev", "priority": 1, "content": "inline  virtual  ~IMetric", "keywords": []}, {"id": 508, "doc_id": 508, "filename": "api/classNV_1_1Rules_1_1IMetric.html", "domain_name": "std", "name": "api/classNV_1_1Rules_1_1IMetric", "display_name": "NV::Rules::IMetric", "type": "doc", "display_type": "Page", "docname": "api/classNV_1_1Rules_1_1IMetric", "anchor": "", "priority": -1, "content": "", "keywords": []}, {"id": 509, "doc_id": 518, "filename": "api/classNV_1_1Rules_1_1INvtxDomainInfo.html", "domain_name": "cpp", "name": "NV::Rules::INvtxDomainInfo", "display_name": "NV::Rules::INvtxDomainInfo", "type": "class", "display_type": "C++ class", "docname": "api/classNV_1_1Rules_1_1INvtxDomainInfo", "anchor": "_CPPv4N2NV5Rules15INvtxDomainInfoE", "priority": 1, "content": "class  INvtxDomainInfo NvtxDomainInfo interface. Represents a single NVTX domain of the NVTX state, including all ranges associated with this domain. Public Functions", "keywords": []}, {"id": 510, "doc_id": 518, "filename": "api/classNV_1_1Rules_1_1INvtxDomainInfo.html", "domain_name": "cpp", "name": "NV::Rules::INvtxDomainInfo::name", "display_name": "NV::Rules::INvtxDomainInfo::name", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxDomainInfo", "anchor": "_CPPv4N2NV5Rules15INvtxDomainInfo4nameEv", "priority": 1, "content": "virtual  std :: string  name  =  0 Get the domain name. Get the name of this domain", "keywords": []}, {"id": 511, "doc_id": 518, "filename": "api/classNV_1_1Rules_1_1INvtxDomainInfo.html", "domain_name": "cpp", "name": "NV::Rules::INvtxDomainInfo::push_pop_range", "display_name": "NV::Rules::INvtxDomainInfo::push_pop_range", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxDomainInfo", "anchor": "_CPPv4N2NV5Rules15INvtxDomainInfo14push_pop_rangeE6size_t", "priority": 1, "content": "virtual  INvtxRange  * push_pop_range size_t  idx  =  0 Get a push/pop range object. Get a push/pop INvtxRange object by its running index idx . The index is identical to the range\u2019s order on the call stack.", "keywords": []}, {"id": 512, "doc_id": 518, "filename": "api/classNV_1_1Rules_1_1INvtxDomainInfo.html", "domain_name": "cpp", "name": "NV::Rules::INvtxDomainInfo::push_pop_range::idx", "display_name": "NV::Rules::INvtxDomainInfo::push_pop_range::idx", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1INvtxDomainInfo", "anchor": "_CPPv4N2NV5Rules15INvtxDomainInfo14push_pop_rangeE6size_t", "priority": 1, "content": "virtual  INvtxRange  * push_pop_range size_t  idx  =  0 Get a push/pop range object. Get a push/pop INvtxRange object by its running index idx . 
The index is identical to the range\u2019s order on the call stack.", "keywords": []}, {"id": 513, "doc_id": 518, "filename": "api/classNV_1_1Rules_1_1INvtxDomainInfo.html", "domain_name": "cpp", "name": "NV::Rules::INvtxDomainInfo::push_pop_ranges", "display_name": "NV::Rules::INvtxDomainInfo::push_pop_ranges", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxDomainInfo", "anchor": "_CPPv4N2NV5Rules15INvtxDomainInfo15push_pop_rangesEv", "priority": 1, "content": "virtual  std :: vector &lt; std :: string &gt;  push_pop_ranges  =  0 Get the list of push/pop range names. Get the sorted list of stacked push/pop range names in this domain, associated with the current INvtxState .", "keywords": []}, {"id": 514, "doc_id": 518, "filename": "api/classNV_1_1Rules_1_1INvtxDomainInfo.html", "domain_name": "cpp", "name": "NV::Rules::INvtxDomainInfo::start_end_range", "display_name": "NV::Rules::INvtxDomainInfo::start_end_range", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxDomainInfo", "anchor": "_CPPv4N2NV5Rules15INvtxDomainInfo15start_end_rangeE6size_t", "priority": 1, "content": "virtual  INvtxRange  * start_end_range size_t  idx  =  0 Get a start/end range object. Get a start/end INvtxRange object by its running index idx .", "keywords": []}, {"id": 515, "doc_id": 518, "filename": "api/classNV_1_1Rules_1_1INvtxDomainInfo.html", "domain_name": "cpp", "name": "NV::Rules::INvtxDomainInfo::start_end_range::idx", "display_name": "NV::Rules::INvtxDomainInfo::start_end_range::idx", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1INvtxDomainInfo", "anchor": "_CPPv4N2NV5Rules15INvtxDomainInfo15start_end_rangeE6size_t", "priority": 1, "content": "virtual  INvtxRange  * start_end_range size_t  idx  =  0 Get a start/end range object. Get a start/end INvtxRange object by its running index idx .", "keywords": []}, {"id": 516, "doc_id": 518, "filename": "api/classNV_1_1Rules_1_1INvtxDomainInfo.html", "domain_name": "cpp", "name": "NV::Rules::INvtxDomainInfo::start_end_ranges", "display_name": "NV::Rules::INvtxDomainInfo::start_end_ranges", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxDomainInfo", "anchor": "_CPPv4N2NV5Rules15INvtxDomainInfo16start_end_rangesEv", "priority": 1, "content": "virtual  std :: vector &lt; std :: string &gt;  start_end_ranges  =  0 Get the list of start/end range names. 
Get the list of start/end range names in this domain, associated with the current INvtxState .", "keywords": []}, {"id": 517, "doc_id": 518, "filename": "api/classNV_1_1Rules_1_1INvtxDomainInfo.html", "domain_name": "cpp", "name": "NV::Rules::INvtxDomainInfo::~INvtxDomainInfo", "display_name": "NV::Rules::INvtxDomainInfo::~INvtxDomainInfo", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxDomainInfo", "anchor": "_CPPv4N2NV5Rules15INvtxDomainInfoD0Ev", "priority": 1, "content": "inline  virtual  ~INvtxDomainInfo", "keywords": []}, {"id": 518, "doc_id": 518, "filename": "api/classNV_1_1Rules_1_1INvtxDomainInfo.html", "domain_name": "std", "name": "api/classNV_1_1Rules_1_1INvtxDomainInfo", "display_name": "NV::Rules::INvtxDomainInfo", "type": "doc", "display_type": "Page", "docname": "api/classNV_1_1Rules_1_1INvtxDomainInfo", "anchor": "", "priority": -1, "content": "", "keywords": []}, {"id": 519, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange", "display_name": "NV::Rules::INvtxRange", "type": "class", "display_type": "C++ class", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4N2NV5Rules10INvtxRangeE", "priority": 1, "content": "class  INvtxRange NvtxRange interface. Represents a single NVTX Push/Pop or Start/End range. Public Types", "keywords": []}, {"id": 520, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::PayloadType", "display_name": "NV::Rules::INvtxRange::PayloadType", "type": "enum", "display_type": "C++ enum", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4N2NV5Rules10INvtxRange11PayloadTypeE", "priority": 1, "content": "enum  class  PayloadType NVTX payload type. Type of an NVTX payload. This can be used to choose the appropriate payload_as_* function. 
Values:", "keywords": []}, {"id": 521, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::PayloadType::PAYLOAD_DOUBLE", "display_name": "NV::Rules::INvtxRange::PayloadType::PAYLOAD_DOUBLE", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4N2NV5Rules10INvtxRange11PayloadType14PAYLOAD_DOUBLEE", "priority": 1, "content": "enumerator  PAYLOAD_DOUBLE", "keywords": []}, {"id": 522, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::PayloadType::PAYLOAD_FLOAT", "display_name": "NV::Rules::INvtxRange::PayloadType::PAYLOAD_FLOAT", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4N2NV5Rules10INvtxRange11PayloadType13PAYLOAD_FLOATE", "priority": 1, "content": "enumerator  PAYLOAD_FLOAT", "keywords": []}, {"id": 523, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::PayloadType::PAYLOAD_INT32", "display_name": "NV::Rules::INvtxRange::PayloadType::PAYLOAD_INT32", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4N2NV5Rules10INvtxRange11PayloadType13PAYLOAD_INT32E", "priority": 1, "content": "enumerator  PAYLOAD_INT32", "keywords": []}, {"id": 524, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::PayloadType::PAYLOAD_INT64", "display_name": "NV::Rules::INvtxRange::PayloadType::PAYLOAD_INT64", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4N2NV5Rules10INvtxRange11PayloadType13PAYLOAD_INT64E", "priority": 1, "content": "enumerator  PAYLOAD_INT64", "keywords": []}, {"id": 525, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::PayloadType::PAYLOAD_JSON", "display_name": "NV::Rules::INvtxRange::PayloadType::PAYLOAD_JSON", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4N2NV5Rules10INvtxRange11PayloadType12PAYLOAD_JSONE", "priority": 1, "content": "enumerator  PAYLOAD_JSON Public Functions", "keywords": []}, {"id": 526, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::PayloadType::PAYLOAD_UINT32", "display_name": "NV::Rules::INvtxRange::PayloadType::PAYLOAD_UINT32", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4N2NV5Rules10INvtxRange11PayloadType14PAYLOAD_UINT32E", "priority": 1, "content": "enumerator  PAYLOAD_UINT32", "keywords": []}, {"id": 527, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::PayloadType::PAYLOAD_UINT64", "display_name": "NV::Rules::INvtxRange::PayloadType::PAYLOAD_UINT64", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4N2NV5Rules10INvtxRange11PayloadType14PAYLOAD_UINT64E", "priority": 1, "content": "enumerator  PAYLOAD_UINT64", "keywords": []}, {"id": 528, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": 
"NV::Rules::INvtxRange::PayloadType::PAYLOAD_UNKNOWN", "display_name": "NV::Rules::INvtxRange::PayloadType::PAYLOAD_UNKNOWN", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4N2NV5Rules10INvtxRange11PayloadType15PAYLOAD_UNKNOWNE", "priority": 1, "content": "enumerator  PAYLOAD_UNKNOWN", "keywords": []}, {"id": 529, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::category", "display_name": "NV::Rules::INvtxRange::category", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4NK2NV5Rules10INvtxRange8categoryEv", "priority": 1, "content": "virtual  uint32_t  category  const  =  0 Get the category attribute value. Get the category attribute value. If INvtxRange::has_attributes returns false, the returned value is undefined.", "keywords": []}, {"id": 530, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::color", "display_name": "NV::Rules::INvtxRange::color", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4NK2NV5Rules10INvtxRange5colorEv", "priority": 1, "content": "virtual  uint32_t  color  const  =  0 Get the color attribute value. Get the color attribute value. If INvtxRange::has_attributes returns false, the returned value is undefined.", "keywords": []}, {"id": 531, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::has_attributes", "display_name": "NV::Rules::INvtxRange::has_attributes", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4NK2NV5Rules10INvtxRange14has_attributesEv", "priority": 1, "content": "virtual  bool  has_attributes  const  =  0 Check if the range has event attributes. Check if the range has event attributes, such as INvtxRange::color , INvtxRange::category , or INvtxRange::message .", "keywords": []}, {"id": 532, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::message", "display_name": "NV::Rules::INvtxRange::message", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4NK2NV5Rules10INvtxRange7messageEv", "priority": 1, "content": "virtual  std :: string  message  const  =  0 Get the message attribute value. Get the message attribute value. If INvtxRange::has_attributes returns false, the returned value is undefined.", "keywords": []}, {"id": 533, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::name", "display_name": "NV::Rules::INvtxRange::name", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4NK2NV5Rules10INvtxRange4nameEv", "priority": 1, "content": "virtual  std :: string  name  const  =  0 Get the range name. 
Get the name of this range", "keywords": []}, {"id": 534, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::payload_as_double", "display_name": "NV::Rules::INvtxRange::payload_as_double", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4NK2NV5Rules10INvtxRange17payload_as_doubleEv", "priority": 1, "content": "virtual  double  payload_as_double  const  =  0 Get the payload attribute value as double (64-bit floating point) Get the payload attribute value as a double (64-bit floating point). If the INvtxRange::payload_type cannot be cast to double, the returned value is undefined. If INvtxRange::has_attributes returns false, the returned value is undefined.", "keywords": []}, {"id": 535, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::payload_as_string", "display_name": "NV::Rules::INvtxRange::payload_as_string", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4NK2NV5Rules10INvtxRange17payload_as_stringEv", "priority": 1, "content": "virtual  std :: string  payload_as_string  const  =  0 Get the payload attribute value as a string. Get the payload attribute value as a string. If the INvtxRange::payload_type cannot be cast to a string, the returned value is undefined. If INvtxRange::has_attributes returns false, the returned value is undefined.", "keywords": []}, {"id": 536, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::payload_as_uint64", "display_name": "NV::Rules::INvtxRange::payload_as_uint64", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4NK2NV5Rules10INvtxRange17payload_as_uint64Ev", "priority": 1, "content": "virtual  uint64_t  payload_as_uint64  const  =  0 Get the payload attribute value as uint64. Get the payload attribute value as a uint64. If the INvtxRange::payload_type cannot be cast to uint64, the returned value is undefined. If INvtxRange::has_attributes returns false, the returned value is undefined.", "keywords": []}, {"id": 537, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::payload_type", "display_name": "NV::Rules::INvtxRange::payload_type", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4NK2NV5Rules10INvtxRange12payload_typeEv", "priority": 1, "content": "virtual  PayloadType  payload_type  const  =  0 Get the payload attribute type. Get the payload attribute type. 
If INvtxRange::has_attributes returns false, the returned type is undefined.", "keywords": []}, {"id": 538, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "cpp", "name": "NV::Rules::INvtxRange::~INvtxRange", "display_name": "NV::Rules::INvtxRange::~INvtxRange", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "_CPPv4N2NV5Rules10INvtxRangeD0Ev", "priority": 1, "content": "inline  virtual  ~INvtxRange", "keywords": []}, {"id": 539, "doc_id": 539, "filename": "api/classNV_1_1Rules_1_1INvtxRange.html", "domain_name": "std", "name": "api/classNV_1_1Rules_1_1INvtxRange", "display_name": "NV::Rules::INvtxRange", "type": "doc", "display_type": "Page", "docname": "api/classNV_1_1Rules_1_1INvtxRange", "anchor": "", "priority": -1, "content": "", "keywords": []}, {"id": 540, "doc_id": 545, "filename": "api/classNV_1_1Rules_1_1INvtxState.html", "domain_name": "cpp", "name": "NV::Rules::INvtxState", "display_name": "NV::Rules::INvtxState", "type": "class", "display_type": "C++ class", "docname": "api/classNV_1_1Rules_1_1INvtxState", "anchor": "_CPPv4N2NV5Rules10INvtxStateE", "priority": 1, "content": "class  INvtxState NvtxState interface. Represents the NVTX (NVIDIA Tools Extension) state associated with a single action. Public Functions", "keywords": []}, {"id": 541, "doc_id": 545, "filename": "api/classNV_1_1Rules_1_1INvtxState.html", "domain_name": "cpp", "name": "NV::Rules::INvtxState::domain_by_id", "display_name": "NV::Rules::INvtxState::domain_by_id", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxState", "anchor": "_CPPv4N2NV5Rules10INvtxState12domain_by_idEy", "priority": 1, "content": "virtual  INvtxDomainInfo  * domain_by_id long  long  unsigned  id  =  0 Get a domain by ID. Get a single domain info object by its ID. Use INvtxState::domains to retrieve the list of valid domain IDs.", "keywords": []}, {"id": 542, "doc_id": 545, "filename": "api/classNV_1_1Rules_1_1INvtxState.html", "domain_name": "cpp", "name": "NV::Rules::INvtxState::domain_by_id::id", "display_name": "NV::Rules::INvtxState::domain_by_id::id", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1INvtxState", "anchor": "_CPPv4N2NV5Rules10INvtxState12domain_by_idEy", "priority": 1, "content": "virtual  INvtxDomainInfo  * domain_by_id long  long  unsigned  id  =  0 Get a domain by ID. Get a single domain info object by its ID. Use INvtxState::domains to retrieve the list of valid domain IDs.", "keywords": []}, {"id": 543, "doc_id": 545, "filename": "api/classNV_1_1Rules_1_1INvtxState.html", "domain_name": "cpp", "name": "NV::Rules::INvtxState::domains", "display_name": "NV::Rules::INvtxState::domains", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxState", "anchor": "_CPPv4N2NV5Rules10INvtxState7domainsEv", "priority": 1, "content": "virtual  std :: vector &lt; long  long  unsigned &gt;  domains  =  0 Get the domain IDs. 
Get the list of domain IDs in this state", "keywords": []}, {"id": 544, "doc_id": 545, "filename": "api/classNV_1_1Rules_1_1INvtxState.html", "domain_name": "cpp", "name": "NV::Rules::INvtxState::~INvtxState", "display_name": "NV::Rules::INvtxState::~INvtxState", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1INvtxState", "anchor": "_CPPv4N2NV5Rules10INvtxStateD0Ev", "priority": 1, "content": "inline  virtual  ~INvtxState", "keywords": []}, {"id": 545, "doc_id": 545, "filename": "api/classNV_1_1Rules_1_1INvtxState.html", "domain_name": "std", "name": "api/classNV_1_1Rules_1_1INvtxState", "display_name": "NV::Rules::INvtxState", "type": "doc", "display_type": "Page", "docname": "api/classNV_1_1Rules_1_1INvtxState", "anchor": "", "priority": -1, "content": "", "keywords": []}, {"id": 546, "doc_id": 554, "filename": "api/classNV_1_1Rules_1_1IRange.html", "domain_name": "cpp", "name": "NV::Rules::IRange", "display_name": "NV::Rules::IRange", "type": "class", "display_type": "C++ class", "docname": "api/classNV_1_1Rules_1_1IRange", "anchor": "_CPPv4N2NV5Rules6IRangeE", "priority": 1, "content": "class  IRange Range interface. Represents a serial, ordered stream of execution, such as a CUDA stream. It holds one or more actions that were logically executing in this range. Public Functions", "keywords": []}, {"id": 547, "doc_id": 554, "filename": "api/classNV_1_1Rules_1_1IRange.html", "domain_name": "cpp", "name": "NV::Rules::IRange::action_by_idx", "display_name": "NV::Rules::IRange::action_by_idx", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IRange", "anchor": "_CPPv4N2NV5Rules6IRange13action_by_idxE6size_t", "priority": 1, "content": "virtual  IAction  * action_by_idx size_t  idx  =  0 Get a single action by index. Get a single IAction by index idx .", "keywords": []}, {"id": 548, "doc_id": 554, "filename": "api/classNV_1_1Rules_1_1IRange.html", "domain_name": "cpp", "name": "NV::Rules::IRange::action_by_idx::idx", "display_name": "NV::Rules::IRange::action_by_idx::idx", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IRange", "anchor": "_CPPv4N2NV5Rules6IRange13action_by_idxE6size_t", "priority": 1, "content": "virtual  IAction  * action_by_idx size_t  idx  =  0 Get a single action by index. Get a single IAction by index idx .", "keywords": []}, {"id": 549, "doc_id": 554, "filename": "api/classNV_1_1Rules_1_1IRange.html", "domain_name": "cpp", "name": "NV::Rules::IRange::actions_by_nvtx", "display_name": "NV::Rules::IRange::actions_by_nvtx", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IRange", "anchor": "_CPPv4N2NV5Rules6IRange15actions_by_nvtxERKNSt6vectorINSt6stringEEERKNSt6vectorINSt6stringEEE", "priority": 1, "content": "virtual  std :: set &lt; long  long  unsigned &gt;  actions_by_nvtx const  std :: vector &lt; std :: string &gt;  &amp; includes const  std :: vector &lt; std :: string &gt;  &amp; excludes  =  0 Get a set of actions by their NVTX state. Get a set of indices to IAction objects by their NVTX state. 
The state is defined using a series of includes and excludes .", "keywords": []}, {"id": 550, "doc_id": 554, "filename": "api/classNV_1_1Rules_1_1IRange.html", "domain_name": "cpp", "name": "NV::Rules::IRange::actions_by_nvtx::excludes", "display_name": "NV::Rules::IRange::actions_by_nvtx::excludes", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IRange", "anchor": "_CPPv4N2NV5Rules6IRange15actions_by_nvtxERKNSt6vectorINSt6stringEEERKNSt6vectorINSt6stringEEE", "priority": 1, "content": "virtual  std :: set &lt; long  long  unsigned &gt;  actions_by_nvtx const  std :: vector &lt; std :: string &gt;  &amp; includes const  std :: vector &lt; std :: string &gt;  &amp; excludes  =  0 Get a set of actions by their NVTX state. Get a set of indices to IAction objects by their NVTX state. The state is defined using a series of includes and excludes .", "keywords": []}, {"id": 551, "doc_id": 554, "filename": "api/classNV_1_1Rules_1_1IRange.html", "domain_name": "cpp", "name": "NV::Rules::IRange::actions_by_nvtx::includes", "display_name": "NV::Rules::IRange::actions_by_nvtx::includes", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/classNV_1_1Rules_1_1IRange", "anchor": "_CPPv4N2NV5Rules6IRange15actions_by_nvtxERKNSt6vectorINSt6stringEEERKNSt6vectorINSt6stringEEE", "priority": 1, "content": "virtual  std :: set &lt; long  long  unsigned &gt;  actions_by_nvtx const  std :: vector &lt; std :: string &gt;  &amp; includes const  std :: vector &lt; std :: string &gt;  &amp; excludes  =  0 Get a set of actions by their NVTX state. Get a set of indices to IAction objects by their NVTX state. The state is defined using a series of includes and excludes .", "keywords": []}, {"id": 552, "doc_id": 554, "filename": "api/classNV_1_1Rules_1_1IRange.html", "domain_name": "cpp", "name": "NV::Rules::IRange::num_actions", "display_name": "NV::Rules::IRange::num_actions", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IRange", "anchor": "_CPPv4N2NV5Rules6IRange11num_actionsEv", "priority": 1, "content": "virtual  size_t  num_actions  =  0 Get the number of actions. Get the number of available actions in this range", "keywords": []}, {"id": 553, "doc_id": 554, "filename": "api/classNV_1_1Rules_1_1IRange.html", "domain_name": "cpp", "name": "NV::Rules::IRange::~IRange", "display_name": "NV::Rules::IRange::~IRange", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1IRange", "anchor": "_CPPv4N2NV5Rules6IRangeD0Ev", "priority": 1, "content": "inline  virtual  ~IRange", "keywords": []}, {"id": 554, "doc_id": 554, "filename": "api/classNV_1_1Rules_1_1IRange.html", "domain_name": "std", "name": "api/classNV_1_1Rules_1_1IRange", "display_name": "NV::Rules::IRange", "type": "doc", "display_type": "Page", "docname": "api/classNV_1_1Rules_1_1IRange", "anchor": "", "priority": -1, "content": "", "keywords": []}, {"id": 555, "doc_id": 559, "filename": "api/classNV_1_1Rules_1_1ISourceInfo.html", "domain_name": "cpp", "name": "NV::Rules::ISourceInfo", "display_name": "NV::Rules::ISourceInfo", "type": "class", "display_type": "C++ class", "docname": "api/classNV_1_1Rules_1_1ISourceInfo", "anchor": "_CPPv4N2NV5Rules11ISourceInfoE", "priority": 1, "content": "class  ISourceInfo SourceInfo interface. 
Represents the source correlation info for a specific function address within an action Public Functions", "keywords": []}, {"id": 556, "doc_id": 559, "filename": "api/classNV_1_1Rules_1_1ISourceInfo.html", "domain_name": "cpp", "name": "NV::Rules::ISourceInfo::file_name", "display_name": "NV::Rules::ISourceInfo::file_name", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1ISourceInfo", "anchor": "_CPPv4N2NV5Rules11ISourceInfo9file_nameEv", "priority": 1, "content": "virtual  const  char  * file_name  =  0 Get the file name. Get the file name, as embedded in the correlation info", "keywords": []}, {"id": 557, "doc_id": 559, "filename": "api/classNV_1_1Rules_1_1ISourceInfo.html", "domain_name": "cpp", "name": "NV::Rules::ISourceInfo::line", "display_name": "NV::Rules::ISourceInfo::line", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1ISourceInfo", "anchor": "_CPPv4N2NV5Rules11ISourceInfo4lineEv", "priority": 1, "content": "virtual  uint32_t  line  =  0 Get the file line. Get the line within file_name", "keywords": []}, {"id": 558, "doc_id": 559, "filename": "api/classNV_1_1Rules_1_1ISourceInfo.html", "domain_name": "cpp", "name": "NV::Rules::ISourceInfo::~ISourceInfo", "display_name": "NV::Rules::ISourceInfo::~ISourceInfo", "type": "function", "display_type": "C++ function", "docname": "api/classNV_1_1Rules_1_1ISourceInfo", "anchor": "_CPPv4N2NV5Rules11ISourceInfoD0Ev", "priority": 1, "content": "inline  virtual  ~ISourceInfo", "keywords": []}, {"id": 559, "doc_id": 559, "filename": "api/classNV_1_1Rules_1_1ISourceInfo.html", "domain_name": "std", "name": "api/classNV_1_1Rules_1_1ISourceInfo", "display_name": "NV::Rules::ISourceInfo", "type": "doc", "display_type": "Page", "docname": "api/classNV_1_1Rules_1_1ISourceInfo", "anchor": "", "priority": -1, "content": "", "keywords": []}, {"id": 560, "doc_id": 560, "filename": "api/data-structures.html", "domain_name": "std", "name": "api/data-structures", "display_name": "Data Structures", "type": "doc", "display_type": "Page", "docname": "api/data-structures", "anchor": "", "priority": -1, "content": "NV::Rules::IAction : Action interface. NV::Rules::IBaseContext NV::Rules::IContext : Context interface. NV::Rules::IController : Controller interface. NV::Rules::IEvaluator : Evaluator interface. NV::Rules::IFrontend : Frontend interface. NV::Rules::IMessageVault NV::Rules::IMetric : Metric interface. NV::Rules::INvtxDomainInfo : NvtxDomainInfo interface. NV::Rules::INvtxRange : NvtxRange interface. NV::Rules::INvtxState : NvtxState interface. NV::Rules::IRange : Range interface. NV::Rules::ISourceInfo : SourceInfo interface.", "keywords": []}, {"id": 561, "doc_id": 561, "filename": "api/group__NVRULES__HW.html", "domain_name": "std", "name": "api/group__NVRULES__HW", "display_name": "NvRules Tool lifetime-managed classes.", "type": "doc", "display_type": "Page", "docname": "api/group__NVRULES__HW", "anchor": "", "priority": -1, "content": "Objects of heavyweight classes are lifetime-managed by the tool. Data Structures NV::Rules::IBaseContext NV::Rules::IContext : Context interface. NV::Rules::IController : Controller interface. NV::Rules::IEvaluator : Evaluator interface. NV::Rules::IFrontend : Frontend interface. 
NV::Rules::IMessageVault", "keywords": []}, {"id": 562, "doc_id": 562, "filename": "api/group__NVRULES__LW.html", "domain_name": "std", "name": "api/group__NVRULES__LW", "display_name": "NvRules Python garbage-collected classes.", "type": "doc", "display_type": "Page", "docname": "api/group__NVRULES__LW", "anchor": "", "priority": -1, "content": "Objects of lightweight classes are garbage-collected by the python runtime. Data Structures NV::Rules::IAction : Action interface. NV::Rules::IMetric : Metric interface. NV::Rules::INvtxDomainInfo : NvtxDomainInfo interface. NV::Rules::INvtxRange : NvtxRange interface. NV::Rules::INvtxState : NvtxState interface. NV::Rules::IRange : Range interface. NV::Rules::ISourceInfo : SourceInfo interface.", "keywords": []}, {"id": 563, "doc_id": 573, "filename": "api/group__NVRULES__NM.html", "domain_name": "cpp", "name": "NV::Rules::ContextHandle", "display_name": "NV::Rules::ContextHandle", "type": "type", "display_type": "C++ type", "docname": "api/group__NVRULES__NM", "anchor": "_CPPv4N2NV5Rules13ContextHandleE", "priority": 1, "content": "typedef  unsigned  long  long  NV :: Rules :: ContextHandle", "keywords": []}, {"id": 564, "doc_id": 573, "filename": "api/group__NVRULES__NM.html", "domain_name": "cpp", "name": "NV::Rules::get_context", "display_name": "NV::Rules::get_context", "type": "function", "display_type": "C++ function", "docname": "api/group__NVRULES__NM", "anchor": "_CPPv4N2NV5Rules11get_contextE13ContextHandle", "priority": 1, "content": "IContext  * NV :: Rules :: get_context ContextHandle  h Converts the context handle. Converts the context handle h to an IContext object", "keywords": []}, {"id": 565, "doc_id": 573, "filename": "api/group__NVRULES__NM.html", "domain_name": "cpp", "name": "NV::Rules::get_context::h", "display_name": "NV::Rules::get_context::h", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/group__NVRULES__NM", "anchor": "_CPPv4N2NV5Rules11get_contextE13ContextHandle", "priority": 1, "content": "IContext  * NV :: Rules :: get_context ContextHandle  h Converts the context handle. Converts the context handle h to an IContext object", "keywords": []}, {"id": 566, "doc_id": 573, "filename": "api/group__NVRULES__NM.html", "domain_name": "cpp", "name": "NV::Rules::get_evaluator", "display_name": "NV::Rules::get_evaluator", "type": "function", "display_type": "C++ function", "docname": "api/group__NVRULES__NM", "anchor": "_CPPv4N2NV5Rules13get_evaluatorE13ContextHandle", "priority": 1, "content": "IEvaluator  * NV :: Rules :: get_evaluator ContextHandle  h Returns the evaluator object. Return the IEvaluator object from the context handle h", "keywords": []}, {"id": 567, "doc_id": 573, "filename": "api/group__NVRULES__NM.html", "domain_name": "cpp", "name": "NV::Rules::get_evaluator::h", "display_name": "NV::Rules::get_evaluator::h", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/group__NVRULES__NM", "anchor": "_CPPv4N2NV5Rules13get_evaluatorE13ContextHandle", "priority": 1, "content": "IEvaluator  * NV :: Rules :: get_evaluator ContextHandle  h Returns the evaluator object. 
Return the IEvaluator object from the context handle h", "keywords": []}, {"id": 568, "doc_id": 573, "filename": "api/group__NVRULES__NM.html", "domain_name": "cpp", "name": "NV::Rules::get_version", "display_name": "NV::Rules::get_version", "type": "function", "display_type": "C++ function", "docname": "api/group__NVRULES__NM", "anchor": "_CPPv4N2NV5Rules11get_versionE13ContextHandle", "priority": 1, "content": "std :: string  NV :: Rules :: get_version ContextHandle  h Get version number of this interface. Returns the version number of this interface as a string of the form &lt;year&gt;.&lt;major&gt;.&lt;minor&gt;. It matches the Nsight Compute version this interface originates from.", "keywords": []}, {"id": 569, "doc_id": 573, "filename": "api/group__NVRULES__NM.html", "domain_name": "cpp", "name": "NV::Rules::get_version::h", "display_name": "NV::Rules::get_version::h", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/group__NVRULES__NM", "anchor": "_CPPv4N2NV5Rules11get_versionE13ContextHandle", "priority": 1, "content": "std :: string  NV :: Rules :: get_version ContextHandle  h Get version number of this interface. Returns the version number of this interface as a string of the form &lt;year&gt;.&lt;major&gt;.&lt;minor&gt;. It matches the Nsight Compute version this interface originates from.", "keywords": []}, {"id": 570, "doc_id": 573, "filename": "api/group__NVRULES__NM.html", "domain_name": "cpp", "name": "NV::Rules::raise_exception", "display_name": "NV::Rules::raise_exception", "type": "function", "display_type": "C++ function", "docname": "api/group__NVRULES__NM", "anchor": "_CPPv4N2NV5Rules15raise_exceptionE13ContextHandlePKc", "priority": 1, "content": "void  NV :: Rules :: raise_exception ContextHandle  h const  char  * msg Raises a python exception. Raise a python exception with msg . This will terminate the script execution if the context does not choose to mask the exception", "keywords": []}, {"id": 571, "doc_id": 573, "filename": "api/group__NVRULES__NM.html", "domain_name": "cpp", "name": "NV::Rules::raise_exception::h", "display_name": "NV::Rules::raise_exception::h", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/group__NVRULES__NM", "anchor": "_CPPv4N2NV5Rules15raise_exceptionE13ContextHandlePKc", "priority": 1, "content": "void  NV :: Rules :: raise_exception ContextHandle  h const  char  * msg Raises a python exception. Raise a python exception with msg . This will terminate the script execution if the context does not choose to mask the exception", "keywords": []}, {"id": 572, "doc_id": 573, "filename": "api/group__NVRULES__NM.html", "domain_name": "cpp", "name": "NV::Rules::raise_exception::msg", "display_name": "NV::Rules::raise_exception::msg", "type": "functionParam", "display_type": "C++ function parameter", "docname": "api/group__NVRULES__NM", "anchor": "_CPPv4N2NV5Rules15raise_exceptionE13ContextHandlePKc", "priority": 1, "content": "void  NV :: Rules :: raise_exception ContextHandle  h const  char  * msg Raises a python exception. Raise a python exception with msg . 
This will terminate the script execution if the context does not choose to mask the exception", "keywords": []}, {"id": 573, "doc_id": 573, "filename": "api/group__NVRULES__NM.html", "domain_name": "std", "name": "api/group__NVRULES__NM", "display_name": "NvRules non-member functions.", "type": "doc", "display_type": "Page", "docname": "api/group__NVRULES__NM", "anchor": "", "priority": -1, "content": "Non-member utility functions. Functions NV::Rules::get_context(ContextHandle h) : Converts the context handle. NV::Rules::get_evaluator(ContextHandle h) : Returns the evaluator object. NV::Rules::get_version(ContextHandle h) : Get version number of this interface. NV::Rules::raise_exception(ContextHandle h, const char *msg) : Raises a python exception. Typedefs NV::Rules::ContextHandle", "keywords": []}, {"id": 574, "doc_id": 574, "filename": "api/modules.html", "domain_name": "std", "name": "api/modules", "display_name": "Modules", "type": "doc", "display_type": "Page", "docname": "api/modules", "anchor": "", "priority": -1, "content": "NvRules Python garbage-collected classes. : Objects of lightweight classes are garbage-collected by the python runtime. NvRules Tool lifetime-managed classes. : Objects of heavyweight classes are lifetime-managed by the tool. NvRules non-member functions. : Non-member utility functions.", "keywords": []}, {"id": 575, "doc_id": 576, "filename": "api/namespaceNV.html", "domain_name": "cpp", "name": "NV", "display_name": "NV", "type": "type", "display_type": "C++ type", "docname": "api/namespaceNV", "anchor": "_CPPv42NV", "priority": 1, "content": "namespace  NV Namespaces Rules", "keywords": []}, {"id": 576, "doc_id": 576, "filename": "api/namespaceNV.html", "domain_name": "std", "name": "api/namespaceNV", "display_name": "NV", "type": "doc", "display_type": "Page", "docname": "api/namespaceNV", "anchor": "", "priority": -1, "content": "", "keywords": []}, {"id": 577, "doc_id": 581, "filename": "api/namespaceNV_1_1Rules.html", "domain_name": "cpp", "name": "NV::Rules", "display_name": "NV::Rules", "type": "type", "display_type": "C++ type", "docname": "api/namespaceNV_1_1Rules", "anchor": "_CPPv4N2NV5RulesE", "priority": 1, "content": "namespace  Rules Data Structures IAction : Action interface. IBaseContext IContext : Context interface. IController : Controller interface. IEvaluator : Evaluator interface. IFrontend : Frontend interface. IMessageVault IMetric : Metric interface. INvtxDomainInfo : NvtxDomainInfo interface. INvtxRange : NvtxRange interface. INvtxState : NvtxState interface. IRange : Range interface. ISourceInfo : SourceInfo interface. Enumerations RuleKind : Rule kind. Functions get_context(ContextHandle h) : Converts the context handle. get_evaluator(ContextHandle h) : Returns the evaluator object. get_version(ContextHandle h) : Get version number of this interface. raise_exception(ContextHandle h, const char *msg) : Raises a python exception. Typedefs ContextHandle", "keywords": []}, {"id": 578, "doc_id": 581, "filename": "api/namespaceNV_1_1Rules.html", "domain_name": "cpp", "name": "NV::Rules::RuleKind", "display_name": "NV::Rules::RuleKind", "type": "enum", "display_type": "C++ enum", "docname": "api/namespaceNV_1_1Rules", "anchor": "_CPPv4N2NV5Rules8RuleKindE", "priority": 1, "content": "enum  class  NV :: Rules :: RuleKind Rule kind. Reserved for future use. 
Values:", "keywords": []}, {"id": 579, "doc_id": 581, "filename": "api/namespaceNV_1_1Rules.html", "domain_name": "cpp", "name": "NV::Rules::RuleKind::DEFAULT", "display_name": "NV::Rules::RuleKind::DEFAULT", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/namespaceNV_1_1Rules", "anchor": "_CPPv4N2NV5Rules8RuleKind7DEFAULTE", "priority": 1, "content": "enumerator  DEFAULT", "keywords": []}, {"id": 580, "doc_id": 581, "filename": "api/namespaceNV_1_1Rules.html", "domain_name": "cpp", "name": "NV::Rules::RuleKind::GUIDED", "display_name": "NV::Rules::RuleKind::GUIDED", "type": "enumerator", "display_type": "C++ enumerator", "docname": "api/namespaceNV_1_1Rules", "anchor": "_CPPv4N2NV5Rules8RuleKind6GUIDEDE", "priority": 1, "content": "enumerator  GUIDED", "keywords": []}, {"id": 581, "doc_id": 581, "filename": "api/namespaceNV_1_1Rules.html", "domain_name": "std", "name": "api/namespaceNV_1_1Rules", "display_name": "NV::Rules", "type": "doc", "display_type": "Page", "docname": "api/namespaceNV_1_1Rules", "anchor": "", "priority": -1, "content": "", "keywords": []}, {"id": 582, "doc_id": 582, "filename": "api/namespaces.html", "domain_name": "std", "name": "api/namespaces", "display_name": "Namespaces", "type": "doc", "display_type": "Page", "docname": "api/namespaces", "anchor": "", "priority": -1, "content": "NV NV::Rules", "keywords": []}, {"id": 583, "doc_id": 588, "filename": "index.html", "domain_name": "page", "name": "index#copyright-and-licenses", "display_name": "Copyright And Licenses", "type": "section", "display_type": "Page section", "docname": "index", "anchor": "copyright-and-licenses", "priority": -1, "content": "Copyright and Licenses Information on the NVIDIA Software License Agreement as well as third party software and tools used by Nsight Compute.", "keywords": []}, {"id": 584, "doc_id": 588, "filename": "index.html", "domain_name": "page", "name": "index#developer-interfaces", "display_name": "Developer Interfaces", "type": "section", "display_type": "Page section", "docname": "index", "anchor": "developer-interfaces", "priority": -1, "content": "Customization Guide User manual on customizing NVIDIA Nsight Compute tools or integrating them with custom workflows. Information on writing section files, rules for automatic result analysis and scripting access to report files.", "keywords": []}, {"id": 585, "doc_id": 588, "filename": "index.html", "domain_name": "page", "name": "index#nsight-compute", "display_name": "Nsight Compute", "type": "section", "display_type": "Page section", "docname": "index", "anchor": "nsight-compute", "priority": -1, "content": "Release Notes Release notes, including new features and important bug fixes. Supported platforms and GPUs. List of known issues for the current release. Kernel Profiling Guide Kernel Profiling Guide with metric types and meaning, data collection modes and FAQ for common problems. Nsight Compute NVIDIA Nsight Compute User Interface (UI) manual. Information on all views, controls and workflows within the tool UI. Transitions guide for Visual Profiler. Nsight Compute CLI NVIDIA Nsight Compute Command Line Interface (CLI) manual. Information on workflows and options for the command line, including multi-process profiling and NVTX filtering. 
Transitions guide for Nvprof.", "keywords": []}, {"id": 586, "doc_id": 588, "filename": "index.html", "domain_name": "page", "name": "index#release-information", "display_name": "Release Information", "type": "section", "display_type": "Page section", "docname": "index", "anchor": "release-information", "priority": -1, "content": "Archives Find documentation for previous versions of NVIDIA Nsight Compute.", "keywords": []}, {"id": 587, "doc_id": 588, "filename": "index.html", "domain_name": "page", "name": "index#training", "display_name": "Training", "type": "section", "display_type": "Page section", "docname": "index", "anchor": "training", "priority": -1, "content": "Training NVIDIA Nsight Compute Training resources.", "keywords": []}, {"id": 588, "doc_id": 588, "filename": "index.html", "domain_name": "std", "name": "index", "display_name": "Nsight Compute Documentation", "type": "doc", "display_type": "Page", "docname": "index", "anchor": "", "priority": -1, "content": "", "keywords": []}]};
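The entries above index the NvRules API reference: the IRange and ISourceInfo interfaces, the non-member helpers around NV::Rules::get_context, and the RuleKind enum. Because the indexed module pages note that these objects are exposed to the Python runtime, a minimal sketch of how a Python rule script might traverse ranges and actions follows. It assumes the Python bindings mirror the C++ names indexed above; the context accessors (num_ranges, range_by_idx), the action name accessor, the frontend message call, and the NVTX filter string are illustrative assumptions, not confirmed by this index.

```python
# Minimal sketch of an Nsight Compute rule script, assuming the NvRules
# Python bindings mirror the C++ interface names indexed above.
import NvRules

def get_identifier():
    return "RangeTraversalSketch"  # hypothetical rule identifier

def apply(handle):
    # get_context converts the opaque ContextHandle into an IContext.
    ctx = NvRules.get_context(handle)

    # num_ranges/range_by_idx are assumed IContext accessors; each IRange
    # is a serial, ordered stream of execution, such as a CUDA stream.
    for r_idx in range(ctx.num_ranges()):
        rng = ctx.range_by_idx(r_idx)

        # Walk all profiled actions held by this range.
        for a_idx in range(rng.num_actions()):
            action = rng.action_by_idx(a_idx)
            ctx.frontend().message("Profiled action: " + action.name())

        # actions_by_nvtx returns a set of indices of IAction objects whose
        # NVTX state matches the includes/excludes filters; the filter
        # expression below is purely illustrative.
        for a_idx in rng.actions_by_nvtx(["MyDomain@MyRange"], []):
            action = rng.action_by_idx(a_idx)
            ctx.frontend().message("NVTX-matched action: " + action.name())
```

For source correlation, ISourceInfo pairs a file_name() with a line() for a specific function address within an action; how that object is obtained from an action is not part of this excerpt, so it is omitted from the sketch.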
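The non-member functions indexed above are the entry-point glue: get_context and get_evaluator convert a ContextHandle into IContext and IEvaluator objects, get_version reports the interface version as <year>.<major>.<minor> (matching the Nsight Compute release it originates from), and raise_exception terminates script execution unless the context chooses to mask the exception. A hedged sketch of a defensive version check follows; whether get_version and raise_exception are reachable at NvRules module scope in Python is an assumption of this sketch.

```python
import NvRules

def apply(handle):
    # get_version returns "<year>.<major>.<minor>", per the index above.
    # Module-scope exposure of get_version/raise_exception is assumed.
    version = NvRules.get_version(handle)
    year = int(version.split(".")[0])
    if year < 2020:  # hypothetical minimum supported release
        # Terminates the script unless the context masks the exception.
        NvRules.raise_exception(handle, "NvRules interface too old: " + version)
```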