1
|
const searchData = {"envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "repo_docs.ext.toctree": 2, "repo_docs.ext.mermaid": 1, "repo_docs.ext.enhanced_search": 2, "sphinx": 57}, "data": [{"id": 0, "doc_id": 1, "filename": "Archives/index.html", "domain_name": "page", "name": "Archives/index#nvidia-nsight-systems-archives", "display_name": "NVIDIA Nsight Systems Archives", "type": "section", "display_type": "Page section", "docname": "Archives/index", "anchor": "nvidia-nsight-systems-archives", "priority": -1, "content": "Below, you can find the current and past release information for NVIDIA Nsight Systems. Nsight Systems Unified Documentation Latest version NVIDIA Nsight Systems 2023.2 NVIDIA Nsight Systems 2023.1 NVIDIA Nsight Systems 2022.5 NVIDIA Nsight Systems 2022.4 NVIDIA Nsight Systems 2022.3 NVIDIA Nsight Systems 2022.2 NVIDIA Nsight Systems 2022.1 NVIDIA Nsight Systems 2021.5 NVIDIA Nsight Systems 2021.4 NVIDIA Nsight Systems 2021.3 NVIDIA Nsight Systems 2021.2 NVIDIA Nsight Systems 2021.1 NVIDIA Nsight Systems 2020.5 NVIDIA Nsight Systems 2020.4 NVIDIA Nsight Systems 2020.3 NVIDIA Nsight Systems 2020.2 NVIDIA Nsight Systems 2020.1 Nsight Systems for Workstation NVIDIA Nsight Systems 2019.6 NVIDIA Nsight Systems 2019.5 NVIDIA Nsight Systems 2019.4 NVIDIA Nsight Systems 2019.3.6 NVIDIA Nsight Systems 2019.3 NVIDIA Nsight Systems 2019.2 NVIDIA Nsight Systems 2019.1 NVIDIA Nsight Systems 2018.3 NVIDIA Nsight Systems 2018.2 NVIDIA Nsight Systems 2018.1 NVIDIA Nsight Systems 2018.0 NVIDIA Nsight Systems for Embedded NVIDIA Nsight Systems 2019.6 NVIDIA Nsight Systems 2019.4 NVIDIA Nsight Systems 2019.3 NVIDIA Nsight Graphics 2018.1 NVIDIA Nsight Systems for DRIVE NVIDIA Nsight Systems for Drive 5.2.6 
NVIDIA Nsight Systems for Drive 5.2.3 NVIDIA Nsight Systems for Drive 5.2.0 NVIDIA Nsight Systems for Drive 5.1.15 NVIDIA Nsight Systems for Drive 5.1.12 NVIDIA Nsight Systems for Drive 5.1.9 NVIDIA Nsight Systems for Drive 5.1.6 NVIDIA Nsight Systems for Drive 5.1.3 NVIDIA Nsight Systems for Drive 5.0.13", "keywords": []}, {"id": 1, "doc_id": 1, "filename": "Archives/index.html", "domain_name": "std", "name": "Archives/index", "display_name": "Archives", "type": "doc", "display_type": "Page", "docname": "Archives/index", "anchor": "", "priority": -1, "content": "Documentation for previous versions of NVIDIA Nsight Systems.", "keywords": []}, {"id": 2, "doc_id": 4, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#nvidia-software-license-agreement", "display_name": "NVIDIA Software License Agreement", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "nvidia-software-license-agreement", "priority": -1, "content": "NVIDIA CORPORATION NVIDIA SOFTWARE LICENSE AGREEMENT IMPORTANT \u2014 READ BEFORE DOWNLOADING, INSTALLING, COPYING OR USING THE LICENSED SOFTWARE This Software License Agreement (\u201cSLA\u201d), made and entered into as of the time and date of click through action (\u201cEffective Date\u201d), is a legal agreement between you and NVIDIA Corporation (\u201cNVIDIA\u201d) and governs the use of the NVIDIA computer software and the documentation made available for use with such NVIDIA software. By downloading, installing, copying, or otherwise using the NVIDIA software and/or documentation, you agree to be bound by the terms of this SLA. If you do not agree to the terms of this SLA, do not download, install, copy or use the NVIDIA software or documentation. 
IF YOU ARE ENTERING INTO THIS SLA ON BEHALF OF A COMPANY OR OTHER LEGAL ENTITY, YOU REPRESENT THAT YOU HAVE THE LEGAL AUTHORITY TO BIND THE ENTITY TO THIS SLA, IN WHICH CASE \u201cYOU\u201d WILL MEAN THE ENTITY YOU REPRESENT. IF YOU DON\u2019T HAVE SUCH AUTHORITY, OR IF YOU DON\u2019T ACCEPT ALL THE TERMS AND CONDITIONS OF THIS SLA, THEN NVIDIA DOES NOT AGREE TO LICENSE THE LICENSED SOFTWARE TO YOU, AND YOU MAY NOT DOWNLOAD, INSTALL, COPY OR USE IT. LICENSE. 1.1 License Grant. Subject to the terms of the AGREEMENT, NVIDIA hereby grants you a non-exclusive, non-transferable license, without the right to sublicense (except as expressly set forth in a Supplement), during the applicable license term unless earlier terminated as provided below, to have Authorized Users install and use the Software, including modifications (if expressly permitted in a Supplement), in accordance with the Documentation. You are only licensed to activate and use Licensed Software for which you a have a valid license, even if during the download or installation you are presented with other product options. No Orders are binding on NVIDIA until accepted by NVIDIA. Your Orders are subject to the AGREEMENT. SLA Supplements : Certain Licensed Software licensed under this SLA may be subject to additional terms and conditions that will be presented to you in a Supplement for acceptance prior to the delivery of such Licensed Software under this SLA and the applicable Supplement. Licensed Software will only be delivered to you upon your acceptance of all applicable terms. 1.2 Limited Purpose Licenses . 
If your license is provided for one of the purposes indicated below, then notwithstanding contrary terms in Section 1.1 or in a Supplement, such licenses are for internal use and do not include any right or license to sub- license and distribute the Licensed Software or its output in any way in any public release, however limited, and/or in any manner that provides third parties with use of or access to the Licensed Software or its functionality or output, including (but not limited to) external alpha or beta testing or development phases. Further: Evaluation License . You may use evaluation licenses solely for your internal evaluation of the Licensed Software for broader adoption within your Enterprise or in connection with a NVIDIA product purchase decision, and such licenses have an expiration date as indicated by NVIDIA in its sole discretion (or ninety days from the date of download if no other duration is indicated). Educational/Academic License . You may use educational/academic licenses solely for educational purposes and all users must be enrolled or employed by an academic institution. If you do not meet NVIDIA\u2019s academic program requirements for educational institutions, you have no rights under this license. Test/Development License . You may use test/development licenses solely for your internal development, testing and/or debugging of your software applications or for interoperability testing with the Licensed Software, and such licenses have an expiration date as indicated by NVIDIA in its sole discretion (or one year from the date of download if no other duration is indicated). NVIDIA Confidential Information under the AGREEMENT includes output from Licensed Software developer tools identified as \u201cPro\u201d versions, where the output reveals functionality or performance data pertinent to NVIDIA hardware or software products. 1.3 Pre-Release Licenses . 
With respect to alpha, beta, preview, and other pre-release Software and Documentation ( \u201cPre- Release Licensed Software\u201d ) delivered to you under the AGREEMENT you acknowledge and agree that such Pre-Release Licensed Software (i) may not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, accessibility, availability, and reliability standards relative to commercially provided NVIDIA software and documentation, and (ii) use of such Pre-Release Licensed Software may result in unexpected results, loss of data, project delays or other unpredictable damage or loss. THEREFORE, PRE-RELEASE LICENSED SOFTWARE IS NOT INTENDED FOR USE, AND SHOULD NOT BE USED, IN PRODUCTION OR BUSINESS-CRITICAL SYSTEMS. NVIDIA has no obligation to make available a commercial version of any Pre-Release Licensed Software and NVIDIA has the right to abandon development of Pre-Release Licensed Software at any time without liability. 1.4 Enterprise and Contractor Usage . You may allow your Enterprise employees and Contractors to access and use the Licensed Software pursuant to the terms of the AGREEMENT solely to perform work on your behalf, provided further that with respect to Contractors: (i) you obtain a written agreement from each Contractor which contains terms and obligations with respect to access to and use of Licensed Software no less protective of NVIDIA than those set forth in the AGREEMENT, and (ii) such Contractor\u2019s access and use expressly excludes any sublicensing or distribution rights for the Licensed Software. You are responsible for the compliance with the terms and conditions of the AGREEMENT by your Enterprise and Contractors. Any act or omission that, if committed by you, would constitute a breach of the AGREEMENT shall be deemed to constitute a breach of the AGREEMENT if committed by your Enterprise or Contractors. 1.5 Services . 
Except as expressly indicated in an Order, NVIDIA is under no obligation to provide support for the Licensed Software or to provide any patches, maintenance, updates or upgrades under the AGREEMENT. Unless patches, maintenance, updates or upgrades are provided with their separate governing terms and conditions, they constitute Licensed Software licensed to you under the AGREEMENT. LIMITATIONS. 2.1 License Restrictions . Except as expressly authorized in the AGREEMENT, you agree that you will not (nor authorize third parties to): (i) copy and use Software that was licensed to you for use in one or more NVIDIA hardware products in other unlicensed products (provided that copies solely for backup purposes are allowed); (ii) reverse engineer, decompile, disassemble (except to the extent applicable laws specifically require that such activities be permitted) or attempt to derive the source code, underlying ideas, algorithm or structure of Software provided to you in object code form; (iii) sell, transfer, assign, distribute, rent, loan, lease, sublicense or otherwise make available the Licensed Software or its functionality to third parties (a) as an application services provider or service bureau, (b) by operating hosted/virtual system environments, (c) by hosting, time sharing or providing any other type of services, or (d) otherwise by means of the internet; (iv) modify, translate or otherwise create any derivative works of any Licensed Software; (v) remove, alter, cover or obscure any proprietary notice that appears on or with the Licensed Software or any copies thereof; (vi) use the Licensed Software, or allow its use, transfer, transmission or export in violation of any applicable export control laws, rules or regulations; (vii) distribute, permit access to, or sublicense the Licensed Software as a stand-alone product; (viii) bypass, disable, circumvent or remove any form of copy protection, encryption, security or digital rights management or authentication 
mechanism used by NVIDIA in connection with the Licensed Software, or use the Licensed Software together with any authorization code, serial number, or other copy protection device not supplied by NVIDIA directly or through an authorized reseller; (ix) use the Licensed Software for the purpose of developing competing products or technologies or assisting a third party in such activities; (x) use the Licensed Software with any system or application where the use or failure of such system or application can reasonably be expected to threaten or result in personal injury, death, or catastrophic loss including, without limitation, use in connection with any nuclear, avionics, navigation, military, medical, life support or other life critical application (\u201cCritical Applications\u201d), unless the parties have entered into a Critical Applications agreement; (xi) distribute any modification or derivative work you make to the Licensed Software under or by reference to the same name as used by NVIDIA; or (xii) use the Licensed Software in any manner that would cause the Licensed Software to become subject to an Open Source License. Nothing in the AGREEMENT shall be construed to give you a right to use, or otherwise obtain access to, any source code from which the Software or any portion thereof is compiled or interpreted. You acknowledge that NVIDIA does not design, test, manufacture or certify the Licensed Software for use in the context of a Critical Application and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or damages arising from such use. 
You agree to defend, indemnify and hold harmless NVIDIA and its Affiliates, and their respective employees, contractors, agents, officers and directors, from and against any and all claims, damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited to attorney\u2019s fees and costs incident to establishing the right of indemnification) arising out of or related to you and your Enterprise, and their respective employees, contractors, agents, distributors, resellers, end users, officers and directors use of Licensed Software outside of the scope of the AGREEMENT or any other breach of the terms of the AGREEMENT. 2.2 Third Party License Obligations . You acknowledge and agree that the Licensed Software may include or incorporate third party technology (collectively \u201cThird Party Components\u201d), which is provided for use in or with the Software and not otherwise used separately. If the Licensed Software includes or incorporates Third Party Components, then the third-party pass-through terms and conditions (\u201cThird Party Terms\u201d) for the particular Third Party Component will be bundled with the Software or otherwise made available online as indicated by NVIDIA and will be incorporated by reference into the AGREEMENT. In the event of any conflict between the terms in the AGREEMENT and the Third Party Terms, the Third Party Terms shall govern. Copyright to Third Party Components are held by the copyright holders indicated in the copyright notices indicated in the Third Party Terms. Audio/Video Encoders and Decoders . 
You acknowledge and agree that it is your sole responsibility to obtain any additional third party licenses required to make, have made, use, have used, sell, import, and offer for sale your products or services that include or incorporate any Third Party Components and content relating to audio and/or video encoders and decoders from, including but not limited to, Microsoft, Thomson, Fraunhofer IIS, Sisvel S.p.A., MPEG-LA, and Coding Technologies as NVIDIA does not grant to you under the AGREEMENT any necessary patent or other rights with respect to audio and/or video encoders and decoders. 2.3 Limited Rights . Your rights in the Licensed Software are limited to those expressly granted under the AGREEMENT and no other licenses are granted whether by implication, estoppel or otherwise. NVIDIA reserves all rights, title and interest in and to the Licensed Software not expressly granted under the AGREEMENT. 3. CONFIDENTIALITY. Neither party will use the other party\u2019s Confidential Information, except as necessary for the performance of the AGREEMENT, nor will either party disclose such Confidential Information to any third party, except to personnel of NVIDIA and its Affiliates, you, your Enterprise, your Enterprise Contractors, and each party\u2019s legal and financial advisors that have a need to know such Confidential Information for the performance of the AGREEMENT, provided that each such personnel, employee and Contractor is subject to a written agreement that includes confidentiality obligations consistent with those set forth herein. Each party will use all reasonable efforts to maintain the confidentiality of all of the other party\u2019s Confidential Information in its possession or control, but in no event less than the efforts that it ordinarily uses with respect to its own Confidential Information of similar nature and importance. 
The foregoing obligations will not restrict either party from disclosing the other party\u2019s Confidential Information or the terms and conditions of the AGREEMENT as required under applicable securities regulations or pursuant to the order or requirement of a court, administrative agency, or other governmental body, provided that the party required to make such disclosure (i) gives reasonable notice to the other party to enable it to contest such order or requirement prior to its disclosure (whether through protective orders or otherwise), (ii) uses reasonable effort to obtain confidential treatment or similar protection to the fullest extent possible to avoid such public disclosure, and (iii) discloses only the minimum amount of information necessary to comply with such requirements. 4. OWNERSHIP. You are not obligated to disclose to NVIDIA any modifications that you, your Enterprise or your Contractors make to the Licensed Software as permitted under the AGREEMENT. As between the parties, all modifications are owned by NVIDIA and licensed to you under the AGREEMENT unless otherwise expressly provided in a Supplement. The Licensed Software and all modifications owned by NVIDIA, and the respective Intellectual Property Rights therein, are and will remain the sole and exclusive property of NVIDIA or its licensors, whether the Licensed Software is separate from or combined with any other products or materials. You shall not engage in any act or omission that would impair NVIDIA\u2019s and/or its licensors\u2019 Intellectual Property Rights in the Licensed Software or any other materials, information, processes or subject matter proprietary to NVIDIA. NVIDIA\u2019s licensors are intended third party beneficiaries with the right to enforce provisions of the AGREEMENT with respect to their Confidential Information and/or Intellectual Property Rights. 5. FEEDBACK. You have no obligation to provide Feedback to NVIDIA. 
However, NVIDIA and/or its Affiliates may use and include any Feedback that you provide to improve the Licensed Software or other NVIDIA products, technologies or materials. Accordingly, if you provide Feedback, you agree that NVIDIA and/or its Affiliates, at their option, may, and may permit their licensees, to make, have made, use, have used, reproduce, license, distribute and otherwise commercialize the Feedback in the Licensed Software or in other NVIDIA products, technologies or materials without the payment of any royalties or fees to you. All Feedback becomes the sole property of NVIDIA and may be used in any manner NVIDIA sees fit, and you hereby assign to NVIDIA all of your right, title and interest in and to any Feedback. NVIDIA has no obligation to respond to Feedback or to incorporate Feedback into the Licensed Software. 6. NO WARRANTIES. THE LICENSED SOFTWARE AND ANY OTHER CONFIDENTIAL INFORMATION AND/OR SERVICES ARE PROVIDED BY NVIDIA \u201cAS IS\u201d AND \u201cWITH ALL FAULTS,\u201d AND NVIDIA EXPRESSLY DISCLAIMS ALL OTHER WARRANTIES OF ANY KIND OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF OPERABILITY, CONDITION, VALUE, ACCURACY OF DATA, OR QUALITY, AS WELL AS ANY WARRANTIES OF MERCHANTABILITY, SYSTEM INTEGRATION, WORKMANSHIP, SUITABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON- INFRINGEMENT, OR THE ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO WARRANTY IS MADE BY NVIDIA ON THE BASIS OF TRADE USAGE, COURSE OF DEALING OR COURSE OF TRADE. NVIDIA DOES NOT WARRANT THAT THE LICENSED SOFTWARE OR ANY OTHER CONFIDENTIAL INFORMATION AND/OR SERVICES PROVIDED BY NVIDIA UNDER THE AGREEMENT WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. YOU ACKNOWLEDGE THAT NVIDIA\u2019S OBLIGATIONS UNDER THE AGREEMENT ARE FOR THE BENEFIT OF YOU ONLY. 
Nothing in this warranty section affects any statutory rights of consumers or other recipients to the extent that they cannot be waived or limited by contract under applicable law. 7. LIMITATION OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA OR ITS LICENSORS SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH THE AGREEMENT OR THE USE OR PERFORMANCE OF THE LICENSED SOFTWARE AND ANY OTHER CONFIDENTIAL INFORMATION AND/OR SERVICES PROVIDED BY NVIDIA UNDER THE AGREEMENT, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF LIABILITY. IN NO EVENT WILL NVIDIA\u2019S TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THE AGREEMENT EXCEED THE NET AMOUNTS RECEIVED BY NVIDIA FOR YOUR USE OF THE PARTICULAR LICENSED SOFTWARE DURING THE TWELVE (12) MONTHS BEFORE THE LIABILITY AROSE (or up to US$10.00 if you acquired the Licensed Software for no charge). THE NATURE OF THE LIABILITY, THE NUMBER OF CLAIMS OR SUITS OR THE NUMBER OF PARTIES WITHIN YOUR ENTERPRISE THAT ACCEPTED THE TERMS OF THE AGREEMENT SHALL NOT ENLARGE OR EXTEND THIS LIMIT. THE FOREGOING LIMITATIONS SHALL APPLY REGARDLESS OF WHETHER NVIDIA OR ITS LICENSORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES AND REGARDLESS OF WHETHER ANY REMEDY FAILS ITS ESSENTIAL PURPOSE. The disclaimers, exclusions and limitations of liability set forth in the AGREEMENT form an essential basis of the bargain between the parties, and, absent any such disclaimers, exclusions or limitations of liability, the provisions of the AGREEMENT, including, without limitation, the economic terms, would be substantially different. TERM AND TERMINATION. 8.1 AGREEMENT, Licenses and Services . 
This SLA shall become effective upon the Effective Date, each Supplement upon their acceptance, and both this SLA and Supplements shall continue in effect until your last access or use of the Licensed Software and/or services hereunder, unless earlier terminated as provided in this \u201cTerm and Termination\u201d section. Each Licensed Software license ends at the earlier of (a) the expiration of the applicable license term, or (b) termination of such license or the AGREEMENT. Each service ends at the earlier of (x) the expiration of the applicable service term, (y) termination of such service or the AGREEMENT, or (z) expiration or termination of the associated license and no credit or refund will be provided upon the expiration or termination of the associated license for any service fees paid. 8.2 Termination and Effect of Expiration or Termination . NVIDIA may terminate the AGREEMENT in whole or in part: (i) if you breach any term of the AGREEMENT and fail to cure such breach within thirty (30) days following notice thereof from NVIDIA (or immediately if you violate NVIDIA\u2019s Intellectual Property Rights); (ii) if you become the subject of a voluntary or involuntary petition in bankruptcy or any proceeding relating to insolvency, receivership, liquidation or composition for the benefit of creditors, if that petition or proceeding is not dismissed with prejudice within sixty (60) days after filing, or if you cease to do business; or (iii) if you commence or participate in any legal proceeding against NVIDIA, with respect to the Licensed Software that is the subject of the proceeding during the pendency of such legal proceeding. 
If you or your authorized NVIDIA reseller fail to pay license fees or service fees when due then NVIDIA may, in its sole discretion, suspend or terminate your license grants, services and any other rights provided under the AGREEMENT for the affected Licensed Software, in addition to any other remedies NVIDIA may have at law or equity. Upon any expiration or termination of the AGREEMENT, a license or a service provided hereunder, (a) any amounts owed to NVIDIA become immediately due and payable, (b) you must promptly discontinue use of the affected Licensed Software and/or service, and (c) you must promptly destroy or return to NVIDIA all copies of the affected Licensed Software and all portions thereof in your possession or control, and each party will promptly destroy or return to the other all of the other party\u2019s Confidential Information within its possession or control. Upon written request, you will certify in writing that you have complied with your obligations under this section. Upon expiration or termination of the AGREEMENT all provisions survive except for the license grant provisions. CONSENT TO COLLECTION AND USE OF INFORMATION. You hereby agree and acknowledge that the Software may access, collect non-personally identifiable information about your Enterprise computer systems in order to properly optimize such systems for use with the Software. To the extent that you use the Software, you hereby consent to all of the foregoing, and represent and warrant that you have the right to grant such consent. 
In addition, you agree that you are solely responsible for maintaining appropriate data backups and system restore points for your Enterprise systems, and that NVIDIA will have no responsibility for any damage or loss to such systems (including loss of data or access) arising from or relating to (a) any changes to the configuration, application settings, environment variables, registry, drivers, BIOS, or other attributes of the systems (or any part of such systems) initiated through the Software; or (b) installation of any Software or third party software patches initiated through the Software. In certain systems you may change your system update preferences by unchecking \u201cAutomatically check for updates\u201d in the \u201cPreferences\u201d tab of the control panel for the Software. In connection with the receipt of the Licensed Software or services you may receive access to links to third party websites and services and the availability of those links does not imply any endorsement by NVIDIA. NVIDIA encourages you to review the privacy statements on those sites and services that you choose to visit so that you can understand how they may collect, use and share personal information of individuals. NVIDIA is not responsible or liable for: (i) the availability or accuracy of such links; or (ii) the products, services or information available on or through such links; or (iii) the privacy statements or practices of sites and services controlled by other companies or organizations. To the extent that you or members of your Enterprise provide to NVIDIA during registration or otherwise personal information, you acknowledge that such information will be collected, used and disclosed by NVIDIA in accordance with NVIDIA\u2019s privacy policy, available at URL http://www.nvidia.com/object/privacy_policy.html . GENERAL. 
This SLA, any Supplements incorporated hereto, and Orders constitute the entire agreement of the parties with respect to the subject matter hereto and supersede all prior negotiations, conversations, or discussions between the parties relating to the subject matter hereto, oral or written, and all past dealings or industry custom. Any additional and/or conflicting terms and conditions on purchase order(s) or any other documents issued by you are null, void, and invalid. Any amendment or waiver under the AGREEMENT must be in writing and signed by representatives of both parties. The AGREEMENT and the rights and obligations thereunder may not be assigned by you, in whole or in part, including by merger, consolidation, dissolution, operation of law, or any other manner, without written consent of NVIDIA, and any purported assignment in violation of this provision shall be void and of no effect. NVIDIA may assign, delegate or transfer the AGREEMENT and its rights and obligations hereunder, and if to a non-Affiliate you will be notified. Each party acknowledges and agrees that the other is an independent contractor in the performance of the AGREEMENT, and each party is solely responsible for all of its employees, agents, contractors, and labor costs and expenses arising in connection therewith. The parties are not partners, joint ventures or otherwise affiliated, and neither has any authority to make any statements, representations or commitments of any kind to bind the other party without prior written consent. Neither party will be responsible for any failure or delay in its performance under the AGREEMENT (except for any payment obligations) to the extent due to causes beyond its reasonable control for so long as such force majeure event continues in effect. 
The AGREEMENT will be governed by and construed under the laws of the State of Delaware and the United States without regard to the conflicts of law provisions thereof and without regard to the United Nations Convention on Contracts for the International Sale of Goods. The parties consent to the personal jurisdiction of the federal and state courts located in Santa Clara County, California. You acknowledge and agree that a breach of any of your promises or agreements contained in the AGREEMENT may result in irreparable and continuing injury to NVIDIA for which monetary damages may not be an adequate remedy and therefore NVIDIA is entitled to seek injunctive relief as well as such other and further relief as may be appropriate. If any court of competent jurisdiction determines that any provision of the AGREEMENT is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. Unless otherwise specified, remedies are cumulative. The Licensed Software has been developed entirely at private expense and is \u201ccommercial items\u201d consisting of \u201ccommercial computer software\u201d and \u201ccommercial computer software documentation\u201d provided with RESTRICTED RIGHTS. Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the restrictions set forth in the AGREEMENT pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (c)(1) and (2) of the Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is NVIDIA, 2701 San Tomas Expressway, Santa Clara, CA 95050. You acknowledge that the Licensed Software described under the AGREEMENT is subject to export control under the U.S. Export Administration Regulations (EAR) and economic sanctions regulations administered by the U.S. Department of Treasury\u2019s Office of Foreign Assets Control (OFAC). 
Therefore, you may not export, reexport or transfer in-country the Licensed Software without first obtaining any license or other approval that may be required by BIS and/or OFAC. You are responsible for any violation of the U.S. or other applicable export control or economic sanctions laws, regulations and requirements related to the Licensed Software. By accepting this SLA, you confirm that you are not a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from receiving the Licensed Software. Any notice delivered by NVIDIA to you under the AGREEMENT will be delivered via mail, email or fax. Please direct your legal notices or other correspondence to NVIDIA Corporation, 2701 San Tomas Expressway, Santa Clara, California 95050, United States of America, Attention: Legal Department. GLOSSARY OF TERMS Certain capitalized terms, if not otherwise defined elsewhere in this SLA, shall have the meanings set forth below: \u201c Affiliate \u201d means any legal entity that Owns, is Owned by, or is commonly Owned with a party. \u201cOwn\u201d means having more than 50% ownership or the right to direct the management of the entity. \u201c AGREEMENT \u201d means this SLA and all associated Supplements entered by the parties referencing this SLA. \u201c Authorized Users \u201d means your Enterprise individual employees and any of your Enterprise\u2019s Contractors, subject to the terms of the \u201cEnterprise and Contractors Usage\u201d section. 
\u201c Confidential Information \u201d means the Licensed Software (unless made publicly available by NVIDIA without confidentiality obligations), and any NVIDIA business, marketing, pricing, research and development, know-how, technical, scientific, financial status, proposed new products or other information disclosed by NVIDIA to you which, at the time of disclosure, is designated in writing as confidential or proprietary (or like written designation), or orally identified as confidential or proprietary or is otherwise reasonably identifiable by parties exercising reasonable business judgment, as confidential. Confidential Information does not and will not include information that: (i) is or becomes generally known to the public through no fault of or breach of the AGREEMENT by the receiving party; (ii) is rightfully known by the receiving party at the time of disclosure without an obligation of confidentiality; (iii) is independently developed by the receiving party without use of the disclosing party\u2019s Confidential Information; or (iv) is rightfully obtained by the receiving party from a third party without restriction on use or disclosure. \u201c Contractor \u201d means an individual who works primarily for your Enterprise on a contractor basis from your secure network. \u201c Documentation \u201d means the NVIDIA documentation made available for use with the Software, including (without limitation) user manuals, datasheets, operations instructions, installation guides, release notes and other materials provided to you under the AGREEMENT. \u201c Enterprise \u201d means you or any company or legal entity for which you accepted the terms of this SLA, and their subsidiaries of which your company or legal entity owns more than fifty percent (50%) of the issued and outstanding equity. 
\u201c Feedback \u201d means any and all suggestions, feature requests, comments or other feedback regarding the Licensed Software, including possible enhancements or modifications thereto. \u201c Intellectual Property Rights \u201d means all patent, copyright, trademark, trade secret, trade dress, trade names, utility models, mask work, moral rights, rights of attribution or integrity service marks, master recording and music publishing rights, performance rights, author\u2019s rights, database rights, registered design rights and any applications for the protection or registration of these rights, or other intellectual or industrial property rights or proprietary rights, howsoever arising and in whatever media, whether now known or hereafter devised, whether or not registered, (including all claims and causes of action for infringement, misappropriation or violation and all rights in any registrations and renewals), worldwide and whether existing now or in the future. \u201c Licensed Software \u201d means Software, Documentation and all modifications owned by NVIDIA. \u201c Open Source License \u201d includes, without limitation, a software license that requires as a condition of use, modification, and/or distribution of such software that the Software be (i) disclosed or distributed in source code form; (ii) be licensed for the purpose of making derivative works; or (iii) be redistributable at no charge. \u201c Order \u201d means a purchase order issued by you, a signed purchase agreement with you, or other ordering document issued by you to NVIDIA or a NVIDIA authorized reseller (including any on-line acceptance process) that references and incorporates the AGREEMENT and is accepted by NVIDIA. \u201c Software \u201d means the NVIDIA software programs licensed to you under the AGREEMENT including, without limitation, libraries, sample code, utility programs and programming code. 
\u201c Supplement \u201d means the additional terms and conditions beyond those stated in this SLA that apply to certain Licensed Software licensed hereunder.", "keywords": []}, {"id": 3, "doc_id": 4, "filename": "CopyrightAndLicenses/index.html", "domain_name": "page", "name": "CopyrightAndLicenses/index#third-party-copyright-and-license-notices", "display_name": "Third Party Copyright and License Notices", "type": "section", "display_type": "Page section", "docname": "CopyrightAndLicenses/index", "anchor": "third-party-copyright-and-license-notices", "priority": -1, "content": "Nsight Systems includes the following third-party libraries: libelf from the elfutils 0.187 release - LGPLv3 License GNU LESSER GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. This version of the GNU Lesser General Public License incorporates the terms and conditions of version 3 of the GNU General Public License, supplemented by the additional permissions listed below. 0. Additional Definitions. As used herein, "this License" refers to version 3 of the GNU Lesser General Public License, and the "GNU GPL" refers to version 3 of the GNU General Public License. "The Library" refers to a covered work governed by this License, other than an Application or a Combined Work as defined below. An "Application" is any work that makes use of an interface provided by the Library, but which is not otherwise based on the Library. Defining a subclass of a class defined by the Library is deemed a mode of using an interface provided by the Library. A "Combined Work" is a work produced by combining or linking an Application with the Library. The particular version of the Library with which the Combined Work was made is also called the "Linked Version". 
The "Minimal Corresponding Source" for a Combined Work means the Corresponding Source for the Combined Work, excluding any source code for portions of the Combined Work that, considered in isolation, are based on the Application, and not on the Linked Version. The "Corresponding Application Code" for a Combined Work means the object code and/or source code for the Application, including any data and utility programs needed for reproducing the Combined Work from the Application, but excluding the System Libraries of the Combined Work. 1. Exception to Section 3 of the GNU GPL. You may convey a covered work under sections 3 and 4 of this License without being bound by section 3 of the GNU GPL. 2. Conveying Modified Versions. If you modify a copy of the Library, and, in your modifications, a facility refers to a function or data to be supplied by an Application that uses the facility (other than as an argument passed when the facility is invoked), then you may convey a copy of the modified version: a) under this License, provided that you make a good faith effort to ensure that, in the event an Application does not supply the function or data, the facility still operates, and performs whatever part of its purpose remains meaningful, or b) under the GNU GPL, with none of the additional permissions of this License applicable to that copy. 3. Object Code Incorporating Material from Library Header Files. The object code form of an Application may incorporate material from a header file that is part of the Library. You may convey such object code under terms of your choice, provided that, if the incorporated material is not limited to numerical parameters, data structure layouts and accessors, or small macros, inline functions and templates (ten or fewer lines in length), you do both of the following: a) Give prominent notice with each copy of the object code that the Library is used in it and that the Library and its use are covered by this License. 
b) Accompany the object code with a copy of the GNU GPL and this license document. 4. Combined Works. You may convey a Combined Work under terms of your choice that, taken together, effectively do not restrict modification of the portions of the Library contained in the Combined Work and reverse engineering for debugging such modifications, if you also do each of the following: a) Give prominent notice with each copy of the Combined Work that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the Combined Work with a copy of the GNU GPL and this license document. c) For a Combined Work that displays copyright notices during execution, include the copyright notice for the Library among these notices, as well as a reference directing the user to the copies of the GNU GPL and this license document. d) Do one of the following: 0) Convey the Minimal Corresponding Source under the terms of this License, and the Corresponding Application Code in a form suitable for, and under terms that permit, the user to recombine or relink the Application with a modified version of the Linked Version to produce a modified Combined Work, in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source. 1) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (a) uses at run time a copy of the Library already present on the user's computer system, and (b) will operate properly with a modified version of the Library that is interface-compatible with the Linked Version. e) Provide Installation Information, but only if you would otherwise be required to provide such information under section 6 of the GNU GPL, and only to the extent that such information is necessary to install and execute a modified version of the Combined Work produced by recombining or relinking the Application with a modified version of the Linked Version. 
(If you use option 4d0, the Installation Information must accompany the Minimal Corresponding Source and Corresponding Application Code. If you use option 4d1, you must provide the Installation Information in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source.) 5. Combined Libraries. You may place library facilities that are a work based on the Library side by side in a single library together with other library facilities that are not Applications and are not covered by this License, and convey such a combined library under terms of your choice, if you do both of the following: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities, conveyed under the terms of this License. b) Give prominent notice with the combined library that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 6. Revised Versions of the GNU Lesser General Public License. The Free Software Foundation may publish revised and/or new versions of the GNU Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library as you received it specifies that a certain numbered version of the GNU Lesser General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that published version or of any later version published by the Free Software Foundation. If the Library as you received it does not specify a version number of the GNU Lesser General Public License, you may choose any version of the GNU Lesser General Public License ever published by the Free Software Foundation. 
If the Library as you received it specifies that a proxy can decide whether future versions of the GNU Lesser General Public License shall apply, that proxy's public statement of acceptance of any version is permanent authorization for you to choose that version for the Library. Apache Arrow 5.0.0 Apache Thrift 0.13.0 Apache-2.0 License : Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. As dependences of Apache Arrow 5.0.0 : src/plasma/fling.cc and src/plasma/fling.h: Apache 2.0 Copyright 2013 Sharvil Nanavati Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -------------------------------------------------------------------------------- src/plasma/thirdparty/ae: Modified / 3-Clause BSD Copyright (c) 2006-2010, Salvatore Sanfilippo <antirez at gmail dot com> All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Redis nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- src/plasma/thirdparty/dlmalloc.c: CC0 This is a version (aka dlmalloc) of malloc/free/realloc written by Doug Lea and released to the public domain, as explained at http://creativecommons.org/publicdomain/zero/1.0/ Send questions, comments, complaints, performance data, etc to dl@cs.oswego.edu -------------------------------------------------------------------------------- src/plasma/common.cc (some portions) Copyright (c) Austin Appleby (aappleby (AT) gmail) Some portions of this file are derived from code in the MurmurHash project All code is released to the public domain. For business purposes, Murmurhash is under the MIT license. https://sites.google.com/site/murmurhash/ -------------------------------------------------------------------------------- src/arrow/util (some portions): Apache 2.0, and 3-clause BSD Some portions of this module are derived from code in the Chromium project, copyright (c) Google inc and (c) The Chromium Authors and licensed under the Apache 2.0 License or the under the 3-clause BSD license: Copyright (c) 2013 The Chromium Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- This project includes code from Daniel Lemire's FrameOfReference project. https://github.com/lemire/FrameOfReference/blob/6ccaf9e97160f9a3b299e23a8ef739e711ef0c71/src/bpacking.cpp https://github.com/lemire/FrameOfReference/blob/146948b6058a976bc7767262ad3a2ce201486b93/scripts/turbopacking64.py Copyright: 2013 Daniel Lemire Home page: http://lemire.me/en/ Project page: https://github.com/lemire/FrameOfReference License: Apache License Version 2.0 http://www.apache.org/licenses/LICENSE-2.0 -------------------------------------------------------------------------------- This project includes code from the TensorFlow project Copyright 2015 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. -------------------------------------------------------------------------------- This project includes code from the NumPy project. https://github.com/numpy/numpy/blob/e1f191c46f2eebd6cb892a4bfe14d9dd43a06c4e/numpy/core/src/multiarray/multiarraymodule.c#L2910 https://github.com/numpy/numpy/blob/68fd82271b9ea5a9e50d4e761061dfcca851382a/numpy/core/src/multiarray/datetime.c Copyright (c) 2005-2017, NumPy Developers. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the NumPy Developers nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- This project includes code from the Boost project Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- This project includes code from the FlatBuffers project Copyright 2014 Google Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -------------------------------------------------------------------------------- This project includes code from the tslib project Copyright 2015 Microsoft Corporation. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -------------------------------------------------------------------------------- This project includes code from the jemalloc project https://github.com/jemalloc/jemalloc Copyright (C) 2002-2017 Jason Evans <jasone@canonware.com>. All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. Copyright (C) 2009-2017 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice(s), this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice(s), this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- This project includes code from the Go project, BSD 3-clause license + PATENTS weak patent termination clause (https://github.com/golang/go/blob/master/PATENTS). Copyright (c) 2009 The Go Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- This project includes code from the hs2client https://github.com/cloudera/hs2client Copyright 2016 Cloudera Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -------------------------------------------------------------------------------- The script ci/scripts/util_wait_for_it.sh has the following license Copyright (c) 2016 Giles Hall Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- The script r/configure has the following license (MIT) Copyright (c) 2017, Jeroen Ooms and Jim Hester Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- cpp/src/arrow/util/logging.cc, cpp/src/arrow/util/logging.h and cpp/src/arrow/util/logging-test.cc are adapted from Ray Project (https://github.com/ray-project/ray) (Apache 2.0). 
Copyright (c) 2016 Ray Project (https://github.com/ray-project/ray) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -------------------------------------------------------------------------------- The files cpp/src/arrow/vendored/datetime/date.h, cpp/src/arrow/vendored/datetime/tz.h, cpp/src/arrow/vendored/datetime/tz_private.h, cpp/src/arrow/vendored/datetime/ios.h, cpp/src/arrow/vendored/datetime/ios.mm, cpp/src/arrow/vendored/datetime/tz.cpp are adapted from Howard Hinnant's date library (https://github.com/HowardHinnant/date) It is licensed under MIT license. The MIT License (MIT) Copyright (c) 2015, 2016, 2017 Howard Hinnant Copyright (c) 2016 Adrian Colomitchi Copyright (c) 2017 Florian Dang Copyright (c) 2017 Paul Thompson Copyright (c) 2018 Tomasz Kami\u0144ski Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- The file cpp/src/arrow/util/utf8.h includes code adapted from the page https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ with the following license (MIT) Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- The file cpp/src/arrow/vendored/string_view.hpp has the following license Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- The files in cpp/src/arrow/vendored/xxhash/ have the following license (BSD 2-Clause License) xxHash Library Copyright (c) 2012-2014, Yann Collet All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - xxHash homepage: http://www.xxhash.com - xxHash source repository : https://github.com/Cyan4973/xxHash -------------------------------------------------------------------------------- The files in cpp/src/arrow/vendored/double-conversion/ have the following license (BSD 3-Clause License) Copyright 2006-2011, the V8 project authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- The files in cpp/src/arrow/vendored/uriparser/ have the following license (BSD 3-Clause License) uriparser - RFC 3986 URI parsing library Copyright (C) 2007, Weijia Song <songweijia@gmail.com> Copyright (C) 2007, Sebastian Pipping <sebastian@pipping.org> All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the name of the <ORGANIZATION> nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- The files under dev/tasks/conda-recipes have the following license BSD 3-clause license Copyright (c) 2015-2018, conda-forge All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- The files in cpp/src/arrow/vendored/utfcpp/ have the following license Copyright 2006-2018 Nemanja Trifunovic Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- This project includes code from Apache Kudu. * cpp/cmake_modules/CompilerInfo.cmake is based on Kudu's cmake_modules/CompilerInfo.cmake Copyright: 2016 The Apache Software Foundation. Home page: https://kudu.apache.org/ License: http://www.apache.org/licenses/LICENSE-2.0 -------------------------------------------------------------------------------- This project includes code from Apache Impala (incubating), formerly Impala. The Impala code and rights were donated to the ASF as part of the Incubator process after the initial code imports into Apache Parquet. Copyright: 2012 Cloudera, Inc. Copyright: 2016 The Apache Software Foundation. Home page: http://impala.apache.org/ License: http://www.apache.org/licenses/LICENSE-2.0 -------------------------------------------------------------------------------- This project includes code from Apache Aurora. * dev/release/{release,changelog,release-candidate} are based on the scripts from Apache Aurora Copyright: 2016 The Apache Software Foundation. Home page: https://aurora.apache.org/ License: http://www.apache.org/licenses/LICENSE-2.0 -------------------------------------------------------------------------------- This project includes code from the Google styleguide. * cpp/build-support/cpplint.py is based on the scripts from the Google styleguide. Copyright: 2009 Google Inc. All rights reserved. Homepage: https://github.com/google/styleguide License: 3-clause BSD -------------------------------------------------------------------------------- This project includes code from Snappy. 
* cpp/cmake_modules/{SnappyCMakeLists.txt,SnappyConfig.h} are based on code from Google's Snappy project. Copyright: 2009 Google Inc. All rights reserved. Homepage: https://github.com/google/snappy License: 3-clause BSD -------------------------------------------------------------------------------- This project includes code from the manylinux project. * python/manylinux1/scripts/{build_python.sh,python-tag-abi-tag.py, requirements.txt} are based on code from the manylinux project. Copyright: 2016 manylinux Homepage: https://github.com/pypa/manylinux License: The MIT License (MIT) -------------------------------------------------------------------------------- This project includes code from the cymove project: * python/pyarrow/includes/common.pxd includes code from the cymove project The MIT License (MIT) Copyright (c) 2019 Omer Ozarslan Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- The projects includes code from the Ursabot project under the dev/archery directory. License: BSD 2-Clause Copyright 2019 RStudio, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- This project include code from mingw-w64. * cpp/src/arrow/util/cpu-info.cc has a polyfill for mingw-w64 < 5 Copyright (c) 2009 - 2013 by the mingw-w64 project Homepage: https://mingw-w64.org License: Zope Public License (ZPL) Version 2.1. --------------------------------------------------------------------------------- This project include code from Google's Asylo project. 
* cpp/src/arrow/result.h is based on status_or.h Copyright (c) Copyright 2017 Asylo authors Homepage: https://asylo.dev/ License: Apache 2.0 -------------------------------------------------------------------------------- This project includes code from Google's protobuf project * cpp/src/arrow/result.h ARROW_ASSIGN_OR_RAISE is based off ASSIGN_OR_RETURN * cpp/src/arrow/util/bit_stream_utils.h contains code from wire_format_lite.h Copyright 2008 Google Inc. All rights reserved. Homepage: https://developers.google.com/protocol-buffers/ License: Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
Code generated by the Protocol Buffer compiler is owned by the owner of the input file used when generating it. This code is not standalone and requires a support library to be linked with it. This support library is itself covered by the above license. -------------------------------------------------------------------------------- 3rdparty dependency LLVM is statically linked in certain binary distributions. Additionally some sections of source code have been derived from sources in LLVM and have been clearly labeled as such. LLVM has the following license: ============================================================================== The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: ============================================================================== Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ---- LLVM Exceptions to the Apache 2.0 License ---- As an exception, if, as a result of your compiling your source code, portions of this Software are embedded into an Object form of such source code, you may redistribute such embedded portions in such Object form without complying with the conditions of Sections 4(a), 4(b) and 4(d) of the License. In addition, if you combine or link compiled forms of this Software with software that is licensed under the GPLv2 ("Combined Software") and if a court of competent jurisdiction determines that the patent provision (Section 3), the indemnity provision (Section 9) or other Section of the License conflicts with the conditions of the GPLv2, you may retroactively and prospectively choose to deem waived or otherwise exclude such Section(s) of the License, but only in their entirety and only with respect to the Combined Software. ============================================================================== Software from third parties included in the LLVM Project: ============================================================================== The LLVM Project contains third party software which is under different license terms. 
All such code will be identified clearly using at least one of two mechanisms: 1) It will be in a separate directory tree with its own `LICENSE.txt` or `LICENSE` file at the top containing the specific license and restrictions which apply to that software, or 2) It will contain specific license and restriction terms at the top of every file. -------------------------------------------------------------------------------- 3rdparty dependency gRPC is statically linked in certain binary distributions, like the python wheels. gRPC has the following license: Copyright 2014 gRPC authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -------------------------------------------------------------------------------- 3rdparty dependency Apache Thrift is statically linked in certain binary distributions, like the python wheels. Apache Thrift has the following license: Apache Thrift Copyright (C) 2006 - 2019, The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. -------------------------------------------------------------------------------- 3rdparty dependency Apache ORC is statically linked in certain binary distributions, like the python wheels. Apache ORC has the following license: Apache ORC Copyright 2013-2019 The Apache Software Foundation This product includes software developed by The Apache Software Foundation (http://www.apache.org/). This product includes software developed by Hewlett-Packard: (c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -------------------------------------------------------------------------------- 3rdparty dependency zstd is statically linked in certain binary distributions, like the python wheels. ZSTD has the following license: BSD License For Zstandard software Copyright (c) 2016-present, Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the name Facebook nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- 3rdparty dependency lz4 is statically linked in certain binary distributions, like the python wheels. lz4 has the following license: LZ4 Library Copyright (c) 2011-2016, Yann Collet All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- 3rdparty dependency Brotli is statically linked in certain binary distributions, like the python wheels. Brotli has the following license: Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- 3rdparty dependency rapidjson is statically linked in certain binary distributions, like the python wheels. rapidjson and its dependencies have the following licenses: Tencent is pleased to support the open source community by making RapidJSON available. Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. If you have downloaded a copy of the RapidJSON binary from Tencent, please note that the RapidJSON binary is licensed under the MIT License. If you have downloaded a copy of the RapidJSON source code from Tencent, please note that RapidJSON source code is licensed under the MIT License, except for the third-party components listed below which are subject to different license terms. Your integration of RapidJSON into your own projects may require compliance with the MIT License, as well as the other licenses applicable to the third-party components included within RapidJSON. To avoid the problematic JSON license in your own projects, it's sufficient to exclude the bin/jsonchecker/ directory, as it's the only code under the JSON license. A copy of the MIT License is included in this file. Other dependencies and licenses: Open Source Software Licensed Under the BSD License: -------------------------------------------------------------------- The msinttypes r29 Copyright (c) 2006-2013 Alexander Chemeris All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the name of copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Open Source Software Licensed Under the JSON License: -------------------------------------------------------------------- json.org Copyright (c) 2002 JSON.org All Rights Reserved. JSON_checker Copyright (c) 2002 JSON.org All Rights Reserved. Terms of the JSON License: --------------------------------------------------- Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. The Software shall be used for Good, not Evil. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Terms of the MIT License: -------------------------------------------------------------------- Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- 3rdparty dependency snappy is statically linked in certain binary distributions, like the python wheels. snappy has the following license: Copyright 2011, Google Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. === Some of the benchmark data in testdata/ is licensed differently: - fireworks.jpeg is Copyright 2013 Steinar H. Gunderson, and is licensed under the Creative Commons Attribution 3.0 license (CC-BY-3.0). See https://creativecommons.org/licenses/by/3.0/ for more information. - kppkn.gtb is taken from the Gaviota chess tablebase set, and is licensed under the MIT License. See https://sites.google.com/site/gaviotachessengine/Home/endgame-tablebases-1 for more information. 
- paper-100k.pdf is an excerpt (bytes 92160 to 194560) from the paper \u201cCombinatorial Modeling of Chromatin Features Quantitatively Predicts DNA Replication Timing in _Drosophila_\u201d by Federico Comoglio and Renato Paro, which is licensed under the CC-BY license. See http://www.ploscompbiol.org/static/license for more ifnormation. - alice29.txt, asyoulik.txt, plrabn12.txt and lcet10.txt are from Project Gutenberg. The first three have expired copyrights and are in the public domain; the latter does not have expired copyright, but is still in the public domain according to the license information (http://www.gutenberg.org/ebooks/53). -------------------------------------------------------------------------------- 3rdparty dependency gflags is statically linked in certain binary distributions, like the python wheels. gflags has the following license: Copyright (c) 2006, Google Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- 3rdparty dependency glog is statically linked in certain binary distributions, like the python wheels. glog has the following license: Copyright (c) 2008, Google Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. A function gettimeofday in utilities.cc is based on http://www.google.com/codesearch/p?hl=en#dR3YEbitojA/COPYING&q=GetSystemTimeAsFileTime%20license:bsd The license of this code is: Copyright (c) 2003-2008, Jouni Malinen <j@w1.fi> and contributors All Rights Reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name(s) of the above-listed copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- 3rdparty dependency re2 is statically linked in certain binary distributions, like the python wheels. re2 has the following license: Copyright (c) 2009 The RE2 Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- 3rdparty dependency c-ares is statically linked in certain binary distributions, like the python wheels. c-ares has the following license: # c-ares license Copyright (c) 2007 - 2018, Daniel Stenberg with many contributors, see AUTHORS file. Copyright 1998 by the Massachusetts Institute of Technology. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that copyright notice and this permission notice appear in supporting documentation, and that the name of M.I.T. not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. M.I.T. makes no representations about the suitability of this software for any purpose. It is provided "as is" without express or implied warranty. -------------------------------------------------------------------------------- 3rdparty dependency zlib is redistributed as a dynamically linked shared library in certain binary distributions, like the python wheels. In the future this will likely change to static linkage. 
zlib has the following license: zlib.h -- interface of the 'zlib' general purpose compression library version 1.2.11, January 15th, 2017 Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. Jean-loup Gailly Mark Adler jloup@gzip.org madler@alumni.caltech.edu -------------------------------------------------------------------------------- 3rdparty dependency openssl is redistributed as a dynamically linked shared library in certain binary distributions, like the python wheels. openssl preceding version 3 has the following license: LICENSE ISSUES ============== The OpenSSL toolkit stays under a double license, i.e. both the conditions of the OpenSSL License and the original SSLeay license apply to the toolkit. See below for the actual license texts. OpenSSL License --------------- /* ==================================================================== * Copyright (c) 1998-2019 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgment: * "This product includes software developed by the OpenSSL Project * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" * * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to * endorse or promote products derived from this software without * prior written permission. For written permission, please contact * openssl-core@openssl.org. * * 5. Products derived from this software may not be called "OpenSSL" * nor may "OpenSSL" appear in their names without prior written * permission of the OpenSSL Project. * * 6. Redistributions of any form whatsoever must retain the following * acknowledgment: * "This product includes software developed by the OpenSSL Project * for use in the OpenSSL Toolkit (http://www.openssl.org/)" * * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. 
* ==================================================================== * * This product includes cryptographic software written by Eric Young * (eay@cryptsoft.com). This product includes software written by Tim * Hudson (tjh@cryptsoft.com). * */ Original SSLeay License ----------------------- /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) * All rights reserved. * * This package is an SSL implementation written * by Eric Young (eay@cryptsoft.com). * The implementation was written so as to conform with Netscapes SSL. * * This library is free for commercial and non-commercial use as long as * the following conditions are aheared to. The following conditions * apply to all code found in this distribution, be it the RC4, RSA, * lhash, DES, etc., code; not just the SSL code. The SSL documentation * included with this distribution is covered by the same copyright terms * except that the holder is Tim Hudson (tjh@cryptsoft.com). * * Copyright remains Eric Young's, and as such any Copyright notices in * the code are not to be removed. * If this package is used in a product, Eric Young should be given attribution * as the author of the parts of the library used. * This can be in the form of a textual message at program startup or * in documentation (online or textual) provided with the package. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * "This product includes cryptographic software written by * Eric Young (eay@cryptsoft.com)" * The word 'cryptographic' can be left out if the rouines from the library * being used are not cryptographic related :-). * 4. If you include any Windows specific code (or a derivative thereof) from * the apps directory (application code) you must include an acknowledgement: * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" * * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * The licence and distribution terms for any publically available version or * derivative of this code cannot be changed. i.e. this code cannot simply be * copied and put under another distribution licence * [including the GNU Public Licence.] */ -------------------------------------------------------------------------------- This project includes code from the rtools-backports project. * ci/scripts/PKGBUILD and ci/scripts/r_windows_build.sh are based on code from the rtools-backports project. Copyright: Copyright (c) 2013 - 2019, \u0410\u043b\u0435\u043a\u0441\u0435\u0439 and Jeroen Ooms. All rights reserved. 
Homepage: https://github.com/r-windows/rtools-backports License: 3-clause BSD -------------------------------------------------------------------------------- Some code from pandas has been adapted for the pyarrow codebase. pandas is available under the 3-clause BSD license, which follows: pandas license ============== Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team All rights reserved. Copyright (c) 2008-2011 AQR Capital Management, LLC All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- Some bits from DyND, in particular aspects of the build system, have been adapted from libdynd and dynd-python under the terms of the BSD 2-clause license The BSD 2-Clause License Copyright (C) 2011-12, Dynamic NDArray Developers All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Dynamic NDArray Developers list: * Mark Wiebe * Continuum Analytics -------------------------------------------------------------------------------- Some source code from Ibis (https://github.com/cloudera/ibis) has been adapted for PyArrow. Ibis is released under the Apache License, Version 2.0. -------------------------------------------------------------------------------- This project includes code from the autobrew project. 
* r/tools/autobrew and dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb are based on code from the autobrew project. Copyright (c) 2019, Jeroen Ooms License: MIT Homepage: https://github.com/jeroen/autobrew -------------------------------------------------------------------------------- dev/tasks/homebrew-formulae/apache-arrow.rb has the following license: BSD 2-Clause License Copyright (c) 2009-present, Homebrew contributors All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ---------------------------------------------------------------------- cpp/src/arrow/vendored/base64.cpp has the following license ZLIB License Copyright (C) 2004-2017 Ren\u00e9 Nyffenegger This source code is provided 'as-is', without any express or implied warranty. 
In no event will the author be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this source code must not be misrepresented; you must not claim that you wrote the original source code. If you use this source code in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original source code. 3. This notice may not be removed or altered from any source distribution. Ren\u00e9 Nyffenegger rene.nyffenegger@adp-gmbh.ch -------------------------------------------------------------------------------- The file cpp/src/arrow/vendored/optional.hpp has the following license Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- This project includes code from Folly. * cpp/src/arrow/vendored/ProducerConsumerQueue.h is based on Folly's * folly/Portability.h * folly/lang/Align.h * folly/ProducerConsumerQueue.h Copyright: Copyright (c) Facebook, Inc. and its affiliates. Home page: https://github.com/facebook/folly License: http://www.apache.org/licenses/LICENSE-2.0 -------------------------------------------------------------------------------- The file cpp/src/arrow/vendored/musl/strptime.c has the following license Copyright \u00a9 2005-2020 Rich Felker, et al. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- The file cpp/cmake_modules/BuildUtils.cmake contains code from https://gist.github.com/cristianadam/ef920342939a89fae3e8a85ca9459b49 which is made available under the MIT license Copyright (c) 2019 Cristian Adam Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- The files in cpp/src/arrow/vendored/portable-snippets/ contain code from https://github.com/nemequ/portable-snippets and have the following copyright notice: Each source file contains a preamble explaining the license situation for that file, which takes priority over this file. 
With the exception of some code pulled in from other repositories (such as \u00b5nit, an MIT-licensed project which is used for testing), the code is public domain, released using the CC0 1.0 Universal dedication (*). (*) https://creativecommons.org/publicdomain/zero/1.0/legalcode -------------------------------------------------------------------------------- The files in cpp/src/arrow/vendored/fast_float/ contain code from https://github.com/lemire/fast_float which is made available under the Apache License 2.0. -------------------------------------------------------------------------------- The file python/pyarrow/vendored/version.py contains code from https://github.com/pypa/packaging/ which is made available under both the Apache license v2.0 and the BSD 2-clause license. -------------------------------------------------------------------------------- The files in cpp/src/arrow/vendored/pcg contain code from https://github.com/imneme/pcg-cpp and have the following copyright notice: Copyright 2014-2019 Melissa O'Neill <oneill@pcg-random.org>, and the PCG Project contributors. SPDX-License-Identifier: (Apache-2.0 OR MIT) Licensed under the Apache License, Version 2.0 (provided in LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) or under the MIT license (provided in LICENSE-MIT.txt and at http://opensource.org/licenses/MIT), at your option. This file may not be copied, modified, or distributed except according to those terms. Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either express or implied. See your chosen license for details. 
Boost 1.63 - Boost Software License: Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Google Protocol Buffers 3.3.0 - BSD 3-clause This license applies to all parts of Protocol Buffers except the following: - Atomicops support for generic gcc, located in src/google/protobuf/stubs/atomicops_internals_generic_gcc.h. This file is copyrighted by Red Hat Inc. - Atomicops support for AIX/POWER, located in src/google/protobuf/stubs/atomicops_internals_power.h. This file is copyrighted by Bloomberg Finance LP. Copyright 2014, Google Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Code generated by the Protocol Buffer compiler is owned by the owner of the input file used when generating it. This code is not standalone and requires a support library to be linked with it. This support library is itself covered by the above license. Breakpad - BSD 3-clause Copyright (c) 2006, Google Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. The libssh project & gstreamer GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999 Copyright (C) 1991, 1999 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.] Preamble The licenses for most software are designed to take away your freedom to share and change it. 
By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. 
To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. 
For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. 
The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. 
You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. 
Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. 
A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. 
As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. 
c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. 
b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. 
The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. Linking with OpenSSL 17. In addition, as a special exception, we give permission to link the code of its release of libssh with the OpenSSL project's "OpenSSL" library (or with modified versions of it that use the same license as the "OpenSSL" library), and distribute the linked executables. You must obey the GNU Lesser General Public License in all respects for all of the code used other than "OpenSSL". If you modify this file, you may extend this exception to your version of the file, but you are not obligated to do so. If you do not wish to do so, delete this exception statement from your version. END OF TERMS AND CONDITIONS Copies of libssh source will be made available upon request in accordance with LPGL requirements. LZ4 - BSD license Copyright (C) 2011-2016, Yann Collet. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Fugue icons - Creative Commons Attribution 3.0 License Creative Commons Legal Code Attribution 3.0 Unported CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS LICENSE DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE INFORMATION PROVIDED, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM ITS USE. License THE WORK (AS DEFINED BELOW) IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE OR COPYRIGHT LAW IS PROHIBITED. BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS. 1. Definitions a. 
"Adaptation" means a work based upon the Work, or upon the Work and other pre-existing works, such as a translation, adaptation, derivative work, arrangement of music or other alterations of a literary or artistic work, or phonogram or performance and includes cinematographic adaptations or any other form in which the Work may be recast, transformed, or adapted including in any form recognizably derived from the original, except that a work that constitutes a Collection will not be considered an Adaptation for the purpose of this License. For the avoidance of doubt, where the Work is a musical work, performance or phonogram, the synchronization of the Work in timed-relation with a moving image ("synching") will be considered an Adaptation for the purpose of this License. b. "Collection" means a collection of literary or artistic works, such as encyclopedias and anthologies, or performances, phonograms or broadcasts, or other works or subject matter other than works listed in Section 1(f) below, which, by reason of the selection and arrangement of their contents, constitute intellectual creations, in which the Work is included in its entirety in unmodified form along with one or more other contributions, each constituting separate and independent works in themselves, which together are assembled into a collective whole. A work that constitutes a Collection will not be considered an Adaptation (as defined above) for the purposes of this License. c. "Distribute" means to make available to the public the original and copies of the Work or Adaptation, as appropriate, through sale or other transfer of ownership. d. "Licensor" means the individual, individuals, entity or entities that offer(s) the Work under the terms of this License. e. 
"Original Author" means, in the case of a literary or artistic work, the individual, individuals, entity or entities who created the Work or if no individual or entity can be identified, the publisher; and in addition (i) in the case of a performance the actors, singers, musicians, dancers, and other persons who act, sing, deliver, declaim, play in, interpret or otherwise perform literary or artistic works or expressions of folklore; (ii) in the case of a phonogram the producer being the person or legal entity who first fixes the sounds of a performance or other sounds; and, (iii) in the case of broadcasts, the organization that transmits the broadcast. f. "Work" means the literary and/or artistic work offered under the terms of this License including without limitation any production in the literary, scientific and artistic domain, whatever may be the mode or form of its expression including digital form, such as a book, pamphlet and other writing; a lecture, address, sermon or other work of the same nature; a dramatic or dramatico-musical work; a choreographic work or entertainment in dumb show; a musical composition with or without words; a cinematographic work to which are assimilated works expressed by a process analogous to cinematography; a work of drawing, painting, architecture, sculpture, engraving or lithography; a photographic work to which are assimilated works expressed by a process analogous to photography; a work of applied art; an illustration, map, plan, sketch or three-dimensional work relative to geography, topography, architecture or science; a performance; a broadcast; a phonogram; a compilation of data to the extent it is protected as a copyrightable work; or a work performed by a variety or circus performer to the extent it is not otherwise considered a literary or artistic work. g. 
"You" means an individual or entity exercising rights under this License who has not previously violated the terms of this License with respect to the Work, or who has received express permission from the Licensor to exercise rights under this License despite a previous violation. h. "Publicly Perform" means to perform public recitations of the Work and to communicate to the public those public recitations, by any means or process, including by wire or wireless means or public digital performances; to make available to the public Works in such a way that members of the public may access these Works from a place and at a place individually chosen by them; to perform the Work to the public by any means or process and the communication to the public of the performances of the Work, including by public digital performance; to broadcast and rebroadcast the Work by any means including signs, sounds or images. i. "Reproduce" means to make copies of the Work by any means including without limitation by sound or visual recordings and the right of fixation and reproducing fixations of the Work, including storage of a protected performance or phonogram in digital form or other electronic medium. 2. Fair Dealing Rights. Nothing in this License is intended to reduce, limit, or restrict any uses free from copyright or rights arising from limitations or exceptions that are provided for in connection with the copyright protection under copyright law or other applicable laws. 3. License Grant. Subject to the terms and conditions of this License, Licensor hereby grants You a worldwide, royalty-free, non-exclusive, perpetual (for the duration of the applicable copyright) license to exercise the rights in the Work as stated below: a. to Reproduce the Work, to incorporate the Work into one or more Collections, and to Reproduce the Work as incorporated in the Collections; b. 
to create and Reproduce Adaptations provided that any such Adaptation, including any translation in any medium, takes reasonable steps to clearly label, demarcate or otherwise identify that changes were made to the original Work. For example, a translation could be marked "The original work was translated from English to Spanish," or a modification could indicate "The original work has been modified."; c. to Distribute and Publicly Perform the Work including as incorporated in Collections; and, d. to Distribute and Publicly Perform Adaptations. e. For the avoidance of doubt: i. Non-waivable Compulsory License Schemes. In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme cannot be waived, the Licensor reserves the exclusive right to collect such royalties for any exercise by You of the rights granted under this License; ii. Waivable Compulsory License Schemes. In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme can be waived, the Licensor waives the exclusive right to collect such royalties for any exercise by You of the rights granted under this License; and, iii. Voluntary License Schemes. The Licensor waives the right to collect royalties, whether individually or, in the event that the Licensor is a member of a collecting society that administers voluntary licensing schemes, via that society, from any exercise by You of the rights granted under this License. The above rights may be exercised in all media and formats whether now known or hereafter devised. The above rights include the right to make such modifications as are technically necessary to exercise the rights in other media and formats. Subject to Section 8(f), all rights not expressly granted by Licensor are hereby reserved. 4. Restrictions. The license granted in Section 3 above is expressly made subject to and limited by the following restrictions: a. 
You may Distribute or Publicly Perform the Work only under the terms of this License. You must include a copy of, or the Uniform Resource Identifier (URI) for, this License with every copy of the Work You Distribute or Publicly Perform. You may not offer or impose any terms on the Work that restrict the terms of this License or the ability of the recipient of the Work to exercise the rights granted to that recipient under the terms of the License. You may not sublicense the Work. You must keep intact all notices that refer to this License and to the disclaimer of warranties with every copy of the Work You Distribute or Publicly Perform. When You Distribute or Publicly Perform the Work, You may not impose any effective technological measures on the Work that restrict the ability of a recipient of the Work from You to exercise the rights granted to that recipient under the terms of the License. This Section 4(a) applies to the Work as incorporated in a Collection, but this does not require the Collection apart from the Work itself to be made subject to the terms of this License. If You create a Collection, upon notice from any Licensor You must, to the extent practicable, remove from the Collection any credit as required by Section 4(b), as requested. If You create an Adaptation, upon notice from any Licensor You must, to the extent practicable, remove from the Adaptation any credit as required by Section 4(b), as requested. b. 
If You Distribute, or Publicly Perform the Work or any Adaptations or Collections, You must, unless a request has been made pursuant to Section 4(a), keep intact all copyright notices for the Work and provide, reasonable to the medium or means You are utilizing: (i) the name of the Original Author (or pseudonym, if applicable) if supplied, and/or if the Original Author and/or Licensor designate another party or parties (e.g., a sponsor institute, publishing entity, journal) for attribution ("Attribution Parties") in Licensor's copyright notice, terms of service or by other reasonable means, the name of such party or parties; (ii) the title of the Work if supplied; (iii) to the extent reasonably practicable, the URI, if any, that Licensor specifies to be associated with the Work, unless such URI does not refer to the copyright notice or licensing information for the Work; and (iv) , consistent with Section 3(b), in the case of an Adaptation, a credit identifying the use of the Work in the Adaptation (e.g., "French translation of the Work by Original Author," or "Screenplay based on original Work by Original Author"). The credit required by this Section 4 (b) may be implemented in any reasonable manner; provided, however, that in the case of a Adaptation or Collection, at a minimum such credit will appear, if a credit for all contributing authors of the Adaptation or Collection appears, then as part of these credits and in a manner at least as prominent as the credits for the other contributing authors. 
For the avoidance of doubt, You may only use the credit required by this Section for the purpose of attribution in the manner set out above and, by exercising Your rights under this License, You may not implicitly or explicitly assert or imply any connection with, sponsorship or endorsement by the Original Author, Licensor and/or Attribution Parties, as appropriate, of You or Your use of the Work, without the separate, express prior written permission of the Original Author, Licensor and/or Attribution Parties. c. Except as otherwise agreed in writing by the Licensor or as may be otherwise permitted by applicable law, if You Reproduce, Distribute or Publicly Perform the Work either by itself or as part of any Adaptations or Collections, You must not distort, mutilate, modify or take other derogatory action in relation to the Work which would be prejudicial to the Original Author's honor or reputation. Licensor agrees that in those jurisdictions (e.g. Japan), in which any exercise of the right granted in Section 3(b) of this License (the right to make Adaptations) would be deemed to be a distortion, mutilation, modification or other derogatory action prejudicial to the Original Author's honor and reputation, the Licensor will waive or not assert, as appropriate, this Section, to the fullest extent permitted by the applicable national law, to enable You to reasonably exercise Your right under Section 3(b) of this License (right to make Adaptations) but not otherwise. 5. Representations, Warranties and Disclaimer UNLESS OTHERWISE MUTUALLY AGREED TO BY THE PARTIES IN WRITING, LICENSOR OFFERS THE WORK AS-IS AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE WORK, EXPRESS, IMPLIED, STATUTORY OR OTHERWISE, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTIBILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, ACCURACY, OR THE PRESENCE OF ABSENCE OF ERRORS, WHETHER OR NOT DISCOVERABLE. 
SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OF IMPLIED WARRANTIES, SO SUCH EXCLUSION MAY NOT APPLY TO YOU. 6. Limitation on Liability. EXCEPT TO THE EXTENT REQUIRED BY APPLICABLE LAW, IN NO EVENT WILL LICENSOR BE LIABLE TO YOU ON ANY LEGAL THEORY FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR EXEMPLARY DAMAGES ARISING OUT OF THIS LICENSE OR THE USE OF THE WORK, EVEN IF LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 7. Termination a. This License and the rights granted hereunder will terminate automatically upon any breach by You of the terms of this License. Individuals or entities who have received Adaptations or Collections from You under this License, however, will not have their licenses terminated provided such individuals or entities remain in full compliance with those licenses. Sections 1, 2, 5, 6, 7, and 8 will survive any termination of this License. b. Subject to the above terms and conditions, the license granted here is perpetual (for the duration of the applicable copyright in the Work). Notwithstanding the above, Licensor reserves the right to release the Work under different license terms or to stop distributing the Work at any time; provided, however that any such election will not serve to withdraw this License (or any other license that has been, or is required to be, granted under the terms of this License), and this License will continue in full force and effect unless terminated as stated above. 8. Miscellaneous a. Each time You Distribute or Publicly Perform the Work or a Collection, the Licensor offers to the recipient a license to the Work on the same terms and conditions as the license granted to You under this License. b. Each time You Distribute or Publicly Perform an Adaptation, Licensor offers to the recipient a license to the original Work on the same terms and conditions as the license granted to You under this License. c. 
If any provision of this License is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this License, and without further action by the parties to this agreement, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable. d. No term or provision of this License shall be deemed waived and no breach consented to unless such waiver or consent shall be in writing and signed by the party to be charged with such waiver or consent. e. This License constitutes the entire agreement between the parties with respect to the Work licensed here. There are no understandings, agreements or representations with respect to the Work not specified here. Licensor shall not be bound by any additional provisions that may appear in any communication from You. This License may not be modified without the mutual written agreement of the Licensor and You. f. The rights granted under, and the subject matter referenced, in this License were drafted utilizing the terminology of the Berne Convention for the Protection of Literary and Artistic Works (as amended on September 28, 1979), the Rome Convention of 1961, the WIPO Copyright Treaty of 1996, the WIPO Performances and Phonograms Treaty of 1996 and the Universal Copyright Convention (as revised on July 24, 1971). These rights and subject matter take effect in the relevant jurisdiction in which the License terms are sought to be enforced according to the corresponding provisions of the implementation of those treaty provisions in the applicable national law. If the standard suite of rights granted under applicable copyright law includes additional rights not granted under this License, such additional rights are deemed to be included in the License; this License is not intended to restrict the license of any rights under applicable law. 
Creative Commons Notice Creative Commons is not a party to this License, and makes no warranty whatsoever in connection with the Work. Creative Commons will not be liable to You or any party on any legal theory for any damages whatsoever, including without limitation any general, special, incidental or consequential damages arising in connection to this license. Notwithstanding the foregoing two (2) sentences, if Creative Commons has expressly identified itself as the Licensor hereunder, it shall have all rights and obligations of Licensor. Except for the limited purpose of indicating to the public that the Work is licensed under the CCPL, Creative Commons does not authorize the use by either party of the trademark "Creative Commons" or any related trademark or logo of Creative Commons without the prior written consent of Creative Commons. Any permitted use will be in compliance with Creative Commons' then-current trademark usage guidelines, as may be published on its website or otherwise made available upon request from time to time. For the avoidance of doubt, this trademark restriction does not form part of this License. Creative Commons may be contacted at https://creativecommons.org/. Simpleperf & n.eko & libbacktrace - Modified under the terms of the Apache License: Copyright (c) 2015, The Android Open Source Project Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. 
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
END OF TERMS AND CONDITIONS libunwind & ncurses Copyright (C) 1996 X Consortium Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Except as contained in this notice, the name of the X Consortium shall not be used in advertising or otherwise to promote the sale, use or other dealings in this Software without prior written authorization from the X Consortium. X Window System is a trademark of X Consortium, Inc. liblzma XZ Utils Licensing Different licenses apply to different files in this package. Here is a rough summary of which licenses apply to which parts of this package (but check the individual files to be sure!): -liblzma is in the public domain. -xz, xzdec, and lzmadec command line tools are in the public domain unless GNU getopt_long had to be compiled and linked in from the lib directory. The getopt_long code is under GNU LGPLv2.1+. -The scripts to grep, diff, and view compressed files have been adapted from gzip. These scripts and their documentation are under GNU GPLv2+. 
-All the documentation in the doc directory and most of the XZ Utils specific documentation files in other directories are in the public domain. -Translated messages are in the public domain. -The build system contains public domain files, and files that are under GNU GPLv2+ or GNU GPLv3+. None of these files end up in the binaries being built. -Test files and test code in the tests directory, and debugging utilities in the debug directory are in the public domain. -The extra directory may contain public domain files, and files that are under various free software licenses. You can do whatever you want with the files that have been put into the public domain. If you find public domain legally problematic, take the previous sentence as a license grant. If you still find the lack of copyright legally problematic, you have too many lawyers. As usual, this software is provided "as is", without any warranty. If you copy significant amounts of public domain code from XZ Utils into your project, acknowledging this somewhere in your software is polite (especially if it is proprietary, non-free software), but naturally it is not legally required. Here is an example of a good notice to put into "about box" or into documentation: This software includes code from XZ Utils https://tukaani.org/xz/. The following license texts are included in the following files: -COPYING.LGPLv2.1: GNU Lesser General Public License version 2.1 -COPYING.GPLv2: GNU General Public License version 2 -COPYING.GPLv3: GNU General Public License version 3 Note that the toolchain (compiler, linker etc.) may add some code pieces that are copyrighted. Thus, it is possible that e.g. liblzma binary wouldn't actually be in the public domain in its entirety even though it contains no copyrighted code from the XZ Utils source package. If you have questions, don't hesitate to ask the author(s) for more information. Mesa 3D Graphics Library - MIT license Copyright (C) 1999-2007 Brian Paul All Rights Reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. MinHook - The Minimalistic API Hooking Library for x64/x86 - BSD license Copyright (C) 2009-2017 Tsuda Kageyu. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. XCB util-keysyms Copyright \u00a9 2008 Ian Osgood <iano@quirkster.com> Copyright \u00a9 2008 Jamey Sharp <jamey@minilop.net> Copyright \u00a9 2008 Josh Triplett <josh@freedesktop.org> Copyright \u00a9 2008 Ulrich Eckhardt <doomster@knuut.de> Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
Except as contained in this notice, the names of the authors or their institutions shall not be used in advertising or otherwise to promote the sale, use or other dealings in this Software without prior written authorization from the authors openh264 Copyright (c) 2013, Cisco Systems All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.", "keywords": []}, {"id": 4, "doc_id": 4, "filename": "CopyrightAndLicenses/index.html", "domain_name": "std", "name": "CopyrightAndLicenses/index", "display_name": "Copyright and Licenses", "type": "doc", "display_type": "Page", "docname": "CopyrightAndLicenses/index", "anchor": "", "priority": -1, "content": "Nsight Systems nsys Nsight Systems Workstation Edition Nsight Systems Embedded Platforms Edition 2023-11-11 11 November 2023 2023.4.1 2023.4 2023 4 2023.4 Nsight Systems Copyright and Licenses. Information on the NVIDIA Software License Agreement as well as third party software and tools used by Nsight Systems.", "keywords": []}, {"id": 5, "doc_id": 17, "filename": "InstallationGuide/index.html", "domain_name": "page", "name": "InstallationGuide/index#cuda-version", "display_name": "CUDA Version", "type": "section", "display_type": "Page section", "docname": "InstallationGuide/index", "anchor": "cuda-version", "priority": -1, "content": "Nsight Systems supports CUDA 10.0, 10.1, 10.2, and 11.X for most platforms Nsight Systems on Arm SBSA supports 10.2 and 11.X Note that CUDA version and driver version must be compatible. CUDA Version Driver minimum version 11.0 450 10.2 440.30 10.1 418.39 10.0 410.48 From CUDA 11.X on, any driver from 450 on will be supported, although new features introduced in more recent drivers will not be available. 
For information about which drivers were specifically released with each toolkit, see CUDA Toolkit Release Notes - Major Component Versions", "keywords": []}, {"id": 6, "doc_id": 17, "filename": "InstallationGuide/index.html", "domain_name": "page", "name": "InstallationGuide/index#finding-the-right-package", "display_name": "Finding the Right Package", "type": "section", "display_type": "Page section", "docname": "InstallationGuide/index", "anchor": "finding-the-right-package", "priority": -1, "content": "Nsight Systems is available for multiple targets and multiple host OSs. To choose the right package, first consider the target system to be analyzed. For Tegra target systems, select Nsight Systems Embedded Platforms Edition available as part of NVIDIA JetPack SDK . For x86_64, IBM Power target systems, or Arm SBSA select from the target packages from Nsight Systems Workstation Edition , available from https://developer.nvidia.com/nsight-systems . This web release will always contain the latest and greatest Nsight Systems features. The x86_64, IBM Power, and Arm SBSA target versions of Nsight Systems are also available in the CUDA Toolkit. Each package is limited to one architecture. For example, Tegra packages do not contain support for profiling x86 targets, and x86 packages do not contain support for profiling Tegra targets. After choosing an appropriate target version, select the package corresponding to the host OS, the OS on the system where results will be viewed. These packages are in the form of common installer types: .msi for Windows; .run, .rpm, and .deb for x86 Linux; .deb and .rpm for Linux on IBM Power; and .dmg for the macOS installer. Note: the IBM Power and Arm SBSA packages do not have a GUI for visualization of the result. If you wish to visualize your result, please download and install the GUI available for macOS, x86_64 Linux, or Windows systems. Tegra packages Windows host - Install .msi on Windows machine. 
Enables remote access to Tegra device for profiling. Linux host - Install .run on Linux system. Enables remote access to Tegra device for profiling. macOS host - Install .dmg on macOS machine. Enables remote access to Tegra device for profiling. x86_64 packages Windows host - Install .msi on Windows machine. Enables remote access to Linux x86_64 or Windows devices for profiling as well as running on local system. Linux host - Install .run, .rpm, or .deb on Linux system. Enables remote access to Linux x86_64 or Windows devices for profiling or running collection on localhost. Linux CLI only - The Linux CLI is shipped in all x86 packages, but if you just want the CLI, we have a package for that. Install .deb on Linux system. Enables only CLI collection, report can be imported or opened in x86_64 host. macOS host - Install .dmg on macOS machine. Enables remote access to Linux x86_64 device for profiling. IBM Power packages Power CLI only - The IBM Power support does not include a host GUI. Install .deb or .rpm on your Power system. Enables only CLI collection, report can be imported or opened in GUI on any supported host platform. Arm SBSA packages Arm SBSA CLI only - Arm SBSA support does not include a host GUI. Install .deb or .rpm on your Arm SBSA system. Enables only CLI collection, report can be imported or opened in GUI on any supported host platform.", "keywords": []}, {"id": 7, "doc_id": 17, "filename": "InstallationGuide/index.html", "domain_name": "page", "name": "InstallationGuide/index#host-application-requirements", "display_name": "Host Application Requirements", "type": "section", "display_type": "Page section", "docname": "InstallationGuide/index", "anchor": "host-application-requirements", "priority": -1, "content": "The Nsight Systems host application runs on the following host platforms: Windows 10, Windows Server 2019. Only 64-bit versions are supported. 
Linux Ubuntu 14.04 and higher are known to work, running on other modern distributions should be possible as well. Only 64-bit versions are supported. OS X 10.10 \u201cYosemite\u201d and higher.", "keywords": []}, {"id": 8, "doc_id": 17, "filename": "InstallationGuide/index.html", "domain_name": "page", "name": "InstallationGuide/index#installing-gui-on-the-host-system", "display_name": "Installing GUI on the Host System", "type": "section", "display_type": "Page section", "docname": "InstallationGuide/index", "anchor": "installing-gui-on-the-host-system", "priority": -1, "content": "Copy the appropriate file to your host system in a directory where you have write and execute permissions. Run the install file, accept the EULA, and Nsight Systems will install on your system. On Linux, there are special options to enable automated installation. Running the installer with the --accept flag will automatically accept the EULA, running with the --accept flag and the --quiet flag will automatically accept the EULA without printing to stdout. Running with --quiet without --accept will display an error. The installation will create a Host directory for this host and a Target directory for each target this Nsight Systems package supports. All binaries needed to collect data on a target device will be installed on the target by the host on first connection to the device. There is no need to install the package on the target device. 
If installing from the CUDA Toolkit, see the CUDA Toolkit documentation .", "keywords": []}, {"id": 9, "doc_id": 17, "filename": "InstallationGuide/index.html", "domain_name": "page", "name": "InstallationGuide/index#installing-multi-report-analysis-system", "display_name": "Installing Multi Report Analysis System", "type": "section", "display_type": "Page section", "docname": "InstallationGuide/index", "anchor": "installing-multi-report-analysis-system", "priority": -1, "content": "PREVIEW FEATURE The Nsight Systems multi-report analysis system can be located in the <install-dir>/target-linux-x64/python/packages directory. For this initial preview release, multi-node analysis is only available to run recipes on Linux targets, and only available to visualize on Linux or Windows hosts. Recipe Dependencies The system is written in Python and depends on a set of Python packages. The prerequisites are Python 3.6 or newer with pip and venv. If you don\u2019t have Python, you can install it from python.org or your Linux package manager. Pip/venv on Ubuntu If pip/venv were not installed with Python, run: $ sudo apt-get install python3-pip $ sudo apt-get install python3-venv On a fresh Ubuntu install, we will need to run the following before the above commands: $ sudo apt-get update The dependent packages can either be installed automatically by an automated script or manually. Automated script The <install-dir>/target-linux-x64/python/packages/nsys_recipe/install.py script automates the installation of the recipe dependencies. You must select either the --current or --venv PATH option when you run the script. Options: -h : Display help --current : Install packages in the current environment. If a venv is active, packages will be installed there. Otherwise, packages will be installed in the system site-packages directory. It enables usage of nsys recipe without having to source a virtual environment. 
However, new packages risk colliding with existing ones if different versions are required. --venv PATH : Install packages in a virtual environment. If it doesn\u2019t already exist, it is created. It prevents risk of package collision in the current environment but requires the virtual environment to be activated before running nsys recipe . --tar : download wheel packages online and tar them --untar : untar the wheel packages and install --python : change the python executable (default is python3) --no-jupyter : do not install requirements for the jupyter notebook --no-dask : do not install requirements for Dask If --tar or --untar option wasn\u2019t specified, the script will directly download the pip packages from the internet. Manual steps If you would rather install the dependencies manually, please follow the following steps: Create a virtual environment We recommend creating a virtual environment to avoid installing packages directly into your system Python. The commands create the virtual environment in the current working directory. See venv - python doc To create a venv named recipe_env: $ python3 -m venv recipe_env $ source recipe_env/bin/activate List of dependencies We have three files located in <install-dir>/target-linux-x64/python/packages/nsys_recipe/requirements for the dependencies: Common.txt (required): dependencies needed by all recipes Dask.txt (optional): dependencies needed by the Dask mode Jupyter.txt (optional): dependencies needed to open the Jupyter notebook One-step installation The following command will install all dependencies for CLI and GUI. Please note that you will want to activate your venv first as described above, otherwise the modules will not be available in the venv. 
$ python3 -m pip install -r nsys_recipe/requirements/dask.txt -r nsys_recipe/requirements/common.txt -r nsys_recipe/requirements/jupyter.txt Two-step installation (for machines without internet) If you wish to download the dependencies on a machine without internet, you can download the wheel packages on a machine with internet, transfer them to the target machine and install the packages there. On the machine with internet: $ python3 -m pip download -r nsys_recipe/requirements/dask.txt -r nsys_recipe/requirements/common.txt -r nsys_recipe/requirements/jupyter.txt -d \u201crecipe-deps\u201d $ tar -cvfz recipe-deps.tar.gz recipe-deps On the machine with no internet: $ tar -xvfz recipe-deps.tar.gz $ python3 -m pip install recipe-deps/* --no-index Jupyter Notebook The Nsight Systems UI has the ability to internally load a Jupyter notebook. It uses the Jupyter notebook installation associated with the Python on your $PATH, which is expected to be the Python installed into the virtual environment created in the earlier steps of this guide. If Jupyter is installed in a different location, you can add a third variable to the config.ini file that will override the default path to Jupyter: JupyterPythonExe=\u201c/path/to/recipe_env/bin/python\u201d This config.ini file should be placed in <install_dir>/host-linux-x64 Note that on Windows, the path should use Windows slashes and they must be double slashes: JupyterPythonExe=\u201cc:\\\\path\\\\to\\\\recipe_env\\\\bin\\\\python.exe\u201d", "keywords": []}, {"id": 10, "doc_id": 17, "filename": "InstallationGuide/index.html", "domain_name": "page", "name": "InstallationGuide/index#launching-the-gui", "display_name": "Launching the GUI", "type": "section", "display_type": "Page section", "docname": "InstallationGuide/index", "anchor": "launching-the-gui", "priority": -1, "content": "Depending on your OS, Nsight Systems will have installed an icon on your host desktop that you can use to launch the GUI. 
To launch the GUI directly, run the nsys-ui executable in the Host sub-directory of your installation.", "keywords": []}, {"id": 11, "doc_id": 17, "filename": "InstallationGuide/index.html", "domain_name": "page", "name": "InstallationGuide/index#optional-setting-up-the-cli", "display_name": "Optional: Setting up the CLI", "type": "section", "display_type": "Page section", "docname": "InstallationGuide/index", "anchor": "optional-setting-up-the-cli", "priority": -1, "content": "All Nsight Systems targets can be profiled using the CLI. IBM Power and Arm SBSA targets can only be profiled using the CLI. The CLI is especially helpful when scripts are used to run unattended collections or when access to the target system via ssh is not possible. In particular, this can be used to enable collection in a Docker container. The CLI can be found in the Target directory of the Nsight Systems installation. Users who want to install the CLI as a standalone tool can do so by copying the files within the Target directory to the location of their choice. If you wish to run the CLI without root (recommended mode) you will want to install in a directory where you have full access. Once you have the CLI set up, you can use the nsys status -e command to check your environment. ~$ nsys status -e Sampling Environment Check Linux Kernel Paranoid Level = 1: OK Linux Distribution = Ubuntu Linux Kernel Version = 4.15.0-109-generic: OK Linux perf_event_open syscall available: OK Sampling trigger event available: OK Intel(c) Last Branch Record support: Available Sampling Environment: OK This status check allows you to ensure that the system requirements for CPU sampling using Nsight Systems are met in your local environment. If the Sampling Environment is not OK, you will still be able to run various trace operations. Intel(c) Last Branch Record allows tools, including Nsight Systems to use hardware to quickly get limited stack information. 
Nsight Systems will use this method for stack resolution by default if available. For information about changing these environment settings, see System Requirements section in the Installation Guide. For information about changing the backtrace method, see Profiling from the CLI in the User Guide. To get started using the CLI, run nsys --help for a list of options or see Profiling Applications from the CLI in the User Guide for full documentation.", "keywords": []}, {"id": 12, "doc_id": 17, "filename": "InstallationGuide/index.html", "domain_name": "page", "name": "InstallationGuide/index#overview", "display_name": "Overview", "type": "section", "display_type": "Page section", "docname": "InstallationGuide/index", "anchor": "overview", "priority": -1, "content": "Nsight Systems is a statistical sampling profiler with tracing features. It is designed to work with devices and devkits based on NVIDIA Tegra SoCs (system-on-chip), Arm SBSA (server based system architecture) systems, IBM Power systems, and systems based on the x86_64 processor architecture that also include NVIDIA GPU(s). Throughout this document we will refer to the device on which profiling happens as the target , and the computer on which the user works and controls the profiling session as the host . Note that for x86_64 based systems these may be on the same device, whereas with Tegra, Arm, or IBM Power based systems they will always be separate. Furthermore, three different activities are distinguished as follows: Profiling \u2014 The process of collecting any performance data. A profiling session in Nsight Systems typically includes sampling and tracing. Sampling \u2014 The process of periodically stopping the profilee (the application under investigation during the profiling session), typically to collect backtraces (call stacks of active threads), which allows you to understand statistically how much time is spent in each function. Additionally, hardware counters can also be sampled. 
This process is inherently imprecise when a low number of samples have been collected. Tracing \u2014 The process of collecting precise information about various activities happening in the profilee or in the system. For example, profilee API execution may be traced providing the exact time and duration of a function call. Nsight Systems supports multiple generations of Tegra SoCs, NVIDIA discrete GPUs, and various CPU architectures, as well as various target and host operating systems. This documentation describes the full set of features available in any version of Nsight Systems . In the event that a feature is not available in all versions, that will be noted in the text. In general, Nsight Systems Embedded Platforms Edition indicates the package that supports Tegra processors for the embedded and automotive market and Nsight Systems Workstation Edition supports x86_64, IBM Power, and Arm server (SBSA) processors for the workstation and cluster market. Common features that are supported by Nsight Systems on most platforms include the following: Sampling of the profilee and collecting backtraces using multiple algorithms (such as frame pointers or DWARF data). Building top-down, bottom-up, and flat views as appropriate. This information helps identify performance bottlenecks in CPU-intensive code. Sampling or tracing system power behaviors, such as CPU frequency. (Only on Nsight Systems Embedded Platforms Edition )Sampling counters from Arm PMU (Performance Monitoring Unit). Information such as cache misses gets statistically correlated with function execution. Support for multiple windows. Users with multiple monitors can see multiple reports simultaneously, or have multiple views into the same report file. With Nsight Systems , a user could: Identify call paths that monopolize the CPU. Identify individual functions that monopolize the CPU (across different call paths). 
For Nsight Systems Embedded Platforms Edition , identify functions that have poor cache utilization. If platform supports CUDA, see visual representation of CUDA Runtime and Driver API calls, as well as CUDA GPU workload. Nsight Systems uses the CUDA Profiling Tools Interface (CUPTI), for more information, see: CUPTI documentation . If the user annotates with NVIDIA Tools Extension (NVTX), see visual representation of NVTX annotations: ranges, markers, and thread names. For Windows targets, see visual representation of D3D12: which API calls are being made on the CPU, graphic frames, stutter analysis, as well as GPU workloads (command lists and debug ranges). For x86_64 targets, see visual representation of Vulkan: which API calls are being made on the CPU, graphic frames, stutter analysis, as well as Vulkan GPU workloads (command buffers and debug ranges).", "keywords": []}, {"id": 13, "doc_id": 17, "filename": "InstallationGuide/index.html", "domain_name": "page", "name": "InstallationGuide/index#requirements-for-x86-64-power-and-arm-sbsa-targets-on-linux", "display_name": "Requirements for x86_64, Power, and Arm SBSA Targets on Linux", "type": "section", "display_type": "Page section", "docname": "InstallationGuide/index", "anchor": "requirements-for-x86-64-power-and-arm-sbsa-targets-on-linux", "priority": -1, "content": "When attaching to x86_64, Power, or Arm SBSA Linux-based target from the GUI on the host, the connection is established through SSH. Use of Linux Perf : To collect thread scheduling data and IP (instruction pointer) samples, the Linux operating system\u2019s perf_event_paranoid level must be 2 or less. 
Use the following command to check: cat /proc/sys/kernel/perf_event_paranoid If the output is >2, then do the following to temporarily adjust the paranoid level (note that this has to be done after each reboot): sudo sh -c 'echo 2 >/proc/sys/kernel/perf_event_paranoid' To make the change permanent, use the following command: sudo sh -c 'echo kernel.perf_event_paranoid=2 > /etc/sysctl.d/local.conf' Kernel version : To collect thread scheduling data and IP (instruction pointer) samples and backtraces, the kernel version must be: 3.10.0-693 or later for CentOS and RedHat Enterprise Linux 7.4+ 4.3 or greater for all other distros including Ubuntu To check the version number of the kernel on a target device, run the following command on the device: uname -a Note that only CentOS, RedHat, and Ubuntu distros are tested/confirmed to work correctly. glibc version : To check the glibc version on a target device, run the following command: ldd --version Nsight Systems requires glibc 2.17 or more recent. CUDA : See above for supported CUDA versions in this release. Use the deviceQuery command to determine the CUDA driver and runtime versions on the system. the deviceQuery command is available in the CUDA SDK. It is normally installed at: /usr/local/cuda/samples/1_Utilities/deviceQuery Only pure 64-bit environments are supported. In other words, 32-bit systems or 32-bit processes running within a 64-bit environment are not supported. Nsight Systems requires write permission to the /var/lock directory on the target system. 
Docker : See Collecting Data within a Docker section of the User Guide for more information.", "keywords": []}, {"id": 14, "doc_id": 17, "filename": "InstallationGuide/index.html", "domain_name": "page", "name": "InstallationGuide/index#supported-platforms", "display_name": "Supported Platforms", "type": "section", "display_type": "Page section", "docname": "InstallationGuide/index", "anchor": "supported-platforms", "priority": -1, "content": "Depending on your OS, different GPUs are supported L4T (Linux for Tegra) Jetson AGX Xavier Jetson TX2 Jetson TX2i Jetson TX Jetson Nano Jetson Xavier NX x86_64, IBM Power (from Power 9), or Arm SBSA NVIDIA GPU architectures starting with Pascal OS (64 bit only) Ubuntu 18.04, 20.04, and 22.04 CentOS and RedHat Enterprise Linux 7.4+ with kernel version 3.10.0-693 or later. Windows 10, 11, and Win Server 2022", "keywords": []}, {"id": 15, "doc_id": 17, "filename": "InstallationGuide/index.html", "domain_name": "page", "name": "InstallationGuide/index#system-requirements", "display_name": "System Requirements", "type": "section", "display_type": "Page section", "docname": "InstallationGuide/index", "anchor": "system-requirements", "priority": -1, "content": "Nsight Systems supports multiple platforms. 
For simplicity, think of these as Nsight Systems Embedded Platforms Edition and Nsight Systems Workstation Edition , where Nsight Systems Workstation Edition supports desktops, workstations, and clusters with x86_64, IBM Power, and Arm SBSA CPUs on Linux and Windows OSs, while Nsight Systems Embedded Platforms Edition supports NVIDIA Tegra products for the embedded and gaming space on Linux for Tegra and QNX OSs.", "keywords": []}, {"id": 16, "doc_id": 17, "filename": "InstallationGuide/index.html", "domain_name": "page", "name": "InstallationGuide/index#x86-64-windows-target-device-requirements", "display_name": "x86_64 Windows Target Device Requirements", "type": "section", "display_type": "Page section", "docname": "InstallationGuide/index", "anchor": "x86-64-windows-target-device-requirements", "priority": -1, "content": "DX12 Requires : Windows 10 with NVIDIA Driver 411.63 or higher for DX12 trace Windows 10 April 2018 Update (version 1803, AKA Redstone 4) with NVIDIA Driver 411.63 or higher for DirectX Ray Tracing, and tracing DX12 Copy command queues.", "keywords": []}, {"id": 17, "doc_id": 17, "filename": "InstallationGuide/index.html", "domain_name": "std", "name": "InstallationGuide/index", "display_name": "Installation Guide", "type": "doc", "display_type": "Page", "docname": "InstallationGuide/index", "anchor": "", "priority": -1, "content": "Nsight Systems nsys Nsight Systems Workstation Edition Nsight Systems Embedded Platforms Edition 2023-11-11 11 November 2023 2023.4.1 2023.4 2023 4 2023.4 NVIDIA Nsight Systems installation guide.", "keywords": []}, {"id": 18, "doc_id": 24, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#cuda-trace-issues", "display_name": "CUDA Trace Issues", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "cuda-trace-issues", "priority": -1, "content": "When using CUDA Toolkit 10.X, tracing of DtoD memory copy operations may result in a crash. 
To avoid this issue, update CUDA Toolkit to 11.X or the latest version. Nsight Systems will not trace kernels when a CDP (CUDA Dynamic Parallelism) kernel is found in a target application on Volta devices or later. On Tegra platforms, CUDA trace requires root privileges. Use the Launch as root checkbox in project settings to make the profiled application run as root. If the target application uses multiple streams from multiple threads, CUDA event buffers may not be released properly. In this case, you will see the following diagnostic error: Couldn't allocate CUPTI bufer x times. Some CUPTI events may be missing. Please contact the Nsight Systems team. In this version of Nsight Systems , if you are starting and stopping profiling inside your application using the interactive CLI, the CUDA memory allocation graph generation is only guaranteed to be correct in the first profiling range. This limitation will be removed in a future version of the product. CUDA GPU trace collection requires a fraction of GPU memory. If your application utilizes all available GPU memory, CUDA trace might not work or can break your application. As an example cuDNN application can crash with CUDNN_STATUS_INTERNAL_ERROR error if GPU memory allocation fails. For older Linux kernels, prior to 4.4, when profiling very short-lived applications (~1 second) that exit in the middle of the profiling session, it is possible that Nsight Systems will not show the CUDA events on the timeline. When more than 64k serialized CUDA kernels and memory copies are executed in the application, you may encounter the following exception during profiling: InvalidArgumentException: "Wrong event order detected" Please upgrade to the CUDA 9.2 driver at minimum to avoid this problem. If you cannot upgrade, you can get a partial analysis, missing potentially a large fraction of CUDA events, by using the CLI. 
On Vibrante, when running a profiling session with multiple targets that are guest VMs in a CCC configuration behind a NAT, you may encounter an error with the following text during profiling: Failed to sync time on device. Please edit the group connection settings, select Targets on the same SoC checkbox there and try again. When using the 455 driver, as shipped with CUDA Tool Kit 11.1, and tracing CUDA with Nsight Systems you many encounter a crash when the application exits. To avoid this issue, end your profiling session before the application exits or update your driver.", "keywords": []}, {"id": 19, "doc_id": 24, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#docker-issues", "display_name": "Docker Issues", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "docker-issues", "priority": -1, "content": "In a Docker, when a system\u2019s host utilizes a kernel older than v4.3, it is not possible for Nsight Systems to collect sampling data unless both the host and Docker are running a RHEL or CentOS operating system utilizing kernel version 3.10.1-693 or newer. A user override for this will be made available in a future version. When docker exec is called on a running container and stdout is kept open from a command invoked inside that shell, the exec shell hangs until the command exits. You can avoid this issue by running with docker exec --tty . 
See the bug reports at: https://github.com/moby/moby/issues/33039 https://github.com/drud/ddev/issues/732", "keywords": []}, {"id": 20, "doc_id": 24, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#general-issues", "display_name": "General Issues", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "general-issues", "priority": -1, "content": "The current release of Nsight Systems CLI doesn\u2019t support naming a session with a name longer than 127 characters. Profiling an executable with a name exceeding 111 characters is also unsupported by the nsys profile command. Those limitations will be removed in a future version of the CLI. Nsight Systems 2020.4 introduces collection of thread scheduling information without full sampling. While this allows system information at a lower cost, it does add overhead. To turn off thread schedule information collection, add --cpuctxsw=none to your command line or turn off in the GUI. Profiling greater than 5 minutes is not officially supported at this time. Profiling high activity applications, on high performance machines, over a long analysis time can create large result files that may take a very long time to load, run out of memory, or lock up the system. If you have a complex application, we recommend starting with a short profiling session duration of no more than 5 minutes for your initial profile. If your application has a natural repeating pattern, often referred to as a frame, you may typically only need a few of these. This suggested limit will increase in future releases. Attaching or re-attaching to a process from the GUI is not supported with the x86_64 Linux or IBM Power target. Equivalent results can be obtained by using the interactive CLI to launch the process and then starting and stopping analysis at multiple points. 
To reduce overhead, Nsight Systems traces a subset of API calls likely to impact performance when tracing APIs rather than all possible calls. There is currently no way to change the subset being traced when using the CLI. See respective library portion of this documentation for a list of calls traced by default. The CLI limitation will be removed in a future version of the product. There is an upper bound on the default size used by the tool to record trace events during the collection. If you see the following diagnostic error, then Nsight Systems hit the upper limit. Reached the size limit on recording trace events for this process. Try reducing the profiling duration or reduce the number of features traced. When profiling a framework or application that uses CUPTI, like some versions of TensorFlow(tm), Nsight Systems will not be able to trace CUDA usage due to limitations in CUPTI. These limitations will be corrected in a future version of CUPTI. Consider turning off the application\u2019s use of CUPTI if CUDA tracing is required. Tracing an application that uses a memory allocator that is not thread-safe is not supported. Tracing OS Runtime libraries in an application that preloads glibc symbols is unsupported and can lead to undefined behavior. Nsight Systems cannot profile applications launched through a virtual window manager like GNU Screen. Using Nsight Systems MPI trace functionality with the Darshan runtime module can lead to segfaults. To resolve the issue, unload the module. module unload darshan-runtime Profiling MPI Fortran APIs with MPI_Status as an argument, e.g. MPI_Recv, MPI_Test[all], MPI_Wait[all], can potentially cause memory corruption for MPICH versions 3.0.x. The reason is that the MPI_Status structure in MPICH 3.0.x has a different memory layout than in other MPICH versions (2.1.x and >=3.1.x have been tested) and the version (3.3.2) we used to compile the Nsight Systems MPI interception library. 
Using nsys export to export to an SQLite database will fail if the destination filesystem doesn\u2019t support file locking. The error message will mention: std::exception::what: database is locked On some Linux systems when VNC is used, some widgets can be rendered incorrectly, or Nsight Systems can crash when opening Analysis Summary or Diagnostics Summary pages. In this case, try forcing a specific software renderer: GALLIUM_DRIVER=llvmpipe nsys-ui Due to a known bug in Open MPI 4.0.1 , target application may crash at the end of execution when being profiled by Nsight Systems . To avoid the issue, use a different Open MPI version, or add --mca btl ^vader option to mpirun command line. The multiprocessing module in Python is commonly used by customers to create new processes. On Linux, the module defaults to using the \u201cfork\u201d mode where it forks new processes, but does not call exec. According to the POSIX standard, fork without exec leads to undefined behavior and tools like Nsight Systems that rely on injection are only allowed to make async-signal-safe calls in such a process. This makes it very hard for tools like Nsight Systems to collect profiling information. See https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods Use the set_start_method in the multiprocessing module to change the start method to \u201cspawn\u201d which is much safer and allows tools like Nsight Systems to collect data. See the code example given in the link above. The user needs to ensure that processes exit gracefully (by using close and join methods, for example, in the multiprocessing module\u2019s objects). Otherwise, Nsight Systems cannot flush buffers properly and you might end up with missing traces. When the CLI sequence launch, start, stop is used to profile a process-tree, LinuxPerf does a depth first search (DFS) to find all of the threads launched by the process-tree before programming the OS to collect the data. 
If, during the DFS, one or more threads are created by the process tree, it is possible those threads won\u2019t be found and LinuxPerf would not collect data for them. Note that once a thread is programmed via perf_event_open, any subsequent children processes or threads generated by that thread will be tracked since the perf_event_open inherit bit is set. No other CLI command sequence suffers from this possible issue. Also, if a systemwide mode is used, the issue does not exist.", "keywords": []}, {"id": 21, "doc_id": 24, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#multi-report-analysis-issues", "display_name": "Multi Report Analysis Issues", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "multi-report-analysis-issues", "priority": -1, "content": "Be aware that setting up Dask analysis on your workstation requires some additional work on the system. For small data inputs, running the recipes without Dask may be faster.", "keywords": []}, {"id": 22, "doc_id": 24, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#vgpu-issues", "display_name": "vGPU Issues", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "vgpu-issues", "priority": -1, "content": "When running Nsight Systems on vGPU you should always use the profiler grant. See Virtual GPU Software Documentation for details on enabling NVIDIA CUDA Toolkit profilers for NVIDIA vGPUs. Without the grant, unexpected migrations may crash a running session, report an error and abort. It may also silently produce a corrupted report which may be unloadable or show inaccurate data with no warning. Starting with vGPU 13.0, device level metrics collection is exposed to end users even on vGPU. Device level metrics will give info about all the work being executed on the GPU. 
The work might be in the same VM or some other VM running on the same physical GPU. As of CUDA 11.4 and R470 TRD1 driver release, Nsight Systems is supported in a vGPU environment which requires a vGPU license. If the license is not obtained after 20 minutes, the tool will still work but the reported GPU performance metrics data will be inaccurate. This is because of a feature in vGPU environment which reduces performance but retains functionality as specified in Grid Licensing User Guide .", "keywords": []}, {"id": 23, "doc_id": 24, "filename": "ReleaseNotes/index.html", "domain_name": "page", "name": "ReleaseNotes/index#what-s-new", "display_name": "What\u2019s New", "type": "section", "display_type": "Page section", "docname": "ReleaseNotes/index", "anchor": "what-s-new", "priority": -1, "content": "Additional MPI communicator creation APIs Option to trace the Python Global Interpretter Lock (GIL) Unified Memory CPU and GPU page fault information now available for Arm BETA: NVIDIA Infiniband switch congestion events (requires Quantum2 and firmware version 31.2012.1068 or higher) Multi-node analysis now supports Mac, Windows x64, Linux Arm Servers Recipe enhancements for NCCL, heatmaps, differencing CLI improvements - Add option -n to nsys status command to provide information about availability of network counters in the users environment. 
NVIDIA Grace PMU uncore counter sampling Windows GPU resource trace enhancements for allocations, migrations, Direct3D, & Vulkan UX and performance improvements", "keywords": []}, {"id": 24, "doc_id": 24, "filename": "ReleaseNotes/index.html", "domain_name": "std", "name": "ReleaseNotes/index", "display_name": "Release Notes", "type": "doc", "display_type": "Page", "docname": "ReleaseNotes/index", "anchor": "", "priority": -1, "content": "Nsight Systems nsys Nsight Systems Workstation Edition Nsight Systems Embedded Platforms Edition 2023-11-11 11 November 2023 2023.4.1 2023.4 2023 4 2023.4 Release notes and known issues.", "keywords": []}, {"id": 25, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#adding-report-to-the-timeline", "display_name": "Adding Report to the Timeline", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "adding-report-to-the-timeline", "priority": -1, "content": "Starting with 2021.3, Nsight Systems can load multiple report files into a single timeline. This is a BETA feature and will be improved in the future releases. Please let us know about your experience on the forums or through Help > Send Feedback\u2026 in the main menu. To load multiple report files into a single timeline, first start by opening a report as usual \u2014 using File > Open\u2026 from the main menu, or double clicking on a report in the Project Explorer window. 
Then additional report files can be loaded into the same timeline using one of the methods: File > Add Report (beta)\u2026 in the main menu, and select another report file that you want to open Right click on the report in the project explorer window, and click Add Report (beta)", "keywords": []}, {"id": 26, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#analysis-summary-view", "display_name": "Analysis Summary View", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "analysis-summary-view", "priority": -1, "content": "This view shows a summary of the profiling session. In particular, it is useful to review the project configuration used to generate this report. Information from this view can be selected and copied using the mouse cursor.", "keywords": []}, {"id": 27, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#arrow-format-description", "display_name": "Arrow Format Description", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "arrow-format-description", "priority": -1, "content": "The Arrow type exported file uses the IPC stream format to store the data in a file. The tables can be read by opening the file as an arrow stream. For example one can use the open_stream function from the arrow python package. For more information on the interfaces that can be used to read an IPC stream file, please refer to the Apache Arrow documentation [ 1 , 2 ]. The name of each table is included in the schema metadata. Thus, while reading each table, the user can extract the table title from the metadata. The table name metadata field has the key table_name . 
The titles of all the available tables can be found in section SQLite Schema Reference .", "keywords": []}, {"id": 28, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#available-metrics", "display_name": "Available metrics", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "available-metrics", "priority": -1, "content": "GPC Clock Frequency - gpc__cycles_elapsed.avg.per_second The average GPC clock frequency in hertz. In public documentation the GPC clock may be called the \u201cApplication\u201d clock, \u201cGraphic\u201d clock, \u201cBase\u201d clock, or \u201cBoost\u201d clock. Note : The collection mechanism for GPC can result in a small fluctuation between samples. SYS Clock Frequency - sys__cycles_elapsed.avg.per_second The average SYS clock frequency in hertz. The GPU front end (command processor), copy engines, and the performance monitor run at the SYS clock. On Turing and NVIDIA GA100 GPUs the sampling frequency is based upon a period of SYS clocks (not time) so samples per second will vary with SYS clock. On NVIDIA GA10x GPUs the sampling frequency is based upon a fixed frequency clock. The maximum frequency scales linearly with the SYS clock. GR Active - gr__cycles_active.sum.pct_of_peak_sustained_elapsed The percentage of cycles the graphics/compute engine is active. The graphics/compute engine is active if there is any work in the graphics pipe or if the compute pipe is processing work. GA100 MIG - MIG is not yet supported. This counter will report the activity of the primary GR engine. Sync Compute In Flight - gr__dispatch_cycles_active_queue_sync.avg.pct_of_peak_sustained_elapsed The percentage of cycles with synchronous compute in flight. CUDA: CUDA will only report synchronous queue in the case of MPS configured with 64 sub-context. Synchronous refers to work submitted in VEID=0. 
Graphics: This will be true if any compute work submitted from the direct queue is in flight. Async Compute in Flight - gr__dispatch_cycles_active_queue_async.avg.pct_of_peak_sustained_elapsed The percentage of cycles with asynchronous compute in flight. CUDA: CUDA will only report all compute work as asynchronous. The one exception is if MPS is configured and all 64 sub-context are in use. 1 sub-context (VEID=0) will report as synchronous. Graphics: This will be true if any compute work submitted from a compute queue is in flight. Draw Started - fe__draw_count.avg.pct_of_peak_sustained_elapsed The ratio of draw calls issued to the graphics pipe to the maximum sustained rate of the graphics pipe. Note: The percentage will always be very low as the front end can issue draw calls significantly faster than the pipe can execute the draw call. The rendering of this row will be changed to help indicate when draw calls are being issued. Dispatch Started - gr__dispatch_count.avg.pct_of_peak_sustained_elapsed The ratio of compute grid launches (dispatches) to the compute pipe to the maximum sustained rate of the compute pipe. Note: The percentage will always be very low as the front end can issue grid launches significantly faster than the pipe can execute the draw call. The rendering of this row will be changed to help indicate when grid launches are being issued. Vertex/Tess/Geometry Warps in Flight - tpc__warps_active_shader_vtg_realtime.avg.pct_of_peak_sustained_elapsed The ratio of active vertex, geometry, tessellation, and meshlet shader warps resident on the SMs to the maximum number of warps per SM as a percentage. Pixel Warps in Flight - tpc__warps_active_shader_ps_realtime.avg.pct_of_peak_sustained_elapsed The ratio of active pixel/fragment shader warps resident on the SMs to the maximum number of warps per SM as a percentage. 
Compute Warps in Flight - tpc__warps_active_shader_cs_realtime.avg.pct_of_peak_sustained_elapsed The ratio of active compute shader warps resident on the SMs to the maximum number of warps per SM as a percentage. Active SM Unused Warp Slots - tpc__warps_inactive_sm_active_realtime.avg.pct_of_peak_sustained_elapsed The ratio of inactive warp slots on the SMs to the maximum number of warps per SM as a percentage. This is an indication of how many more warps may fit on the SMs if occupancy is not limited by a resource such as max warps of a shader type, shared memory, registers per thread, or thread blocks per SM. Idle SM Unused Warp Slots - tpc__warps_inactive_sm_idle_realtime.avg.pct_of_peak_sustained_elapsed The ratio of inactive warps slots due to idle SMs to the the maximum number of warps per SM as a percentage. This is an indicator that the current workload on the SM is not sufficient to put work on all SMs. This can be due to: CPU starving the GPU current work is too small to saturate the GPU current work is trailing off but blocking next work SM Active - sm__cycles_active.avg.pct_of_peak_sustained_elapsed The ratio of cycles SMs had at least 1 warp in flight (allocated on SM) to the number of cycles as a percentage. A value of 0 indicates all SMs were idle (no warps in flight). A value of 50% can indicate some gradient between all SMs active 50% of the sample period or 50% of SMs active 100% of the sample period. SM Issue - sm__inst_executed_realtime.avg.pct_of_peak_sustained_elapsed The ratio of cycles that SM sub-partitions (warp schedulers) issued an instruction to the number of cycles in the sample period as a percentage. Tensor Active - sm__pipe_tensor_cycles_active_realtime.avg.pct_of_peak_sustained_elapsed The ratio of cycles the SM tensor pipes were active issuing tensor instructions to the number of cycles in the sample period as a percentage. TU102/4/6: This metric is not available on TU10x for periodic sampling. 
Please see Tensor Active/FP16 Active. Tensor Active / FP16 Active - sm__pipe_shared_cycles_active_realtime.avg.pct_of_peak_sustained_elapsed TU102/4/6 only The ratio of cycles the SM tensor pipes or FP16x2 pipes were active issuing tensor instructions to the number of cycles in the sample period as a percentage. DRAM Read Bandwidth - dramc__read_throughput.avg.pct_of_peak_sustained_elapsed , dram__read_throughput.avg.pct_of_peak_sustained_elapsed VRAM Read Bandwidth - FBPA.TriageA.dramc__read_throughput.avg.pct_of_peak_sustained_elapsed , FBSP.TriageSCG.dramc__read_throughput.avg.pct_of_peak_sustained_elapsed , FBSP.TriageAC.dramc__read_throughput.avg.pct_of_peak_sustained_elapsed The ratio of cycles the DRAM interface was active reading data to the elapsed cycles in the same period as a percentage. DRAM Write Bandwidth - dramc__write_throughput.avg.pct_of_peak_sustained_elapsed , dram__write_throughput.avg.pct_of_peak_sustained_elapsed VRAM Write Bandwidth - FBPA.TriageA.dramc__write_throughput.avg.pct_of_peak_sustained_elapsed , FBSP.TriageSCG.dramc__write_throughput.avg.pct_of_peak_sustained_elapsed , FBSP.TriageAC.dramc__write_throughput.avg.pct_of_peak_sustained_elapsed The ratio of cycles the DRAM interface was active writing data to the elapsed cycles in the same period as a percentage. NVLink bytes received - nvlrx__bytes.avg.pct_of_peak_sustained_elapsed The ratio of bytes received on the NVLink interface to the maximum number of bytes receivable in the sample period as a percentage. This value includes protocol overhead. NVLink bytes transmitted - nvltx__bytes.avg.pct_of_peak_sustained_elapsed The ratio of bytes transmitted on the NVLink interface to the maximum number of bytes transmittable in the sample period as a percentage. This value includes protocol overhead. 
PCIe Read Throughput - pcie__read_bytes.avg.pct_of_peak_sustained_elapsed The ratio of bytes received on the PCIe interface to the maximum number of bytes receivable in the sample period as a percentage. The theoretical value is calculated based upon the PCIe generation and number of lanes. This value includes protocol overhead. PCIe Write Throughput - pcie__write_bytes.avg.pct_of_peak_sustained_elapsed The ratio of bytes transmitted on the PCIe interface to the maximum number of bytes receivable in the sample period as a percentage. The theoretical value is calculated based upon the PCIe generation and number of lanes. This value includes protocol overhead. PCIe Read Requests to BAR1 - pcie__rx_requests_aperture_bar1_op_read.sum PCIe Write Requests to BAR1 - pcie__rx_requests_aperture_bar1_op_write.sum BAR1 is a PCI Express (PCIe) interface used to allow the CPU or other devices to directly access GPU memory. The GPU normally transfers memory with its copy engines, which would not show up as BAR1 activity. The GPU drivers on the CPU do a small amount of BAR1 accesses, but heavier traffic is typically coming from other technologies. On Linux, technologies like GPU Direct, GPU Direct RDMA, and GPU Direct Storage transfer data across PCIe BAR1. In the case of GPU Direct RDMA, that would be an Ethernet or InfiniBand adapter directly writing to GPU memory. 
On Windows, Direct3D12 resources can also be made accessible directly to the CPU via NVAPI functions to support small writes or reads from GPU buffers, in this case too many BAR1 accesses can indicate a performance issue, like it has been demonstrated in the Optimizing DX12 Resource Uploads to the GPU Using CPU-Visible VRAM technical blog post.", "keywords": []}, {"id": 29, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#available-recipes", "display_name": "Available Recipes", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "available-recipes", "priority": -1, "content": "All recipes are run using the new \u201crecipe\u201d CLI command switch. usage: nsys recipe [args] <recipe-name> [recipe args] Nsight Systems provides several initial analysis recipes, mostly based around making our existing statistics and expert systems rules run multi-report. These recipes can be found at <target-linux-x64>/python/packages/nsys_recipe . They are written in Python, and you can edit them if you would like. However, be advised that as this is a preview release, it is likely that the APIs will change between now and the final release. Additional recipes will be added before the product release and on an ongoing basis. Statistics and Expert Systems Recipes The following stats and expert systems options from Nsight Systems are available as recipes. 
For more information about them please use nsys recipe [recipe name] --help or see Nsight Systems Report Scripts in this documentation cuda_api_sum - CUDA API Summary cuda_api_sync - CUDA Synchronization APIs cuda_gpu_kern_sum - CUDA GPU Kernel Summary cuda_gpu_mem_size_sum - CUDA GPU MemOps Summary (by Size) cuda_gpu_mem_time_sum - CUDA GPU MemOps Summary (by Time) cuda_memcpy_async - CUDA Async Memcpy with Pageable Memory cuda_memcpy_sync - CUDA Synchronous Memcpy cuda_memset_sync - CUDA Synchronous Memset dx12_mem_ops - DX12 Memory Operations gpu_gaps - GPU Gaps gpu_time_util - GPU Time Utilization nvtx_gpu_proj_trace - NVTX GPU Trace nvtx_sum - NVTX Range Summary osrt_sum - OS Runtime Summary Please note that all recipes are in the form of python scripts. You may alter the given recipes or write your own to meet your needs. Refer to Tutorial: Create a User-Defined Recipe for an example of how to do this Heatmap Recipes cuda_gpu_time_util_map - CUDA GPU Kernel Time Utilization Heatmap gpu_metric_util_map - GPU Metric Utilization Heatmap Both recipes generate a Jupyter notebook with code cells ready to plot the heatmap chart: cuda_gpu_time_util_map -- CUDA GPU Kernel Time Utilization Heatmap $ nsys recipe cuda_gpu_time_util_map --help usage: cuda_gpu_time_util_map.py [-h] [--output OUTPUT] [--force-overwrite] [--start time] [--end time] [--nvtx range[@domain]] [--rows limit] [--bins BINS] --dir DIR [--mode {none,concurrent,dask-futures}] This recipe calculates the percentage of GPU utilization based on the presence of CUDA kernels. Note that the utilization refers to the "time" utilization and not the "resource" utilization. If multiple kernels run concurrently, their utilization will be added up and may exceed 100%. 
options: -h, --help show this help message and exit Context: --mode {none,concurrent,dask-futures} Mode to run tasks Recipe: --output OUTPUT Output directory name --force-overwrite Overwrite existing directory --start time Start time used for filtering in nanoseconds --end time End time used for filtering in nanoseconds --nvtx range[@domain] NVTX range and domain used for filtering --rows limit Maximum number of rows per input file --bins BINS Number of bins --dir DIR Directory of nsys-rep files gpu_metric_util_map -- GPU Metric Utilization Heatmap $ nsys recipe gpu_metric_util_map --help usage: gpu_metric_util_map.py [-h] [--output OUTPUT] [--force-overwrite] [--start time] [--end time] [--nvtx range[@domain]] [--rows limit] [--bins BINS] --dir DIR [--mode {none,concurrent,dask-futures}] This recipe calculates the percentage of SM Active, SM Issue, and Tensor Active metrics. options: -h, --help show this help message and exit Context: --mode {none,concurrent,dask-futures} Mode to run tasks Recipe: --output OUTPUT Output directory name --force-overwrite Overwrite existing directory --start time Start time used for filtering in nanoseconds --end time End time used for filtering in nanoseconds --nvtx range[@domain] NVTX range and domain used for filtering --rows limit Maximum number of rows per input file --bins BINS Number of bins --dir DIR Directory of nsys-rep files Pacing Recipes cuda_gpu_kern_pace - CUDA GPU Kernel Pacing nvtx_pace - NVTX Pacing Both recipes generate a Jupyter notebook with code cells ready to plot various graphs showing the progress of the target operation/range for each rank: cuda_gpu_kern_pace -- CUDA GPU Kernel Pacing $ nsys recipe cuda_gpu_kern_pace --help usage: cuda_gpu_kern_pace.py [-h] [--output OUTPUT] [--force-overwrite] --name NAME --dir DIR [--mode {none,concurrent,dask-futures}] This recipe investigates the progress and consistency of an iteration based application. 
optional arguments: -h, --help show this help message and exit Context: --mode {none,concurrent,dask-futures} Mode to run tasks Recipe: --output OUTPUT Output directory name --force-overwrite Overwrite existing directory --name NAME Name of the kernel used as delineator between iterations --dir DIR Directory of nsys-rep files nvtx_pace -- NVTX Pacing $ nsys recipe nvtx_pace --help usage: nvtx_pace.py [-h] [--output OUTPUT] [--force-overwrite] [--gpu] --name NAME --dir DIR [--mode {none,concurrent,dask-futures}] This recipe investigates the progress and consistency of an iteration based application. optional arguments: -h, --help show this help message and exit Context: --mode {none,concurrent,dask-futures} Mode to run tasks Recipe: --output OUTPUT Output directory name --force-overwrite Overwrite existing directory --gpu GPU projection --name NAME Name of the NVTX range used as delineator between iterations --dir DIR Directory of nsys-rep files Additional Statistics Recipes mpi_sum - MPI Summary nccl_sum - NCCL Summary nvtx_gpu_proj_sum - NVTX GPU Projection Summary All recipes generate a Jupyter notebook with code cells ready to plot various statistical graphs: mpi_sum -- MPI Summary $ nsys recipe mpi_sum --help usage: mpi_sum.py [-h] [--output OUTPUT] [--force-overwrite] --dir DIR [--mode {none,concurrent,dask-futures}] This recipe provides a summary of MPI functions and their execution times. optional arguments: -h, --help show this help message and exit Context: --mode {none,concurrent,dask-futures} Mode to run tasks Recipe: --output OUTPUT Output directory name --force-overwrite Overwrite existing directory --dir DIR Directory of nsys-rep files nccl_sum -- NCCL Summary $ nsys recipe nccl_sum --help usage: nccl_sum.py [-h] [--output OUTPUT] [--force-overwrite] [--gpu] --dir DIR [--mode {none,concurrent,dask-futures}] This recipe provides a summary of NCCL functions and their execution times. 
optional arguments: -h, --help show this help message and exit Context: --mode {none,concurrent,dask-futures} Mode to run tasks Recipe: --output OUTPUT Output directory name --force-overwrite Overwrite existing directory --gpu GPU projection --dir DIR Directory of nsys-rep files nvtx_gpu_proj_sum -- NVTX GPU Projection Summary $ nsys recipe nvtx_gpu_proj_sum --help usage: nvtx_gpu_proj_sum.py [-h] [--output OUTPUT] [--force-overwrite] [--gpu] --dir DIR [--mode {none,concurrent,dask-futures}] This recipe provides a summary of NVTX time ranges projected from the CPU onto the GPU, and their execution times. optional arguments: -h, --help show this help message and exit Context: --mode {none,concurrent,dask-futures} Mode to run tasks Recipe: --output OUTPUT Output directory name --force-overwrite Overwrite existing directory --gpu GPU projection --dir DIR Directory of nsys-rep files", "keywords": []}, {"id": 30, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#blog-posts", "display_name": "Blog Posts", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "blog-posts", "priority": -1, "content": "NVIDIA developer blogs, these are longer form, technical pieces written by tool and domain experts. 
2021 - Optimizing DX12 Resource Uploads to the GPU Using CPU-Visible VRAM 2019 - Migrating to NVIDIA Nsight Tools from NVVP and nvprof 2019 - Transitioning to Nsight Systems from NVIDIA Visual Profiler / nvprof 2019 - NVIDIA Nsight Systems Add Vulkan Support 2019 - TensorFlow Performance Logging Plugin nvtx-plugins-tf Goes Public 2020 - Understanding the Visualization of Overhead and Latency in Nsight Systems 2021 - Optimizing DX12 Resource Uploads to the GPU Using CPU-Visible VRAM", "keywords": []}, {"id": 31, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#broken-backtraces-on-tegra", "display_name": "Broken Backtraces on Tegra", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "broken-backtraces-on-tegra", "priority": -1, "content": "In Nsight Systems Embedded Platforms Edition , in the symbols table there is a special entry called Broken backtraces . This entry is used to denote the point in the call chain where the unwinding algorithms used by Nsight Systems could not determine what is the next (caller) function. Broken backtraces happen because there is no information related to the current function that the unwinding algorithms can use. In the Top-Down view, these functions are immediate children of the Broken backtraces row. One can eliminate broken backtraces by modifying the build system to provide at least one kind of unwind information. The types of unwind information, used by the algorithms in Nsight Systems , include the following: For ARMv7 binaries: DWARF information in ELF sections: .debug_frame , .zdebug_frame , .eh_frame , .eh_frame_hdr . This information is the most precise. .zdebug_frame is a compressed version of .debug_frame , so at most one of them is typically present. .eh_frame_hdr is a companion section for .eh_frame and might be absent. Compiler flag: -g . 
Exception handling information in EHABI format provided in .ARM.exidx and .ARM.extab ELF sections. .ARM.extab might be absent if all information is compact enough to be encoded into .ARM.exidx . Compiler flag: -funwind-tables . Frame pointers (built into the .text section). Compiler flag: -fno-omit-frame-pointer . For Aarch64 binaries: DWARF information in ELF sections: .debug_frame , .zdebug_frame , .eh_frame , .eh_frame_hdr . See additional comments above. Compiler flag: -g . Frame pointers (built into the .text section). Compiler flag: -fno-omit-frame-pointer . The following ELF sections should be considered empty if they have size of 4 bytes: .debug_frame , .eh_frame , .ARM.exidx . In this case, these sections only contain termination records and no useful information. For GCC, use the following compiler invocation to see which compiler flags are enabled in your toolchain by default (for example, to check if -funwind-tables is enabled by default): $ gcc -Q --help=common For GCC and Clang, add -### to the compiler invocation command to see which compiler flags are actually being used. Since EHABI and DWARF information is compiled on per-unit basis (every .cpp or .c file, as well as every static library, can be built with or without this information), presence of the ELF sections does not guarantee that every function has necessary unwind information. Frame pointers are required by the Aarch64 Procedure Call Standard. 
Adding frame pointers slows down execution time, but in most cases the difference is negligible.", "keywords": []}, {"id": 32, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cli-analyze-command-switch-options", "display_name": "CLI Analyze Command Switch Options", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cli-analyze-command-switch-options", "priority": -1, "content": "The nsys analyze command generates and outputs to the terminal a report using expert system rules on existing results. Reports are generated from an SQLite export of a .nsys-rep file. If a .nsys-rep file is specified, Nsight Systems will look for an accompanying SQLite file and use it. If no SQLite export file exists, one will be created. After choosing the analyze command switch, the following options are available. Usage: nsys [global-options] analyze [options] [input-file] Short Long Possible Parameters Default Switch Description --help <tag> none Print the help message. The option can take one optional argument that will be used as a tag. If a tag is provided, only options relevant to the tag will be printed. -f --format column, table, csv, tsv, json, hdoc, htable, . Specify the output format. The special name \u201c.\u201d indicates the default format for the given output. The default format for console is column, while files and process outputs default to csv. This option may be used multiple times. Multiple formats may also be specified using a comma-separated list (<name[:args\u2026][,name[:args\u2026]\u2026]>). See Report Scripts for options available with each format. --force-export true, false false Force a re-export of the SQLite file from the specified .nsys-rep file, even if an SQLite file already exists. --force-overwrite true, false false Overwrite any existing output files. 
--help-formats <format_name>, ALL, [none] none With no argument, list a summary of the available output formats. If a format name is given, a more detailed explanation of the the format is displayed. If ALL is given, a more detailed explanation of all available formats is displayed. --help-rules <rule_name>, ALL, [none] none With no argument, list available rules with a short description. If a rule name is given, a more detailed explanation of the rule is displayed. If ALL is given, a more detailed explanation of all available rules is displayed. -o --output -, @<command>, <basename>, . - Specify the output mechanism. There are three output mechanisms: print to console, output to file, or output to command. This option may be used multiple times. Multiple outputs may also be specified using a comma-separated list. If the given output name is \u201c-\u201d, the output will be displayed on the console. If the output name starts with \u201c@\u201d, the output designates a command to run. The nsys command will be executed and the analysis output will be piped into the command. Any other output is assumed to be the base path and name for a file. If a file basename is given, the filename used will be: <basename>_<analysis&args>.<output_format>. The default base (including path) is the name of the SQLite file (as derived from the input file or --sqlite option), minus the extension. The output \u201c.\u201d can be used to indicate the analysis should be output to a file, and the default basename should be used. To write one or more analysis outputs to files using the default basename, use --output . If the output starts with \u201c@\u201d, the nsys command output is piped to the given command. The command is run, and the output is piped to the command\u2019s stdin (standard-input). The command\u2019s stdout and stderr remain attached to the console, so any output will be displayed directly to the console. 
Be aware there are some limitations in how the command string is parsed. No shell expansions (including *, ?, [], and ~) are supported. The command cannot be piped to another command, nor redirected to a file using shell syntax. The command and command arguments are split on whitespace, and no quotes (within the command syntax) are supported. For commands that require complex command line syntax, it is suggested that the command be put into a shell script file, and the script designated as the output command. -q --quiet Do not display verbose messages, only display errors. -r --rule cuda_memcpy_async, cuda_memcpy_sync, cuda_memset_sync, cuda_api_sync, gpu_gaps, gpu_time_util, dx12_mem_ops all Specify the rules(s) to execute, including any arguments. This option may be used multiple times. Multiple rules may also be specified using a comma-separated list. See Expert Systems section and --help-rules switch for details on all rules. --sqlite <file.sqlite> Specify the SQLite export filename. If this file exists, it will be used. If this file doesn\u2019t exist (or if --force-export was given) this file will be created from the specified .nsys-rep file before processing. This option cannot be used if the specified input file is also an SQLite file. --timeunit nsec, nanoseconds, usec, microseconds, msec, milliseconds, seconds nanoseconds Set basic unit of time. The argument of the switch is matched by using the longest prefix matching. Meaning that it is not necessary to write a whole word as the switch argument. It is similar to passing a \u201c:time=<unit>\u201d argument to every formatter, although the formatter uses more strict naming conventions. 
See nsys analyze --help-formats column for more detailed information on unit conversion.", "keywords": []}, {"id": 33, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cli-cancel-command-switch-options", "display_name": "CLI Cancel Command Switch Options", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cli-cancel-command-switch-options", "priority": -1, "content": "After choosing the cancel command switch, the following options are available. Usage: nsys [global-options] cancel [options] Short Long Possible Parameters Default Switch Description --help <tag> none Print the help message. The option can take one optional argument that will be used as a tag. If a tag is provided, only options relevant to the tag will be printed. --session <session identifier> none Cancel the collection in the given session. The option argument must represent a valid session name or ID as reported by nsys sessions list . Any %q{ENV_VAR} pattern in the option argument will be substituted with the value of the environment variable. Any %h pattern in the option argument will be substituted with the hostname of the system. Any %% pattern in the option argument will be substituted with % .", "keywords": []}, {"id": 34, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cli-command-switches", "display_name": "CLI Command Switches", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cli-command-switches", "priority": -1, "content": "The Nsight Systems command line interface can be used in two modes. You may launch your application and begin analysis with options specified to the nsys profile command. Alternatively, you can control the launch of an application and data collection using interactive CLI commands. 
Command Description analyze Post process existing Nsight Systems result, either in .nsys-rep or SQLite format, to generate expert systems report. cancel Cancels an existing collection started in interactive mode. All data already collected in the current collection is discarded. export Generates an export file from an existing .nsys-rep file. For more information about the exported formats see the /documentation/nsys-exporter directory in your Nsight Systems installation directory. launch In interactive mode, launches an application in an environment that supports the requested options. The launch command can be executed before or after a start command. nvprof Special option to help with transition from legacy NVIDIA nvprof tool. Calling nsys nvprof [options] will provide the best available translation of nvprof [options] See Migrating from NVIDIA nvprof topic for details. No additional functionality of nsys will be available when using this option. Note: Not available on IBM Power targets. profile A fully formed profiling description requiring and accepting no further input. The command switch options used (see below table) determine when the collection starts, stops, what collectors are used (e.g. API trace, IP sampling, etc.), what processes are monitored, etc. recipe PREVIEW FEATURE Post process multiple existing Nsight Systems results, in .nsys-rep or SQLite to generate statistical information and create various plots. See Multi-Node Analysis topic for details. sessions Gives information about all sessions running on the system. shutdown Disconnects the CLI process from the launched application and forces the CLI process to exit. If a collection is pending or active, it is cancelled start Start a collection in interactive mode. The start command can be executed before or after a launch command. stats Post process existing Nsight Systems result, either in .nsys-rep or SQLite format, to generate statistical information. 
status Reports on the status of a CLI-based collection or the suitability of the profiling environment. stop Stop a collection that was started in interactive mode. When executed, all active collections stop, the CLI process terminates but the application continues running.", "keywords": []}, {"id": 35, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cli-export-command-switch-options", "display_name": "CLI Export Command Switch Options", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cli-export-command-switch-options", "priority": -1, "content": "After choosing the export command switch, the following options are available. Usage: nsys [global-options] export [options] [nsys-rep-file] Short Long Possible Parameters Default Switch Description -f --force-overwrite true, false false If true, overwrite all existing result files with same output filename (QDSTRM, nsys-rep, SQLITE, HDF, TEXT, ARROW, JSON). --help <tag> none Print the help message. The option can take one optional argument that will be used as a tag. If a tag is provided, only options relevant to the tag will be printed. -l --lazy true, false true Controls if table creation is lazy or not. When true, a table will only be created when it contains data. This option will be deprecated in the future, and all exports will be non-lazy. This affects SQLite, HDF5, and Arrow exports only. -o --output <filename> <inputfile.ext> Set the .output filename. The default is the input filename with the extension for the chosen format. -q --quiet true, false false If true, do not display progress bar --separate-strings true,false false Output stored strings and thread names separately, with one value per line. This affects JSON and text output only. -t --type arrow, hdf, info, json, sqlite, text sqlite Export format type. 
HDF format is supported only on x86_64 Linux and Windows --ts-normalize true, false false If true, all timestamp values in the report will be shifted to UTC wall-clock time, as defined by the UNIX epoch. This option can be used in conjunction with the --ts-shift option, in which case both adjustments will be applied. If this option is used to align a series of reports from a cluster or distributed system, the accuracy of the alignment is limited by the synchronization precision of the system clocks. For detailed analysis, the use of PTP or another high-precision synchronization methodology is recommended. NTP is unlikely to produce desirable results. This option only applies to Arrow, HDF5, and SQLite exports. --ts-shift signed integer, in nanoseconds 0 If given, all timestamp values in the report will be shifted by the given amount. This option can be used in conjunction with the --ts-normalize option, in which case both adjustments will be applied. This option can be used to \u201chand-align\u201d report files captured at different times, or reports captured on distributed systems with poorly synchronized system clocks. This option only applies to Arrow, HDF5, and SQLite exports.", "keywords": []}, {"id": 36, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cli-global-options", "display_name": "CLI Global Options", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cli-global-options", "priority": -1, "content": "Short Long Description -h --help Help message providing information about available command switches and their options. 
-v --version Output Nsight Systems CLI version information.", "keywords": []}, {"id": 37, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cli-launch-command-switch-options", "display_name": "CLI Launch Command Switch Options", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cli-launch-command-switch-options", "priority": -1, "content": "After choosing the launch command switch, the following options are available. Usage: nsys [global-options] launch [options] <application> [application-arguments] Short Long Possible Parameters Default Switch Description -b --backtrace auto,fp,lbr,dwarf,none Select the backtrace method to use while sampling. The option \u2018lbr\u2019 uses Intel(c) Corporation\u2019s Last Branch Record registers, available only with Intel(c) CPUs codenamed Haswell and later. The option \u2018fp\u2019 is frame pointer and assumes that frame pointers were enabled during compilation. The option \u2018dwarf\u2019 uses DWARF\u2019s CFI (Call Frame Information). Setting the value to \u2018none\u2019 can reduce collection overhead. --clock-frequency-changes true, false false Collect clock frequency changes. Available in Nsight Systems Embedded Platforms Edition only. --cpu-cluster-events 0x16, 0x17, \u2026, none none Collect per-cluster Uncore PMU counters. Multiple values can be selected, separated by commas only (no spaces). Use the --cpu-cluster-events=help switch to see the full list of values. Available in Nsight Systems Embedded Platforms Edition only. --command-file < filename > none Open a file that contains profile switches and parse the switches. Note additional switches on the command line will override switches in the file. This flag can be specified more than once. --cpu-core-events ( Nsight Systems Embedded Platforms Edition ) 0x11,0x13,\u2026,none none Collect per-core PMU counters. 
Multiple values can be selected, separated by commas only (no spaces). Use the --cpu-core-events=help switch to see the full list of values. --cpu-socket-events 0x2a, 0x2c, \u2026, none none Collect per-socket Uncore PMU counters. Multiple values can be selected, separated by commas only (no spaces). Use the --cpu-socket-events=help switch to see the full list of values. Available in Nsight Systems Embedded Platforms Edition only. --cpuctxsw process-tree, system-wide, none process-tree Trace OS thread scheduling activity. Select \u2018none\u2019 to disable tracing CPU context switches. Depending on the platform, some values may require admin or root privileges. Note: if the --sample switch is set to a value other than \u2018none\u2019, the --cpuctxsw setting is hardcoded to the same value as the --sample switch. If --sample=none and a target application is launched, the default is \u2018process-tree\u2019, otherwise the default is \u2018none\u2019. Requires --sampling-trigger=perf switch in Nsight Systems Embedded Platforms Edition --cuda-flush-interval milliseconds See Description Set the interval, in milliseconds, when buffered CUDA data is automatically saved to storage. CUDA data buffer saves may cause profiler overhead. Buffer save behavior can be controlled with this switch. If the CUDA flush interval is set to 0 on systems running CUDA 11.0 or newer, buffers are saved when they fill. If a flush interval is set to a non-zero value on such systems, buffers are saved only when the flush interval expires. If a flush interval is set and the profiler runs out of available buffers before the flush interval expires, additional buffers will be allocated as needed. In this case, setting a flush interval can reduce buffer save overhead but increase memory use by the profiler. If the flush interval is set to 0 on systems running older versions of CUDA, buffers are saved at the end of the collection. 
If the profiler runs out of available buffers, additional buffers are allocated as needed. If a flush interval is set to a non-zero value on such systems, buffers are saved when the flush interval expires. A cuCtxSynchronize call may be inserted into the workflow before the buffers are saved which will cause application overhead. In this case, setting a flush interval can reduce memory use by the profiler but may increase save overhead. For collections over 30 seconds an interval of 10 seconds is recommended. Default is 10000 for Nsight Systems Embedded Platforms Edition and 0 otherwise. --cuda-memory-usage true, false false Track the GPU memory usage by CUDA kernels. Applicable only when CUDA tracing is enabled. Note: This feature may cause significant runtime overhead. --cuda-um-cpu-page-faults true, false false This switch tracks the page faults that occur when CPU code tries to access a memory page that resides on the device. Note that this feature may cause significant runtime overhead. Not available on Nsight Systems Embedded Platforms Edition . --cuda-um-gpu-page-faults true, false false This switch tracks the page faults that occur when GPU code tries to access a memory page that resides on the host. Note that this feature may cause significant runtime overhead. Not available on Nsight Systems Embedded Platforms Edition . --cudabacktrace all, none, kernel, memory, sync, other none When tracing CUDA APIs, enable the collection of a backtrace when a CUDA API is invoked. Significant runtime overhead may occur. Values may be combined using \u2018,\u2019. Each value except \u2018none\u2019 may be appended with a threshold after \u2018:\u2019. Threshold is duration, in nanoseconds, that CUDA APIs must execute before backtraces are collected, e.g. \u2018kernel:500\u2019. Default value for each threshold is 1000ns (1us). Note: CPU sampling must be enabled. Note: Not available on IBM Power targets. 
--cuda-graph-trace graph, node graph If \u2018graph\u2019 is selected, CUDA graphs will be traced as a whole and node activities will not be collected. This will reduce overhead to a minimum, but requires CUDA driver version 515.43 or higher. If \u2018node\u2019 is selected, node activities will be collected, but CUDA graphs will not be traced as a whole. This may cause significant runtime overhead. Default is \u2018graph\u2019 if available, otherwise default is \u2018node\u2019. --dx-force-declare-adapter-removal-support true, false false The Nsight Systems trace initialization involves creating a D3D device and discarding it. Enabling this flag makes a call to DXGIDeclareAdapterRemovalSupport() before device creation. Requires DX11 or DX12 trace to be enabled. --dx12-gpu-workload true, false, individual, batch, none individual If individual or true, trace each DX12 workload\u2019s GPU activity individually. If batch, trace DX12 workloads\u2019 GPU activity in ExecuteCommandLists call batches. If none or false, do not trace DX12 workloads\u2019 GPU activity. Note that this switch is applicable only when --trace=dx12 is specified. This option is only supported on Windows targets. --dx12-wait-calls true, false true If true, trace wait calls that block on fences for DX12. Note that this switch is applicable only when --trace=dx12 is specified. This option is only supported on Windows targets. -e --env-var A=B NA Set environment variable(s) for the application process to be launched. Environment variables should be defined as A=B. Multiple environment variables can be specified as A=B,C=D. --help <tag> none Print the help message. The option can take one optional argument that will be used as a tag. If a tag is provided, only options relevant to the tag will be printed. --hotkey-capture \u2018F1\u2019 to \u2018F12\u2019 \u2018F12\u2019 Hotkey to trigger the profiling session. Note that this switch is applicable only when --capture-range=hotkey is specified. 
-n --inherit-environment true, false true When true, the current environment variables and the tool\u2019s environment variables will be specified for the launched process. When false, only the tool\u2019s environment variables will be specified for the launched process. --injection-use-detours true,false true Use detours for injection. If false, process injection will be performed by windows hooks which allows it to bypass anti-cheat software. --isr true, false false Trace Interrupt Service Routines (ISRs) and Deferred Procedure Calls (DPCs). Requires administrative privileges. Available only on Windows devices. --mpi-impl openmpi,mpich openmpi When using --trace=mpi to trace MPI APIs use --mpi-impl to specify which MPI implementation the application is using. If no MPI implementation is specified, nsys tries to automatically detect it based on the dynamic linker\u2019s search path. If this fails, \u2018openmpi\u2019 is used. Calling --mpi-impl without --trace=mpi is not supported. -p --nvtx-capture range@domain, range, range@* none Specify NVTX range and domain to trigger the profiling session. This option is applicable only when used along with --capture-range=nvtx . --nvtx-domain-exclude default, <domain_names> Choose to exclude NVTX events from a comma separated list of domains. \u2018default\u2019 excludes NVTX events without a domain. A domain with this name or commas in a domain name must be escaped with \u2018\\\u2019. Note: Only one of --nvtx-domain-include and --nvtx-domain-exclude can be used. This option is only applicable when --trace=nvtx is specified. --nvtx-domain-include default, <domain_names> Choose to only include NVTX events from a comma separated list of domains. \u2018default\u2019 filters the NVTX default domain. A domain with this name or commas in a domain name must be escaped with \u2018\\\u2019. Note: Only one of --nvtx-domain-include and --nvtx-domain-exclude can be used. This option is only applicable when --trace=nvtx is specified. 
--python-nvtx-annotations <json_file> Specify the path to the JSON file containing the requested NVTX annotations. --opengl-gpu-workload true, false true If true, trace the OpenGL workloads\u2019 GPU activity. Note that this switch is applicable only when --trace=opengl is specified. This option is not supported on IBM Power targets. --os-events \u2018help\u2019 or the end users selected events in the format \u2018x,y\u2019 Select the OS events to sample. Use the --os-events=help switch to see the full list of events. Multiple values can be selected, separated by commas only (no spaces). Use the --event-sample switch to enable. Not available on Nsight Systems Embedded Platforms Edition . --osrt-backtrace-depth integer 24 Set the depth for the backtraces collected for OS runtime libraries calls. --osrt-backtrace-stack-size integer 6144 Set the stack dump size, in bytes, to generate backtraces for OS runtime libraries calls. --osrt-backtrace-threshold nanoseconds 80000 Set the duration, in nanoseconds, that all OS runtime libraries calls must execute before backtraces are collected. --osrt-threshold < nanoseconds > 1000 ns Set the duration, in nanoseconds, that Operating System Runtime (osrt) APIs must execute before they are traced. Values significantly less than 1000 may cause significant overhead and result in extremely large result files. Note: Not available for IBM Power targets. --python-backtrace cuda, none, false none Collect Python backtrace event when tracing the selected API\u2019s trigger. This option is supported on Arm server (SBSA) platforms and x86 Linux targets. Note: the selected API tracing must be enabled. For example, --cudabacktrace must be set when using --python-backtrace=cuda . --python-sampling true, false false Collect Python backtrace sampling events. This option is supported on Arm server (SBSA) platforms, x86 Linux and Windows targets. 
Note: When profiling Python-only workflows, consider disabling the CPU sampling option to reduce overhead. --python-sampling-frequency 1 < integers < 2000 1000 Specify the Python sampling frequency. The minimum supported frequency is 1Hz. The maximum supported frequency is 2KHz. This option is ignored if the --python-sampling option is set to false. --qnx-kernel-events class/event,event, class/event:mode, class:mode,help,none none Multiple values can be selected, separated by commas only (no spaces). See the --qnx-kernel-events-mode switch description for \u2018:mode\u2019 format. Use the --qnx-kernel-events=help switch to see the full list of values. Example: --qnx-kernel-events=8/1:system:wide,_NTO_TRACE_THREAD:process:fast, \\_NTO_TRACE_KERCALLENTER/\\__KER_BAD,_NTO_TRACE_COMM,13 . Collect QNX kernel events. --qnx-kernel-events-mode system,process,fast,wide system:fast Values are separated by a colon (\u2018:\u2019) only (no spaces). \u2018system\u2019 and \u2018process\u2019 cannot be specified at the same time. \u2018fast\u2019 and \u2018wide\u2019 cannot be specified at the same time. Please check the QNX documentation to determine when to select the \u2018fast\u2019 or \u2018wide\u2019 mode. Specify the default mode for QNX kernel events collection. --resolve-symbols true,false true Resolve symbols of captured samples and backtraces. --retain-etw-files true, false false Retain ETW files generated by the trace, merge and move the files to the output directory. --run-as < username > none Run the target application as the specified username. If not specified, the target application will be run by the same user as Nsight Systems . Requires root privileges. Available for Linux targets only. -s --sample process-tree, system-wide, none process-tree Select how to collect CPU IP/backtrace samples. If \u2018none\u2019 is selected, CPU sampling is disabled. Depending on the platform, some values may require admin or root privileges. 
If a target application is launched, the default is \u2018process-tree\u2019, otherwise, the default is \u2018none\u2019. Note: \u2018system-wide\u2019 is not available on all platforms. Note: If set to \u2018none\u2019, CPU context switch data will still be collected unless the --cpuctxsw switch is set to \u2018none\u2019. --samples-per-backtrace integer <= 32 1 The number of CPU IP samples collected for every CPU IP/backtrace sample collected. For example, if set to 4, on the fourth CPU IP sample collected, a backtrace will also be collected. Lower values increase the amount of data collected. Higher values can reduce collection overhead and reduce the number of CPU IP samples dropped. If DWARF backtraces are collected, the default is 4, otherwise the default is 1. This option is not available on Nsight Systems Embedded Platforms Edition or on non-Linux targets. --sampling-frequency 100 < integers < 8000 1000 Specify the sampling/backtracing frequency. The minimum supported frequency is 100 Hz. The maximum supported frequency is 8000 Hz. This option is supported only on QNX, Linux for Tegra, and Windows targets. --sampling-period ( Nsight Systems Embedded Platforms Edition ) integer determined dynamically The number of CPU Cycle events counted before a CPU instruction pointer (IP) sample is collected. If configured, backtraces may also be collected. The smaller the sampling period, the higher the sampling rate. Note that smaller sampling periods will increase overhead and significantly increase the size of the result file(s). Requires --sampling-trigger=perf switch. --sampling-period (not Nsight Systems Embedded Platforms Edition ) integer determined dynamically The number of events counted before a CPU instruction pointer (IP) sample is collected. The event used to trigger the collection of a sample is determined dynamically. For example, on Intel based platforms, it will probably be \u201cReference Cycles\u201d and on AMD platforms, \u201cCPU Cycles\u201d. 
If configured, backtraces may also be collected. The smaller the sampling period, the higher the sampling rate. Note that smaller sampling periods will increase overhead and significantly increase the size of the result file(s). This option is available only on Linux targets. --sampling-trigger timer, sched, perf, cuda timer,sched Specify backtrace collection trigger. Multiple APIs can be selected, separated by commas only (no spaces). Available on Nsight Systems Embedded Platforms Edition targets only. --session session identifier none Launch the application in the indicated session. The option argument must represent a valid session name or ID as reported by nsys sessions list . Any %q{ENV_VAR} pattern will be substituted with the value of the environment variable. Any %h pattern will be substituted with the hostname of the system. Any %% pattern will be substituted with % . --session-new [a-Z][0-9,a-Z,spaces] profile-<id>-<application> Name the session created by the command. Name must start with an alphabetical character followed by printable or space characters. Any %q{ENV_VAR} pattern will be substituted with the value of the environment variable. Any %h pattern will be substituted with the hostname of the system. Any %% pattern will be substituted with % . -w --show-output true, false true If true, send target process\u2019s stdout and stderr streams to both the console and stdout/stderr files which are added to the report file. If false, only send target process stdout and stderr streams to the stdout/stderr files which are added to the report file. -t --trace cuda, nvtx, cublas, cublas-verbose, cusparse, cusparse-verbose, cudnn, cudla, cudla-verbose, cusolver, cusolver-verbose, opengl, opengl-annotations, openacc, openmp, osrt, mpi, nvvideo, vulkan, vulkan-annotations, dx11, dx11-annotations, dx12, dx12-annotations, oshmem, ucx, wddm, tegra-accelerators, python-gil, none cuda, opengl, nvtx, osrt Select the API(s) to be traced. 
The osrt switch controls the OS runtime libraries tracing. Multiple APIs can be selected, separated by commas only (no spaces). Since OpenACC and cuXXX APIs are tightly linked with CUDA, selecting one of those APIs will automatically enable CUDA tracing. cublas, cudla, cusparse and cusolver all have XXX-verbose options available. Reflex SDK latency markers will be automatically collected when DX or vulkan API trace is enabled. See information on --mpi-impl option below if mpi is selected. If \u2018<api>-annotations\u2019 is selected, the corresponding API will also be traced. If the none option is selected, no APIs are traced and no other API can be selected. Note: cublas, cudnn, nvvideo, opengl, and vulkan are not available on IBM Power target. cuddn is not available on Windows target. --trace-fork-before-exec true, false false If true, trace any child process after fork and before they call one of the exec functions. Beware, tracing in this interval relies on undefined behavior and might cause your application to crash or deadlock. Note: This option is only available on Linux target platforms. --vulkan-gpu-workload true, false, individual, batch, none individual If individual or true, trace each Vulkan workload\u2019s GPU activity individually. If batch, trace Vulkan workloads\u2019 GPU activity in vkQueueSubmit call batches. If none or false, do not trace Vulkan workloads\u2019 GPU activity. Note that this switch is applicable only when --trace=vulkan is specified. This option is not supported on QNX. --wait primary,all all If primary, the CLI will wait on the application process termination. If all, the CLI will additionally wait on re-parented processes created by the application. --wddm-additional-events true, false true If true, collect additional range of ETW events, including context status, allocations, sync wait and signal events, etc. Note that this switch is applicable only when --trace=wddm is specified. 
This option is only supported on Windows targets. --wddm-backtraces true, false false If true, collect backtraces of WDDM events. Disabling this data collection can reduce overhead for certain target applications. Note that this switch is applicable only when --trace=wddm is specified. This option is only supported on Windows targets.", "keywords": []}, {"id": 38, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cli-nvprof-command-switch-options", "display_name": "CLI nvprof Command Switch Options", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cli-nvprof-command-switch-options", "priority": -1, "content": "After choosing the nvprof command switch, the following options are available. When you are ready to move to using Nsight Systems CLI directly, see Command Line Options documentation for the nsys switch(es) given below. Note that the nsys implementation and output may vary from nvprof. Usage . nsys nvprof [options] Switch Parameters (Default in Bold) nsys switch Switch Description --annotate-mpi off , openmpi, mpich --trace=mpi AND --mpi-impl Automatically annotate MPI calls with NVTX markers. Specify the MPI implementation installed on your machine. Only OpenMPI and MPICH implementations are supported. --cpu-thread-tracing on, off --trace=osrt Collect information about CPU thread API activity. --profile-api-trace none, runtime, driver, all --trace=cuda Turn on/off CUDA runtime and driver API tracing. For Nsight Systems there is no separate CUDA runtime and CUDA driver trace, so selecting runtime or driver is equivalent to selecting all . --profile-from-start on , off if off use --capture-range=cudaProfilerApi Enable/disable profiling from the start of the application. If disabled, the application can use {cu,cuda}Profiler{Start,Stop} to turn on/off profiling. 
-t --timeout <nanoseconds> default= 0 --duration=seconds If greater than 0, stop the collection and kill the launched application after timeout seconds. nvprof started counting when the CUDA driver is initialized. nsys starts counting immediately. --cpu-profiling on, off --sampling=cpu Turn on/off CPU profiling --openacc-profiling on , off --trace=openacc to turn on Enable/disable recording information from the OpenACC profiling interface. Note: OpenACC profiling interface depends on the presence of the OpenACC runtime. For supported runtimes, see CUDA Trace section of documentation -o --export-profile <filename> --output={filename} and/or --export=sqlite Export named file to be imported or opened in the Nsight Systems GUI. %q{ENV_VAR} in string will be replaced with the set value of the environment variable. If not set this is an error. %h in the string is replaced with the system hostname. %% in the string is replaced with %. %p in the string is not supported currently. Any other character following % is illegal. The default is report1, with the number incrementing to avoid overwriting files, in users working directory. -f --force-overwrite --force-overwrite=true Force overwriting all output files with same name. -h --help --help Print Nsight Systems CLI help -V --version --version Print Nsight Systems CLI version information", "keywords": []}, {"id": 39, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cli-profile-command-switch-options", "display_name": "CLI Profile Command Switch Options", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cli-profile-command-switch-options", "priority": -1, "content": "After choosing the profile command switch, the following options are available. 
Usage: nsys [global-options] profile [options] <application> [application-arguments] Short Long Possible Parameters Default Switch Description --accelerator-trace none,tegra-accelerators none Collect other accelerators workload trace from the hardware engine units. Available in Nsight Systems Embedded Platforms Edition only. --auto-report-name true, false false Derive report file name from collected data uses details of profiled graphics application. Format: [Process Name][GPU Name][Window Resolution][Graphics API] Timestamp .nsys-rep If true, automatically generate report file names. -b --backtrace auto,fp,lbr,dwarf,none Select the backtrace method to use while sampling. The option \u2018lbr\u2019 uses Intel(c) Corporation\u2019s Last Branch Record registers, available only with Intel(c) CPUs codenamed Haswell and later. The option \u2018fp\u2019 is frame pointer and assumes that frame pointers were enabled during compilation. The option \u2018dwarf\u2019 uses DWARF\u2019s CFI (Call Frame Information). Setting the value to \u2018none\u2019 can reduce collection overhead. -c --capture-range none, cudaProfilerApi, hotkey, nvtx none When --capture-range is used, profiling will start only when appropriate start API or hotkey is invoked. If --capture-range is set to none, start/stop API calls and hotkeys will be ignored. Note: Hotkey works for graphic applications only. --capture-range-end none, stop, stop-shutdown, repeat[:N], repeat-shutdown:N stop-shutdown Specify the desired behavior when a capture range ends. Applicable only when used along with --capture-range option. If none , capture range end will be ignored. If stop , collection will stop at capture range end. Any subsequent capture ranges will be ignored. Target app will continue running. If stop-shutdown , collection will stop at capture range end and session will be shutdown. If repeat[:N] , collection will stop at capture range end and subsequent capture ranges will trigger more collections. 
Use the optional :N to specify max number of capture ranges to be honored. Any subsequent capture ranges will be ignored once N capture ranges are collected. If repeat-shutdown:N , same behavior as repeat:N but session will be shutdown after N ranges. For stop-shutdown and repeat-shutdown:N , as always, use --kill option to specify whether target app should be terminated when shutting down session. --clock-frequency-changes true, false false Collect clock frequency changes. Available only in Nsight Systems Embedded Platforms Edition and Arm server (SBSA) platforms --command-file < filename > none Open a file that contains profile switches and parse the switches. Note additional switches on the command line will override switches in the file. This flag can be specified more than once. --cpu-cluster-events 0x16, 0x17, \u2026, none none Collect per-cluster Uncore PMU counters. Multiple values can be selected, separated by commas only (no spaces). Use the --cpu-cluster-events=help switch to see the full list of values. Available in Nsight Systems Embedded Platforms Edition only. --cpu-core-events ( Nsight Systems Embedded Platforms Edition ) 0x11,0x13,\u2026,none none Collect per-core PMU counters. Multiple values can be selected, separated by commas only (no spaces). Use the --cpu-core-events=help switch to see the full list of values. --cpu-core-events (not Nsight Systems Embedded Platforms Edition ) \u2018help\u2019 or the end users selected events in the format \u2018x,y\u2019 \u20182\u2019 i.e. Instructions Retired Select the CPU Core events to sample. Use the --cpu-core-events=help switch to see the full list of events and the number of events that can be collected simultaneously. Multiple values can be selected, separated by commas only (no spaces). Use the --event-sample switch to enable. --cpu-socket-events 0x2a, 0x2c, \u2026, none none Collect per-socket Uncore PMU counters. Multiple values can be selected, separated by commas only (no spaces). 
Use the --cpu-socket-events=help switch to see the full list of values. Available in Nsight Systems Embedded Platforms Edition only. --cpuctxsw process-tree, system-wide, none process-tree Trace OS thread scheduling activity. Select \u2018none\u2019 to disable tracing CPU context switches. Depending on the platform, some values may require admin or root privileges. Note: if the --sample switch is set to a value other than \u2018none\u2019, the --cpuctxsw setting is hardcoded to the same value as the --sample switch. If --sample=none and a target application is launched, the default is \u2018process-tree\u2019, otherwise the default is \u2018none\u2019. Requires --sampling-trigger=perf switch in Nsight Systems Embedded Platforms Edition --cuda-flush-interval milliseconds See Description Set the interval, in milliseconds, when buffered CUDA data is automatically saved to storage. CUDA data buffer saves may cause profiler overhead. Buffer save behavior can be controlled with this switch. If the CUDA flush interval is set to 0 on systems running CUDA 11.0 or newer, buffers are saved when they fill. If a flush interval is set to a non-zero value on such systems, buffers are saved only when the flush interval expires. If a flush interval is set and the profiler runs out of available buffers before the flush interval expires, additional buffers will be allocated as needed. In this case, setting a flush interval can reduce buffer save overhead but increase memory use by the profiler. If the flush interval is set to 0 on systems running older versions of CUDA, buffers are saved at the end of the collection. If the profiler runs out of available buffers, additional buffers are allocated as needed. If a flush interval is set to a non-zero value on such systems, buffers are saved when the flush interval expires. A cuCtxSynchronize call may be inserted into the workflow before the buffers are saved which will cause application overhead. 
In this case, setting a flush interval can reduce memory use by the profiler but may increase save overhead. For collections over 30 seconds an interval of 10 seconds is recommended. Default is 10000 for Nsight Systems Embedded Platforms Edition and 0 otherwise. --cuda-graph-trace graph, node graph If \u2018graph\u2019 is selected, CUDA graphs will be traced as a whole and node activities will not be collected. This will reduce overhead to a minimum, but requires CUDA driver version 515.43 or higher. If \u2018node\u2019 is selected, node activities will be collected, but CUDA graphs will not be traced as a whole. This may cause significant runtime overhead. Default is \u2018graph\u2019 if available, otherwise default is \u2018node\u2019. --cuda-memory-usage true, false false Track the GPU memory usage by CUDA kernels. Applicable only when CUDA tracing is enabled. Note: This feature may cause significant runtime overhead. --cuda-um-cpu-page-faults true, false false This switch tracks the page faults that occur when CPU code tries to access a memory page that resides on the device. Note that this feature may cause significant runtime overhead. Not available on Nsight Systems Embedded Platforms Edition . --cuda-um-gpu-page-faults true, false false This switch tracks the page faults that occur when GPU code tries to access a memory page that resides on the host. Note that this feature may cause significant runtime overhead. Not available on Nsight Systems Embedded Platforms Edition . --cudabacktrace all, none, kernel, memory, sync, other none When tracing CUDA APIs, enable the collection of a backtrace when a CUDA API is invoked. Significant runtime overhead may occur. Values may be combined using \u2018,\u2019. Each value except \u2018none\u2019 may be appended with a threshold after \u2018:\u2019. Threshold is duration, in nanoseconds, that CUDA APIs must execute before backtraces are collected, e.g. \u2018kernel:500\u2019. 
Default value for each threshold is 1000ns (1us). Note: CPU sampling must be enabled. Note: Not available on IBM Power targets. -y --delay < seconds > 0 Collection start delay in seconds. -d --duration < seconds > NA Collection duration in seconds, duration must be greater than zero. The launched process will be terminated when the specified profiling duration expires unless the user specifies the --kill none option (details below). --duration-frames 60 <= integer Stop the recording session after this many frames have been captured. Note when it is selected cannot include any other stop options. If not specified, the default is disabled. --dx-force-declare-adapter-removal-support true, false false The Nsight Systems trace initialization involves creating a D3D device and discarding it. Enabling this flag makes a call to DXGIDeclareAdapterRemovalSupport() before device creation. Requires DX11 or DX12 trace to be enabled. --dx12-gpu-workload true, false, individual, batch, none individual If individual or true, trace each DX12 workload\u2019s GPU activity individually. If batch, trace DX12 workloads\u2019 GPU activity in ExecuteCommandLists call batches. If none or false, do not trace DX12 workloads\u2019 GPU activity. Note that this switch is applicable only when --trace=dx12 is specified. This option is only supported on Windows targets. --dx12-wait-calls true, false true If true, trace wait calls that block on fences for DX12. Note that this switch is applicable only when --trace=dx12 is specified. This option is only supported on Windows targets. --el1-sampling true, false false Enable EL1 sampling. Available in Nsight Systems Embedded Platforms Edition only. --el1-sampling-config < filepath config.json > none EL1 sampling config. Available in Nsight Systems Embedded Platforms Edition only. -e --env-var A=B NA Set environment variable(s) for the application process to be launched. Environment variables should be defined as A=B. 
Multiple environment variables can be specified as A=B,C=D. --etw-provider \u201c<name>,<guid>\u201d, or path to JSON file none Add custom ETW trace provider(s). If you want to specify more attributes than Name and GUID, provide a JSON configuration file as as outlined below. This switch can be used multiple times to add multiple providers. Note: Only available for Windows targets. --event-sample system-wide, none none Use the --cpu-core-events=help and the --os-events=help switches to see the full list of events. If event sampling is enabled and no events are selected, the CPU Core event \u2018Instructions Retired\u2019 is selected by default. Not available on Nsight Systems Embedded Platforms Edition . --event-sampling-frequency Integers from 1 to 20 Hz 3 The sampling frequency used to collect event counts. Minimum event sampling frequency is 1 Hz. Maximum event sampling frequency is 20 Hz. Not available in Nsight Systems Embedded Platforms Edition . --export arrow, hdf, json, sqlite, text, none none Create additional output file(s) based on the data collected. This option can be given more than once. WARNING: If the collection captures a large amount of data, creating the export file may take several minutes to complete. --flush-on-cudaprofilerstop true, false true If set to true, any call to cudaProfilerStop() will cause the CUDA trace buffers to be flushed. Note that the CUDA trace buffers will be flushed when the collection ends, irrespective of the value of this switch. -f --force-overwrite true, false false If true, overwrite all existing result files with same output filename (.qdstrm, .nsys-rep, .arrows, .h5, .json, .sqlite, .txt). --ftrace Collect ftrace events. Argument should list events to collect as: subsystem1/event1,subsystem2/event2. Requires root. No ftrace events are collected by default. Note: Not available on IBM Power targets. --ftrace-keep-user-config Skip initial ftrace setup and collect already configured events. 
Default resets the ftrace configuration. --gpu-metrics-device GPU ID, help, all, none none Collect GPU Metrics from specified devices. Determine GPU IDs by using --gpu-metrics-device=help switch. --gpu-metrics-frequency integer 10000 Specify GPU Metrics sampling frequency. Minimum supported frequency is 10 (Hz). Maximum supported frequency is 200000 (Hz). --gpu-metrics-set index, alias Specify metric set for GPU Metrics. The argument must be one of indices or aliases reported by --gpu-metrics-set=help switch. If not specified, the default is the first metric set that supports all selected GPUs. --gpuctxsw true,false false Trace GPU context switches. Note that this requires driver r435.17 or later and root permission. Not supported on IBM Power targets. --help <tag> none Print the help message. The option can take one optional argument that will be used as a tag. If a tag is provided, only options relevant to the tag will be printed. --hotkey-capture \u2018F1\u2019 to \u2018F12\u2019 \u2018F12\u2019 Hotkey to trigger the profiling session. Note that this switch is applicable only when --capture-range=hotkey is specified. --ib-switch-congestion-device <IB switch GUIDs> none A comma-separated list of InfiniBand switch GUIDs. Collect InfiniBand switch congestion events from switches identified by the specified GUIDs. This switch can be used multiple times. System scope. Use the \u2013ib-switch-congestion-nic-device, \u2013ib-switch-congestion-percent, and \u2013ib-switch-congestion-threshold-high switches to further control how congestion events are collected. --ib-switch-congestion-nic-device <NIC name> none The name of the NIC (HCA) through which InfiniBand switches will be accessed. By default, the first active NIC will be used. One way to find a NIC\u2019s name is via the ibnetdiscover \u2013Hca_list | grep \u201c$(hostname)\u201d command. 
Example usage: \u2013ib-switch-congestion-nic-device=mlx5_3 --ib-switch-congestion-percent 1 <= integer <= 100 50 Percent of InfiniBand switch congestion events to be collected. This option enables reducing the network bandwidth consumed by reporting congestion events. --ib-switch-congestion-threshold-high 1 <= integer <= 1023 75 High threshold percentage for InfiniBand switch egress port buffer size. Before a packet leaves an InfiniBand switch, it is stored at an egress port buffer. The buffer\u2019s size is checked and if it exceeds the given threshold percentage, a congestion event is reported. The percentage can be greater than 100. --ib-switch-metrics-device <IB switch GUIDs> none A comma-separated list of InfiniBand switch GUIDs. Collect metrics from the specified InfiniBand switches. This switch can be used multiple times. System scope. -n --inherit-environment true, false true When true, the current environment variables and the tool\u2019s environment variables will be specified for the launched process. When false, only the tool\u2019s environment variables will be specified for the launched process. --injection-use-detours true,false true Use detours for injection. If false, process injection will be performed by windows hooks which allows it to bypass anti-cheat software. --isr true, false false Trace Interrupt Service Routines (ISRs) and Deferred Procedure Calls (DPCs). Requires administrative privileges. Available only on Windows devices. --kill none, sigkill, sigterm, signal number sigterm Send signal to the target application\u2019s process group. Can be used with --duration or range markers. --mpi-impl openmpi,mpich openmpi When using --trace=mpi to trace MPI APIs use --mpi-impl to specify which MPI implementation the application is using. If no MPI implementation is specified, nsys tries to automatically detect it based on the dynamic linker\u2019s search path. If this fails, \u2018openmpi\u2019 is used. 
Calling --mpi-impl without --trace=mpi is not supported. --nic-metrics true, false false Collect metrics from supported NIC/HCA devices. System scope. Not available on Nsight Systems Embedded Platforms Edition . -p --nvtx-capture range@domain, range, range@* none Specify NVTX range and domain to trigger the profiling session. This option is applicable only when used along with --capture-range=nvtx . --nvtx-domain-exclude default, <domain_names> Choose to exclude NVTX events from a comma separated list of domains. \u2018default\u2019 excludes NVTX events without a domain. A domain with this name or commas in a domain name must be escaped with \u2018\\\u2019. Note: Only one of --nvtx-domain-include and --nvtx-domain-exclude can be used. This option is only applicable when --trace=nvtx is specified. --nvtx-domain-include default, <domain_names> Choose to only include NVTX events from a comma separated list of domains. \u2018default\u2019 filters the NVTX default domain. A domain with this name or commas in a domain name must be escaped with \u2018\\\u2019. Note: Only one of --nvtx-domain-include and --nvtx-domain-exclude can be used. This option is only applicable when --trace=nvtx is specified. --python-nvtx-annotations <json_file> Specify the path to the JSON file containing the requested NVTX annotations. --opengl-gpu-workload true, false true If true, trace the OpenGL workloads\u2019 GPU activity. Note that this switch is applicable only when --trace=opengl is specified. This option is not supported on IBM Power targets. --os-events \u2018help\u2019 or the end users selected events in the format \u2018x,y\u2019 Select the OS events to sample. Use the --os-events=help switch to see the full list of events. Multiple values can be selected, separated by commas only (no spaces). Use the --event-sample switch to enable. Not available on Nsight Systems Embedded Platforms Edition . 
--osrt-backtrace-depth integer 24 Set the depth for the backtraces collected for OS runtime libraries calls. --osrt-backtrace-stack-size integer 6144 Set the stack dump size, in bytes, to generate backtraces for OS runtime libraries calls. --osrt-backtrace-threshold nanoseconds 80000 Set the duration, in nanoseconds, that all OS runtime libraries calls must execute before backtraces are collected. --osrt-threshold < nanoseconds > 1000 ns Set the duration, in nanoseconds, that Operating System Runtime (osrt) APIs must execute before they are traced. Values significantly less than 1000 may cause significant overhead and result in extremely large result files. Note: Not available for IBM Power targets. -o --output < filename > report# Set report file name. Any %q{ENV_VAR} pattern in the filename will be substituted with the value of the environment variable. Any %h pattern in the filename will be substituted with the hostname of the system. Any %p pattern in the filename will be substituted with the PID of the target process or the PID of the root process if there is a process tree. Any %% pattern in the filename will be substituted with %. Default is report#.{qdstrm,nsys-rep,sqlite,h5,txt,arrows,json} in the working directory. --process-scope main, process-tree, system-wide main Select which process(es) to trace. Available in Nsight Systems Embedded Platforms Edition only. Nsight Systems Workstation Edition will always trace system-wide in this version of the tool. --python-backtrace cuda, none, false none Collect Python backtrace event when tracing the selected API\u2019s trigger. This option is supported on Arm server (SBSA) platforms and x86 Linux targets. Note: the selected API tracing must be enabled. For example, --cudabacktrace must be set when using --python-backtrace=cuda . --python-sampling true, false false Collect Python backtrace sampling events. This option is supported on Arm server (SBSA) platforms, x86 Linux and Windows targets. 
Note: When profiling Python-only workflows, consider disabling the CPU sampling option to reduce overhead. --python-sampling-frequency 1 < integers < 2000 1000 Specify the Python sampling frequency. The minimum supported frequency is 1Hz. The maximum supported frequency is 2KHz. This option is ignored if the --python-sampling option is set to false. --qnx-kernel-events class/event,event, class/event:mode, class:mode,help,none none Multiple values can be selected, separated by commas only (no spaces). See the --qnx-kernel-events-mode switch description for \u2018:mode\u2019 format. Use the --qnx-kernel-events=help switch to see the full list of values. Example: --qnx-kernel-events=8/1:system:wide,_NTO_TRACE_THREAD:process:fast, \\_NTO_TRACE_KERCALLENTER/\\__KER_BAD,_NTO_TRACE_COMM,13 . Collect QNX kernel events. --qnx-kernel-events-mode system,process,fast,wide system:fast Values are separated by a colon (\u2018:\u2019) only (no spaces). \u2018system\u2019 and \u2018process\u2019 cannot be specified at the same time. \u2018fast\u2019 and \u2018wide\u2019 cannot be specified at the same time. Please check the QNX documentation to determine when to select the \u2018fast\u2019 or \u2018wide\u2019 mode. Specify the default mode for QNX kernel events collection. --resolve-symbols true,false true Resolve symbols of captured samples and backtraces. --retain-etw-files true, false false Retain ETW files generated by the trace, merge and move the files to the output directory. --run-as < username > none Run the target application as the specified username. If not specified, the target application will be run by the same user as Nsight Systems . Requires root privileges. Available for Linux targets only. -s --sample process-tree, system-wide, none process-tree Select how to collect CPU IP/backtrace samples. If \u2018none\u2019 is selected, CPU sampling is disabled. Depending on the platform, some values may require admin or root privileges. 
If a target application is launched, the default is \u2018process-tree\u2019, otherwise, the default is \u2018none\u2019. Note: \u2018system-wide\u2019 is not available on all platforms. Note: If set to \u2018none\u2019, CPU context switch data will still be collected unless the --cpuctxsw switch is set to \u2018none\u2019. --samples-per-backtrace integer <= 32 1 The number of CPU IP samples collected for every CPU IP/backtrace sample collected. For example, if set to 4, on the fourth CPU IP sample collected, a backtrace will also be collected. Lower values increase the amount of data collected. Higher values can reduce collection overhead and reduce the number of CPU IP samples dropped. If DWARF backtraces are collected, the default is 4, otherwise the default is 1. This option is not available on Nsight Systems Embedded Platforms Edition or on non-Linux targets. --sampling-frequency 100 < integers < 8000 1000 Specify the sampling/backtracing frequency. The minimum supported frequency is 100 Hz. The maximum supported frequency is 8000 Hz. This option is supported only on QNX, Linux for Tegra, and Windows targets. --sampling-period ( Nsight Systems Embedded Platforms Edition ) integer determined dynamically The number of CPU Cycle events counted before a CPU instruction pointer (IP) sample is collected. If configured, backtraces may also be collected. The smaller the sampling period, the higher the sampling rate. Note that smaller sampling periods will increase overhead and significantly increase the size of the result file(s). Requires --sampling-trigger=perf switch. --sampling-period (not Nsight Systems Embedded Platforms Edition ) integer determined dynamically The number of events counted before a CPU instruction pointer (IP) sample is collected. The event used to trigger the collection of a sample is determined dynamically. For example, on Intel based platforms, it will probably be \u201cReference Cycles\u201d and on AMD platforms, \u201cCPU Cycles\u201d. 
If configured, backtraces may also be collected. The smaller the sampling period, the higher the sampling rate. Note that smaller sampling periods will increase overhead and significantly increase the size of the result file(s). This option is available only on Linux targets. --sampling-trigger timer, sched, perf, cuda timer,sched Specify backtrace collection trigger. Multiple APIs can be selected, separated by commas only (no spaces). Available on Nsight Systems Embedded Platforms Edition targets only. --session-new [a-Z][0-9,a-Z,spaces] profile-<id>-<application> Name the session created by the command. Name must start with an alphabetical character followed by printable or space characters. Any %q{ENV_VAR} pattern will be substituted with the value of the environment variable. Any %h pattern will be substituted with the hostname of the system. Any %% pattern will be substituted with % . -w --show-output true, false true If true, send target process\u2019s stdout and stderr streams to both the console and stdout/stderr files which are added to the report file. If false, only send target process stdout and stderr streams to the stdout/stderr files which are added to the report file. --soc-metrics true,false false Collect SOC Metrics. Available in Nsight Systems Embedded Platforms Edition only. --soc-metrics-frequency integer 10000 Specify SOC Metrics sampling frequency. Minimum supported frequency is \u2018100\u2019 (Hz). Maximum supported frequency is \u20181000000\u2019 (Hz). Available in Nsight Systems Embedded Platforms Edition only. --soc-metrics-set see description see description Specify metric set for SOC Metrics sampling. The option argument must be one of indices or aliases reported by --soc-metrics-set=help switch. Default is the first supported set. Available in Nsight Systems Embedded Platforms Edition only. --start-frame-index 1 <= integer Start the recording session when the frame index reaches the frame number preceding the start frame index. 
Note when it is selected cannot include any other start options. If not specified, the default is disabled. --stats true, false false Generate summary statistics after the collection. WARNING: When set to true, an SQLite database will be created after the collection. If the collection captures a large amount of data, creating the database file may take several minutes to complete. -x --stop-on-exit true, false true If true, stop collecting automatically when the launched process has exited or when the duration expires - whichever occurs first. If false, duration must be set and the collection stops only when the duration expires. Nsight Systems does not officially support runs longer than 5 minutes. -t --trace cuda, nvtx, cublas, cublas-verbose, cusparse, cusparse-verbose, cudnn, cudla, cudla-verbose, cusolver, cusolver-verbose, opengl, opengl-annotations, openacc, openmp, osrt, mpi, nvvideo, vulkan, vulkan-annotations, dx11, dx11-annotations, dx12, dx12-annotations, oshmem, ucx, wddm, tegra-accelerators, python-gil, none cuda, opengl, nvtx, osrt Select the API(s) to be traced. The osrt switch controls the OS runtime libraries tracing. Multiple APIs can be selected, separated by commas only (no spaces). Since OpenACC and cuXXX APIs are tightly linked with CUDA, selecting one of those APIs will automatically enable CUDA tracing. cublas, cudla, cusparse and cusolver all have XXX-verbose options available. Reflex SDK latency markers will be automatically collected when DX or vulkan API trace is enabled. See information on --mpi-impl option below if mpi is selected. If \u2018<api>-annotations\u2019 is selected, the corresponding API will also be traced. If the none option is selected, no APIs are traced and no other API can be selected. Note: cublas, cudnn, nvvideo, opengl, and vulkan are not available on IBM Power target. cuddn is not available on Windows target. 
--trace-fork-before-exec true, false false If true, trace any child process after fork and before they call one of the exec functions. Beware, tracing in this interval relies on undefined behavior and might cause your application to crash or deadlock. Note: This option is only available on Linux target platforms. --vsync true, false false Collect vsync events. If collection of vsync events is enabled, display/display_scanline ftrace events will also be captured. Available in Nsight Systems Embedded Platforms Edition only. --vulkan-gpu-workload true, false, individual, batch, none individual If individual or true, trace each Vulkan workload\u2019s GPU activity individually. If batch, trace Vulkan workloads\u2019 GPU activity in vkQueueSubmit call batches. If none or false, do not trace Vulkan workloads\u2019 GPU activity. Note that this switch is applicable only when --trace=vulkan is specified. This option is not supported on QNX. --wait primary,all all If primary, the CLI will wait on the application process termination. If all, the CLI will additionally wait on re-parented processes created by the application. --wddm-additional-events true, false true If true, collect additional range of ETW events, including context status, allocations, sync wait and signal events, etc. Note that this switch is applicable only when --trace=wddm is specified. This option is only supported on Windows targets. --wddm-backtraces true, false false If true, collect backtraces of WDDM events. Disabling this data collection can reduce overhead for certain target applications. Note that this switch is applicable only when --trace=wddm is specified. This option is only supported on Windows targets. --xhv-trace < filepath pct.json > none Collect hypervisor trace. Available in Nsight Systems Embedded Platforms Edition only. 
--xhv-trace-events all, none, core, sched, irq, trap all Available in Nsight Systems Embedded Platforms Edition only.", "keywords": []}, {"id": 40, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cli-sessions-command-switch-subcommands", "display_name": "CLI Sessions Command Switch Subcommands", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cli-sessions-command-switch-subcommands", "priority": -1, "content": "After choosing the sessions command switch, the following subcommands are available. Usage: nsys [global-options] sessions [subcommand] Subcommand Description list List all active sessions including ID, name, and state information", "keywords": []}, {"id": 41, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cli-sessions-list-command-switch-options", "display_name": "CLI Sessions List Command Switch Options", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cli-sessions-list-command-switch-options", "priority": -1, "content": "After choosing the sessions list command switch, the following options are available. Usage: nsys [global-options] sessions list [options] Short Long Possible Parameters Default Switch Description --help <tag> none Print the help message. The option can take one optional argument that will be used as a tag. If a tag is provided, only options relevant to the tag will be printed. 
-p --show-header true, false true Controls whether a header should appear in the output.", "keywords": []}, {"id": 42, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cli-shutdown-command-switch-options", "display_name": "CLI Shutdown Command Switch Options", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cli-shutdown-command-switch-options", "priority": -1, "content": "After choosing the shutdown command switch, the following options are available. Usage: nsys [global-options] shutdown [options] Short Long Possible Parameters Default Switch Description --help <tag> none Print the help message. The option can take one optional argument that will be used as a tag. If a tag is provided, only options relevant to the tag will be printed. --kill On Linux: one, sigkill, sigterm, signal number On Windows: true, false On Linux: sigterm On Windows: true Send signal to the target application\u2019s process group when shutting down session. --session session identifier none Shutdown the indicated session. The option argument must represent a valid session name or ID as reported by nsys sessions list . Any %q{ENV_VAR} pattern will be substituted with the value of the environment variable. Any %h pattern will be substituted with the hostname of the system. Any %% pattern will be substituted with % .", "keywords": []}, {"id": 43, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cli-start-command-switch-options", "display_name": "CLI Start Command Switch Options", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cli-start-command-switch-options", "priority": -1, "content": "After choosing the start command switch, the following options are available. 
Usage: nsys [global-options] start [options] Short Long Possible Parameters Default Switch Description --accelerator-trace none,tegra-accelerators none Collect other accelerators workload trace from the hardware engine units. Only available on Nsight Systems Embedded Platforms Edition . -b --backtrace auto,fp,lbr,dwarf,none Select the backtrace method to use while sampling. The option \u2018lbr\u2019 uses Intel(c) Corporation\u2019s Last Branch Record registers, available only with Intel(c) CPUs codenamed Haswell and later. The option \u2018fp\u2019 is frame pointer and assumes that frame pointers were enabled during compilation. The option \u2018dwarf\u2019 uses DWARF\u2019s CFI (Call Frame Information). Setting the value to \u2018none\u2019 can reduce collection overhead. -c --capture-range none, cudaProfilerApi, hotkey, nvtx none When --capture-range is used, profiling will start only when appropriate start API or hotkey is invoked. If --capture-range is set to none, start/stop API calls and hotkeys will be ignored. Note: hotkey works for graphic applications only. CUDA or NVTX tracing must be enabled on the target application for \u2018-c cudaProfilerApi\u2019 or \u2018-c nvtx\u2019 to work. --capture-range-end none, stop, stop-shutdown, repeat[:N], repeat-shutdown:N stop-shutdown Specify the desired behavior when a capture range ends. Applicable only when used along with --capture-range option. If none , capture range end will be ignored. If stop , collection will stop at capture range end. Any subsequent capture ranges will be ignored. Target app will continue running. If stop-shutdown , collection will stop at capture range end and session will be shutdown. If repeat[:N] , collection will stop at capture range end and subsequent capture ranges will trigger more collections. Use the optional :N to specify max number of capture ranges to be honored. Any subsequent capture ranges will be ignored once N capture ranges are collected. 
If repeat-shutdown:N , same behavior as repeat:N but session will be shutdown after N ranges. For stop-shutdown and repeat-shutdown:N , as always use --kill option to specify whether target app should be terminated when shutting down session. --cpu-core-events (not Nsight Systems Embedded Platforms Edition ) \u2018help\u2019 or the end users selected events in the format \u2018x,y\u2019 \u20182\u2019 i.e. Instructions Retired Select the CPU Core events to sample. Use the --cpu-core-events=help switch to see the full list of events and the number of events that can be collected simultaneously. Multiple values can be selected, separated by commas only (no spaces). Use the --event-sample switch to enable. --cpuctxsw process-tree, system-wide, none process-tree Trace OS thread scheduling activity. Select \u2018none\u2019 to disable tracing CPU context switches. Depending on the platform, some values may require admin or root privileges. Note: if the --sample switch is set to a value other than \u2018none\u2019, the --cpuctxsw setting is hardcoded to the same value as the --sample switch. If --sample=none and a target application is launched, the default is \u2018process-tree\u2019, otherwise the default is \u2018none\u2019. Requires --sampling-trigger=perf switch in Nsight Systems Embedded Platforms Edition . --el1-sampling true, false false Enable EL1 sampling. Available in Nsight Systems Embedded Platforms Edition only. --el1-sampling-config < filepath config.json > none EL1 sampling config. Available in Nsight Systems Embedded Platforms Edition only. --etw-provider \u201c<name>,<guid>\u201d, or path to JSON file none Add custom ETW trace provider(s). If you want to specify more attributes than Name and GUID, provide a JSON configuration file as as outlined below. This switch can be used multiple times to add multiple providers. Note: Only available for Windows targets. 
--event-sample system-wide, none none Use the --cpu-core-events=help and the --os-events=help switches to see the full list of events. If event sampling is enabled and no events are selected, the CPU Core event \u2018Instructions Retired\u2019 is selected by default. Not available in Nsight Systems Embedded Platforms Edition . --event-sampling-frequency Integers from 1 to 20 Hz 3 The sampling frequency used to collect event counts. Minimum event sampling frequency is 1 Hz. Maximum event sampling frequency is 20 Hz. Not available in Nsight Systems Embedded Platforms Edition . --export arrow, hdf, json, sqlite, text, none none Create additional output file(s) based on the data collected. This option can be given more than once. WARNING: If the collection captures a large amount of data, creating the export file may take several minutes to complete. --flush-on-cudaprofilerstop true, false true If set to true, any call to cudaProfilerStop() will cause the CUDA trace buffers to be flushed. Note that the CUDA trace buffers will be flushed when the collection ends, irrespective of the value of this switch. -f --force-overwrite true, false false If true, overwrite all existing result files with same output filename (.qdstrm, .nsys-rep, .arrows, .hdf, .json, .sqlite, .txt). --ftrace Collect ftrace events. Argument should list events to collect as: subsystem1/event1,subsystem2/event2. Requires root. No ftrace events are collected by default. Note: Not supported on IBM Power targets. --ftrace-keep-user-config true, false false Skip initial ftrace setup and collect already configured events. Default resets the ftrace configuration. --gpu-metrics-device GPU ID, help, all, none none Collect GPU Metrics from specified devices. Determine GPU IDs by using --gpu-metrics-device=help switch. --gpu-metrics-frequency integer 10000 Specify GPU Metrics sampling frequency. Minimum supported frequency is 10 (Hz). Maximum supported frequency is 200000(Hz). 
--gpu-metrics-set index first Specify metric set for GPU Metrics sampling. The argument must be one of indices reported by --gpu-metrics-set=help switch. Default is the first metric set that supports selected GPU. --gpuctxsw true,false false Trace GPU context switches. Note that this requires driver r435.17 or later and root permission. Not supported on IBM Power targets. --help <tag> none Print the help message. The option can take one optional argument that will be used as a tag. If a tag is provided, only options relevant to the tag will be printed. --ib-switch-congestion-device <IB switch GUIDs> none A comma-separated list of InfiniBand switch GUIDs. Collect InfiniBand switch congestion events from switches identified by the specified GUIDs. This switch can be used multiple times. System scope. Use the \u2013ib-switch-congestion-nic-device, \u2013ib-switch-congestion-percent, and \u2013ib-switch-congestion-threshold-high switches to further control how congestion events are collected. --ib-switch-congestion-nic-device <NIC name> none The name of the NIC (HCA) through which InfiniBand switches will be accessed. By default, the first active NIC will be used. One way to find a NIC\u2019s name is via the ibnetdiscover \u2013Hca_list | grep \u201c$(hostname)\u201d command. Example usage: \u2013ib-switch-congestion-nic-device=mlx5_3 --ib-switch-congestion-percent 1 <= integer <= 100 50 Percent of InfiniBand switch congestion events to be collected. This option enables reducing the network bandwidth consumed by reporting congestion events. --ib-switch-congestion-threshold-high 1 <= integer <= 1023 75 High threshold percentage for InfiniBand switch egress port buffer size. Before a packet leaves an InfiniBand switch, it is stored at an egress port buffer. The buffer\u2019s size is checked and if it exceeds the given threshold percentage, a congestion event is reported. The percentage can be greater than 100. 
--isr true, false false Trace Interrupt Service Routines (ISRs) and Deferred Procedure Calls (DPCs). Requires administrative privileges. Available only on Windows devices. --nic-metrics true, false false Collect metrics from supported NIC/HCA devices. System scope. Not available on Nsight Systems Embedded Platforms Edition . --os-events \u2018help\u2019 or the end users selected events in the format \u2018x,y\u2019 none Select the OS events to sample. Use the --os-events=help switch to see the full list of events. Multiple values can be selected, separated by commas only (no spaces). Use the --event-sample switch to enable. Not available in Nsight Systems Embedded Platforms Edition . -o --output < filename > report# Set report file name. Any %q{ENV_VAR} pattern in the filename will be substituted with the value of the environment variable. Any %h pattern in the filename will be substituted with the hostname of the system. Any %p pattern in the filename will be substituted with the PID of the target process or the PID of the root process if there is a process tree. Any %% pattern in the filename will be substituted with %. Default is report#.{nsys-rep,sqlite,h5,txt,arrows,json} in the working directory. --process-scope main, process-tree, system-wide main Select which process(es) to trace. Available in Nsight Systems Embedded Platforms Edition only. Nsight Systems Workstation Edition will always trace system-wide in this version of the tool. --retain-etw-files true, false false Retain ETW files generated by the trace, merge and move the files to the output directory. -s --sample process-tree, system-wide, none process-tree Select how to collect CPU IP/backtrace samples. If \u2018none\u2019 is selected, CPU sampling is disabled. Depending on the platform, some values may require admin or root privileges. If a target application is launched, the default is \u2018process-tree\u2019, otherwise, the default is \u2018none\u2019. 
Note: \u2018system-wide\u2019 is not available on all platforms. Note: If set to \u2018none\u2019, CPU context switch data will still be collected unless the --cpuctxsw switch is set to \u2018none\u2019. --samples-per-backtrace integer <= 32 1 The number of CPU IP samples collected for every CPU IP/backtrace sample collected. For example, if set to 4, on the fourth CPU IP sample collected, a backtrace will also be collected. Lower values increase the amount of data collected. Higher values can reduce collection overhead and reduce the number of CPU IP samples dropped. If DWARF backtraces are collected, the default is 4, otherwise the default is 1. This option is not available on Nsight Systems Embedded Platforms Edition or on non-Linux targets. --sampling-frequency integers between 100 and 8000 1000 Specify the sampling/backtracing frequency. The minimum supported frequency is 100 Hz. The maximum supported frequency is 8000 Hz. This option is supported only on QNX, Linux for Tegra, and Windows targets. Requires --sampling-trigger=perf switch in Nsight Systems Embedded Platforms Edition --sampling-period ( Nsight Systems Embedded Platforms Edition ) integer determined dynamically The number of CPU Cycle events counted before a CPU instruction pointer (IP) sample is collected. If configured, backtraces may also be collected. The smaller the sampling period, the higher the sampling rate. Note that smaller sampling periods will increase overhead and significantly increase the size of the result file(s). Requires --sampling-trigger=perf switch. --sampling-period (not Nsight Systems Embedded Platforms Edition ) integer determined dynamically The number of events counted before a CPU instruction pointer (IP) sample is collected. The event used to trigger the collection of a sample is determined dynamically. For example, on Intel based platforms, it will probably be \u201cReference Cycles\u201d and on AMD platforms, \u201cCPU Cycles\u201d. 
If configured, backtraces may also be collected. The smaller the sampling period, the higher the sampling rate. Note that smaller sampling periods will increase overhead and significantly increase the size of the result file(s). This option is available only on Linux targets. --sampling-trigger timer, sched, perf, cuda timer,sched Specify backtrace collection trigger. Multiple APIs can be selected, separated by commas only (no spaces). Available on Nsight Systems Embedded Platforms Edition targets only. --session session identifier none Start the application in the indicated session. The option argument must represent a valid session name or ID as reported by nsys sessions list . Any %q{ENV_VAR} pattern will be substituted with the value of the environment variable. Any %h pattern will be substituted with the hostname of the system. Any %% pattern will be substituted with % . --session-new [a-Z][0-9,a-Z,spaces] [default] Start the application in a new session. Name must start with an alphabetical character followed by printable or space characters. Any %q{ENV_VAR} pattern will be substituted with the value of the environment variable. Any %h pattern will be substituted with the hostname of the system. Any %% pattern will be substituted with % . --soc-metrics true,false false Collect SOC Metrics. Available in Nsight Systems Embedded Platforms Edition only. --soc-metrics-frequency integer 10000 Specify SOC Metrics sampling frequency. Minimum supported frequency is \u2018100\u2019 (Hz). Maximum supported frequency is \u20181000000\u2019 (Hz). Available in Nsight Systems Embedded Platforms Edition only. --soc-metrics-set see description see description Specify metric set for SOC Metrics sampling. The option argument must be one of indices or aliases reported by --soc-metrics-set=help switch. Default is the first supported set. Available in Nsight Systems Embedded Platforms Edition only. --stats true, false false Generate summary statistics after the collection. 
WARNING: When set to true, an SQLite database will be created after the collection. If the collection captures a large amount of data, creating the database file may take several minutes to complete. -x --stop-on-exit true, false true If true, stop collecting automatically when all tracked processes have exited or when stop command is issued - whichever occurs first. If false, stop only on stop command. Note: When this is true, stop command is optional. Nsight Systems does not officially support runs longer than 5 minutes. --vsync true, false false Collect vsync events. If collection of vsync events is enabled, display/display_scanline ftrace events will also be captured. Available in Nsight Systems Embedded Platforms Edition only. --xhv-trace < filepath pct.json > none Collect hypervisor trace. Available in Nsight Systems Embedded Platforms Edition only. --xhv-trace-events all, none, core, sched, irq, trap all Available in Nsight Systems Embedded Platforms Edition only.", "keywords": []}, {"id": 44, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cli-stats-command-switch-options", "display_name": "CLI Stats Command Switch Options", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cli-stats-command-switch-options", "priority": -1, "content": "The nsys stats command generates a series of summary or trace reports. These reports can be output to the console, or to individual files, or piped to external processes. Reports can be rendered in a variety of different output formats, from human readable columns of text, to formats more appropriate for data exchange, such as CSV. Reports are generated from an SQLite export of a .nsys-rep file. If a .nsys-rep file is specified, Nsight Systems will look for an accompanying SQLite file and use it. If no SQLite file exists, one will be exported and created. 
Individual reports are generated by calling out to scripts that read data from the SQLite file and return their report data in CSV format. Nsight Systems ingests this data and formats it as requested, then displays the data to the console, writes it to a file, or pipes it to an external process. Adding new reports is as simple as writing a script that can read the SQLite file and generate the required CSV output. See the shipped scripts as an example. Both reports and formatters may take arguments to tweak their processing. For details on shipped scripts and formatters, see Report Scripts topic. Reports are processed using a three-tuple that consists of 1) the requested report (and any arguments), 2) the presentation format (and any arguments), and 3) the output (filename, console, or external process). The first report specified uses the first format specified, and is presented via the first output specified. The second report uses the second format for the second output, and so forth. If more reports are specified than formats or outputs, the format and/or output list is expanded to match the number of provided reports by repeating the last specified element of the list (or the default, if nothing was specified). nsys stats is a very powerful command and can handle complex argument structures, please see the topic below on Example Stats Command Sequences. After choosing the stats command switch, the following options are available. Usage: nsys [global-options] stats [options] [input-file] Short Long Possible Parameters Default Switch Description --help <tag> none Print the help message. The option can take one optional argument that will be used as a tag. If a tag is provided, only options relevant to the tag will be printed. -f --format column, table, csv, tsv, json, hdoc, htable, . Specify the output format. The special name \u201c.\u201d indicates the default format for the given output. 
The default format for console is column, while files and process outputs default to csv. This option may be used multiple times. Multiple formats may also be specified using a comma-separated list (<name[:args\u2026][,name[:args\u2026]\u2026]>). See Report Scripts for options available with each format. --force-export true, false false Force a re-export of the SQLite file from the specified .nsys-rep file, even if an SQLite file already exists. --force-overwrite true, false false Overwrite any existing report file(s). --help-formats <format_name>, ALL, [none] none With no argument, give a summary of the available output formats. If a format name is given, a more detailed explanation of that format is displayed. If ALL is given, a more detailed explanation of all available formats is displayed. --help-reports <report_name>, ALL, [none] none With no argument, list a summary of the available summary and trace reports. If a report name is given, a more detailed explanation of the report is displayed. If ALL is given, a more detailed explanation of all available reports is displayed. -o --output -, @<command>, <basename>, . - Specify the output mechanism. There are three output mechanisms: print to console, output to file, or output to command. This option may be used multiple times. Multiple outputs may also be specified using a comma-separated list. If the given output name is \u201c-\u201d, the output will be displayed on the console. If the output name starts with \u201c@\u201d, the output designates a command to run. The nsys command will be executed and the analysis output will be piped into the command. Any other output is assumed to be the base path and name for a file. If a file basename is given, the filename used will be: <basename>_<analysis&args>.<output_format>. The default base (including path) is the name of the SQLite file (as derived from the input file or --sqlite option), minus the extension. 
The output \u201c.\u201d can be used to indicate the analysis should be output to a file, and the default basename should be used. To write one or more analysis outputs to files using the default basename, use the option: --output . If the output starts with \u201c@\u201d, the nsys command output is piped to the given command. The command is run, and the output is piped to the command\u2019s stdin (standard-input). The command\u2019s stdout and stderr remain attached to the console, so any output will be displayed directly to the console. Be aware there are some limitations in how the command string is parsed. No shell expansions (including *, ?, [], and ~) are supported. The command cannot be piped to another command, nor redirected to a file using shell syntax. The command and command arguments are split on whitespace, and no quotes (within the command syntax) are supported. For commands that require complex command line syntax, it is suggested that the command be put in a shell script file, and that be designated as the output command. -q --quiet Do not display verbose messages, only display errors. -r --report See Report Scripts Specify the report(s) to generate, including any arguments. This option may be used multiple times. Multiple reports may also be specified using a comma-separated list (<name[:args\u2026][,name[:args\u2026]\u2026]>). If no reports are specified, the following will be used as the default report set: nvtx_sum, osrt_sum, cuda_api_sum, cuda_gpu_kern_sum, cuda_gpu_mem_time_sum, cuda_gpu_mem_size_sum, openmp_sum, opengl_khr_range_sum, opengl_khr_gpu_range_sum, vulkan_marker_sum, vulkan_gpu_marker_sum, dx11_pix_sum, dx12_gpu_marker_sum, dx12_pix_sum, wddm_queue_sum, um_sum, um_total_sum, um_cpu_page_faults_sum, openacc_sum. See Report Scripts section for details about existing built-in scripts and how to make your own. --report-dir <path> Add a directory to the path used to find report scripts. 
This is usually only needed if you have one or more directories with personal scripts. This option may be used multiple times. Each use adds a new directory to the end of the path. A search path can also be defined using the environment variable \u201cNSYS_STATS_REPORT_PATH\u201d. Directories added this way will be added after the application flags. The last two entries in the path will always be the current working directory, followed by the directory containing the shipped nsys reports. --sqlite <file.sqlite> Specify the SQLite export filename. If this file exists, it will be used. If this file doesn\u2019t exist (or if --force-export was given) this file will be created from the specified .nsys-rep file before processing. This option cannot be used if the specified input file is also an SQLite file. --timeunit nsec, nanoseconds, usec, microseconds, msec, milliseconds, seconds nanoseconds Set basic unit of time. The argument of the switch is matched by using the longest prefix matching. Meaning that it is not necessary to write a whole word as the switch argument. It is similar to passing a \u201c:time=<unit>\u201d argument to every formatter, although the formatter uses more strict naming conventions. See nsys stats --help-formats column for more detailed information on unit conversion.", "keywords": []}, {"id": 45, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cli-status-command-switch-options", "display_name": "CLI Status Command Switch Options", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cli-status-command-switch-options", "priority": -1, "content": "The nsys status command returns the current state of the CLI. After choosing the status command switch, the following options are available. 
Usage: nsys [global-options] status [options] Short Long Possible Parameters Default Switch Description --all Prints information for all the available profiling environments. -e --environment Returns information about the system regarding suitability of the profiling environment. --help <tag> none Print the help message. The option can take one optional argument that will be used as a tag. If a tag is provided, only options relevant to the tag will be printed. -n --network Returns information about the system regarding suitability of the network profiling environment. --session session identifier none Print the status of the indicated session. The option argument must represent a valid session name or ID as reported by nsys sessions list . Any %q{ENV_VAR} pattern will be substituted with the value of the environment variable. Any %h pattern will be substituted with the hostname of the system. Any %% pattern will be substituted with % .", "keywords": []}, {"id": 46, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cli-stop-command-switch-options", "display_name": "CLI Stop Command Switch Options", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cli-stop-command-switch-options", "priority": -1, "content": "After choosing the stop command switch, the following options are available. Usage: nsys [global-options] stop [options] Short Long Possible Parameters Default Switch Description --help <tag> none Print the help message. The option can take one optional argument that will be used as a tag. If a tag is provided, only options relevant to the tag will be printed. --session session identifier none Stop the indicated session. The option argument must represent a valid session name or ID as reported by nsys sessions list . Any %q{ENV_VAR} pattern will be substituted with the value of the environment variable. Any %h pattern will be substituted with the hostname of the system. 
Any %% pattern will be substituted with % .", "keywords": []}, {"id": 47, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cli-troubleshooting", "display_name": "CLI Troubleshooting", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cli-troubleshooting", "priority": -1, "content": "If you have collected a report file using the CLI and the report will not open in the GUI, check to see that your GUI version is the same or greater than the CLI version you used. If it is not, download a new version of the Nsight Systems GUI and you will be able to load and visualize your report. This situation occurs most frequently when you update Nsight Systems using a CLI only package, such as the package available from the NVIDIA HPC SDK.", "keywords": []}, {"id": 48, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#command-line-options", "display_name": "Command Line Options", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "command-line-options", "priority": -1, "content": "The Nsight Systems command lines can have one of two forms: nsys [global_option] or nsys [command_switch][optional command_switch_options][application] [optional application_options] All command line options are case sensitive. For command switch options, when short options are used, the parameters should follow the switch after a space; e.g. -s process-tree . When long options are used, the switch should be followed by an equal sign and then the parameter(s); e.g. --sample=process-tree . For this version of Nsight Systems , if you launch a process from the command line to begin analysis, the launched process will be terminated when collection is complete, including runs with --duration set, unless the user specifies the --kill none option (details below). 
The exception is that if the user uses NVTX, cudaProfilerStart/Stop, or hotkeys to control the duration, the application will continue unless --kill is set. The Nsight Systems CLI supports concurrent analysis by using sessions. Each Nsight Systems session is defined by a sequence of CLI commands that define one or more collections (e.g. when and what data is collected). A session begins with either a start, launch, or profile command. A session ends with a shutdown command, when a profile command terminates, or, if requested, when all the process tree(s) launched in the session exit. Multiple sessions can run concurrently on the same system.", "keywords": []}, {"id": 49, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#commands", "display_name": "Commands", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "commands", "priority": -1, "content": "Info To find out report\u2019s start and end time use info command. Usage: ImportNvtxt --cmd info -i [--input] arg Example: ImportNvtxt info Report.nsys-rep Analysis start (ns) 83501026500000 Analysis end (ns) 83506375000000 Create You can create a report file using existing NVTXT with create command. Usage: ImportNvtxt --cmd create -n [--nvtxt] arg -o [--output] arg [-m [--mode] mode_name mode_args] Available modes are: lerp \u2014 insert with linear interpolation. lin \u2014 insert with linear equation. Usage for lerp mode is: --mode lerp --ns_a arg --ns_b arg [--nvtxt_a arg --nvtxt_b arg] with: ns_a \u2014 a nanoseconds value. ns_b \u2014 a nanoseconds value (greater than ns_a ). nvtxt_a \u2014 an nvtxt file\u2019s time unit value corresponding to ns_a nanoseconds. nvtxt_b \u2014 an nvtxt file\u2019s time unit value corresponding to ns_b nanoseconds. If nvtxt_a and nvtxt_b are not specified, they are respectively set to nvtxt file\u2019s minimum and maximum time value. 
Usage for lin mode is: --mode lin --ns_a arg --freq arg [--nvtxt_a arg] with: ns_a \u2014 a nanoseconds value. freq \u2014 the nvtxt file\u2019s timer frequency. nvtxt_a \u2014 an nvtxt file\u2019s time unit value corresponding to ns_a nanoseconds. If nvtxt_a is not specified, it is set to nvtxt file\u2019s minimum time value. Examples: ImportNvtxt --cmd create -n Sample.nvtxt -o Report.nsys-rep The output will be a new generated report file which can be opened and viewed by Nsight Systems . Merge To merge NVTXT file with an existing report file use merge command. Usage: ImportNvtxt --cmd merge -i [--input] arg -n [--nvtxt] arg -o [--output] arg [-m [--mode] mode_name mode_args] Available modes are: lerp \u2014 insert with linear interpolation. lin \u2014 insert with linear equation. Usage for lerp mode is: --mode lerp --ns_a arg --ns_b arg [--nvtxt_a arg --nvtxt_b arg] with: ns_a \u2014 a nanoseconds value. ns_b \u2014 a nanoseconds value (greater than ns_a ). nvtxt_a \u2014 an nvtxt file\u2019s time unit value corresponding to ns_a nanoseconds. nvtxt_b \u2014 an nvtxt file\u2019s time unit value corresponding to ns_b nanoseconds. If nvtxt_a and nvtxt_b are not specified, they are respectively set to nvtxt file\u2019s minimum and maximum time value. Usage for lin mode is: --mode lin --ns_a arg --freq arg [--nvtxt_a arg] with: ns_a \u2014 a nanoseconds value. freq \u2014 the nvtxt file\u2019s timer frequency. nvtxt_a \u2014 an nvtxt file\u2019s time unit value corresponding to ns_a nanoseconds. If nvtxt_a is not specified, it is set to nvtxt file\u2019s minimum time value. Time values in <filename.nvtxt> are assumed to be nanoseconds if no mode specified. 
Example ImportNvtxt --cmd merge -i Report.nsys-rep -n Sample.nvtxt -o NewReport.nsys-rep", "keywords": []}, {"id": 50, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#common-sqlite-examples", "display_name": "Common SQLite Examples", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "common-sqlite-examples", "priority": -1, "content": "Common Helper Commands When utilizing sqlite3 command line tool, it\u2019s helpful to have data printed as named columns, this can be done with: .mode column .headers on Default column width is determined by the data in the first row of results. If this doesn\u2019t work out well, you can specify widths manually. .width 10 20 50 Obtaining Sample Report CLI interface of Nsight Systems was used to profile radixSortThrust CUDA sample, then the resulting .nsys-rep file was exported using the nsys export. nsys profile --trace=cuda,osrt radixSortThrust nsys export --type sqlite report1.nsys-rep Serialized Process and Thread Identifiers Nsight Systems stores identifiers where events originated in serialized form. For events that have globalTid or globalPid fields exported, use the following code to extract numeric TID and PID. SELECT globalTid / 0x1000000 % 0x1000000 AS PID, globalTid % 0x1000000 AS TID FROM TABLE_NAME; Note: globalTid field includes both TID and PID values, while globalPid only contains the PID value. 
Correlate CUDA Kernel Launches With CUDA API Kernel Launches ALTER TABLE CUPTI_ACTIVITY_KIND_RUNTIME ADD COLUMN name TEXT; ALTER TABLE CUPTI_ACTIVITY_KIND_RUNTIME ADD COLUMN kernelName TEXT; UPDATE CUPTI_ACTIVITY_KIND_RUNTIME SET kernelName = (SELECT value FROM StringIds JOIN CUPTI_ACTIVITY_KIND_KERNEL AS cuda_gpu ON cuda_gpu.shortName = StringIds.id AND CUPTI_ACTIVITY_KIND_RUNTIME.correlationId = cuda_gpu.correlationId); UPDATE CUPTI_ACTIVITY_KIND_RUNTIME SET name = (SELECT value FROM StringIds WHERE nameId = StringIds.id); Select 10 longest CUDA API ranges that resulted in kernel execution. SELECT name, kernelName, start, end FROM CUPTI_ACTIVITY_KIND_RUNTIME WHERE kernelName IS NOT NULL ORDER BY end - start LIMIT 10; Results: name kernelName start end ---------------------- ----------------------- ---------- ---------- cudaLaunchKernel_v7000 RadixSortScanBinsKernel 658863435 658868490 cudaLaunchKernel_v7000 RadixSortScanBinsKernel 609755015 609760075 cudaLaunchKernel_v7000 RadixSortScanBinsKernel 632683286 632688349 cudaLaunchKernel_v7000 RadixSortScanBinsKernel 606495356 606500439 cudaLaunchKernel_v7000 RadixSortScanBinsKernel 603114486 603119586 cudaLaunchKernel_v7000 RadixSortScanBinsKernel 802729785 802734906 cudaLaunchKernel_v7000 RadixSortScanBinsKernel 593381170 593386294 cudaLaunchKernel_v7000 RadixSortScanBinsKernel 658759955 658765090 cudaLaunchKernel_v7000 RadixSortScanBinsKernel 681549917 681555059 cudaLaunchKernel_v7000 RadixSortScanBinsKernel 717812527 717817671 Remove Ranges Overlapping With Overhead Use this query to count CUDA API ranges overlapping with the overhead ones. Replace \u201cSELECT COUNT(*)\u201d with \u201cDELETE\u201d to remove such ranges. 
SELECT COUNT(*) FROM CUPTI_ACTIVITY_KIND_RUNTIME WHERE rowid IN ( SELECT cuda.rowid FROM PROFILER_OVERHEAD as overhead INNER JOIN CUPTI_ACTIVITY_KIND_RUNTIME as cuda ON (cuda.start BETWEEN overhead.start and overhead.end) OR (cuda.end BETWEEN overhead.start and overhead.end) OR (cuda.start < overhead.start AND cuda.end > overhead.end) ); Results: COUNT(*) ---------- 1095 Find CUDA API Calls That Resulted in Original Graph Node Creation. SELECT graph.graphNodeId, api.start, graph.start as graphStart, api.end, api.globalTid, api.correlationId, api.globalTid, (SELECT value FROM StringIds where api.nameId == id) as name FROM CUPTI_ACTIVITY_KIND_RUNTIME as api JOIN ( SELECT start, graphNodeId, globalTid from CUDA_GRAPH_EVENTS GROUP BY graphNodeId HAVING COUNT(originalGraphNodeId) = 0 ) as graph ON api.globalTid == graph.globalTid AND api.start < graph.start AND api.end > graph.start ORDER BY graphNodeId; Results: graphNodeId start graphStart end globalTid correlationId globalTid name ----------- ---------- ---------- ---------- --------------- ------------- --------------- ----------------------------- 1 584366518 584378040 584379102 281560221750233 109 281560221750233 cudaGraphAddMemcpyNode_v10000 2 584379402 584382428 584383139 281560221750233 110 281560221750233 cudaGraphAddMemsetNode_v10000 3 584390663 584395352 584396053 281560221750233 111 281560221750233 cudaGraphAddKernelNode_v10000 4 584396314 584397857 584398438 281560221750233 112 281560221750233 cudaGraphAddMemsetNode_v10000 5 584398759 584400311 584400812 281560221750233 113 281560221750233 cudaGraphAddKernelNode_v10000 6 584401083 584403047 584403527 281560221750233 114 281560221750233 cudaGraphAddMemcpyNode_v10000 7 584403928 584404920 584405491 281560221750233 115 281560221750233 cudaGraphAddHostNode_v10000 29 632107852 632117921 632121407 281560221750233 144 281560221750233 cudaMemcpyAsync_v3020 30 632122168 632125545 632127989 281560221750233 145 281560221750233 cudaMemsetAsync_v3020 31 632131546 
632133339 632135584 281560221750233 147 281560221750233 cudaMemsetAsync_v3020 34 632162514 632167393 632169297 281560221750233 151 281560221750233 cudaMemcpyAsync_v3020 35 632170068 632173334 632175388 281560221750233 152 281560221750233 cudaLaunchHostFunc_v10000 Backtraces for OSRT Ranges Adding text columns makes results of the query below more human-readable. ALTER TABLE OSRT_API ADD COLUMN name TEXT; UPDATE OSRT_API SET name = (SELECT value FROM StringIds WHERE OSRT_API.nameId = StringIds.id); ALTER TABLE OSRT_CALLCHAINS ADD COLUMN symbolName TEXT; UPDATE OSRT_CALLCHAINS SET symbolName = (SELECT value FROM StringIds WHERE symbol = StringIds.id); ALTER TABLE OSRT_CALLCHAINS ADD COLUMN moduleName TEXT; UPDATE OSRT_CALLCHAINS SET moduleName = (SELECT value FROM StringIds WHERE module = StringIds.id); Print backtrace of the longest OSRT range SELECT globalTid / 0x1000000 % 0x1000000 AS PID, globalTid % 0x1000000 AS TID, start, end, name, callchainId, stackDepth, symbolName, moduleName FROM OSRT_API LEFT JOIN OSRT_CALLCHAINS ON callchainId == OSRT_CALLCHAINS.id WHERE OSRT_API.rowid IN (SELECT rowid FROM OSRT_API ORDER BY end - start DESC LIMIT 1) ORDER BY stackDepth LIMIT 10; Results: PID TID start end name callchainId stackDepth symbolName moduleName ---------- ---------- ---------- ---------- ---------------------- ----------- ---------- ------------------------------ ---------------------------------------- 19163 19176 360897690 860966851 pthread_cond_timedwait 88 0 pthread_cond_timedwait@GLIBC_2 /lib/x86_64-linux-gnu/libpthread-2.27.so 19163 19176 360897690 860966851 pthread_cond_timedwait 88 1 0x7fbc983b7227 /usr/lib/x86_64-linux-gnu/libcuda.so.418 19163 19176 360897690 860966851 pthread_cond_timedwait 88 2 0x7fbc9835d5c7 /usr/lib/x86_64-linux-gnu/libcuda.so.418 19163 19176 360897690 860966851 pthread_cond_timedwait 88 3 0x7fbc983b64a8 /usr/lib/x86_64-linux-gnu/libcuda.so.418 19163 19176 360897690 860966851 pthread_cond_timedwait 88 4 start_thread 
/lib/x86_64-linux-gnu/libpthread-2.27.so 19163 19176 360897690 860966851 pthread_cond_timedwait 88 5 __clone /lib/x86_64-linux-gnu/libc-2.27.so Profiled processes output streams ALTER TABLE ProcessStreams ADD COLUMN filename TEXT; UPDATE ProcessStreams SET filename = (SELECT value FROM StringIds WHERE ProcessStreams.filenameId = StringIds.id); ALTER TABLE ProcessStreams ADD COLUMN content TEXT; UPDATE ProcessStreams SET content = (SELECT value FROM StringIds WHERE ProcessStreams.contentId = StringIds.id); Select all collected stdout and stderr streams. select globalPid / 0x1000000 % 0x1000000 AS PID, filename, content from ProcessStreams; Results: PID filename content ---------- ------------------------------------------------------- -------------------------------------------------------------------------------------------------------------------- 19163 /tmp/nvidia/nsight_systems/streams/pid_19163_stdout.log /home/user_name/NVIDIA_CUDA-10.1_Samples/6_Advanced/radixSortThrust/radixSortThrust Starting... GPU Device 0: "Quadro P2000" with compute capability 6.1 Sorting 1048576 32-bit unsigned int keys and values radixSortThrust, Throughput = 401.0872 MElements/s, Time = 0.00261 s, Size = 1048576 elements Test passed 19163 /tmp/nvidia/nsight_systems/streams/pid_19163_stderr.log Thread Summary Please note, that Nsight Systems applies additional logic during sampling events processing to work around lost events. This means that the results of the below query might differ slightly from the ones shown in \u201cAnalysis summary\u201d tab. Thread summary calculated using CPU cycles (when available). 
SELECT globalTid / 0x1000000 % 0x1000000 AS PID, globalTid % 0x1000000 AS TID, ROUND(100.0 * SUM(cpuCycles) / ( SELECT SUM(cpuCycles) FROM COMPOSITE_EVENTS GROUP BY globalTid / 0x1000000000000 % 0x100 ), 2 ) as CPU_utilization, (SELECT value FROM StringIds WHERE id = ( SELECT nameId FROM ThreadNames WHERE ThreadNames.globalTid = COMPOSITE_EVENTS.globalTid ) ) as thread_name FROM COMPOSITE_EVENTS GROUP BY globalTid ORDER BY CPU_utilization DESC LIMIT 10; Results: PID TID CPU_utilization thread_name ---------- ---------- --------------- --------------- 19163 19163 98.4 radixSortThrust 19163 19168 1.35 CUPTI worker th 19163 19166 0.25 [NS] Thread running time may be calculated using scheduling data, when PMU counter data was not collected. CREATE INDEX sched_start ON SCHED_EVENTS (start); CREATE TABLE CPU_USAGE AS SELECT first.globalTid as globalTid, (SELECT nameId FROM ThreadNames WHERE ThreadNames.globalTid = first.globalTid) as nameId, sum(second.start - first.start) as total_duration, count() as ranges_count FROM SCHED_EVENTS as first LEFT JOIN SCHED_EVENTS as second ON second.rowid = ( SELECT rowid FROM SCHED_EVENTS WHERE start > first.start AND globalTid = first.globalTid ORDER BY start ASC LIMIT 1 ) WHERE first.isSchedIn != 0 GROUP BY first.globalTid ORDER BY total_duration DESC; SELECT globalTid / 0x1000000 % 0x1000000 AS PID, globalTid % 0x1000000 AS TID, (SELECT value FROM StringIds where nameId == id) as thread_name, ROUND(100.0 * total_duration / (SELECT SUM(total_duration) FROM CPU_USAGE), 2) as CPU_utilization FROM CPU_USAGE ORDER BY CPU_utilization DESC; Results: PID TID thread_name CPU_utilization ---------- ---------- --------------- --------------- 19163 19163 radixSortThrust 93.74 19163 19169 radixSortThrust 3.22 19163 19168 CUPTI worker th 2.46 19163 19166 [NS] 0.44 19163 19172 radixSortThrust 0.07 19163 19167 [NS Comms] 0.05 19163 19176 radixSortThrust 0.02 19163 19170 radixSortThrust 0.0 Function Table These examples demonstrate how to calculate 
Flat and BottomUp (for top level only) views statistics. To set up: ALTER TABLE SAMPLING_CALLCHAINS ADD COLUMN symbolName TEXT; UPDATE SAMPLING_CALLCHAINS SET symbolName = (SELECT value FROM StringIds WHERE symbol = StringIds.id); ALTER TABLE SAMPLING_CALLCHAINS ADD COLUMN moduleName TEXT; UPDATE SAMPLING_CALLCHAINS SET moduleName = (SELECT value FROM StringIds WHERE module = StringIds.id); To get flat view: SELECT symbolName, moduleName, ROUND(100.0 * sum(cpuCycles) / (SELECT SUM(cpuCycles) FROM COMPOSITE_EVENTS), 2) AS flatTimePercentage FROM SAMPLING_CALLCHAINS LEFT JOIN COMPOSITE_EVENTS ON SAMPLING_CALLCHAINS.id == COMPOSITE_EVENTS.id GROUP BY symbol, module ORDER BY flatTimePercentage DESC LIMIT 5; To get BottomUp view (top level only): SELECT symbolName, moduleName, ROUND(100.0 * sum(cpuCycles) / (SELECT SUM(cpuCycles) FROM COMPOSITE_EVENTS), 2) AS selfTimePercentage FROM SAMPLING_CALLCHAINS LEFT JOIN COMPOSITE_EVENTS ON SAMPLING_CALLCHAINS.id == COMPOSITE_EVENTS.id WHERE stackDepth == 0 GROUP BY symbol, module ORDER BY selfTimePercentage DESC LIMIT 5; Results: symbolName moduleName flatTimePercentage ----------- ----------- ------------------ [Max depth] [Max depth] 99.92 thrust::zip /home/user_ 24.17 thrust::zip /home/user_ 24.17 thrust::det /home/user_ 24.17 thrust::det /home/user_ 24.17 symbolName moduleName selfTimePercentage -------------- ------------------------------------------- ------------------ 0x7fbc984982b6 /usr/lib/x86_64-linux-gnu/libcuda.so.418.39 5.29 0x7fbc982d0010 /usr/lib/x86_64-linux-gnu/libcuda.so.418.39 2.81 thrust::iterat /home/user_name/NVIDIA_CUDA-10.1_Samples/6_ 2.23 thrust::iterat /home/user_name/NVIDIA_CUDA-10.1_Samples/6_ 1.55 void thrust::i /home/user_name/NVIDIA_CUDA-10.1_Samples/6_ 1.55 DX12 API Frame Duration Histogram The example demonstrates how to calculate DX12 CPU frames duration and construct a histogram out of it. 
CREATE INDEX DX12_API_ENDTS ON DX12_API (end); CREATE TEMP VIEW DX12_API_FPS AS SELECT end AS start, (SELECT end FROM DX12_API WHERE end > outer.end AND nameId == (SELECT id FROM StringIds WHERE value == "IDXGISwapChain::Present") ORDER BY end ASC LIMIT 1) AS end FROM DX12_API AS outer WHERE nameId == (SELECT id FROM StringIds WHERE value == "IDXGISwapChain::Present") ORDER BY end; Number of frames with a duration of [X, X + 1) milliseconds. SELECT CAST((end - start) / 1000000.0 AS INT) AS duration_ms, count(*) FROM DX12_API_FPS WHERE end IS NOT NULL GROUP BY duration_ms ORDER BY duration_ms; Results: duration_ms count(*) ----------- ---------- 3 1 4 2 5 7 6 153 7 19 8 116 9 16 10 8 11 2 12 2 13 1 14 4 16 3 17 2 18 1 GPU Context Switch Events Enumeration GPU context duration is between first BEGIN and a matching END event. SELECT (CASE tag WHEN 8 THEN "BEGIN" WHEN 7 THEN "END" END) AS tag, globalPid / 0x1000000 % 0x1000000 AS PID, vmId, seqNo, contextId, timestamp, gpuId FROM GPU_CONTEXT_SWITCH_EVENTS WHERE tag in (7, 8) ORDER BY seqNo LIMIT 10; Results: tag PID vmId seqNo contextId timestamp gpuId ---------- ---------- ---------- ---------- ---------- ---------- ---------- BEGIN 23371 0 0 1048578 56759171 0 BEGIN 23371 0 1 1048578 56927765 0 BEGIN 23371 0 3 1048578 63799379 0 END 23371 0 4 1048578 63918806 0 BEGIN 19397 0 5 1048577 64014692 0 BEGIN 19397 0 6 1048577 64250369 0 BEGIN 19397 0 8 1048577 1918310004 0 END 19397 0 9 1048577 1918521098 0 BEGIN 19397 0 10 1048577 2024164744 0 BEGIN 19397 0 11 1048577 2024358650 0 Resolve NVTX Category Name The example demonstrates how to resolve NVTX category name for NVTX marks and ranges. 
WITH event AS ( SELECT * FROM NVTX_EVENTS WHERE eventType IN (34, 59, 60) -- mark, push/pop, start/end ), category AS ( SELECT category, domainId, text AS categoryName FROM NVTX_EVENTS WHERE eventType == 33 -- new category ) SELECT start, end, globalTid, eventType, domainId, category, categoryName, text FROM event JOIN category USING (category, domainId) ORDER BY start; Results: start end globalTid eventType domainId category categoryName text ---------- ---------- --------------- ---------- ---------- ---------- ------------------------- ---------------- 18281150 18311960 281534938484214 59 0 1 FirstCategoryUnderDefault Push Pop Range A 18288187 18306674 281534938484214 59 0 2 SecondCategoryUnderDefaul Push Pop Range B 18294247 281534938484214 34 0 1 FirstCategoryUnderDefault Mark A 18300034 281534938484214 34 0 2 SecondCategoryUnderDefaul Mark B 18345546 18372595 281534938484214 60 1 1 FirstCategoryUnderMyDomai Start End Range 18352924 18378342 281534938484214 60 1 2 SecondCategoryUnderMyDoma Start End Range 18359634 281534938484214 34 1 1 FirstCategoryUnderMyDomai Mark A 18365448 281534938484214 34 1 2 SecondCategoryUnderMyDoma Mark B Rename CUDA Kernels with NVTX The example demonstrates how to map innermost NVTX push-pop range to a matching CUDA kernel run. 
ALTER TABLE CUPTI_ACTIVITY_KIND_KERNEL ADD COLUMN nvtxRange TEXT; CREATE INDEX nvtx_start ON NVTX_EVENTS (start); UPDATE CUPTI_ACTIVITY_KIND_KERNEL SET nvtxRange = ( SELECT NVTX_EVENTS.text FROM NVTX_EVENTS JOIN CUPTI_ACTIVITY_KIND_RUNTIME ON NVTX_EVENTS.eventType == 59 AND NVTX_EVENTS.globalTid == CUPTI_ACTIVITY_KIND_RUNTIME.globalTid AND NVTX_EVENTS.start <= CUPTI_ACTIVITY_KIND_RUNTIME.start AND NVTX_EVENTS.end >= CUPTI_ACTIVITY_KIND_RUNTIME.end WHERE CUPTI_ACTIVITY_KIND_KERNEL.correlationId == CUPTI_ACTIVITY_KIND_RUNTIME.correlationId ORDER BY NVTX_EVENTS.start DESC LIMIT 1 ); SELECT start, end, globalPid, StringIds.value as shortName, nvtxRange FROM CUPTI_ACTIVITY_KIND_KERNEL JOIN StringIds ON shortName == id ORDER BY start LIMIT 6; Results: start end globalPid shortName nvtxRange ---------- ---------- ----------------- ------------- ---------- 526545376 526676256 72057700439031808 MatrixMulCUDA 526899648 527030368 72057700439031808 MatrixMulCUDA Add 527031648 527162272 72057700439031808 MatrixMulCUDA Add 527163584 527294176 72057700439031808 MatrixMulCUDA My Kernel 527296160 527426592 72057700439031808 MatrixMulCUDA My Range 527428096 527558656 72057700439031808 MatrixMulCUDA Select CUDA Calls With Backtraces ALTER TABLE CUPTI_ACTIVITY_KIND_RUNTIME ADD COLUMN name TEXT; UPDATE CUPTI_ACTIVITY_KIND_RUNTIME SET name = (SELECT value FROM StringIds WHERE CUPTI_ACTIVITY_KIND_RUNTIME.nameId = StringIds.id); ALTER TABLE CUDA_CALLCHAINS ADD COLUMN symbolName TEXT; UPDATE CUDA_CALLCHAINS SET symbolName = (SELECT value FROM StringIds WHERE symbol = StringIds.id); SELECT globalTid % 0x1000000 AS TID, start, end, name, callchainId, stackDepth, symbolName FROM CUDA_CALLCHAINS JOIN CUPTI_ACTIVITY_KIND_RUNTIME ON callchainId == CUDA_CALLCHAINS.id ORDER BY callchainId, stackDepth LIMIT 11; Results: TID start end name callchainId stackDepth symbolName ---------- ---------- ---------- ------------- ----------- ---------- -------------- 11928 168976467 169077826 cuMemAlloc_v2 1 0 
0x7f13c44f02ab 11928 168976467 169077826 cuMemAlloc_v2 1 1 0x7f13c44f0b8f 11928 168976467 169077826 cuMemAlloc_v2 1 2 0x7f13c44f3719 11928 168976467 169077826 cuMemAlloc_v2 1 3 cuMemAlloc_v2 11928 168976467 169077826 cuMemAlloc_v2 1 4 cudart::driver 11928 168976467 169077826 cuMemAlloc_v2 1 5 cudart::cudaAp 11928 168976467 169077826 cuMemAlloc_v2 1 6 cudaMalloc 11928 168976467 169077826 cuMemAlloc_v2 1 7 cudaError cuda 11928 168976467 169077826 cuMemAlloc_v2 1 8 main 11928 168976467 169077826 cuMemAlloc_v2 1 9 __libc_start_m 11928 168976467 169077826 cuMemAlloc_v2 1 10 _start SLI Peer-to-Peer Query The example demonstrates how to query SLI Peer-to-Peer events with resource size greater than value and within a time range sorted by resource size descending. SELECT * FROM SLI_P2P WHERE resourceSize < 98304 AND start > 1568063100 AND end < 1579468901 ORDER BY resourceSize DESC; Results: start end eventClass globalTid gpu frameId transferSkipped srcGpu dstGpu numSubResources resourceSize subResourceIdx smplWidth smplHeight smplDepth bytesPerElement dxgiFormat logSurfaceNames transferInfo isEarlyPushManagedByNvApi useAsyncP2pForResolve transferFuncName regimeName debugName bindType ---------- ---------- ---------- ----------------- ---------- ---------- --------------- ---------- ---------- --------------- ------------ -------------- ---------- ---------- ---------- --------------- ---------- --------------- ------------ ------------------------- --------------------- ---------------- ---------- ---------- ---------- 1570351100 1570351101 62 72057698056667136 0 771 0 256 512 1 1048576 0 256 256 1 16 2 3 0 0 1570379300 1570379301 62 72057698056667136 0 771 0 256 512 1 1048576 0 64 64 64 4 31 3 0 0 1572316400 1572316401 62 72057698056667136 0 773 0 256 512 1 1048576 0 256 256 1 16 2 3 0 0 1572345400 1572345401 62 72057698056667136 0 773 0 256 512 1 1048576 0 64 64 64 4 31 3 0 0 1574734300 1574734301 62 72057698056667136 0 775 0 256 512 1 1048576 0 256 256 1 16 2 3 0 0 
1574767200 1574767201 62 72057698056667136 0 775 0 256 512 1 1048576 0 64 64 64 4 31 3 0 0 Generic Events Syscall usage histogram by PID: SELECT json_extract(data, '$.common_pid') AS PID, count(*) AS total FROM GENERIC_EVENTS WHERE PID IS NOT NULL AND typeId = ( SELECT typeId FROM GENERIC_EVENT_TYPES WHERE json_extract(data, '$.Name') = "raw_syscalls:sys_enter") GROUP BY PID ORDER BY total DESC LIMIT 10; Results: PID total ---------- ---------- 5551 32811 9680 3988 4328 1477 9564 1246 4376 1204 4377 1167 4357 656 4355 655 4356 640 4354 633 Fetching Generic Events in JSON Format Text and JSON export modes don\u2019t include generic events. Use the below queries (without LIMIT clause) to extract JSON lines representation of generic events, types and sources. SELECT json_insert('{}', '$.sourceId', sourceId, '$.data', json(data) ) FROM GENERIC_EVENT_SOURCES LIMIT 2; SELECT json_insert('{}', '$.typeId', typeId, '$.sourceId', sourceId, '$.data', json(data) ) FROM GENERIC_EVENT_TYPES LIMIT 2; SELECT json_insert('{}', '$.rawTimestamp', rawTimestamp, '$.timestamp', timestamp, '$.typeId', typeId, '$.data', json(data) ) FROM GENERIC_EVENTS LIMIT 2; Results: json_insert('{}', '$.sourceId', sourceId, '$.data', json(data) ) --------------------------------------------------------------------------------------------------------------- {"sourceId":72057602627862528,"data":{"Name":"FTrace","TimeSource":"ClockMonotonicRaw","SourceGroup":"FTrace"}} json_insert('{}', '$.typeId', typeId, '$.sourceId', sourceId, '$.data', json(data) ) -------------------------------------------------------------------------------------------------------------------- {"typeId":72057602627862547,"sourceId":72057602627862528,"data":{"Name":"raw_syscalls:sys_enter","Format":"\\"NR %ld (%lx, %lx, %lx, %lx, %lx, %lx)\\", REC->id, REC->args[0], REC->args[1], REC->args[2], REC->args[3], REC->args[4], REC->args[5]","Fields":[{"Name":"common_pid","Prefix":"int","Suffix":""},{"Name":"id","Prefix":"long","S 
{"typeId":72057602627862670,"sourceId":72057602627862528,"data":{"Name":"irq:irq_handler_entry","Format":"\\"irq=%d name=%s\\", REC->irq, __get_str(name)","Fields":[{"Name":"common_pid","Prefix":"int","Suffix":""},{"Name":"irq","Prefix":"int","Suffix":""},{"Name":"name","Prefix":"__data_loc char[]","Suffix":""},{"Name":"common_type", json_insert('{}', '$.rawTimestamp', rawTimestamp, '$.timestamp', timestamp, '$.typeId', typeId, '$.data', json(data) ) -------------------------------------------------------------------------------------------------------------------- {"rawTimestamp":1183694330725221,"timestamp":6236683,"typeId":72057602627862670,"data":{"common_pid":"0","irq":"66","name":"327696","common_type":"142","common_flags":"9","common_preempt_count":"0"}} {"rawTimestamp":1183694333695687,"timestamp":9207149,"typeId":72057602627862670,"data":{"common_pid":"0","irq":"66","name":"327696","common_type":"142","common_flags":"9","common_preempt_count":"0"}}", "keywords": []}, {"id": 51, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#conference-presentations", "display_name": "Conference Presentations", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "conference-presentations", "priority": -1, "content": "GTC 2023 Optimize Multi-Node System Workloads With NVIDIA Nsight Systems GTC 2023 Ray-Tracing Development using NVIDIA Nsight Graphics and NVIDIA Nsight Systems GTC 2022 - Killing Cloud Monsters Has Never Been Smoother GTC 2022 - Optimizing Communication with Nsight Systems Network Profiling GTC 2022 - Optimizing Vulkan 1.3 Applications with Nsight Graphics & Nsight Systems GTC 2021 - Tuning GPU Network and Memory Usage in Apache Spark GTC 2020 - Rebalancing the Load: Profile-Guided Optimization of the NAMD Molecular Dynamics Program for Modern GPUs using Nsight Systems GTC 2020 - Scaling the Transformer Model Implementation in PyTorch Across Multiple Nodes GTC 2019 - 
Using Nsight Tools to Optimize the NAMD Molecular Dynamics Simulation Program GTC 2019 - Optimizing Facebook AI Workloads for NVIDIA GPUs GTC 2018 - Optimizing HPC Simulation and Visualization Codes Using NVIDIA Nsight Systems GTC 2018 - Israel - Boost DNN Training Performance using NVIDIA Tools Siggraph 2018 - Taming the Beast; Using NVIDIA Tools to Unlock Hidden GPU Performance", "keywords": []}, {"id": 52, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#configuring-dask", "display_name": "Configuring Dask", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "configuring-dask", "priority": -1, "content": "The multi-report analysis system does not offer options to configure the Dask environment. However, you could achieve this by modifying the recipe script directly or using one of the following from Dask\u2019s configuration system: YAML files: Dask by default searches for all YAML files in ~/.config/dask/ or /etc/dask/ . This search path can be changed using the environment variable DASK_ROOT_CONFIG or DASK_CONFIG . See Dask documentation for the complete list of locations and the lookup order. Example: $ cat example.yaml 'Distributed': 'scheduler': 'allowed-failures': 5 Environment variables: Dask searches for all environment variables that start with DASK_ , then transforms keys by converting to lower-case and changing double-underscores to nested structures. See Dask documentation for the complete list of variables. Example: DASK_DISTRIBUTED__SCHEDULER__ALLOWED_FAILURES=5 Dask Client With no configuration set, the dask-futures mode option initializes the Dask Client with the default arguments, which results in creating a LocalCluster in the background. 
The following are the YAML/environment variables that could be set to change the default behavior: distributed.comm.timeouts.connect / DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT client-name / DASK_CLIENT_NAME scheduler-address / DASK_SCHEDULER_ADDRESS distributed.client.heartbeat / DASK_DISTRIBUTED__CLIENT__HEARTBEAT distributed.client.scheduler-info-interval / DASK_DISTRIBUTED__CLIENT__SCHEDULER_INFO_INTERVAL distributed.client.preload / DASK_DISTRIBUTED__CLIENT__PRELOAD distributed.client.preload-argv / DASK_DISTRIBUTED__CLIENT__PRELOAD_ARGV Recipe\u2019s environment variables Recipe has its own list of environment variables to make the configuration more complete and flexible. These environment variables are either missing from Dask\u2019s configuration system or specific to the recipe system: NSYS_DASK_SCHEDULER_FILE: Path to a file with scheduler information. It will be used to initialize the Dask Client. NSYS_DIR: Path to the directory of Nsight Systems containing the target and host directories. The nsys executable and the recipe dependencies will be searched in this directory instead of the one deduced from the currently running recipe file path.", "keywords": []}, {"id": 53, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#connecting-to-the-target-device", "display_name": "Connecting to the Target Device", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "connecting-to-the-target-device", "priority": -1, "content": "Nsight Systems provides a simple interface to profile on localhost or manage multiple connections to Linux or Windows based devices via SSH. 
The network connections manager can be launched through the device selection dropdown: On x86_64: On Tegra: The dialog has simple controls that allow adding, removing, and modifying connections: Security notice : SSH is only used to establish the initial connection to a target device, perform checks, and upload necessary files. The actual profiling commands and data are transferred through a raw, unencrypted socket. Nsight Systems should not be used in a network setup where attacker-in-the-middle attack is possible, or where untrusted parties may have network access to the target device. While connecting to the target device, you will be prompted to input the user\u2019s password. Please note that if you choose to remember the password, it will be stored in plain text in the configuration file on the host. Stored passwords are bound to the public key fingerprint of the remote device. The No authentication option is useful for devices configured for passwordless login using root username. To enable such a configuration, edit the file /etc/ssh/sshd_config on the target and specify the following option: PermitRootLogin yes Then set empty password using passwd and restart the SSH service with service ssh restart . Open ports : The Nsight Systems daemon requires port 22 and port 45555 to be open for listening. You can confirm that these ports are open with the following command: sudo firewall-cmd --list-ports --permanent sudo firewall-cmd --reload To open a port use the following command, skip --permanent option to open only for this session: sudo firewall-cmd --permanent --add-port 45555/tcp sudo firewall-cmd --reload Likewise, if you are running on a cloud system, you must open port 22 and port 45555 for ingress. Kernel Version Number - To check for the version number of the kernel support of Nsight Systems on a target device, run the following command on the remote device: cat /proc/quadd/version Minimal supported version is 1.82. 
Additionally, presence of Netcat command ( nc ) is required on the target device. For example, on Ubuntu this package can be installed using the following command: sudo apt-get install netcat-openbsd", "keywords": []}, {"id": 54, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#container-support-on-linux-servers", "display_name": "Container Support on Linux Servers", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "container-support-on-linux-servers", "priority": -1, "content": "Collecting data within a Docker container While examples in this section use Docker container semantics, other containers work much the same. The following information assumes the reader is knowledgeable regarding Docker containers. For further information about Docker use in general, see the Docker documentation . We strongly recommend using the CLI to profile in a container. Best container practice is to split services across containers when they do not require colocation. The Nsight Systems GUI is not needed to profile and brings in many dependencies, so the CLI is recommended. If you wish the GUI can be in a separate side-car container you use after to view your report. All you need is a shared folder between the containers. See section on GUI VNC Container below for more information. Enable Docker Collection When starting the Docker to perform a Nsight Systems collection, additional steps are required to enable the perf_event_open system call. This is required in order to utilize the Linux kernel\u2019s perf subsystem which provides sampling information to Nsight Systems . There are three ways to enable the perf_event_open syscall. You can enable it by using the --privileged=true switch, adding --cap-add=SYS_ADMIN switch to your docker run command file, or you can enable it by setting the seccomp security profile if your system meets the requirements. 
Secure computing mode (seccomp) is a feature of the Linux kernel that can be used to restrict an application\u2019s access. This feature is available only if the kernel is enabled with seccomp support. To check for seccomp support: $ grep CONFIG_SECCOMP= /boot/config-$(uname -r) The official Docker documentation says: "Seccomp profiles require seccomp 2.2.1 which is not available on Ubuntu 14.04, Debian Wheezy, or Debian Jessie. To use seccomp on these distributions, you must download the latest static Linux binaries (rather than packages)." Download the default seccomp profile file, default.json, relevant to your Docker version. If perf_event_open is already listed in the file as guarded by CAP_SYS_ADMIN , then remove the perf_event_open line. Add the following lines under \u201csyscalls\u201d and save the resulting file as default_with_perf.json . { "name": "perf_event_open", "action": "SCMP_ACT_ALLOW", "args": [] }, Then you will be able to use the following switch when starting the Docker to apply the new seccomp profile. --security-opt seccomp=default_with_perf.json Launch Docker Collection Here is an example command that has been used to launch a Docker for testing with Nsight Systems : sudo nvidia-docker run --network=host --security-opt seccomp=default_with_perf.json --rm -ti caffe-demo2 bash There is a known issue where Docker collections terminate prematurely with older versions of the driver and the CUDA Toolkit. If collection is ending unexpectedly, please update to the latest versions. After the Docker has been started, use the Nsight Systems CLI to launch a collection within the Docker. 
The resulting .qdstrm file can be imported into the Nsight Systems host like any other CLI result.", "keywords": []}, {"id": 55, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cpu-profiling-using-linux-os-perf-subsystem", "display_name": "CPU Profiling Using Linux OS Perf Subsystem", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cpu-profiling-using-linux-os-perf-subsystem", "priority": -1, "content": "Nsight Systems on Linux targets, utilizes the Linux OS\u2019 perf subsystem to sample CPU Instruction Pointers (IPs) and backtraces, trace CPU context switches, and sample CPU and OS event counts. The Linux perf tool utilizes the same perf subsystem. Nsight Systems , on L4T and potentially other ARM targets, may use a custom kernel module to collect the same data. The Nsight Systems CLI command nsys status --environment indicates when the kernel module is used instead of the Linux OS\u2019 perf subsystem. Features CPU Instruction Pointer / Backtrace Sampling Nsight Systems can sample CPU Instruction Pointers / backtraces periodically. The collection of a sample is triggered by a hardware event overflow - e.g. a sample is collected after every 1 million CPU reference cycles on a per thread basis. In the GUI, samples are shown on the individual thread timelines, in the Event Viewer, and in the Top Down, Bottom Up, or Flat views which provide histogram-like summaries of the data. IP / backtrace collections can be configured in process-tree or system-wide mode. In process-tree mode, Nsight Systems will sample the process, and any of its descendants, launched by the tool. In system-wide mode, Nsight Systems will sample all processes running on the system, including any processes launched by the tool. CPU Context Switch Tracing Nsight Systems can trace every time the OS schedules a thread on a logical CPU and every time the OS thread gets unscheduled from a logical CPU. 
The data is used to show CPU utilization and OS thread utilization within the Nsight Systems GUI. Context switch collections can be configured in process-tree or system-wide mode. In process-tree mode, Nsight Systems will trace the process, and any of its descendants, launched by Nsight Systems . In system-wide mode, Nsight Systems will trace all processes running on the system, including any processes launched by the Nsight Systems . CPU Event Sampling Nsight Systems can periodically sample CPU hardware event counts and OS event counts and show the event\u2019s rate over time in the Nsight Systems GUI. Event sample collections can be configured in system-wide mode only. In system-wide mode, Nsight Systems will sample event counts of all CPUs and the OS event counts running on the system. Event counts are not directly associated with processes or threads. System Requirements Paranoid Level The system\u2019s paranoid level must be 2 or lower. Paranoid Level CPU IP/backtrace Sampling process-tree mode CPU IP/backtrace Sampling system-wide mode CPU Context Switch Tracing process-tree mode CPU Context Switch Tracing system-wide mode Event Sampling system-wide mode 3 or greater not available not available not available not available not available 2 User mode IP/backtrace samples only not available available not available not available 1 Kernel and user mode IP/backtrace samples not available available not available not available 0, -1 Kernel and user mode IP/backtrace samples Kernel and user mode IP/backtrace samples available available hardware and OS events Kernel Version To support the CPU profiling features utilized by Nsight Systems , the kernel version must be greater than or equal to v4.3. RedHat has backported the required features to the v3.10.0-693 kernel. RedHat distros and their derivatives (e.g. CentOS) require a 3.10.0-693 or later kernel. Use the uname -r command to check the kernel\u2019s version. 
perf_event_open syscall The perf_event_open syscall needs to be available. When running within a Docker container, the default seccomp settings will normally block the perf_event_open syscall. To workaround this issue, use the Docker run --privileged switch when launching the docker or modify the docker\u2019s seccomp settings. Some VMs (virtual machines), e.g. AWS, may also block the perf_event_open syscall. Sampling Trigger In some rare case, a sampling trigger is not available. The sampling trigger is either a hardware or software event that causes a sample to be collected. Some VMs block hardware events from being accessed and therefore, prevent hardware events from being used as sampling triggers. In those cases, Nsight Systems will fall back to using a software trigger if possible. Checking Your Target System Use the nsys status --environment command to check if a system meets the Nsight Systems CPU profiling requirements. Example output from this command is shown below. Note that this command does not check for Linux capability overrides - i.e. if the user or executable files have CAP_SYS_ADMIN or CAP_PERFMON capability. Also, note that this command does not indicate if system-wide mode can be used. Configuring a CPU Profiling Collection When configuring Nsight Systems for CPU Profiling from the CLI, use some or all of the following options: --sample , --cpuctxsw , --event-sample , --backtrace , --cpu-core-events , --event-sampling-frequency , --os-events , --samples-per-backtrace , and --sampling-period . Details about these options, including examples can be found in the Profiling from the CLI section of the User Guide When configuring from the GUI, the following options are available: The configuration used during CPU profiling is documented in the Analysis Summary: As well as in the Diagnosics Summary: Visualizing CPU Profiling Results Here are example screenshots visualizing CPU profiling results. 
For details about navigating the Timeline View and the backtraces, see the section on Timeline View in the Reading Your Report in the GUI section of the User Guide. Example of CPU IP/Backtrace Data In the timeline, yellow-orange marks can be found under each thread\u2019s timeline that indicate the moment an IP / backtrace sample was collected on that thread (e.g. see the yellow-orange marks in the Specific Samples box above). Hovering the cursor over a mark will cause a tooltip to display the backtrace for that sample. Below the Timeline is a drop-down list with multiple options including Events View, Top-Down View, Bottom-Up View, and Flat View. All four of these views can be used to view CPU IP / back trace sampling data. Example of Event Sampling Event sampling samples hardware or software event counts during a collection and then graphs those events as rates on the Timeline. The above screenshot shows 4 hardware events. Core and cache events are graphed under the associated CPU row (see the red box in the screenshot) while uncore and OS events are graphed in their own row (see the green box in the screenshot). Hovering the cursor over an event sampling row in the timeline shows the event\u2019s rate at that moment. Common Issues Reducing Overhead Caused By Sampling There are several ways to reduce overhead caused by sampling. disable sampling (i.e. use the --sampling=none switch) increase the sampling period (i.e. reduce the sampling rate) using the --sampling-period switch stop collecting backtraces (i.e. use the --backtrace=none switch) or collect more efficient backtraces - if available, use the --backtrace=lbr switch. reduce the number of backtraces collected per sample. See documentation for the --samples-per-backtrace switch. Throttling The Linux operating system enforces a maximum time to handle sampling interrupts. 
This means that if collecting samples takes more than a specified amount of time, the OS will throttle (i.e slow down) the sampling rate to prevent the perf subsystem from causing too much overhead. When this occurs, sampling data may become irregular even though the thread is very busy. The above screenshot shows a case where CPU IP / backtrace sampling was throttled during a collection. Note the irregular intervals of sampling tickmarks on the thread timeline. The number of times a collection throttled is provided in the Nsight Systems GUI\u2019s Diagnostics messages. If a collection throttles frequently (e.g. 1000s of times), increasing the sampling period should help reduce throttling. When throttling occurs, the OS sets a new (lower) maximum sampling rate in the procfs. This value must be reset before the sampling rate can be increased again. Use the following command to reset the OS\u2019 max sampling rate echo '100000' | sudo tee /proc/sys/kernel/perf_event_max_sample_rate Sample intervals are irregular My samples are not periodic - why? My samples are clumped up - why? There are gaps in between the samples - why? Likely reasons: Throttling, as described above The paranoid level is set to 2. If the paranoid level is set to 2, anytime the workload makes a system call and spends time executing kernel mode code, samples will not be collected and there will be gaps in the sampling data. The sampling trigger itself is not periodic. If the trigger event is not periodic, for example, the Instructions Retired. event, sample collection will primarily occur when cache misses are occurring. No CPU profiling data is collected There are a few common issues that cause CPU profiling data to not be collected System requirements are not met. Check your system settings with the nsys status --environment command and see the System Requirements section above. I profiled my workload in a Docker container but no sampling data was collected. 
By default, Docker containers prevent the perf_event_open syscall from being utilized. To override this behavior, launch the Docker with the --privileged switch or modify the Docker\u2019s seccomp settings. I profiled my workload in a Docker container running Ubuntu 20+ running on top of a host system running CentOS with a kernel version < 3.10.0-693. The nsys status --environment command indicated that CPU profiling was supported. The host OS kernel version determines if CPU profiling is allowed and a CentOS host with a version < 3.10.0-693 is too old. In this case, the nsys status --environment command is incorrect.", "keywords": []}, {"id": 56, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cuda-default-function-list-for-cli", "display_name": "CUDA Default Function List for CLI", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cuda-default-function-list-for-cli", "priority": -1, "content": "CUDA Runtime API cudaBindSurfaceToArray cudaBindTexture cudaBindTexture2D cudaBindTextureToArray cudaBindTextureToMipmappedArray cudaConfigureCall cudaCreateSurfaceObject cudaCreateTextureObject cudaD3D10MapResources cudaD3D10RegisterResource cudaD3D10UnmapResources cudaD3D10UnregisterResource cudaD3D9MapResources cudaD3D9MapVertexBuffer cudaD3D9RegisterResource cudaD3D9RegisterVertexBuffer cudaD3D9UnmapResources cudaD3D9UnmapVertexBuffer cudaD3D9UnregisterResource cudaD3D9UnregisterVertexBuffer cudaDestroySurfaceObject cudaDestroyTextureObject cudaDeviceReset cudaDeviceSynchronize cudaEGLStreamConsumerAcquireFrame cudaEGLStreamConsumerConnect cudaEGLStreamConsumerConnectWithFlags cudaEGLStreamConsumerDisconnect cudaEGLStreamConsumerReleaseFrame cudaEGLStreamConsumerReleaseFrame cudaEGLStreamProducerConnect cudaEGLStreamProducerDisconnect cudaEGLStreamProducerReturnFrame cudaEventCreate cudaEventCreateFromEGLSync cudaEventCreateWithFlags cudaEventDestroy cudaEventQuery 
cudaEventRecord cudaEventRecord_ptsz cudaEventSynchronize cudaFree cudaFreeArray cudaFreeHost cudaFreeMipmappedArray cudaGLMapBufferObject cudaGLMapBufferObjectAsync cudaGLRegisterBufferObject cudaGLUnmapBufferObject cudaGLUnmapBufferObjectAsync cudaGLUnregisterBufferObject cudaGraphicsD3D10RegisterResource cudaGraphicsD3D11RegisterResource cudaGraphicsD3D9RegisterResource cudaGraphicsEGLRegisterImage cudaGraphicsGLRegisterBuffer cudaGraphicsGLRegisterImage cudaGraphicsMapResources cudaGraphicsUnmapResources cudaGraphicsUnregisterResource cudaGraphicsVDPAURegisterOutputSurface cudaGraphicsVDPAURegisterVideoSurface cudaHostAlloc cudaHostRegister cudaHostUnregister cudaLaunch cudaLaunchCooperativeKernel cudaLaunchCooperativeKernelMultiDevice cudaLaunchCooperativeKernel_ptsz cudaLaunchKernel cudaLaunchKernel_ptsz cudaLaunch_ptsz cudaMalloc cudaMalloc3D cudaMalloc3DArray cudaMallocArray cudaMallocHost cudaMallocManaged cudaMallocMipmappedArray cudaMallocPitch cudaMemGetInfo cudaMemPrefetchAsync cudaMemPrefetchAsync_ptsz cudaMemcpy cudaMemcpy2D cudaMemcpy2DArrayToArray cudaMemcpy2DArrayToArray_ptds cudaMemcpy2DAsync cudaMemcpy2DAsync_ptsz cudaMemcpy2DFromArray cudaMemcpy2DFromArrayAsync cudaMemcpy2DFromArrayAsync_ptsz cudaMemcpy2DFromArray_ptds cudaMemcpy2DToArray cudaMemcpy2DToArrayAsync cudaMemcpy2DToArrayAsync_ptsz cudaMemcpy2DToArray_ptds cudaMemcpy2D_ptds cudaMemcpy3D cudaMemcpy3DAsync cudaMemcpy3DAsync_ptsz cudaMemcpy3DPeer cudaMemcpy3DPeerAsync cudaMemcpy3DPeerAsync_ptsz cudaMemcpy3DPeer_ptds cudaMemcpy3D_ptds cudaMemcpyArrayToArray cudaMemcpyArrayToArray_ptds cudaMemcpyAsync cudaMemcpyAsync_ptsz cudaMemcpyFromArray cudaMemcpyFromArrayAsync cudaMemcpyFromArrayAsync_ptsz cudaMemcpyFromArray_ptds cudaMemcpyFromSymbol cudaMemcpyFromSymbolAsync cudaMemcpyFromSymbolAsync_ptsz cudaMemcpyFromSymbol_ptds cudaMemcpyPeer cudaMemcpyPeerAsync cudaMemcpyToArray cudaMemcpyToArrayAsync cudaMemcpyToArrayAsync_ptsz cudaMemcpyToArray_ptds cudaMemcpyToSymbol cudaMemcpyToSymbolAsync 
cudaMemcpyToSymbolAsync_ptsz cudaMemcpyToSymbol_ptds cudaMemcpy_ptds cudaMemset cudaMemset2D cudaMemset2DAsync cudaMemset2DAsync_ptsz cudaMemset2D_ptds cudaMemset3D cudaMemset3DAsync cudaMemset3DAsync_ptsz cudaMemset3D_ptds cudaMemsetAsync cudaMemsetAsync_ptsz cudaMemset_ptds cudaPeerRegister cudaPeerUnregister cudaStreamAddCallback cudaStreamAddCallback_ptsz cudaStreamAttachMemAsync cudaStreamAttachMemAsync_ptsz cudaStreamCreate cudaStreamCreateWithFlags cudaStreamCreateWithPriority cudaStreamDestroy cudaStreamQuery cudaStreamQuery_ptsz cudaStreamSynchronize cudaStreamSynchronize_ptsz cudaStreamWaitEvent cudaStreamWaitEvent_ptsz cudaThreadSynchronize cudaUnbindTexture CUDA Primary API cu64Array3DCreate cu64ArrayCreate cu64D3D9MapVertexBuffer cu64GLMapBufferObject cu64GLMapBufferObjectAsync cu64MemAlloc cu64MemAllocPitch cu64MemFree cu64MemGetInfo cu64MemHostAlloc cu64Memcpy2D cu64Memcpy2DAsync cu64Memcpy2DUnaligned cu64Memcpy3D cu64Memcpy3DAsync cu64MemcpyAtoD cu64MemcpyDtoA cu64MemcpyDtoD cu64MemcpyDtoDAsync cu64MemcpyDtoH cu64MemcpyDtoHAsync cu64MemcpyHtoD cu64MemcpyHtoDAsync cu64MemsetD16 cu64MemsetD16Async cu64MemsetD2D16 cu64MemsetD2D16Async cu64MemsetD2D32 cu64MemsetD2D32Async cu64MemsetD2D8 cu64MemsetD2D8Async cu64MemsetD32 cu64MemsetD32Async cu64MemsetD8 cu64MemsetD8Async cuArray3DCreate cuArray3DCreate_v2 cuArrayCreate cuArrayCreate_v2 cuArrayDestroy cuBinaryFree cuCompilePtx cuCtxCreate cuCtxCreate_v2 cuCtxDestroy cuCtxDestroy_v2 cuCtxSynchronize cuD3D10CtxCreate cuD3D10CtxCreateOnDevice cuD3D10CtxCreate_v2 cuD3D10MapResources cuD3D10RegisterResource cuD3D10UnmapResources cuD3D10UnregisterResource cuD3D11CtxCreate cuD3D11CtxCreateOnDevice cuD3D11CtxCreate_v2 cuD3D9CtxCreate cuD3D9CtxCreateOnDevice cuD3D9CtxCreate_v2 cuD3D9MapResources cuD3D9MapVertexBuffer cuD3D9MapVertexBuffer_v2 cuD3D9RegisterResource cuD3D9RegisterVertexBuffer cuD3D9UnmapResources cuD3D9UnmapVertexBuffer cuD3D9UnregisterResource cuD3D9UnregisterVertexBuffer 
cuEGLStreamConsumerAcquireFrame cuEGLStreamConsumerConnect cuEGLStreamConsumerConnectWithFlags cuEGLStreamConsumerDisconnect cuEGLStreamConsumerReleaseFrame cuEGLStreamProducerConnect cuEGLStreamProducerDisconnect cuEGLStreamProducerPresentFrame cuEGLStreamProducerReturnFrame cuEventCreate cuEventCreateFromEGLSync cuEventCreateFromNVNSync cuEventDestroy cuEventDestroy_v2 cuEventQuery cuEventRecord cuEventRecord_ptsz cuEventSynchronize cuGLCtxCreate cuGLCtxCreate_v2 cuGLInit cuGLMapBufferObject cuGLMapBufferObjectAsync cuGLMapBufferObjectAsync_v2 cuGLMapBufferObjectAsync_v2_ptsz cuGLMapBufferObject_v2 cuGLMapBufferObject_v2_ptds cuGLRegisterBufferObject cuGLUnmapBufferObject cuGLUnmapBufferObjectAsync cuGLUnregisterBufferObject cuGraphicsD3D10RegisterResource cuGraphicsD3D11RegisterResource cuGraphicsD3D9RegisterResource cuGraphicsEGLRegisterImage cuGraphicsGLRegisterBuffer cuGraphicsGLRegisterImage cuGraphicsMapResources cuGraphicsMapResources_ptsz cuGraphicsUnmapResources cuGraphicsUnmapResources_ptsz cuGraphicsUnregisterResource cuGraphicsVDPAURegisterOutputSurface cuGraphicsVDPAURegisterVideoSurface cuInit cuLaunch cuLaunchCooperativeKernel cuLaunchCooperativeKernelMultiDevice cuLaunchCooperativeKernel_ptsz cuLaunchGrid cuLaunchGridAsync cuLaunchKernel cuLaunchKernel_ptsz cuLinkComplete cuLinkCreate cuLinkCreate_v2 cuLinkDestroy cuMemAlloc cuMemAllocHost cuMemAllocHost_v2 cuMemAllocManaged cuMemAllocPitch cuMemAllocPitch_v2 cuMemAlloc_v2 cuMemFree cuMemFreeHost cuMemFree_v2 cuMemGetInfo cuMemGetInfo_v2 cuMemHostAlloc cuMemHostAlloc_v2 cuMemHostRegister cuMemHostRegister_v2 cuMemHostUnregister cuMemPeerRegister cuMemPeerUnregister cuMemPrefetchAsync cuMemPrefetchAsync_ptsz cuMemcpy cuMemcpy2D cuMemcpy2DAsync cuMemcpy2DAsync_v2 cuMemcpy2DAsync_v2_ptsz cuMemcpy2DUnaligned cuMemcpy2DUnaligned_v2 cuMemcpy2DUnaligned_v2_ptds cuMemcpy2D_v2 cuMemcpy2D_v2_ptds cuMemcpy3D cuMemcpy3DAsync cuMemcpy3DAsync_v2 cuMemcpy3DAsync_v2_ptsz cuMemcpy3DPeer cuMemcpy3DPeerAsync 
cuMemcpy3DPeerAsync_ptsz cuMemcpy3DPeer_ptds cuMemcpy3D_v2 cuMemcpy3D_v2_ptds cuMemcpyAsync cuMemcpyAsync_ptsz cuMemcpyAtoA cuMemcpyAtoA_v2 cuMemcpyAtoA_v2_ptds cuMemcpyAtoD cuMemcpyAtoD_v2 cuMemcpyAtoD_v2_ptds cuMemcpyAtoH cuMemcpyAtoHAsync cuMemcpyAtoHAsync_v2 cuMemcpyAtoHAsync_v2_ptsz cuMemcpyAtoH_v2 cuMemcpyAtoH_v2_ptds cuMemcpyDtoA cuMemcpyDtoA_v2 cuMemcpyDtoA_v2_ptds cuMemcpyDtoD cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2 cuMemcpyDtoDAsync_v2_ptsz cuMemcpyDtoD_v2 cuMemcpyDtoD_v2_ptds cuMemcpyDtoH cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2 cuMemcpyDtoHAsync_v2_ptsz cuMemcpyDtoH_v2 cuMemcpyDtoH_v2_ptds cuMemcpyHtoA cuMemcpyHtoAAsync cuMemcpyHtoAAsync_v2 cuMemcpyHtoAAsync_v2_ptsz cuMemcpyHtoA_v2 cuMemcpyHtoA_v2_ptds cuMemcpyHtoD cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2 cuMemcpyHtoDAsync_v2_ptsz cuMemcpyHtoD_v2 cuMemcpyHtoD_v2_ptds cuMemcpyPeer cuMemcpyPeerAsync cuMemcpyPeerAsync_ptsz cuMemcpyPeer_ptds cuMemcpy_ptds cuMemcpy_v2 cuMemsetD16 cuMemsetD16Async cuMemsetD16Async_ptsz cuMemsetD16_v2 cuMemsetD16_v2_ptds cuMemsetD2D16 cuMemsetD2D16Async cuMemsetD2D16Async_ptsz cuMemsetD2D16_v2 cuMemsetD2D16_v2_ptds cuMemsetD2D32 cuMemsetD2D32Async cuMemsetD2D32Async_ptsz cuMemsetD2D32_v2 cuMemsetD2D32_v2_ptds cuMemsetD2D8 cuMemsetD2D8Async cuMemsetD2D8Async_ptsz cuMemsetD2D8_v2 cuMemsetD2D8_v2_ptds cuMemsetD32 cuMemsetD32Async cuMemsetD32Async_ptsz cuMemsetD32_v2 cuMemsetD32_v2_ptds cuMemsetD8 cuMemsetD8Async cuMemsetD8Async_ptsz cuMemsetD8_v2 cuMemsetD8_v2_ptds cuMipmappedArrayCreate cuMipmappedArrayDestroy cuModuleLoad cuModuleLoadData cuModuleLoadDataEx cuModuleLoadFatBinary cuModuleUnload cuStreamAddCallback cuStreamAddCallback_ptsz cuStreamAttachMemAsync cuStreamAttachMemAsync_ptsz cuStreamBatchMemOp cuStreamBatchMemOp_ptsz cuStreamCreate cuStreamCreateWithPriority cuStreamDestroy cuStreamDestroy_v2 cuStreamSynchronize cuStreamSynchronize_ptsz cuStreamWaitEvent cuStreamWaitEvent_ptsz cuStreamWaitValue32 cuStreamWaitValue32_ptsz cuStreamWaitValue64 cuStreamWaitValue64_ptsz 
cuStreamWriteValue32 cuStreamWriteValue32_ptsz cuStreamWriteValue64 cuStreamWriteValue64_ptsz cuSurfObjectCreate cuSurfObjectDestroy cuSurfRefCreate cuSurfRefDestroy cuTexObjectCreate cuTexObjectDestroy cuTexRefCreate cuTexRefDestroy cuVDPAUCtxCreate cuVDPAUCtxCreate_v2", "keywords": []}, {"id": 57, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cuda-gpu-memory-allocation-graph", "display_name": "CUDA GPU Memory Allocation Graph", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cuda-gpu-memory-allocation-graph", "priority": -1, "content": "When the Collect GPU Memory Usage option is selected from the Collect CUDA trace option set, Nsight Systems will track CUDA GPU memory allocations and deallocations and present a graph of this information in the timeline. This is not the same as the GPU memory graph generated during stutter analysis on the Windows target (see Stutter Memory Trace ) Below, in the report on the left, memory is allocated and freed during the collection. In the report on the right, memory is allocated, but not freed during the collection. Here is another example, where allocations are happening on multiple GPUs", "keywords": []}, {"id": 58, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cuda-graph-trace", "display_name": "CUDA Graph Trace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cuda-graph-trace", "priority": -1, "content": "Nsight Systems is capable of capturing information about CUDA graphs in your application at either the graph or node granularity. This can be set in the CLI using the --cuda-graph-trace option, or in the GUI by setting the appropriate drop down. 
When CUDA graph trace is set to graph , the users sees each graph as one item on the timeline: When CUDA graph trace is set to node , the users sees each graph as a set of nodes on the timeline: Tracing CUDA graphs at the graph level rather than the tracing the underlying nodes results in significantly less overhead. This option is only available with CUDA driver 515.43 or higher.", "keywords": []}, {"id": 59, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cuda-python-backtrace", "display_name": "CUDA Python Backtrace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cuda-python-backtrace", "priority": -1, "content": "Nsight Systems for Arm server (SBSA) platforms and x86 Linux targets, is capable of capturing Python backtrace information when CUDA backtrace is being captured. To enable CUDA Python backtrace from Nsight Systems : CLI \u2014 Set --python-backtrace=cuda . GUI \u2014 Select the Collect Python backtrace for selected API calls checkbox. Example screenshot:", "keywords": []}, {"id": 60, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cuda-trace", "display_name": "CUDA Trace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cuda-trace", "priority": -1, "content": "Nsight Systems is capable of capturing information about CUDA execution in the profiled process. The following information can be collected and presented on the timeline in the report: CUDA API trace \u2014 trace of CUDA Runtime and CUDA Driver calls made by the application. CUDA Runtime calls typically start with cuda prefix (e.g. cudaLaunch ). CUDA Driver calls typically start with cu prefix (e.g. cuDeviceGetCount ). CUDA workload trace \u2014 trace of activity happening on the GPU, which includes memory operations (e.g., Host-to-Device memory copies) and kernel executions. 
Within the threads that use the CUDA API, additional child rows will appear in the timeline tree. On Nsight Systems Workstation Edition , cuDNN and cuBLAS API tracing and OpenACC tracing. Near the bottom of the timeline row tree, the GPU node will appear and contain a CUDA node. Within the CUDA node, each CUDA context used within the process will be shown along with its corresponding CUDA streams. Steams will contain memory operations and kernel launches on the GPU. Kernel launches are represented by blue, while memory transfers are displayed in red. The easiest way to capture CUDA information is to launch the process from Nsight Systems , and it will setup the environment for you. To do so, simply set up a normal launch and select the Collect CUDA trace checkbox. For Nsight Systems Workstation Edition this looks like: For Nsight Systems Embedded Platforms Edition this looks like: Additional configuration parameters are available: Collect backtraces for API calls longer than X seconds - turns on collection of CUDA API backtraces and sets the minimum time a CUDA API event must take before its backtraces are collected. Setting this value too low can cause high application overhead and seriously increase the size of your results file. Flush data periodically \u2014 specifies the period after which an attempt to flush CUDA trace data will be made. Normally, in order to collect full CUDA trace, the application needs to finalize the device used for CUDA work (call cudaDeviceReset() , and then let the application gracefully exit (as opposed to crashing). This option allows flushing CUDA trace data even before the device is finalized. However, it might introduce additional overhead to a random CUDA Driver or CUDA Runtime API call. Skip some API calls \u2014 avoids tracing insignificant CUDA Runtime API calls (namely, cudaConfigureCall() , cudaSetupArgument() , cudaHostGetDevicePointers() ). 
Not tracing these functions allows Nsight Systems to significantly reduce the profiling overhead, without losing any interesting data. (See CUDA Trace Filters, below) Collect GPU Memory Usage - collects information used to generate a graph of CUDA allocated memory across time. Note that this will increase overhead. See section on CUDA GPU Memory Allocation Graph below. Collect Unified Memory CPU page faults - collects information on page faults that occur when CPU code tries to access a memory page that resides on the device. See section on Unified Memory CPU Page Faults in the Unified Memory Transfer Trace documentation below. Collect Unified Memory GPU page faults - collects information on page faults that occur when GPU code tries to access a memory page that resides on the CPU. See section on Unified Memory GPU Page Faults in the Unified Memory Transfer Trace documentation below. Collect CUDA Graph trace - by default, CUDA tracing will collect and expose information on a whole graph basis. The user can opt to collect on a node per node basis. See section on CUDA Graph Trace below. For Nsight Systems Workstation Edition , Collect cuDNN trace , Collect cuBLAS trace , Collect OpenACC trace - selects which (if any) extra libraries that depend on CUDA to trace. OpenACC versions 2.0, 2.5, and 2.6 are supported when using PGI runtime version 15.7 or greater and not compiling statically. In order to differentiate constructs, a PGI runtime of 16.1 or later is required. Note that Nsight Systems Workstation Edition does not support the GCC implementation of OpenACC at this time. If your application crashes before all collected CUDA trace data has been copied out, some or all data might be lost and not present in the report. Nsight Systems will not have information about CUDA events that were still in device buffers when analysis terminated. 
It is a good idea, if using cudaProfilerAPI to control analysis to call cudaDeviceReset before ending analysis.", "keywords": []}, {"id": 61, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#cudnn-function-list-for-x86-cli", "display_name": "cuDNN Function List for X86 CLI", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "cudnn-function-list-for-x86-cli", "priority": -1, "content": "cuDNN API functions cudnnActivationBackward cudnnActivationBackward_v3 cudnnActivationBackward_v4 cudnnActivationForward cudnnActivationForward_v3 cudnnActivationForward_v4 cudnnAddTensor cudnnBatchNormalizationBackward cudnnBatchNormalizationBackwardEx cudnnBatchNormalizationForwardInference cudnnBatchNormalizationForwardTraining cudnnBatchNormalizationForwardTrainingEx cudnnCTCLoss cudnnConvolutionBackwardBias cudnnConvolutionBackwardData cudnnConvolutionBackwardFilter cudnnConvolutionBiasActivationForward cudnnConvolutionForward cudnnCreate cudnnCreateAlgorithmPerformance cudnnDestroy cudnnDestroyAlgorithmPerformance cudnnDestroyPersistentRNNPlan cudnnDivisiveNormalizationBackward cudnnDivisiveNormalizationForward cudnnDropoutBackward cudnnDropoutForward cudnnDropoutGetReserveSpaceSize cudnnDropoutGetStatesSize cudnnFindConvolutionBackwardDataAlgorithm cudnnFindConvolutionBackwardDataAlgorithmEx cudnnFindConvolutionBackwardFilterAlgorithm cudnnFindConvolutionBackwardFilterAlgorithmEx cudnnFindConvolutionForwardAlgorithm cudnnFindConvolutionForwardAlgorithmEx cudnnFindRNNBackwardDataAlgorithmEx cudnnFindRNNBackwardWeightsAlgorithmEx cudnnFindRNNForwardInferenceAlgorithmEx cudnnFindRNNForwardTrainingAlgorithmEx cudnnFusedOpsExecute cudnnIm2Col cudnnLRNCrossChannelBackward cudnnLRNCrossChannelForward cudnnMakeFusedOpsPlan cudnnMultiHeadAttnBackwardData cudnnMultiHeadAttnBackwardWeights cudnnMultiHeadAttnForward cudnnOpTensor cudnnPoolingBackward cudnnPoolingForward cudnnRNNBackwardData 
cudnnRNNBackwardDataEx cudnnRNNBackwardWeights cudnnRNNBackwardWeightsEx cudnnRNNForwardInference cudnnRNNForwardInferenceEx cudnnRNNForwardTraining cudnnRNNForwardTrainingEx cudnnReduceTensor cudnnReorderFilterAndBias cudnnRestoreAlgorithm cudnnRestoreDropoutDescriptor cudnnSaveAlgorithm cudnnScaleTensor cudnnSoftmaxBackward cudnnSoftmaxForward cudnnSpatialTfGridGeneratorBackward cudnnSpatialTfGridGeneratorForward cudnnSpatialTfSamplerBackward cudnnSpatialTfSamplerForward cudnnTransformFilter cudnnTransformTensor cudnnTransformTensorEx", "keywords": []}, {"id": 62, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#custom-etw-trace", "display_name": "Custom ETW Trace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "custom-etw-trace", "priority": -1, "content": "Use the custom ETW trace feature to enable and collect any manifest-based ETW log. The collected events are displayed on the timeline on dedicated rows for each event type. Custom ETW is available on Windows target machines. To retain the .etl trace files captured, so that they can be viewed in other tools (e.g. GPUView), change the \u201cSave ETW log files in project folder\u201d option under \u201cProfile Behavior\u201d in Nsight Systems \u2019s global Options dialog. The .etl files will appear in the same folder as the .nsys-rep file, accessible by right-clicking the report in the Project Explorer and choosing \u201cShow in Folder\u2026\u201d. 
Data collected from each ETW provider will appear in its own .etl file, and an additional .etl file named \u201cReport XX-Merged-*.etl\u201d, containing the events from all captured sources, will be created as well.", "keywords": []}, {"id": 63, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#d3d11-api-trace", "display_name": "D3D11 API trace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "d3d11-api-trace", "priority": -1, "content": "Nsight Systems can capture information about Direct3D 11 API calls made by the profiled process. This includes capturing the execution time of D3D11 API functions, performance markers, and frame durations. SLI Trace Trace SLI queries and peer-to-peer transfers of D3D11 applications. Requires SLI hardware and an active SLI profile definition in the NVIDIA console.", "keywords": []}, {"id": 64, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#d3d12-api-trace", "display_name": "D3D12 API Trace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "d3d12-api-trace", "priority": -1, "content": "Direct3D 12 is a low-overhead 3D graphics and compute API for Microsoft Windows. Information about Direct3D 12 can be found at the Direct3D 12 Programming Guide . Nsight Systems can capture information about Direct3D 12 usage by the profiled process. This includes capturing the execution time of D3D12 API functions, corresponding workloads executed on the GPU, performance markers, and frame durations. The Command List Creation row displays time periods when command lists were being created. This enables developers to improve their application\u2019s multi-threaded command list creation. Command list creation time period is measured between the call to ID3D12GraphicsCommandList::Reset and the call to ID3D12GraphicsCommandList::Close . 
The GPU row shows a compressed view of the D3D12 queue activity, color-coded by the queue type. Expanding it will show the individual queues and their corresponding API calls. A Command Queue row is displayed for each D3D12 command queue created by the profiled application. The row\u2019s header displays the queue\u2019s running index and its type (Direct, Compute, Copy). The DX12 API Memory Ops row displays all API memory operations and non-persistent resource mappings. Event ranges in the row are color-coded by the heap type they belong to (Default, Readback, Upload, Custom, or CPU-Visible VRAM), with usage warnings highlighted in yellow. A breakdown of the operations can be found by expanding the row to show rows for each individual heap type. The following operations and warnings are shown: Calls to ID3D12Device::CreateCommittedResource , ID3D12Device4::CreateCommittedResource1 , and ID3D12Device8::CreateCommittedResource2 A warning will be reported if D3D12_HEAP_FLAG_CREATE_NOT_ZEROED is not set in the method\u2019s HeapFlags parameter Calls to ID3D12Device::CreateHeap and ID3D12Device4::CreateHeap1 A warning will be reported if D3D12_HEAP_FLAG_CREATE_NOT_ZEROED is not set in the Flags field of the method\u2019s pDesc parameter Calls to ID3D12Resource::ReadFromSubResource A warning will be reported if the read is to a D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE CPU page or from a D3D12_HEAP_TYPE_UPLOAD resource Calls to ID3D12Resource::WriteToSubResource A warning will be reported if the write is from a D3D12_CPU_PAGE_PROPERTY_WRITE_BACK CPU page or to a D3D12_HEAP_TYPE_READBACK resource Calls to ID3D12Resource::Map and ID3D12Resource::Unmap will be matched into [Map, Unmap] ranges for non-persistent mappings. If a mapping range is nested, only the most external range (reference count = 1) will be shown. The API row displays time periods where ID3D12CommandQueue::ExecuteCommandLists was called. 
The GPU Workload row displays time periods where workloads were executed by the GPU. The workload\u2019s type (Graphics, Compute, Copy, etc.) is displayed on the bar representing the workload\u2019s GPU execution. In addition, you can see the PIX command queue CPU-side performance markers, GPU-side performance markers and the GPU Command List performance markers, each in their row. Clicking on a GPU workload highlights the corresponding ID3D12CommandQueue::ExecuteCommandLists , ID3D12GraphicsCommandList::Reset and ID3D12GraphicsCommandList::Close API calls, and vice versa. Detecting which CPU thread was blocked by a fence can be difficult in complex apps that run tens of CPU threads. The timeline view displays the 3 operations involved: The CPU thread pushing a signal command and fence value into the command queue. This is displayed on the DX12 Synchronization sub-row of the calling thread. The GPU executing that command, setting the fence value and signaling the fence. This is displayed on the GPU Queue Synchronization sub-row. The CPU thread calling a Win32 wait API to block-wait until the fence is signaled. This is displayed on the Thread\u2019s OS runtime libraries row. Clicking one of these will highlight it and the corresponding other two calls.", "keywords": []}, {"id": 65, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#debug-versions-of-elf-files", "display_name": "Debug Versions of ELF Files", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "debug-versions-of-elf-files", "priority": -1, "content": "Often, after a binary is built, especially if it is built with debug information ( -g compiler flag), it gets stripped before deploying or installing. In this case, ELF sections that contain useful information, such as non-export function names or unwind information, can get stripped as well. 
One solution is to deploy or install the original unstripped library instead of the stripped one, but in many cases this would be inconvenient. Nsight Systems can use missing information from alternative locations. For target devices with Ubuntu, see Debug Symbol Packages . These packages typically install debug ELF files with /usr/lib/debug prefix. Nsight Systems can find debug libraries there, and if it matches the original library (e.g., the built-in BuildID is the same), it will be picked up and used to provide symbol names and unwind information. Many packages have debug companions in the same repository and can be directly installed with APT ( apt-get ). Look for packages with the -dbg suffix. For other packages, refer to the Debug Symbol Packages wiki page on how to add the debs package repository. After setting up the repository and running apt-get update, look for packages with -dbgsym suffix. To verify that a debug version of a library has been picked up and downloaded from the target device, look in the Module Summary section of Analysis Summary :", "keywords": []}, {"id": 66, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#diagnostics-summary-view", "display_name": "Diagnostics Summary View", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "diagnostics-summary-view", "priority": -1, "content": "This view shows important messages. Some of them were generated during the profiling session, while some were added while processing and analyzing data in the report. 
Messages can be one of the following types: Informational messages Warnings Errors To draw attention to important diagnostics messages, a summary line is displayed on the timeline view in the top right corner: Information from this view can be selected and copied using the mouse cursor.", "keywords": []}, {"id": 67, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#direct3d-trace", "display_name": "Direct3D Trace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "direct3d-trace", "priority": -1, "content": "Nsight Systems has the ability to trace both the Direct3D 11 API and the Direct3D 12 API on Windows targets.", "keywords": []}, {"id": 68, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#events-view", "display_name": "Events View", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "events-view", "priority": -1, "content": "The Events View provides a tabular display of the trace events. The view contents can be searched and sorted. Double-clicking an item in the Events View automatically focuses the Timeline View on the corresponding timeline item. API calls, GPU executions, and debug markers that occurred within the boundaries of a debug marker are displayed nested to that debug marker. Multiple levels of nesting are supported. Events view recognizes these types of debug markers: NVTX Vulkan VK_EXT_debug_marker markers, VK_EXT_debug_utils labels PIX events and markers OpenGL KHR_debug markers You can copy and paste from the events view by highlighting rows, using Shift or Ctrl to enable multi-select. Right clicking on the selection will give you a copy option. 
Pasting into text gives you a tab separated view: Pasting into spreadsheet properly copies into rows and columns:", "keywords": []}, {"id": 69, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#example-interactive-cli-command-sequences", "display_name": "Example Interactive CLI Command Sequences", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "example-interactive-cli-command-sequences", "priority": -1, "content": "Collect from beginning of application, end manually nsys start --stop-on-exit=false nsys launch --trace=cuda,nvtx --sample=none <application> [application-arguments] nsys stop Effect: Create interactive CLI process and set it up to begin collecting as soon as an application is launched. Launch the application, set up to allow tracing of CUDA and NVTX as well as collection of thread schedule information. Stop only when explicitly requested. Generate the report#.nsys-rep in the default location. If you start a collection and fail to stop the collection (or if you are allowing it to stop on exit, and the application runs for too long) your system\u2019s storage space may be filled with collected data causing significant issues for the system. Nsight Systems will collect a different amount of data/sec depending on options, but in general Nsight Systems does not support runs of more than 5 minutes duration. Run application, begin collection manually, run until process ends nsys launch -w true <application> [application-arguments] nsys start Effect: Create interactive CLI and launch an application set up for default analysis. Send application output to the terminal. No data is collected until you manually start collection at area of interest. Profile until the application ends. Generate the report#.nsys-rep in the default location. 
If you launch an application and that application and any descendants exit before start is called Nsight Systems will create a fully formed .nsys-rep file containing no data. Run application, start/stop collection using cudaProfilerStart/Stop nsys start -c cudaProfilerApi nsys launch -w true <application> [application-arguments] Effect: Create interactive CLI process and set it up to begin collecting as soon as a cudaProfileStart() is detected. Launch application for default analysis, sending application output to the terminal. Stop collection at next call to cudaProfilerStop, when the user calls nsys stop , or when the root process terminates. Generate the report#.nsys-rep in the default location. If you call nsys launch before nsys start -c cudaProfilerApi and the code contains a large number of short duration cudaProfilerStart/Stop pairs, Nsight Systems may be unable to process them correctly, causing a fault. This will be corrected in a future version. The Nsight Systems CLI does not support multiple calls to the cudaProfilerStart/Stop API at this time. Run application, start/stop collection using NVTX nsys start -c nvtx nsys launch -w true -p MESSAGE@DOMAIN <application> [application-arguments] Effect: Create interactive CLI process and set it up to begin collecting as soon as an NVTX range with given message in given domain (capture range) is opened. Launch application for default analysis, sending application output to the terminal. Stop collection when all capture ranges are closed, when the user calls nsys stop , or when the root process terminates. Generate the report#.nsys-rep in the default location. The Nsight Systems CLI only triggers the profiling session for the first capture range. NVTX capture range can be specified: Message@Domain: All ranges with given message in given domain are capture ranges. 
For example: nsys launch -w true -p profiler@service ./app This would make the profiling start when the first range with message \u201cprofiler\u201d is opened in domain \u201cservice\u201d. Message@*: All ranges with given message in all domains are capture ranges. For example: nsys launch -w true -p profiler@* ./app This would make the profiling start when the first range with message \u201cprofiler\u201d is opened in any domain. Message: All ranges with given message in default domain are capture ranges. For example: nsys launch -w true -p profiler ./app This would make the profiling start when the first range with message \u201cprofiler\u201d is opened in the default domain. By default only messages, provided by NVTX registered strings are considered to avoid additional overhead. To enable non-registered strings check please launch your application with NSYS_NVTX_PROFILER_REGISTER_ONLY=0 environment: nsys launch -w true -p profiler@service -e NSYS_NVTX_PROFILER_REGISTER_ONLY=0 ./app The separator \u2018@\u2019 can be escaped with backslash \u2018\\\u2019. If multiple separators without escape character are specified, only the last one is applied, all others are discarded. Run application, start/stop collection multiple times The interactive CLI supports multiple sequential collections per launch. nsys launch <application> [application-arguments] nsys start nsys stop nsys start nsys stop nsys shutdown --kill sigkill Effect: Create interactive CLI and launch an application set up for default analysis. Send application output to the terminal. No data is collected until the start command is executed. Collect data from start until stop requested, generate report#.qstrm in the current working directory. Collect data from second start until the second stop request, generate report#.nsys-rep (incremented by one) in the current working directory. Shutdown the interactive CLI and send sigkill to the target application\u2019s process group. 
Calling nsys cancel after nsys start will cancel the collection without generating a report.", "keywords": []}, {"id": 70, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#example-mpi", "display_name": "Example: MPI", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "example-mpi", "priority": -1, "content": "A typical scenario is when a computing job is run using one of the MPI implementations. Each instance of the app can be profiled separately, resulting in multiple report files. For example: # Run MPI job without the profiler: mpirun <mpirun-options> ./myApp # Run MPI job and profile each instance of the application: mpirun <mpirun-options> nsys profile -o report-%p <nsys-options>./myApp When each MPI rank runs on a different node, the command above works fine, since the default pairing mode (different hardware) will be used. When all MPI ranks run the localhost only, use this command (value \u201cA\u201d was chosen arbitrarily, it can be any non-empty string): NSYS_SYSTEM_ID=A mpirun <mpirun-options> nsys profile -o report-%p < nsys -options> ./myApp For convenience, the MPI rank can be encoded into the report filename. 
For Open MPI, use the following command to create report files based on the global rank value: mpirun <mpirun-options> nsys profile -o report-%q{OMPI_COMM_WORLD_RANK} < nsys -options> ./myApp MPICH-based implementations set the environment variable PMI_RANK and Slurm ( srun ) provides the global MPI rank in SLURM_PROCID .", "keywords": []}, {"id": 71, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#example-of-using-timeline-with-function-table", "display_name": "Example of Using Timeline with Function Table", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "example-of-using-timeline-with-function-table", "priority": -1, "content": "Here is an example walkthrough of using the timeline and function table with Instruction Pointer (IP)/backtrace Sampling Data Timeline When a collection result is opened in the Nsight Systems GUI, there are multiple ways to view the CPU profiling data - especially the CPU IP / backtrace data. In the timeline, yellow-orange marks can be found under each thread\u2019s timeline that indicate the moment an IP / backtrace sample was collected on that thread (e.g. see the yellow-orange marks in the Specific Samples box above). Hovering the cursor over a mark will cause a tooltip to display the backtrace for that sample. Below the Timeline is a drop-down list with multiple options including Events View, Top-Down View, Bottom-Up View, and Flat View. All four of these views can be used to view CPU IP / backtrace sampling data. If the Bottom-Up View is selected, here is the sampling summary shown in the bottom half of the Timeline View screen. Notice that the summary includes the phrase \u201c65,022 samples are used\u201d indicating how many samples are summarized. By default, functions that were found in less less than 0.5% of the samples are not show. Use the filter button to modify that setting. 
When sampling data is filtered, the Sampling Summary will summarize the selected samples. Samples can be filtered on an OS thread basis, on a time basis, or both. Above, deselecting a checkbox next to a thread removes its samples from the sampling summary. Dragging the cursor over the timeline and selecting \u201cFilter and Zoom In\u201d chooses the samples during the time selected, as seen below. The sample summary includes the phrase \u201c0.35% (225 samples) of data is shown due to applied filters\u201d indicating that only 225 samples are included in the summary results. Deselecting threads one at a time by deselecting their checkbox can be tedious. Click on the down arrow next to a thread and choose Show Only This Thread to deselect all threads except that thread. If Events View is selected in the Timeline View\u2019s drop-down list, right click on a specific thread and choose Show in Events View. The samples collected while that thread executed will be shown in the Events View. Double clicking on a specific sample in the Events view causes the timeline to show when that sample was collected - see the green boxes below. The backtrace for that sample is also shown in the Events View. Backtraces To understand the code path used to get to a specific function shown in the sampling summary, right click on a function and select Expand. The above shows what happens when a function\u2019s backtraces are expanded. In this case, the PCQueuePop function was called from the CmiGetNonLocal function which was called by the CsdNextMessage function which was called by the CsdScheduleForever function. The [Max depth] string marks the end of the collected backtrace. Note that, by default, backtraces with less than 0.5% of the total backtraces are hidden. This behavior can make the percentage results hard to understand. If all backtraces are shown (i.e. the filter is disabled), the results look very different and the numbers add up as expected. 
To disable the filter, click on the Filter\u2026 button and uncheck the Hide functions with CPU usage below X% checkbox. When the filter is disabled, the backtraces are recalculated. Note that you may need to right click on the function and select Expand again to get all of the backtraces to be shown. When backtraces are collected, the whole sample (IP and backtrace) is handled as a single sample. If two samples have the exact same IP and backtrace, they are summed in the final results. If two samples have the same IP but a different backtrace, they will be shown as having the same leaf (i.e. IP) but a different backtrace. As mentioned earlier, when backtraces end, they are marked with the [Max depth] string (unless the backtrace can be traced back to its origin - e.g. __libc_start_main) or the backtrace breaks because an IP cannot be resolved. Above, the leaf function is PCQueuePop. In this case, there are 11 different backtraces that lead to PCQueuePop - all of them end with [Max depth]. For example, the dominant path is PCQueuePop<-CmiGetNonLocal<-CsdNextMessage<-CsdScheduleForever<-[Max depth]. This path accounts for 5.67% of all samples as shown in line 5 (red numbers). The second most dominant path is PCQueuePop<-CmiGetNonLocal<-[Max depth] which accounts for 0.44% of all samples as shown in line 24 (red numbers). The path PCQueuePop<-CmiGetNonLocal<-CsdNextMessage<-CsdScheduleForever<-Sequencer::integrate(int)<-[Max depth] accounts for 0.03% of the samples as shown in line 7 (red numbers). 
Adding up percentages shown in the [Max depth] lines (lines 5, 7, 9, 13, 15, 16, 17, 19, 21, 23, and 24) generates 7.04% which equals the percentage of samples associated with the PCQueuePop function shown in line 0 (red numbers).", "keywords": []}, {"id": 72, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#example-output-from-stats-option", "display_name": "Example Output from --stats Option", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "example-output-from-stats-option", "priority": -1, "content": "The nsys stats command can be used post analysis to generate specific or personalized reports. For a default fixed set of summary statistics to be automatically generated, you can use the --stats option with the nsys profile or nsys start command to generate a fixed set of useful summary statistics. If your run traces CUDA, these include CUDA API, Kernel, and Memory Operation statistics: If your run traces OS runtime events or NVTX push-pop ranges: If your run traces graphics debug markers these include DX11 debug markers, DX12 debug markers, Vulkan debug markers or KHR debug markers: Recipes for these statistics as well as documentation on how to create your own metrics will be available in a future version of the tool.", "keywords": []}, {"id": 73, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#example-single-command-lines", "display_name": "Example Single Command Lines", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "example-single-command-lines", "priority": -1, "content": "Version Information nsys -v Effect: Prints tool version information to the screen. Run with elevated privilege sudo nsys profile <app> Effect: Nsight Systems CLI (and target application) will run with elevated privilege. This is necessary for some features, such as FTrace or system-wide CPU sampling. 
If you don\u2019t want the target application to be elevated, use --run-as option. Default analysis run nsys profile <application> [application-arguments] Effect: Launch the application using the given arguments. Start collecting immediately and end collection when the application stops. Trace CUDA, OpenGL, NVTX, and OS runtime libraries APIs. Collect CPU sampling information and thread scheduling information. With Nsight Systems Embedded Platforms Edition this will only analysis the single process. With Nsight Systems Workstation Edition this will trace the process tree. Generate the report#.nsys-rep file in the default location, incrementing the report number if needed to avoid overwriting any existing output files. Limited trace only run nsys profile --trace=cuda,nvtx -d 20 --sample=none --cpuctxsw=none -o my_test <application> [application-arguments] Effect: Launch the application using the given arguments. Start collecting immediately and end collection after 20 seconds or when the application ends. Trace CUDA and NVTX APIs. Do not collect CPU sampling information or thread scheduling information. Profile any child processes. Generate the output file as my_test.nsys-rep in the current working directory. Delayed start run nsys profile -e TEST_ONLY=0 -y 20 <application> [application-arguments] Effect: Set environment variable TEST_ONLY=0. Launch the application using the given arguments. Start collecting after 20 seconds and end collection at application exit. Trace CUDA, OpenGL, NVTX, and OS runtime libraries APIs. Collect CPU sampling and thread schedule information. Profile any child processes. Generate the report#.nsys-rep file in the default location, incrementing if needed to avoid overwriting any existing output files. Collect ftrace events nsys profile --ftrace=drm/drm_vblank_event -d 20 Effect: Collect ftrace drm_vblank_event events for 20 seconds. Generate the report#.nsys-rep file in the current working directory. 
Note that ftrace event collection requires running as root. To get a list of ftrace events available from the kernel, run the following: sudo cat /sys/kernel/debug/tracing/available_events Run GPU metric sampling on one TU10x nsys profile --gpu-metrics-device=0 --gpu-metrics-set=tu10x-gfxt <application> Effect: Launch application. Collect default options and GPU metrics for the first GPU (a TU10x), using the tu10x-gfxt metric set at the default frequency (10 kHz). Profile any child processes. Generate the report#.nsys-rep file in the default location, incrementing if needed to avoid overwriting any existing output files. Run GPU metric sampling on all GPUs at a set frequency nsys profile --gpu-metrics-device=all --gpu-metrics-frequency=20000 <application> Effect: Launch application. Collect default options and GPU metrics for all available GPUs using the first suitable metric set for each and sampling at 20 kHz. Profile any child processes. Generate the report#.nsys-rep file in the default location, incrementing if needed to avoid overwriting any existing output files. Collect CPU IP/backtrace and CPU context switch nsys profile --sample=system-wide --duration=5 Effect: Collects both CPU IP/backtrace samples using the default backtrace mechanism and traces CPU context switch activity for the whole system for 5 seconds. Note that it requires root permission to run. No hardware or OS events are sampled. Post processing of this collection will take longer due to the large number of symbols to be resolved caused by system-wide sampling. Get list of available CPU core events nsys profile --cpu-core-events=help Effect: Lists the CPU events that can be sampled and the maximum number of CPU events that can be sampled concurrently. 
Collect system-wide CPU events and trace application nsys profile --event-sample=system-wide --cpu-core-events='1,2' --event-sampling-frequency=5 <app> [app args] Effect: Collects CPU IP/backtrace samples using the default backtrace mechanism, traces CPU context switch activity, and samples each CPU\u2019s \u201cCPU Cycles\u201d and \u201cInstructions Retired\u201d event every 200 ms for the whole system. Note that it requires root permission to run. Note that CUDA, NVTX, OpenGL, and OSRT within the app launched by Nsight Systems are traced by default while using this command. Post processing of this collection will take longer due to the large number of symbols to be resolved caused by system-wide sampling. Collect custom ETW trace using configuration file nsys profile --etw-provider=file.JSON Effect: Configure custom ETW collectors using the contents of file.JSON. Collect data for 20 seconds. Generate the report#.nsys-rep file in the current working directory. A template JSON configuration file is located in the Nsight Systems installation directory as \\target-windows-x64\\etw_providers_template.json. 
This path will show up automatically if you call nsys profile --help The level attribute can only be set to one of the following: TRACE_LEVEL_CRITICAL TRACE_LEVEL_ERROR TRACE_LEVEL_WARNING TRACE_LEVEL_INFORMATION TRACE_LEVEL_VERBOSE The flags attribute can only be set to one or more of the following: EVENT_TRACE_FLAG_ALPC EVENT_TRACE_FLAG_CSWITCH EVENT_TRACE_FLAG_DBGPRINT EVENT_TRACE_FLAG_DISK_FILE_IO EVENT_TRACE_FLAG_DISK_IO EVENT_TRACE_FLAG_DISK_IO_INIT EVENT_TRACE_FLAG_DISPATCHER EVENT_TRACE_FLAG_DPC EVENT_TRACE_FLAG_DRIVER EVENT_TRACE_FLAG_FILE_IO EVENT_TRACE_FLAG_FILE_IO_INIT EVENT_TRACE_FLAG_IMAGE_LOAD EVENT_TRACE_FLAG_INTERRUPT EVENT_TRACE_FLAG_JOB EVENT_TRACE_FLAG_MEMORY_HARD_FAULTS EVENT_TRACE_FLAG_MEMORY_PAGE_FAULTS EVENT_TRACE_FLAG_NETWORK_TCPIP EVENT_TRACE_FLAG_NO_SYSCONFIG EVENT_TRACE_FLAG_PROCESS EVENT_TRACE_FLAG_PROCESS_COUNTERS EVENT_TRACE_FLAG_PROFILE EVENT_TRACE_FLAG_REGISTRY EVENT_TRACE_FLAG_SPLIT_IO EVENT_TRACE_FLAG_SYSTEMCALL EVENT_TRACE_FLAG_THREAD EVENT_TRACE_FLAG_VAMAP EVENT_TRACE_FLAG_VIRTUAL_ALLOC Typical case: profile a Python script that uses CUDA nsys profile --trace=cuda,cudnn,cublas,osrt,nvtx --delay=60 python my_dnn_script.py Effect: Launch a Python script and start profiling it 60 seconds after the launch, tracing CUDA, cuDNN, cuBLAS, OS runtime APIs, and NVTX as well as collecting thread schedule information. 
Typical case: profile an app that uses Vulkan nsys profile --trace=vulkan,osrt,nvtx --delay=60 ./myapp Effect: Launch an app and start profiling it 60 seconds after the launch, tracing Vulkan, OS runtime APIs, and NVTX as well as collecting CPU sampling and thread schedule information.", "keywords": []}, {"id": 74, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#example-stats-command-sequences", "display_name": "Example Stats Command Sequences", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "example-stats-command-sequences", "priority": -1, "content": "Display default statistics nsys stats report1.nsys-rep Effect: Export an SQLite file named report1.sqlite from report1.nsys-rep (assuming it does not already exist). Print the default reports in column format to the console. Note: The following two command sequences should present very similar information: nsys profile --stats=true <application> or nsys profile <application> nsys stats report1.nsys-rep Display specific data from a report nsys stats --report cuda_gpu_trace report1.nsys-rep Effect: Export an SQLite file named report1.sqlite from report1.nsys-rep (assuming it does not already exist). Print the report generated by the cuda_gpu_trace script to the console in column format. Generate multiple reports, in multiple formats, output multiple places nsys stats --report cuda_gpu_trace --report cuda_gpu_kern_sum --report cuda_api_sum --format csv,column --output .,- report1.nsys-rep Effect: Export an SQLite file named report1.sqlite from report1.nsys-rep (assuming it does not already exist). Generate three reports. The first, the cuda_gpu_trace report, will be output to the file report1_cuda_gpu_trace.csv in CSV format. The other two reports, cuda_gpu_kern_sum and cuda_api_sum , will be output to the console as columns of data. Although three reports were given, only two formats and outputs are given. 
To reconcile this, both the list of formats and outputs is expanded to match the list of reports by repeating the last element. Submit report data to a command nsys stats --report cuda_api_sum --format table \\ --output @\u201cgrep -E (-|Name|cudaFree\u201d test.sqlite Effect: Open test.sqlite and run the cuda_api_sum script on that file. Generate table data and feed that into the command grep -E (-|Name|cudaFree) . The grep command will filter out everything but the header, formatting, and the cudaFree data, and display the results to the console. Note: When the output name starts with @, it is defined as a command. The command is run, and the output of the report is piped to the command\u2019s stdin (standard-input). The command\u2019s stdout and stderr remain attached to the console, so any output will be displayed directly to the console. Be aware there are some limitations in how the command string is parsed. No shell expansions (including *, ?, [], and ~) are supported. The command cannot be piped to another command, nor redirected to a file using shell syntax. The command and command arguments are split on whitespace, and no quotes (within the command syntax) are supported. For commands that require complex command line syntax, it is suggested that the command be put into a shell script file, and the script designated as the output command", "keywords": []}, {"id": 75, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#expert-system-rules", "display_name": "Expert System Rules", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "expert-system-rules", "priority": -1, "content": "Rules are scripts that run on the SQLite DB output from Nsight Systems to find common improvable usage patterns. Each rule has an advice summary with explanation of the problem found and suggestions to address it. Only the top 50 results are displayed by default. 
There are currently six rules in the expert system. They are described below. Additional rules will be made available in a future version of Nsight Systems .", "keywords": []}, {"id": 76, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#expert-systems-analysis", "display_name": "Expert Systems Analysis", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "expert-systems-analysis", "priority": -1, "content": "The Nsight Systems expert system is a feature aimed at automatic detection of performance optimization opportunities in an application\u2019s profile. It uses a set of predefined rules to determine if the application has known bad patterns.", "keywords": []}, {"id": 77, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#exporting-and-querying-data", "display_name": "Exporting and Querying Data", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "exporting-and-querying-data", "priority": -1, "content": "It is possible to access metric values for automated processing using the Nsight Systems CLI export capabilities. 
An example that extracts values of \u201cSM Active\u201d: $ nsys export -t sqlite report.nsys-rep $ sqlite3 report.sqlite "SELECT rawTimestamp, CAST(JSON_EXTRACT(data, '$.\\"SM Active\\"') as INTEGER) as value FROM GENERIC_EVENTS WHERE value != 0 LIMIT 10" 309277039|80 309301295|99 309325583|99 309349776|99 309373872|60 309397872|19 309421840|100 309446000|100 309470096|100 309494161|99 An overview of data stored in each event (JSON): $ sqlite3 report.sqlite "SELECT data FROM GENERIC_EVENTS LIMIT 1" { "Unallocated Warps in Active SM": "0", "Compute Warps In Flight": "52", "Pixel Warps In Flight": "0", "Vertex\\/Tess\\/Geometry Warps In Flight": "0", "Total SM Occupancy": "52", "GR Active (GE\\/CE)": "100", "Sync Compute In Flight": "0", "Async Compute In Flight": "98", "NVLink bytes received": "0", "NVLink bytes transmitted": "0", "PCIe Rx Throughput": "0", "PCIe Tx Throughput": "1", "DRAM Read Throughput": "0", "DRAM Write Throughput": "0", "Tensor Active \\/ FP16 Active": "0", "SM Issue": "10", "SM Active": "52" } Values are integer percentages (0..100)", "keywords": []}, {"id": 78, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#feature-videos", "display_name": "Feature Videos", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "feature-videos", "priority": -1, "content": "Short videos, only a minute or two, to introduce new features. 
OpenMP Trace Feature Spotlight Command Line Sessions Video Spotlight Direct3D11 Feature Spotlight Vulkan Trace Statistics Driven Profiling Analyzing NCCL Usage with NVIDIA Nsight Systems", "keywords": []}, {"id": 79, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#filter-dialog", "display_name": "Filter Dialog", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "filter-dialog", "priority": -1, "content": "Collapse unresolved lines is useful if some of the binary code does not have symbols. In this case, subtrees that consist of only unresolved symbols get collapsed in the Top-Down view, since they provide very little useful information. Hide functions with CPU usage below X% is useful for large applications, where the sampling profiler hits lots of functions just a few times. To filter out the \u201clong tail,\u201d which is typically not important for CPU performance bottleneck analysis, this checkbox should be selected.", "keywords": []}, {"id": 80, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#for-more-support", "display_name": "For More Support", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "for-more-support", "priority": -1, "content": "To file a bug report or to ask a question on the Nsight Systems forums, you will need to register with the NVIDIA Developer Program. See the FAQ . You do not need to register to read the forums. After that, you can access Nsight Systems Forums and the NVIDIA Bug Tracking System . To submit feedback directly from the GUI, go to Help->Send Feedback and fill out the form. 
Enter your email address if you would like to hear back from the Nsight Systems team.", "keywords": []}, {"id": 81, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#fps-overview", "display_name": "FPS Overview", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "fps-overview", "priority": -1, "content": "The Frame Duration section displays frame durations on both the CPU and the GPU. The frame duration row displays live FPS statistics for the current timeline viewport. Values shown are: Number of CPU frames shown of the total number captured Average, minimal, and maximal CPU frame time of the currently displayed time range Average FPS value for the currently displayed frames The 99th percentile value of the frame lengths (such that only 1% of the frames in the range are longer than this value). The values will update automatically when scrolling, zooming or filtering the timeline view. The stutter row highlights frames that are significantly longer than the other frames in their immediate vicinity. The stutter row uses an algorithm that compares the duration of each frame to the median duration of the surrounding 19 frames. Duration difference under 4 milliseconds is never considered a stutter, to avoid cluttering the display with frames whose absolute stutter is small and not noticeable to the user. For example, if the stutter threshold is set at 20%: Median duration is 10 ms. Frame with 13 ms time will not be reported (relative difference > 20%, absolute difference < 4 ms) Median duration is 60 ms. Frame with 71 ms time will not be reported (relative difference < 20%, absolute difference > 4 ms) Median duration is 60 ms. 
Frame with 80 ms is a stutter (relative difference > 20%, absolute difference > 4 ms, both conditions met) OSC detection The \u201c19 frame window median\u201d algorithm by itself may not work well with some cases of \u201coscillation\u201d (consecutive fast and slow frames), resulting in some false positives. The median duration is not meaningful in cases of oscillation and can be misleading. To address the issue and identify if oscillating frames, the following method is applied: For every frame, calculate the median duration, 1st and 3rd quartiles of 19-frames window. Calculate the delta and ratio between 1st and 3rd quartiles. If the 90th percentile of 3rd - 1st quartile delta array > 4 ms AND the 90th percentile of 3rd/1st quartile array > 1.2 (120%) then mark the results with \u201cOSC\u201d text. Right-clicking the Frame Duration row caption lets you choose the target frame rate (30, 60, 90 or custom frames per second). By clicking the Customize FPS Display option, a customization dialog pops up. In the dialog, you can now define the frame duration threshold to customize the view of the potentially problematic frames. In addition, you can define the threshold for the stutter analysis frames. Frame duration bars are color coded: Green, the frame duration is shorter than required by the target FPS ratio. Yellow, duration is slightly longer than required by the target FPS rate. Red, duration far exceeds that required to maintain the target FPS rate. The CPU Frame Duration row displays the CPU frame duration measured between the ends of consecutive frame boundary calls: The OpenGL frame boundaries are eglSwapBuffers/glXSwapBuffers/SwapBuffers calls. The D3D11 and D3D12 frame boundaries are IDXGISwapChainX::Present calls. The Vulkan frame boundaries are vkQueuePresentKHR calls. 
The timing of the actual calls to the frame boundary calls can be seen in the blue bar at the bottom of the CPU frame duration row The GPU Frame Duration row displays the time measured between The start time of the first GPU workload execution of this frame. The start time of the first GPU workload execution of the next frame. Reflex SDK NVIDIA Reflex SDK is a series of NVAPI calls that allow applications to integrate the Ultra Low Latency driver feature more directly into their game to further optimize synchronization between simulation and rendering stages and lower the latency between user input and final image rendering. For more details about Reflex SDK, see Reflex SDK Site . Nsight Systems will automatically capture NVAPI functions when either Direct3D 11, Direct3D 12, or Vulkan API trace are enabled. The Reflex SDK row displays timeline ranges for the following types of latency markers: RenderSubmit. Simulation. Present. Driver. OS Render Queue. GPU Render. Performance Warnings row This row shows performance warnings and common pitfalls that are automatically detected based on the enabled capture types. 
Warnings are reported for: ETW performance warnings Vulkan calls to vkQueueSubmit and D3D12 calls to ID3D12CommandQueue::ExecuteCommandList that take a longer time to execute than the total time of the GPU workloads they generated D3D12 Memory Operation warnings Usage of Vulkan API functions that may adversely affect performance Creation of a Vulkan device with memory zeroing, whether by physical device default or manually Vulkan command buffer barrier which can be combined or removed, such as subsequent barriers or read-to-read barriers", "keywords": []}, {"id": 82, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#frame-health", "display_name": "Frame Health", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "frame-health", "priority": -1, "content": "The Frame Health row displays actions that took significantly a longer time during the current frame, compared to the median time of the same actions executed during the surrounding 19-frames. This is a great tool for detecting the reason for frame time stuttering. Such actions may be: shader compilation, present, memory mapping, and more. Nsight Systems measures the accumulated time of such actions in each frame. For example: calculating the accumulated time of shader compilations in each frame and comparing it to the accumulated time of shader compilations in the surrounding 19 frames. Example of a Vulkan frame health row:", "keywords": []}, {"id": 83, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#function-table-modes", "display_name": "Function Table Modes", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "function-table-modes", "priority": -1, "content": "The function table can work in three modes: Top-Down View \u2014 In this mode, expanding top-level functions provides information about the callee functions. 
One of the top-level functions is typically the main function of your application, or another entry point defined by the runtime libraries. Bottom-Up View \u2014 This is a reverse of the Top-Down view. On the top level, there are functions directly hit by the sampling profiler. To explore all possible call chains leading to these functions, you need to expand the subtrees of the top-level functions. Flat View \u2014 This view enumerates all functions ever observed by the profiler, even if they have never been directly hit, but just appeared somewhere on the call stack. This view typically provides a high-level overview of which parts of the code are CPU-intensive. Each of the views helps understand particular performance issues of the application being profiled. For example: When trying to find specific bottleneck functions that can be optimized, the Bottom-Up view should be used. Typically, the top few functions should be examined. Expand them to understand in which contexts they are being used. To navigate the call tree of the application and while generally searching for algorithms and parts of the code that consume unexpectedly large amount of CPU time, the Top-Down view should be used. To quickly assess which parts of the application, or high level parts of an algorithm, consume significant amount of CPU time, use the Flat view. The Top-Down and Bottom-Up views have Self and Total columns, while the Flat view has a Flat column. It is important to understand the meaning of each of the columns: Top-Down view Self column denotes the relative amount of time spent executing instructions of this particular function. Total column shows how much time has been spent executing this function, including all other functions called from this one. Total values of sibling rows sum up to the Total value of the parent row, or 100% for the top-level rows. 
Bottom-Up view Self column for top-level rows , as in the Top-Down view, shows how much time has been spent directly in this function. Self times of all top-level rows add up to 100%. Self column for children rows breaks down the value of the parent row based on the various call chains leading to that function. Self times of sibling rows add up to the value of the parent row. Flat view Flat column shows how much time this function has been anywhere on the call stack. Values in this column do not add up or have other significant relationships. If low-impact functions have been filtered out, values may not add up correctly to 100%, or to the value of the parent row. This filtering can be disabled. Contents of the symbols table is tightly related to the timeline. Users can apply and modify filters on the timeline, and they will affect which information is displayed in the symbols table: Per-thread filtering \u2014 Each thread that has sampling information associated with it has a checkbox next to it on the timeline. Only threads with selected checkboxes are represented in the symbols table. Time filtering \u2014 A time filter can be setup on the timeline by pressing the left mouse button, dragging over a region of interest on the timeline, and then choosing Filter by selection in the dropdown menu. In this case, only sampling information collected during the selected time range will be used to build the symbols table. 
If too little sampling data is being used to build the symbols table (for example, when the sampling rate is configured to be low, and a short period of time is used for time-based filtering), the numbers in the symbols table might not be representative or accurate in some cases.", "keywords": []}, {"id": 84, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#function-table-notes", "display_name": "Function Table Notes", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "function-table-notes", "priority": -1, "content": "Last Branch Records vs Frame Pointers Two of the mechanisms available for collecting backtraces are Intel Last Branch Records (LBRs) and frame pointers. LBRs are used to trace every branch instruction via a limited set of hardware registers. They can be configured to generate backtraces but have finite depth based on the CPU\u2019s microarchitecture. LBRs are effectively free to collect but may not be as deep as you need in order to fully understand how the workload arrived at a specific Instruction Pointer (IP). Frame pointers only work when a binary is compiled with the -fno-omit-frame-pointer compiler switch. To determine if frame pointers are enabled on an x86_64 binary running on Linux, dump a binary\u2019s assembly code using the objdump -d [binary_file] command and look for this pattern at the beginning of all functions; push %rbp mov %rsp,%rbp When frame pointers are available in a binary, full stack traces will be captured. Note that libraries that are frequently used by apps and ship with the operating system, such as libc, are generated in release mode and therefore do not include frame pointers. Frequently, when a backtrace includes an address from a system library, the backtrace will fail to resolve further as the frame pointer trail goes cold due to a missing frame pointer. A simple application was developed to show the difference. 
The application calls function a(), which calls b(), which calls c(), etc. Function z() calls a heavy compute function called matrix_multiply(). Almost all of the IP samples are collected while matrix_multiply is executing. The next two screen shots show one of the main differences between frame pointers and LBRs. Note that the frame pointer example, shows the full stack trace while the LBR example, only shows part of the stack due to the limited number of LBR registers in the CPU. Kernel Samples When an IP sample is captured while a kernel mode (i.e. operating system) function is executing, the sample will be shown with an address that starts with 0xffffffff and map to the [kernel.kallsyms] module. [vdso] Samples may be collected while a CPU is executing functions in the Virtual Dynamic Shared Object. In this case, the sample will be resolved (i.e. mapped) to the [vdso] module. The vdso man page provides the following description of the vdso: The \u201cvDSO\u201d (virtual dynamic shared object) is a small shared library that the kernel automatically maps into the address space of all user-space applications. Applications usually do not need to concern themselves with these details as the vDSO is most commonly called by the C library. This way you can code in the normal way using standard functions and the C library will take care of using any functionality that is available via the vDSO. Why does the vDSO exist at all? There are some system calls the kernel provides that user-space code ends up using frequently, to the point that such calls can dominate overall performance. This is due both to the frequency of the call as well as the context-switch overhead that results from exiting user space and entering the kernel. [Unknown] When an address can not be resolved (i.e. 
mapped to a module), its address within the process\u2019 address space will be shown and its module will be marked as [Unknown].", "keywords": []}, {"id": 85, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#general-troubleshooting", "display_name": "General Troubleshooting", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "general-troubleshooting", "priority": -1, "content": "Profiling If the profiler behaves unexpectedly during the profiling session, or the profiling session fails to start, try the following steps: Close the host application. Restart the target device. Start the host application and connect to the target device. Nsight Systems uses a settings file ( NVIDIA Nsight Systems.ini ) on the host to store information about loaded projects, report files, window layout configuration, etc. Location of the settings file is described in the Help \u2192 About dialog. Deleting the settings file will restore Nsight Systems to a fresh state, but all projects and reports will disappear from the Project Explorer. Environment Variables By default, Nsight Systems writes temporary files to /tmp directory. If you are using a system that does not allow writing to /tmp or where the /tmp directory has limited storage you can use the TMPDIR environment variable to set a different location. An example: TMPDIR=/testdata ./bin/nsys profile -t cuda matrixMul Environment variable control support for Windows target trace is not available, but there is a quick workaround: Create a batch file that sets the env vars and launches your application. Set Nsight Systems to launch the batch file as its target, i.e. set the project settings target path to the path of batch file. Start the trace. Nsight Systems will launch the batch file in a new cmd instance and trace any child process it launches. In fact, it will trace the whole process tree whose root is the cmd running your batch file. 
WebGL Testing Nsight Systems cannot profile using the default Chrome launch command. To profile WebGL please follow the following command structure: \u201cC:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe\u201d --inprocess-gpu --no-sandbox --disable-gpu-watchdog --use-angle=gl https://webglsamples.org/aquarium/aquarium.html Common Issues with QNX Targets Make sure that tracelogger utility is available and can be run on the target. Make sure that /tmp directory is accessible and supports sub-directories. When switching between Nsight Systems versions, processes related to the previous version, including profiled applications forked by the daemon, must be killed before the new version is used. If you experience issues after switching between Nsight Systems versions, try rebooting the target.", "keywords": []}, {"id": 86, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#generating-a-new-report", "display_name": "Generating a New Report", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "generating-a-new-report", "priority": -1, "content": "Users can generate a new report by stopping a profiling session. If a profiling session has been canceled, a report will not be generated, and all collected data will be discarded. A new .nsys-rep file will be created and put into the same directory as the project file ( .qdproj ).", "keywords": []}, {"id": 87, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#gpu-memory", "display_name": "GPU Memory", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "gpu-memory", "priority": -1, "content": "Each GPU has two rows detailing its memory utilization: GPU VRAM , showing the memory consumed on the device, and GPU WDDM SYSMEM , showing the memory consumed on the host computer RAM. 
These rows show a green-colored line graph for the memory budget for this memory segment, and an orange-colored line graph for the actual amount of memory used. Note that these graphs are scaled to fit the highest value enconutered, as indicated by the \u201cY axis\u201d value in the row header. You can use the vertical zoom slider in the top-right of the timeline view to make the row taller and view the graph in more detail. Note that the value in the GPU VRAM row is not the same as the CUDA kernel memory allocation graph, see CUDA GPU Memory Graph for that functionality. The GPU VRAM row also has several child rows, accessed by expanding the row in the tree view VidMm Device Suspension This row displays time ranges when the GPU memory manager suspended all memory transfer operations, pending the completion of a single memory transfer. Demoted Memory This row displays the amount of VRAM that was demoted from GPU local memory to non-local memory (possibly due to exceeding the VRAM budget) as a blue-colored line graph. High amounts of demoted memory could be indicative of video memory leaks or poor memory management. Note that the Demoted memory row is scaled to its highest value, similar to the GPU VRAM and GPU WDDM SYSMEM rows. Resource Allocations This row shows markers indicating resource allocation events. VRAM resources are shown as green markers while SYSMEM resources are shown in gray. Hovering over a marker or selecting it in the Events view will display all the allocation parameters as well as the call stack that led to the allocation event. Resource Migrations This row displays a breakdown of resources\u2019 movement between VRAM and SYSMEM, focusing on resource evictions. The main row shows a timeline of total evicted resource memory and count as a red-colored line graph. Each child row displays a timeline of the status of each resource, as reflected by WDDM events related to it. 
If the object has been named using PIX or ID3D11Object::SetName / ID3D12Object::SetName , the name will be shown in the row title. Whether named or not, the row title will also show the resource dimensions, format, priority, and the memory size migrated. If the resource was migrated in parts using subresources, the row can be expanded to show the status for each subresource at any given time. Expanding the row for a resource will show the individual WDDM events relevant to it and the call stacks that led to each event. By default, the resources are sorted by Relevance (most / largest migrations). Right-clicking the main Resource Migrations row header allows choosing between the following sorting options: Relevance Name Format Priority Earliest allocation timestamp (order of appearance on the host) Earliest migration timestamp (order of appearance on the device) The top 5 resources are shown initially. If more than 5 resources exist, a row showing the number of hidden resources and buttons allowing to show more or fewer of them will appear below them. Right-click this row and select \u201cshow all\u201d or \u201cshow all collapsed\u201d to display all the resources at once. Memory Transfer This row shows an overview of all memory transfer operations. Device-to-host transfers are shown in orange, host-to-device transfers are shown in green, discarded device memory is shown in light green, and unknown events are shown in dark gray. The height of each event marker corresponds to the amount of memory that the event affected. Hovering over the marker will show the exact amount. 
Expanding the row will show a breakdown of the events by each specific type.", "keywords": []}, {"id": 88, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#gui-troubleshooting", "display_name": "GUI Troubleshooting", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "gui-troubleshooting", "priority": -1, "content": "If opening the Nsight Systems Linux GUI fails with one of the following errors, you may be missing some required libraries: This application failed to start because it could not find or load the Qt platform plugin "xcb" in "". Available platform plugins are: xcb. Reinstalling the application may fix this problem. or error while loading shared libraries: [library_name]: cannot open shared object file: No such file or directory", "keywords": []}, {"id": 89, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#gui-vnc-container", "display_name": "GUI VNC container", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "gui-vnc-container", "priority": -1, "content": "Nsight Systems provides a build script to build a self isolated Docker container with the Nsight Systems GUI and VNC server. You can find the build.py script in the host-linux-x64/Scripts/VncContainer directory (or similar on other architectures) under your Nsight Systems installation directory. You will need to have Docker , and Python 3.5 or later. Available Parameters Short Name Full Name Description --vnc-password (optional) Default password for VNC access (at least 6 characters). If it is specified and empty - will be asked during the build. Can be changed when running a container. -aba --additional-build-arguments (optional) Additional arguments, which will be passed to the \u201cdocker build\u201d command. -hd --nsys-host-directory (optional) The directory with Nsight Systems host binaries (with GUI). 
-td --nsys-target-directory (optional, repeatable) The directory with Nsight Systems target binaries (can be specified multiple times). --tigervnc (optional) Use TigerVNC instead of x11vnc. --http (optional) Install noVNC in the Docker container for HTTP access. --rdp (optional) Install xRDP in the Docker for RDP access. --geometry (optional) Default VNC server resolution in the format WidthxHeight (default 1920x1080). --build-directory (optional) The directory to save temporary files (with the write access for the current user). By default, script or tmp directory will be used. Ports These ports can be published from the container to provide access to the Docker container: Port Purpose Condition TCP 5900 Port for VNC access TCP 80 (optional) Port for HTTP access to noVNC server Container is build with --http parameter TCP 3389 (optional) Port for RDP access Container is build with --rdp parameter Volumes Docker folder Purpose Description /mnt/host Root path for shared folders Folder owned by the Docker user (inner content can be accessed from Nsight Systems GUI) /mnt/host/Projects Folder with projects and reports, created by Nsight Systems UI in container /mnt/host/logs Folder with inner services logs May be useful to send reports to developers Environment variables Variable Name Purpose VNC_PASSWORD Password for VNC access (at least 6 characters) NSYS_WINDOW_WIDTH Width of VNC server display (in pixels) NSYS_WINDOW_HEIGHT Height of VNC server display (in pixels) Examples With VNC access on port 5916: sudo docker run -p 5916:5900/tcp -ti nsys-ui-vnc:1.0 With VNC access on port 5916 and HTTP access on port 8080: sudo docker run -p 5916:5900/tcp -p 8080:80/tcp -ti nsys-ui-vnc:1.0 With VNC access on port 5916, HTTP access on port 8080 and RDP access on port 33890: sudo docker run -p 5916:5900/tcp -p 8080:80/tcp -p 33890:3389/tcp -ti nsys-ui-vnc:1.0 With VNC access on port 5916, shared \u201cHOME\u201d folder from the host, VNC server resolution 3840x2160, and custom 
VNC password sudo docker run -p 5916:5900/tcp -v $HOME:/mnt/host/home -e NSYS_WINDOW_WIDTH=3840 -e NSYS_WINDOW_HEIGHT=2160 -e VNC_PASSWORD=7654321 -ti nsys-ui-vnc:1.0 With VNC access on port 5916, shared \u201cHOME\u201d folder from the host, and the projects folder to access reports created by Nsight Systems GUI in container sudo docker run -p 5916:5900/tcp -v $HOME:/mnt/host/home -v /opt/NsysProjects:/mnt/host/Projects -ti nsys-ui-vnc:1.0", "keywords": []}, {"id": 90, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#gui-webrtc-container", "display_name": "GUI WebRTC container", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "gui-webrtc-container", "priority": -1, "content": "Instructions for creating a self-isolated Docker container for accessing Nsight Systems through browser using WebRTC. Prerequisites x86_64 Linux Docker Internet access for downloading Ubuntu packages inside the container. Build To build the docker container use the follwing command: $ sudo ./setup/build-docker.sh The above command will create a docker image, which can be run using ./start-nsys.sh Build environment variables Following environment variables can be used to configure build parameters. Variable Description Default Value USERNAME User name for NVIDIA Nsight Systems GUI. Password can be set on container start nvidia Additional docker build arguments Additional Docker Build arguments may be passed to the build-docker.sh. For example: $ sudo ./setup/build-docker.sh --network=host Run To run the docker container: $ sudo ./start-nsys.sh At the end of start-nsys.sh it will provide you with a URL to connect to the WebRTC client. It will look something like http://$HOST_IP:8080/ . You can use this address in your browser to access Nsight Systems GUI interface. Additional docker run arguments Additional Docker Run arguments may be passed to the start-nsys.sh. 
These argument can be used to mount host directories with Nsight Systems reports to the docker container. For example: $ sudo ./start-nsys.sh -v $HOME:/mnt/host/home -v /myawesomereports:/mnt/host/myawesomereports Runtime environment variables Runtime environment variables can be used to configure runtime parameters. Variable Description Default Value PASSWORD Password for WebUI. Username can be set only on the build step nvidia HOST_IP IP of the server that will be sent to client. This IP should be accessible from the client side to establish client/server connection. The IP address of the first available network interface. HTTP_PORT Port for HTTP access to Nsight Systems user interface. 8080 CONNECTION_UDP_PORT UDP port which will be used for handling the incoming connection. 8081 FALLBACK_CONNECTION_TCP_PORT TCP port which will be used for handling the incoming connection in case of connection failure over TCP (can be the same port number as CONNECTION_UDP_PORT). 8081 SCREEN Resolution and refresh rate of the screen used for rendering. 1920x1080@30 USE_OPENH264_BUILD_CACHE Setting this option to false disables caching of openh264 binaries. It should be reenabled on each container start. true OPENH264_BUILD_CACHE_VOLUME_NAME Directory or docker volume name for openh264 binaries cache. nvidia-devtools-streamer-openh264-volume Video encoding By default, the container uses the VP8 codec for video streaming. For an improved experience, the H.264 codec can be enabled. If internet is available to download the required libraries: $ sudo docker exec nvidia-devtools-streamer /setup/enable-h264-streaming.sh If USE_OPENH264_BUILD_CACHE was not set to false, openh264 binaries will be cached in OPENH264_BUILD_CACHE_VOLUME_NAME and H.264 codec will be used during future launches of the container. Currently, only software encoding is supported. 
If internet is not available: $ sudo -- sh -c 'MY_IMAGE_NAME=my-openh264-nsys-streamer:1.0 USE_OPENH264_BUILD_CACHE=false ./start-nsys.sh && docker exec nvidia-devtools-streamer /setup/enable-h264-streaming.sh && docker commit nvidia-devtools-streamer $MY_IMAGE_NAME && docker save -o my-openh264-nsys-streamer.tar $MY_IMAGE_NAME' As a result, my-openh264-nsys-streamer.tar will contain the image with enabled H.264 codec. This file should be transferred to the target machine without internet access. Then, on a machine without internet access, the container can be started using the following command: $ sudo -- sh -c 'CONTAINER_IMAGE=my-openh264-nsys-streamer:1.0 USE_OPENH264_BUILD_CACHE=false docker load -i my-openh264-nsys-streamer.tar && ./start-nsys.sh' Volumes Docker folder Purpose Description /mnt/host/logs Folder with inner services logs May be useful to send reports to NVIDIA developer Example To run the container on 10.10.10.10 network interface, using 8000 HTTP port, 8888 connection port, without caching openh264 binaries: $ sudo HOST_IP=10.10.10.10 HTTP_PORT=8000 CONNECTION_UDP_PORT=8888 USE_OPENH264_BUILD_CACHE=false ./start-nsys.sh", "keywords": []}, {"id": 91, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#hotkey-trace-start-stop", "display_name": "Hotkey Trace Start/Stop", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "hotkey-trace-start-stop", "priority": -1, "content": "Nsight Systems Workstation Edition can use hotkeys to control profiling. Press the hotkey to start and/or stop a trace session from within the target application\u2019s graphic window. This is useful when tracing games and graphic applications that use fullscreen display. In these scenarios switching to Nsight Systems \u2019 UI would unnecessarily introduce the window manager\u2019s footprint into the trace. 
To enable the use of Hotkey check the Hotkey checkbox in the project settings page: The default hotkey is F12.", "keywords": []}, {"id": 92, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#id1", "display_name": "Hotkey Trace Start/Stop", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "id1", "priority": -1, "content": "Nsight Systems Workstation Edition can use hotkeys to control profiling. Press the hotkey to start and/or stop a trace session from within the target application\u2019s graphic window. This is useful when tracing games and graphic applications that use fullscreen display. In these scenarios switching to Nsight Systems \u2019 UI would unnecessarily introduce the window manager\u2019s footprint into the trace. To enable the use of Hotkey check the Hotkey checkbox in the project settings page: The default hotkey is F12. Changing the Default Hotkey Binding - A different hotkey binding can be configured by setting the HotKeyIntValue configuration field in the config.ini file. Set the decimal numeric identifier of the hotkey you would like to use for triggering start/stop from the target app graphics window. The default value is 123 which corresponds to 0x7B, or the F12 key. Virtual key identifiers are detailed in MSDN\u2019s Virtual-Key Codes . Note that you must convert the hexadecimal values detailed in this page to their decimal counterpart before using them in the file. 
For example, to use the F1 key as a start/stop trace hotkey, use the following settings in the config.ini file: HotKeyIntValue=112", "keywords": []}, {"id": 93, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#id2", "display_name": "Limitations", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "id2", "priority": -1, "content": "If metric sets with NVLink are used but the links are not active, they may appear as fully utilized. Only one tool that subscribes to these counters can be used at a time, therefore, Nsight Systems GPU Metrics feature cannot be used at the same time as the following tools: Nsight Graphics Nsight Compute DCGM (Data Center GPU Manager) Use the following command: dcgmi profile --pause dcgmi profile --resume Or API: dcgmProfPause dcgmProfResume Non-NVIDIA products which use: CUPTI sampling used directly in the application. CUPTI trace is okay (although it will block Nsight Systems CUDA trace) DCGM library Nsight Systems limits the amount of memory that can be used to store GPU Metrics samples. Analysis with higher sampling rates or on GPUs with more SMs has a risk of exceeding this limit. This will lead to gaps on timeline filled with Missing Data ranges. Future releases will reduce the frequency of this happening.", "keywords": []}, {"id": 94, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#id3", "display_name": "Overview", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "id3", "priority": -1, "content": "SOC Metrics feature is intended to identify performance limiters in applications running on NVIDIA SOCs and is similar to GPU Metrics. Nsight Systems SOC Metrics is only available for Linux and QNX targets on aarch64. 
It requires NVIDIA Orin architecture or newer.", "keywords": []}, {"id": 95, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#id4", "display_name": "Available metrics", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "id4", "priority": -1, "content": "CPU Read Throughput mcc__dram_throughput_srcnode_cpu_op_read.avg.pct_of_peak_sustained_elapsed CPU Write Throughput mcc__dram_throughput_srcnode_cpu_op_write.avg.pct_of_peak_sustained_elapsed The ratio of cycles the SOC memory controllers were actively processing read/write operations from the CPU to the number of cycles in the same sample period as a percentage. GPU Read Throughput mcc__dram_throughput_srcnode_gpu_op_read.avg.pct_of_peak_sustained_elapsed GPU Write Throughput mcc__dram_throughput_srcnode_gpu_op_write.avg.pct_of_peak_sustained_elapsed The ratio of cycles the SOC memory controllers were actively processing read/write operations from the GPU to the number of cycles in the same sample period as a percentage. DBB Read Throughput mcc__dram_throughput_srcnode_dbb_op_read.avg.pct_of_peak_sustained_elapsed DBB Write Throughput mcc__dram_throughput_srcnode_dbb_op_write.avg.pct_of_peak_sustained_elapsed The ratio of cycles the SOC memory controllers were actively processing read/write operations from not-CPU/not-GPU to the number of cycles in the same sample period as a percentage. DRAM Read Throughput mcc__dram_throughput_op_read.avg.pct_of_peak_sustained_elapsed DRAM Write Throughput mcc__dram_throughput_op_write.avg.pct_of_peak_sustained_elapsed The ratio of cycles the SOC memory controllers were actively processing read/write operations to the number of cycles in the same sample period as a percentage. 
DLA Active nvdla__cycles_active.avg.pct_of_peak_sustained_elapsed The ratio of cycles the DLA (Deep Learning Accelerator) was actively processing a command to the number of cycles in the same sample period as a percentage. DLA Read Throughput nvdla__dbb2nvdla_read_throughput.avg.pct_of_peak_sustained_elapsed DLA Write Throughput nvdla__nvdla2dbb_write_throughput.avg.pct_of_peak_sustained_elapsed The ratio of cycles the DLA (Deep Learning Accelerator) was actively processing read/write operations to the number of cycles in the same sample period as a percentage. NVENC Active nvenc__cycles_active.avg.pct_of_peak_sustained_elapsed The ratio of cycles the NVENC unit was actively processing a command to the number of cycles in the same sample period as a percentage. NVENC Read Throughput nvenc__memif2nvenc_read_throughput.avg.pct_of_peak_sustained_elapsed NVENC Write Throughput nvenc__nvenc2memif_write_throughput.avg.pct_of_peak_sustained_elapsed The ratio of cycles the NVENC unit was actively processing read/write operations to the number of cycles in the same sample period as a percentage. PVA VPU Active pvavpu__vpu_cycles_active.avg.pct_of_peak_sustained_elapsed The ratio of cycles the PVA (Programmable Vision Accelerator) VPU (Vector Processing Unit) was actively processing a command to the number of cycles in the same sample period as a percentage. PVA DMA Read Throughput pva__dbb2pvadma_read_throughput.avg.pct_of_peak_sustained_elapsed PVA DMA Write Throughput pva__pvadma2dbb_write_throughput.avg.pct_of_peak_sustained_elapsed The ratio of cycles the PVA (Programmable Vision Accelerator) VPU (Vector Processing Unit) was actively processing read/write operations to the number of cycles in the same sample period as a percentage. OFA Active ofa_cycles_active.avg.pct_of_peak_sustained_elapsed The ratio of cycles the OFA (Optical Flow Accelerator) was actively processing a command to the number of cycles in the same sample period as a percentage. 
OFA Read Throughput ofa__memif2ofa_read_throughput.avg.pct_of_peak_sustained_elapsed OFA Write Throughput ofa__ofa2memif_write_throughput.avg.pct_of_peak_sustained_elapsed The ratio of cycles the OFA (Optical Flow Accelerator) was actively processing read/write operations to the number of cycles in the same sample period as a percentage. VIC Active vic_cycles_active.avg.pct_of_peak_sustained_elapsed The ratio of cycles the VIC (Video Image Compositor) was actively processing a command to the number of cycles in the same sample period as a percentage. VIC Read Throughput vic__dbb2vic_read_throughput.avg.pct_of_peak_sustained_elapsed VIC Write Throughput vic__vic2dbb_write_throughput.avg.pct_of_peak_sustained_elapsed The ratio of cycles the VIC (Video Image Compositor) was actively processing read/write operations to the number of cycles in the same sample period as a percentage.", "keywords": []}, {"id": 96, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#id5", "display_name": "Limitations", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "id5", "priority": -1, "content": "Only report files collected with Nsight Systems version 2021.3 and newer are fully supported. Sequential reports collected in a single CLI profiling session cannot be loaded into a single timeline yet.", "keywords": []}, {"id": 97, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#import-nvtxt", "display_name": "Import NVTXT", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "import-nvtxt", "priority": -1, "content": "ImportNvtxt is an utility which allows conversion of a NVTXT file to a Nsight Systems report file (*.nsys-rep) or to merge it with an existing report file. Note : NvtxtImport supports custom TimeBase values. Only these values are supported: Manual \u2014 timestamps are set using absolute values. 
Relative \u2014 timestamps are set using relative values with regards to report file which is being merged with nvtxt file. ClockMonotonicRaw \u2014 timestamps values in nvtxt file are considered to be gathered on the same target as the report file which is to be merged with nvtxt using clock_gettime(CLOCK_MONOTONIC_RAW, ...) call. CNTVCT \u2014 timestamps values in nvtxt file are considered to be gathered on the same target as the report file which is to be merged with nvtxt using CNTVCT values. You can get usage info via help message: Print help message: -h [ --help ] Show information about report file: --cmd info -i [--input] arg Create report file from existing nvtxt file: --cmd create -n [--nvtxt] arg -o [--output] arg [-m [--mode] mode_name mode_args] [--target <Hw:Vm>] [--update_report_time] Merge nvtxt file to existing report file: --cmd merge -i [--input] arg -n [--nvtxt] arg -o [--output] arg [-m [--mode] mode_name mode_args] [--target <Hw:Vm>] [--update_report_time] Modes description: lerp - Insert with linear interpolation --mode lerp --ns_a arg --ns_b arg [--nvtxt_a arg --nvtxt_b arg] lin - insert with linear equation --mode lin --ns_a arg --freq arg [--nvtxt_a arg] Modes\u2019 parameters: ns_a - a nanoseconds value ns_b - a nanoseconds value (greater than ns_a ) nvtxt_a - an nvtxt file\u2019s time unit value corresponding to ns_a nanoseconds nvtxt_b - an nvtxt file\u2019s time unit value corresponding to ns_b nanoseconds freq - the nvtxt file\u2019s timer frequency --target <Hw:Vm> - specify target id, e.g. --target 0:1 --update_report_time - prolong report\u2019s profiling session time while merging if needed. 
Without this option all events outside the profiling session time window will be skipped during merging.", "keywords": []}, {"id": 98, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#importing-and-viewing-command-line-results-files", "display_name": "Importing and Viewing Command Line Results Files", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "importing-and-viewing-command-line-results-files", "priority": -1, "content": "The CLI generates a .qdstrm file. The .qdstrm file is an intermediate result file, not intended for multiple imports. It needs to be processed, either by importing it into the GUI or by using the standalone QdstrmImporter to generate an optimized .nsys-rep file. Use this .nsys-rep file when re-opening the result on the same machine, opening the result on a different machine, or sharing results with teammates. This version of Nsight Systems will attempt to automatically convert the .qdstrm file to a .nsys-rep file with the same name after the run finishes if the required libraries are available. The ability to turn off auto-conversion will be added in a later version. Import Into the GUI The CLI and host GUI versions must match to import a .qdstrm file successfully. The host GUI is backward compatible only with .nsys-rep files. Copy the .qdstrm file you are interested in viewing to a system where the Nsight Systems host GUI is installed. Launch the Nsight Systems GUI. Select File->Import\u2026 and choose the .qdstrm file you wish to open. The import of really large, multi-gigabyte, .qdstrm files may take up all of the memory on the host computer and lock up the system. This will be fixed in a later version. 
Importing Windows ETL files For Windows targets, ETL files captured with Xperf or the log.cmd command supplied with GPUView in the Windows Performance Toolkit can be imported to create reports as if they were captured with Nsight Systems \u2019s \u201cWDDM trace\u201d and \u201cCustom ETW trace\u201d features. Simply choose the .etl file from the Import dialog to convert it to a .nsys-rep file. Create .nsys-rep Using QdstrmImporter The CLI and QdstrmImporter versions must match to convert a .qdstrm file into a .nsys-rep file. This .nsys-rep file can then be opened in the same version or more recent versions of the GUI. To run QdstrmImporter on the host system, find the QdstrmImporter binary in the Host-x86_64 directory in your installation. QdstrmImporter is available for all host platforms. See options below. To run QdstrmImporter on the target system, copy the Linux Host-x86_64 directory to the target Linux system or install Nsight Systems for Linux host directly on the target. The Windows or macOS host QdstrmImporter will not work on a Linux Target. See options below. Short Long Parameter Description -h --help Help message providing information about available options and their parameters. -v --version Output QdstrmImporter version information -i --input-file filename or path Import .qdstrm file from this location. -o --output-file filename or path Provide a different file name or path for the resulting .nsys-rep file. Default is the same name and path as the .qdstrm file", "keywords": []}, {"id": 99, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#infiniband-switch-metric-sampling", "display_name": "InfiniBand Switch Metric Sampling", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "infiniband-switch-metric-sampling", "priority": -1, "content": "NVIDIA Quantum InfiniBand switches offer high-bandwidth, low-latency communication. 
Viewing switch metrics, on Nsight Systems timeline, enables developers to better understand their application\u2019s network usage. Developers can use this information to optimize the application\u2019s performance. Limitations/Requirements IB switch metric sampling supports all NVIDIA Quantum switches. The user needs to have permission to query the InfiniBand switch metrics. To check if the current user has permissions to query the InfiniBand switch metrics, check that the user have permission to access /dev/umad To give user permissions to query InfiniBand switch metrics on RedHat systems, follow the directions at RedHat Solutions . To collect InfiniBand switch performance metric, using Nsight Systems CLI, add the --ib-switch-metrics-device command line switch, followed by a comma separated list of InfiniBand switch GUIDs. For example: nsys profile --ib-switch-metrics-device=<IB switch GUID> my_app To get a list of InfiniBand switches, reachable by a given NIC, use: sudo ibswitches -C <nic name> Available Metrics Bytes sent - Number of bytes sent through all switch ports Bytes received - Number of bytes received by all switch ports Send waits - The number of ticks during which switch ports, selected by PortSelect, had data to transmit but no data was sent during the entire tick (either because of insufficient credits or of lack of arbitration) Average sent packet size - Average sent InfiniBand packet size Average received packet size - Average received InfiniBand packet size", "keywords": []}, {"id": 100, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#installing-the-cli-on-your-target", "display_name": "Installing the CLI on Your Target", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "installing-the-cli-on-your-target", "priority": -1, "content": "The Nsight Systems CLI provides a simple interface to collect on a target without using the GUI. 
The collected data can then be copied to any system and analyzed later. The CLI is distributed in the Target directory of the standard Nsight Systems download package. Users who want to install the CLI as a standalone tool can do so by copying the files within the Target directory. If you want the CLI output file (.qdstrm) to be auto-converted (to .nsys-rep) after the analysis is complete, you will need to copy the host directory as well. If you wish to run the CLI without root (recommended mode), you will want to install in a directory where you have full access. Note that you must run the CLI on Windows as administrator.", "keywords": []}, {"id": 101, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#json-and-text-format-description", "display_name": "JSON and Text Format Description", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "json-and-text-format-description", "priority": -1, "content": "JSON and TXT export formats are generated by serializing buffered messages, each on a new line. First, all collected events are processed. Then strings are serialized, followed by stdout, stderr streams if any, followed by thread names. Output layout: {Event #1} {Event #2} ... {Event #N} {Strings} {Streams} {Threads} For easier grepping of JSON output, the --separate-strings switch may be used to force manual splitting of strings, streams and thread names data. 
Example line split: nsys export --export-json --separate-strings sample.nsys-rep -- - {"type":"String","id":"3720","value":"Process 14944 was launched by the profiler"} {"type":"String","id":"3721","value":"Profiling has started."} {"type":"String","id":"3722","value":"Profiler attached to the process."} {"type":"String","id":"3723","value":"Profiling has stopped."} {"type":"ThreadName","globalTid":"72057844756653436","nameId":"14","priority":"10"} {"type":"ThreadName","globalTid":"72057844756657940","nameId":"15","priority":"10"} {"type":"ThreadName","globalTid":"72057844756654400","nameId":"24","priority":"10"} Compare with: nsys export --export-json sample.nsys-rep -- - {"data":["[Unknown]","[Unknown kernel module]","[Max depth]","[Broken backtraces]", "[Called from Java]","QnxKernelTrace","mm_","task_submit","class_id","syncpt_id", "syncpt_thresh","pid","tid","FTrace","[NSys]","[NSys Comms]", "..." ,"Process 14944 was launched by the profiler","Profiling has started.","Profiler attached to the process.","Profiling has stopped."]} {"data":[{"nameIdx":"14","priority":"10","globalTid":"72057844756653436"}, {"nameIdx":"15","priority":"10","globalTid":"72057844756657940"},{"nameIdx":"24", "priority":"10","globalTid":"72057844756654400"}]} Note, that only last few lines are shown here for clarity and that carriage returns and indents were added to avoid wrapping documentation.", "keywords": []}, {"id": 102, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#launch-processes-in-stopped-state", "display_name": "Launch Processes in Stopped State", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "launch-processes-in-stopped-state", "priority": -1, "content": "In many cases, it is important to profile an application from the very beginning of its execution. 
When launching processes, Nsight Systems takes care of it by making sure that the profiling session is fully initialized before making the exec() system call on Linux. If the process launch capabilities of Nsight Systems are not sufficient, the application should be launched manually, and the profiler should be configured to attach to the already launched process. One approach would be to call sleep() somewhere early in the application code, which would provide time for the user to attach to the process in Nsight Systems Embedded Platforms Edition , but there are two other more convenient mechanisms that can be used on Linux, without the need to recompile the application. (Note that the rest of this section is only applicable to Linux-based target devices.) Both mechanisms ensure that between the time the process is created (and therefore its PID is known) and the time any of the application\u2019s code is called, the process is stopped and waits for a signal to be delivered before continuing.", "keywords": []}, {"id": 103, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#launcher", "display_name": "Launcher", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "launcher", "priority": -1, "content": "The second mechanism can be used with any binary. Use [installation_directory]/launcher to launch your application, for example: $ /opt/nvidia/nsight_systems/launcher ./my-binary --arguments The process will be launched, daemonized, and wait for SIGUSR1 signal. After attaching to the process with Nsight Systems , the user needs to manually resume execution of the process from command line: $ pkill -USR1 launcher Note that pkill will send the signal to any process with the matching name. If that is not desirable, use kill to send it to a specific process. The standard output and error streams are redirected to /tmp/stdout_<PID>.txt and /tmp/stderr_<PID>.txt . 
The launcher mechanism is more complex and less automated than the LD_PRELOAD option, but gives more control to the user.", "keywords": []}, {"id": 104, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#launching-gpu-metrics-from-the-cli", "display_name": "Launching GPU Metrics from the CLI", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "launching-gpu-metrics-from-the-cli", "priority": -1, "content": "GPU Metrics feature is controlled with 3 CLI switches: --gpu-metrics-device=[all, none, <index>] selects GPUs to sample (default is none) --gpu-metrics-set=[<index>, <alias>] selects metric set to use (default is the 1st suitable from the list) --gpu-metrics-frequency=[10..200000] selects sampling frequency in Hz (default is 10000) To profile with default options and sample GPU Metrics on GPU 0: # Must have elevated permissions (see https://developer.nvidia.com/ERR_NVGPUCTRPERM) or be root (Linux) or Administrator (Windows) $ nsys profile --gpu-metrics-device=0 ./my-app To list available GPUs, use: $ nsys profile --gpu-metrics-device=help Possible --gpu-metrics-device values are: 0: Quadro GV100 PCI[0000:17:00.0] 1: GeForce RTX 2070 SUPER PCI[0000:65:00.0] all: Select all supported GPUs none: Disable GPU Metrics [Default] By default, the first metric set which supports all selected GPUs is used. But you can manually select another metric set from the list. 
To see available metric sets, use: $ nsys profile --gpu-metrics-set=help Possible --gpu-metrics-set values are: [0] [tu10x] General Metrics for NVIDIA TU10x (any frequency) [1] [tu11x] General Metrics for NVIDIA TU11x (any frequency) [2] [ga100] General Metrics for NVIDIA GA100 (any frequency) [3] [ga10x] General Metrics for NVIDIA GA10x (any frequency) [4] [tu10x-gfxt] Graphics Throughput Metrics for NVIDIA TU10x (frequency >= 10kHz) [5] [ga10x-gfxt] Graphics Throughput Metrics for NVIDIA GA10x (frequency >= 10kHz) [6] [ga10x-gfxact] Graphics Async Compute Triage Metrics for NVIDIA GA10x (frequency >= 10kHz) By default, sampling frequency is set to 10 kHz. But you can manually set it from 10 Hz to 200 kHz using --gpu-metrics-frequency=<value>", "keywords": []}, {"id": 105, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#launching-gpu-metrics-from-the-gui", "display_name": "Launching GPU Metrics from the GUI", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "launching-gpu-metrics-from-the-gui", "priority": -1, "content": "For commands to launch GPU Metrics from the CLI with examples, see the CLI documentation . When launching analysis in Nsight Systems , select Collect GPU Metrics . Select the GPUs dropdown to pick which GPUs you wish to sample. Select the Metric set: dropdown to choose which available metric set you would like to sample. Note that metric sets for GPUs that are not being sampled will be greyed out.", "keywords": []}, {"id": 106, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#launching-processes", "display_name": "Launching Processes", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "launching-processes", "priority": -1, "content": "Nsight Systems can launch new processes for profiling on target devices. 
Profiler ensures that all environment variables are set correctly to successfully collect trace information The Edit arguments\u2026 link will open an editor window, where every command line argument is edited on a separate line. This is convenient when arguments contain spaces or quotes.", "keywords": []}, {"id": 107, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#launching-soc-metrics-from-the-cli", "display_name": "Launching SOC Metrics from the CLI", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "launching-soc-metrics-from-the-cli", "priority": -1, "content": "SOC Metrics feature is controlled with 3 CLI switches: --soc-metrics=[true, false] enables SOC Metrics sampling (default is false) --soc-metrics-set=[<alias>] selects metric set to use (default is the 1st suitable from the list) --soc-metrics-frequency=[100..200000] selects sampling frequency in Hz (default is 10000) To profile with default options: # Must have elevated permissions (see https://developer.nvidia.com/ERR_NVGPUCTRPERM) or be root $ nsys profile --soc-metrics=true ./my-app", "keywords": []}, {"id": 108, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#launching-soc-metrics-from-the-gui", "display_name": "Launching SOC Metrics from the GUI", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "launching-soc-metrics-from-the-gui", "priority": -1, "content": "When launching analysis in Nsight Systems , select Collect SOC Metrics . The settings are similar to GPU Metrics . 
For commands to launch SOC Metrics from the CLI with examples, see the CLI documentation .", "keywords": []}, {"id": 109, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#ld-preload", "display_name": "LD_PRELOAD", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "ld-preload", "priority": -1, "content": "The first mechanism uses LD_PRELOAD environment variable. It only works with dynamically linked binaries, since static binaries do not invoke the runtime linker, and therefore are not affected by the LD_PRELOAD environment variable. For ARMv7 binaries, preload /opt/nvidia/nsight_systems/libLauncher32.so Otherwise if running from host, preload /opt/nvidia/nsight_systems/libLauncher64.so Otherwise if running from CLI, preload [installation_directory]/libLauncher64.so The most common way to do that is to specify the environment variable as part of the process launch command, for example: $ LD_PRELOAD=/opt/nvidia/nsight_systems/libLauncher64.so ./my-aarch64-binary --arguments When loaded, this library will send itself a SIGSTOP signal, which is equivalent to typing Ctrl+Z in the terminal. The process is now a background job, and you can use standard commands like jobs, fg and bg to control them. Use jobs -l to see the PID of the launched process. When attaching to a stopped process, Nsight Systems will send SIGCONT signal, which is equivalent to using the bg command.", "keywords": []}, {"id": 110, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#limitations", "display_name": "Limitations", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "limitations", "priority": -1, "content": "Nsight Systems only traces syscall wrappers exposed by the C runtime. It is not able to trace syscall invoked through assembly code. 
Additional thread states, as well as backtrace collection on long calls, are only enabled if sampling is turned on. It is not possible to configure the depth and duration threshold when collecting backtraces. Currently, only OS runtime libraries calls longer than 80 \u03bcs will generate a backtrace with a maximum of 24 frames. This limitation will be removed in a future version of the product. It is required to compile your application and libraries with the -funwind-tables compiler flag in order for Nsight Systems to unwind the backtraces correctly.", "keywords": []}, {"id": 111, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#locking-a-resource", "display_name": "Locking a Resource", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "locking-a-resource", "priority": -1, "content": "The functions listed below receive a special treatment. If the tool detects that the resource is already acquired by another thread and will induce a blocking call, we always trace it. Otherwise, it will never be traced. pthread_mutex_lock pthread_rwlock_rdlock pthread_rwlock_wrlock pthread_spin_lock sem_wait Note that even if a call is determined as potentially blocking, there is a chance that it may not actually block after a few cycles have elapsed. The call will still be traced in this scenario.", "keywords": []}, {"id": 112, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#logging", "display_name": "Logging", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "logging", "priority": -1, "content": "To enable logging on the host, refer to this config file: host-linux-x64/nvlog.config.template When reporting any bugs please include the build version number as described in the Help \u2192 About dialog. 
If possible, attach log files and report ( .nsys-rep ) files, as they already contain necessary version information.", "keywords": []}, {"id": 113, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#mpi-api-trace", "display_name": "MPI API Trace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "mpi-api-trace", "priority": -1, "content": "For Linux x86_64, ARM and Power targets, Nsight Systems is capable of capturing information about the MPI APIs executed in the profiled process. It has built-in API trace support for Open MPI and MPICH based MPI implementations. Only a subset of the MPI API, including blocking and non-blocking point-to-point and collective communication, and file I/O operations, is traced. If you require more control over the list of traced APIs or if you are using a different MPI implementation, you can use the NVTX wrappers for MPI . If you set the environment variable LD_PRELOAD to the path of generated wrapper library, Nsight Systems will capture and report the MPI API trace information when NVTX tracing is enabled. Choose an NVTX domain name other than \u201cMPI\u201d, since it is filtered out by Nsight Systems when MPI tracing is not enabled. MPI Communication Parameters Nsight Systems can get additional information about MPI communication parameters. Currently, the parameters are only visible in the mouseover tooltips or in the eventlog. This means that the data is only available via the GUI. Future versions of the tool will export this information into the SQLite data files for postrun analysis. In order to fully interpret MPI communications, data for all ranks associated with a communication operation must be loaded into Nsight Systems . Here is an example of MPI_COMM_WORLD data. This does not require any additional team data, since local rank is the same as global rank. 
(Screenshot shows communication parameters for an MPI_Bcast call on rank 3) When not all processes that are involved in an MPI communication are loaded into Nsight Systems the following information is available. Right-hand screenshot shows a reused communicator handle (last number increased). Encoding: MPI_COMM[*team size*]*global-group-root-rank*.*group-ID* When all reports are loaded into Nsight Systems : World rank is shown in addition to group-local rank \u201c(world rank X)\u201d Encoding: MPI_COMM[*team size*]{rank0, rank1, \u2026} At most 8 ranks are shown (the numbers represent world ranks, the position in the list is the group-local rank) MPI functions traced: MPI_Init[_thread], MPI_Finalize MPI_Send, MPI_{B,S,R}send, MPI_Recv, MPI_Mrecv MPI_Sendrecv[_replace] MPI_Barrier, MPI_Bcast MPI_Scatter[v], MPI_Gather[v] MPI_Allgather[v], MPI_Alltoall[{v,w}] MPI_Allreduce, MPI_Reduce[_{scatter,scatter_block,local}] MPI_Scan, MPI_Exscan MPI_Isend, MPI_I{b,s,r}send, MPI_I[m]recv MPI_{Send,Bsend,Ssend,Rsend,Recv}_init MPI_Start[all] MPI_Ibarrier, MPI_Ibcast MPI_Iscatter[v], MPI_Igather[v] MPI_Iallgather[v], MPI_Ialltoall[{v,w}] MPI_Iallreduce, MPI_Ireduce[{scatter,scatter_block}] MPI_I[ex]scan MPI_Wait[{all,any,some}] MPI_Put, MPI_Rput, MPI_Get, MPI_Rget MPI_Accumulate, MPI_Raccumulate MPI_Get_accumulate, MPI_Rget_accumulate MPI_Fetch_and_op, MPI_Compare_and_swap MPI_Win_allocate[_shared] MPI_Win_create[_dynamic] MPI_Win_{attach, detach} MPI_Win_free MPI_Win_fence MPI_Win_{start, complete, post, wait} MPI_Win_[un]lock[_all] MPI_Win_flush[_local][_all] MPI_Win_sync MPI_File_{open,close,delete,sync} MPI_File_{read,write}[_{all,all_begin,all_end}] MPI_File_{read,write}_at[_{all,all_begin,all_end}] MPI_File_{read,write}_shared MPI_File_{read,write}_ordered[_{begin,end}] MPI_File_i{read,write}[_{all,at,at_all,shared}] MPI_File_set_{size,view,info} MPI_File_get_{size,view,info,group,amode} MPI_File_preallocate MPI_Pack[_external] MPI_Unpack[_external]", "keywords": []}, 
{"id": 114, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#multi-report-analysis", "display_name": "Multi-Report Analysis", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "multi-report-analysis", "priority": -1, "content": "PREVIEW FEATURE Nsight Systems Multi-Report Analysis is new functionality that is being added to the Nsight Systems tool to better support complex statistical analysis across multiple result files. Possible use cases for this functionality include: Multi-Node Analysis - When you run Nsight Systems across a cluster, it typically generates one result file per rank on the cluster. While you can load multiple result files into the GUI for visualization, this analysis system allows you to run statistical analysis across all of the result files. Multi-Pass Analysis - Some features in Nsight Systems cannot be run together due to overhead or hardware considerations. For example, there are frequently more CPU performance counters available than your CPU has registers. Using this analysis, you could run multiple runs with different sets of counters and then analyze the results together. Multi-Run Analysis - Sometimes you want to compare two runs that were not taken at the same time together. Perhaps you ran the tool on two different hardware configurations and want to see what changed. Perhaps you are doing regression testing or performance improvement analysis and want to check your status. Comparing those result files statistically can show patterns. Analysis Steps Prior to using multi-report analysis, please make sure that you have installed all required dependencies. See Installing Multi-Report Analysis System in the Installation Guide for more information. Generate the reports - Generate the reports as you always have, in fact, you can use reports that you have generated previously. 
Set up - Choose the recipe (See Available Recipes, below), give it any required parameters, and run. Launch Analysis - Nsight Systems will run the analysis, using your local system or Dask, as you have selected. Output - the output is an .nsys-analysis file, which can then be opened within the Nsight Systems GUI. View the data - depending on your recipe, you can have any number of visualizations, from simple tabular information to Jupyter notebooks which can be opened inside the GUI.", "keywords": []}, {"id": 115, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#network-communication-profiling", "display_name": "Network Communication Profiling", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "network-communication-profiling", "priority": -1, "content": "Nsight Systems can be used to profiles several popular network communication protocols. To enable this, please select the Communication profiling options dropdown. Then select the libraries you would like to trace:", "keywords": []}, {"id": 116, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#next-steps", "display_name": "Next Steps", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "next-steps", "priority": -1, "content": "NVIDIA Visual Profiler (NVVP) and NVIDIA nvprof are deprecated. New GPUs and features will not be supported by those tools. We encourage you to make the move to Nsight Systems now. 
For additional information, suggestions, and rationale, see the blog series in Other Resources .", "keywords": []}, {"id": 117, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#nic-metric-sampling", "display_name": "NIC Metric Sampling", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "nic-metric-sampling", "priority": -1, "content": "Overview NVIDIA ConnectX smart network interface cards (smart NICs) offer advanced hardware offloads and accelerations for network operations. Viewing smart NICs metrics, on Nsight Systems timeline, enables developers to better understand their application\u2019s network usage. Developers can use this information to optimize the application\u2019s performance. Limitations/Requirements NIC metric sampling supports NVIDIA ConnectX boards starting with ConnectX 5 NIC metric sampling is supported on Linux x86_64 and ARM Server (SBSA) machines only, having minimum Linux kernel 4.12 and minimum MLNX_OFED 4.1. You can download the latest and archived versions of the MLX_OFED driver from the MLNX_OFED Download Center . If collecting NIC metrics within a container, make sure that the container has access to the driver on the host machine. To check manually if OFED is installed and get its version you can run: /usr/bin/ofed_info cat /sys/module/"$(cat /proc/modules | grep -o -E "^mlx._core")"/version To check if the target system meets the requirements for NIC metrics collection you can run nsys status --network . Collecting NIC Metrics Using the Command Line To collect NIC performance metrics, using Nsight Systems CLI, add the --nic-metrics command line switch: nsys profile --nic-metrics=true my_app Available Metrics Bytes sent - Number of bytes sent through all NIC ports. Bytes received - Number of bytes received by all NIC ports. CNPs sent - Number of congestion notification packets sent by the NIC. 
CNPs received - Number of congestion notification packets received and handled by the NIC. Send waits - The number of ticks during which ports had data to transmit but no data was sent during the entire tick (either because of insufficient credits or because of lack of arbitration) Note: Each one of the mentioned metrics is shown only if it has non-zero value during profiling. Usage Examples The Bytes sent/sec and the Bytes received/sec metrics enables identifying idle and busy NIC times. Developers may shift network operations from busy to idle times to reduce network congestion and latency. Developers can use idle NIC times to send additional data without reducing application performance. CNPs (congestion notification packets) received/sent and Send waits metrics may explain network latencies. A developer seeing the time periods when the network was congested may rewrite his algorithm to avoid the observed congestions. RDMA over Converged Ethernet (RoCE) traffic is not logged into the Nsight Systems NIC metrics.", "keywords": []}, {"id": 118, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#nv-decoder-api-functions-traced-by-default", "display_name": "NV Decoder API Functions Traced by Default", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "nv-decoder-api-functions-traced-by-default", "priority": -1, "content": "cuvidCreateVideoSource cuvidCreateVideoSourceW cuvidDestroyVideoSource cuvidSetVideoSourceState cudaVideoState cuvidGetSourceVideoFormat cuvidGetSourceAudioFormat cuvidCreateVideoParser cuvidParseVideoData cuvidDestroyVideoParser cuvidCreateDecoder cuvidDestroyDecoder cuvidDecodePicture cuvidGetDecodeStatus cuvidReconfigureDecoder cuvidMapVideoFrame cuvidUnmapVideoFrame cuvidMapVideoFrame64 cuvidUnmapVideoFrame64 cuvidCtxLockCreate cuvidCtxLockDestroy cuvidCtxLock cuvidCtxUnlock", "keywords": []}, {"id": 119, "doc_id": 184, "filename": "UserGuide/index.html", 
"domain_name": "page", "name": "UserGuide/index#nv-encoder-api-functions-traced-by-default", "display_name": "NV Encoder API Functions Traced by Default", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "nv-encoder-api-functions-traced-by-default", "priority": -1, "content": "NvEncodeAPICreateInstance nvEncOpenEncodeSession nvEncGetEncodeGUIDCount nvEncGetEncodeGUIDs nvEncGetEncodeProfileGUIDCount nvEncGetEncodeProfileGUIDs nvEncGetInputFormatCount nvEncGetInputFormats nvEncGetEncodeCaps nvEncGetEncodePresetCount nvEncGetEncodePresetGUIDs nvEncGetEncodePresetConfig nvEncGetEncodePresetConfigEx nvEncInitializeEncoder nvEncCreateInputBuffer nvEncDestroyInputBuffer nvEncCreateBitstreamBuffer nvEncDestroyBitstreamBuffer nvEncEncodePicture nvEncLockBitstream nvEncUnlockBitstream nvEncLockInputBuffer nvEncUnlockInputBuffer nvEncGetEncodeStats nvEndGetSequenceParams nvEncRegisterAsyncEvent nvEncUnregisterAsyncEvent nvEncMapInputResource nvEncUnmapInputResource nvEncDestroyEncoder nvEncInvalidateRefFrames nvEncOpenEncodeSessionEx nvEncRegisterResource nvEncUnregisterResource nvEncReconfigureEncoder nvEncCreateMVBuffer nvEncDestroyMVBuffer nvEncRunMotionEstimationOnly nvEncGetLastErrorString nvEncSetIOCudaStreams nvEncGetSequenceParamEx", "keywords": []}, {"id": 120, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#nv-jpeg-api-functions-traced-by-default", "display_name": "NV JPEG API Functions Traced by Default", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "nv-jpeg-api-functions-traced-by-default", "priority": -1, "content": "nvjpegBufferDeviceCreate nvjpegBufferDeviceDestroy nvjpegBufferDeviceRetrieve nvjpegBufferPinnedCreate nvjpegBufferPinnedDestroy nvjpegBufferPinnedRetrieve nvjpegCreate nvjpegCreateEx nvjpegCreateSimple nvjpegDecode nvjpegDecodeBatched nvjpegDecodeBatchedEx nvjpegDecodeBatchedInitialize 
nvjpegDecodeBatchedPreAllocate nvjpegDecodeBatchedSupported nvjpegDecodeBatchedSupportedEx nvjpegDecodeJpeg nvjpegDecodeJpegDevice nvjpegDecodeJpegHost nvjpegDecodeJpegTransferToDevice nvjpegDecodeParamsCreate nvjpegDecodeParamsDestroy nvjpegDecodeParamsSetAllowCMYK nvjpegDecodeParamsSetOutputFormat nvjpegDecodeParamsSetROI nvjpegDecodeParamsSetScaleFactor nvjpegDecoderCreate nvjpegDecoderDestroy nvjpegDecoderJpegSupported nvjpegDecoderStateCreate nvjpegDestroy nvjpegEncodeGetBufferSize nvjpegEncodeImage nvjpegEncodeRetrieveBitstream nvjpegEncodeRetrieveBitstreamDevice nvjpegEncoderParamsCopyHuffmanTables nvjpegEncoderParamsCopyMetadata nvjpegEncoderParamsCopyQuantizationTables nvjpegEncoderParamsCreate nvjpegEncoderParamsDestroy nvjpegEncoderParamsSetEncoding nvjpegEncoderParamsSetOptimizedHuffman nvjpegEncoderParamsSetQuality nvjpegEncoderParamsSetSamplingFactors nvjpegEncoderStateCreate nvjpegEncoderStateDestroy nvjpegEncodeYUV,(nvjpegHandle_t handle nvjpegGetCudartProperty nvjpegGetDeviceMemoryPadding nvjpegGetImageInfo nvjpegGetPinnedMemoryPadding nvjpegGetProperty nvjpegJpegStateCreate nvjpegJpegStateDestroy nvjpegJpegStreamCreate nvjpegJpegStreamDestroy nvjpegJpegStreamGetChromaSubsampling nvjpegJpegStreamGetComponentDimensions nvjpegJpegStreamGetComponentsNum nvjpegJpegStreamGetFrameDimensions nvjpegJpegStreamGetJpegEncoding nvjpegJpegStreamParse nvjpegJpegStreamParseHeader nvjpegSetDeviceMemoryPadding nvjpegSetPinnedMemoryPadding nvjpegStateAttachDeviceBuffer nvjpegStateAttachPinnedBuffer", "keywords": []}, {"id": 121, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#nvidia-nvshmem-and-nccl-trace", "display_name": "NVIDIA NVSHMEM and NCCL Trace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "nvidia-nvshmem-and-nccl-trace", "priority": -1, "content": "The NVIDIA network communication libraries NVSHMEM and NCCL have been instrumented using NVTX annotations. 
To enable tracing these libraries in Nsight Systems , turn on NVTX tracing in the GUI or CLI. To enable the NVTX instrumentation of the NVSHMEM library, make sure that the environment variable NVSHMEM_NVTX is set properly, e.g. NVSHMEM_NVTX=common .", "keywords": []}, {"id": 122, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#nvidia-video-codec-sdk-trace", "display_name": "NVIDIA Video Codec SDK Trace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "nvidia-video-codec-sdk-trace", "priority": -1, "content": "Nsight Systems for x86 Linux and Windows targets can trace calls from the NV Video Codec SDK. This software trace can be launched from the GUI or using the --trace nvvideo from the CLI On the timeline, calls on the CPU to the NV Encoder API and NV Decoder API will be shown.", "keywords": []}, {"id": 123, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#nvtx-trace", "display_name": "NVTX Trace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "nvtx-trace", "priority": -1, "content": "The NVIDIA Tools Extension Library (NVTX) is a powerful mechanism that allows users to manually instrument their application. Nsight Systems can then collect the information and present it on the timeline. Nsight Systems supports version 3.0 of the NVTX specification. The following features are supported: Domains nvtxDomainCreate(), nvtxDomainDestroy() nvtxDomainRegisterString() Push-pop ranges (nested ranges that start and end in the same thread). 
nvtxRangePush(), nvtxRangePushEx() nvtxRangePop() nvtxDomainRangePushEx() nvtxDomainRangePop() Start-end ranges (ranges that are global to the process and are not restricted to a single thread) nvtxRangeStart(), nvtxRangeStartEx() nvtxRangeEnd() nvtxDomainRangeStartEx() nvtxDomainRangeEnd() Marks nvtxMark(), nvtxMarkEx() nvtxDomainMarkEx() Thread names nvtxNameOsThread() Categories nvtxNameCategory() nvtxDomainNameCategory() To learn more about specific features of NVTX, please refer to the NVTX header file: nvToolsExt.h or the NVTX documentation . To use NVTX in your application, follow these steps: Add #include "nvtx3/nvToolsExt.h" in your source code. The nvtx3 directory is located in the Nsight Systems package in the Target-<architecture>/nvtx/include directory and is available via github at http://github.com/NVIDIA/NVTX . Add the following compiler flag: -ldl Add calls to the NVTX API functions. For example, try adding nvtxRangePush("main") in the beginning of the main() function, and nvtxRangePop() just before the return statement in the end. For convenience in C++ code, consider adding a wrapper that implements RAII (resource acquisition is initialization) pattern, which would guarantee that every range gets closed. In the project settings, select the Collect NVTX trace checkbox. In addition, by enabling the \u201cInsert NVTX Marker hotkey\u201d option it is possible to add NVTX markers to a running non-console applications by pressing the F11 key. These will appear in the report under the NVTX Domain named \u201cHotKey markers\u201d. Typically calls to NVTX functions can be left in the source code even if the application is not being built for profiling purposes, since the overhead is very low when the profiler is not attached. NVTX is not intended to annotate very small pieces of code that are being called very frequently. 
A good rule of thumb to use: if code being annotated usually takes less than 1 microsecond to execute, adding an NVTX range around this code should be done carefully. Range annotations should be matched carefully. If many ranges are opened but not closed, Nsight Systems has no meaningful way to visualize it. A rule of thumb is to not have more than a couple dozen ranges open at any point in time. Nsight Systems does not support reports with many unclosed ranges. NVTX Domains and Categories NVTX domains enable scoping of annotations. Unless specified differently, all events and annotations are in the default domain. Additionally, categories can be used to group events. Nsight Systems gives the user the ability to include or exclude NVTX events from a particular domain. This can be especially useful if you are profiling across multiple libraries and are only interested in nvtx events from some of them. This functionality is also available from the CLI. See the CLI documentation for --nvtx-domain-include and --nvtx-domain-exclude for more details. Categories that are set in by the user will be recognized and displayed in the GUI.", "keywords": []}, {"id": 124, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#openacc-trace", "display_name": "OpenACC Trace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "openacc-trace", "priority": -1, "content": "Nsight Systems for Linux x86_64 and Power targets is capable of capturing information about OpenACC execution in the profiled process. OpenACC versions 2.0, 2.5, and 2.6 are supported when using PGI runtime version 15.7 or later. In order to differentiate constructs (see tooltip below), a PGI runtime of 16.0 or later is required. Note that Nsight Systems does not support the GCC implementation of OpenACC at this time. Under the CPU rows in the timeline tree, each thread that uses OpenACC will show OpenACC trace information. 
You can click on a OpenACC API call to see correlation with the underlying CUDA API calls (highlighted in teal): If the OpenACC API results in GPU work, that will also be highlighted: Hovering over a particular OpenACC construct will bring up a tooltip with details about that construct: To capture OpenACC information from the Nsight Systems GUI, select the Collect OpenACC trace checkbox under Collect CUDA trace configurations. Note that turning on OpenACC tracing will also turn on CUDA tracing. Please note that if your application crashes before all collected OpenACC trace data has been copied out, some or all data might be lost and not present in the report.", "keywords": []}, {"id": 125, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#opengl-trace", "display_name": "OpenGL Trace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "opengl-trace", "priority": -1, "content": "OpenGL and OpenGL ES APIs can be traced to assist in the analysis of CPU and GPU interactions. A few usage examples are: Visualize how long eglSwapBuffers (or similar) is taking. API trace can easily show correlations between thread state and graphics driver\u2019s behavior, uncovering where the CPU may be waiting on the GPU. Spot bubbles of opportunity on the GPU, where more GPU workload could be created. Use KHR_debug extension to trace GL events on both the CPU and GPU. OpenGL trace feature in Nsight Systems consists of two different activities which will be shown in the CPU rows for those threads CPU trace : interception of API calls that an application does to APIs (such as OpenGL, OpenGL ES, EGL, GLX, WGL, etc.). GPU trace (or workload trace ): trace of GPU workload (activity) triggered by use of OpenGL or OpenGL ES. 
Since draw calls are executed back-to-back, the GPU workload trace ranges include many OpenGL draw calls and operations in order to optimize performance overhead, rather than tracing each individual operation. To collect GPU trace, the glQueryCounter() function is used to measure how much time batches of GPU workload take to complete. Ranges defined by the KHR_debug calls are represented similarly to OpenGL API and OpenGL GPU workload trace. GPU ranges in this case represent incremental draw cost . They cannot fully account for GPUs that can execute multiple draw calls in parallel. In this case, Nsight Systems will not show overlapping GPU ranges.", "keywords": []}, {"id": 126, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#opengl-trace-using-command-line", "display_name": "OpenGL Trace Using Command Line", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "opengl-trace-using-command-line", "priority": -1, "content": "For general information on using the target CLI, see CLI Profiling on Linux . 
For the CLI, the functions that are traced are set to the following list: glWaitSync glReadPixels glReadnPixelsKHR glReadnPixelsEXT glReadnPixelsARB glReadnPixels glFlush glFinishFenceNV glFinish glClientWaitSync glClearTexSubImage glClearTexImage glClearStencil glClearNamedFramebufferuiv glClearNamedFramebufferiv glClearNamedFramebufferfv glClearNamedFramebufferfi glClearNamedBufferSubDataEXT glClearNamedBufferSubData glClearNamedBufferDataEXT glClearNamedBufferData glClearIndex glClearDepthx glClearDepthf glClearDepthdNV glClearDepth glClearColorx glClearColorIuiEXT glClearColorIiEXT glClearColor glClearBufferuiv glClearBufferSubData glClearBufferiv glClearBufferfv glClearBufferfi glClearBufferData glClearAccum glClear glDispatchComputeIndirect glDispatchComputeGroupSizeARB glDispatchCompute glComputeStreamNV glNamedFramebufferDrawBuffers glNamedFramebufferDrawBuffer glMultiDrawElementsIndirectEXT glMultiDrawElementsIndirectCountARB glMultiDrawElementsIndirectBindlessNV glMultiDrawElementsIndirectBindlessCountNV glMultiDrawElementsIndirectAMD glMultiDrawElementsIndirect glMultiDrawElementsEXT glMultiDrawElementsBaseVertex glMultiDrawElements glMultiDrawArraysIndirectEXT glMultiDrawArraysIndirectCountARB glMultiDrawArraysIndirectBindlessNV glMultiDrawArraysIndirectBindlessCountNV glMultiDrawArraysIndirectAMD glMultiDrawArraysIndirect glMultiDrawArraysEXT glMultiDrawArrays glListDrawCommandsStatesClientNV glFramebufferDrawBuffersEXT glFramebufferDrawBufferEXT glDrawTransformFeedbackStreamInstanced glDrawTransformFeedbackStream glDrawTransformFeedbackNV glDrawTransformFeedbackInstancedEXT glDrawTransformFeedbackInstanced glDrawTransformFeedbackEXT glDrawTransformFeedback glDrawTexxvOES glDrawTexxOES glDrawTextureNV glDrawTexsvOES glDrawTexsOES glDrawTexivOES glDrawTexiOES glDrawTexfvOES glDrawTexfOES glDrawRangeElementsEXT glDrawRangeElementsBaseVertexOES glDrawRangeElementsBaseVertexEXT glDrawRangeElementsBaseVertex glDrawRangeElements glDrawPixels 
glDrawElementsInstancedNV glDrawElementsInstancedEXT glDrawElementsInstancedBaseVertexOES glDrawElementsInstancedBaseVertexEXT glDrawElementsInstancedBaseVertexBaseInstanceEXT glDrawElementsInstancedBaseVertexBaseInstance glDrawElementsInstancedBaseVertex glDrawElementsInstancedBaseInstanceEXT glDrawElementsInstancedBaseInstance glDrawElementsInstancedARB glDrawElementsInstanced glDrawElementsIndirect glDrawElementsBaseVertexOES glDrawElementsBaseVertexEXT glDrawElementsBaseVertex glDrawElements glDrawCommandsStatesNV glDrawCommandsStatesAddressNV glDrawCommandsNV glDrawCommandsAddressNV glDrawBuffersNV glDrawBuffersATI glDrawBuffersARB glDrawBuffers glDrawBuffer glDrawArraysInstancedNV glDrawArraysInstancedEXT glDrawArraysInstancedBaseInstanceEXT glDrawArraysInstancedBaseInstance glDrawArraysInstancedARB glDrawArraysInstanced glDrawArraysIndirect glDrawArraysEXT glDrawArrays eglSwapBuffersWithDamageKHR eglSwapBuffers glXSwapBuffers glXQueryDrawable glXGetCurrentReadDrawable glXGetCurrentDrawable glGetQueryObjectuivEXT glGetQueryObjectuivARB glGetQueryObjectuiv glGetQueryObjectivARB glGetQueryObjectiv", "keywords": []}, {"id": 127, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#opening-an-existing-report", "display_name": "Opening an Existing Report", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "opening-an-existing-report", "priority": -1, "content": "An existing .nsys-rep file can be opened using File > Open\u2026 .", "keywords": []}, {"id": 128, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#opening-in-jupyter-notebook", "display_name": "Opening in Jupyter Notebook", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "opening-in-jupyter-notebook", "priority": -1, "content": "Running the recipe command creates a new analysis file (.nsys-analysis). 
Open the Nsight Systems GUI and select File->Open , and pick your file. Open the folder icon and click on the notebook icon to open the Jupyter notebook. Run the Jupyter notebook: And the output appears on-screen. In this case a heat map of activity running a Jacobi solver.", "keywords": []}, {"id": 129, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#openmp-trace", "display_name": "OpenMP Trace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "openmp-trace", "priority": -1, "content": "Nsight Systems for Linux is capable of capturing information about OpenMP events. This functionality is built on the OpenMP Tools Interface (OMPT), full support is available only for runtime libraries supporting tools interface defined in OpenMP 5.0 or greater. As an example, LLVM OpenMP runtime library partially implements tools interface. If you use PGI compiler <= 20.4 to build your OpenMP applications, add -mp=libomp switch to use LLVM OpenMP runtime and enable OMPT based tracing. If you use Clang, make sure the LLVM OpenMP runtime library you link to was compiled with tools interface enabled. Only a subset of the OMPT callbacks are processed: ompt_callback_parallel_begin ompt_callback_parallel_end ompt_callback_sync_region ompt_callback_task_create ompt_callback_task_schedule ompt_callback_implicit_task ompt_callback_master ompt_callback_reduction ompt_callback_task_create ompt_callback_cancel ompt_callback_mutex_acquire, ompt_callback_mutex_acquired ompt_callback_mutex_acquired, ompt_callback_mutex_released ompt_callback_mutex_released ompt_callback_work ompt_callback_dispatch ompt_callback_flush The raw OMPT events are used to generate ranges indicating the runtime of OpenMP operations and constructs. 
Example screenshot:", "keywords": []}, {"id": 130, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#openshmem-library-trace", "display_name": "OpenSHMEM Library Trace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "openshmem-library-trace", "priority": -1, "content": "If OpenSHMEM library trace is selected Nsight Systems will trace the subset of OpenSHMEM API functions that are most likely be involved in performance bottlenecks. To keep overhead low Nsight Systems does not trace all functions. OpenSHMEM 1.5 Functions Not Traced shmem_my_pe shmem_n_pes shmem_global_exit shmem_pe_accessible shmem_addr_accessible shmem_ctx_{create,destroy,get_team} shmem_global_exit shmem_info_get_{version,name} shmem_{my_pe,n_pes,pe_accessible,ptr} shmem_query_thread shmem_team_{create_ctx,destroy} shmem_team_get_config shmem_team_{my_pe,n_pes,translate_pe} shmem_team_split_{2d,strided} shmem_test*", "keywords": []}, {"id": 131, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#os-runtime-default-function-list", "display_name": "OS Runtime Default Function List", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "os-runtime-default-function-list", "priority": -1, "content": "Libc system call wrappers accept accept4 acct alarm arch_prctl bind bpf brk chroot clock_nanosleep connect copy_file_range creat creat64 dup dup2 dup3 epoll_ctl epoll_pwait epoll_wait fallocate fallocate64 fcntl fdatasync flock fork fsync ftruncate futex ioctl ioperm iopl kill killpg listen membarrier mlock mlock2 mlockall mmap mmap64 mount move_pages mprotect mq_notify mq_open mq_receive mq_send mq_timedreceive mq_timedsend mremap msgctl msgget msgrcv msgsnd msync munmap nanosleep nfsservctl open open64 openat openat64 pause pipe pipe2 pivot_root poll ppoll prctl pread pread64 preadv preadv2 preadv64 process_vm_readv 
process_vm_writev pselect6 ptrace pwrite pwrite64 pwritev pwritev2 pwritev64 read readv reboot recv recvfrom recvmmsg recvmsg rt_sigaction rt_sigqueueinfo rt_sigsuspend rt_sigtimedwait sched_yield seccomp select semctl semget semop semtimedop send sendfile sendfile64 sendmmsg sendmsg sendto shmat shmctl shmdt shmget shutdown sigaction sigsuspend sigtimedwait socket socketpair splice swapoff swapon sync sync_file_range syncfs tee tgkill tgsigqueueinfo tkill truncate umount2 unshare uselib vfork vhangup vmsplice wait wait3 wait4 waitid waitpid write writev _sysctl POSIX Threads pthread_barrier_wait pthread_cancel pthread_cond_broadcast pthread_cond_signal pthread_cond_timedwait pthread_cond_wait pthread_create pthread_join pthread_kill pthread_mutex_lock pthread_mutex_timedlock pthread_mutex_trylock pthread_rwlock_rdlock pthread_rwlock_timedrdlock pthread_rwlock_timedwrlock pthread_rwlock_tryrdlock pthread_rwlock_trywrlock pthread_rwlock_wrlock pthread_spin_lock pthread_spin_trylock pthread_timedjoin_np pthread_tryjoin_np pthread_yield sem_timedwait sem_trywait sem_wait I/O aio_fsync aio_fsync64 aio_suspend aio_suspend64 fclose fcloseall fflush fflush_unlocked fgetc fgetc_unlocked fgets fgets_unlocked fgetwc fgetwc_unlocked fgetws fgetws_unlocked flockfile fopen fopen64 fputc fputc_unlocked fputs fputs_unlocked fputwc fputwc_unlocked fputws fputws_unlocked fread fread_unlocked freopen freopen64 ftrylockfile fwrite fwrite_unlocked getc getc_unlocked getdelim getline getw getwc getwc_unlocked lockf lockf64 mkfifo mkfifoat posix_fallocate posix_fallocate64 putc putc_unlocked putwc putwc_unlocked Miscellaneous forkpty popen posix_spawn posix_spawnp sigwait sigwaitinfo sleep system usleep", "keywords": []}, {"id": 132, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#os-runtime-libraries-trace", "display_name": "OS Runtime Libraries Trace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", 
"anchor": "os-runtime-libraries-trace", "priority": -1, "content": "On Linux, OS runtime libraries can be traced to gather information about low-level userspace APIs. This traces the system call wrappers and thread synchronization interfaces exposed by the C runtime and POSIX Threads (pthread) libraries. This does not perform a complete runtime library API trace, but instead focuses on the functions that can take a long time to execute, or could potentially cause your thread be unscheduled from the CPU while waiting for an event to complete. OS runtime trace is not available for Windows targets. OS runtime tracing complements and enhances sampling information by: Visualizing when the process is communicating with the hardware, controlling resources, performing multi-threading synchronization or interacting with the kernel scheduler. Adding additional thread states by correlating how OS runtime libraries traces affect the thread scheduling: Waiting \u2014 the thread is not scheduled on a CPU, it is inside of an OS runtime libraries trace and is believed to be waiting on the firmware to complete a request. In OS runtime library function \u2014 the thread is scheduled on a CPU and inside of an OS runtime libraries trace. If the trace represents a system call, the process is likely running in kernel mode. Collecting backtraces for long OS runtime libraries call. This provides a way to gather blocked-state backtraces, allowing you to gain more context about why the thread was blocked so long, yet avoiding unnecessary overhead for short events. To enable OS runtime libraries tracing from Nsight Systems : CLI \u2014 Use the -t , --trace option with the osrt parameter. See Command Line Options for more information. GUI \u2014 Select the Collect OS runtime libraries trace checkbox. You can also use Skip if shorter than . This will skip calls shorter than the given threshold. Enabling this option will improve performances as well as reduce noise on the timeline. 
We strongly encourage you to skip OS runtime libraries call shorter than 1 \u03bcs.", "keywords": []}, {"id": 133, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#os-runtime-libraries-trace-filters", "display_name": "OS Runtime Libraries Trace Filters", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "os-runtime-libraries-trace-filters", "priority": -1, "content": "The OS runtime libraries tracing is limited to a select list of functions. It also depends on the version of the C runtime linked to the application.", "keywords": []}, {"id": 134, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#other-platforms-or-if-the-previous-steps-did-not-help", "display_name": "Other platforms, or if the previous steps did not help", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "other-platforms-or-if-the-previous-steps-did-not-help", "priority": -1, "content": "Launch Nsight Systems using the following command line to determine which libraries are missing and install them. $ QT_DEBUG_PLUGINS=1 ./nsys-ui If the workload does not run when launched via Nsight Systems or the timeline is empty, check the stderr.log and stdout.log (click on drop-down menu showing Timeline View and click on Files ) to see the errors encountered by the app.", "keywords": []}, {"id": 135, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#other-resources", "display_name": "Other Resources", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "other-resources", "priority": -1, "content": "Looking for information to help you use Nsight Systems the most effectively? 
Here are some more resources you might want to review:", "keywords": []}, {"id": 136, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#overview", "display_name": "Overview", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "overview", "priority": -1, "content": "GPU Metrics feature is intended to identify performance limiters in applications using GPU for computations and graphics. It uses periodic sampling to gather performance metrics and detailed timing statistics associated with different GPU hardware units taking advantage of specialized hardware to capture this data in a single pass with minimal overhead. Note: GPU Metrics will give you precise device level information, but it does not know which process or context is involved. GPU context switch trace provides less precise information, but will give you process and context information. These metrics provide an overview of GPU efficiency over time within compute, graphics, and input/output (IO) activities such as: IO throughputs: PCIe, NVLink, and GPU memory bandwidth SM utilization: SMs activity, tensor core activity, instructions issued, warp occupancy, and unassigned warp slots It is designed to help users answer the common questions: Is my GPU idle? Is my GPU full? Enough kernel grids size and streams? Are my SMs and warp slots full? Am I using TensorCores? Is my instruction rate high? Am I possibly blocked on IO, or number of warps, etc Nsight Systems GPU Metrics is only available for Linux targets on x86-64 and aarch64, and for Windows targets. It requires NVIDIA Turing architecture or newer. Minimum required driver versions: NVIDIA Turing architecture TU10x, TU11x - r440 NVIDIA Ampere architecture GA100 - r450 NVIDIA Ampere architecture GA100 MIG - r470 TRD1 NVIDIA Ampere architecture GA10x - r455 Permissions: Elevated permissions are required. On Linux use sudo to elevate privileges. 
On Windows the user must run from an admin command prompt or accept the UAC escalation dialog. See Permissions Issues and Performance Counters for more information. Tensor Core: If you run nsys profile --gpu-metrics-device all , the Tensor Core utilization can be found in the GUI under the SM instructions/Tensor Active row. Please note that it is not practical to expect a CUDA kernel to reach 100% Tensor Core utilization since there are other overheads. In general, the more computation-intensive an operation is, the higher Tensor Core utilization rate the CUDA kernel can achieve.", "keywords": []}, {"id": 137, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#pipeline-creation-feedback", "display_name": "Pipeline Creation Feedback", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "pipeline-creation-feedback", "priority": -1, "content": "When tracing target application calls to Vulkan pipeline creation APIs, Nsight Systems leverages the Pipeline Creation Feedback extension to collect more details about the duration of individual pipeline creation stages. See Pipeline Creation Feedback extension for details about this extension. Vulkan pipeline creation feedback is available on NVIDIA driver release 435 or later.", "keywords": []}, {"id": 138, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#post-collection-analysis", "display_name": "Post-Collection Analysis", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "post-collection-analysis", "priority": -1, "content": "Once you have profiled using Nsight Systems there are many options for analyzing the collected data as well as to output it in various formats. 
These options are available from the CLI or the GUI", "keywords": []}, {"id": 139, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#profiling-qnx-targets-from-the-gui", "display_name": "Profiling QNX Targets from the GUI", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "profiling-qnx-targets-from-the-gui", "priority": -1, "content": "Profiling on QNX devices is similar to the profiling on Linux devices. Please refer to the Profiling Linux Targets from the GUI section for the detailed documentation. The major differences on the platforms are listed below: Backtrace sampling is not supported. Instead backtraces are collected for long OS runtime libraries calls. Please refer to the OS Runtime Libraries Trace section for the detailed documentation. CUDA support is limited to CUDA 9.0+ Filesystem on QNX device might be mounted read-only. In that case Nsight Systems is not able to install target-side binaries, required to run the profiling session. Please make sure that target filesystem is writable before connecting to QNX target. For example, make sure the following command works: echo XX > /xx && ls -l /xx", "keywords": []}, {"id": 140, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#profiling-windows-targets-from-the-gui", "display_name": "Profiling Windows Targets from the GUI", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "profiling-windows-targets-from-the-gui", "priority": -1, "content": "Profiling on Windows devices is similar to the profiling on Linux devices. Please refer to the Profiling Linux Targets from the GUI section for the detailed documentation and connection information. 
The major differences on the platforms are listed below:", "keywords": []}, {"id": 141, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#python-backtrace-sampling", "display_name": "Python Backtrace Sampling", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "python-backtrace-sampling", "priority": -1, "content": "Nsight Systems for Arm server (SBSA) platforms, x86 Linux and Windows targets, is capable of periodically capturing Python backtrace information. This functionality is available when tracing Python interpreters of version 3.9 or later. Capturing python backtrace is done in periodic samples, in a selected frequency ranging from 1Hz - 2KHz with a default value of 1KHz. Note that this feature provides meaningful backtraces for Python processes, when profiling Python-only workflows, consider disabling the CPU sampling option to reduce overhead. To enable Python backtrace sampling from Nsight Systems : CLI \u2014 Set --python-sampling=true and use the --python-sampling-frequency option to set the sampling rate. GUI \u2014 Select the Collect Python backtrace samples checkbox. Example screenshot:", "keywords": []}, {"id": 142, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#python-gil-tracing", "display_name": "Python GIL Tracing", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "python-gil-tracing", "priority": -1, "content": "Nsight Systems for Arm server (SBSA) platforms, x86 Linux and Windows targets, is capable of tracing when Python threads are waiting to hold and holding the GIL (Global Interpreter Lock). The Python source code does not require any changes. This feature requires CPython interpreter, release 3.9 or later. CLI \u2014 Set --trace=python-gil . GUI \u2014 Select the Trace GIL checkbox under Python profiling options . 
Example screenshot:", "keywords": []}, {"id": 143, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#python-nvtx-annotations", "display_name": "Python NVTX Annotations", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "python-nvtx-annotations", "priority": -1, "content": "Nsight Systems for Arm server (SBSA) platforms, x86 Linux and Windows targets, is capable of using NVTX to annotate Python functions. The Python source code does not require any changes. This feature requires CPython interpreter, release 3.8 or later. The annotations are configured in a JSON file. An example file is located in Nsight Systems installation folder in <target-platform-folder>/PythonNvtx/annotations.json . Notes: Annotating function from module __main__ is not supported. To enable Python NVTX annotations from Nsight Systems : CLI \u2014 Set --python-nvtx-annotations=<json_file> . GUI \u2014 Select the Python NVTX annotations checkbox and specify the JSON file. 
Example screenshot:", "keywords": []}, {"id": 144, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#remoting-to-a-windows-based-machine", "display_name": "Remoting to a Windows Based Machine", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "remoting-to-a-windows-based-machine", "priority": -1, "content": "To perform remote profiling to a target Windows based machines, install and configure an OpenSSH Server on the target machine.", "keywords": []}, {"id": 145, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#report-formatters-shipped-with-product-name", "display_name": "Report Formatters Shipped With Nsight Systems", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "report-formatters-shipped-with-product-name", "priority": -1, "content": "The following formats are available in Nsight Systems", "keywords": []}, {"id": 146, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#report-tab", "display_name": "Report Tab", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "report-tab", "priority": -1, "content": "While generating a new report or loading an existing one, a new tab will be created. The most important parts of the report tab are: View selector \u2014 Allows switching between Analysis Summary , Timeline View , Diagnostics Summary , and Symbol Resolution Logs views. Timeline \u2014 This is where all charts are displayed. Function table \u2014 Located below the timeline, it displays statistical information about functions in the target application in multiple ways. 
Additionally, the following controls are available: Zoom slider \u2014 Allows you to vertically zoom the charts on the timeline.", "keywords": []}, {"id": 147, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#sampling-frequency", "display_name": "Sampling frequency", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "sampling-frequency", "priority": -1, "content": "Sampling frequency can be selected from the range of 10 Hz - 200 kHz. The default value is 10 kHz. The maximum sampling frequency without buffer overflow events depends on GPU (SM count), GPU load intensity, and overall system load. The bigger the chip and the higher the load, the lower the maximum frequency. If you need higher frequency, you can increase it until you get \u201cBuffer overflow\u201d message in the Diagnostics Summary report page. Each metric set has a recommended sampling frequency range in its description. These ranges are approximate. If you observe Inconsistent Data or Missing Data ranges on timeline, please try closer to the recommended frequency.", "keywords": []}, {"id": 148, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#sharing-a-report-file", "display_name": "Sharing a Report File", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "sharing-a-report-file", "priority": -1, "content": "Report files ( .nsys-rep ) are self-contained and can be shared with other users of Nsight Systems . The only requirement is that the same or newer version of Nsight Systems is always used to open report files. Project files ( .qdproj ) are currently not shareable, since they contain full paths to the report files. 
To quickly navigate to the directory containing the report file, right click on it in the Project Explorer, and choose Show in folder\u2026 in the context menu.", "keywords": []}, {"id": 149, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#sqlite-schema-event-values", "display_name": "SQLite Schema Event Values", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "sqlite-schema-event-values", "priority": -1, "content": "Here are the set values stored in enums in the Nsight Systems SQLite schema CUDA Event Class Values 0 - TRACE_PROCESS_EVENT_CUDA_RUNTIME 1 - TRACE_PROCESS_EVENT_CUDA_DRIVER 13 - TRACE_PROCESS_EVENT_CUDA_EGL_DRIVER 28 - TRACE_PROCESS_EVENT_CUDNN 29 - TRACE_PROCESS_EVENT_CUBLAS 33 - TRACE_PROCESS_EVENT_CUDNN_START 34 - TRACE_PROCESS_EVENT_CUDNN_FINISH 35 - TRACE_PROCESS_EVENT_CUBLAS_START 36 - TRACE_PROCESS_EVENT_CUBLAS_FINISH 67 - TRACE_PROCESS_EVENT_CUDABACKTRACE 77 - TRACE_PROCESS_EVENT_CUDA_GRAPH_NODE_CREATION See CUPTI documentation for detailed information on collected event and data types. NVTX Event Type Values 33 - NvtxCategory 34 - NvtxMark 39 - NvtxThread 59 - NvtxPushPopRange 60 - NvtxStartEndRange 75 - NvtxDomainCreate 76 - NvtxDomainDestroy The difference between text and textId columns is that if an NVTX event message was passed via call to nvtxDomainRegisterString function, then the message will be available through textId field, otherwise the text field will contain the message if it was provided. 
OpenGL Events KHR event class values 62 - KhrDebugPushPopRange 63 - KhrDebugGpuPushPopRange KHR source kind values 0x8249 - GL_DEBUG_SOURCE_THIRD_PARTY 0x824A - GL_DEBUG_SOURCE_APPLICATION KHR type values 0x824C - GL_DEBUG_TYPE_ERROR 0x824D - GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR 0x824E - GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR 0x824F - GL_DEBUG_TYPE_PORTABILITY 0x8250 - GL_DEBUG_TYPE_PERFORMANCE 0x8251 - GL_DEBUG_TYPE_OTHER 0x8268 - GL_DEBUG_TYPE_MARKER 0x8269 - GL_DEBUG_TYPE_PUSH_GROUP 0x826A - GL_DEBUG_TYPE_POP_GROUP KHR severity values 0x826B - GL_DEBUG_SEVERITY_NOTIFICATION 0x9146 - GL_DEBUG_SEVERITY_HIGH 0x9147 - GL_DEBUG_SEVERITY_MEDIUM 0x9148 - GL_DEBUG_SEVERITY_LOW OSRT Event Class Values OS runtime libraries can be traced to gather information about low-level userspace APIs. This traces the system call wrappers and thread synchronization interfaces exposed by the C runtime and POSIX Threads (pthread) libraries. This does not perform a complete runtime library API trace, but instead focuses on the functions that can take a long time to execute, or could potentially cause your thread be unscheduled from the CPU while waiting for an event to complete. OSRT events may have callchains attached to them, depending on selected profiling settings. 
In such cases, one can use callchainId column to select relevant callchains from OSRT_CALLCHAINS table OSRT event class values 27 - TRACE_PROCESS_EVENT_OS_RUNTIME 31 - TRACE_PROCESS_EVENT_OS_RUNTIME_START 32 - TRACE_PROCESS_EVENT_OS_RUNTIME_FINISH DX12 Event Class Values 41 - TRACE_PROCESS_EVENT_DX12_API 42 - TRACE_PROCESS_EVENT_DX12_WORKLOAD 43 - TRACE_PROCESS_EVENT_DX12_START 44 - TRACE_PROCESS_EVENT_DX12_FINISH 52 - TRACE_PROCESS_EVENT_DX12_DISPLAY 59 - TRACE_PROCESS_EVENT_DX12_CREATE_OBJECT PIX Event Class Values 65 - TRACE_PROCESS_EVENT_DX12_DEBUG_API 75 - TRACE_PROCESS_EVENT_DX11_DEBUG_API Vulkan Event Class Values 53 - TRACE_PROCESS_EVENT_VULKAN_API 54 - TRACE_PROCESS_EVENT_VULKAN_WORKLOAD 55 - TRACE_PROCESS_EVENT_VULKAN_START 56 - TRACE_PROCESS_EVENT_VULKAN_FINISH 60 - TRACE_PROCESS_EVENT_VULKAN_CREATE_OBJECT 66 - TRACE_PROCESS_EVENT_VULKAN_DEBUG_API Vulkan Flags VALID_BIT = 0x00000001 CACHE_HIT_BIT = 0x00000002 BASE_PIPELINE_ACCELERATION_BIT = 0x00000004 SLI Event Class Values 62 - TRACE_PROCESS_EVENT_SLI 63 - TRACE_PROCESS_EVENT_SLI_START 64 - TRACE_PROCESS_EVENT_SLI_FINISH SLI Transfer Info Values 0 - P2P_SKIPPED 1 - P2P_EARLY_PUSH 2 - P2P_PUSH_FAILED 3 - P2P_2WAY_OR_PULL 4 - P2P_PRESENT 5 - P2P_DX12_INIT_PUSH_ON_WRITE WDDM Event Values VIDMM operation type values 0 - None 101 - RestoreSegments 102 - PurgeSegments 103 - CleanupPrimary 104 - AllocatePagingBufferResources 105 - FreePagingBufferResources 106 - ReportVidMmState 107 - RunApertureCoherencyTest 108 - RunUnmapToDummyPageTest 109 - DeferredCommand 110 - SuspendMemorySegmentAccess 111 - ResumeMemorySegmentAccess 112 - EvictAndFlush 113 - CommitVirtualAddressRange 114 - UncommitVirtualAddressRange 115 - DestroyVirtualAddressAllocator 116 - PageInDevice 117 - MapContextAllocation 118 - InitPagingProcessVaSpace 200 - CloseAllocation 202 - ComplexLock 203 - PinAllocation 204 - FlushPendingGpuAccess 205 - UnpinAllocation 206 - MakeResident 207 - Evict 208 - LockInAperture 209 - InitContextAllocation 
210 - ReclaimAllocation 211 - DiscardAllocation 212 - SetAllocationPriority 1000 - EvictSystemMemoryOfferList Paging queue type values 0 - VIDMM_PAGING_QUEUE_TYPE_UMD 1 - VIDMM_PAGING_QUEUE_TYPE_Default 2 - VIDMM_PAGING_QUEUE_TYPE_Evict 3 - VIDMM_PAGING_QUEUE_TYPE_Reclaim Packet type values 0 - DXGKETW_RENDER_COMMAND_BUFFER 1 - DXGKETW_DEFERRED_COMMAND_BUFFER 2 - DXGKETW_SYSTEM_COMMAND_BUFFER 3 - DXGKETW_MMIOFLIP_COMMAND_BUFFER 4 - DXGKETW_WAIT_COMMAND_BUFFER 5 - DXGKETW_SIGNAL_COMMAND_BUFFER 6 - DXGKETW_DEVICE_COMMAND_BUFFER 7 - DXGKETW_SOFTWARE_COMMAND_BUFFER Engine type values 0 - DXGK_ENGINE_TYPE_OTHER 1 - DXGK_ENGINE_TYPE_3D 2 - DXGK_ENGINE_TYPE_VIDEO_DECODE 3 - DXGK_ENGINE_TYPE_VIDEO_ENCODE 4 - DXGK_ENGINE_TYPE_VIDEO_PROCESSING 5 - DXGK_ENGINE_TYPE_SCENE_ASSEMBLY 6 - DXGK_ENGINE_TYPE_COPY 7 - DXGK_ENGINE_TYPE_OVERLAY 8 - DXGK_ENGINE_TYPE_CRYPTO DMA interrupt type values 1 = DXGK_INTERRUPT_DMA_COMPLETED 2 = DXGK_INTERRUPT_DMA_PREEMPTED 4 = DXGK_INTERRUPT_DMA_FAULTED 9 = DXGK_INTERRUPT_DMA_PAGE_FAULTED Queue type values 0 = Queue_Packet 1 = Dma_Packet 2 = Paging_Queue_Packet Driver Events Load balance event type values 1 - LoadBalanceEvent_GPU 8 - LoadBalanceEvent_CPU 21 - LoadBalanceMasterEvent_GPU 22 - LoadBalanceMasterEvent_CPU OpenMP Events OpenMP event class values 78 - TRACE_PROCESS_EVENT_OPENMP 79 - TRACE_PROCESS_EVENT_OPENMP_START 80 - TRACE_PROCESS_EVENT_OPENMP_FINISH OpenMP event kind values 15 - OPENMP_EVENT_KIND_TASK_CREATE 16 - OPENMP_EVENT_KIND_TASK_SCHEDULE 17 - OPENMP_EVENT_KIND_CANCEL 20 - OPENMP_EVENT_KIND_MUTEX_RELEASED 21 - OPENMP_EVENT_KIND_LOCK_INIT 22 - OPENMP_EVENT_KIND_LOCK_DESTROY 25 - OPENMP_EVENT_KIND_DISPATCH 26 - OPENMP_EVENT_KIND_FLUSH 27 - OPENMP_EVENT_KIND_THREAD 28 - OPENMP_EVENT_KIND_PARALLEL 29 - OPENMP_EVENT_KIND_SYNC_REGION_WAIT 30 - OPENMP_EVENT_KIND_SYNC_REGION 31 - OPENMP_EVENT_KIND_TASK 32 - OPENMP_EVENT_KIND_MASTER 33 - OPENMP_EVENT_KIND_REDUCTION 34 - OPENMP_EVENT_KIND_MUTEX_WAIT 35 - 
OPENMP_EVENT_KIND_CRITICAL_SECTION 36 - OPENMP_EVENT_KIND_WORKSHARE OpenMP thread type values 1 - OpenMP Initial Thread 2 - OpenMP Worker Thread 3 - OpenMP Internal Thread 4 - Unknown OpenMP sync region kind values 1 - Barrier 2 - Implicit barrier 3 - Explicit barrier 4 - Implementation-dependent barrier 5 - Taskwait 6 - Taskgroup OpenMP task kind values 1 - Initial task 2 - Implicit task 3 - Explicit task OpenMP prior task status values 1 - Task completed 2 - Task yielded to another task 3 - Task was cancelled 7 - Task was switched out for other reasons OpenMP mutex kind values 1 - Waiting for lock 2 - Testing lock 3 - Waiting for nested lock 4 - Tesing nested lock 5 - Waitng for entering critical section region 6 - Waiting for entering atomic region 7 - Waiting for entering ordered region OpenMP critical section kind values 5 - Critical section region 6 - Atomic region 7 - Ordered region OpenMP workshare kind values 1 - Loop region 2 - Sections region 3 - Single region (executor) 4 - Single region (waiting) 5 - Workshare region 6 - Distrubute region 7 - Taskloop region OpenMP dispatch kind values 1 - Iteration 2 - Section", "keywords": []}, {"id": 150, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#sqlite-schema-reference", "display_name": "SQLite Schema Reference", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "sqlite-schema-reference", "priority": -1, "content": "Nsight Systems has the ability to export SQLite database files from the .nsys-rep results file. From the CLI, use nsys export . From the GUI, call File->Export... . Note: The .nsys-rep report format is the only data format for Nsight Systems that should be considered forward compatible. The SQLite schema can and will change in the future. The schema for a concrete database can be obtained with the sqlite3 tool built-in command .schema . 
The sqlite3 tool can be located in the Target or Host directory of your Nsight Systems installation. Note: Currently tables are created lazily, and therefore not every table described in the documentation will be present in a particular database. This will change in a future version of the product. If you want a full schema of all possible tables, use nsys export --lazy=false during export phase. Currently, a table is created for each data type in the exported database. Since usage patterns for exported data may vary greatly and no default use cases have been established, no indexes or extra constraints are created. Instead, refer to the SQLite Examples section for a list of common recipes. This may change in a future version of the product. To check the version of your exported SQLite file, check the value of EXPORT_SCHEMA_VERSION in the EXPORT_META_DATA table. The schema version is a common three-value major/minor/micro version number. The first value, or major value, indicates the overall format of the database, and is only changed if there is a major re-write or re-factor of the entire database format. It is assumed that if the major version changes, all scripts or queries will break. The middle, or minor, version is changed anytime there is a more localized, but potentially breaking change, such as renaming an existing column, or changing the type of an existing column. The last, or micro version is changed any time there are additions, such as a new table or column, that should not introduce any breaking change when used with well-written, best-practices queries. This is the schema as of the 2023.2 release, schema version 3.1.7. CREATE TABLE StringIds ( -- Consolidation of repetitive string values. id INTEGER NOT NULL PRIMARY KEY, -- ID reference value. value TEXT NOT NULL -- String value. ); CREATE TABLE ThreadNames ( nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Thread name priority INTEGER, -- Priority of the thread. 
globalTid INTEGER -- Serialized GlobalId. ); CREATE TABLE ProcessStreams ( globalPid INTEGER NOT NULL, -- Serialized GlobalId. filenameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- File name contentId INTEGER NOT NULL -- REFERENCES StringIds(id) -- Stream content ); CREATE TABLE TARGET_INFO_SYSTEM_ENV ( globalVid INTEGER, -- Serialized GlobalId. devStateName TEXT NOT NULL, -- Device state name. name TEXT NOT NULL, -- Property name. nameEnum INTEGER NOT NULL, -- Property enum value. value TEXT NOT NULL -- Property value. ); CREATE TABLE TARGET_INFO_NIC_INFO ( globalId INTEGER, -- Device state globalId stateName TEXT NOT NULL, -- Device state name nicId INTEGER NOT NULL, -- Network interface Id. name TEXT NOT NULL, -- Network interface name deviceId INTEGER NOT NULL, -- REFERENCES ENUM_NET_DEVICE_ID(id) vendorId INTEGER NOT NULL, -- REFERENCES ENUM_NET_VENDOR_ID(id) linkLayer INTEGER NOT NULL -- REFERENCES ENUM_NET_LINK_TYPE(id) ); CREATE TABLE TARGET_INFO_SESSION_START_TIME ( utcEpochNs INTEGER, -- UTC Epoch timestamp at start of the capture (ns). utcTime TEXT, -- Start of the capture in UTC. localTime TEXT -- Start of the capture in local time of target. ); CREATE TABLE ANALYSIS_DETAILS ( -- Details about the analysis session. globalVid INTEGER NOT NULL, -- Serialized GlobalId. duration INTEGER NOT NULL, -- The total time span of the entire trace (ns). startTime INTEGER NOT NULL, -- Trace start timestamp in nanoseconds. stopTime INTEGER NOT NULL -- Trace stop timestamp in nanoseconds. ); CREATE TABLE TARGET_INFO_GPU ( vmId INTEGER NOT NULL, -- Serialized GlobalId. id INTEGER NOT NULL, -- Device ID. name TEXT, -- Device name. busLocation TEXT, -- PCI bus location. isDiscrete INTEGER, -- True if discrete, false if integrated. l2CacheSize INTEGER, -- Size of L2 cache (B). totalMemory INTEGER, -- Total amount of memory on the device (B). memoryBandwidth INTEGER, -- Amount of memory transferred (B). clockRate INTEGER, -- Clock frequency (Hz). 
smCount INTEGER, -- Number of multiprocessors on the device. pwGpuId INTEGER, -- PerfWorks GPU ID. uuid TEXT, -- Device UUID. luid INTEGER, -- Device LUID. chipName TEXT, -- Chip name. cuDevice INTEGER, -- CUDA device ID. ctxswDevPath TEXT, -- GPU context switch device node path. ctrlDevPath TEXT, -- GPU control device node path. revision INTEGER, -- Revision number. nodeMask INTEGER, -- Device node mask. constantMemory INTEGER, -- Memory available on device for __constant__ variables (B). maxIPC INTEGER, -- Maximum instructions per count. maxRegistersPerBlock INTEGER, -- Maximum number of 32-bit registers available per block. maxShmemPerBlock INTEGER, -- Maximum optin shared memory per block. maxShmemPerBlockOptin INTEGER, -- Maximum optin shared memory per block. maxShmemPerSm INTEGER, -- Maximum shared memory available per multiprocessor (B). maxRegistersPerSm INTEGER, -- Maximum number of 32-bit registers available per multiprocessor. threadsPerWarp INTEGER, -- Warp size in threads. asyncEngines INTEGER, -- Number of asynchronous engines. maxWarpsPerSm INTEGER, -- Maximum number of warps per multiprocessor. maxBlocksPerSm INTEGER, -- Maximum number of blocks per multiprocessor. maxThreadsPerBlock INTEGER, -- Maximum number of threads per block. maxBlockDimX INTEGER, -- Maximum X-dimension of a block. maxBlockDimY INTEGER, -- Maximum Y-dimension of a block. maxBlockDimZ INTEGER, -- Maximum Z-dimension of a block. maxGridDimX INTEGER, -- Maximum X-dimension of a grid. maxGridDimY INTEGER, -- Maximum Y-dimension of a grid. maxGridDimZ INTEGER, -- Maximum Z-dimension of a grid. computeMajor INTEGER, -- Major compute capability version number. computeMinor INTEGER, -- Minor compute capability version number. smMajor INTEGER, -- Major multiprocessor version number. smMinor INTEGER -- Minor multiprocessor version number. ); CREATE TABLE TARGET_INFO_XMC_SPEC ( vmId INTEGER NOT NULL, -- Serialized GlobalId. clientId INTEGER NOT NULL, -- Client ID. 
type TEXT NOT NULL, -- Client type. name TEXT NOT NULL, -- Client name. groupId TEXT NOT NULL -- Client group ID. ); CREATE TABLE TARGET_INFO_PROCESS ( processId INTEGER NOT NULL, -- Process ID. openGlVersion TEXT NOT NULL, -- OpenGL version. correlationId INTEGER NOT NULL, -- Correlation ID of the kernel. nameId INTEGER NOT NULL -- REFERENCES StringIds(id) -- Function name ); CREATE TABLE TARGET_INFO_NVTX_CUDA_DEVICE ( name TEXT NOT NULL, -- CUDA device name assigned using NVTX. hwId INTEGER NOT NULL, -- Hardware ID. vmId INTEGER NOT NULL, -- VM ID. deviceId INTEGER NOT NULL -- Device ID. ); CREATE TABLE TARGET_INFO_NVTX_CUDA_CONTEXT ( name TEXT NOT NULL, -- CUDA context name assigned using NVTX. hwId INTEGER NOT NULL, -- Hardware ID. vmId INTEGER NOT NULL, -- VM ID. processId INTEGER NOT NULL, -- Process ID. deviceId INTEGER NOT NULL, -- Device ID. contextId INTEGER NOT NULL -- Context ID. ); CREATE TABLE TARGET_INFO_NVTX_CUDA_STREAM ( name TEXT NOT NULL, -- CUDA stream name assigned using NVTX. hwId INTEGER NOT NULL, -- Hardware ID. vmId INTEGER NOT NULL, -- VM ID. processId INTEGER NOT NULL, -- Process ID. deviceId INTEGER NOT NULL, -- Device ID. contextId INTEGER NOT NULL, -- Context ID. streamId INTEGER NOT NULL -- Stream ID. ); CREATE TABLE TARGET_INFO_CUDA_NULL_STREAM ( streamId INTEGER NOT NULL, -- Stream ID. hwId INTEGER NOT NULL, -- Hardware ID. vmId INTEGER NOT NULL, -- VM ID. processId INTEGER NOT NULL, -- Process ID. deviceId INTEGER NOT NULL, -- Device ID. contextId INTEGER NOT NULL -- Context ID. ); CREATE TABLE TARGET_INFO_CUDA_STREAM ( streamId INTEGER NOT NULL, -- Stream ID. hwId INTEGER NOT NULL, -- Hardware ID. vmId INTEGER NOT NULL, -- VM ID. processId INTEGER NOT NULL, -- Process ID. contextId INTEGER NOT NULL, -- Context ID. priority INTEGER NOT NULL, -- Priority of the stream. 
flag INTEGER NOT NULL -- REFERENCES ENUM_CUPTI_STREAM_TYPE(id) ); CREATE TABLE TARGET_INFO_WDDM_CONTEXTS ( context INTEGER NOT NULL, engineType INTEGER NOT NULL, nodeOrdinal INTEGER NOT NULL, friendlyName TEXT NOT NULL ); CREATE TABLE TARGET_INFO_PERF_COUNTER ( counterId INTEGER NOT NULL, -- Counter ID value name TEXT NOT NULL, -- Counter name description TEXT NOT NULL, -- Counter description unit TEXT NOT NULL -- Counter measurement unit ); CREATE TABLE TARGET_INFO_NETWORK_METRICS ( metricsListId INTEGER NOT NULL, -- Metric list ID metricsIdx INTEGER NOT NULL, -- List index of metric name TEXT NOT NULL, -- Name of metric description TEXT NOT NULL, -- Description of metric unit TEXT NOT NULL -- Measurement unit of metric ); CREATE TABLE EXPORT_META_DATA ( -- information about nsys export process name TEXT NOT NULL, -- Name of export meta-data record value TEXT -- Value of export meta-data record ); CREATE TABLE ENUM_NSYS_EVENT_TYPE ( -- Nsys event type labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_NSYS_EVENT_CLASS ( -- Nsys event class labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_GPU_CTX_SWITCH ( -- GPU context switch labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_CUDA_MEMCPY_OPER ( -- CUDA memcpy operation labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_CUDA_MEM_KIND ( -- CUDA memory kind labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_CUDA_MEMPOOL_TYPE ( -- CUDA mempool type labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label 
TEXT -- Enum human name ); CREATE TABLE ENUM_CUDA_MEMPOOL_OPER ( -- CUDA mempool operation labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_CUDA_DEV_MEM_EVENT_OPER ( -- CUDA device mem event operation labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_CUDA_KERNEL_LAUNCH_TYPE ( -- CUDA kernel launch type labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_CUDA_SHARED_MEM_LIMIT_CONFIG ( -- CUDA shared memory limit config labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_CUDA_UNIF_MEM_MIGRATION ( -- CUDA unified memory migration cause labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_CUDA_UNIF_MEM_ACCESS_TYPE ( -- CUDA unified memory access type labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_CUDA_FUNC_CACHE_CONFIG ( -- CUDA function cache config labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_CUPTI_STREAM_TYPE ( -- CUPTI stream type labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_CUPTI_SYNC_TYPE ( -- CUPTI synchronization type labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_STACK_UNWIND_METHOD ( -- Stack unwind method labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name 
label TEXT -- Enum human name ); CREATE TABLE ENUM_SAMPLING_THREAD_STATE ( -- Sampling thread state labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_OPENGL_DEBUG_SOURCE ( -- OpenGL debug source labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_OPENGL_DEBUG_TYPE ( -- OpenGL debug type labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_OPENGL_DEBUG_SEVERITY ( -- OpenGL debug severity labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_VULKAN_PIPELINE_CREATION_FLAGS ( -- Vulkan pipeline creation feedback flag labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_D3D12_HEAP_TYPE ( -- D3D12 heap type labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_D3D12_PAGE_PROPERTY ( -- D3D12 CPU page property labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_D3D12_HEAP_FLAGS ( -- D3D12 heap flag labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_D3D12_CMD_LIST_TYPE ( -- D3D12 command list type labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_OPENACC_DEVICE ( -- OpenACC device type labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE 
ENUM_OPENACC_EVENT_KIND ( -- OpenACC event type labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_OPENMP_EVENT_KIND ( -- OpenMP event kind labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_OPENMP_THREAD ( -- OpenMP thread labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_OPENMP_DISPATCH ( -- OpenMP dispatch labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_OPENMP_SYNC_REGION ( -- OpenMP sync region labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_OPENMP_WORK ( -- OpenMP work labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_OPENMP_MUTEX ( -- OpenMP mutex labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_OPENMP_TASK_FLAG ( -- OpenMP task flags labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_OPENMP_TASK_STATUS ( -- OpenMP task status labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_SLI_TRANSER ( -- OpenMP task status labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_DXGI_FORMAT ( -- DXGI image format labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human 
name ); CREATE TABLE ENUM_NVDRIVER_EVENT_ID ( -- NV-Driver event it labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_WDDM_PAGING_QUEUE_TYPE ( -- WDDM paging queue type labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_WDDM_PACKET_TYPE ( -- WDDM packet type labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_WDDM_ENGINE_TYPE ( -- WDDM engine type labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_WDDM_INTERRUPT_TYPE ( -- WDDM DMA interrupt type labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_WDDM_VIDMM_OP_TYPE ( -- WDDM VidMm operation type labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_NET_LINK_TYPE ( -- NIC link layer labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_NET_DEVICE_ID ( -- NIC PCIe device id labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_NET_VENDOR_ID ( -- NIC PCIe vendor id labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE ENUM_ETW_MEMORY_TRANSFER_TYPE ( -- memory transfer type labels id INTEGER NOT NULL PRIMARY KEY, -- Enum numerical value name TEXT, -- Enum symbol name label TEXT -- Enum human name ); CREATE TABLE GENERIC_EVENT_SOURCES ( sourceId INTEGER NOT NULL PRIMARY KEY, -- Serialized GlobalId. 
data TEXT NOT NULL -- JSON encoded generic event source description. ); CREATE TABLE GENERIC_EVENT_TYPES ( typeId INTEGER NOT NULL PRIMARY KEY, -- Serialized GlobalId. sourceId INTEGER NOT NULL, -- REFERENCES GENERIC_EVENT_SOURCES(sourceId) data TEXT NOT NULL -- JSON encoded generic event type description. ); CREATE TABLE GENERIC_EVENTS ( -- Events for which the schema is known only at the run-time. rawTimestamp INTEGER NOT NULL, -- Raw event timestamp recorded during profiling. timestamp INTEGER, -- Event timestamp converted to the profiling session timeline. typeId INTEGER NOT NULL, -- REFERENCES GENERIC_EVENT_TYPES(typeId) data TEXT NOT NULL -- JSON encoded event data. ); CREATE TABLE ETW_PROVIDERS ( -- Names and identifiers of ETW providers captured in the report. providerId INTEGER NOT NULL PRIMARY KEY, -- Provider ID. providerNameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Provider name guid TEXT NOT NULL -- ETW Provider GUID. ); CREATE TABLE ETW_TASKS ( -- Names and identifiers of ETW tasks captured in the report. taskNameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Task name taskId INTEGER NOT NULL, -- The event task ID. providerId INTEGER NOT NULL -- Provider ID. ); CREATE TABLE ETW_EVENTS ( -- Raw ETW events captured in the report. timestamp INTEGER NOT NULL, -- Event start timestamp (ns). processId INTEGER, -- Process ID. threadId INTEGER, -- Thread ID. providerId INTEGER NOT NULL, -- Provider ID. taskId INTEGER NOT NULL, -- The event task ID. eventId INTEGER NOT NULL, -- Event ID. version INTEGER NOT NULL, -- The event version. opcode INTEGER, -- The event opcode. data TEXT NOT NULL -- JSON encoded event data. ); CREATE TABLE TARGET_INFO_GPU_METRICS ( -- GPU Metrics, metric names and ids. typeId INTEGER NOT NULL, -- REFERENCES GENERIC_EVENT_TYPES(typeId) sourceId INTEGER NOT NULL, -- REFERENCES GENERIC_EVENT_SOURCES(sourceId) typeName TEXT NOT NULL, -- Name of event type. 
metricId INTEGER NOT NULL, -- Id of metric in event; not assumed to be stable. metricName TEXT NOT NULL -- Definitive name of metric. ); CREATE TABLE GPU_METRICS ( -- GPU Metrics, events and values. timestamp INTEGER, -- Event timestamp (ns). typeId INTEGER NOT NULL, -- REFERENCES TARGET_INFO_GPU_METRICS(typeId) and GENERIC_EVENT_TYPES(typeId) metricId INTEGER NOT NULL, -- REFERENCES TARGET_INFO_GPU_METRICS(metricId) value INTEGER NOT NULL -- Counter data value ); CREATE TABLE MPI_COMMUNICATORS ( -- Identification of MPI communication groups. rank INTEGER, -- Active MPI rank timestamp INTEGER, -- Time of MPI communicator creation. commHandle INTEGER, -- MPI communicator handle parentHandle INTEGER, -- MPI communicator handle localRank INTEGER, -- Local MPI rank in a communicator. size INTEGER, -- MPI communicator size. groupRoot INTEGER, -- Root rank (global) in MPI communicator. groupRootUid INTEGER, -- Group root's communicator ID. members TEXT -- MPI communicator members (index is global rank). ); CREATE TABLE CUPTI_ACTIVITY_KIND_MEMCPY ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). deviceId INTEGER NOT NULL, -- Device ID. contextId INTEGER NOT NULL, -- Context ID. streamId INTEGER NOT NULL, -- Stream ID. correlationId INTEGER, -- REFERENCES CUPTI_ACTIVITY_KIND_RUNTIME(correlationId) globalPid INTEGER, -- Serialized GlobalId. bytes INTEGER NOT NULL, -- Number of bytes transferred (B). copyKind INTEGER NOT NULL, -- REFERENCES ENUM_CUDA_MEMCPY_OPER(id) deprecatedSrcId INTEGER, -- Deprecated, use srcDeviceId instead. srcKind INTEGER, -- REFERENCES ENUM_CUDA_MEM_KIND(id) dstKind INTEGER, -- REFERENCES ENUM_CUDA_MEM_KIND(id) srcDeviceId INTEGER, -- Source device ID. srcContextId INTEGER, -- Source context ID. dstDeviceId INTEGER, -- Destination device ID. dstContextId INTEGER, -- Destination context ID. 
migrationCause INTEGER, -- REFERENCES ENUM_CUDA_UNIF_MEM_MIGRATION(id) graphNodeId INTEGER, -- REFERENCES CUDA_GRAPH_EVENTS(graphNodeId) virtualAddress INTEGER -- Virtual base address of the page/s being transferred. ); CREATE TABLE CUPTI_ACTIVITY_KIND_MEMSET ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). deviceId INTEGER NOT NULL, -- Device ID. contextId INTEGER NOT NULL, -- Context ID. streamId INTEGER NOT NULL, -- Stream ID. correlationId INTEGER, -- REFERENCES CUPTI_ACTIVITY_KIND_RUNTIME(correlationId) globalPid INTEGER, -- Serialized GlobalId. value INTEGER NOT NULL, -- Value assigned to memory. bytes INTEGER NOT NULL, -- Number of bytes set (B). graphNodeId INTEGER, -- REFERENCES CUDA_GRAPH_EVENTS(graphNodeId) memKind INTEGER -- REFERENCES ENUM_CUDA_MEM_KIND(id) ); CREATE TABLE CUPTI_ACTIVITY_KIND_KERNEL ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). deviceId INTEGER NOT NULL, -- Device ID. contextId INTEGER NOT NULL, -- Context ID. streamId INTEGER NOT NULL, -- Stream ID. correlationId INTEGER, -- REFERENCES CUPTI_ACTIVITY_KIND_RUNTIME(correlationId) globalPid INTEGER, -- Serialized GlobalId. demangledName INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Kernel function name w/ templates shortName INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Base kernel function name mangledName INTEGER, -- REFERENCES StringIds(id) -- Raw C++ mangled kernel function name launchType INTEGER, -- REFERENCES ENUM_CUDA_KERNEL_LAUNCH_TYPE(id) cacheConfig INTEGER, -- REFERENCES ENUM_CUDA_FUNC_CACHE_CONFIG(id) registersPerThread INTEGER NOT NULL, -- Number of registers required for each thread executing the kernel. gridX INTEGER NOT NULL, -- X-dimension grid size. gridY INTEGER NOT NULL, -- Y-dimension grid size. gridZ INTEGER NOT NULL, -- Z-dimension grid size. blockX INTEGER NOT NULL, -- X-dimension block size. 
blockY INTEGER NOT NULL, -- Y-dimension block size. blockZ INTEGER NOT NULL, -- Z-dimension block size. staticSharedMemory INTEGER NOT NULL, -- Static shared memory allocated for the kernel (B). dynamicSharedMemory INTEGER NOT NULL, -- Dynamic shared memory reserved for the kernel (B). localMemoryPerThread INTEGER NOT NULL, -- Amount of local memory reserved for each thread (B). localMemoryTotal INTEGER NOT NULL, -- Total amount of local memory reserved for the kernel (B). gridId INTEGER NOT NULL, -- Unique grid ID of the kernel assigned at runtime. sharedMemoryExecuted INTEGER, -- Shared memory size set by the driver. graphNodeId INTEGER, -- REFERENCES CUDA_GRAPH_EVENTS(graphNodeId) sharedMemoryLimitConfig INTEGER -- REFERENCES ENUM_CUDA_SHARED_MEM_LIMIT_CONFIG(id) ); CREATE TABLE CUPTI_ACTIVITY_KIND_SYNCHRONIZATION ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). deviceId INTEGER NOT NULL, -- Device ID. contextId INTEGER NOT NULL, -- Context ID. streamId INTEGER NOT NULL, -- Stream ID. correlationId INTEGER, -- Correlation ID of the synchronization API to which this result is associated. globalPid INTEGER, -- Serialized GlobalId. syncType INTEGER NOT NULL, -- REFERENCES ENUM_CUPTI_SYNC_TYPE(id) eventId INTEGER NOT NULL -- Event ID for which the synchronization API is called. ); CREATE TABLE CUPTI_ACTIVITY_KIND_CUDA_EVENT ( deviceId INTEGER NOT NULL, -- Device ID. contextId INTEGER NOT NULL, -- Context ID. streamId INTEGER NOT NULL, -- Stream ID. correlationId INTEGER, -- Correlation ID of the event record API to which this result is associated. globalPid INTEGER, -- Serialized GlobalId. eventId INTEGER NOT NULL -- Event ID for which the event record API is called. ); CREATE TABLE CUPTI_ACTIVITY_KIND_RUNTIME ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). 
eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- ID used to identify events that this function call has triggered. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Function name returnValue INTEGER NOT NULL, -- Return value of the function call. callchainId INTEGER -- REFERENCES CUDA_CALLCHAINS(id) ); CREATE TABLE CUDNN_EVENTS ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. nameId INTEGER NOT NULL -- REFERENCES StringIds(id) -- Function name ); CREATE TABLE CUBLAS_EVENTS ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. nameId INTEGER NOT NULL -- REFERENCES StringIds(id) -- Function name ); CREATE TABLE CUDA_GRAPH_EVENTS ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Function name graphNodeId INTEGER NOT NULL, -- REFERENCES CUDA_GRAPH_EVENTS(graphNodeId) originalGraphNodeId INTEGER -- Reference to the original graph node ID, if cloned node. ); CREATE TABLE CUDA_UM_CPU_PAGE_FAULT_EVENTS ( start INTEGER NOT NULL, -- Event start timestamp (ns). globalPid INTEGER NOT NULL, -- Serialized GlobalId. address INTEGER NOT NULL, -- Virtual address of the page that faulted. originalFaultPc INTEGER, -- Program counter of the CPU instruction that caused the page fault. 
CpuInstruction INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Function name module INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Module name unresolvedFaultPc INTEGER -- True if the program counter was not resolved. ); CREATE TABLE CUDA_UM_GPU_PAGE_FAULT_EVENTS ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalPid INTEGER NOT NULL, -- Serialized GlobalId. deviceId INTEGER NOT NULL, -- Device ID. address INTEGER NOT NULL, -- Virtual address of the page that faulted. numberOfPageFaults INTEGER NOT NULL, -- Number of page faults for the same page. faultAccessType INTEGER NOT NULL -- REFERENCES ENUM_CUDA_UNIF_MEM_ACCESS_TYPE(id) ); CREATE TABLE CUDA_GPU_MEMORY_USAGE_EVENTS ( start INTEGER NOT NULL, -- Event start timestamp (ns). globalPid INTEGER NOT NULL, -- Serialized GlobalId. deviceId INTEGER NOT NULL, -- Device ID. contextId INTEGER NOT NULL, -- Context ID. address INTEGER NOT NULL, -- Virtual address of the allocation/deallocation. pc INTEGER NOT NULL, -- Program counter of the allocation/deallocation. bytes INTEGER NOT NULL, -- Number of bytes allocated/deallocated (B). memKind INTEGER NOT NULL, -- REFERENCES ENUM_CUDA_MEM_KIND(id) memoryOperationType INTEGER NOT NULL, -- REFERENCES ENUM_CUDA_DEV_MEM_EVENT_OPER(id) name TEXT, -- Variable name, if available. 
correlationId INTEGER, -- REFERENCES CUPTI_ACTIVITY_KIND_RUNTIME(correlationId) localMemoryPoolAddress INTEGER, -- Base address of the local memory pool used localMemoryPoolReleaseThreshold INTEGER, -- Release threshold of the local memory pool used localMemoryPoolSize INTEGER, -- Size of the local memory pool used localMemoryPoolUtilizedSize INTEGER, -- Utilized size of the local memory pool used importedMemoryPoolAddress INTEGER, -- Base address of the imported memory pool used importedMemoryPoolProcessId INTEGER -- Process ID of the imported memory pool used ); CREATE TABLE CUDA_GPU_MEMORY_POOL_EVENTS ( start INTEGER NOT NULL, -- Event start timestamp (ns). globalPid INTEGER NOT NULL, -- Serialized GlobalId. deviceId INTEGER NOT NULL, -- Device ID. address INTEGER NOT NULL, -- The base virtual address of the memory pool. operationType INTEGER NOT NULL, -- REFERENCES ENUM_CUDA_MEMPOOL_OPER(id) poolType INTEGER NOT NULL, -- REFERENCES ENUM_CUDA_MEMPOOL_TYPE(id) correlationId INTEGER, -- REFERENCES CUPTI_ACTIVITY_KIND_RUNTIME(correlationId) minBytesToKeep INTEGER, -- Minimum number of bytes to keep of the memory pool. localMemoryPoolReleaseThreshold INTEGER, -- Release threshold of the local memory pool used localMemoryPoolSize INTEGER, -- Size of the local memory pool used localMemoryPoolUtilizedSize INTEGER -- Utilized size of the local memory pool used ); CREATE TABLE CUDA_CALLCHAINS ( id INTEGER NOT NULL, -- Part of PRIMARY KEY (id, stackDepth). symbol INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Function name module INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Module name unresolved INTEGER, -- True if the symbol was not resolved. originalIP INTEGER, -- Instruction pointer value. stackDepth INTEGER NOT NULL, -- Zero-base index of the given function in call stack. PRIMARY KEY (id, stackDepth) ); CREATE TABLE MPI_RANKS ( -- Mapping of global thread IDs (gtid) to MPI ranks globalTid INTEGER NOT NULL, -- Serialized GlobalId. 
rank INTEGER NOT NULL -- MPI rank ); CREATE TABLE MPI_P2P_EVENTS ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. textId INTEGER, -- REFERENCES StringIds(id) -- Registered NVTX domain commHandle INTEGER, -- MPI communicator handle tag INTEGER, -- MPI message tag remoteRank INTEGER, -- MPI remote rank (destination or source) size INTEGER, -- MPI message size in bytes requestHandle INTEGER -- MPI request handle ); CREATE TABLE MPI_COLLECTIVES_EVENTS ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. textId INTEGER, -- REFERENCES StringIds(id) -- Registered NVTX domain commHandle INTEGER, -- MPI communicator handle rootRank INTEGER, -- root rank in the collective size INTEGER, -- MPI message size in bytes (send size for bidirectional ops) recvSize INTEGER, -- MPI receive size in bytes requestHandle INTEGER -- MPI request handle ); CREATE TABLE MPI_START_WAIT_EVENTS ( -- MPI_Wait* and MPI_Start* start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. textId INTEGER, -- REFERENCES StringIds(id) -- Registered NVTX domain requestHandle INTEGER -- MPI request handle ); CREATE TABLE MPI_OTHER_EVENTS ( -- MPI events without additional parameters start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. textId INTEGER -- REFERENCES StringIds(id) -- Registered NVTX domain ); CREATE TABLE NVTX_EVENTS ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER, -- Event end timestamp (ns). eventType INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_TYPE(id) rangeId INTEGER, -- Correlation ID returned from a nvtxRangeStart call. category INTEGER, -- User-controlled ID that can be used to group events. 
color INTEGER, -- Encoded ARGB color value. text TEXT, -- Optional text message for non registered strings. globalTid INTEGER, -- Serialized GlobalId. endGlobalTid INTEGER, -- Serialized GlobalId. textId INTEGER, -- REFERENCES StringIds(id) -- Registered NVTX domain domainId INTEGER, -- User-controlled ID that can be used to group events. uint64Value INTEGER, -- One of possible payload value union members. int64Value INTEGER, -- One of possible payload value union members. doubleValue REAL, -- One of possible payload value union members. uint32Value INTEGER, -- One of possible payload value union members. int32Value INTEGER, -- One of possible payload value union members. floatValue REAL, -- One of possible payload value union members. jsonTextId INTEGER, -- One of possible payload value union members. jsonText TEXT -- One of possible payload value union members. ); CREATE TABLE OPENGL_API ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_TYPE(id) globalTid INTEGER, -- Serialized GlobalId. endGlobalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- First ID matching an API call to GPU workloads. endCorrelationId INTEGER, -- Last ID matching an API call to GPU workloads. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- First function name endNameId INTEGER, -- REFERENCES StringIds(id) -- Last function name returnValue INTEGER NOT NULL, -- Return value of the function call. frameId INTEGER, -- Index of the graphics frame starting from 1. contextId INTEGER, -- Context ID. gpu INTEGER, -- GPU index. display INTEGER -- Display ID. ); CREATE TABLE OPENGL_WORKLOAD ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_TYPE(id) globalTid INTEGER, -- Serialized GlobalId. endGlobalTid INTEGER, -- Serialized GlobalId. 
correlationId INTEGER, -- First ID matching an API call to GPU workloads. endCorrelationId INTEGER, -- Last ID matching an API call to GPU workloads. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- First function name endNameId INTEGER, -- REFERENCES StringIds(id) -- Last function name returnValue INTEGER NOT NULL, -- Return value of the function call. frameId INTEGER, -- Index of the graphics frame starting from 1. contextId INTEGER, -- Context ID. gpu INTEGER, -- GPU index. display INTEGER -- Display ID. ); CREATE TABLE KHR_DEBUG_EVENTS ( eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_TYPE(id) start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER, -- Event end timestamp (ns). textId INTEGER, -- REFERENCES StringIds(id) -- Debug marker/group text globalTid INTEGER, -- Serialized GlobalId. source INTEGER, -- REFERENCES ENUM_OPENGL_DEBUG_SOURCE(id) khrdType INTEGER, -- REFERENCES ENUM_OPENGL_DEBUG_TYPE(id) id INTEGER, -- KHR event ID. severity INTEGER, -- REFERENCES ENUM_OPENGL_DEBUG_SEVERITY(id) correlationId INTEGER, -- ID used to correlate KHR CPU trace to GPU trace. context INTEGER -- Context ID. ); CREATE TABLE OSRT_API ( -- OS runtime libraries traced to gather information about low-level userspace APIs. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Function name returnValue INTEGER NOT NULL, -- Return value of the function call. nestingLevel INTEGER, -- Zero-base index of the nesting level. callchainId INTEGER NOT NULL -- REFERENCES OSRT_CALLCHAINS(id) ); CREATE TABLE OSRT_CALLCHAINS ( -- Callchains attached to OSRT events, depending on selected profiling settings. id INTEGER NOT NULL, -- Part of PRIMARY KEY (id, stackDepth). 
symbol INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Function name module INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Module name kernelMode INTEGER, -- True if kernel mode. thumbCode INTEGER, -- True if thumb code. unresolved INTEGER, -- True if the symbol was not resolved. specialEntry INTEGER, -- True if artifical entry added during processing callchain. originalIP INTEGER, -- Instruction pointer value. unwindMethod INTEGER, -- REFERENCES ENUM_STACK_UNWIND_METHOD(id) stackDepth INTEGER NOT NULL, -- Zero-base index of the given function in call stack. PRIMARY KEY (id, stackDepth) ); CREATE TABLE UnwindMethodType ( number INTEGER PRIMARY KEY, name TEXT NOT NULL ); CREATE TABLE PROFILER_OVERHEAD ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Function name returnValue INTEGER NOT NULL -- Return value of the function call. ); CREATE TABLE SCHED_EVENTS ( -- Thread scheduling events. start INTEGER NOT NULL, -- Event start timestamp (ns). cpu INTEGER NOT NULL, -- ID of CPU this thread was scheduled in or out. isSchedIn INTEGER NOT NULL, -- 0 if thread was scheduled out, non-zero otherwise. globalTid INTEGER -- Serialized GlobalId. ); CREATE TABLE COMPOSITE_EVENTS ( -- Thread sampling events. id INTEGER NOT NULL PRIMARY KEY, -- ID of the composite event. start INTEGER NOT NULL, -- Event start timestamp (ns). cpu INTEGER, -- ID of CPU this thread was running on. threadState INTEGER, -- REFERENCES ENUM_SAMPLING_THREAD_STATE(id) globalTid INTEGER, -- Serialized GlobalId. cpuCycles INTEGER NOT NULL -- Value of Performance Monitoring Unit (PMU) counter. ); CREATE TABLE SAMPLING_CALLCHAINS ( -- Callchain entries obtained from composite events, used to construct function table views. 
id INTEGER NOT NULL, -- REFERENCES COMPOSITE_EVENTS(id) symbol INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Function name module INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Module name kernelMode INTEGER, -- True if kernel mode. thumbCode INTEGER, -- True if thumb code. unresolved INTEGER, -- True if the symbol was not resolved. specialEntry INTEGER, -- True if artifical entry added during processing callchain. originalIP INTEGER, -- Instruction pointer value. unwindMethod INTEGER, -- REFERENCES ENUM_STACK_UNWIND_METHOD(id) stackDepth INTEGER NOT NULL, -- Zero-base index of the given function in call stack. PRIMARY KEY (id, stackDepth) ); CREATE TABLE PERF_EVENT_CPU_COUNTER ( -- Sampled CPU Performance Counters. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). vmId INTEGER, -- VM ID. componentId INTEGER, -- Perf Event Component ID (usually CPU Core ID) counterId INTEGER, -- REFERENCES TARGET_INFO_PERF_COUNTER(counterId) value INTEGER -- Counter data value ); CREATE TABLE SLI_QUERIES ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. frameId INTEGER NOT NULL, -- Index of the graphics frame starting from 1. occQueryIssued INTEGER NOT NULL, -- Occlusion query issued. occQueryAsked INTEGER NOT NULL, -- Occlusion query asked. eventQueryIssued INTEGER NOT NULL, -- Event query issued. eventQueryAsked INTEGER NOT NULL, -- Event query asked. numberOfTransferEvents INTEGER NOT NULL, -- Number of transfer events. amountOfTransferredData INTEGER NOT NULL -- Cumulative size of resource data that was transferred. ); CREATE TABLE SLI_P2P ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). 
eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. frameId INTEGER NOT NULL, -- Index of the graphics frame starting from 1. transferSkipped INTEGER NOT NULL, -- Number of transfers that were skipped. srcGpu INTEGER NOT NULL, -- Source GPU ID. dstGpu INTEGER NOT NULL, -- Destination GPU ID. numSubResources INTEGER NOT NULL, -- Number of sub-resources to transfer. resourceSize INTEGER NOT NULL, -- Size of resource. subResourceIdx INTEGER NOT NULL, -- Sub-resource index. smplWidth INTEGER, -- Sub-resource surface width in samples. smplHeight INTEGER, -- Sub-resource surface height in samples. smplDepth INTEGER, -- Sub-resource surface depth in samples. bytesPerElement INTEGER, -- Number of bytes per element. dxgiFormat INTEGER, -- REFERENCES ENUM_DXGI_FORMAT(id) logSurfaceNames TEXT, -- Surface name. transferInfo INTEGER, -- REFERENCES ENUM_SLI_TRANSER(id) isEarlyPushManagedByNvApi INTEGER, -- True if early push managed by NVAPI. False otherwise. useAsyncP2pForResolve INTEGER, -- True if async Peer-to-Peer used for resolve. False otherwise. transferFuncName TEXT, -- "A - BE" for asynchronous transfer, "S - BE" for synchronous transfer. regimeName TEXT, -- Name of the regime scope that includes the resource. debugName TEXT, -- Debug name assigned to the resource by the application code. bindType TEXT -- Bind type. ); CREATE TABLE SLI_STATS ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. countComplexFrames INTEGER NOT NULL, -- Complex frames count. countStats INTEGER NOT NULL, -- Number of frame statistics collected for the inactive-time histogram. totalInactiveTime INTEGER NOT NULL, -- Total inactive time (\u00b5s). minPbSize INTEGER NOT NULL, -- Min push buffer size. 
maxPbSize INTEGER NOT NULL, -- Max push buffer size. totalPbSize INTEGER NOT NULL -- Total push buffer size. ); CREATE TABLE DX12_API ( id INTEGER NOT NULL PRIMARY KEY, start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- First ID matching an API call to GPU workloads. endCorrelationId INTEGER, -- Last ID matching an API call to GPU workloads. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Function name shortContextId INTEGER, -- Short form of the COM interface object address. frameId INTEGER, -- Index of the graphics frame starting from 1. color INTEGER, -- Encoded ARGB color value. textId INTEGER, -- REFERENCES StringIds(id) -- PIX marker text commandListType INTEGER, -- REFERENCES ENUM_D3D12_CMD_LIST_TYPE(id) objectNameId INTEGER, -- REFERENCES StringIds(id) -- D3D12 object name longContextId INTEGER -- Long form of the COM interface object address. ); CREATE TABLE DX12_WORKLOAD ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- First ID matching an API call to GPU workloads. endCorrelationId INTEGER, -- Last ID matching an API call to GPU workloads. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Function name shortContextId INTEGER, -- Short form of the COM interface object address. frameId INTEGER, -- Index of the graphics frame starting from 1. gpu INTEGER, -- GPU index. color INTEGER, -- Encoded ARGB color value. 
textId INTEGER, -- REFERENCES StringIds(id) -- PIX marker text commandListType INTEGER, -- REFERENCES ENUM_D3D12_CMD_LIST_TYPE(id) objectNameId INTEGER, -- REFERENCES StringIds(id) -- D3D12 object name longContextId INTEGER -- Long form of the COM interface object address. ); CREATE TABLE DX12_MEMORY_OPERATION ( gpu INTEGER, -- GPU index. rangeStart INTEGER, -- Offset denoting the beginning of a memory range (B). rangeEnd INTEGER, -- Offset denoting the end of a memory range (B). subresourceId INTEGER, -- Subresource index. heapType INTEGER, -- REFERENCES ENUM_D3D12_HEAP_TYPE(id) heapFlags INTEGER, -- REFERENCES ENUM_D3D12_HEAP_FLAGS(id) cpuPageProperty INTEGER, -- REFERENCES ENUM_D3D12_PAGE_PROPERTY(id) nvApiFlags INTEGER, -- NV specific flags. See docs for specifics. traceEventId INTEGER NOT NULL -- REFERENCES DX12_API(id) ); CREATE TABLE VULKAN_API ( id INTEGER NOT NULL PRIMARY KEY, start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- First ID matching an API call to GPU workloads. endCorrelationId INTEGER, -- Last ID matching an API call to GPU workloads. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Function name contextId INTEGER -- Short form of the interface object address. ); CREATE TABLE VULKAN_WORKLOAD ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- First ID matching an API call to GPU workloads. endCorrelationId INTEGER, -- Last ID matching an API call to GPU workloads. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Function name gpu INTEGER, -- GPU index. contextId INTEGER, -- Short form of the interface object address. 
color INTEGER, -- Encoded ARGB color value. textId INTEGER -- REFERENCES StringIds(id) -- Vulkan CPU debug marker string ); CREATE TABLE VULKAN_DEBUG_API ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- First ID matching an API call to GPU workloads. endCorrelationId INTEGER, -- Last ID matching an API call to GPU workloads. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Function name contextId INTEGER, -- Short form of the interface object address. color INTEGER, -- Encoded ARGB color value. textId INTEGER -- REFERENCES StringIds(id) -- Vulkan CPU debug marker string ); CREATE TABLE VULKAN_PIPELINE_CREATION_EVENTS ( id INTEGER NOT NULL PRIMARY KEY, -- ID of the pipeline creation event. duration INTEGER, -- Event duration (ns). flags INTEGER, -- REFERENCES ENUM_VULKAN_PIPELINE_CREATION_FLAGS(id) traceEventId INTEGER NOT NULL -- REFERENCES VULKAN_API(id) -- ID of the attached vulkan API. ); CREATE TABLE VULKAN_PIPELINE_STAGE_EVENTS ( id INTEGER NOT NULL PRIMARY KEY, -- ID of the pipeline stage event. duration INTEGER, -- Event duration (ns). flags INTEGER, -- REFERENCES ENUM_VULKAN_PIPELINE_CREATION_FLAGS(id) creationEventId INTEGER NOT NULL -- REFERENCES VULKAN_PIPELINE_CREATION_EVENTS(id) -- ID of the attached pipeline creation event. ); CREATE TABLE GPU_CONTEXT_SWITCH_EVENTS ( tag INTEGER NOT NULL, -- REFERENCES ENUM_GPU_CTX_SWITCH(id) vmId INTEGER NOT NULL, -- VM ID. seqNo INTEGER NOT NULL, -- Sequential event number. contextId INTEGER NOT NULL, -- Context ID. timestamp INTEGER NOT NULL, -- Event start timestamp (ns). globalPid INTEGER, -- Serialized GlobalId. gpuId INTEGER -- GPU index. ); CREATE TABLE ETW_EVENTS_DEPRECATED_TABLE ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). 
globalTid INTEGER, -- Serialized GlobalId. providerName INTEGER, -- REFERENCES StringIds(id) -- Tracing events provider name taskName INTEGER, -- REFERENCES StringIds(id) -- Event task name description INTEGER -- REFERENCES StringIds(id) -- Decoded event value ); CREATE TABLE OPENMP_EVENT_KIND_THREAD ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) threadId INTEGER, -- Internal thread sequence starting from 1. threadType INTEGER -- REFERENCES ENUM_OPENMP_THREAD(id) ); CREATE TABLE OPENMP_EVENT_KIND_PARALLEL ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) parallelId INTEGER, -- Internal parallel region sequence starting from 1. parentTaskId INTEGER -- ID for task that creates this parallel region. ); CREATE TABLE OPENMP_EVENT_KIND_SYNC_REGION_WAIT ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) parallelId INTEGER, -- ID of the parallel region that this event belongs to. taskId INTEGER, -- ID of the task that this event belongs to. 
kind INTEGER -- REFERENCES ENUM_OPENMP_SYNC_REGION(id) ); CREATE TABLE OPENMP_EVENT_KIND_SYNC_REGION ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) parallelId INTEGER, -- ID of the parallel region that this event belongs to. taskId INTEGER, -- ID of the task that this event belongs to. kind INTEGER -- REFERENCES ENUM_OPENMP_SYNC_REGION(id) ); CREATE TABLE OPENMP_EVENT_KIND_TASK ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) parallelId INTEGER, -- ID of the parallel region that this event belongs to. taskId INTEGER, -- ID of the task that this event belongs to. kind INTEGER -- REFERENCES ENUM_OPENMP_TASK_FLAG(id) ); CREATE TABLE OPENMP_EVENT_KIND_MASTER ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) parallelId INTEGER, -- ID of the parallel region that this event belongs to. taskId INTEGER -- ID of the task that this event belongs to. ); CREATE TABLE OPENMP_EVENT_KIND_REDUCTION ( start INTEGER NOT NULL, -- Event start timestamp (ns). 
end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) parallelId INTEGER, -- ID of the parallel region that this event belongs to. taskId INTEGER -- ID of the task that this event belongs to. ); CREATE TABLE OPENMP_EVENT_KIND_TASK_CREATE ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) parentTaskId INTEGER, -- ID of the parent task that is creating a new task. newTaskId INTEGER -- ID of the new task that is being created. ); CREATE TABLE OPENMP_EVENT_KIND_TASK_SCHEDULE ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) parallelId INTEGER, -- ID of the parallel region that this event belongs to. priorTaskId INTEGER, -- ID of the task that is being switched out. priorTaskStatus INTEGER, -- REFERENCES ENUM_OPENMP_TASK_STATUS(id) nextTaskId INTEGER -- ID of the task that is being switched in. ); CREATE TABLE OPENMP_EVENT_KIND_CANCEL ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). 
eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) taskId INTEGER -- ID of the task that is being cancelled. ); CREATE TABLE OPENMP_EVENT_KIND_MUTEX_WAIT ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) kind INTEGER, -- REFERENCES ENUM_OPENMP_MUTEX(id) waitId INTEGER, -- ID indicating the object being waited. taskId INTEGER -- ID of the task that this event belongs to. ); CREATE TABLE OPENMP_EVENT_KIND_CRITICAL_SECTION ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) kind INTEGER, -- REFERENCES ENUM_OPENMP_MUTEX(id) waitId INTEGER -- ID indicating the object being held. ); CREATE TABLE OPENMP_EVENT_KIND_MUTEX_RELEASED ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. 
nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) kind INTEGER, -- REFERENCES ENUM_OPENMP_MUTEX(id) waitId INTEGER, -- ID indicating the object being released. taskId INTEGER -- ID of the task that this event belongs to. ); CREATE TABLE OPENMP_EVENT_KIND_LOCK_INIT ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) kind INTEGER, -- REFERENCES ENUM_OPENMP_MUTEX(id) waitId INTEGER -- ID indicating object being created/destroyed. ); CREATE TABLE OPENMP_EVENT_KIND_LOCK_DESTROY ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) kind INTEGER, -- REFERENCES ENUM_OPENMP_MUTEX(id) waitId INTEGER -- ID indicating object being created/destroyed. ); CREATE TABLE OPENMP_EVENT_KIND_WORKSHARE ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) kind INTEGER, -- REFERENCES ENUM_OPENMP_WORK(id) parallelId INTEGER, -- ID of the parallel region that this event belongs to. 
taskId INTEGER, -- ID of the task that this event belongs to. count INTEGER -- Measure of the quantity of work involved in the region. ); CREATE TABLE OPENMP_EVENT_KIND_DISPATCH ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) kind INTEGER, -- REFERENCES ENUM_OPENMP_DISPATCH(id) parallelId INTEGER, -- ID of the parallel region that this event belongs to. taskId INTEGER -- ID of the task that this event belongs to. ); CREATE TABLE OPENMP_EVENT_KIND_FLUSH ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- Currently unused. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name eventKind INTEGER, -- REFERENCES ENUM_OPENMP_EVENT_KIND(id) threadId INTEGER -- ID of the thread that this event belongs to. ); CREATE TABLE D3D11_PIX_DEBUG_API ( -- D3D11 debug marker events. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- First ID matching an API call to GPU workloads. endCorrelationId INTEGER, -- Last ID matching an API call to GPU workloads. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Function name shortContextId INTEGER, -- Short form of the COM interface object address. frameId INTEGER, -- Index of the graphics frame starting from 1. color INTEGER, -- Encoded ARGB color value. 
textId INTEGER -- REFERENCES StringIds(id) -- PIX marker text ); CREATE TABLE D3D12_PIX_DEBUG_API ( -- D3D12 debug marker events. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. correlationId INTEGER, -- First ID matching an API call to GPU workloads. endCorrelationId INTEGER, -- Last ID matching an API call to GPU workloads. nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Function name shortContextId INTEGER, -- Short form of the COM interface object address. frameId INTEGER, -- Index of the graphics frame starting from 1. color INTEGER, -- Encoded ARGB color value. textId INTEGER, -- REFERENCES StringIds(id) -- PIX marker text commandListType INTEGER, -- REFERENCES ENUM_D3D12_CMD_LIST_TYPE(id) objectNameId INTEGER, -- REFERENCES StringIds(id) -- D3D12 object name longContextId INTEGER -- Long form of the COM interface object address. ); CREATE TABLE WDDM_EVICT_ALLOCATION_EVENTS ( -- Raw ETW EvictAllocation events. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. allocationHandle INTEGER NOT NULL -- Global allocation handle. ); CREATE TABLE WDDM_PAGING_QUEUE_PACKET_START_EVENTS ( -- Raw ETW PagingQueuePacketStart events. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. dxgDevice INTEGER, -- Address of an IDXGIDevice. dxgAdapter INTEGER, -- Address of an IDXGIAdapter. pagingQueue INTEGER NOT NULL, -- Address of the paging queue. pagingQueuePacket INTEGER NOT NULL, -- Address of the paging queue packet. sequenceId INTEGER NOT NULL, -- Internal sequence starting from 0. alloc INTEGER, -- Allocation handle. 
vidMmOpType INTEGER NOT NULL, -- REFERENCES ENUM_WDDM_VIDMM_OP_TYPE(id) pagingQueueType INTEGER NOT NULL -- REFERENCES ENUM_WDDM_PAGING_QUEUE_TYPE(id) ); CREATE TABLE WDDM_PAGING_QUEUE_PACKET_STOP_EVENTS ( -- Raw ETW PagingQueuePacketStop events. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. pagingQueue INTEGER NOT NULL, -- Address of the paging queue. pagingQueuePacket INTEGER NOT NULL, -- Address of the paging queue packet. sequenceId INTEGER NOT NULL -- Internal sequence starting from 0. ); CREATE TABLE WDDM_PAGING_QUEUE_PACKET_INFO_EVENTS ( -- Raw ETW PagingQueuePacketInfo events. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. pagingQueue INTEGER NOT NULL, -- Address of the paging queue. pagingQueuePacket INTEGER NOT NULL, -- Address of the paging queue packet. sequenceId INTEGER NOT NULL -- Internal sequence starting from 0. ); CREATE TABLE WDDM_QUEUE_PACKET_START_EVENTS ( -- Raw ETW QueuePacketStart events. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. context INTEGER NOT NULL, -- The context ID of WDDM queue. dmaBufferSize INTEGER NOT NULL, -- The dma buffer size. dmaBuffer INTEGER NOT NULL, -- The reported address of dma buffer. queuePacket INTEGER NOT NULL, -- The address of queue packet. progressFenceValue INTEGER NOT NULL, -- The fence value. packetType INTEGER NOT NULL, -- REFERENCES ENUM_WDDM_PACKET_TYPE(id) submitSequence INTEGER NOT NULL, -- Internal sequence starting from 1. allocationListSize INTEGER NOT NULL, -- The number of allocations referenced. patchLocationListSize INTEGER NOT NULL, -- The number of patch locations. 
present INTEGER NOT NULL, -- True or False if the packet is a present packet. engineType INTEGER NOT NULL, -- REFERENCES ENUM_WDDM_ENGINE_TYPE(id) syncObject INTEGER -- The address of fence object. ); CREATE TABLE WDDM_QUEUE_PACKET_STOP_EVENTS ( -- Raw ETW QueuePacketStop events. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. context INTEGER NOT NULL, -- The context ID of WDDM queue. queuePacket INTEGER NOT NULL, -- The address of queue packet. packetType INTEGER NOT NULL, -- REFERENCES ENUM_WDDM_PACKET_TYPE(id) submitSequence INTEGER NOT NULL, -- Internal sequence starting from 1. preempted INTEGER NOT NULL, -- True or False if the packet is preempted. timeouted INTEGER NOT NULL, -- True or False if the packet is timeouted. engineType INTEGER NOT NULL -- REFERENCES ENUM_WDDM_ENGINE_TYPE(id) ); CREATE TABLE WDDM_QUEUE_PACKET_INFO_EVENTS ( -- Raw ETW QueuePacketInfo events. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. context INTEGER NOT NULL, -- The context ID of WDDM queue. packetType INTEGER NOT NULL, -- REFERENCES ENUM_WDDM_PACKET_TYPE(id) submitSequence INTEGER NOT NULL, -- Internal sequence starting from 1. engineType INTEGER NOT NULL -- REFERENCES ENUM_WDDM_ENGINE_TYPE(id) ); CREATE TABLE WDDM_DMA_PACKET_START_EVENTS ( -- Raw ETW DmaPacketStart events. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. context INTEGER NOT NULL, -- The context ID of WDDM queue. queuePacketContext INTEGER NOT NULL, -- The queue packet context. uliSubmissionId INTEGER NOT NULL, -- The queue packet submission ID. 
dmaBuffer INTEGER NOT NULL, -- The reported address of dma buffer. packetType INTEGER NOT NULL, -- REFERENCES ENUM_WDDM_PACKET_TYPE(id) ulQueueSubmitSequence INTEGER NOT NULL, -- Internal sequence starting from 1. quantumStatus INTEGER NOT NULL, -- The quantum Status. engineType INTEGER NOT NULL -- REFERENCES ENUM_WDDM_ENGINE_TYPE(id) ); CREATE TABLE WDDM_DMA_PACKET_STOP_EVENTS ( -- Raw ETW DmaPacketStop events. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. context INTEGER NOT NULL, -- The context ID of WDDM queue. uliCompletionId INTEGER NOT NULL, -- The queue packet completion ID. packetType INTEGER NOT NULL, -- REFERENCES ENUM_WDDM_PACKET_TYPE(id) ulQueueSubmitSequence INTEGER NOT NULL, -- Internal sequence starting from 1. preempted INTEGER NOT NULL, -- True or False if the packet is preempted. engineType INTEGER NOT NULL -- REFERENCES ENUM_WDDM_ENGINE_TYPE(id) ); CREATE TABLE WDDM_DMA_PACKET_INFO_EVENTS ( -- Raw ETW DmaPacketInfo events. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. context INTEGER NOT NULL, -- The context ID of WDDM queue. uliCompletionId INTEGER NOT NULL, -- The queue packet completion ID. faultedVirtualAddress INTEGER NOT NULL, -- The virtual address of faulted process. faultedProcessHandle INTEGER NOT NULL, -- The address of faulted process. packetType INTEGER NOT NULL, -- REFERENCES ENUM_WDDM_PACKET_TYPE(id) ulQueueSubmitSequence INTEGER NOT NULL, -- Internal sequence starting from 1. interruptType INTEGER NOT NULL, -- REFERENCES ENUM_WDDM_INTERRUPT_TYPE(id) quantumStatus INTEGER NOT NULL, -- The quantum Status. pageFaultFlags INTEGER NOT NULL, -- The page fault flag ID. 
engineType INTEGER NOT NULL -- REFERENCES ENUM_WDDM_ENGINE_TYPE(id) ); CREATE TABLE WDDM_HW_QUEUE_EVENTS ( -- Raw ETW HwQueueStart events. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. context INTEGER NOT NULL, -- The context ID of WDDM queue. hwQueue INTEGER NOT NULL, -- The address of HW queue. parentDxgHwQueue INTEGER NOT NULL -- The address of parent Dxg HW queue. ); CREATE TABLE NVVIDEO_ENCODER_API ( -- NV Video Encoder API traced to gather information about NVIDIA Video Codek SDK Encoder APIs. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. nameId INTEGER NOT NULL -- REFERENCES StringIds(id) -- Function name ); CREATE TABLE NVVIDEO_DECODER_API ( -- NV Video Encoder API traced to gather information about NVIDIA Video Codek SDK Decoder APIs. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. nameId INTEGER NOT NULL -- REFERENCES StringIds(id) -- Function name ); CREATE TABLE NVVIDEO_JPEG_API ( -- NV Video Encoder API traced to gather information about NVIDIA Video Codek SDK JPEG APIs. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). eventClass INTEGER NOT NULL, -- REFERENCES ENUM_NSYS_EVENT_CLASS(id) globalTid INTEGER, -- Serialized GlobalId. nameId INTEGER NOT NULL -- REFERENCES StringIds(id) -- Function name ); CREATE TABLE GPU_MEMORY_BUDGET_EVENTS ( -- Raw ETW VidMmProcessBudgetChange events. timestamp INTEGER NOT NULL, -- Event start timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. 
newBudget INTEGER, -- The new budget size in bytes. segmentGroup INTEGER -- The segment group ID. ); CREATE TABLE GPU_MEMORY_USAGE_EVENTS ( -- Raw ETW VidMmProcessUsageChange events. timestamp INTEGER NOT NULL, -- Event start timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. newUsage INTEGER, -- The new usage size in bytes. segmentGroup INTEGER -- The segment group ID. ); CREATE TABLE DEMOTED_BYTES_EVENTS ( -- Raw ETW VidMmProcessDemotedCommitmentChange events. timestamp INTEGER NOT NULL, -- Event start timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. commitment INTEGER -- Total demoted bytes. ); CREATE TABLE TOTAL_BYTES_RESIDENT_IN_SEGMENT_EVENTS ( -- Raw ETW TotalBytesResidentInSegment events. timestamp INTEGER NOT NULL, -- Event start timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. totalBytesResident INTEGER, -- Total bytes resident in segment. segmentGroup INTEGER, -- The segment group ID. segmentId INTEGER -- The segment ID. ); CREATE TABLE MEMORY_TRANSFER_EVENTS ( -- Raw ETW Memory Transfer events. start INTEGER NOT NULL, -- Event start timestamp (ns). globalTid INTEGER, -- Serialized GlobalId. gpu INTEGER, -- GPU index. taskId INTEGER NOT NULL, -- The event task ID. eventId INTEGER NOT NULL, -- Event ID. allocationGlobalHandle INTEGER NOT NULL, -- Address of the global allocation handle. dmaBuffer INTEGER NOT NULL, -- The reported address of dma buffer. size INTEGER NOT NULL, -- The size of the dma buffer in bytes. offset INTEGER NOT NULL, -- The offset from the start of the reported dma buffer in bytes. memoryTransferType INTEGER NOT NULL -- REFERENCES ENUM_ETW_MEMORY_TRANSFER_TYPE(id) ); CREATE TABLE NV_LOAD_BALANCE_MASTER_EVENTS ( -- Raw ETW NV-wgf2um LoadBalanceMaster events. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). 
globalTid INTEGER NOT NULL, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. eventId INTEGER NOT NULL, -- Event ID. task TEXT NOT NULL, -- The task name. frameCount INTEGER NOT NULL, -- The frame ID. frameTime REAL NOT NULL, -- Frame duration. averageFrameTime REAL NOT NULL, -- Average of frame duration. averageLatency REAL NOT NULL, -- Average of latency. minLatency REAL NOT NULL, -- The minimum latency. averageQueuedFrames REAL NOT NULL, -- Average number of queued frames. totalActiveMs REAL NOT NULL, -- Total active time in milliseconds. totalIdleMs REAL NOT NULL, -- Total idle time in milliseconds. idlePercent REAL NOT NULL, -- The percentage of idle time. isGPUAlmostOneFrameAhead INTEGER NOT NULL -- True or False if GPU is almost one frame ahead. ); CREATE TABLE NV_LOAD_BALANCE_EVENTS ( -- Raw ETW NV-wgf2um LoadBalance events. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalTid INTEGER NOT NULL, -- Serialized GlobalId. gpu INTEGER NOT NULL, -- GPU index. eventId INTEGER NOT NULL, -- Event ID. task TEXT NOT NULL, -- The task name. averageFPS REAL NOT NULL, -- Average frame per second. queuedFrames REAL NOT NULL, -- The amount of queued frames. averageQueuedFrames REAL NOT NULL, -- Average number of queued frames. currentCPUTime REAL NOT NULL, -- The current CPU time. averageCPUTime REAL NOT NULL, -- Average CPU time. averageStallTime REAL NOT NULL, -- Average of stall time. averageCPUIdleTime REAL NOT NULL, -- Average CPU idle time. isGPUAlmostOneFrameAhead INTEGER NOT NULL -- True or False if GPU is almost one frame ahead. ); CREATE TABLE PROCESSES ( -- Names and identifiers of processes captured in the report. globalPid INTEGER, -- Serialized GlobalId. pid INTEGER, -- The process ID. name TEXT -- The process name. ); CREATE TABLE CUPTI_ACTIVITY_KIND_OPENACC_DATA ( -- OpenACC data events collected using CUPTI. start INTEGER NOT NULL, -- Event start timestamp (ns). 
end INTEGER NOT NULL, -- Event end timestamp (ns). nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name globalTid INTEGER, -- Serialized GlobalId. eventKind INTEGER NOT NULL, -- REFERENCES ENUM_OPENACC_EVENT_KIND(id) DeviceType INTEGER NOT NULL, -- REFERENCES ENUM_OPENACC_DEVICE(id) lineNo INTEGER NOT NULL, -- Line number of the directive or program construct. cuDeviceId INTEGER NOT NULL, -- CUDA device ID. Valid only if deviceType is acc_device_nvidia. cuContextId INTEGER NOT NULL, -- CUDA context ID. Valid only if deviceType is acc_device_nvidia. cuStreamId INTEGER NOT NULL, -- CUDA stream ID. Valid only if deviceType is acc_device_nvidia. srcFile INTEGER, -- REFERENCES StringIds(id) -- Source file name or path funcName INTEGER, -- REFERENCES StringIds(id) -- Function in which event occurred bytes INTEGER, -- Number of bytes. varName INTEGER -- REFERENCES StringIds(id) -- Variable name ); CREATE TABLE CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH ( -- OpenACC launch events collected using CUPTI. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name globalTid INTEGER, -- Serialized GlobalId. eventKind INTEGER NOT NULL, -- REFERENCES ENUM_OPENACC_EVENT_KIND(id) DeviceType INTEGER NOT NULL, -- REFERENCES ENUM_OPENACC_DEVICE(id) lineNo INTEGER NOT NULL, -- Line number of the directive or program construct. cuDeviceId INTEGER NOT NULL, -- CUDA device ID. Valid only if deviceType is acc_device_nvidia. cuContextId INTEGER NOT NULL, -- CUDA context ID. Valid only if deviceType is acc_device_nvidia. cuStreamId INTEGER NOT NULL, -- CUDA stream ID. Valid only if deviceType is acc_device_nvidia. srcFile INTEGER, -- REFERENCES StringIds(id) -- Source file name or path funcName INTEGER, -- REFERENCES StringIds(id) -- Function in which event occurred numGangs INTEGER, -- Number of gangs created for this kernel launch. 
numWorkers INTEGER, -- Number of workers created for this kernel launch. vectorLength INTEGER, -- Number of vector lanes created for this kernel launch. kernelName INTEGER -- REFERENCES StringIds(id) -- Kernel name ); CREATE TABLE CUPTI_ACTIVITY_KIND_OPENACC_OTHER ( -- OpenACC other events collected using CUPTI. start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). nameId INTEGER NOT NULL, -- REFERENCES StringIds(id) -- Event name globalTid INTEGER, -- Serialized GlobalId. eventKind INTEGER NOT NULL, -- REFERENCES ENUM_OPENACC_EVENT_KIND(id) DeviceType INTEGER NOT NULL, -- REFERENCES ENUM_OPENACC_DEVICE(id) lineNo INTEGER NOT NULL, -- Line number of the directive or program construct. cuDeviceId INTEGER NOT NULL, -- CUDA device ID. Valid only if deviceType is acc_device_nvidia. cuContextId INTEGER NOT NULL, -- CUDA context ID. Valid only if deviceType is acc_device_nvidia. cuStreamId INTEGER NOT NULL, -- CUDA stream ID. Valid only if deviceType is acc_device_nvidia. srcFile INTEGER, -- REFERENCES StringIds(id) -- Source file name or path funcName INTEGER -- REFERENCES StringIds(id) -- Function in which event occurred ); CREATE TABLE NET_NIC_METRIC ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalId INTEGER NOT NULL, -- Serialized GlobalId. metricsListId INTEGER NOT NULL, -- REFERENCES TARGET_INFO_NETWORK_METRICS(metricsListId) metricsIdx INTEGER NOT NULL, -- REFERENCES TARGET_INFO_NETWORK_METRICS(metricsIdx) value INTEGER NOT NULL -- Counter data value ); CREATE TABLE NET_IB_SWITCH_METRIC ( start INTEGER NOT NULL, -- Event start timestamp (ns). end INTEGER NOT NULL, -- Event end timestamp (ns). globalId INTEGER NOT NULL, -- Serialized GlobalId. 
metricsListId INTEGER NOT NULL, -- REFERENCES TARGET_INFO_NETWORK_METRICS(metricsListId) metricsIdx INTEGER NOT NULL, -- REFERENCES TARGET_INFO_NETWORK_METRICS(metricsIdx) value INTEGER NOT NULL -- Counter data value );", "keywords": []}, {"id": 151, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#statistical-reports-shipped-with-product-name", "display_name": "Statistical Reports Shipped With Nsight Systems", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "statistical-reports-shipped-with-product-name", "priority": -1, "content": "The Nsight Systems development team created and maintains a set of report scripts for some of the commonly requested statistical reports. These scripts will be updated to adapt to any changes in SQLite schema or internal data structures. These scripts are located in the Nsight Systems package in the Target-<architecture>/reports directory. The following standard reports are available: Note: The ability to display mangled names is a recent addition to the report file format, and requires that the profile data be captured with a recent version of Nsys. Re-exporting an existing report file is not sufficient. If the raw, mangled kernel name data is not available, the default demangled names will be used. Note: All time values given in nanoseconds by default. 
If you wish to output the results using a different time unit, use the --timeunit option when running the recipe.", "keywords": []}, {"id": 152, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#stutter-analysis", "display_name": "Stutter Analysis", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "stutter-analysis", "priority": -1, "content": "Stutter Analysis Overview Nsight Systems on Windows targets displays stutter analysis visualization aids for profiled graphics applications that use either OpenGL, D3D11, D3D12 or Vulkan, as detailed below in the following sections.", "keywords": []}, {"id": 153, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#symbol-locations", "display_name": "Symbol Locations", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "symbol-locations", "priority": -1, "content": "Symbol resolution happens on host, and therefore does not affect performance of profiling on the target. Press the Symbol locations\u2026 button to open the Configure debug symbols location dialog. Use this dialog to specify: Paths of PDB files Symbols servers The location of the local symbol cache To use a symbol server: Install Debugging Tools for Windows , a part of the Windows 10 SDK . Add the symbol server URL using the Add Server button. 
Information about Microsoft\u2019s public symbol server, which enables getting Windows operating system related debug symbols can be found here .", "keywords": []}, {"id": 154, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#symbol-resolution", "display_name": "Symbol Resolution", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "symbol-resolution", "priority": -1, "content": "If stack trace information is missing symbols and you have a symbol file, you can manually re-resolve using the ResolveSymbols utility. This can be done by right-clicking the report file in the Project Explorer window and selecting \u201cResolve Symbols\u2026\u201d. Alternatively, you can find the utility as a separate executable in the [installation_path]\\Host directory. This utility works with ELF format files, with Windows PDB directories and symbol servers, or with files where each line is in the format <start><length><name> . Short Long Argument Description -h --help Help message providing information about available options. -l --process-list Print global process IDs list -s --sym-file filename Path to symbol file -b --base-addr address If set then <start> in symbol file is treated as relative address starting from this base address -p --global-pid pid Which process in the report should be resolved. May be omitted if there is only one process in the report. -f --force This option forces use of a given symbol file. -i --report filename Path to the report with unresolved symbols. -o --output filename Path and name of the output file. If it is omitted then \u201cresolved\u201d suffix is added to the original filename. -d --directories directory paths List of symbol folder paths, separated by semi-colon characters. Available only on Windows. -v --servers server URLs List of symbol servers that uses the same format as _NT_SYMBOL_PATH environment variable, i.e. srv*<LocalStore>*<SymbolServerURL> . 
Available only on Windows. -n --ignore-nt-sym-path Ignore the symbol locations stored in the _NT_SYMBOL_PATH environment variable. Available only on Windows.", "keywords": []}, {"id": 155, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#symbol-resolution-logs-view", "display_name": "Symbol Resolution Logs View", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "symbol-resolution-logs-view", "priority": -1, "content": "This view shows all messages related to the process of resolving symbols. It might be useful to debug issues when some of the symbol names in the symbols table of the timeline view are unresolved.", "keywords": []}, {"id": 156, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#target-sampling-options", "display_name": "Target Sampling Options", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "target-sampling-options", "priority": -1, "content": "Target sampling behavior is somewhat different for Nsight Systems Workstation Edition and Nsight Systems Embedded Platforms Edition .", "keywords": []}, {"id": 157, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#target-sampling-options-on-windows", "display_name": "Target Sampling Options on Windows", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "target-sampling-options-on-windows", "priority": -1, "content": "Nsight Systems can sample one process tree. Sampling here means interrupting each processor periodically. The sampling rate is defined in the project settings and is either 100Hz, 1KHz (default value), 2Khz, 4KHz, or 8KHz. On Windows, Nsight Systems can collect thread activity of one process tree. 
Collecting thread activity means that each thread context switch event is logged and (optionally) a backtrace is collected at the point that the thread is scheduled back for execution. Thread states are displayed on the timeline. If it was collected, the thread backtrace is displayed when hovering over a region where the thread execution is blocked.", "keywords": []}, {"id": 158, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#time-synchronization", "display_name": "Time Synchronization", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "time-synchronization", "priority": -1, "content": "When multiple reports are loaded into a single timeline, timestamps between them need to be adjusted, such that events that happened at the same time appear to be aligned. Nsight Systems can automatically adjust timestamps based on UTC time recorded around the collection start time. This method is used by default when other more precise methods are not available. This time can be seen as UTC time at t=0 in the Analysis Summary page of the report file. Refer to your OS documentation to learn how to sync the software clock using the Network Time Protocol (NTP). NTP-based time synchronization is not very precise, with the typical errors on the scale of one to tens of milliseconds. Reports collected on the same physical machine can use synchronization based on Timestamp Counter (TSC) values . These are platform-specific counters, typically accessed in user space applications using the RDTSC instruction on x86_64 architecture, or by reading the CNTVCT register on Arm64. Their values converted to nanoseconds can be seen as TSC value at t=0 in the Analysis Summary page of the report file. Reports synchronized using TSC values can be aligned with nanoseconds-level precision. 
TSC-based time synchronization is activated automatically, when Nsight Systems detects that reports come from same target and that the same TSC value corresponds to very close UTC times. Targets are considered to be the same when either explicitly set environment variables NSYS_HW_ID are the same for both reports or when target hostnames are the same and NSYS_HW_ID is not set for either target. The difference between UTC and TSC time offsets must be below 1 second to choose TSC-based time synchronization. To find out which synchronization method was used, navigate to the Analysis Summary tab of an added report and check the Report alignment source property of a target. Note, that the first report won\u2019t have this parameter. When loading multiple reports into a single timeline, it is always advisable to first check that time synchronization looks correct, by zooming into synchronization or communication events that are expected to be aligned.", "keywords": []}, {"id": 159, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#timeline", "display_name": "Timeline", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "timeline", "priority": -1, "content": "Timeline is a versatile control that contains a tree-like hierarchy on the left, and corresponding charts on the right. Contents of the hierarchy depend on the project settings used to collect the report. For example, if a certain feature has not been enabled, corresponding rows will not be show on the timeline. 
To generate a timeline screenshot without opening the full GUI, use the command nsys-ui.exe --screenshot filename.nsys-rep", "keywords": []}, {"id": 160, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#timeline-hierarchy", "display_name": "Timeline Hierarchy", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "timeline-hierarchy", "priority": -1, "content": "When reports are added to the same timeline Nsight Systems will automatically line them up by timestamps as described above. If you want Nsight Systems to also recognize matching process or hardware information, you will need to set environment variables NSYS_SYSTEM_ID and NSYS_HW_ID as shown below at the time of report collection (such as when using \u201cnsys profile \u2026\u201d command). When loading a pair of given report files into the same timeline, they will be merged in one of the following configurations: Different hardware \u2014 is used when reports are coming from different physical machines, and no hardware resources are shared in these reports. This mode is used when neither NSYS_HW_ID or NSYS_SYSTEM_ID is set and target hostnames are different or absent, and can be additionally signalled by specifying different NSYS_HW_ID values. Different systems, same hardware \u2014 is used when reports are collected on different virtual machines (VMs) or containers on the same physical machine. To activate this mode, specify the same value of NSYS_HW_ID when collecting the reports. Same system \u2014 is used when reports are collected within the same operating system (or container) environment. In this mode a process identifier (PID) 100 will refer to the same process in both reports. To manually activate this mode, specify the same value of NSYS_SYSTEM_ID when collecting the reports. This mode is automatically selected when target hostnames are the same and neither NSYS_HW_ID or NSYS_SYSTEM_ID is provided. 
The following diagrams demonstrate typical cases:", "keywords": []}, {"id": 161, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#timeline-view", "display_name": "Timeline View", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "timeline-view", "priority": -1, "content": "The timeline view consists of two main controls: the timeline at the top, and a bottom pane that contains the events view and the function table. In some cases, when sampling of a process has not been enabled, the function table might be empty and hidden. The bottom view selector sets the view that is displayed in the bottom pane.", "keywords": []}, {"id": 162, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#tracing-mpi-api-calls", "display_name": "Tracing MPI API calls", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "tracing-mpi-api-calls", "priority": -1, "content": "The Nsight Systems CLI has built-in API trace support for Open MPI and MPICH based MPI implementations via --trace=mpi . It traces a subset of the MPI API, including blocking and non-blocking point-to-point and collective communication as well as MPI one-sided communication, file I/O and pack operations (see MPI functions traced ). If you require more control over the list of traced APIs or if you are using a different MPI implementation, you can use the NVTX wrappers for MPI on GitHub. Choose an NVTX domain name other than \u201cMPI\u201d, since it is filtered out by Nsight Systems when MPI tracing is not enabled. 
Use the NVTX-instrumented MPI wrapper library as follows: nsys profile -e LD_PRELOAD=${PATH_TO_YOUR_NVTX_MPI_LIB} --trace=nvtx", "keywords": []}, {"id": 163, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#training-seminars", "display_name": "Training Seminars", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "training-seminars", "priority": -1, "content": "NVIDIA Deep Learning Institute Training - Self-Paced Online Course Optimizing CUDA Machine Learning Codes With Nsight Profiling Tools 2018 NCSA Blue Waters Webinar - Video Only Introduction to NVIDIA Nsight Systems", "keywords": []}, {"id": 164, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#tutorial-create-a-user-defined-recipe", "display_name": "Tutorial: Create a User-Defined Recipe", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "tutorial-create-a-user-defined-recipe", "priority": -1, "content": "The Nsight Systems recipe system is designed to be extensible and we hope that many users will use it to create their own recipes. This short tutorial will highlight the steps needed to create a recipe that is a customized version of one of the recipes that is included in the Nsight Systems recipe package. Step 1: Create the recipe directory and script Make a new directory in the <install-dir>/target-linux-x64/python/packages/nsys_recipe folder based on the name of your new recipe. For this example, we will call our new recipe new_metric_util_map. We will copy the existing gpu_metric_util_map.py script and create a new script called new_metric_util_map.py in the new_metric_util_map directory. We will also copy the heatmap.ipynb file into the new_metric_util_map directory. 
Type these steps in a Linux terminal window: > cd <install-dir>/target-linux-x64/python/packages/nsys_recipe > mkdir new_metric_util_map > cp gpu_metric_util_map/metadata.json new_metric_util_map/metadata.json > cp gpu_metric_util_map/heatmap.ipynb new_metric_util_map/heatmap.ipynb Replace the module name in metadata.json with new_metric_util_map and update the display name and description to your preference. Also, rename the class name GpuMetricUtilMap in gpu_metric_util_map.py to NewMetricUtilMap. We will discuss the detailed functionality of the new recipe code in the subsequent steps. Step 2: Modify the mapper function Many recipes are structured as a map-reduce algorithm. The mapper function is called for every .nsys-rep file in the report directory. The mapper function performs a series of calculations on the events in each Nsight Systems report and produces an intermediate data set. The intermediate results are then combined by the reduce function to produce the final results. The mapper function can be called in parallel, either on multiple cores of a single node (using the concurrent python module), or multiple ranks of a multi-node recipe analysis (using the Dask distributed module). When we create a new recipe, we need to create a class that derives from the Recipe base class. For our example, that class will be called NewMetricUtilMap (which we had renamed in step 1). The mapper function is called mapper_func(). It will first convert the .nsys-rep file into an SQLite database, if the SQLite file does not already exist. It then reads all the necessary tables from the SQLite file into Pandas Dataframes needed by the recipe. GPU Metric data is stored using a database schema table called GENERIC_EVENTS. For extra flexibility, GENERIC_EVENTS represents the data as a JSON object, which is stored as a string. The NewMetricUtilMap class extracts fields from the JSON object and accumulates them over the histogram bins of the heat map. 
The original script retrieved three GPU metrics: SM Active, SM Issue, and Tensor Active. In our new version of the script, we will extract a fourth metric, Unallocated Warps in Active SMs. Find this line (approximately line 65): metric_cols = ['SM Active', 'SM Issue', 'Tensor Active'] Add the Unallocated Warps in Active SMs metric: metric_cols = ['SM Active', 'SM Issue', 'Tensor Active', 'Unallocated Warps in Active SMs'] Step 3: Modify the reduce function Our new mapper function will extract four GPU metrics and return them as a Pandas DataFrame. The reduce function receives a list of DataFrames, one for each .nsys-rep file in the analysis, and combines them into a single DataFrame using the Pandas concat function. Since the reducer function is generic in our case, no modifications are needed. However, if you would like to add any additional post-processing, you can do so in this function. Step 4: Add a plot to the Jupyter notebook Our new recipe class will create a Parquet output file with all the data produced by the reducer function, using the to_parquet() function. It will also create a Jupyter notebook file using the create_notebook() function. In this step, we will change the create_notebook() function to produce a plot for our fourth metric. To do this, we need to change these two lines (located in the second cell of new_metric_util_map/heatmap.ipynb): metrics = ('SM Active', 'SM Issue', 'Tensor Active') To this: metrics = ('SM Active', 'SM Issue', 'Tensor Active', 'Unallocated Warps in Active SMs') That completes all the modifications for our NewMetricUtilMap class. Step 5: Run the new recipe If the new recipe is located in the default recipe directory nsys_recipe/recipes, we can directly run it using the nsys recipe command like this: > nsys recipe new_metric_util_map --input <directory of reports> It is also possible to have a recipe located outside of this directory. 
In this case, you need to set the environment variable NSYS_RECIPE_PATH to the directory containing the recipe when running the nsys recipe command. When successful, the recipe should produce a new recipe result directory called new_metric_util_map-1. If we open the Jupyter notebook in that recipe and execute the code, we should see our new heatmap along with the three plots produced by the original version of the recipe. Here is an example:", "keywords": []}, {"id": 165, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#ubuntu-18-04-20-04-22-04-and-centos-7-8-9-with-root-privileges", "display_name": "Ubuntu 18.04/20.04/22.04 and CentOS 7/8/9 with root privileges", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "ubuntu-18-04-20-04-22-04-and-centos-7-8-9-with-root-privileges", "priority": -1, "content": "Launch the following command, which will install all the required libraries in system directories: [installation_path]/host-linux-[arch]/Scripts/DependenciesInstaller/install-dependencies.sh Launch the Linux GUI as usual.", "keywords": []}, {"id": 166, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#ubuntu-18-04-20-04-22-04-and-centos-7-8-9-without-root-privileges", "display_name": "Ubuntu 18.04/20.04/22.04 and CentOS 7/8/9 without root privileges", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "ubuntu-18-04-20-04-22-04-and-centos-7-8-9-without-root-privileges", "priority": -1, "content": "Choose the directory where dependencies will be installed ( dependencies_path ). This directory should be writeable for the current user. 
Launch the following command (if it has already been run, move to the next step), which will install all the required libraries in [dependencies_path] : [installation_path]/host-linux-[arch]/Scripts/DependenciesInstaller/install-dependencies-without-root.sh [dependencies_path] Further, use the following command to launch the Linux GUI: source [installation_path]/host-linux-[arch]/Scripts/DependenciesInstaller/setup-dependencies-environment.sh [dependencies_path] && [installation_path]/host-linux-x64/nsys-ui", "keywords": []}, {"id": 167, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#ucx-library-trace", "display_name": "UCX Library Trace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "ucx-library-trace", "priority": -1, "content": "If UCX library trace is selected Nsight Systems will trace the subset of functions of the UCX protocol layer UCP that are most likely be involved in performance bottlenecks. To keep overhead low Nsight Systems does not trace all functions. 
UCX functions traced: ucp_am_send_nb[x] ucp_am_recv_data_nbx ucp_am_data_release ucp_atomic_{add{32,64},cswap{32,64},fadd{32,64},swap{32,64}} ucp_atomic_{post,fetch_nb,op_nbx} ucp_cleanup ucp_config_{modify,read,release} ucp_disconnect_nb ucp_dt_{create_generic,destroy} ucp_ep_{create,destroy,modify_nb,close_nbx} ucp_ep_flush[{_nb,_nbx}] ucp_listener_{create,destroy,query,reject} ucp_mem_{advise,map,unmap,query} ucp_{put,get}[_nbi] ucp_{put,get}_nb[x] ucp_request_{alloc,cancel,is_completed} ucp_rkey_{buffer_release,destroy,pack,ptr} ucp_stream_data_release ucp_stream_recv_data_nb ucp_stream_{send,recv}_nb[x] ucp_stream_worker_poll ucp_tag_msg_recv_nb[x] ucp_tag_{send,recv}_nbr ucp_tag_{send,recv}_nb[x] ucp_tag_send_sync_nb[x] ucp_worker_{create,destroy,get_address,get_efd,arm,fence,wait,signal,wait_mem} ucp_worker_flush[{_nb,_nbx}] ucp_worker_set_am_{handler,recv_handler} UCX Functions Not Traced: ucp_config_print ucp_conn_request_query ucp_context_{query,print_info} ucp_get_version[_string] ucp_ep_{close_nb,print_info,query,rkey_unpack} ucp_mem_print_info ucp_request_{check_status,free,query,release,test} ucp_stream_recv_request_test ucp_tag_probe_nb ucp_tag_recv_request_test ucp_worker_{address_query,print_info,progress,query,release_address} Additional API functions from other UCX layers may be added in a future version of the product.", "keywords": []}, {"id": 168, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#unified-memory-cpu-page-faults", "display_name": "Unified Memory CPU Page Faults", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "unified-memory-cpu-page-faults", "priority": -1, "content": "The Unified Memory CPU page faults feature in Nsight Systems tracks the page faults that occur when CPU code tries to access a memory page that resides on the device. Collecting Unified Memory CPU page faults can cause overhead of up to 70% in testing. 
Please use this functionality only when needed.", "keywords": []}, {"id": 169, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#unified-memory-gpu-page-faults", "display_name": "Unified Memory GPU Page Faults", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "unified-memory-gpu-page-faults", "priority": -1, "content": "The Unified Memory GPU page faults feature in Nsight Systems tracks the page faults that occur when GPU code tries to access a memory page that resides on the host. Collecting Unified Memory GPU page faults can cause overhead of up to 70% in testing. Please use this functionality only when needed.", "keywords": []}, {"id": 170, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#unified-memory-transfer-trace", "display_name": "Unified Memory Transfer Trace", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "unified-memory-transfer-trace", "priority": -1, "content": "For Nsight Systems Workstation Edition , Unified Memory (also called Managed Memory) transfer trace is enabled automatically in Nsight Systems when CUDA trace is selected. It incurs no overhead in programs that do not perform any Unified Memory transfers. Data is displayed in the Managed Memory area of the timeline: HtoD transfer indicates the CUDA kernel accessed managed memory that was residing on the host, so the kernel execution paused and transferred the data to the device. Heavy traffic here will incur performance penalties in CUDA kernels, so consider using manual cudaMemcpy operations from pinned host memory instead. PtoP transfer indicates the CUDA kernel accessed managed memory that was residing on a different device, so the kernel execution paused and transferred the data to this device. 
Heavy traffic here will incur performance penalties, so consider using manual cudaMemcpyPeer operations to transfer from other devices\u2019 memory instead. The row showing these events is for the destination device - the source device is shown in the tooltip for each transfer event. DtoH transfer indicates the CPU accessed managed memory that was residing on a CUDA device, so the CPU execution paused and transferred the data to system memory. Heavy traffic here will incur performance penalties in CPU code, so consider using manual cudaMemcpy operations from pinned host memory instead. Some Unified Memory transfers are highlighted with red to indicate potential performance issues: Transfers with the following migration causes are highlighted: Coherence Unified Memory migration occurred to guarantee data coherence. SMs (streaming multiprocessors) stop until the migration completes. Eviction Unified Memory migrated to the CPU because it was evicted to make room for another block of memory on the GPU. This happens due to memory overcommitment which is available on Linux with Compute Capability \u2265 6.", "keywords": []}, {"id": 171, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#using-expert-system-from-the-cli", "display_name": "Using Expert System from the CLI", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "using-expert-system-from-the-cli", "priority": -1, "content": "usage: nsys [global-options] analyze [options] [nsys-rep-or-sqlite-file] If a .nsys-rep file is given as the input file and there is no .sqlite file with the same name in the same directory, it will be generated. 
Note: The Expert System view in the GUI will give you the equivalent command line.", "keywords": []}, {"id": 172, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#using-expert-system-from-the-gui", "display_name": "Using Expert System from the GUI", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "using-expert-system-from-the-gui", "priority": -1, "content": "The Expert System View can be found in the same drop-down as the Events View. If there is no .sqlite file with the same name as the .nsys-rep file in the same directory, it will be generated. The Expert System View has the following components: Drop-down to select the rule to be run Rule description and advice summary CLI command that will give the same result Table containing results of running the rule Settings button that allows users to specify the rule\u2019s arguments A context menu is available to correlate the table entry with the timeline. The options are the same as the Events View: Zoom to Selected on Timeline (ctrl+double-click) The highlighting is not supported for rules that do not return an event but rather an arbitrary time range (e.g. GPU utilization rules). The CLI and GUI share the same rule scripts and messages. There might be some formatting differences between the output table in GUI and CLI.", "keywords": []}, {"id": 173, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#using-the-cli-to-profile-applications-launched-with-mpirun", "display_name": "Using the CLI to Profile Applications Launched with mpirun", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "using-the-cli-to-profile-applications-launched-with-mpirun", "priority": -1, "content": "The Nsight Systems CLI supports concurrent use of the nsys profile command. Each instance will create a separate report file. 
You cannot use multiple instances of the interactive CLI concurrently, or use the interactive CLI concurrently with nsys profile in this version. Nsight Systems can be used to profile applications launched with mpirun or mpiexec . Since concurrent use of the CLI is supported only when using the nsys profile command, Nsight Systems cannot profile each node from the GUI or from the interactive CLI. Profile all MPI ranks on a single node: nsys can be prefixed before mpirun/mpiexec . Only a single report file will be created. nsys [nsys options] mpirun [mpirun options] Profile multi-node runs: nsys profile has to be prefixed before the program to be profiled. One report file will be created for each MPI rank. This works also for single-node runs. mpirun [mpirun options] nsys profile [nsys options] You can use %q{OMPI_COMM_WORLD_RANK} (Open MPI), %q{PMI_RANK} (MPICH) or %q{SLURM_PROCID} (Slurm) with the -o option to appropriately name the report files. Profile a single MPI process or a subset of MPI processes: Use a wrapper script similar to the following script (called \u201cprofile_rank0.sh\u201d). #!/bin/bash # Use $PMI_RANK for MPICH and $SLURM_PROCID with srun. if [ $OMPI_COMM_WORLD_RANK -eq 0 ]; then nsys profile -e NSYS_MPI_STORE_TEAMS_PER_RANK=1 -t mpi "$@" else "$@" fi The script runs nsys on rank 0 only. Add appropriate profiling options to the script and execute it with mpirun [mpirun options] ./profile_rank0.sh ./myapp [app options] . If only a subset of MPI ranks is profiled, set the environment variable NSYS_MPI_STORE_TEAMS_PER_RANK=1 to store all members of custom MPI communicators per MPI rank. Otherwise, the execution might hang or fail with an MPI error. Avoid redundant GPU and NIC metrics collection: If multiple instances of nsys profile are executed concurrently on the same node and GPU and/or NIC metrics collection is enabled, each process will collect metrics for all available NICs and tries to collect GPU metrics for the specified devices. 
This can be avoided with a simple bash script similar to the following: #!/bin/bash # Use $SLURM_LOCALID with srun. if [ $OMPI_COMM_WORLD_LOCAL_RANK -eq 0 ]; then nsys profile --nic-metrics=true --gpu-metrics-device=all "$@" else nsys profile "$@" fi This above script will collect NIC and GPU metrics only for one rank, the node-local rank 0. Alternatively, if one rank per GPU is used, the GPU metrics devices can be specified based on the node-local rank in a wrapper script as follows: #!/bin/bash # Use $SLURM_LOCALID with srun. nsys profile -e CUDA_VISIBLE_DEVICES=${OMPI_COMM_WORLD_LOCAL_RANK} \\ --gpu-metrics-device=${OMPI_COMM_WORLD_LOCAL_RANK} "$@"", "keywords": []}, {"id": 174, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#using-the-product-name-cli-nvprof-command", "display_name": "Using the Nsight Systems CLI nvprof Command", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "using-the-product-name-cli-nvprof-command", "priority": -1, "content": "The nvprof command of the Nsight Systems CLI is intended to help former nvprof users transition to nsys. Many nvprof switches are not supported by nsys, often because they are now part of NVIDIA Nsight Compute. The full nvprof documentation can be found at https://docs.nvidia.com/cuda/profiler-users-guide . The nvprof transition guide for Nsight Compute can be found at https://docs.nvidia.com/nsight-compute/NsightComputeCli/index.html#nvprof-guide . Any nvprof switch not listed below is not supported by the nsys nvprof command. No additional nsys functionality is available through this command. 
New features will not be added to this command in the future.", "keywords": []}, {"id": 175, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#verbose-cli-logging-on-linux-targets", "display_name": "Verbose CLI Logging on Linux Targets", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "verbose-cli-logging-on-linux-targets", "priority": -1, "content": "To enable verbose logging of the Nsight Systems CLI and the target application\u2019s injection behavior: In the target-linux-x64 directory, rename the nvlog.config.template file to nvlog.config. Inside that file, change the line $ }}{{{}nsys-ui.log to $ }}{{{}nsys-agent.log Run a collection and the target-linux.x64 directory should include a file named nsys-agent.log . Please note that in some cases, debug logging can significantly slow down the profiler.", "keywords": []}, {"id": 176, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#verbose-logging-on-windows-targets", "display_name": "Verbose Logging on Windows Targets", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "verbose-logging-on-windows-targets", "priority": -1, "content": "Verbose logging is available when connecting to a Windows-based device from the GUI on the host. Nsight Systems installs its executable and library files into the following directory by default: C:\\Program Files\\NVIDIA Corporation\\Nsight Systems 2023.3 To enable verbose logging on the target device, when launched from the host, follow these steps: Close the host application. Terminate the nsys process. 
Place nvlog.config from host directory next to Nsight Systems Windows agent on the target device Local Windows target: C:\\Program Files\\NVIDIA Corporation\\Nsight Systems 2023.3\\target-windows-x64 Remote Windows target: C:\\Users\\<user name>\\AppData\\Local\\Temp\\nvidia\\nsight_systems Start the host application and connect to the target device. Logs on the target devices are collected into this file (if enabled): nsight-sys.log in the same directory as Nsight Systems Windows agent. Please note that in some cases debug logging can significantly slow down the profiler.", "keywords": []}, {"id": 177, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#verbose-remote-logging-on-linux-targets", "display_name": "Verbose Remote Logging on Linux Targets", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "verbose-remote-logging-on-linux-targets", "priority": -1, "content": "Verbose logging is available when connecting to a Linux-based device from the GUI on the host. This extra debug information is not available when launching via the command line. Nsight Systems installs its executable and library files into the following directory: /opt/nvidia/nsight_systems/ To enable verbose logging on the target device, when launched from the host, follow these steps: Close the host application. Restart the target device. Place nvlog.config from host directory to the /opt/nvidia/nsight_systems directory on target. From SSH console, launch the following command: sudo /opt/nvidia/nsight_systems/nsys --daemon --debug Start the host application and connect to the target device. Logs on the target devices are collected into this file (if enabled): nsys.log in the directory where nsys command was launched. 
Please note that in some cases, debug logging can significantly slow down the profiler.", "keywords": []}, {"id": 178, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#vertical-synchronization", "display_name": "Vertical Synchronization", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "vertical-synchronization", "priority": -1, "content": "The VSYNC rows display when the monitor\u2019s vertical synchronizations occur.", "keywords": []}, {"id": 179, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#visual-studio-integration", "display_name": "Visual Studio Integration", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "visual-studio-integration", "priority": -1, "content": "NVIDIA Nsight Integration is a Visual Studio extension that allows you to access the power of Nsight Systems from within Visual Studio. When Nsight Systems is installed along with NVIDIA Nsight Integration, Nsight Systems activities will appear under the NVIDIA Nsight menu in the Visual Studio menu bar. These activities launch Nsight Systems with the current project settings and executable. Selecting the \u201cTrace\u201d command will launch Nsight Systems , create a new Nsight Systems project and apply settings from the current Visual Studio project: Target application path Command line parameters Working folder If the \u201cTrace\u201d command has already been used with this Visual Studio project then Nsight Systems will load the respective Nsight Systems project and any previously captured trace sessions will be available for review using the Nsight Systems project explorer tree. 
For more information about using Nsight Systems from within Visual Studio, please visit NVIDIA Nsight Integration Overview NVIDIA Nsight Integration User Guide", "keywords": []}, {"id": 180, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#vulkan-gpu-trace-notes", "display_name": "Vulkan GPU Trace Notes", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "vulkan-gpu-trace-notes", "priority": -1, "content": "Vulkan GPU trace is available only when tracing apps that use NVIDIA GPUs. The endings of Vulkan Command Buffers execution ranges on Compute and Transfer queues may appear earlier on the timeline than their actual occurrence.", "keywords": []}, {"id": 181, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#vulkan-overview", "display_name": "Vulkan Overview", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "vulkan-overview", "priority": -1, "content": "Vulkan is a low-overhead, cross-platform 3D graphics and compute API, targeting a wide variety of devices from PCs to mobile phones and embedded platforms. The Vulkan API is defined by the Khronos Group. Information about Vulkan and the Khronos Group can be found at the Khronos Vulkan Site . Nsight Systems can capture information about Vulkan usage by the profiled process. This includes capturing the execution time of Vulkan API functions, corresponding GPU workloads, debug util labels, and frame durations. Vulkan profiling is supported on both Windows and x86 Linux operating systems. The Command Buffer Creation row displays time periods when command buffers were being created. This enables developers to improve their application\u2019s multi-threaded command buffer creation. Command buffer creation time period is measured between the call to vkBeginCommandBuffer and the call to vkEndCommandBuffer . 
A Queue row is displayed for each Vulkan queue created by the profiled application. The API sub-row displays time periods where vkQueueSubmit was called. The GPU Workload sub-row displays time periods where workloads were executed by the GPU. In addition, you can see Vulkan debug util labels on both the CPU and the GPU. Clicking on a GPU workload highlights the corresponding vkQueueSubmit call, and vice versa. The Vulkan Memory Operations row contains an aggregation of all the Vulkan host-side memory operations, such as host-blocking writes and reads or non-persistent map-unmap ranges. The row is separated into sub-rows by heap index and memory type - the tooltip for each row and the ranges inside show the heap flags and the memory property flags.", "keywords": []}, {"id": 182, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#wddm-hw-scheduler", "display_name": "WDDM HW Scheduler", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "wddm-hw-scheduler", "priority": -1, "content": "When GPU Hardware Scheduling is enabled in Windows 10 or newer version, the Windows Display Driver Model (WDDM) uses the DxgKrnl ETW provider to expose report of NVIDIA GPUs\u2019 hardware scheduling context switches. Nsight Systems can capture these context switch events, and display under the GPUs in the timeline rows titled WDDM HW Scheduler - [HW Queue type]. The ranges under each queue will show the process name and PID assoicated with the GPU work during the time period. 
The events will be captured if GPU Hardware Scheduling is enabled in the Windows System Display settings, and \u201cCollect WDDM Trace\u201d is enabled in the Nsight Systems Project Settings.", "keywords": []}, {"id": 183, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "page", "name": "UserGuide/index#wddm-queues", "display_name": "WDDM Queues", "type": "section", "display_type": "Page section", "docname": "UserGuide/index", "anchor": "wddm-queues", "priority": -1, "content": "The Windows Display Driver Model (WDDM) architecture uses queues to send work packets from the CPU to the GPU. Each D3D device in each process is associated with one or more contexts. Graphics, compute, and copy commands that the profiled application uses are associated with a context, batched in a command buffer, and pushed into the relevant queue associated with that context. Nsight Systems can capture the state of these queues during the trace session. Enabling the \u201cCollect additional range of ETW events\u201d option will also capture extended DxgKrnl events from the Microsoft-Windows-DxgKrnl provider, such as context status, allocations, sync wait, signal events, etc. A command buffer in a WDDM queues may have one the following types: Render Deferred System MMIOFlip Wait Signal Device Software It may also be marked as a Present buffer, indicating that the application has finished rendering and requests to display the source surface. See the Microsoft documentation for the WDDM architecture and the DXGKETW_QUEUE_PACKET_TYPE enumeration. To retain the .etl trace files captured, so that they can be viewed in other tools (e.g. GPUView), change the \u201cSave ETW log files in project folder\u201d option under \u201cProfile Behavior\u201d in Nsight Systems \u2019s global Options dialog. The .etl files will appear in the same folder as the .nsys-rep file, accessible by right-clicking the report in the Project Explorer and choosing \u201cShow in Folder\u2026\u201d. 
Data collected from each ETW provider will appear in its own .etl file, and an additional .etl file named \u201cReport XX-Merged-*.etl\u201d, containing the events from all captured sources, will be created as well.", "keywords": []}, {"id": 184, "doc_id": 184, "filename": "UserGuide/index.html", "domain_name": "std", "name": "UserGuide/index", "display_name": "User Guide", "type": "doc", "display_type": "Page", "docname": "UserGuide/index", "anchor": "", "priority": -1, "content": "Nsight Systems nsys Nsight Systems Workstation Edition Nsight Systems Embedded Platforms Edition 2023-11-11 11 November 2023 2023.4.1 2023.4 2023 4 2023.4 NVIDIA Nsight Systems user guide", "keywords": []}, {"id": 185, "doc_id": 185, "filename": "defs.html", "domain_name": "std", "name": "defs", "display_name": "<no title>", "type": "doc", "display_type": "Page", "docname": "defs", "anchor": "", "priority": -1, "content": "Nsight Systems nsys Nsight Systems Workstation Edition Nsight Systems Embedded Platforms Edition 2023-11-11 11 November 2023 2023.4.1 2023.4 2023 4 2023.4", "keywords": []}, {"id": 186, "doc_id": 187, "filename": "index.html", "domain_name": "page", "name": "index#developer-interfaces", "display_name": "Developer Interfaces", "type": "section", "display_type": "Page section", "docname": "index", "anchor": "developer-interfaces", "priority": -1, "content": "Archives Documentation for previous versions of the NVIDIA Nsight Systems.", "keywords": []}, {"id": 187, "doc_id": 187, "filename": "index.html", "domain_name": "std", "name": "index", "display_name": "Nsight Systems", "type": "doc", "display_type": "Page", "docname": "index", "anchor": "", "priority": -1, "content": "Nsight Systems nsys Nsight Systems Workstation Edition Nsight Systems Embedded Platforms Edition 2023-11-11 11 November 2023 2023.4.1 2023.4 2023 4 2023.4 Release Notes Release notes and known issues. Installation Guide NVIDIA Nsight Systems installation guide. 
User Guide NVIDIA Nsight Systems user guide. Copyright and Licenses Information on the NVIDIA Software License Agreement as well as third party software and tools used by Nsight Systems.", "keywords": []}]};
|