#!/usr/bin/env python3
#
# Copyright (C) Advanced Micro Devices. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import json
import multiprocessing
import os
import sys
import threading
import unittest


amdsmi_path = os.environ.get("AMDSMI_PATH", "/opt/rocm/share/amd_smi")
if not os.path.exists(amdsmi_path):
    raise FileNotFoundError(f"AMDSMI_PATH '{amdsmi_path}' does not exist. Please set the correct path in your environment.")
sys.path.append(amdsmi_path)

try:
    import amdsmi
except ImportError as exc:
    raise ImportError(f'Could not import {amdsmi_path}') from exc

class TestAmdSmiInit(unittest.TestCase):
    def test_init(self):
        amdsmi.amdsmi_init()
        amdsmi.amdsmi_shut_down()

class TestAmdSmiPythonInterface(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.verbose = verbose
        self.max_num_physical_devices = amdsmi.amdsmi_interface.AMDSMI_MAX_NUM_XCP * amdsmi.amdsmi_interface.AMDSMI_MAX_DEVICES
        global has_info_printed
        if self.verbose and has_info_printed is False:
            # Execute the following to print the asic and board info once per test run
            has_info_printed = True
            self.setUp()
            processors = amdsmi.amdsmi_get_processor_handles()
            self.assertGreaterEqual(len(processors), 1)
            self.assertLessEqual(len(processors), self.max_num_physical_devices)
            for i in range(0, len(processors)):
                try:
                    # Print asic info
                    msg = f'asic info(gpu={i})'
                    ret = amdsmi.amdsmi_get_gpu_asic_info(processors[i])
                    print(msg)
                    print(json.dumps(ret, sort_keys=False, indent=4), flush=True)
                except amdsmi.AmdSmiLibraryException as e:
                    raise e
            for i in range(0, len(processors)):
                try:
                    # Print board info
                    msg = f'board info(gpu={i})'
                    ret = amdsmi.amdsmi_get_gpu_board_info(processors[i])
                    print(msg)
                    print(json.dumps(ret, sort_keys=False, indent=4), flush=True)
                except amdsmi.AmdSmiLibraryException as e:
                    raise e
            self.tearDown()
        return

    def _check_exception(self, e):
        error_code = e.get_error_code()
        if error_code == amdsmi.amdsmi_wrapper.AMDSMI_STATUS_NOT_SUPPORTED:
            print("  Not Supported, skipping...")
            return
        else:
            raise e

    def setUp(self):
        amdsmi.amdsmi_init()

    def tearDown(self):
        amdsmi.amdsmi_shut_down()

    def _print_vbios_info(self, vbios_info):
        print(f"  vbios_info['part_number'] is: {vbios_info['part_number']}")
        print(f"  vbios_info['build_date'] is: {vbios_info['build_date']}")
        print(f"  vbios_info['name'] is: {vbios_info['name']}")
        print(f"  vbios_info['version'] is: {vbios_info['version']}")
        if 'boot_firmware' in vbios_info:
            print(f"  vbios_info['boot_firmware'] is: {vbios_info['boot_firmware']}")
        else:
            print("  vbios_info['boot_firmware'] is: N/A")
        return

    def test_asic_kfd_info(self):
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_gpu_asic_info \n")
                asic_info = amdsmi.amdsmi_get_gpu_asic_info(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  asic_info['market_name'] is: {}".format(
                asic_info['market_name']))
            print("  asic_info['vendor_id'] is: {}".format(
                asic_info['vendor_id']))
            print("  asic_info['vendor_name'] is: {}".format(
                asic_info['vendor_name']))
            print("  asic_info['device_id'] is: {}".format(
                asic_info['device_id']))
            print("  asic_info['rev_id'] is: {}".format(
                asic_info['rev_id']))
            print("  asic_info['subsystem_id'] is: {}".format(
                asic_info['subsystem_id']))
            print("  asic_info['asic_serial'] is: {}".format(
                asic_info['asic_serial']))
            print("  asic_info['oam_id'] is: {}".format(
                asic_info['oam_id']))
            print("  asic_info['target_graphics_version'] is: {}".format(
                asic_info['target_graphics_version']))
            print("  asic_info['num_compute_units'] is: {}".format(
                asic_info['num_compute_units']))
            try:
                print("\n###Test amdsmi_get_gpu_kfd_info \n")
                kfd_info = amdsmi.amdsmi_get_gpu_kfd_info(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  kfd_info['kfd_id'] is: {}".format(
                kfd_info['kfd_id']))
            print("  kfd_info['node_id'] is: {}".format(
                kfd_info['node_id']))
            print("  kfd_info['current_partition_id'] is: {}\n".format(
                kfd_info['current_partition_id']))
        print("\n")
    # amdsmi_get_vram_info should be supported on all ASICs
    def test_get_vram_info(self):
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))

            vram_types = {
                amdsmi.AmdSmiVramType.UNKNOWN: "UNKNOWN",
                amdsmi.AmdSmiVramType.HBM:     "HBM",
                amdsmi.AmdSmiVramType.HBM2:    "HBM2",
                amdsmi.AmdSmiVramType.HBM2E:   "HBM2E",
                amdsmi.AmdSmiVramType.HBM3:    "HBM3",
                amdsmi.AmdSmiVramType.DDR2:    "DDR2",
                amdsmi.AmdSmiVramType.DDR3:    "DDR3",
                amdsmi.AmdSmiVramType.DDR4:    "DDR4",
                amdsmi.AmdSmiVramType.GDDR1:   "GDDR1",
                amdsmi.AmdSmiVramType.GDDR2:   "GDDR2",
                amdsmi.AmdSmiVramType.GDDR3:   "GDDR3",
                amdsmi.AmdSmiVramType.GDDR4:   "GDDR4",
                amdsmi.AmdSmiVramType.GDDR5:   "GDDR5",
                amdsmi.AmdSmiVramType.GDDR6:   "GDDR6",
                amdsmi.AmdSmiVramType.GDDR7:   "GDDR7",
                amdsmi.AmdSmiVramType.MAX:     "MAX"
            }

            try:
                print("\n###Test amdsmi_get_gpu_vram_info \n")
                vram_info = amdsmi.amdsmi_get_gpu_vram_info(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  vram_info['vram_type'] is: {}".format(
                vram_types[vram_info['vram_type']]))
            print("  vram_info['vram_vendor'] is: {}".format(
                vram_info['vram_vendor']))
            print("  vram_info['vram_size'] is: {} MB".format(
                vram_info['vram_size']))
            print("  vram_info['vram_bit_width'] is: {}".format(
                vram_info['vram_bit_width']))
            print("  vram_info['vram_max_bandwidth'] is: {} GB/s".format(
                vram_info['vram_max_bandwidth']))
    
    # amdsmi_get_gpu_xcd_counter should be supported on all ASICs
    def test_get_xcd_counter(self):
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_gpu_xcd_counter \n")
                xcd_count = amdsmi.amdsmi_get_gpu_xcd_counter(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  xcd_counter['counter'] is: {}".format(
                xcd_count))

    # amdsmi_get_gpu_bad_page_info is not supported in Navi2x, Navi3x
    def test_bad_page_info(self):
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            try:
                print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
                processor = amdsmi.amdsmi_get_processor_handle_from_bdf(bdf)
                print("\n###Test amdsmi_get_gpu_bad_page_info \n")
                bad_page_info = amdsmi.amdsmi_get_gpu_bad_page_info(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("bad_page_info: " + str(bad_page_info))
            print("Number of bad pages: {}".format(len(bad_page_info)))
            j = 0
            for table_record in bad_page_info:
                print("\ntable_record[\"value\"]" + str(table_record["value"]))
                print("Page: {}".format(j))
                print("Page Address: " + str(table_record["page_address"]))
                print("Page Size: " + str(table_record["page_size"]))
                print("Status: " + str(table_record["status"]))
                print("\n")
                j += 1
        print("\n")

    def test_gpu_cache_info(self):
        print("\n\n###Test amdsmi_interface.amdsmi_get_gpu_cache_info")
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            print("\n\n###Test Processor {}, bdf: {}".format(i, amdsmi.amdsmi_get_gpu_device_bdf(processors[i])))
            try:
                print("\n###Test amdsmi_interface.amdsmi_get_gpu_cache_info \n")
                cache_info = amdsmi.amdsmi_interface.amdsmi_get_gpu_cache_info(processors[i])
            except Exception as e:
                print(f"  Exception in amdsmi_get_gpu_cache_info: {e}")
                self.fail(f"Test failed due to exception: {e}")

            if isinstance(cache_info, dict):
                for key, value in cache_info.items():
                    print(f"{key}: {value}")
                for cache_entry in cache_info.get('cache', []):
                    self.assertIn('cache_size', cache_entry)
                    self.assertIn('cache_level', cache_entry)
                    self.assertIn('num_cache_instance', cache_entry)
                    self.assertIn('max_num_cu_shared', cache_entry)
            else:
                self.assertIsInstance(cache_info, dict)

    def test_get_gpu_compute_partition(self):
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreater(len(processors), 0)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            try:
                result = amdsmi.amdsmi_get_gpu_compute_partition(processors[i])
                self.assertIsInstance(result, str)
                self.assertTrue(len(result) > 0)
                print(f"\nCompute partition for handle {bdf}: {result}")
            except Exception as e:
                print(f"\nCompute partition not supported for handle {bdf}: {e}")
                continue
        print("\n")

    def test_bdf_device_id(self):
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_processor_handle_from_bdf \n")
                processor = amdsmi.amdsmi_get_processor_handle_from_bdf(bdf)
                print("\n###Test amdsmi_get_gpu_vbios_info \n")
                vbios_info = amdsmi.amdsmi_get_gpu_vbios_info(processor)
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            self._print_vbios_info(vbios_info)
            try:
                print("\n###Test amdsmi_get_gpu_device_uuid \n")
                uuid = amdsmi.amdsmi_get_gpu_device_uuid(processor)
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  uuid is: {}".format(uuid))
        print("\n")

    def test_board_info(self):
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_gpu_board_info \n")
                board_info = amdsmi.amdsmi_get_gpu_board_info(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  board_info['model_number'] is: {}".format(
                board_info['model_number']))
            print("  board_info['product_serial'] is: {}".format(
                board_info['product_serial']))
            print("  board_info['fru_id'] is: {}".format(
                board_info['fru_id']))
            print("  board_info['manufacturer_name'] is: {}".format(
                board_info['manufacturer_name']))
            print("  board_info['product_name'] is: {}".format(
                board_info['product_name']))
        print("\n")

    def test_clock_frequency(self):
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_clk_freq \n")
                clock_frequency = amdsmi.amdsmi_get_clk_freq(processors[i], amdsmi.AmdSmiClkType.SYS)
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  SYS clock_frequency['num_supported']: {}".format(
                clock_frequency['num_supported']))
            print("  SYS clock_frequency['current']: {}".format(
                clock_frequency['current']))
            print("  SYS clock_frequency['frequency']: {}".format(
                clock_frequency['frequency']))
            try:
                clock_frequency = amdsmi.amdsmi_get_clk_freq(processors[i], amdsmi.AmdSmiClkType.DF)
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  DF clock_frequency['num_supported']: {}".format(
                clock_frequency['num_supported']))
            print("  DF clock_frequency['current']: {}".format(
                clock_frequency['current']))
            print("  DF clock_frequency['frequency']: {}".format(
                clock_frequency['frequency']))
        print("\n")
        

    # amdsmi_get_clk_freq with AmdSmiClkType.DCEF is not supported in MI210, MI300A
    def test_clock_frequency_DCEF(self):
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_clk_freq \n")
                clock_frequency = amdsmi.amdsmi_get_clk_freq(processors[i], amdsmi.AmdSmiClkType.DCEF)
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  DCEF clock_frequency['num_supported']: {}".format(
                clock_frequency['num_supported']))
            print("  DCEF clock_frequency['current']: {}".format(
                clock_frequency['current']))
            print("  DCEF clock_frequency['frequency']: {}".format(
                clock_frequency['frequency']))
        print("\n")
        

    def test_clock_info(self):
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_clock_info \n")
                clock_measure = amdsmi.amdsmi_get_clock_info(processors[i], amdsmi.AmdSmiClkType.GFX)
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  Current clock for domain GFX is: {}".format(
                clock_measure['clk']))
            print("  Max clock for domain GFX is: {}".format(
                clock_measure['max_clk']))
            print("  Min clock for domain GFX is: {}".format(
                clock_measure['min_clk']))
            print("  Is GFX clock locked: {}".format(
                clock_measure['clk_locked']))
            print("  Is GFX clock in deep sleep: {}".format(
                clock_measure['clk_deep_sleep']))
            try:
                clock_measure = amdsmi.amdsmi_get_clock_info(processors[i], amdsmi.AmdSmiClkType.MEM)
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  Current clock for domain MEM is: {}".format(
                clock_measure['clk']))
            print("  Max clock for domain MEM is: {}".format(
                clock_measure['max_clk']))
            print("  Min clock for domain MEM is: {}".format(
                clock_measure['min_clk']))
            print("  Is MEM clock in deep sleep: {}".format(
                clock_measure['clk_deep_sleep']))
        print("\n")
        

    # AmdSmiClkType.VCLK0 and DCLK0 are not supported in MI210
    def test_clock_info_vclk0_dclk0(self):
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_clock_info \n")
                clock_measure = amdsmi.amdsmi_get_clock_info(processors[i], amdsmi.AmdSmiClkType.VCLK0)
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  Current clock for domain VCLK0 is: {}".format(
                clock_measure['clk']))
            print("  Max clock for domain VCLK0 is: {}".format(
                clock_measure['max_clk']))
            print("  Min clock for domain VCLK0 is: {}".format(
                clock_measure['min_clk']))
            print("  Is VCLK0 clock in deep sleep: {}".format(
                clock_measure['clk_deep_sleep']))
            try:
                clock_measure = amdsmi.amdsmi_get_clock_info(processors[i], amdsmi.AmdSmiClkType.DCLK0)
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  Current clock for domain DCLK0 is: {}".format(
                clock_measure['clk']))
            print("  Max clock for domain DCLK0 is: {}".format(
                clock_measure['max_clk']))
            print("  Min clock for domain DCLK0 is: {}".format(
                clock_measure['min_clk']))
            print("  Is DCLK0 clock in deep sleep: {}".format(
                clock_measure['clk_deep_sleep']))
        print("\n")
        

    # AmdSmiClkType.VCLK1 and DCLK1 are not supported in MI210, MI300A, MI300X
    def test_clock_info_vclk1_dclk1(self):
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_clock_info \n")
                clock_measure = amdsmi.amdsmi_get_clock_info(processors[i], amdsmi.AmdSmiClkType.VCLK1)
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  Current clock for domain VCLK1 is: {}".format(
                clock_measure['clk']))
            print("  Max clock for domain VCLK1 is: {}".format(
                clock_measure['max_clk']))
            print("  Min clock for domain VCLK1 is: {}".format(
                clock_measure['min_clk']))
            print("  Is VCLK1 clock in deep sleep: {}".format(
                clock_measure['clk_deep_sleep']))
            try:
                clock_measure = amdsmi.amdsmi_get_clock_info(processors[i], amdsmi.AmdSmiClkType.DCLK1)
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  Current clock for domain DCLK1 is: {}".format(
                clock_measure['clk']))
            print("  Max clock for domain DCLK1 is: {}".format(
                clock_measure['max_clk']))
            print("  Min clock for domain DCLK1 is: {}".format(
                clock_measure['min_clk']))
            print("  Is DCLK1 clock in deep sleep: {}".format(
                clock_measure['clk_deep_sleep']))
        print("\n")
        

    def test_driver_info(self):
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_gpu_driver_info \n")
                driver_info = amdsmi.amdsmi_get_gpu_driver_info(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("Driver info:  {}".format(driver_info))
        print("\n")
        

    # amdsmi_get_gpu_ecc_count is not supported in Navi2x, Navi3x, MI210, MI300A
    def test_ecc_count_block(self):
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        gpu_blocks = {
            "INVALID": amdsmi.AmdSmiGpuBlock.INVALID,
            "UMC": amdsmi.AmdSmiGpuBlock.UMC,
            "SDMA": amdsmi.AmdSmiGpuBlock.SDMA,
            "GFX": amdsmi.AmdSmiGpuBlock.GFX,
            "MMHUB": amdsmi.AmdSmiGpuBlock.MMHUB,
            "ATHUB": amdsmi.AmdSmiGpuBlock.ATHUB,
            "PCIE_BIF": amdsmi.AmdSmiGpuBlock.PCIE_BIF,
            "HDP": amdsmi.AmdSmiGpuBlock.HDP,
            "XGMI_WAFL": amdsmi.AmdSmiGpuBlock.XGMI_WAFL,
            "DF": amdsmi.AmdSmiGpuBlock.DF,
            "SMN": amdsmi.AmdSmiGpuBlock.SMN,
            "SEM": amdsmi.AmdSmiGpuBlock.SEM,
            "MP0": amdsmi.AmdSmiGpuBlock.MP0,
            "MP1": amdsmi.AmdSmiGpuBlock.MP1,
            "FUSE": amdsmi.AmdSmiGpuBlock.FUSE,
            "MCA": amdsmi.AmdSmiGpuBlock.MCA,
            "VCN": amdsmi.AmdSmiGpuBlock.VCN,
            "JPEG": amdsmi.AmdSmiGpuBlock.JPEG,
            "IH": amdsmi.AmdSmiGpuBlock.IH,
            "MPIO": amdsmi.AmdSmiGpuBlock.MPIO,
            "RESERVED": amdsmi.AmdSmiGpuBlock.RESERVED
        }
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            print("\n###Test amdsmi_get_gpu_ecc_count \n")
            for block_name, block_code in gpu_blocks.items():
                try:
                    ecc_count = amdsmi.amdsmi_get_gpu_ecc_count(processors[i], block_code)
                except amdsmi.AmdSmiLibraryException as e:
                    self._check_exception(e)
                    continue
                print("  Number of uncorrectable errors for {}: {}".format(
                    block_name, ecc_count['uncorrectable_count']))
                print("  Number of correctable errors for {}: {}".format(
                    block_name, ecc_count['correctable_count']))
                print("  Number of deferred errors for {}: {}".format(
                    block_name, ecc_count['deferred_count']))
                self.assertGreaterEqual(ecc_count['uncorrectable_count'], 0)
                self.assertGreaterEqual(ecc_count['correctable_count'], 0)
                self.assertGreaterEqual(ecc_count['deferred_count'], 0)
            print("\n")
        print("\n")
        

    def test_ecc_count_total(self):
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_gpu_total_ecc_count \n")
                ecc_info = amdsmi.amdsmi_get_gpu_total_ecc_count(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("Number of uncorrectable errors: {}".format(
                ecc_info['uncorrectable_count']))
            print("Number of correctable errors: {}".format(
                ecc_info['correctable_count']))
            print("Number of deferred errors: {}".format(
                ecc_info['deferred_count']))
            self.assertGreaterEqual(ecc_info['uncorrectable_count'], 0)
            self.assertGreaterEqual(ecc_info['correctable_count'], 0)
            self.assertGreaterEqual(ecc_info['deferred_count'], 0)
        print("\n")
        

    def test_fw_info(self):
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_fw_info \n")
                fw_info = amdsmi.amdsmi_get_fw_info(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            num_fw_blocks = len(fw_info['fw_list'])
            self.assertLessEqual(num_fw_blocks, len(amdsmi.AmdSmiFwBlock))
            for fw in fw_info['fw_list']:
                # Skip firmware blocks with version 0 as they are not valid or not present
                if fw['fw_version'] != 0:
                    print("  FW name:           {}".format(
                        str(fw['fw_name'])))
                    print("  FW version:        {}".format(
                        fw['fw_version']))
        print("\n")


    def test_gpu_activity(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_gpu_activity \n")
                engine_usage = amdsmi.amdsmi_get_gpu_activity(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  engine_usage['gfx_activity'] is: {} %".format(
                engine_usage['gfx_activity']))
            print("  engine_usage['umc_activity'] is: {} %".format(
                engine_usage['umc_activity']))
            print("  engine_usage['mm_activity'] is: {} %".format(
                engine_usage['mm_activity']))
        print("\n")
        

    def test_memory_usage(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_gpu_memory_usage \n")
                memory_usage = amdsmi.amdsmi_get_gpu_memory_usage(processors[i], amdsmi.AmdSmiMemoryType.VRAM)
                print("  memory_usage for VRAM is: {}".format(memory_usage))
                memory_usage = amdsmi.amdsmi_get_gpu_memory_usage(processors[i], amdsmi.AmdSmiMemoryType.VIS_VRAM)
                print("  memory_usage for VIS_VRAM is: {}".format(memory_usage))
                memory_usage = amdsmi.amdsmi_get_gpu_memory_usage(processors[i], amdsmi.AmdSmiMemoryType.GTT)
                print("  memory_usage for GTT is: {}".format(memory_usage))
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
        print("\n")
        

    def test_pcie_info(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_pcie_info \n")
                pcie_info = amdsmi.amdsmi_get_pcie_info(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  pcie_info['pcie_metric']['pcie_width'] is: {}".format(
                pcie_info['pcie_metric']['pcie_width']))
            print("  pcie_info['pcie_static']['max_pcie_width'] is: {} ".format(
                pcie_info['pcie_static']['max_pcie_width']))
            print("  pcie_info['pcie_metric']['pcie_speed'] is: {} MT/s".format(
                pcie_info['pcie_metric']['pcie_speed']))
            print("  pcie_info['pcie_static']['max_pcie_speed'] is: {} ".format(
                pcie_info['pcie_static']['max_pcie_speed']))
            print("  pcie_info['pcie_static']['pcie_interface_version'] is: {}".format(
                pcie_info['pcie_static']['pcie_interface_version']))
            print("  pcie_info['pcie_static']['slot_type'] is: {}".format(
                pcie_info['pcie_static']['slot_type']))
            print("  pcie_info['pcie_metric']['pcie_replay_count'] is: {}".format(
                pcie_info['pcie_metric']['pcie_replay_count']))
            print("  pcie_info['pcie_metric']['pcie_bandwidth'] is: {}".format(
                pcie_info['pcie_metric']['pcie_bandwidth']))
            print("  pcie_info['pcie_metric']['pcie_l0_to_recovery_count'] is: {}".format(
                pcie_info['pcie_metric']['pcie_l0_to_recovery_count']))
            print("  pcie_info['pcie_metric']['pcie_replay_roll_over_count'] is: {}".format(
                pcie_info['pcie_metric']['pcie_replay_roll_over_count']))
            print("  pcie_info['pcie_metric']['pcie_nak_sent_count'] is: {}".format(
                pcie_info['pcie_metric']['pcie_nak_sent_count']))
            print("  pcie_info['pcie_metric']['pcie_nak_received_count'] is: {}".format(
                pcie_info['pcie_metric']['pcie_nak_received_count']))
            print("  pcie_info['pcie_metric']['pcie_lc_perf_other_end_recovery_count'] is: {}".format(
                pcie_info['pcie_metric']['pcie_lc_perf_other_end_recovery_count']))
        print("\n")
        

    def test_power_info(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_power_info \n")
                power_info = amdsmi.amdsmi_get_power_info(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  power_info['current_socket_power'] is: {}".format(
                power_info['current_socket_power']))
            print("  power_info['average_socket_power'] is: {}".format(
                power_info['average_socket_power']))
            print("  power_info['gfx_voltage'] is: {}".format(
                power_info['gfx_voltage']))
            print("  power_info['soc_voltage'] is: {}".format(
                power_info['soc_voltage']))
            print("  power_info['mem_voltage'] is: {}".format(
                power_info['mem_voltage']))
            print("  power_info['power_limit'] is: {}".format(
                power_info['power_limit']))
            try:
                print("\n###Test amdsmi_get_power_cap_info \n")
                power_cap_info = amdsmi.amdsmi_get_power_cap_info(processors[i], 0)
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  power_info['dpm_cap'] is: {}".format(
                power_cap_info['dpm_cap']))
            print("  power_info['power_cap'] is: {}".format(
                power_cap_info['power_cap']))
            try:
                print("\n###Test amdsmi_is_gpu_power_management_enabled \n")
                is_power_management_enabled = amdsmi.amdsmi_is_gpu_power_management_enabled(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  Power management enabled: {}".format(
                is_power_management_enabled))
        print("\n")
        

    def test_process_list(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_gpu_process_list \n")
                process_list = amdsmi.amdsmi_get_gpu_process_list(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  Process list: {}".format(process_list))
        print("\n")

    def test_processor_type(self):
        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_processor_type \n")
                processor_type = amdsmi.amdsmi_get_processor_type(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            if isinstance(processor_type, dict) and 'processor_type' in processor_type:
                print("  Processor type is: {}".format(processor_type['processor_type']))
            else:
                print("  Processor type (non-dict): {}".format(processor_type))
                self.assertIsInstance(processor_type, (str, int), "Unexpected processor_type type")
        print("\n")

    # amdsmi_get_gpu_ras_block_features_enabled is not supported in Navi2x, Navi3x
    def test_ras_block_features_enabled(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_gpu_ras_block_features_enabled \n")
                ras_enabled = amdsmi.amdsmi_get_gpu_ras_block_features_enabled(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            for j in range(0, len(ras_enabled)):
                print("  RAS status for {} is: {}".format(ras_enabled[j]['block'], ras_enabled[j]['status']))
        print("\n")
        

    # amdsmi_get_gpu_ras_feature_info is not supported in Navi2x, Navi3x
    def test_ras_feature_info(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_gpu_ras_feature_info \n")
                ras_feature = amdsmi.amdsmi_get_gpu_ras_feature_info(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            if ras_feature != None:
                print("RAS eeprom version: {}".format(ras_feature['eeprom_version']))
                print("RAS parity schema: {}".format(ras_feature['parity_schema']))
                print("RAS single bit schema: {}".format(ras_feature['single_bit_schema']))
                print("RAS double bit schema: {}".format(ras_feature['double_bit_schema']))
                print("Poisoning supported: {}".format(ras_feature['poison_schema']))
        print("\n")
        

    def test_socket_info(self):

        try:
            print("\n\n###Test amdsmi_get_socket_handles")
            sockets = amdsmi.amdsmi_get_socket_handles()
        except amdsmi.AmdSmiLibraryException as e:
            self._check_exception(e)
            
        for i in range(0, len(sockets)):
            print("\n\n###Test Socket {}".format(i))
            try:
                print("\n###Test amdsmi_get_socket_info \n")
                socket_name = amdsmi.amdsmi_get_socket_info(sockets[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  Socket: {}".format(socket_name))
        print("\n")
        

    def test_temperature_metric(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_temp_metric \n")
                temperature_measure = amdsmi.amdsmi_get_temp_metric(
                    processors[i], amdsmi.AmdSmiTemperatureType.HOTSPOT, amdsmi.AmdSmiTemperatureMetric.CURRENT)
                print("  Current temperature for HOTSPOT is: {}".format(
                    temperature_measure))
                temperature_measure = amdsmi.amdsmi_get_temp_metric(
                    processors[i], amdsmi.AmdSmiTemperatureType.VRAM, amdsmi.AmdSmiTemperatureMetric.CURRENT)
                print("  Current temperature for VRAM is: {}".format(
                    temperature_measure))
                print("\n###Test amdsmi_get_temp_metric \n")
                temperature_measure = amdsmi.amdsmi_get_temp_metric(
                    processors[i], amdsmi.AmdSmiTemperatureType.HOTSPOT, amdsmi.AmdSmiTemperatureMetric.CRITICAL)
                print("  Limit (critical) temperature for HOTSPOT is: {}".format(
                    temperature_measure))
                temperature_measure = amdsmi.amdsmi_get_temp_metric(
                    processors[i], amdsmi.AmdSmiTemperatureType.VRAM, amdsmi.AmdSmiTemperatureMetric.CRITICAL)
                print("  Limit (critical) temperature for VRAM is: {}".format(
                    temperature_measure))
                print("\n###Test amdsmi_get_temp_metric \n")
                temperature_measure = amdsmi.amdsmi_get_temp_metric(
                    processors[i], amdsmi.AmdSmiTemperatureType.HOTSPOT, amdsmi.AmdSmiTemperatureMetric.EMERGENCY)
                print("  Shutdown (emergency) temperature for HOTSPOT is: {}".format(
                    temperature_measure))
                temperature_measure = amdsmi.amdsmi_get_temp_metric(
                    processors[i], amdsmi.AmdSmiTemperatureType.VRAM, amdsmi.AmdSmiTemperatureMetric.EMERGENCY)
                print("  Shutdown (emergency) temperature for VRAM is: {}".format(
                    temperature_measure))
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
        print("\n")
        

    # AmdSmiTemperatureType.EDGE is not supported in MI300A, MI300X
    def test_temperature_metric_edge(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_temp_metric \n")
                temperature_measure = amdsmi.amdsmi_get_temp_metric(
                    processors[i], amdsmi.AmdSmiTemperatureType.EDGE, amdsmi.AmdSmiTemperatureMetric.CURRENT)
                print("  Current temperature for EDGE is: {}".format(
                    temperature_measure))
                temperature_measure = amdsmi.amdsmi_get_temp_metric(
                    processors[i], amdsmi.AmdSmiTemperatureType.EDGE, amdsmi.AmdSmiTemperatureMetric.CRITICAL)
                print("  Limit (critical) temperature for EDGE is: {}".format(
                    temperature_measure))
                temperature_measure = amdsmi.amdsmi_get_temp_metric(
                    processors[i], amdsmi.AmdSmiTemperatureType.EDGE, amdsmi.AmdSmiTemperatureMetric.EMERGENCY)
                print("  Shutdown (emergency) temperature for EDGE is: {}".format(
                    temperature_measure))
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
        print("\n")
        

    def test_temperature_metric_plx(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_temp_metric \n")
                temperature_measure = amdsmi.amdsmi_get_temp_metric(
                    processors[i], amdsmi.AmdSmiTemperatureType.PLX, amdsmi.AmdSmiTemperatureMetric.CURRENT)
                print("  Current temperature for PLX is: {}".format(
                    temperature_measure))
                temperature_measure = amdsmi.amdsmi_get_temp_metric(
                    processors[i], amdsmi.AmdSmiTemperatureType.PLX, amdsmi.AmdSmiTemperatureMetric.CRITICAL)
                print("  Limit (critical) temperature for PLX is: {}".format(
                    temperature_measure))
                temperature_measure = amdsmi.amdsmi_get_temp_metric(
                    processors[i], amdsmi.AmdSmiTemperatureType.PLX, amdsmi.AmdSmiTemperatureMetric.EMERGENCY)
                print("  Shutdown (emergency) temperature for PLX is: {}".format(
                    temperature_measure))
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
        print("\n")
        

    # AmdSmiTemperatureType.HBM_0, HBM_1, HBM_2, HBM_3 are not supported in Navi2x, Navi3x, MI210, MI300A
    def test_temperature_metric_hbm(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        temp_types = {
            "HBM_0": amdsmi.AmdSmiTemperatureType.HBM_0,
            "HBM_1": amdsmi.AmdSmiTemperatureType.HBM_1,
            "HBM_2": amdsmi.AmdSmiTemperatureType.HBM_2,
            "HBM_3": amdsmi.AmdSmiTemperatureType.HBM_3,
        }
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            print("\n###Test amdsmi_get_temp_metric \n")
            for temp_type_name, temp_type_code in temp_types.items():
                try:
                    temperature_measure = amdsmi.amdsmi_get_temp_metric(
                        processors[i], temp_type_code, amdsmi.AmdSmiTemperatureMetric.CURRENT)
                    print("  Current temperature for {} is: {}".format(
                        temp_type_name, temperature_measure))
                    temperature_measure = amdsmi.amdsmi_get_temp_metric(
                        processors[i], temp_type_code, amdsmi.AmdSmiTemperatureMetric.CRITICAL)
                    print("  Limit (critical) temperature for {} is: {}".format(
                        temp_type_name, temperature_measure))
                    temperature_measure = amdsmi.amdsmi_get_temp_metric(
                        processors[i], temp_type_code, amdsmi.AmdSmiTemperatureMetric.EMERGENCY)
                    print("  Shutdown (emergency) temperature for {} is: {}".format(
                        temp_type_name, temperature_measure))
                except amdsmi.AmdSmiLibraryException as e:
                    self._check_exception(e)
                    continue
        print("\n")
        

    def test_utilization_count(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            print("\n###Test amdsmi_get_utilization_count \n")
            utilization_counter_types = [
                amdsmi.AmdSmiUtilizationCounterType.COARSE_GRAIN_GFX_ACTIVITY,
                amdsmi.AmdSmiUtilizationCounterType.COARSE_GRAIN_MEM_ACTIVITY,
                amdsmi.AmdSmiUtilizationCounterType.COARSE_DECODER_ACTIVITY
            ]
            try:
                utilization_count = amdsmi.amdsmi_get_utilization_count(processors[i], utilization_counter_types)
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  Timestamp: {}".format(
                utilization_count[0]['timestamp']))
            print("  Utilization count for {} is: {}".format(
                utilization_count[1]['type'], utilization_count[1]['value']))
            print("  Utilization count for {} is: {}".format(
                utilization_count[2]['type'], utilization_count[2]['value']))
            print("  Utilization count for {} is: {}".format(
                utilization_count[3]['type'], utilization_count[3]['value']))
            self.assertLessEqual(len(processors), self.max_num_physical_devices)
            print("\n")
            utilization_counter_types = [
                amdsmi.AmdSmiUtilizationCounterType.FINE_GRAIN_GFX_ACTIVITY,
                amdsmi.AmdSmiUtilizationCounterType.FINE_GRAIN_MEM_ACTIVITY,
                amdsmi.AmdSmiUtilizationCounterType.FINE_DECODER_ACTIVITY
            ]
            try:
                utilization_count = amdsmi.amdsmi_get_utilization_count(processors[i], utilization_counter_types)
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  Timestamp: {}".format(
                utilization_count[0]['timestamp']))
            print("  Utilization count for {} is: {}".format(
                utilization_count[1]['type'], utilization_count[1]['value']))
            print("  Utilization count for {} is: {}".format(
                utilization_count[2]['type'], utilization_count[2]['value']))
            print("  Utilization count for {} is: {}".format(
                utilization_count[3]['type'], utilization_count[3]['value']))
        print("\n")

    def test_vbios_info(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_gpu_vbios_info \n")
                vbios_info = amdsmi.amdsmi_get_gpu_vbios_info(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            self._print_vbios_info(vbios_info)
        print("\n")

    def test_vendor_name(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_gpu_vendor_name \n")
                vendor_name = amdsmi.amdsmi_get_gpu_vendor_name(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  Vendor name is: {}".format(vendor_name))
        print("\n")

    # @unittest.SkipTest
    def test_accelerator_partition_profile(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_gpu_accelerator_partition_profile \n")
                accelerator_partition = amdsmi.amdsmi_get_gpu_accelerator_partition_profile(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  Current partition id: {}".format(
                accelerator_partition['partition_id']))
            print("  Profile_type: {}".format(
                accelerator_partition['partition_profile']['profile_type']))
            print("  profile_index: {}".format(
                accelerator_partition['partition_profile']['profile_index']))
            print("  memory_caps: {}".format(
                accelerator_partition['partition_profile']['memory_caps']))
            print("  num_resources: {}".format(
                accelerator_partition['partition_profile']['num_resources']))
        print("\n")
        

    # Requires sudo (to see full resource/config detail).
    # Should only be supported on MI300+ ASICs
    def test_accelerator_partition_profile_config(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_gpu_accelerator_partition_profile_config \n")
                profile_config = amdsmi.amdsmi_get_gpu_accelerator_partition_profile_config(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  num_profiles: {}".format(profile_config['num_profiles']))
            print("  num_resource_profiles: {}".format(profile_config['num_resource_profiles']))
            print("  default_profile_index: {}".format(profile_config['default_profile_index']))
            for p in profile_config['profiles']:
                print("\t\t  profile_type: {}".format(p['profile_type']))
                print("\t\t  num_partitions: {}".format(p['num_partitions']))
                print("\t\t  profile_index: {}".format(p['profile_index']))
                print("\t\t  num_resources: {}".format(p['num_resources']))
                for r in range(0, p['num_resources']):
                    print("\t\t\t  profile_index: {}".format(p['resources'][r]['profile_index']))
                    print("\t\t\t  resource_type: {}".format(p['resources'][r]['resource_type']))
                    print("\t\t\t  partition_resource: {}".format(p['resources'][r]['partition_resource']))
                    print("\t\t\t  num_partitions_share_resource: {}".format(
                        p['resources'][r]['num_partitions_share_resource']))
        print("\n")
        

    # amdsmi_get_violation_status is only supported on MI300+ ASICs
    # We should expect a not supported status for Navi / MI100 / MI2x ASICs
    def test_get_violation_status(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
            try:
                print("\n###Test amdsmi_get_violation_status \n")
                violation_status = amdsmi.amdsmi_get_violation_status(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  Reference Timestamp: {}".format(
                violation_status['reference_timestamp']))
            print("  Violation Timestamp: {}".format(
                violation_status['violation_timestamp']))

            print(" Current Prochot Thrm Accumulated (Count): {}".format(
                violation_status['acc_prochot_thrm']))
            print(" Current PVIOL (acc_ppt_pwr) Accumulated (Count): {}".format(
                violation_status['acc_ppt_pwr']))
            print(" Current TVIOL (acc_socket_thrm) Accumulated (Count): {}".format(
                violation_status['acc_socket_thrm']))
            print(" Current VR_THRM Accumulated (Count): {}".format(
                violation_status['acc_vr_thrm']))
            print(" Current HBM Thrm Accumulated (Count): {}".format(
                violation_status['acc_hbm_thrm']))
            print(" Current GFX CLK Below Host Limit Accumulated (Count): {}".format(
                violation_status['acc_gfx_clk_below_host_limit']))

            print(" Prochot Thrm Violation (%): {}".format(
                violation_status['per_prochot_thrm']))
            print(" PVIOL (per_ppt_pwr) (%): {}".format(
                violation_status['per_ppt_pwr']))
            print(" TVIOL (per_socket_thrm) (%): {}".format(
                violation_status['per_socket_thrm']))
            print(" VR_THRM Violation (%): {}".format(
                violation_status['per_vr_thrm']))
            print(" HBM Thrm Violation (%): {}".format(
                violation_status['per_hbm_thrm']))
            print(" GFX CLK Below Host Limit Violation (%): {}".format(
                violation_status['per_gfx_clk_below_host_limit']))

            print(" Prochot Thrm Violation (bool): {}".format(
                violation_status['active_prochot_thrm']))
            print(" PVIOL (active_ppt_pwr) (bool): {}".format(
                violation_status['active_ppt_pwr']))
            print(" TVIOL (active_socket_thrm) (bool): {}".format(
                violation_status['active_socket_thrm']))
            print(" VR_THRM Violation (bool): {}".format(
                violation_status['active_vr_thrm']))
            print(" HBM Thrm Violation (bool): {}".format(
                violation_status['active_hbm_thrm']))
            print(" GFX CLK Below Host Limit Violation (bool): {}".format(
                violation_status['active_gfx_clk_below_host_limit']))
        print("\n")
        
    
    # Add test for amdsmi_get_gpu_reg_table_info
    def test_gpu_reg_table_info(self):

        print("\n\n###Test amdsmi_get_gpu_reg_table_info")
        processors = amdsmi.amdsmi_get_processor_handles()
        for i in range(0, len(processors)):
            print("\n\n###Test Processor {}".format(i))
            try:
                print("\n###Test amdsmi_get_gpu_reg_table_info \n")
                reg_table_info = amdsmi.amdsmi_get_gpu_reg_table_info(processors[i], amdsmi.amdsmi_interface.AmdSmiRegType.PCIE)
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  reg_table_info['reg_table'] is: {}".format(
                reg_table_info))
        print("\n")
        
    
    def test_get_gpu_revision(self):

        processors = amdsmi.amdsmi_get_processor_handles()
        self.assertGreaterEqual(len(processors), 1)
        self.assertLessEqual(len(processors), self.max_num_physical_devices)
        for i in range(0, len(processors)):
            bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
            print(f"\n\n###Test Processor {i}, bdf: {bdf}")
            try:
                print("\n###Test amdsmi_get_gpu_revision \n")
                revision = amdsmi.amdsmi_get_gpu_revision(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print(f"  GPU revision is: {revision}")
        print("\n")
        
    
    # Add test for amdsmi_get_gpu_pm_metrics_info
    def test_gpu_pm_metrics_info(self):

        print("\n\n###Test amdsmi_get_gpu_pm_metrics_info")
        processors = amdsmi.amdsmi_get_processor_handles()
        for i in range(0, len(processors)):
            print("\n\n###Test Processor {}".format(i))
            try:
                print("\n###Test amdsmi_get_gpu_pm_metrics_info \n")
                pm_metrics_info = amdsmi.amdsmi_get_gpu_pm_metrics_info(processors[i])
            except amdsmi.AmdSmiLibraryException as e:
                self._check_exception(e)
                continue
            print("  pm_metrics_info['pm_metrics'] is: {}".format(
                pm_metrics_info))
        print("\n")
        

    def test_walkthrough(self):
        print("\n\n#######################################################################")
        print("========> test_walkthrough start <========\n")
        self.test_asic_kfd_info()
        self.test_power_info()
        self.test_vbios_info()
        self.test_board_info()
        self.test_fw_info()
        self.test_driver_info()
        print("\n========> test_walkthrough end <========")
        print("#######################################################################\n")

    # Unstable on workstation cards
    # def test_walkthrough_multiprocess(self):
    #     print("\n\n========> test_walkthrough_multiprocess start <========\n")
    #     processors = amdsmi.amdsmi_get_processor_handles()
    #     self.assertGreaterEqual(len(processors), 1)
    #     self.assertLessEqual(len(processors), self.max_num_physical_devices)
    #     p0 = multiprocessing.Process(target=walk_through, args=[self])
    #     p1 = multiprocessing.Process(target=walk_through, args=[self])
    #     p2 = multiprocessing.Process(target=walk_through, args=[self])
    #     p3 = multiprocessing.Process(target=walk_through, args=[self])
    #     p0.start()
    #     p1.start()
    #     p2.start()
    #     p3.start()
    #     p0.join()
    #     p1.join()
    #     p2.join()
    #     p3.join()
    #     print("\n========> test_walkthrough_multiprocess end <========\n")

    # Unstable on workstation cards
    # def test_walkthrough_multithread(self):
    #     print("\n\n========> test_walkthrough_multithread start <========\n")
    #     processors = amdsmi.amdsmi_get_processor_handles()
    #     self.assertGreaterEqual(len(processors), 1)
    #     self.assertLessEqual(len(processors), self.max_num_physical_devices)
    #     t0 = threading.Thread(target=walk_through, args=[self])
    #     t1 = threading.Thread(target=walk_through, args=[self])
    #     t2 = threading.Thread(target=walk_through, args=[self])
    #     t3 = threading.Thread(target=walk_through, args=[self])
    #     t0.start()
    #     t1.start()
    #     t2.start()
    #     t3.start()
    #     t0.join()
    #     t1.join()
    #     t2.join()
    #     t3.join()
    #     print("\n========> test_walkthrough_multithread end <========\n")

    # # Unstable - do not run
    # def test_z_gpureset_asicinfo_multithread(self):
    #     def get_asic_info(processor):
    #         try:
    #             print("\n###Test amdsmi_get_gpu_asic_info \n")
    #             asic_info = amdsmi.amdsmi_get_gpu_asic_info(processor)
    #         except amdsmi.AmdSmiLibraryException as e:
    #             self._check_exception(e)
    #             continue
    #         print("  asic_info['market_name'] is: {}".format(
    #             asic_info['market_name']))
    #         print("  asic_info['vendor_id'] is: {}".format(
    #             asic_info['vendor_id']))
    #         print("  asic_info['vendor_name'] is: {}".format(
    #             asic_info['vendor_name']))
    #         print("  asic_info['device_id'] is: {}".format(
    #             asic_info['device_id']))
    #         print("  asic_info['rev_id'] is: {}".format(
    #             asic_info['rev_id']))
    #         print("  asic_info['asic_serial'] is: {}".format(
    #             asic_info['asic_serial']))
    #         print("  asic_info['oam_id'] is: {}\n".format(
    #             asic_info['oam_id']))
    #     def gpu_reset(processor):
    #         print("\n###Test amdsmi_reset_gpu \n")
    #         amdsmi.amdsmi_reset_gpu(processor)
    #         print("  GPU reset completed.\n")
    #     print("\n\n========> test_z_gpureset_asicinfo_multithread start <========\n")
    #     processors = amdsmi.amdsmi_get_processor_handles()
    #     self.assertGreaterEqual(len(processors), 1)
    #     self.assertLessEqual(len(processors), self.max_num_physical_devices)
    #     for i in range(0, len(processors)):
    #         bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i])
    #         print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
    #         t0 = threading.Thread(target=get_asic_info, args=[processors[i]])
    #         t1 = threading.Thread(target=gpu_reset, args=[processors[i]])
    #         # t2 = threading.Thread(target=walk_through, args=[self])
    #         # t3 = threading.Thread(target=walk_through, args=[self])
    #         t0.start()
    #         t1.start()
    #         # t2.start()
    #         # t3.start()
    #         t0.join()
    #         t1.join()
    #         # t2.join()
    #         # t3.join()
    #     print("\n========> test_z_gpureset_asicinfo_multithread end <========\n")


def print_test_ids(suite):
    for test in suite:
        if isinstance(test, unittest.TestSuite):
            print_test_ids(test)
        else:
            print(" -", test.id())

if __name__ == '__main__':
    import sys
    import unittest

    print("AMD SMI Integration Tests")
    verbose=1
    if '-q' in sys.argv or '--quiet' in sys.argv:
        verbose=0
    elif '-v' in sys.argv or '--verbose' in sys.argv:
        verbose=2
    has_info_printed = False
    
    # If no -k or --keyword argument is given, print all available tests
    if not ('-k' in sys.argv or '--keyword' in sys.argv):
        loader = unittest.TestLoader()
        suite = loader.loadTestsFromModule(sys.modules[__name__])
        print("==============================================================")
        print("Available tests:")
        print_test_ids(suite)

    # Provide Legend for test results, otherwise it is not clear what the output means
    print("==============================================================")
    print("Legend: . = pass, s = skipped, F = fail, E = error")
    print("==============================================================")
    print("Running tests...\n")
    
    # Detect if ran without sudo or root privileges
    if os.geteuid() != 0:
        print("Warning: Some tests may require elevated privileges (sudo/root) to run completely.\n")
        print("Please relaunch with elevated privileges.\n")
        sys.exit(1)

    runner = unittest.TextTestRunner(verbosity=verbose)
    unittest.main(testRunner=runner)
