# Copyright (c) 2026, Thomas Goirand <zigo@debian.org>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import io
import random
import secrets
import socket
import string
import time
import urllib

from keystoneauth1.exceptions.catalog import EndpointNotFound
import openstack
from openstack import exceptions as os_exceptions
from oslo_log import log as logging
import paramiko
import requests

LOG = logging.getLogger(__name__, project='vigietools')


def _generate_ssh_key_pair(bits=4096):
    """Create a fresh RSA key pair for SSH use.

    :param bits: Size of the RSA key in bits (default: 4096)
    :return: Tuple ``(private_key_str, public_key_str)``. The private key
             is the text produced by ``write_private_key()``; the public
             key is a single ``"<key name> <base64 blob>"`` line.
    """
    rsa_key = paramiko.RSAKey.generate(bits)

    # Serialise the private half into an in-memory text buffer.
    buf = io.StringIO()
    rsa_key.write_private_key(buf)

    # The public half is the key name followed by its base64 blob.
    public_part = "{} {}".format(rsa_key.get_name(), rsa_key.get_base64())

    return buf.getvalue(), public_part


def _create_keypair(user_conn, keypair_name, public_key):
    """(Re)create a compute keypair holding the given public key.

    A keypair already registered under ``keypair_name`` is deleted first,
    so the returned keypair always carries ``public_key``.

    :param user_conn: OpenStack connection used for the compute calls
    :param keypair_name: Name of the keypair to create
    :param public_key: Public key material to register
    :return: The keypair object returned by ``create_keypair()``
    :raises Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        compute = user_conn.compute

        # Get rid of a stale keypair bearing the same name.
        stale = compute.find_keypair(keypair_name)
        if stale:
            LOG.info(f"Deleting existing keypair '{keypair_name}'")
            compute.delete_keypair(stale)

        LOG.info(f"Creating keypair '{keypair_name}'")
        fresh = compute.create_keypair(
            name=keypair_name,
            public_key=public_key,
        )
        LOG.info(f"Keypair '{keypair_name}' created successfully.")
        return fresh

    except Exception as exc:
        LOG.error(f"Failed to create keypair: {exc}")
        raise


def _create_security_group(user_conn, security_group_name):
    """Return the named security group, creating it if it does not exist.

    A newly created group gets two ingress rules open to ``0.0.0.0/0``:
    ICMP (ping) and TCP port 22 (SSH). An existing group is returned as
    found, without inspecting or changing its rules.

    :param user_conn: OpenStack connection used for the network calls
    :param security_group_name: Name of the security group
    :return: The existing or newly created security group object
    :raises Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        # Reuse the security group when it is already there
        security_group = user_conn.network.find_security_group(
            security_group_name
        )
        if security_group:
            LOG.info(f"Security group '{security_group_name}' already exists.")
            return security_group

        # Create the security group
        LOG.info(f"Creating security group '{security_group_name}'")
        security_group = user_conn.network.create_security_group(
            name=security_group_name,
            description="Security group with only ping and SSH open"
        )
        LOG.info(f"Security group '{security_group_name}' created "
                 "successfully.")

        # Add rules to allow ping (ICMP)
        user_conn.network.create_security_group_rule(
            security_group_id=security_group.id,
            protocol='icmp',
            direction='ingress',
            port_range_min=None,
            port_range_max=None,
            remote_ip_prefix='0.0.0.0/0'
        )
        LOG.info("Added rule to allow ping (ICMP) to security group "
                 f"'{security_group_name}'")

        # Add rules to allow SSH (TCP port 22)
        user_conn.network.create_security_group_rule(
            security_group_id=security_group.id,
            protocol='tcp',
            direction='ingress',
            port_range_min=22,
            port_range_max=22,
            remote_ip_prefix='0.0.0.0/0'
        )
        LOG.info("Added rule to allow SSH (TCP port 22) to security group "
                 f"'{security_group_name}'")

    except Exception as e:
        LOG.error(f"Failed to create security group: {e}")
        raise

    # Return the new group too, so both code paths hand back the same
    # kind of value (previously this path returned None).
    return security_group


def _create_vm(user_conn, volume_name, flavor_name, keypair_name,
               security_group_name, network_name, server_name,
               availability_zone, timeout=300, poll_interval=5):
    """Boot a server from an existing volume and wait until it is ACTIVE.

    :param user_conn: OpenStack connection used for all API calls
    :param volume_name: Name of the volume to boot from; it is attached
                        with ``delete_on_termination`` set to False
    :param flavor_name: Name of the flavor to use
    :param keypair_name: Name of the keypair to inject
    :param security_group_name: Name of the security group to apply
    :param network_name: Name of the network to attach the server to
    :param server_name: Name given to the new server
    :param availability_zone: Availability zone passed to the create call
    :param timeout: Maximum number of seconds to wait for the ACTIVE
                    status (default: 300)
    :param poll_interval: Seconds between two status polls (default: 5)
    :return: The server object, as last refreshed when it became ACTIVE
    :raises Exception: If a prerequisite resource is missing, the server
                       ends up in ERROR or DELETED, or ``timeout``
                       expires. Any failure is logged and re-raised.
    """
    try:
        # Find the volume
        volume = user_conn.volume.find_volume(volume_name)
        if not volume:
            raise Exception(f"Volume '{volume_name}' not found")

        # Find the flavor
        flavor = user_conn.compute.find_flavor(flavor_name)
        if not flavor:
            raise Exception(f"Flavor '{flavor_name}' not found")

        # Find the security group
        security_group = user_conn.network.find_security_group(
            security_group_name
        )
        if not security_group:
            raise Exception(f"Security group '{security_group_name}' not "
                            "found")

        # Find the network
        network = user_conn.network.find_network(network_name)
        if not network:
            raise Exception(f"Network '{network_name}' not found")

        # Create the server
        LOG.info(f"Creating server '{server_name}'")
        server = user_conn.compute.create_server(
            name=server_name,
            flavor_id=flavor.id,
            block_device_mapping_v2=[
                {
                    'uuid': volume.id,
                    'source_type': 'volume',
                    'destination_type': 'volume',
                    'boot_index': 0,
                    'delete_on_termination': False,
                }
            ],
            key_name=keypair_name,
            security_groups=[{'name': security_group_name}],
            networks=[{'uuid': network.id}],
            availability_zone=availability_zone
        )
        LOG.info(f"Server '{server_name}' created successfully.")

        LOG.info(f"Waiting for server '{server_name}' to become ACTIVE "
                 f"(timeout: {timeout} seconds)")
        # Monotonic clock: not affected by wall-clock adjustments.
        deadline = time.monotonic() + timeout
        while True:
            server = user_conn.compute.get_server(server.id)
            if server.status == 'ACTIVE':
                LOG.info(f"Server '{server_name}' is up and running.")
                break
            if server.status in ('ERROR', 'DELETED'):
                raise Exception(f"Server '{server_name}' failed to reach "
                                "ACTIVE state. Current status: "
                                f"{server.status}")
            if time.monotonic() > deadline:
                raise Exception(f"Timeout waiting for server '{server_name}' "
                                "to be up.")
            time.sleep(poll_interval)

    except Exception as e:
        LOG.error(f"Failed to create server: {e}")
        raise

    return server


def _generate_random_password(length=32):
    """Return a random alphanumeric password.

    The characters are drawn with ``secrets``, which uses the operating
    system's cryptographically secure random source. The ``random`` module
    is a deterministic generator that is not meant for credentials.

    :param length: Number of characters to generate (default: 32)
    :return: A string of ``length`` ASCII letters and digits
    """
    alphabet = string.ascii_letters + string.digits
    return ''.join(secrets.choice(alphabet) for _ in range(length))


def _setup_vgt_project(conn=None, project_name='vgt', user_name='vgt'):
    """Make sure the validation project and user exist and are linked.

    The project and the user are created when missing. A new random
    password is generated on every call: it is given to a newly created
    user, or set on the user when it already exists. The user is then
    granted the ``member`` role on the project.

    :param conn: OpenStack connection object (mandatory)
    :param project_name: Name of the project (default: 'vgt')
    :param user_name: Name of the user (default: 'vgt')
    :return: Tuple ``(project, user, password)``
    :raises ValueError: if ``conn`` is None
    :raises RuntimeError: if no ``member`` role can be found
    :raises Exception: Any failure while assigning the role, other than an
                       HTTP 409, is logged and re-raised.
    """
    if conn is None:
        raise ValueError("Connection object is mandatory.")

    # 1. Create/Find Project
    project = conn.identity.find_project(project_name)
    if not project:
        LOG.info(f"Creating project '{project_name}'")
        project = conn.identity.create_project(name=project_name)
    else:
        LOG.info(f"Project '{project_name}' already exists.")

    # 2. Create/Update User
    password = _generate_random_password()
    user = conn.identity.find_user(user_name)

    if not user:
        LOG.info(f"Creating user '{user_name}'")
        user = conn.identity.create_user(
            name=user_name,
            project_id=project.id,
            password=password,
            enabled=True
        )
    else:
        LOG.info(f"User '{user_name}' already exists. Updating password.")
        conn.identity.update_user(user.id, password=password)

    # 3. Find the role
    role = conn.identity.find_role('member')

    if not role:
        message = "Could not find a standard 'member' role."
        LOG.error(message)
        # A bare ``raise`` here has no active exception to re-raise and
        # only produced "No active exception to re-raise". Raise the same
        # exception type, but with a meaningful message.
        raise RuntimeError(message)

    # 4. Assign the role (idempotent)
    try:
        conn.identity.assign_project_role_to_user(
            project=project.id,
            user=user.id,
            role=role.id,
        )
        LOG.info(
            f"Assigned role '{role.name}' to '{user_name}' on '{project_name}'"
        )

    except os_exceptions.HttpException as e:
        # HTTP 409 is treated as "role already assigned", not an error.
        if e.status_code == 409:
            LOG.info(
                f"User '{user_name}' already has role '{role.name}' "
                f"on '{project_name}'"
            )
        else:
            LOG.error(f"Failed to assign role: {e}")
            raise

    except Exception as e:
        LOG.error(f"Failed to assign role: {e}")
        raise

    return project, user, password


def _get_server_ip(server):
    """Return the first fixed IPv4 address listed for the server.

    :param server: Server object exposing an ``addresses`` mapping of
                   network name to a list of address dictionaries.
    :return: The first address whose type is ``fixed`` and which contains
             no ``:`` (i.e. is not an IPv6 address).
    :raises Exception: if no such address is found.
    """
    # Lazy scan: stops at the first match, in network/address order.
    fixed_ipv4 = (
        entry['addr']
        for entries in server.addresses.values()
        for entry in entries
        if entry['OS-EXT-IPS:type'] == 'fixed' and ':' not in entry['addr']
    )
    for ipv4 in fixed_ipv4:
        return ipv4
    raise Exception("IPv4 address not found for the server.")


def _wait_for_ssh(server_ip, timeout=300, interval=5):
    """Wait for the SSH port (22) to be open on the given server IP.

    A TCP connection is attempted repeatedly until one succeeds or
    ``timeout`` seconds have elapsed.

    :param server_ip: The IP address of the server.
    :param timeout: The maximum time to wait for the SSH port to be open
                    (default: 300 seconds).
    :param interval: The interval between checks (default: 5 seconds).
                     Also used as the per-attempt connect timeout, with a
                     floor of one second.
    :return: True if the SSH port is open, False otherwise.
    """
    LOG.info(f"Waiting for SSH port (22) to be open on {server_ip}.")
    # Monotonic clock: not affected by wall-clock adjustments.
    deadline = time.monotonic() + timeout
    # Bound every attempt: without a socket timeout an unanswered
    # connection attempt blocks for the operating system default, which
    # can overshoot ``timeout`` by a wide margin.
    connect_timeout = max(interval, 1)
    while True:
        # The context manager closes the socket even when connect_ex()
        # raises (it still can, e.g. on name resolution errors).
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(connect_timeout)
            result = sock.connect_ex((server_ip, 22))
        if result == 0:
            LOG.info(f"SSH port (22) is open on {server_ip}.")
            return True
        if time.monotonic() > deadline:
            LOG.error("Timeout waiting for SSH port (22) to be open on "
                      f"{server_ip}.")
            return False
        time.sleep(interval)


def _test_ssh_connection(server_ip, username, private_key):
    """Test SSH connection to the server using Paramiko.

    :param server_ip: The IP address of the server.
    :param username: The username for SSH.
    :param private_key: The private key string (RSA).
    :return: True if the SSH connection is successful, False otherwise.
             Failures are logged, never raised.
    """
    try:
        key = paramiko.RSAKey.from_private_key(io.StringIO(private_key))
        # The context manager closes the client on every path, including
        # a failed connect() (previously the client was left open then).
        with paramiko.SSHClient() as ssh:
            # NOTE(review): AutoAddPolicy accepts any host key; presumably
            # fine because the target is a throwaway test VM -- confirm.
            ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            ssh.connect(server_ip, username=username, pkey=key)
            LOG.info(f"SSH connection to {server_ip} successful.")
        return True
    except Exception as e:
        LOG.error(f"Failed to SSH into {server_ip}: {e}")
        return False


def _enable_compute_service(conn, host):
    """Make sure every nova-compute service on ``host`` is enabled.

    :param conn: The OpenStack connection object.
    :param host: Hostname whose nova-compute service(s) must be enabled.
    :raises openstack.exceptions.ResourceNotFound: if no nova-compute
        service is listed for ``host``.
    :raises Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        LOG.info(f"Enabling compute service for host '{host}'.")
        # Only list the nova-compute entries of this host.
        matching = list(
            conn.compute.services(binary='nova-compute', host=host)
        )
        if not matching:
            raise os_exceptions.ResourceNotFound(
                f"No nova-compute service found for host '{host}'"
            )

        for service in matching:
            if service.status.lower() == 'enabled':
                LOG.info(f"Service '{service.binary}' on host '{host}' "
                         "is already enabled.")
                continue
            conn.compute.enable_service(service)
            LOG.info(f"Service '{service.binary}' on host '{host}' "
                     "enabled successfully.")

    except Exception as exc:
        LOG.error(
            f"Failed to enable compute service for host '{host}': {exc}"
        )
        raise


def _get_resource_provider_uuid(conn, hostname):
    """Look up the placement resource provider matching a compute host.

    When the connection's placement proxy exposes ``resource_providers``
    it is used; otherwise the placement REST API is queried directly.

    :param conn: OpenStack connection object
    :param hostname: The hostname of the compute node
    :return: The UUID of the resource provider
    :raises openstack.exceptions.ResourceNotFound: if no resource provider
        is found for ``hostname``.
    :raises Exception: Any other failure is logged and re-raised.
    """
    try:
        if not hasattr(conn.placement, "resource_providers"):
            # No proxy support: talk to the placement REST API directly.
            placement_url = conn.session.get_endpoint(
                service_type="placement",
                interface='public',
                region_name=conn.config.region_name
            )
            auth_token = conn.session.get_token()

            base_url = placement_url.rstrip("/") + "/"
            query_url = urllib.parse.urljoin(
                base_url, f"resource_providers?name={hostname}"
            )
            request_headers = {
                "X-Auth-Token": auth_token,
                "OpenStack-API-Version": "placement 1.6"
            }
            response = requests.get(query_url, headers=request_headers)
            response.raise_for_status()

            providers = response.json().get('resource_providers', [])
            if providers:
                return providers[0]['uuid']
            raise os_exceptions.ResourceNotFound(
                f"Resource Provider {hostname} not found"
            )

        # Proxy path: scan the providers for one named after the host.
        for provider in conn.placement.resource_providers():
            if provider.name == hostname:
                return provider.id
        raise os_exceptions.ResourceNotFound(
            f"Resource Provider {hostname} not found"
        )

    except os_exceptions.ResourceNotFound:
        LOG.error(f"Placement Resource Provider for host '{hostname}' "
                  "not found. Aborting.")
        raise
    except Exception as exc:
        LOG.error(
            f"Failed to retrieve resource provider UUID: {exc}. Aborting."
        )
        raise


def _get_traits_from_resource_provider(conn, resource_provider_uuid):
    """Return the traits currently set on a resource provider.

    The placement REST API is queried directly with the token and
    endpoint taken from the connection's session.

    :param conn: OpenStack connection object
    :param resource_provider_uuid: The UUID of the resource provider
    :return: A set of trait names (empty if the reply lists none)
    :raises Exception: On HTTP 404 or any other failure; the error is
                       logged and re-raised.
    """
    try:
        placement_url = conn.session.get_endpoint(
            service_type="placement",
            interface='public',
            region_name=conn.config.region_name
        )
        base_url = placement_url.rstrip("/") + "/"
        request_url = urllib.parse.urljoin(
            base_url, f"resource_providers/{resource_provider_uuid}/traits"
        )
        auth_token = conn.session.get_token()

        response = requests.get(
            request_url,
            headers={
                "X-Auth-Token": auth_token,
                "OpenStack-API-Version": "placement 1.6",
                "Accept": "application/json"
            },
        )
        # A 404 gets its own message rather than the generic HTTP error.
        if response.status_code == 404:
            raise Exception(
                f"Trait endpoint returned 404 for {resource_provider_uuid}. "
                "Aborting."
            )
        response.raise_for_status()

        trait_names = response.json().get('traits', [])
        return set(trait_names)

    except Exception as exc:
        LOG.error(f"Failed to retrieve traits: {exc}. Aborting.")
        raise


def _add_trait_to_resource_provider(conn, resource_provider_uuid,
                                    current_traits, trait_name):
    """Add the specified trait to the resource provider if not present.

    The full trait list (current traits plus the new one) is sent to the
    placement REST API together with the provider's current generation.

    :param conn: OpenStack connection object
    :param resource_provider_uuid: The UUID of the resource provider
    :param current_traits: The current set of traits
    :param trait_name: The trait to add
    :raises Exception: Any failure, including an HTTP error status on
                       either request, is logged and re-raised.
    """
    try:
        if trait_name in current_traits:
            LOG.info(f"Trait '{trait_name}' already exists for resource "
                     f"provider {resource_provider_uuid}.")
            return

        updated_traits = list(current_traits | {trait_name})

        endpoint = conn.session.get_endpoint(
            service_type="placement",
            interface='public',
            region_name=conn.config.region_name
        )
        token = conn.session.get_token()

        headers = {
            "X-Auth-Token": token,
            "OpenStack-API-Version": "placement 1.6"
        }

        # Get the resource provider generation, sent back with the update
        rp_url = urllib.parse.urljoin(
            endpoint.rstrip("/") + "/",
            f"resource_providers/{resource_provider_uuid}"
        )
        resp = requests.get(rp_url, headers=headers)
        resp.raise_for_status()
        rp_gen = resp.json()['generation']

        # Update the traits
        url = urllib.parse.urljoin(
            endpoint.rstrip("/") + "/",
            f"resource_providers/{resource_provider_uuid}/traits"
        )
        payload = {
            'resource_provider_generation': rp_gen,
            'traits': updated_traits
        }
        resp = requests.put(url, headers=headers, json=payload)
        # Without this check a rejected update was logged as a success.
        resp.raise_for_status()
        LOG.info(f"Added trait '{trait_name}' via HTTP API")

    except Exception as e:
        LOG.error(f"Failed to add trait '{trait_name}' to resource provider "
                  f"{resource_provider_uuid}: {e}")
        raise


def _remove_trait_from_resource_provider(conn, resource_provider_uuid,
                                         current_traits, trait_name):
    """Drop one trait from a resource provider, if it carries it.

    The remaining traits are sent to the placement REST API together with
    the provider's current generation.

    :param conn: OpenStack connection object
    :param resource_provider_uuid: The UUID of the resource provider
    :param current_traits: The current set of traits
    :param trait_name: The trait to remove
    :raises Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        if trait_name in current_traits:
            remaining = current_traits - {trait_name}
            updated_traits = list(remaining)

            placement_url = conn.session.get_endpoint(
                service_type="placement",
                interface='public',
                region_name=conn.config.region_name
            )
            request_headers = {
                "X-Auth-Token": conn.session.get_token(),
                "OpenStack-API-Version": "placement 1.6"
            }

            # The provider generation must be sent back with the update.
            provider_url = urllib.parse.urljoin(
                placement_url.rstrip("/") + "/",
                f"resource_providers/{resource_provider_uuid}"
            )
            provider_reply = requests.get(provider_url,
                                          headers=request_headers)
            provider_reply.raise_for_status()
            generation = provider_reply.json()["generation"]

            # Replace the trait list with the reduced one.
            traits_url = urllib.parse.urljoin(
                placement_url.rstrip("/") + "/",
                f"resource_providers/{resource_provider_uuid}/traits"
            )
            update_reply = requests.put(
                traits_url,
                headers=request_headers,
                json={
                    "resource_provider_generation": generation,
                    "traits": updated_traits
                },
            )
            update_reply.raise_for_status()

            LOG.info(
                f"Removed trait '{trait_name}' from resource provider "
                f"{resource_provider_uuid} via HTTP API"
            )
        else:
            LOG.info(
                f"Trait '{trait_name}' not present on resource provider "
                f"{resource_provider_uuid}, nothing to remove."
            )

    except Exception as exc:
        LOG.error(
            f"Failed to remove trait '{trait_name}' from resource provider "
            f"{resource_provider_uuid}: {exc}"
        )
        raise


def _remove_compute_from_aggregate(conn, aggregate_name, hostname):
    """Detach a compute host from a host aggregate.

    When the aggregate does not exist, or the host is not one of its
    members, an error is logged and nothing else happens.

    :param conn: OpenStack connection object
    :param aggregate_name: Name of the aggregate
    :param hostname: Hostname to remove from the aggregate
    """
    LOG.info(f"Removing host '{hostname}' from aggregate '{aggregate_name}'.")

    agg = conn.compute.find_aggregate(aggregate_name)
    if not agg:
        LOG.error(f"Aggregate '{aggregate_name}' not found.")
    elif hostname not in agg.hosts:
        LOG.error(
            f"Host '{hostname}' not found in aggregate '{aggregate_name}'."
        )
    else:
        conn.compute.remove_host_from_aggregate(agg, hostname)
        LOG.info(f"Host '{hostname}' removed from aggregate "
                 f"'{aggregate_name}' successfully.")


def _check_compute_exists(conn, hostname):
    """Verify that a nova-compute service is listed for ``hostname``.

    :param conn: OpenStack connection object
    :param hostname: Hostname of the compute node to look for
    :raises ValueError: if no nova-compute service is listed for it
    """
    found = list(
        conn.compute.services(binary='nova-compute', host=hostname)
    )
    if found:
        return
    raise ValueError(f"Compute host '{hostname}' not found in the cluster")


def _create_aggregate(conn, aggregate_name):
    """Return the named host aggregate, creating it when it is missing.

    :param conn: OpenStack connection object
    :param aggregate_name: Name of the aggregate
    :return: The existing or newly created aggregate object
    """
    existing = conn.compute.find_aggregate(aggregate_name)
    if existing is not None:
        LOG.info(f"Aggregate '{aggregate_name}' already exists.")
        return existing

    LOG.info(f"Creating aggregate '{aggregate_name}'")
    return conn.compute.create_aggregate(name=aggregate_name)


def _add_property_to_aggregate(conn, aggregate, aggregate_name,
                               prop_key, prop_val):
    """Set one metadata property on an aggregate unless already set.

    :param conn: OpenStack connection object
    :param aggregate: Aggregate object to update
    :param aggregate_name: Name of the aggregate (used in log messages)
    :param prop_key: Metadata key
    :param prop_val: Metadata value expected for ``prop_key``
    """
    existing_meta = aggregate.metadata or {}

    if existing_meta.get(prop_key) == prop_val:
        LOG.info(f"Aggregate '{aggregate_name}' already has property "
                 f"{prop_key}={prop_val}.")
        return

    LOG.info(
        f"Setting property {prop_key}={prop_val} on "
        f"aggregate '{aggregate_name}'"
    )
    conn.compute.set_aggregate_metadata(
        aggregate,
        metadata={prop_key: prop_val},
    )


def _add_host_to_aggregate(conn, aggregate, aggregate_name, hostname):
    """Add a host to an aggregate unless it is already a member.

    Exceptions from the compute call propagate to the caller unchanged.

    :param conn: OpenStack connection object
    :param aggregate: Aggregate object to update
    :param aggregate_name: Name of the aggregate (used in log messages)
    :param hostname: Hostname to add
    """
    LOG.info(f"Adding host '{hostname}' to aggregate '{aggregate_name}'.")

    # A missing or empty ``hosts`` attribute counts as "no members".
    members = getattr(aggregate, "hosts", []) or []
    if hostname in members:
        LOG.info(f"Host {hostname} already in aggregate '{aggregate_name}'.")
        return

    conn.compute.add_host_to_aggregate(aggregate, hostname)
    LOG.info(f"Added host '{hostname}' to aggregate '{aggregate_name}'.")


def _check_admin_priv(conn):
    """Check that the connection is allowed to list host aggregates.

    :param conn: OpenStack connection object
    :raises PermissionError: if listing the aggregates is forbidden
    """
    try:
        # Walk the whole listing; only the permission check matters.
        for _aggregate in conn.compute.aggregates():
            pass
    except os_exceptions.Forbidden:
        raise PermissionError("Admin credentials are required.")


def _add_trait_in_placement(conn, trait_name):
    """Make sure the trait is declared in placement.

    When the connection's placement proxy exposes ``find_trait`` it is
    used; otherwise the trait is created through the placement REST API.

    :param conn: OpenStack connection object
    :param trait_name: Name of the trait that must exist
    :raises Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        if hasattr(conn.placement, "find_trait"):
            known = conn.placement.find_trait(trait_name,
                                              ignore_missing=True)
            if not known:
                conn.placement.create_trait(name=trait_name)
            return

        # No trait support in the placement proxy: the only option left
        # is to call the placement URL directly.
        placement_url = conn.session.get_endpoint(
            service_type="placement",
            interface='public',
            region_name=conn.config.region_name
        )
        if not placement_url:
            raise RuntimeError(
                "Cannot find Placement endpoint in Keystone catalog"
            )

        request_url = urllib.parse.urljoin(
            placement_url.rstrip("/") + "/", f"traits/{trait_name}"
        )
        request_headers = {
            "X-Auth-Token": conn.session.get_token(),
            "OpenStack-API-Version": "placement 1.6"
        }

        response = requests.put(request_url, headers=request_headers)
        response.raise_for_status()

    except Exception as exc:
        LOG.error(f"Could not ensure trait '{trait_name}': {exc}")
        raise


def _volume_set_image_metadata(user_conn, volume,
                               image_prop_key, image_prop_value):
    """Ensure a volume carries the given image metadata property.

    The volume is re-read first; when the property already has the wanted
    value nothing is sent. Otherwise an ``os-set_image_metadata`` action
    is posted to the volume endpoint ("volumev2", falling back to
    "volumev3" when that endpoint is not found).

    :param user_conn: OpenStack connection used for the volume calls
    :param volume: Volume object; only its ``id`` is used
    :param image_prop_key: Image metadata key to set
    :param image_prop_value: Value expected for ``image_prop_key``
    :raises Exception: Any failure is logged and re-raised.
    """
    try:
        refreshed = user_conn.volume.get_volume(volume.id)
        image_meta = refreshed.volume_image_metadata or {}

        if image_meta.get(image_prop_key) == image_prop_value:
            LOG.info(
                "Volume already has image property "
                f"{image_prop_key}={image_prop_value}"
            )
            return

        LOG.info(
            "Setting volume image property "
            f"{image_prop_key}={image_prop_value}"
        )
        try:
            volume_url = user_conn.session.get_endpoint(
                service_type="volumev2",
                interface='public',
                region_name=user_conn.config.region_name
            )
        except EndpointNotFound:
            # Second chance with the "volumev3" service type. Whatever
            # goes wrong here is reported as a missing endpoint.
            try:
                volume_url = user_conn.session.get_endpoint(
                    service_type="volumev3",
                    interface='public',
                    region_name=user_conn.config.region_name
                )
            except Exception:
                raise RuntimeError(
                    "Cannot find Volume endpoint in Keystone catalog"
                )

        request_headers = {
            "X-Auth-Token": user_conn.session.get_token(),
            "Content-Type": "application/json"
        }
        action_url = urllib.parse.urljoin(
            volume_url.rstrip("/") + "/", f"volumes/{volume.id}/action"
        )
        action_body = {
            "os-set_image_metadata": {
                "metadata": {image_prop_key: image_prop_value}
            }
        }
        response = requests.post(action_url, headers=request_headers,
                                 json=action_body)
        response.raise_for_status()
        LOG.info(
            f"Successfully set image property on volume {volume.id} "
            "via HTTP API"
        )

    except Exception as exc:
        LOG.error(f"Failed to set volume image properties: {exc}")
        raise


def _cleanup_aggregate(conn, aggregate_name):
    """Delete the named aggregate, but only when it has no hosts left.

    Best effort: any failure is logged as a warning and not re-raised.

    :param conn: OpenStack connection object
    :param aggregate_name: Name of the aggregate to delete
    """
    try:
        LOG.info(f"Deleting empty aggregate '{aggregate_name}'")
        agg = conn.compute.find_aggregate(aggregate_name)
        if not agg:
            LOG.info(
                f"Aggregate '{aggregate_name}' not found, skipping deletion."
            )
        elif agg.hosts:
            # Never delete an aggregate that still has members.
            LOG.info(
                f"Aggregate '{aggregate_name}' still has hosts, not deleting."
            )
        else:
            conn.compute.delete_aggregate(agg)
            LOG.info(f"Aggregate '{aggregate_name}' deleted successfully.")
    except Exception as exc:
        LOG.warning(
            f"Could not clean up aggregate '{aggregate_name}': {exc}"
        )


def _cleanup_trait(conn, trait_name):
    """Delete the trait from placement, if it exists.

    Best effort: any failure is logged as a warning and not re-raised.

    :param conn: OpenStack connection object
    :param trait_name: Name of the trait to delete
    """
    try:
        LOG.info(f"Removing '{trait_name}' trait from placement.")
        if not hasattr(conn.placement, "find_trait"):
            # No trait support in the placement proxy: use the REST API.
            placement_url = conn.session.get_endpoint(
                service_type="placement",
                interface='public',
                region_name=conn.config.region_name
            )
            if not placement_url:
                LOG.warning(
                    "Cannot find Placement endpoint in Keystone catalog"
                )
                return

            request_url = urllib.parse.urljoin(
                placement_url.rstrip("/") + "/", f"traits/{trait_name}"
            )
            request_headers = {
                "X-Auth-Token": conn.session.get_token(),
                "OpenStack-API-Version": "placement 1.6"
            }

            response = requests.delete(request_url, headers=request_headers)
            status = response.status_code
            if status == 204:
                LOG.info(f"Trait '{trait_name}' deleted from placement via "
                         "HTTP API.")
            elif status == 404:
                LOG.info(f"Trait '{trait_name}' not found in placement, "
                         "skipping deletion.")
            else:
                response.raise_for_status()
            return

        found = conn.placement.find_trait(trait_name, ignore_missing=True)
        if not found:
            LOG.info(f"Trait '{trait_name}' not found in placement, "
                     "skipping deletion.")
        else:
            conn.placement.delete_trait(found)
            LOG.info(f"Trait '{trait_name}' deleted from placement.")

    except Exception as exc:
        LOG.warning(f"Could not clean up trait '{trait_name}': {exc}")


def _cleanup_user_resources(conn, project, user, password, volume_name,
                            keypair_name, security_group_name):
    """Delete the keypair, security group and volume of the project.

    A second connection is opened with the user's own credentials (both
    domains set to 'Default') and used for every deletion. Best effort:
    the first failure stops the remaining steps and is logged as a
    warning, never re-raised.

    :param conn: OpenStack connection; source of the identity endpoint
                 and the region name
    :param project: Project object (its ``name`` is used)
    :param user: User object (its ``name`` is used)
    :param password: Password of the user
    :param volume_name: Name of the volume to delete
    :param keypair_name: Name of the keypair to delete
    :param security_group_name: Name of the security group to delete
    """
    try:
        LOG.info(f"Deleting '{project.name}' resources.")

        # Authenticate as the user against the same identity endpoint.
        identity_url = conn.session.get_endpoint(service_type='identity')
        user_conn = openstack.connect(
            auth_url=identity_url,
            username=user.name,
            password=password,
            project_name=project.name,
            project_domain_name='Default',
            user_domain_name='Default',
            region_name=getattr(conn.config, 'region_name', None)
        )

        # Keypair
        LOG.info(f"Deleting keypair '{keypair_name}'")
        found_keypair = user_conn.compute.find_keypair(keypair_name)
        if not found_keypair:
            LOG.info(f"Keypair '{keypair_name}' not found, skipping deletion.")
        else:
            user_conn.compute.delete_keypair(found_keypair)
            LOG.info(f"Keypair '{keypair_name}' deleted successfully.")

        # Security group
        LOG.info(f"Deleting security group '{security_group_name}'")
        found_group = user_conn.network.find_security_group(
            security_group_name
        )
        if not found_group:
            LOG.info(f"Security group '{security_group_name}' not found, "
                     "skipping deletion.")
        else:
            user_conn.network.delete_security_group(found_group)
            LOG.info(f"Security group '{security_group_name}' "
                     "deleted successfully.")

        # Volume
        LOG.info(f"Deleting volume '{volume_name}'")
        found_volume = user_conn.volume.find_volume(volume_name)
        if not found_volume:
            LOG.info(f"Volume '{volume_name}' not found, skipping deletion.")
        else:
            user_conn.volume.delete_volume(found_volume)
            LOG.info(f"Volume '{volume_name}' deleted successfully.")

    except Exception as exc:
        LOG.warning(f"Could not clean up user resources: {exc}")


def _cleanup_user_and_project(conn, project_name, user_name):
    """Delete the validation user, then the validation project.

    The user and the project are handled independently: a missing project
    no longer prevents the user from being deleted (the previous early
    return left the user behind in that case). Best effort: any failure
    is logged as a warning and not re-raised.

    :param conn: OpenStack connection object
    :param project_name: Name of the project to delete
    :param user_name: Name of the user to delete
    """
    LOG.info(f"Deleting user '{user_name}'")
    try:
        # The user goes first, while its project still exists.
        user = conn.identity.find_user(user_name)
        if user:
            conn.identity.delete_user(user)
            LOG.info(f"User '{user_name}' deleted successfully.")
        else:
            LOG.info(f"User '{user_name}' not found, skipping deletion.")

        # Then the project
        project = conn.identity.find_project(project_name)
        if not project:
            LOG.info(f"Project '{project_name}' not found, skipping deletion.")
            return

        LOG.info(f"Deleting project '{project_name}'")
        conn.identity.delete_project(project)
        LOG.info(f"Project '{project_name}' deleted successfully.")

    except Exception as e:
        LOG.warning(f"Could not clean up project and user: {e}")


def _disable_compute_service(conn, host):
    """Put the nova-compute service of a host in the disabled state.

    Every nova-compute service listed for the host is inspected: the ones
    whose status is not already 'disabled' get disabled, the others are
    only reported. Any error is logged and then re-raised.

    :param conn: The OpenStack connection object.
    :param host: The hostname of the compute service to disable.
    :raises openstack.exceptions.ResourceNotFound: when no nova-compute
        service is listed for the host.
    """
    try:
        LOG.info(f"Disabling compute service for host '{host}'.")
        # Only ask for the nova-compute binary running on this host.
        found = list(conn.compute.services(binary='nova-compute',
                                           host=host))
        if not found:
            message = f"No nova-compute service found for host '{host}'"
            raise os_exceptions.ResourceNotFound(message)

        for service in found:
            if service.status.lower() == 'disabled':
                # Nothing to do for this one.
                LOG.info(f"Service '{service.binary}' on host '{host}' is "
                         "already disabled.")
                continue

            conn.compute.disable_service(service)
            LOG.info(f"Service '{service.binary}' on host '{host}' "
                     "disabled successfully.")

    except Exception as exc:
        LOG.error("Failed to disable compute service for host "
                  f"'{host}': {exc}")
        raise


def validate_cleanup(conn=None,
                     c_agg='hosts-to-validate',
                     trait_name='CUSTOM_COMPUTE_TO_VALIDATE',
                     project_name='vgt', user_name='vgt',
                     volume_name='vgt_validation',
                     server_name='vgt-validation-vm',
                     keypair_name='vgt-keypair',
                     security_group_name='vgt-sec-group'):
    """Remove the resources left behind by validate_compute().

    This covers what validate_compute() does not clean up by itself at
    the end of a run: the aggregate, the trait, the per-user resources
    (volume, keypair, security group), and finally the user and the
    project.

    :param conn: OpenStack connection object (mandatory)
    :param c_agg: aggregate name used during validation
    :param trait_name: trait name used during validation
    :param project_name: project name (default: 'vgt')
    :param user_name: user name (default: 'vgt')
    :param volume_name: volume name (default: 'vgt_validation')
    :param server_name: server name (default: 'vgt-validation-vm');
        accepted but not referenced by this function
    :param keypair_name: keypair name (default: 'vgt-keypair')
    :param security_group_name: security group name (default: 'vgt-sec-group')
    :raises ValueError: if no connection object is given
    """
    if conn is None:
        raise ValueError("Connection object is mandatory.")

    LOG.info("Starting cleanup process...")

    # Admin-side leftovers first: the aggregate, then the trait.
    _cleanup_aggregate(conn, c_agg)
    _cleanup_trait(conn, trait_name)

    # The per-user cleanup below needs the project, the user and a
    # password, all of which come from the project setup helper.
    credentials = _setup_vgt_project(conn=conn,
                                     project_name=project_name,
                                     user_name=user_name)
    vgt_project, vgt_user, vgt_password = credentials

    # Volume, keypair and security group go first, then the user and
    # the project themselves.
    _cleanup_user_resources(conn, vgt_project, vgt_user, vgt_password,
                            volume_name, keypair_name, security_group_name)
    _cleanup_user_and_project(conn, project_name, user_name)

    LOG.info("Cleanup process completed.")


def validate_compute(conn=None, hostname=None,
                     c_agg='hosts-to-validate',
                     trait_name='CUSTOM_COMPUTE_TO_VALIDATE',
                     project_name='vgt', user_name='vgt',
                     image_name='Debian 13',
                     flavor_name='medium-flavor',
                     network_name='internal-shared-routable-network',
                     ssh_username='debian',
                     volume_name='vgt_validation',
                     server_name='vgt-validation-vm',
                     keypair_name='vgt-keypair',
                     security_group_name='vgt-sec-group',
                     keep_enabled=False):
    """Validate a compute host by booting a VM on it and testing SSH.

    The host is put in a dedicated aggregate and given a dedicated trait,
    its compute service is enabled, and a VM booted from a volume that
    requires this trait is spawned on it as a non-admin user. The
    validation passes when the SSH port of the VM opens and an SSH
    connection test succeeds. The VM is then deleted, the host is
    disabled unless ``keep_enabled`` is set and the validation passed,
    and the host is removed from the aggregate and loses the trait.

    :param conn: An OpenStack connection object (mandatory)
    :param hostname: the compute hostname to validate (mandatory)
    :param c_agg: name of the aggregate the host is added to
    :param trait_name: name of the trait set on the host and required
        by the validation volume
    :param project_name: name of the project used to spawn the VM
    :param user_name: name of the non-admin user used to spawn the VM
    :param image_name: name of the image the volume is created from
    :param flavor_name: flavor name handed to the VM creation helper
    :param network_name: network name handed to the VM creation helper
    :param ssh_username: user name for the SSH connection test
    :param volume_name: name of the validation volume
    :param server_name: name of the validation VM
    :param keypair_name: name of the keypair created for the VM
    :param security_group_name: name of the security group for the VM
    :param keep_enabled: if True, leave the compute service enabled
        when the validation passes (default: False)
    :raises ValueError: if conn or hostname is missing
    :raises RuntimeError: if the availability zone of the host cannot
        be determined
    :raises Exception: if no nova-compute service is found for the host
        or if the image is not found
    """
    validation_successful = True

    if conn is None:
        raise ValueError("Connection object is mandatory.")

    if hostname is None:
        raise ValueError("Hostname is mandatory.")

    _check_admin_priv(conn)
    _check_compute_exists(conn, hostname)
    agg = _create_aggregate(conn, c_agg)
    _add_property_to_aggregate(conn, agg, c_agg,
                               f"trait:{trait_name}", 'required')
    _add_host_to_aggregate(conn, agg, c_agg, hostname)
    _add_trait_in_placement(conn, trait_name)

    # Fetch the current compute traits list, then add the validation
    # trait to it.
    try:
        resource_provider_uuid = _get_resource_provider_uuid(conn, hostname)
        current_traits = _get_traits_from_resource_provider(
            conn, resource_provider_uuid
        )

        _add_trait_to_resource_provider(conn, resource_provider_uuid,
                                        current_traits, trait_name)

    except os_exceptions.ResourceNotFound:
        LOG.error(f"Placement Resource Provider for host '{hostname}' not "
                  "found. Aborting.")
        raise

    except Exception as e:
        LOG.error(f"Failed to retrieve current traits: {e}. Aborting.")
        raise

    # Enable the compute so we can spawn a VM on it.
    # It's ok to do this before it's fully validated,
    # as VMs without the trait won't spawn on it anyway.
    _enable_compute_service(conn, hostname)

    # Find the AZ of the host, so we ask to spawn the VM on it.
    services = list(conn.compute.services(binary='nova-compute',
                                          host=hostname))

    if not services:
        raise Exception(f"Compute host '{hostname}' not found in the cluster")

    service = services[0]

    availability_zone = service.availability_zone

    if availability_zone is None:
        raise RuntimeError("Cannot determine availability zone for host "
                           f"'{hostname}'")
    LOG.info(f"Using availability zone '{availability_zone}' for host "
             f"'{hostname}'")

    # Setup a vgt project. If it already exists, only its
    # password will be reset.
    project, user, password = _setup_vgt_project(conn=conn,
                                                 project_name=project_name,
                                                 user_name=user_name)

    # Create a new OpenStack connection as non-admin
    user_conn = openstack.connect(
        auth_url=conn.session.get_endpoint(
            service_type='identity'
        ),
        username=user.name,
        password=password,
        project_name=project.name,
        project_domain_name='Default',
        user_domain_name='Default',
        region_name=getattr(conn.config, 'region_name', None)
    )

    # Find the image to use to create the volume.
    image = user_conn.image.find_image(image_name)
    if not image:
        # Report the image that was actually requested.
        raise Exception(f"Image '{image_name}' not found")

    # Create the volume if it doesn't exist.
    volume = user_conn.volume.find_volume(volume_name)
    if volume:
        LOG.info(f"Volume {volume_name} already exists.")
    else:
        volume = user_conn.volume.create_volume(
            name=volume_name,
            size=5,
            image_id=image.id
        )
        LOG.info(f"Volume created successfully: {volume.id}")

    # Wait for volume to be in available status
    if volume.status not in ('available',):
        LOG.info(f"Waiting for volume {volume.id} (current: {volume.status})")
        volume = user_conn.volume.wait_for_status(
            volume,
            status='available',
            failures=['error', 'error_deleting'],
            wait=300,
            interval=5,
        )

    LOG.info(f"Volume '{volume.name}' (ID: {volume.id}) is ready and "
             "available.")

    # Add trait:<trait_name>=required to the volume's
    # glance_image_metadata.
    _volume_set_image_metadata(conn, volume, f"trait:{trait_name}", 'required')
    # Create a keypair for the user
    private_key, public_key = _generate_ssh_key_pair()
    # Create the keypair in OpenStack
    _create_keypair(user_conn, keypair_name, public_key)
    # Create the security group
    _create_security_group(user_conn, security_group_name)
    LOG.info(f"Security group '{security_group_name}' created "
             "or found successfully.")

    # Create the server and wait for it to become ACTIVE
    # within a 300 seconds timeout.
    server = _create_vm(user_conn, volume_name, flavor_name, keypair_name,
                        security_group_name, network_name, server_name,
                        availability_zone)

    # Get the server IP
    server_ip = _get_server_ip(server)
    LOG.info(f"Server IP: {server_ip}")

    # Wait for the SSH port, then test an actual SSH connection.
    # The connection test is skipped when the port never opened.
    if not _wait_for_ssh(server_ip):
        validation_successful = False
        LOG.error("SSH port is not open. Validation FAILED. Cannot "
                  "proceed with SSH connection test.")
    elif not _test_ssh_connection(server_ip, ssh_username, private_key):
        validation_successful = False
        LOG.error("SSH connection test failed.")
    else:
        LOG.info("SSH connection test passed.")

    # VM removal
    LOG.info(f"Deleting validation VM '{server_name}'.")
    user_conn.compute.delete_server(server)
    LOG.info(f"VM '{server_name}' deleted successfully.")

    # The host only stays enabled when this was requested AND the
    # validation passed; in every other case it is disabled again.
    if keep_enabled and validation_successful:
        LOG.info(f"Compute host '{hostname}' remains enabled after "
                 "validation as requested.")
    else:
        _disable_compute_service(conn, hostname)
        if keep_enabled:
            LOG.info(f"Compute host '{hostname}' has been disabled after "
                     "validation failure.")
        else:
            LOG.info(f"Compute host '{hostname}' has been disabled after "
                     "validation (default behavior).")

    # Remove from aggregate
    _remove_compute_from_aggregate(conn, c_agg, hostname)
    # Remove trait from compute
    current_traits = _get_traits_from_resource_provider(
        conn, resource_provider_uuid
    )
    _remove_trait_from_resource_provider(conn, resource_provider_uuid,
                                         current_traits, trait_name)

    # Report the real outcome: the host is only "enabled and ready" when
    # the validation passed and keep_enabled was requested.
    if not validation_successful:
        LOG.error(f"Validation FAILED: compute host '{hostname}' has "
                  "been left disabled.")
    elif keep_enabled:
        LOG.info("Validation and configuration completed: new compute host "
                 f"'{hostname}' is enabled and ready!")
    else:
        LOG.info(f"Validation completed: compute host '{hostname}' passed "
                 "and has been left disabled.")