# pylint: disable=line-too-long,useless-suppression
# coding=utf-8
# --------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# Code generated by Microsoft (R) Python Code Generator.
# Changes may cause incorrect behavior and will be lost if the code is regenerated.
# --------------------------------------------------------------------------
import os
import time
import pytest
from dotenv import load_dotenv
from devtools_testutils import (
    test_proxy,
    add_general_regex_sanitizer,
    add_body_key_sanitizer,
    add_header_regex_sanitizer,
    is_live,
)
from azure.planetarycomputer.models import (
    StacCollection,
    StacExtensionSpatialExtent,
    StacCollectionTemporalExtent,
    StacExtensionExtent,
)

# Load variables from a .env file (if one is found) into os.environ at import
# time, so the os.environ.get() lookups in the fixture below can see them.
load_dotenv()


# For security, avoid recording sensitive identity information in recordings.
@pytest.fixture(scope="session", autouse=True)
def add_sanitizers(test_proxy):
    """Register the test-proxy sanitizers shared by all tests in this session.

    The fixture is session-scoped and autouse, so it runs once without being
    requested explicitly. It removes a few default AZSDK sanitizers and then
    registers sanitizers for credentials, volatile headers, the GeoCatalog
    endpoint, blob storage URLs, service-generated UUIDs and hashed collection IDs.

    The sanitizers are registered in a deliberate order (see the inline notes);
    keep that order when adding new ones.

    :param test_proxy: The ``test_proxy`` fixture. It is not used directly; it is
        requested so that fixture is set up before any sanitizer is registered.
    :return: None.
    """
    import re

    from devtools_testutils import (
        add_body_regex_sanitizer,
        add_general_string_sanitizer,
        add_uri_regex_sanitizer,
        remove_batch_sanitizers,
    )

    zero_uuid = "00000000-0000-0000-0000-000000000000"
    uuid_pattern = r"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}"
    live = is_live()

    # Remove default AZSDK sanitizers that would sanitize collection_id and item_id.
    # These are public data and should not be sanitized.
    # AZSDK3493: Sanitizes JSON path $..name
    # AZSDK3430: Sanitizes JSON path $..id
    # AZSDK2003: Default hostname sanitizer that would reduce URLs to just "Sanitized.com"
    remove_batch_sanitizers(["AZSDK3493", "AZSDK3430", "AZSDK2003"])

    # Credentials / identity values taken from the environment.
    # - `or zero_uuid` also covers a variable that is set but empty; an empty
    #   regex must never be registered.
    # - The values are literals, not patterns, so they are escaped: a secret that
    #   contains regex metacharacters (e.g. "+" or "(") must still be matched verbatim.
    for env_name in (
        "PLANETARYCOMPUTER_SUBSCRIPTION_ID",
        "PLANETARYCOMPUTER_TENANT_ID",
        "PLANETARYCOMPUTER_CLIENT_ID",
        "PLANETARYCOMPUTER_CLIENT_SECRET",
    ):
        literal_value = os.environ.get(env_name) or zero_uuid
        add_general_regex_sanitizer(regex=re.escape(literal_value), value=zero_uuid)

    add_header_regex_sanitizer(key="Set-Cookie", value="[set-cookie;]")
    add_header_regex_sanitizer(key="Cookie", value="cookie;")
    add_body_key_sanitizer(json_path="$..access_token", value="access_token")

    # Sanitize request tracking headers.
    # Content-Length is deliberately NOT sanitized: doing so caused matching
    # issues with DELETE requests.
    for header_name, placeholder in (
        ("X-Request-ID", "00000000000000000000000000000000"),
        ("Date", "Mon, 01 Jan 2024 00:00:00 GMT"),
        ("Server-Timing", "total;dur=0.0"),
        ("traceparent", "00-00000000000000000000000000000000-0000000000000000-00"),
        ("mise-correlation-id", zero_uuid),
    ):
        add_header_regex_sanitizer(key=header_name, value=placeholder)

    # Sanitize the endpoint hostname to match the test proxy's format:
    # Sanitized.sanitized_label.sanitized_location. Using the same format as the
    # proxy's automatic sanitization avoids conflicts.
    fake_endpoint = "https://Sanitized.sanitized_label.sanitized_location.geocatalog.spatio.azure.com"

    # Replace any real geocatalog hostname with the standardized fake value.
    for endpoint_regex in (
        r"https?://[a-zA-Z0-9\-\.]+\.geocatalog\.[a-zA-Z0-9\-\.]+\.azure\.com",
        r"https?://[a-zA-Z0-9\-\.]+\.geocatalog\.azure\.com",
    ):
        add_uri_regex_sanitizer(regex=endpoint_regex, value=fake_endpoint)

    # In live mode, also add a string sanitizer for the real endpoint value so the
    # EnvironmentVariableLoader's auto-sanitizer uses our fake value.
    if live:
        real_endpoint = os.environ.get("PLANETARYCOMPUTER_ENDPOINT", "")
        if real_endpoint:
            add_general_string_sanitizer(target=real_endpoint, value=fake_endpoint)

    # Sanitize full container URLs while preserving their structure, e.g.
    # https://realaccount.blob.core.windows.net/realcontainer
    #   -> https://SANITIZED.blob.core.windows.net/sample-container
    # IMPORTANT: this MUST come BEFORE the general storage account sanitizers below.
    fake_container_url = "https://SANITIZED.blob.core.windows.net/sample-container"

    # Replace the default "Sanitized" value of the container URL JSON fields with
    # the structured fake URL.
    for json_key in ("containerUrl", "containerUri"):
        add_body_regex_sanitizer(
            regex=rf'"{json_key}"\s*:\s*"Sanitized"',
            value=f'"{json_key}": "{fake_container_url}"',
        )

    # In live mode, also replace the real container URL with the fake one.
    # Either environment variable may hold it.
    if live:
        for env_name in (
            "AZURE_INGESTION_CONTAINER_URI",
            "PLANETARYCOMPUTER_INGESTION_CONTAINER_URI",
        ):
            real_container_url = os.environ.get(env_name, "")
            if real_container_url:
                add_general_string_sanitizer(
                    target=real_container_url, value=fake_container_url
                )

    # Storage account URLs WITH a URL-encoded scheme prefix (e.g. in query parameters):
    # https%3A%2F%2Fcontosdatasa.blob.core.windows.net -> https%3A%2F%2FSANITIZED.blob.core.windows.net
    # (%2F%2F is the URL-encoded form of "//".)
    add_uri_regex_sanitizer(
        regex=r"https%3A%2F%2F[a-z0-9]+\.blob\.core\.windows\.net",
        value="https%3A%2F%2FSANITIZED.blob.core.windows.net",
    )

    # ALL blob storage URLs in bodies (asset URLs, error messages, etc.):
    # https://contosdatasa.blob.core.windows.net/container/path
    #   -> https://SANITIZED.blob.core.windows.net/container/path
    # Storage account names: 3-24 characters, lowercase letters and numbers only.
    add_body_regex_sanitizer(
        regex=r"https://[a-z0-9]{3,24}\.blob\.core\.windows\.net",
        value="https://SANITIZED.blob.core.windows.net",
    )

    # Fallback for storage hostnames in URIs that are not followed by a path.
    add_uri_regex_sanitizer(
        regex=r"[a-z0-9]{3,24}\.blob\.core\.windows\.net(?!/)",
        value="SANITIZED.blob.core.windows.net",
    )

    # NOTE: a generic body sanitizer for bare storage hostnames is intentionally
    # not registered; it caused double-sanitization in request bodies.

    # Keep already-sanitized storage hostnames stable if sanitizers are re-applied
    # to query parameters during playback.
    add_uri_regex_sanitizer(
        regex=r"SANITIZED_[A-Z_]*STORAGE\.blob\.core\.windows\.net",
        value="SANITIZED_STORAGE.blob.core.windows.net",
    )

    # Service-generated UUIDs in URL paths, e.g.
    # /operations/8492e7c3-0531-44c9-b9e3-a6811c2b2078 -> /operations/<zero UUID>,
    # so that playback requests match the recordings.
    for segment in ("operations", "ingestion-sources", "ingestions", "runs"):
        add_uri_regex_sanitizer(
            regex=f"/{segment}/{uuid_pattern}",
            value=f"/{segment}/{zero_uuid}",
        )

    # UUID-valued "id" fields in JSON bodies (operation IDs, source IDs, etc.).
    add_body_regex_sanitizer(
        regex=rf'"id"\s*:\s*"{uuid_pattern}"',
        value=f'"id": "{zero_uuid}"',
    )

    # Headers that carry resource URLs containing a UUID: the LRO polling URL
    # (operation-location) and the created-resource URL (Location).
    for header_name, segment in (
        ("operation-location", "operations"),
        ("Location", "ingestion-sources"),
        ("Location", "ingestions"),
    ):
        add_header_regex_sanitizer(
            key=header_name,
            regex=f"/{segment}/{uuid_pattern}",
            value=f"/{segment}/{zero_uuid}",
        )

    # Collection IDs with random hash suffixes: naip-atl-bde3e846 -> naip-atl-00000000.
    # The service appends a random 8-character hex hash to collection IDs at runtime,
    # so the env var may be "naip-atl" while the service returns "naip-atl-bde3e846".
    # `or` guards against an empty value: an empty base would turn the pattern into
    # "-[a-f0-9]{8}", which would also rewrite parts of unrelated UUIDs.
    collection_id = os.environ.get("PLANETARYCOMPUTER_COLLECTION_ID") or "naip-atl"

    # If the configured ID already ends in a hash, strip it to get the base name;
    # otherwise the configured ID is the base name.
    hashed_match = re.match(r"^(.+)-[a-f0-9]{8}$", collection_id)
    collection_base = hashed_match.group(1) if hashed_match else collection_id
    escaped_base = re.escape(collection_base)

    # base-XXXXXXXX -> base-00000000, in URIs and as quoted JSON strings in bodies.
    add_uri_regex_sanitizer(
        regex=rf"{escaped_base}-[a-f0-9]{{8}}",
        value=f"{collection_base}-00000000",
    )
    add_body_regex_sanitizer(
        regex=rf'"{escaped_base}-[a-f0-9]{{8}}"',
        value=f'"{collection_base}-00000000"',
    )
