File: update_voice_data.py

package info (click to toggle)
hass-nabucasa 1.5.1-1
links: PTS, VCS
area: main
in suites: forky, sid
size: 832 kB
sloc: python: 10,774; sh: 6; makefile: 3
file content (96 lines) | stat: -rwxr-xr-x 2,515 bytes
parent folder | download | duplicates (2)
#!/usr/bin/python3
"""Update the voice data."""

# ruff: noqa: T201

import asyncio
from pathlib import Path
import subprocess

import aiohttp

# Generated module that receives the voice table; resolved relative to the
# directory that contains this script's own directory.
voice_data_path = Path(__file__).parent.parent.joinpath("hass_nabucasa/voice_data.py")

# Region name interpolated into the host of the voices-list endpoint.
REGION = "westus"
LIST_VOICES_URL = (
    f"https://{REGION}.tts.speech.microsoft.com"
    "/cognitiveservices/voices/list"
)


def _voice_sort_key(entry: tuple[str, dict | str]) -> str:
    """Return the display name a ``(voice_id, voice_info)`` pair is sorted by."""
    voice_info = entry[1]
    return voice_info["name"] if isinstance(voice_info, dict) else voice_info


def _build_voice_data(raw_data: list[dict]) -> dict[str, dict[str, dict | str]]:
    """Convert the raw voice list into the sorted ``TTS_VOICES`` mapping.

    Only voices whose ``Status`` is ``"GA"`` are kept, and voices whose ID
    contains ``:`` or ``Multilingual`` are skipped. A voice with a non-empty
    ``StyleList`` is stored as ``{"name": ..., "variants": [...]}``; any other
    voice is stored as its plain display name. Locales are ordered by key and
    the voices inside each locale by display name.
    """
    data: dict[str, dict[str, dict | str]] = {}
    for voice in raw_data:
        if voice["Status"] != "GA":
            continue
        locale = voice["Locale"]
        # Strip the leading locale plus the one separator character after it.
        voice_id = voice["ShortName"][len(locale) + 1 :]
        voice_name = voice["DisplayName"]
        if voice_name.endswith("Neural"):
            # Remove exactly the suffix; strip() then drops a separating
            # space if there was one, so names without one stay intact.
            voice_name = voice_name.removesuffix("Neural").strip()

        # Skip variants
        if ":" in voice_id or "Multilingual" in voice_id:
            continue

        voice_info: dict | str
        if style_list := voice.get("StyleList"):
            voice_info = {"name": voice_name, "variants": style_list}
        else:
            voice_info = voice_name

        data.setdefault(locale, {})[voice_id] = voice_info

    # Locale keys are unique, so sorting the items only ever compares keys.
    return {
        locale: dict(sorted(voices.items(), key=_voice_sort_key))
        for locale, voices in sorted(data.items())
    }


def main() -> None:
    """Run script.

    Prompts for an Azure token until a non-blank one is entered, downloads the
    voice list, rewrites ``voice_data_path`` with the generated ``TTS_VOICES``
    mapping and formats that file with ``ruff format``.

    Raises:
        subprocess.CalledProcessError: If ``ruff format`` exits with a
            non-zero status.
    """
    token = None

    while not token:
        token = input("Please enter your Azure token: ").strip()

    data = _build_voice_data(asyncio.run(get_data(token)))

    parts = [
        '"""',
        "Available voices for TTS.",
        "",
        "Automatically generated file, do not edit this file directly.",
        "Run python3 -m scripts/update_voice_data.py to update this file.",
        '"""',
        "",
        f"TTS_VOICES: dict[str, dict[str, dict | str]] = {data}",
    ]
    # Python source files are UTF-8 by default, so do not depend on the
    # locale's preferred encoding when writing the generated module.
    voice_data_path.write_text("\n".join(parts), encoding="utf-8")
    subprocess.run(  # noqa: S603
        ["ruff", "format", voice_data_path],  # noqa: S607
        check=True,
        stdout=subprocess.DEVNULL,
    )
    print("Updated voice_data.py with new voice data.")


async def get_data(token: str) -> list[dict]:
    """Gather data.

    Requests ``LIST_VOICES_URL`` with the token as a bearer credential and
    returns the decoded JSON body.

    Args:
        token: Value sent as ``Bearer <token>`` in the ``Authorization`` header.

    Returns:
        The decoded JSON response; ``main`` iterates it as a sequence of
        per-voice dictionaries.

    Raises:
        aiohttp.ClientResponseError: If the response carries an HTTP error
            status (raised by ``raise_for_status``).
    """
    headers = {"Authorization": f"Bearer {token}"}
    # Enter the response as a context manager so it is released on every
    # path, including when raise_for_status() raises.
    async with (
        aiohttp.ClientSession() as session,
        session.get(LIST_VOICES_URL, headers=headers) as response,
    ):
        response.raise_for_status()
        return await response.json()


# Run the updater only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()