File: update_voice_data.py

package info (click to toggle)
hass-nabucasa 1.5.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 832 kB
  • sloc: python: 10,774; sh: 6; makefile: 3
file content (96 lines) | stat: -rwxr-xr-x 2,515 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/python3
"""Update the voice data."""

# ruff: noqa: T201

import asyncio
from pathlib import Path
import subprocess

import aiohttp

# Module that main() overwrites, located two directory levels above this
# script's own path and then inside the hass_nabucasa package directory.
voice_data_path = Path(__file__).parent.parent / "hass_nabucasa" / "voice_data.py"

# Region segment of the Microsoft speech host name queried for the voice list.
REGION = "westus"
LIST_VOICES_URL = (
    f"https://{REGION}.tts.speech.microsoft.com"
    "/cognitiveservices/voices/list"
)


def _build_voice_data(raw_data: list[dict]) -> dict[str, dict]:
    """Group the usable voices by locale and sort them for stable output.

    Args:
        raw_data: Voice entries as returned by get_data(). Each entry must
            provide "Status", "Locale", "ShortName" and "DisplayName", and
            may provide "StyleList".

    Returns:
        A mapping ``{locale: {voice_id: info}}`` with locales in sorted order
        and each locale's voices ordered by display name. ``info`` is the
        plain display name, or ``{"name": ..., "variants": [...]}`` when the
        entry carries a non-empty "StyleList".
    """
    data: dict[str, dict] = {}
    for voice in raw_data:
        # Only entries whose status is "GA" are kept (presumably "generally
        # available" - confirm against the service documentation).
        if voice["Status"] != "GA":
            continue

        locale = voice["Locale"]
        # Drop the leading locale and the single separator character that
        # follows it from ShortName to obtain the per-locale voice id.
        voice_id = voice["ShortName"][len(locale) + 1 :]

        voice_name = voice["DisplayName"]
        if voice_name.endswith("Neural"):
            # Remove exactly the suffix; the previous hard-coded [:-7] slice
            # ate one real character when no space preceded "Neural".
            voice_name = voice_name.removesuffix("Neural").strip()

        # Skip variants
        if ":" in voice_id or "Multilingual" in voice_id:
            continue

        style_list = voice.get("StyleList")
        data.setdefault(locale, {})[voice_id] = (
            {"name": voice_name, "variants": style_list} if style_list else voice_name
        )

    # Sort voices inside each locale by display name, then the locales
    # themselves, so that regenerating the file yields minimal diffs.
    return {
        locale: dict(
            sorted(
                data[locale].items(),
                key=lambda item: (
                    item[1]["name"] if isinstance(item[1], dict) else item[1]
                ),
            )
        )
        for locale in sorted(data)
    }


def main() -> None:
    """Run script.

    Prompts for an Azure token until a non-empty value is entered, downloads
    the voice list, rewrites ``voice_data_path`` with the generated
    ``TTS_VOICES`` mapping and formats the result with ``ruff format``.

    Raises:
        subprocess.CalledProcessError: If ``ruff format`` exits non-zero.
    """
    token = None

    while not token:
        token = input("Please enter your Azure token: ").strip()

    data = _build_voice_data(asyncio.run(get_data(token)))

    parts = [
        '"""',
        "Available voices for TTS.",
        "",
        "Automatically generated file, do not edit this file directly.",
        "Run python3 -m scripts/update_voice_data.py to update this file.",
        '"""',
        "",
        f"TTS_VOICES: dict[str, dict[str, dict | str]] = {data}",
    ]
    # Explicit encoding keeps the generated module independent of the
    # platform's locale default (voice names are written via repr()).
    voice_data_path.write_text("\n".join(parts), encoding="utf-8")
    subprocess.run(  # noqa: S603
        ["ruff", "format", voice_data_path],  # noqa: S607
        check=True,
        stdout=subprocess.DEVNULL,
    )
    print("Updated voice_data.py with new voice data.")


async def get_data(token: str) -> list[dict]:
    """Download the voice list from LIST_VOICES_URL.

    Args:
        token: Value sent as the bearer token in the Authorization header.

    Returns:
        The decoded JSON body of the response. main() iterates it and indexes
        every element by string key, i.e. it is treated as a list of
        per-voice dicts.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP
            error status (raised by ``raise_for_status``).
    """
    headers = {"Authorization": f"Bearer {token}"}
    async with aiohttp.ClientSession() as session:
        # Acquire the response through a context manager so it is released
        # on every exit path, including a failing raise_for_status().
        async with session.get(LIST_VOICES_URL, headers=headers) as response:
            response.raise_for_status()
            return await response.json()


# Run the update only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()