# Source code for banterbot.managers.azure_neural_voice_manager

import logging
import os
import re
from itertools import chain
from typing import Optional, Union

import azure.cognitiveservices.speech as speechsdk

from banterbot.data.enums import EnvVar
from banterbot.models.azure_neural_voice_profile import AzureNeuralVoiceProfile



# [docs]
class AzureNeuralVoiceManager:
    """
    Management utility for loading Microsoft Azure Cognitive Services Neural Voice models from the Speech SDK.

    Voices are downloaded lazily on first access and cached at class level as `AzureNeuralVoiceProfile` instances,
    keyed by a lowercase voice name. All later calls refer to these same cached instances.
    """

    # Class-level cache shared by all callers: lowercase key -> `AzureNeuralVoiceProfile`.
    _data = {}

    # Extracts language, country, an optional region, and the voice name from a voice's `short_name` attribute,
    # e.g. `zh-CN-sichuan-YunxiNeural` -> ("zh", "CN", "sichuan", "Yunxi").
    _SHORT_NAME_PATTERN = re.compile(r"([a-z]+)\-([A-Z]+)\-(?:(\w+)\-)?(\w+?)Neural")

    @classmethod
    def _resolve_key(cls, name: str, region: Optional[str] = None) -> str:
        """
        Compute the cache key under which a voice should be stored.

        The key is the lowercase voice name. If that key is already taken and the voice has a region, the lowercase
        region is appended (`name-region`) so that a regional variant does not overwrite the voice cached earlier.
        If the key is taken and no region is available, the plain key is returned and the later voice replaces the
        earlier one.

        Args:
            name (str): The voice name parsed from the voice's `short_name`.
            region (Optional[str]): The region parsed from the voice's `short_name`, if any.

        Returns:
            str: The lowercase cache key.
        """
        key = name.lower()

        # Cache keys are stored in lowercase, so the collision test must use the lowercase name as well.
        if key in cls._data and region:
            key = f"{key}-{region.lower()}"

        return key

    @classmethod
    def _download(cls) -> None:
        """
        Download all the Neural Voices to cache as instances of class `AzureNeuralVoiceProfile` using the
        `get_voices_async` method from the Microsoft Azure Cognitive Services Speech SDK class `SpeechSynthesizer`.

        The subscription key and region are read from the environment variables named by `EnvVar.AZURE_SPEECH_KEY`
        and `EnvVar.AZURE_SPEECH_REGION`. Voices whose `short_name` cannot be parsed are skipped with a debug log.
        """
        speech_config = speechsdk.SpeechConfig(
            subscription=os.environ.get(EnvVar.AZURE_SPEECH_KEY.value),
            region=os.environ.get(EnvVar.AZURE_SPEECH_REGION.value),
        )
        synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)
        synthesis_voices_result = synthesizer.get_voices_async().get()

        for voice in synthesis_voices_result.voices:
            # Specify that we only want to store Neural Voices.
            if voice.voice_type != speechsdk.SynthesisVoiceType.OnlineNeural:
                continue

            match = cls._SHORT_NAME_PATTERN.fullmatch(voice.short_name)
            if not match:
                logging.debug("Unable to parse Azure Cognitive Services Neural Voice: %s.", voice.short_name)
                continue

            # `region` is None when the short name carries no region segment.
            language, country, region, name = match.groups()

            cls._data[cls._resolve_key(name=name, region=region)] = AzureNeuralVoiceProfile(
                country=country,
                description=voice.name,
                gender=voice.gender,
                language=language,
                locale=voice.locale,
                name=name,
                short_name=voice.short_name.lower(),
                style_list=voice.style_list if voice.style_list else None,
                region=region,
            )

    @classmethod
    def data(cls) -> dict[str, AzureNeuralVoiceProfile]:
        """
        Access the data dictionary, downloading it first using the `_download` classmethod if necessary.

        Returns:
            dict[str, AzureNeuralVoiceProfile]: A dict containing the downloaded `AzureNeuralVoiceProfile` instances,
            keyed by lowercase voice name.
        """
        # An empty cache means nothing has been downloaded yet (or the last download returned no voices).
        if not cls._data:
            cls._download()

        return cls._data

    @classmethod
    def _preprocess_search_arg(cls, arg: Optional[Union[list[str], str]] = None) -> Optional[list[str]]:
        """
        Prepare an arbitrary argument given to the `search` method by lowering its value(s).

        Args:
            arg (Optional[Union[list[str], str]]): A string, a list/tuple/set of strings, or None.

        Returns:
            Optional[list[str]]: A single-element list for a string, a sorted list of lowered values for a
            collection, or None when the argument is empty or of an unsupported type (meaning "do not filter").
        """
        if not arg:
            return None
        if isinstance(arg, str):
            return [arg.lower()]
        if isinstance(arg, (list, tuple, set)):
            return sorted(item.lower() for item in arg)

        # Unsupported types are treated like a missing argument.
        return None

    @classmethod
    def search(
        cls,
        gender: Optional[Union[list[str], str]] = None,
        language: Optional[Union[list[str], str]] = None,
        country: Optional[Union[list[str], str]] = None,
        region: Optional[Union[list[str], str]] = None,
        style: Optional[Union[list[str], str]] = None,
    ) -> list[AzureNeuralVoiceProfile]:
        """
        Search through all the available Microsoft Azure Cognitive Services Neural Voice models using any combination
        of the provided arguments to get a list of relevant `AzureNeuralVoiceProfile` instances. For information on
        searchable languages, countries, and regions, visit:

        https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts#supported-languages

        All comparisons are case-insensitive. For `gender`, `language`, `country`, and `region`, a voice matches when
        its value is any one of the given values. For `style`, a voice matches only when it supports every given
        style.

        Args:
            gender (Optional[Union[list[str], str]]): Can take the values MALE, FEMALE, and/or UNKNOWN.
            language (Optional[Union[list[str], str]]): Can take any language abbreviations (e.g., en, fr, etc.)
            country (Optional[Union[list[str], str]]): Can take any country abbreviations (e.g., US, FR, etc.)
            region (Optional[Union[list[str], str]]): Can take any region names (e.g., shaanxi, sichuan, etc.)
            style (Optional[Union[list[str], str]]): Can take any speaking style names; all must be supported.

        Returns:
            list[AzureNeuralVoiceProfile]: A list of `AzureNeuralVoiceProfile` instances.
        """
        # Convert any provided string values to lowercase if applicable; None means "do not filter on this field".
        gender = cls._preprocess_search_arg(arg=gender)
        language = cls._preprocess_search_arg(arg=language)
        country = cls._preprocess_search_arg(arg=country)
        region = cls._preprocess_search_arg(arg=region)
        style = cls._preprocess_search_arg(arg=style)

        search_results = []

        for voice in cls.data().values():
            # Convert the voice attributes to lowercase if applicable.
            voice_gender = voice.gender.name.lower() if voice.gender.name else None
            voice_language = voice.language.lower() if voice.language else None
            voice_country = voice.country.lower() if voice.country else None
            voice_region = voice.region.lower() if voice.region else None
            voice_styles = [s.lower() for s in voice.style_list] if voice.style_list else []

            conditions = (
                gender is None or voice_gender in gender,
                language is None or voice_language in language,
                country is None or voice_country in country,
                region is None or voice_region in region,
                style is None or all(s in voice_styles for s in style),
            )

            # If all conditions are met, add the voice to the search results.
            if all(conditions):
                search_results.append(voice)

        return search_results

    @classmethod
    def list_countries(cls) -> list[str]:
        """
        Returns a sorted list of the country codes of the available voices (e.g., FR, US, etc.)

        Returns:
            list[str]: A list of country codes.
        """
        voices = cls.data()
        return sorted({voice.country for voice in voices.values() if voice.country})

    @classmethod
    def list_genders(cls) -> list[str]:
        """
        Returns a sorted list of available voice genders.

        Returns:
            list[str]: A list of genders.
        """
        voices = cls.data()
        return sorted({voice.gender.name for voice in voices.values() if voice.gender.name})

    @classmethod
    def list_languages(cls) -> list[str]:
        """
        Returns a sorted list of the language codes of the available voices (e.g., en, fr, etc.)

        Returns:
            list[str]: A list of language codes.
        """
        voices = cls.data()
        return sorted({voice.language for voice in voices.values() if voice.language})

    @classmethod
    def list_locales(cls) -> list[str]:
        """
        Returns a sorted list of locales, which are language codes followed by countries, in some cases followed by a
        region (e.g., en-US, fr-FR, etc.).

        Returns:
            list[str]: A list of locales.
        """
        voices = cls.data()
        return sorted({voice.locale for voice in voices.values() if voice.locale})

    @classmethod
    def list_regions(cls) -> list[str]:
        """
        Returns a sorted list of regions (e.g., sichuan, shandong, etc.)

        Returns:
            list[str]: A list of regions.
        """
        voices = cls.data()
        return sorted({voice.region for voice in voices.values() if voice.region})

    @classmethod
    def list_styles(cls) -> list[str]:
        """
        Returns a sorted list of the speaking styles supported by at least one voice (e.g., cheerful, sad, etc.)

        Returns:
            list[str]: A list of styles.
        """
        voices = cls.data()
        return sorted(set(chain.from_iterable(voice.style_list for voice in voices.values() if voice.style_list)))

    @classmethod
    def load(cls, name: str) -> AzureNeuralVoiceProfile:
        """
        Retrieve an `AzureNeuralVoiceProfile` instance by name, downloading the voice list first if necessary.

        Args:
            name (str): The name of the voice profile (case-insensitive).

        Returns:
            AzureNeuralVoiceProfile: The cached `AzureNeuralVoiceProfile` instance for the specified name.

        Raises:
            KeyError: If the specified name is not found among the available voices.
        """
        voices = cls.data()
        key = name.lower()

        if key not in voices:
            message = (
                "BanterBot was unable to locate a Microsoft Azure Cognitive Services Neural Voice model named: "
                f"`{key}`. Use AzureNeuralVoiceManager.search(gender, language, country, region) to search for a "
                "specified gender, language, country, and/or region. These arguments can be strings, lists of strings, "
                "or None."
            )
            raise KeyError(message)

        return voices[key]