Module `livekit.plugins.speechify`

Speechify plugin for LiveKit Agents

See https://docs.livekit.io/agents/integrations/tts/speechify/ for more information.

Classes

class TTS (*, voice_id: NotGivenOr[str] = 'jack', encoding: NotGivenOr[TTSEncoding] = NOT_GIVEN, model: NotGivenOr[TTSModels] = NOT_GIVEN, base_url: NotGivenOr[str] = NOT_GIVEN, api_key: NotGivenOr[str] = NOT_GIVEN, language: NotGivenOr[str] = NOT_GIVEN, loudness_normalization: NotGivenOr[bool] = NOT_GIVEN, text_normalization: NotGivenOr[bool] = NOT_GIVEN, http_session: aiohttp.ClientSession | None = None, follow_redirects: bool = True)

Expand source code

class TTS(tts.TTS):
    """Speechify text-to-speech for LiveKit Agents.

    The instance is registered with the base class as non-streaming
    (``streaming=False``) and single-channel; :meth:`synthesize` returns a
    ``ChunkedStream`` for the given text.
    """

    def __init__(
        self,
        *,
        voice_id: NotGivenOr[str] = DEFAULT_VOICE_ID,
        encoding: NotGivenOr[TTSEncoding] = NOT_GIVEN,
        model: NotGivenOr[TTSModels] = NOT_GIVEN,
        base_url: NotGivenOr[str] = NOT_GIVEN,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        language: NotGivenOr[str] = NOT_GIVEN,
        loudness_normalization: NotGivenOr[bool] = NOT_GIVEN,
        text_normalization: NotGivenOr[bool] = NOT_GIVEN,
        http_session: aiohttp.ClientSession | None = None,
        follow_redirects: bool = True,
    ) -> None:
        """
        Create a new instance of Speechify TTS.

        Args:
            voice_id (NotGivenOr[str]): Voice ID. Defaults to `DEFAULT_VOICE_ID` (`jack`). A falsy value also falls back to `DEFAULT_VOICE_ID`.
            encoding (NotGivenOr[TTSEncoding]): Audio encoding to use. Optional. Falls back to `_DefaultEncoding` when not given. The sample rate is derived from the encoding.
            model (NotGivenOr[TTSModels]): TTS model to use. Optional.
            base_url (NotGivenOr[str]): Custom base URL for the API. Optional. Defaults to `API_BASE_URL_V1`.
            api_key (NotGivenOr[str]): Speechify API key. Can be passed as an argument or set via the `SPEECHIFY_API_KEY` environment variable.
            language (NotGivenOr[str]): Language code for the TTS model. Optional.
            loudness_normalization (NotGivenOr[bool]): Whether to normalize the loudness of the audio. Optional.
            text_normalization (NotGivenOr[bool]): Whether to normalize the text. Optional.
            http_session (aiohttp.ClientSession | None): Custom HTTP session for API requests. Optional.
            follow_redirects (bool): Whether to follow redirects in HTTP requests. Defaults to True.

        Raises:
            ValueError: If no API key is available (neither the argument nor the
                environment variable yields a non-empty value).
        """  # noqa: E501

        if not is_given(encoding):
            encoding = _DefaultEncoding

        # Derive the sample rate once so the base class and the stored options
        # are guaranteed to agree.
        sample_rate = _sample_rate_from_encoding(encoding)

        super().__init__(
            capabilities=tts.TTSCapabilities(
                streaming=False,
            ),
            sample_rate=sample_rate,
            num_channels=1,
        )

        # An explicitly passed key wins over the environment variable.
        speechify_token = api_key if is_given(api_key) else os.environ.get("SPEECHIFY_API_KEY")
        if not speechify_token:
            raise ValueError(
                "Speechify API key is required, either as argument or set SPEECHIFY_API_KEY environment variable"  # noqa: E501
            )

        self._opts = _TTSOptions(
            model=model,
            # `or` also covers a falsy voice_id, not only the default.
            voice_id=voice_id or DEFAULT_VOICE_ID,
            language=language,
            base_url=base_url if is_given(base_url) else API_BASE_URL_V1,
            token=speechify_token,
            follow_redirects=follow_redirects,
            encoding=encoding,
            sample_rate=sample_rate,
            loudness_normalization=loudness_normalization,
            text_normalization=text_normalization,
        )
        # May be None; a session is then obtained lazily in _ensure_session().
        self._session = http_session

    @property
    def model(self) -> str:
        """Configured model name, or ``"unknown"`` when no model was given."""
        return self._opts.model if is_given(self._opts.model) else "unknown"

    @property
    def provider(self) -> str:
        """Provider name reported for this TTS instance (always ``"Speechify"``)."""
        return "Speechify"

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the HTTP session, obtaining one from ``utils.http_context`` on first use."""
        if not self._session:
            self._session = utils.http_context.http_session()

        return self._session

    async def list_voices(self) -> list[Voice]:
        """
        Fetch the voice list from ``{base_url}/voices``.

        Returns:
            The decoded JSON response body, returned as-is.
        """
        # NOTE(review): the HTTP status is not checked and the payload is not
        # converted into Voice instances, so an error response that carries a
        # JSON body would be returned to the caller unchanged.
        async with self._ensure_session().get(
            f"{self._opts.base_url}/voices", headers=_get_headers(self._opts.token)
        ) as resp:
            return await resp.json()  # type: ignore

    def update_options(
        self,
        *,
        voice_id: NotGivenOr[str] = NOT_GIVEN,
        model: NotGivenOr[TTSModels] = NOT_GIVEN,
        language: NotGivenOr[str] = NOT_GIVEN,
        loudness_normalization: NotGivenOr[bool] = NOT_GIVEN,
        text_normalization: NotGivenOr[bool] = NOT_GIVEN,
    ) -> None:
        """
        Update the stored options in place. Arguments left as `NOT_GIVEN` keep
        their current value.

        Args:
            voice_id (NotGivenOr[str]): Voice ID.
            model (NotGivenOr[TTSModels]): TTS model to use.
            language (NotGivenOr[str]): Language code for the TTS model.
            loudness_normalization (NotGivenOr[bool]): Whether to normalize the loudness of the audio.
            text_normalization (NotGivenOr[bool]): Whether to normalize the text.
        """  # noqa: E501
        if is_given(model):
            self._opts.model = cast(TTSModels, model)
        if is_given(voice_id):
            self._opts.voice_id = voice_id
        if is_given(language):
            self._opts.language = language
        if is_given(loudness_normalization):
            self._opts.loudness_normalization = loudness_normalization
        if is_given(text_normalization):
            self._opts.text_normalization = text_normalization

    def synthesize(
        self,
        text: str,
        *,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> ChunkedStream:
        """
        Create a ``ChunkedStream`` that synthesizes ``text`` with this instance.

        Args:
            text (str): Text to synthesize.
            conn_options (APIConnectOptions): Connection options passed through to the stream.

        Returns:
            ChunkedStream: Stream bound to this TTS instance and the given text.
        """  # noqa: E501
        return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)

Helper class that provides a standard way to create an ABC using inheritance.

Create a new instance of Speechify TTS.

Args

voice_id : NotGivenOr[str]: Voice ID. Defaults to jack.
encoding : NotGivenOr[TTSEncoding]: Audio encoding to use. Optional. Defaults to wav_48000.
model : NotGivenOr[TTSModels]: TTS model to use. Optional.
base_url : NotGivenOr[str]: Custom base URL for the API. Optional.
api_key : NotGivenOr[str]: Speechify API key. Can be passed as an argument or set via the SPEECHIFY_API_KEY environment variable.
language : NotGivenOr[str]: Language code for the TTS model. Optional.
loudness_normalization : NotGivenOr[bool]: Whether to normalize the loudness of the audio. Optional.
text_normalization : NotGivenOr[bool]: Whether to normalize the text. Optional.
http_session : aiohttp.ClientSession | None: Custom HTTP session for API requests. Optional.
follow_redirects : bool: Whether to follow redirects in HTTP requests. Defaults to True.

Ancestors

livekit.agents.tts.tts.TTS
abc.ABC
EventEmitter
typing.Generic

Instance variables

prop model : str

Expand source code

@property
def model(self) -> str:
    """Return the configured model name, or ``"unknown"`` when none was given."""
    configured = self._opts.model
    if not is_given(configured):
        return "unknown"
    return configured

Get the model name/identifier for this TTS instance.

Returns

The model name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their model information.

prop provider : str

Expand source code

@property
def provider(self) -> str:
    """Return the provider name for this TTS instance (always ``"Speechify"``)."""
    return "Speechify"

Get the provider name/identifier for this TTS instance.

Returns

The provider name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their provider information.

Methods

async def list_voices(self) ‑> list[livekit.plugins.speechify.tts.Voice]

Expand source code

async def list_voices(self) -> list[Voice]:
    """Request ``{base_url}/voices`` and return the decoded JSON body as-is."""
    session = self._ensure_session()
    voices_url = f"{self._opts.base_url}/voices"
    auth_headers = _get_headers(self._opts.token)
    # NOTE(review): the response status is not inspected before decoding.
    async with session.get(voices_url, headers=auth_headers) as resp:
        payload = await resp.json()
    return payload  # type: ignore

def synthesize(self, text: str, *, conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.speechify.tts.ChunkedStream

Expand source code

def synthesize(
    self,
    text: str,
    *,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> ChunkedStream:
    """Build a ``ChunkedStream`` for ``text``, bound to this TTS instance.

    ``conn_options`` is passed through to the stream unchanged.
    """
    stream = ChunkedStream(
        tts=self,
        input_text=text,
        conn_options=conn_options,
    )
    return stream

def update_options(self, *, voice_id: NotGivenOr[str] = NOT_GIVEN, model: NotGivenOr[TTSModels] = NOT_GIVEN, language: NotGivenOr[str] = NOT_GIVEN, loudness_normalization: NotGivenOr[bool] = NOT_GIVEN, text_normalization: NotGivenOr[bool] = NOT_GIVEN) ‑> None

Expand source code

def update_options(
    self,
    *,
    voice_id: NotGivenOr[str] = NOT_GIVEN,
    model: NotGivenOr[TTSModels] = NOT_GIVEN,
    language: NotGivenOr[str] = NOT_GIVEN,
    loudness_normalization: NotGivenOr[bool] = NOT_GIVEN,
    text_normalization: NotGivenOr[bool] = NOT_GIVEN,
) -> None:
    """
    Update the stored options in place. Arguments left as `NOT_GIVEN` keep
    their current value.

    Args:
        voice_id (NotGivenOr[str]): Voice ID.
        model (NotGivenOr[TTSModels]): TTS model to use.
        language (NotGivenOr[str]): Language code for the TTS model.
        loudness_normalization (NotGivenOr[bool]): Whether to normalize the loudness of the audio.
        text_normalization (NotGivenOr[bool]): Whether to normalize the text.
    """  # noqa: E501
    if is_given(model):
        # cast() only narrows the type for the checker; the value is stored as passed.
        self._opts.model = cast(TTSModels, model)
    if is_given(voice_id):
        self._opts.voice_id = voice_id
    if is_given(language):
        self._opts.language = language
    if is_given(loudness_normalization):
        self._opts.loudness_normalization = loudness_normalization
    if is_given(text_normalization):
        self._opts.text_normalization = text_normalization

Args

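voice_id : NotGivenOr[str]: Voice ID.
model : NotGivenOr[TTSModels]: TTS model to use.
language : NotGivenOr[str]: Language code for the TTS model.
loudness_normalization : NotGivenOr[bool]: Whether to normalize the loudness of the audio.
text_normalization : NotGivenOr[bool]: Whether to normalize the text.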

Inherited members

EventEmitter:
- emit
- off
- on
- once

class Voice (id: str, type: VoiceType, display_name: str, gender: Gender, avatar_image: str | None, models: list[TTSModels], locale: str)

Expand source code

@dataclass
class Voice:
    """Record type used to annotate the entries returned by ``TTS.list_voices``."""

    # Voice identifier (presumably the value accepted as `voice_id` — confirm).
    id: str
    # Voice category; rendered as Literal['shared', 'personal'].
    type: VoiceType
    # Name of the voice intended for display.
    display_name: str
    # Rendered as Literal['male', 'female', 'neutral'].
    gender: Gender
    # Avatar image reference, or None when the voice has none.
    avatar_image: str | None
    # Models associated with the voice; rendered as a list of
    # Literal['simba-english', 'simba-multilingual'].
    models: list[TTSModels]
    # Locale string for the voice.
    locale: str

Voice(id: 'str', type: 'VoiceType', display_name: 'str', gender: 'Gender', avatar_image: 'str | None', models: 'list[TTSModels]', locale: 'str')

Instance variables

var avatar_image : str | None
var display_name : str
var gender : Literal['male', 'female', 'neutral']
var id : str
var locale : str
var models : list[typing.Literal['simba-english', 'simba-multilingual']]
var type : Literal['shared', 'personal']