Module livekit.plugins.inworld

Inworld plugin for LiveKit Agents

See https://docs.livekit.io/agents/integrations/tts/inworld/ for more information.

Classes

class ChunkedStream (*,
tts: TTS,
input_text: str,
conn_options: APIConnectOptions)
Expand source code
class ChunkedStream(tts.ChunkedStream):
    """One-shot synthesis of a complete text over Inworld's HTTP streaming endpoint.

    The whole input text is POSTed to ``/tts/v1/voice:stream``. The response body is
    read line by line; every non-empty line is parsed as a JSON object that carries
    either a ``result`` (audio and/or timestamp info) or an ``error``.
    """

    def __init__(self, *, tts: TTS, input_text: str, conn_options: APIConnectOptions) -> None:
        super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
        self._tts: TTS = tts
        # Keep a separate options object for this request instead of sharing the TTS one.
        self._opts = replace(tts._opts)

    async def _run(self, output_emitter: tts.AudioEmitter) -> None:
        """Send the synthesis request and forward the streamed audio to ``output_emitter``.

        Args:
            output_emitter: Emitter that receives decoded audio bytes and, when the
                response carries ``timestampInfo``, timed transcripts.

        Raises:
            APITimeoutError: The request timed out.
            APIStatusError: The HTTP response had an error status, or the response
                stream contained an ``error`` object.
            APIConnectionError: Any other failure while performing the request.
        """
        # Created before the ``try`` so every handler below can attach it to its error.
        x_request_id = str(uuid.uuid4())
        try:
            audio_config: dict[str, Any] = {
                "audioEncoding": self._opts.encoding,
                "bitrate": self._opts.bit_rate,
                "sampleRateHertz": self._opts.sample_rate,
                "temperature": self._opts.temperature,
                "speakingRate": self._opts.speaking_rate,
            }

            body_params: dict[str, Any] = {
                "text": self._input_text,
                "voiceId": self._opts.voice,
                "modelId": self._opts.model,
                "audioConfig": audio_config,
            }
            # Optional fields are only sent when the caller explicitly configured them.
            if utils.is_given(self._opts.timestamp_type):
                body_params["timestampType"] = self._opts.timestamp_type
            if utils.is_given(self._opts.text_normalization):
                body_params["applyTextNormalization"] = self._opts.text_normalization
            body_params["timestampTransportStrategy"] = self._opts.timestamp_transport_strategy

            async with self._tts._ensure_session().post(
                urljoin(self._tts._base_url, "/tts/v1/voice:stream"),
                headers={
                    "Authorization": self._tts._authorization,
                    "X-User-Agent": USER_AGENT,
                    "X-Request-Id": x_request_id,
                },
                json=body_params,
                timeout=aiohttp.ClientTimeout(sock_connect=self._conn_options.timeout),
                # large read_bufsize to avoid `ValueError: Chunk too big`
                read_bufsize=10 * 1024 * 1024,
            ) as resp:
                resp.raise_for_status()

                request_id = utils.shortuuid()
                output_emitter.initialize(
                    request_id=request_id,
                    sample_rate=self._opts.sample_rate,
                    num_channels=NUM_CHANNELS,
                    mime_type=self._opts.mime_type,
                )

                async for raw_line in resp.content:
                    line = raw_line.strip()
                    if not line:
                        continue

                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError:
                        # A malformed line is logged and skipped; the stream continues.
                        logger.warning("failed to parse Inworld response line: %s", line)
                        continue

                    if result := data.get("result"):
                        # Handle timestamp info if present
                        if timestamp_info := result.get("timestampInfo"):
                            timed_strings = _parse_timestamp_info(timestamp_info)
                            if timed_strings:
                                output_emitter.push_timed_transcript(timed_strings)

                        if audio_content := result.get("audioContent"):
                            # Audio arrives base64-encoded inside the JSON payload.
                            output_emitter.push(base64.b64decode(audio_content))
                            output_emitter.flush()
                    elif error := data.get("error"):
                        raise APIStatusError(
                            message=error.get("message"),
                            status_code=error.get("code"),
                            request_id=x_request_id,
                            body=None,
                        )
        except asyncio.TimeoutError:
            raise APITimeoutError() from None
        except aiohttp.ClientResponseError as e:
            raise APIStatusError(
                message=e.message, status_code=e.status, request_id=x_request_id, body=None
            ) from None
        except APIError:
            # The in-band ``error`` above is already an API error; let it through
            # untouched instead of re-wrapping it as a generic connection error,
            # which would drop its message, status code and request id.
            raise
        except Exception as e:
            raise APIConnectionError() from e

Used by the non-streamed synthesize API. Some providers support chunked HTTP responses.

Ancestors

  • livekit.agents.tts.tts.ChunkedStream
  • abc.ABC
class SynthesizeStream (*,
tts: TTS,
conn_options: APIConnectOptions)
Expand source code
class SynthesizeStream(tts.SynthesizeStream):
    """Incremental synthesis: pushed text is sentence-tokenized and sent over a pooled connection."""

    def __init__(self, *, tts: TTS, conn_options: APIConnectOptions):
        super().__init__(tts=tts, conn_options=conn_options)
        self._tts: TTS = tts
        # Keep a separate options object for this stream instead of sharing the TTS one.
        self._opts = replace(tts._opts)

    async def _run(self, output_emitter: tts.AudioEmitter) -> None:
        """Drive one synthesis context from input text to emitted audio.

        Initializes ``output_emitter`` in streaming mode, acquires a context from the
        TTS connection pool, and runs two helper tasks: one feeding the input channel
        into the sentence tokenizer, one sending tokenized text on the connection.
        It then waits on the ``waiter`` returned by the pool.

        Args:
            output_emitter: Emitter handed to the pool when the context is acquired.

        Raises:
            APITimeoutError: ``waiter`` did not complete within the connect timeout
                plus 60 seconds.
            APIError: Re-raised unchanged when ``waiter`` fails with one.
            APIConnectionError: Any other exception raised while waiting.
        """
        request_id = utils.shortuuid()
        sent_tokenizer_stream = self._tts._sentence_tokenizer.stream()

        output_emitter.initialize(
            request_id=request_id,
            sample_rate=self._opts.sample_rate,
            num_channels=NUM_CHANNELS,
            mime_type=self._opts.mime_type,
            stream=True,
        )

        # NOTE(review): the tokenizer stream above is created before this acquisition and
        # the try/finally below only starts afterwards, so a failure here skips
        # `sent_tokenizer_stream.aclose()` — confirm whether that matters.
        pool = await self._tts._get_pool()
        context_id, waiter, connection = await pool.acquire_context(
            emitter=output_emitter,
            opts=self._opts,
            timeout=self._conn_options.timeout,
        )

        async def _input_task() -> None:
            # Forward pushed text into the tokenizer; a flush sentinel flushes it.
            async for data in self._input_ch:
                if isinstance(data, self._FlushSentinel):
                    sent_tokenizer_stream.flush()
                    continue
                sent_tokenizer_stream.push_text(data)
            # Input channel exhausted: tell the tokenizer no more text is coming.
            sent_tokenizer_stream.end_input()

        async def _send_task() -> None:
            async for ev in sent_tokenizer_stream:
                text = ev.token
                # Chunk to stay within Inworld's 1000 char limit
                for i in range(0, len(text), 1000):
                    connection.send_text(context_id, text[i : i + 1000])
                    self._mark_started()
            # All text has been sent: flush what is buffered, then close the context.
            connection.flush_context(context_id)
            connection.close_context(context_id)

        tasks = [
            asyncio.create_task(_input_task()),
            asyncio.create_task(_send_task()),
        ]

        try:
            # presumably `waiter` completes once the server is done with this context —
            # verify against the connection pool implementation.
            await asyncio.wait_for(waiter, timeout=self._conn_options.timeout + 60)
        except asyncio.TimeoutError:
            connection.close_context(context_id)
            raise APITimeoutError() from None
        except asyncio.CancelledError:
            connection.close_context(context_id)
            raise
        except APIError:
            # NOTE(review): unlike the other handlers, this one does not close the
            # context — confirm that is intentional.
            raise
        except Exception as e:
            logger.error("Inworld stream error", extra={"context_id": context_id, "error": e})
            connection.close_context(context_id)
            raise APIConnectionError() from e
        finally:
            # Always stop the helper tasks and release the tokenizer and emitter.
            await utils.aio.gracefully_cancel(*tasks)
            await sent_tokenizer_stream.aclose()
            output_emitter.end_input()

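Streaming synthesis session: text pushed to the stream is split into sentences, sent over a pooled Inworld connection, and emitted as audio. (The class defines no docstring of its own; the generated page falls back to the inherited `abc.ABC` text, "Helper class that provides a standard way to create an ABC using inheritance.")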

Ancestors

  • livekit.agents.tts.tts.SynthesizeStream
  • abc.ABC
class TTS (*,
api_key: NotGivenOr[str] = NOT_GIVEN,
voice: NotGivenOr[str] = NOT_GIVEN,
model: NotGivenOr[str] = NOT_GIVEN,
encoding: NotGivenOr[Encoding] = NOT_GIVEN,
bit_rate: NotGivenOr[int] = NOT_GIVEN,
sample_rate: NotGivenOr[int] = NOT_GIVEN,
speaking_rate: NotGivenOr[float] = NOT_GIVEN,
temperature: NotGivenOr[float] = NOT_GIVEN,
timestamp_type: NotGivenOr[TimestampType] = NOT_GIVEN,
text_normalization: NotGivenOr[TextNormalization] = NOT_GIVEN,
timestamp_transport_strategy: NotGivenOr[TimestampTransportStrategy] = NOT_GIVEN,
buffer_char_threshold: NotGivenOr[int] = NOT_GIVEN,
max_buffer_delay_ms: NotGivenOr[int] = NOT_GIVEN,
base_url: str = 'https://api.inworld.ai/',
ws_url: str = 'wss://api.inworld.ai/',
http_session: aiohttp.ClientSession | None = None,
tokenizer: NotGivenOr[tokenize.SentenceTokenizer] = NOT_GIVEN,
retain_format: NotGivenOr[bool] = NOT_GIVEN,
max_connections: int = 20,
idle_connection_timeout: float = 300.0)
Expand source code
class TTS(tts.TTS):
    """Inworld text-to-speech client.

    ``synthesize`` returns a ``ChunkedStream`` for a complete text and ``stream``
    returns a ``SynthesizeStream`` for incrementally pushed text. Streaming
    sessions share a lazily created connection pool.
    """

    def __init__(
        self,
        *,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        voice: NotGivenOr[str] = NOT_GIVEN,
        model: NotGivenOr[str] = NOT_GIVEN,
        encoding: NotGivenOr[Encoding] = NOT_GIVEN,
        bit_rate: NotGivenOr[int] = NOT_GIVEN,
        sample_rate: NotGivenOr[int] = NOT_GIVEN,
        speaking_rate: NotGivenOr[float] = NOT_GIVEN,
        temperature: NotGivenOr[float] = NOT_GIVEN,
        timestamp_type: NotGivenOr[TimestampType] = NOT_GIVEN,
        text_normalization: NotGivenOr[TextNormalization] = NOT_GIVEN,
        timestamp_transport_strategy: NotGivenOr[TimestampTransportStrategy] = NOT_GIVEN,
        buffer_char_threshold: NotGivenOr[int] = NOT_GIVEN,
        max_buffer_delay_ms: NotGivenOr[int] = NOT_GIVEN,
        base_url: str = DEFAULT_URL,
        ws_url: str = DEFAULT_WS_URL,
        http_session: aiohttp.ClientSession | None = None,
        tokenizer: NotGivenOr[tokenize.SentenceTokenizer] = NOT_GIVEN,
        retain_format: NotGivenOr[bool] = NOT_GIVEN,
        max_connections: int = DEFAULT_MAX_CONNECTIONS,
        idle_connection_timeout: float = DEFAULT_IDLE_CONNECTION_TIMEOUT,
    ) -> None:
        """
        Create a new instance of Inworld TTS.

        Args:
            api_key (str, optional): The Inworld API key.
                If not provided, it will be read from the INWORLD_API_KEY environment variable.
            voice (str, optional): The voice to use. Defaults to "Ashley".
            model (str, optional): The Inworld model to use. Defaults to "inworld-tts-1".
            encoding (str, optional): The encoding to use. Defaults to "OGG_OPUS".
            bit_rate (int, optional): Bits per second of the audio. Defaults to 64000.
            sample_rate (int, optional): The audio sample rate in Hz. Defaults to 48000.
            speaking_rate (float, optional): The speed of the voice, in the range [0.5, 1.5].
                Defaults to 1.0.
            temperature (float, optional): Determines the degree of randomness when sampling audio
                tokens to generate the response. Range [0, 2]. Defaults to 1.1.
            timestamp_type (str, optional): Controls timestamp metadata returned with the audio.
                Use "WORD" for word-level timestamps or "CHARACTER" for character-level.
                Useful for karaoke-style captions, word highlighting, and lipsync.
            text_normalization (str, optional): Controls text normalization. When "ON", numbers,
                dates, and abbreviations are expanded (e.g., "Dr." -> "Doctor"). When "OFF",
                text is read exactly as written. Defaults to automatic.
            timestamp_transport_strategy (str, optional): Controls how timestamp info is
                transported relative to audio data. "SYNC" returns timestamps in the same
                message as audio data. "ASYNC" allows timestamps to return in trailing
                messages after the audio data. Defaults to "ASYNC".
            buffer_char_threshold (int, optional): For streaming, the minimum number of characters
                in the buffer that automatically triggers audio generation. Defaults to 1000.
            max_buffer_delay_ms (int, optional): For streaming, the maximum time in ms to buffer
                before starting generation. Defaults to 3000.
            base_url (str, optional): The base URL for the Inworld TTS API.
                Defaults to "https://api.inworld.ai/".
            ws_url (str, optional): The WebSocket URL for streaming TTS.
                Defaults to "wss://api.inworld.ai/".
            http_session (aiohttp.ClientSession, optional): The HTTP session to use.
            tokenizer (tokenize.SentenceTokenizer, optional): The tokenizer to use for streaming.
                Defaults to `livekit.agents.tokenize.blingfire.SentenceTokenizer`.
            retain_format (bool, optional): Whether to retain the format of the text when tokenizing.
                Defaults to True.
            max_connections (int, optional): Maximum number of concurrent WebSocket connections.
                Each connection supports up to 5 concurrent synthesis streams. Defaults to 20.
            idle_connection_timeout (float, optional): Time in seconds after which idle connections
                are closed. Defaults to 300 (5 minutes).

        Raises:
            ValueError: No API key was passed and INWORLD_API_KEY is not set.
        """
        # Resolve the sample rate first: the base class needs a concrete value.
        if not is_given(sample_rate):
            sample_rate = DEFAULT_SAMPLE_RATE
        super().__init__(
            capabilities=tts.TTSCapabilities(
                streaming=True,
                # Aligned transcripts are only advertised when timestamps were requested.
                aligned_transcript=is_given(timestamp_type)
                and timestamp_type != "TIMESTAMP_TYPE_UNSPECIFIED",
            ),
            sample_rate=sample_rate,
            num_channels=NUM_CHANNELS,
        )

        key = api_key if is_given(api_key) else os.getenv("INWORLD_API_KEY")
        if not key:
            raise ValueError(
                "Inworld API key is required, either as argument or set"
                " INWORLD_API_KEY environment variable"
            )

        self._authorization = f"Basic {key}"
        self._base_url = base_url
        self._ws_url = ws_url
        self._session = http_session

        self._opts = _TTSOptions(
            voice=voice if is_given(voice) else DEFAULT_VOICE,
            model=model if is_given(model) else DEFAULT_MODEL,
            encoding=encoding if is_given(encoding) else DEFAULT_ENCODING,
            bit_rate=bit_rate if is_given(bit_rate) else DEFAULT_BIT_RATE,
            sample_rate=sample_rate if is_given(sample_rate) else DEFAULT_SAMPLE_RATE,
            speaking_rate=speaking_rate if is_given(speaking_rate) else DEFAULT_SPEAKING_RATE,
            temperature=temperature if is_given(temperature) else DEFAULT_TEMPERATURE,
            timestamp_type=timestamp_type,
            text_normalization=text_normalization,
            timestamp_transport_strategy=cast(
                TimestampTransportStrategy, timestamp_transport_strategy
            )
            if is_given(timestamp_transport_strategy)
            else DEFAULT_TIMESTAMP_TRANSPORT_STRATEGY,
            buffer_char_threshold=buffer_char_threshold
            if is_given(buffer_char_threshold)
            else DEFAULT_BUFFER_CHAR_THRESHOLD,
            max_buffer_delay_ms=max_buffer_delay_ms
            if is_given(max_buffer_delay_ms)
            else DEFAULT_MAX_BUFFER_DELAY_MS,
        )

        self._max_connections = max_connections
        self._idle_connection_timeout = idle_connection_timeout
        # The pool is created lazily by _get_pool(); the lock serializes that creation.
        self._pool: _ConnectionPool | None = None
        self._pool_lock = asyncio.Lock()
        # Holds the task started by prewarm() so it stays referenced while it runs.
        self._prewarm_task: asyncio.Task[None] | None = None
        # Weak references: streams are tracked for aclose() without being kept alive.
        self._streams = weakref.WeakSet[SynthesizeStream]()
        self._sentence_tokenizer = (
            tokenizer
            if is_given(tokenizer)
            else tokenize.blingfire.SentenceTokenizer(
                retain_format=retain_format if is_given(retain_format) else True
            )
        )

    @property
    def model(self) -> str:
        """The Inworld model identifier currently configured."""
        return self._opts.model

    @property
    def provider(self) -> str:
        """The provider name, always ``"Inworld"``."""
        return "Inworld"

    async def _get_pool(self) -> _ConnectionPool:
        """Get the connection pool, creating if needed."""
        async with self._pool_lock:
            # A pool that was closed in the meantime is replaced by a fresh one.
            if self._pool is None or self._pool._closed:
                self._pool = _ConnectionPool(
                    session=self._ensure_session(),
                    ws_url=self._ws_url,
                    authorization=self._authorization,
                    max_connections=self._max_connections,
                    idle_timeout=self._idle_connection_timeout,
                )
            return self._pool

    def update_options(
        self,
        *,
        voice: NotGivenOr[str] = NOT_GIVEN,
        model: NotGivenOr[str] = NOT_GIVEN,
        encoding: NotGivenOr[Encoding] = NOT_GIVEN,
        bit_rate: NotGivenOr[int] = NOT_GIVEN,
        sample_rate: NotGivenOr[int] = NOT_GIVEN,
        speaking_rate: NotGivenOr[float] = NOT_GIVEN,
        temperature: NotGivenOr[float] = NOT_GIVEN,
        timestamp_type: NotGivenOr[TimestampType] = NOT_GIVEN,
        text_normalization: NotGivenOr[TextNormalization] = NOT_GIVEN,
        timestamp_transport_strategy: NotGivenOr[TimestampTransportStrategy] = NOT_GIVEN,
        buffer_char_threshold: NotGivenOr[int] = NOT_GIVEN,
        max_buffer_delay_ms: NotGivenOr[int] = NOT_GIVEN,
    ) -> None:
        """
        Update the TTS configuration options.

        Only arguments that are explicitly given are applied; the rest keep their
        current value.

        Args:
            voice (str, optional): The voice to use.
            model (str, optional): The Inworld model to use.
            encoding (str, optional): The encoding to use.
            bit_rate (int, optional): Bits per second of the audio.
            sample_rate (int, optional): The audio sample rate in Hz.
            speaking_rate (float, optional): The speed of the voice.
            temperature (float, optional): Determines the degree of randomness when sampling audio
                tokens to generate the response.
            timestamp_type (str, optional): Controls timestamp metadata ("WORD" or "CHARACTER").
            text_normalization (str, optional): Controls text normalization ("ON" or "OFF").
            timestamp_transport_strategy (str, optional): Controls timestamp transport strategy
                ("SYNC" or "ASYNC").
            buffer_char_threshold (int, optional): For streaming, min characters before triggering.
            max_buffer_delay_ms (int, optional): For streaming, max time to buffer.
        """
        if is_given(voice):
            self._opts.voice = voice
        if is_given(model):
            self._opts.model = model
        if is_given(encoding):
            self._opts.encoding = encoding
        if is_given(bit_rate):
            self._opts.bit_rate = bit_rate
        if is_given(sample_rate):
            self._opts.sample_rate = sample_rate
        if is_given(speaking_rate):
            self._opts.speaking_rate = speaking_rate
        if is_given(temperature):
            self._opts.temperature = temperature
        if is_given(timestamp_type):
            self._opts.timestamp_type = cast(TimestampType, timestamp_type)
        if is_given(text_normalization):
            self._opts.text_normalization = cast(TextNormalization, text_normalization)
        if is_given(timestamp_transport_strategy):
            self._opts.timestamp_transport_strategy = cast(
                TimestampTransportStrategy, timestamp_transport_strategy
            )
        if is_given(buffer_char_threshold):
            self._opts.buffer_char_threshold = buffer_char_threshold
        if is_given(max_buffer_delay_ms):
            self._opts.max_buffer_delay_ms = max_buffer_delay_ms

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the HTTP session, falling back to the shared context session."""
        if not self._session:
            self._session = utils.http_context.http_session()

        return self._session

    def prewarm(self) -> None:
        """Start creating the connection pool in the background.

        Must be called while an event loop is running, since it schedules a task.
        """
        # The event loop only keeps weak references to tasks, so the result of
        # create_task() is stored to keep the task from being garbage-collected
        # before it finishes.
        self._prewarm_task = asyncio.create_task(self._prewarm_impl())

    async def _prewarm_impl(self) -> None:
        # Just ensure the pool is created - first acquire will establish a connection
        await self._get_pool()

    def synthesize(
        self,
        text: str,
        *,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> tts.ChunkedStream:
        """Create a ``ChunkedStream`` that synthesizes ``text`` in a single request."""
        return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)

    def stream(
        self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> SynthesizeStream:
        """Create a ``SynthesizeStream`` and track it so ``aclose`` can close it."""
        stream = SynthesizeStream(tts=self, conn_options=conn_options)
        self._streams.add(stream)
        return stream

    async def aclose(self) -> None:
        """Close every tracked stream, then the connection pool if one exists."""
        # Iterate over a snapshot of the tracked streams.
        for stream in list(self._streams):
            await stream.aclose()

        self._streams.clear()
        if self._pool:
            await self._pool.aclose()
            self._pool = None

    async def list_voices(self, language: str | None = None) -> list[dict[str, Any]]:
        """
        List all available voices in the workspace associated with the API key.

        Args:
            language (str, optional): ISO 639-1 language code to filter voices (e.g., 'en', 'es', 'fr').

        Returns:
            The ``voices`` list from the response, or an empty list if the key is absent.

        Raises:
            APIStatusError: The server answered with a non-success HTTP status.
        """
        url = urljoin(self._base_url, "tts/v1/voices")
        params: dict[str, str] = {}
        if language:
            params["filter"] = f"language={language}"

        # Generated once so the id sent to the server is also the one reported on errors.
        x_request_id = str(uuid.uuid4())
        async with self._ensure_session().get(
            url,
            headers={
                "Authorization": self._authorization,
                "X-User-Agent": USER_AGENT,
                "X-Request-Id": x_request_id,
            },
            params=params,
        ) as resp:
            if not resp.ok:
                try:
                    error_body = await resp.json()
                except (aiohttp.ClientError, ValueError):
                    # The error body may not be JSON; report the HTTP status
                    # instead of failing with a parsing error.
                    error_body = None

                message = error_body.get("message") if isinstance(error_body, dict) else None
                raise APIStatusError(
                    message=message or resp.reason or "failed to list Inworld voices",
                    status_code=resp.status,
                    request_id=x_request_id,
                    body=None,
                )

            data = await resp.json()
            return cast(list[dict[str, Any]], data.get("voices", []))

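Inworld text-to-speech client for LiveKit Agents. (The class defines no docstring of its own; the generated page falls back to the inherited `abc.ABC` text, "Helper class that provides a standard way to create an ABC using inheritance.")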

Create a new instance of Inworld TTS.

Args

api_key : str, optional
The Inworld API key. If not provided, it will be read from the INWORLD_API_KEY environment variable.
voice : str, optional
The voice to use. Defaults to "Ashley".
model : str, optional
The Inworld model to use. Defaults to "inworld-tts-1".
encoding : str, optional
The encoding to use. Defaults to "OGG_OPUS".
bit_rate : int, optional
Bits per second of the audio. Defaults to 64000.
sample_rate : int, optional
The audio sample rate in Hz. Defaults to 48000.
speaking_rate : float, optional
The speed of the voice, in the range [0.5, 1.5]. Defaults to 1.0.
temperature : float, optional
Determines the degree of randomness when sampling audio tokens to generate the response. Range [0, 2]. Defaults to 1.1.
timestamp_type : str, optional
Controls timestamp metadata returned with the audio. Use "WORD" for word-level timestamps or "CHARACTER" for character-level. Useful for karaoke-style captions, word highlighting, and lipsync.
text_normalization : str, optional
Controls text normalization. When "ON", numbers, dates, and abbreviations are expanded (e.g., "Dr." -> "Doctor"). When "OFF", text is read exactly as written. Defaults to automatic.
timestamp_transport_strategy : str, optional
Controls how timestamp info is transported relative to audio data. "SYNC" returns timestamps in the same message as audio data. "ASYNC" allows timestamps to return in trailing messages after the audio data. Defaults to "ASYNC".
buffer_char_threshold : int, optional
For streaming, the minimum number of characters in the buffer that automatically triggers audio generation. Defaults to 1000.
max_buffer_delay_ms : int, optional
For streaming, the maximum time in ms to buffer before starting generation. Defaults to 3000.
base_url : str, optional
The base URL for the Inworld TTS API. Defaults to "https://api.inworld.ai/".
ws_url : str, optional
The WebSocket URL for streaming TTS. Defaults to "wss://api.inworld.ai/".
http_session : aiohttp.ClientSession, optional
The HTTP session to use.
tokenizer : tokenize.SentenceTokenizer, optional
The tokenizer to use for streaming. Defaults to `livekit.agents.tokenize.blingfire.SentenceTokenizer`.
retain_format : bool, optional
Whether to retain the format of the text when tokenizing. Defaults to True.
max_connections : int, optional
Maximum number of concurrent WebSocket connections. Each connection supports up to 5 concurrent synthesis streams. Defaults to 20.
idle_connection_timeout : float, optional
Time in seconds after which idle connections are closed. Defaults to 300 (5 minutes).

Ancestors

  • livekit.agents.tts.tts.TTS
  • abc.ABC
  • EventEmitter
  • typing.Generic

Instance variables

prop model : str
Expand source code
@property
def model(self) -> str:
    """Return the model identifier held in this instance's options."""
    current_opts = self._opts
    return current_opts.model

Get the model name/identifier for this TTS instance.

Returns

The model name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their model information.

prop provider : str
Expand source code
@property
def provider(self) -> str:
    """Return the fixed provider label for this plugin."""
    provider_name = "Inworld"
    return provider_name

Get the provider name/identifier for this TTS instance.

Returns

The provider name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their provider information.

Methods

async def aclose(self) ‑> None
Expand source code
async def aclose(self) -> None:
    """Close all tracked streams, then shut down the connection pool if present."""
    # Work from a snapshot of the tracked streams; the collection is emptied afterwards.
    tracked = list(self._streams)
    for open_stream in tracked:
        await open_stream.aclose()
    self._streams.clear()

    pool = self._pool
    if not pool:
        return

    await pool.aclose()
    self._pool = None
async def list_voices(self, language: str | None = None) ‑> list[dict[str, typing.Any]]
Expand source code
async def list_voices(self, language: str | None = None) -> list[dict[str, Any]]:
    """
    List all available voices in the workspace associated with the API key.

    Args:
        language (str, optional): ISO 639-1 language code to filter voices (e.g., 'en', 'es', 'fr').

    Returns:
        The ``voices`` list from the response, or an empty list if the key is absent.

    Raises:
        APIStatusError: The server answered with a non-success HTTP status.
    """
    url = urljoin(self._base_url, "tts/v1/voices")
    params: dict[str, str] = {}
    if language:
        params["filter"] = f"language={language}"

    # Generated once so the id sent to the server is also the one reported on errors.
    x_request_id = str(uuid.uuid4())
    async with self._ensure_session().get(
        url,
        headers={
            "Authorization": self._authorization,
            "X-User-Agent": USER_AGENT,
            "X-Request-Id": x_request_id,
        },
        params=params,
    ) as resp:
        if not resp.ok:
            try:
                error_body = await resp.json()
            except (aiohttp.ClientError, ValueError):
                # The error body may not be JSON; report the HTTP status
                # instead of failing with a parsing error.
                error_body = None

            message = error_body.get("message") if isinstance(error_body, dict) else None
            raise APIStatusError(
                message=message or resp.reason or "failed to list Inworld voices",
                status_code=resp.status,
                request_id=x_request_id,
                body=None,
            )

        data = await resp.json()
        return cast(list[dict[str, Any]], data.get("voices", []))

List all available voices in the workspace associated with the API key.

Args

language : str, optional
ISO 639-1 language code to filter voices (e.g., 'en', 'es', 'fr').
def prewarm(self) ‑> None
Expand source code
def prewarm(self) -> None:
    """Start creating the connection pool in the background.

    Must be called while an event loop is running, since it schedules a task.
    """
    # The event loop only keeps weak references to tasks, so the result of
    # create_task() is stored on the instance to keep the task from being
    # garbage-collected before it finishes.
    self._prewarm_task = asyncio.create_task(self._prewarm_impl())

Pre-warm connection to the TTS service

def stream(self,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.inworld.tts.SynthesizeStream
Expand source code
def stream(
    self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
) -> SynthesizeStream:
    """Create a streaming synthesis session and add it to the tracked streams."""
    synth_stream = SynthesizeStream(tts=self, conn_options=conn_options)
    # Track the session on the instance before handing it to the caller.
    self._streams.add(synth_stream)
    return synth_stream
def synthesize(self,
text: str,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.agents.tts.tts.ChunkedStream
Expand source code
def synthesize(
    self,
    text: str,
    *,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> tts.ChunkedStream:
    """Create a ``ChunkedStream`` that synthesizes ``text`` with the given connect options."""
    chunked = ChunkedStream(tts=self, input_text=text, conn_options=conn_options)
    return chunked
def update_options(self,
*,
voice: NotGivenOr[str] = NOT_GIVEN,
model: NotGivenOr[str] = NOT_GIVEN,
encoding: NotGivenOr[Encoding] = NOT_GIVEN,
bit_rate: NotGivenOr[int] = NOT_GIVEN,
sample_rate: NotGivenOr[int] = NOT_GIVEN,
speaking_rate: NotGivenOr[float] = NOT_GIVEN,
temperature: NotGivenOr[float] = NOT_GIVEN,
timestamp_type: NotGivenOr[TimestampType] = NOT_GIVEN,
text_normalization: NotGivenOr[TextNormalization] = NOT_GIVEN,
timestamp_transport_strategy: NotGivenOr[TimestampTransportStrategy] = NOT_GIVEN,
buffer_char_threshold: NotGivenOr[int] = NOT_GIVEN,
max_buffer_delay_ms: NotGivenOr[int] = NOT_GIVEN) ‑> None
Expand source code
def update_options(
    self,
    *,
    voice: NotGivenOr[str] = NOT_GIVEN,
    model: NotGivenOr[str] = NOT_GIVEN,
    encoding: NotGivenOr[Encoding] = NOT_GIVEN,
    bit_rate: NotGivenOr[int] = NOT_GIVEN,
    sample_rate: NotGivenOr[int] = NOT_GIVEN,
    speaking_rate: NotGivenOr[float] = NOT_GIVEN,
    temperature: NotGivenOr[float] = NOT_GIVEN,
    timestamp_type: NotGivenOr[TimestampType] = NOT_GIVEN,
    text_normalization: NotGivenOr[TextNormalization] = NOT_GIVEN,
    timestamp_transport_strategy: NotGivenOr[TimestampTransportStrategy] = NOT_GIVEN,
    buffer_char_threshold: NotGivenOr[int] = NOT_GIVEN,
    max_buffer_delay_ms: NotGivenOr[int] = NOT_GIVEN,
) -> None:
    """
    Apply new values to the stored TTS options.

    Every argument is optional; an argument that is not given leaves the
    corresponding option untouched.

    Args:
        voice (str, optional): Voice to synthesize with.
        model (str, optional): Inworld model to use.
        encoding (str, optional): Audio encoding.
        bit_rate (int, optional): Audio bits per second.
        sample_rate (int, optional): Audio sample rate in Hz.
        speaking_rate (float, optional): Speed of the voice.
        temperature (float, optional): Degree of randomness when sampling audio tokens
            to generate the response.
        timestamp_type (str, optional): Timestamp metadata mode ("WORD" or "CHARACTER").
        text_normalization (str, optional): Text normalization mode ("ON" or "OFF").
        timestamp_transport_strategy (str, optional): Timestamp transport strategy
            ("SYNC" or "ASYNC").
        buffer_char_threshold (int, optional): For streaming, min characters before triggering.
        max_buffer_delay_ms (int, optional): For streaming, max time to buffer.
    """
    # Option attribute name -> requested value, in the order the options are applied.
    requested = {
        "voice": voice,
        "model": model,
        "encoding": encoding,
        "bit_rate": bit_rate,
        "sample_rate": sample_rate,
        "speaking_rate": speaking_rate,
        "temperature": temperature,
        "timestamp_type": timestamp_type,
        "text_normalization": text_normalization,
        "timestamp_transport_strategy": timestamp_transport_strategy,
        "buffer_char_threshold": buffer_char_threshold,
        "max_buffer_delay_ms": max_buffer_delay_ms,
    }
    for option_name, option_value in requested.items():
        if not is_given(option_value):
            continue
        setattr(self._opts, option_name, option_value)

Update the TTS configuration options.

Args

voice : str, optional
The voice to use.
model : str, optional
The Inworld model to use.
encoding : str, optional
The encoding to use.
bit_rate : int, optional
Bits per second of the audio.
sample_rate : int, optional
The audio sample rate in Hz.
speaking_rate : float, optional
The speed of the voice.
temperature : float, optional
Determines the degree of randomness when sampling audio tokens to generate the response.
timestamp_type : str, optional
Controls timestamp metadata ("WORD" or "CHARACTER").
text_normalization : str, optional
Controls text normalization ("ON" or "OFF").
timestamp_transport_strategy : str, optional
Controls timestamp transport strategy ("SYNC" or "ASYNC").
buffer_char_threshold : int, optional
For streaming, min characters before triggering.
max_buffer_delay_ms : int, optional
For streaming, max time to buffer.

Inherited members