Module livekit.plugins.inworld

Inworld plugin for LiveKit Agents

See https://docs.livekit.io/agents/integrations/tts/inworld/ for more information.

Classes

class TTS (*,
api_key: NotGivenOr[str] = NOT_GIVEN,
voice: NotGivenOr[str] = NOT_GIVEN,
model: NotGivenOr[str] = NOT_GIVEN,
encoding: NotGivenOr[Encoding] = NOT_GIVEN,
bit_rate: NotGivenOr[int] = NOT_GIVEN,
sample_rate: NotGivenOr[int] = NOT_GIVEN,
pitch: NotGivenOr[float] = NOT_GIVEN,
speaking_rate: NotGivenOr[float] = NOT_GIVEN,
temperature: NotGivenOr[float] = NOT_GIVEN,
base_url: str = 'https://api.inworld.ai/',
http_session: aiohttp.ClientSession | None = None)
Expand source code
class TTS(tts.TTS):
    """Inworld text-to-speech backend for LiveKit Agents.

    The instance registers itself with ``streaming=False`` capabilities; audio
    is requested through :meth:`synthesize`, which returns a ``ChunkedStream``.
    """

    def __init__(
        self,
        *,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        voice: NotGivenOr[str] = NOT_GIVEN,
        model: NotGivenOr[str] = NOT_GIVEN,
        encoding: NotGivenOr[Encoding] = NOT_GIVEN,
        bit_rate: NotGivenOr[int] = NOT_GIVEN,
        sample_rate: NotGivenOr[int] = NOT_GIVEN,
        pitch: NotGivenOr[float] = NOT_GIVEN,
        speaking_rate: NotGivenOr[float] = NOT_GIVEN,
        temperature: NotGivenOr[float] = NOT_GIVEN,
        base_url: str = DEFAULT_URL,
        http_session: aiohttp.ClientSession | None = None,
    ) -> None:
        """
        Create a new instance of Inworld TTS.

        Args:
            api_key (str, optional): The Inworld API key. Falls back to the
                INWORLD_API_KEY environment variable when not given.
            voice (str, optional): The voice to use. Defaults to "Ashley".
            model (str, optional): The Inworld model to use. Defaults to "inworld-tts-1".
            encoding (str, optional): The encoding to use. Defaults to "MP3".
            bit_rate (int, optional): Bits per second of the audio. Defaults to 64000.
            sample_rate (int, optional): The audio sample rate in Hz. Defaults to 24000.
            pitch (float, optional): The pitch of the voice. Stored as passed
                (no local fallback is applied); the documented default is 0.0.
            speaking_rate (float, optional): The speed of the voice. Stored as passed
                (no local fallback is applied); the documented default is 1.0.
            temperature (float, optional): Determines the degree of randomness when
                sampling audio tokens to generate the response. Stored as passed
                (no local fallback is applied); the documented default is 0.8.
            base_url (str, optional): The base URL for the Inworld TTS API.
                Defaults to "https://api.inworld.ai/".
            http_session (aiohttp.ClientSession, optional): The HTTP session to use.
                When omitted, one is obtained lazily on first use.

        Raises:
            ValueError: If no API key is given and INWORLD_API_KEY is unset or empty.
        """
        # Resolve the sample rate once; the base class and the options share the value.
        resolved_rate = sample_rate if utils.is_given(sample_rate) else DEFAULT_SAMPLE_RATE
        super().__init__(
            capabilities=tts.TTSCapabilities(streaming=False),
            sample_rate=resolved_rate,
            num_channels=NUM_CHANNELS,
        )

        resolved_key = api_key or os.getenv("INWORLD_API_KEY", "")
        if not resolved_key:
            raise ValueError("Inworld API key required. Set INWORLD_API_KEY or provide api_key.")

        self._session = http_session
        self._base_url = base_url
        self._authorization = f"Basic {resolved_key}"

        def _or_default(value, fallback):
            # Keep an explicitly provided value, otherwise use the module default.
            return value if utils.is_given(value) else fallback

        # pitch, speaking_rate and temperature are forwarded untouched, so they
        # may still be NOT_GIVEN inside the options object.
        self._opts = _TTSOptions(
            voice=_or_default(voice, DEFAULT_VOICE),
            model=_or_default(model, DEFAULT_MODEL),
            encoding=_or_default(encoding, DEFAULT_ENCODING),
            bit_rate=_or_default(bit_rate, DEFAULT_BIT_RATE),
            sample_rate=resolved_rate,
            pitch=pitch,
            speaking_rate=speaking_rate,
            temperature=temperature,
        )

    def update_options(
        self,
        *,
        voice: NotGivenOr[str] = NOT_GIVEN,
        model: NotGivenOr[str] = NOT_GIVEN,
        encoding: NotGivenOr[Encoding] = NOT_GIVEN,
        bit_rate: NotGivenOr[int] = NOT_GIVEN,
        sample_rate: NotGivenOr[int] = NOT_GIVEN,
        pitch: NotGivenOr[float] = NOT_GIVEN,
        speaking_rate: NotGivenOr[float] = NOT_GIVEN,
        temperature: NotGivenOr[float] = NOT_GIVEN,
    ) -> None:
        """
        Update the TTS configuration options.

        Only the arguments that are actually given are applied; every other
        option keeps its current value.

        Args:
            voice (str, optional): The voice to use.
            model (str, optional): The Inworld model to use.
            encoding (str, optional): The encoding to use.
            bit_rate (int, optional): Bits per second of the audio.
            sample_rate (int, optional): The audio sample rate in Hz.
            pitch (float, optional): The pitch of the voice.
            speaking_rate (float, optional): The speed of the voice.
            temperature (float, optional): Determines the degree of randomness when
                sampling audio tokens to generate the response.
        """
        # NOTE(review): only self._opts is modified here; the sample rate that
        # was handed to super().__init__() is not touched -- confirm this is intended.
        requested = {
            "voice": voice,
            "model": model,
            "encoding": encoding,
            "bit_rate": bit_rate,
            "sample_rate": sample_rate,
            "pitch": pitch,
            "speaking_rate": speaking_rate,
            "temperature": temperature,
        }
        for option_name, new_value in requested.items():
            if utils.is_given(new_value):
                setattr(self._opts, option_name, new_value)

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the HTTP session, obtaining one from ``utils.http_context`` if unset."""
        session = self._session
        if not session:
            session = utils.http_context.http_session()
            self._session = session
        return session

    def synthesize(
        self,
        text: str,
        *,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> tts.ChunkedStream:
        """Create a ``ChunkedStream`` for ``text`` bound to this TTS instance.

        Args:
            text: The text to synthesize.
            conn_options: Connection options forwarded to the stream.

        Returns:
            The ``ChunkedStream`` constructed for this request.
        """
        stream = ChunkedStream(tts=self, input_text=text, conn_options=conn_options)
        return stream

Inworld text-to-speech (TTS) implementation for LiveKit Agents.

Create a new instance of Inworld TTS.

Args

api_key : str, optional
The Inworld API key. If not provided, it will be read from the INWORLD_API_KEY environment variable.
voice : str, optional
The voice to use. Defaults to "Ashley".
model : str, optional
The Inworld model to use. Defaults to "inworld-tts-1".
encoding : str, optional
The encoding to use. Defaults to "MP3".
bit_rate : int, optional
Bits per second of the audio. Defaults to 64000.
sample_rate : int, optional
The audio sample rate in Hz. Defaults to 24000.
pitch : float, optional
The pitch of the voice. Defaults to 0.0.
speaking_rate : float, optional
The speed of the voice. Defaults to 1.0.
temperature : float, optional
Determines the degree of randomness when sampling audio tokens to generate the response. Defaults to 0.8.
base_url : str, optional
The base URL for the Inworld TTS API. Defaults to "https://api.inworld.ai/".
http_session : aiohttp.ClientSession, optional
The HTTP session to use.

Ancestors

  • livekit.agents.tts.tts.TTS
  • abc.ABC
  • EventEmitter
  • typing.Generic

Methods

def synthesize(self,
text: str,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.agents.tts.tts.ChunkedStream
Expand source code
def synthesize(
    self,
    text: str,
    *,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> tts.ChunkedStream:
    """Create a ``ChunkedStream`` for ``text`` bound to this TTS instance.

    Args:
        text: The text to synthesize.
        conn_options: Connection options forwarded to the stream.

    Returns:
        The ``ChunkedStream`` constructed for this request.
    """
    stream = ChunkedStream(tts=self, input_text=text, conn_options=conn_options)
    return stream
def update_options(self,
*,
voice: NotGivenOr[str] = NOT_GIVEN,
model: NotGivenOr[str] = NOT_GIVEN,
encoding: NotGivenOr[Encoding] = NOT_GIVEN,
bit_rate: NotGivenOr[int] = NOT_GIVEN,
sample_rate: NotGivenOr[int] = NOT_GIVEN,
pitch: NotGivenOr[float] = NOT_GIVEN,
speaking_rate: NotGivenOr[float] = NOT_GIVEN,
temperature: NotGivenOr[float] = NOT_GIVEN) ‑> None
Expand source code
def update_options(
    self,
    *,
    voice: NotGivenOr[str] = NOT_GIVEN,
    model: NotGivenOr[str] = NOT_GIVEN,
    encoding: NotGivenOr[Encoding] = NOT_GIVEN,
    bit_rate: NotGivenOr[int] = NOT_GIVEN,
    sample_rate: NotGivenOr[int] = NOT_GIVEN,
    pitch: NotGivenOr[float] = NOT_GIVEN,
    speaking_rate: NotGivenOr[float] = NOT_GIVEN,
    temperature: NotGivenOr[float] = NOT_GIVEN,
) -> None:
    """
    Update the TTS configuration options.

    Only the arguments that are actually given are applied; every other
    option keeps its current value.

    Args:
        voice (str, optional): The voice to use.
        model (str, optional): The Inworld model to use.
        encoding (str, optional): The encoding to use.
        bit_rate (int, optional): Bits per second of the audio.
        sample_rate (int, optional): The audio sample rate in Hz.
        pitch (float, optional): The pitch of the voice.
        speaking_rate (float, optional): The speed of the voice.
        temperature (float, optional): Determines the degree of randomness when
            sampling audio tokens to generate the response.
    """
    # NOTE(review): only self._opts is modified here -- confirm nothing else
    # caches the sample rate.
    requested = {
        "voice": voice,
        "model": model,
        "encoding": encoding,
        "bit_rate": bit_rate,
        "sample_rate": sample_rate,
        "pitch": pitch,
        "speaking_rate": speaking_rate,
        "temperature": temperature,
    }
    for option_name, new_value in requested.items():
        if utils.is_given(new_value):
            setattr(self._opts, option_name, new_value)

Update the TTS configuration options.

Args

voice : str, optional
The voice to use.
model : str, optional
The Inworld model to use.
encoding : str, optional
The encoding to use.
bit_rate : int, optional
Bits per second of the audio.
sample_rate : int, optional
The audio sample rate in Hz.
pitch : float, optional
The pitch of the voice.
speaking_rate : float, optional
The speed of the voice.
temperature : float, optional
Determines the degree of randomness when sampling audio tokens to generate the response. If omitted, the current value is kept.

Inherited members