Module livekit.plugins.inworld
Inworld plugin for LiveKit Agents
See https://docs.livekit.io/agents/integrations/tts/inworld/ for more information.
Classes
class TTS (*,
api_key: NotGivenOr[str] = NOT_GIVEN,
voice: NotGivenOr[str] = NOT_GIVEN,
model: NotGivenOr[str] = NOT_GIVEN,
encoding: NotGivenOr[Encoding] = NOT_GIVEN,
bit_rate: NotGivenOr[int] = NOT_GIVEN,
sample_rate: NotGivenOr[int] = NOT_GIVEN,
pitch: NotGivenOr[float] = NOT_GIVEN,
speaking_rate: NotGivenOr[float] = NOT_GIVEN,
temperature: NotGivenOr[float] = NOT_GIVEN,
base_url: str = 'https://api.inworld.ai/',
http_session: aiohttp.ClientSession | None = None)
Expand source code
class TTS(tts.TTS):
    """Inworld text-to-speech for LiveKit Agents (non-streaming synthesis)."""

    def __init__(
        self,
        *,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        voice: NotGivenOr[str] = NOT_GIVEN,
        model: NotGivenOr[str] = NOT_GIVEN,
        encoding: NotGivenOr[Encoding] = NOT_GIVEN,
        bit_rate: NotGivenOr[int] = NOT_GIVEN,
        sample_rate: NotGivenOr[int] = NOT_GIVEN,
        pitch: NotGivenOr[float] = NOT_GIVEN,
        speaking_rate: NotGivenOr[float] = NOT_GIVEN,
        temperature: NotGivenOr[float] = NOT_GIVEN,
        base_url: str = DEFAULT_URL,
        http_session: aiohttp.ClientSession | None = None,
    ) -> None:
        """
        Create a new instance of Inworld TTS.

        Args:
            api_key (str, optional): The Inworld API key. If not provided, it will be read
                from the INWORLD_API_KEY environment variable.
            voice (str, optional): The voice to use. Defaults to "Ashley".
            model (str, optional): The Inworld model to use. Defaults to "inworld-tts-1".
            encoding (str, optional): The encoding to use. Defaults to "MP3".
            bit_rate (int, optional): Bits per second of the audio. Defaults to 64000.
            sample_rate (int, optional): The audio sample rate in Hz. Defaults to 24000.
            pitch (float, optional): The pitch of the voice. Defaults to 0.0.
            speaking_rate (float, optional): The speed of the voice. Defaults to 1.0.
            temperature (float, optional): Determines the degree of randomness when sampling
                audio tokens to generate the response. Defaults to 0.8.
            base_url (str, optional): The base URL for the Inworld TTS API. Defaults to
                "https://api.inworld.ai/".
            http_session (aiohttp.ClientSession, optional): The HTTP session to use.

        Raises:
            ValueError: If no API key is passed and INWORLD_API_KEY is unset or empty.
        """
        # Settle the sample rate once; both the base class and the options use it.
        resolved_rate = sample_rate if utils.is_given(sample_rate) else DEFAULT_SAMPLE_RATE

        super().__init__(
            capabilities=tts.TTSCapabilities(streaming=False),
            sample_rate=resolved_rate,
            num_channels=NUM_CHANNELS,
        )

        # An explicit key wins; otherwise fall back to the environment.
        key = api_key or os.getenv("INWORLD_API_KEY", "")
        if not key:
            raise ValueError("Inworld API key required. Set INWORLD_API_KEY or provide api_key.")

        self._authorization = f"Basic {key}"
        self._base_url = base_url
        self._session = http_session
        self._opts = _TTSOptions(
            voice=voice if utils.is_given(voice) else DEFAULT_VOICE,
            model=model if utils.is_given(model) else DEFAULT_MODEL,
            encoding=encoding if utils.is_given(encoding) else DEFAULT_ENCODING,
            bit_rate=bit_rate if utils.is_given(bit_rate) else DEFAULT_BIT_RATE,
            sample_rate=resolved_rate,
            # The remaining knobs are stored exactly as received (possibly NOT_GIVEN).
            pitch=pitch,
            speaking_rate=speaking_rate,
            temperature=temperature,
        )

    def update_options(
        self,
        *,
        voice: NotGivenOr[str] = NOT_GIVEN,
        model: NotGivenOr[str] = NOT_GIVEN,
        encoding: NotGivenOr[Encoding] = NOT_GIVEN,
        bit_rate: NotGivenOr[int] = NOT_GIVEN,
        sample_rate: NotGivenOr[int] = NOT_GIVEN,
        pitch: NotGivenOr[float] = NOT_GIVEN,
        speaking_rate: NotGivenOr[float] = NOT_GIVEN,
        temperature: NotGivenOr[float] = NOT_GIVEN,
    ) -> None:
        """
        Update the TTS configuration options.

        Only the arguments that are actually given are written to the stored
        options; everything else keeps its current value.

        Args:
            voice (str, optional): The voice to use.
            model (str, optional): The Inworld model to use.
            encoding (str, optional): The encoding to use.
            bit_rate (int, optional): Bits per second of the audio.
            sample_rate (int, optional): The audio sample rate in Hz.
            pitch (float, optional): The pitch of the voice.
            speaking_rate (float, optional): The speed of the voice.
            temperature (float, optional): Determines the degree of randomness when sampling
                audio tokens to generate the response.
        """
        requested = {
            "voice": voice,
            "model": model,
            "encoding": encoding,
            "bit_rate": bit_rate,
            "sample_rate": sample_rate,
            "pitch": pitch,
            "speaking_rate": speaking_rate,
            "temperature": temperature,
        }
        for option_name, new_value in requested.items():
            if utils.is_given(new_value):
                setattr(self._opts, option_name, new_value)

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the HTTP session, obtaining one from the http context on first use."""
        session = self._session
        if not session:
            session = self._session = utils.http_context.http_session()
        return session

    def synthesize(
        self,
        text: str,
        *,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> tts.ChunkedStream:
        """
        Build a chunked synthesis stream for the given text.

        Args:
            text (str): The text to synthesize.
            conn_options (APIConnectOptions, optional): Connection options passed to the
                stream unchanged.

        Returns:
            tts.ChunkedStream: A stream bound to this TTS instance and ``text``.
        """
        stream = ChunkedStream(tts=self, input_text=text, conn_options=conn_options)
        return stream
Helper class that provides a standard way to create an ABC using inheritance.
Create a new instance of Inworld TTS.
Args
api_key (str, optional): The Inworld API key. If not provided, it will be read from the INWORLD_API_KEY environment variable.
voice (str, optional): The voice to use. Defaults to "Ashley".
model (str, optional): The Inworld model to use. Defaults to "inworld-tts-1".
encoding (str, optional): The encoding to use. Defaults to "MP3".
bit_rate (int, optional): Bits per second of the audio. Defaults to 64000.
sample_rate (int, optional): The audio sample rate in Hz. Defaults to 24000.
pitch (float, optional): The pitch of the voice. Defaults to 0.0.
speaking_rate (float, optional): The speed of the voice. Defaults to 1.0.
temperature (float, optional): Determines the degree of randomness when sampling audio tokens to generate the response. Defaults to 0.8.
base_url (str, optional): The base URL for the Inworld TTS API. Defaults to "https://api.inworld.ai/".
http_session (aiohttp.ClientSession, optional): The HTTP session to use.
Ancestors
- livekit.agents.tts.tts.TTS
- abc.ABC
- EventEmitter
- typing.Generic
Methods
def synthesize(self,
text: str,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) -> livekit.agents.tts.tts.ChunkedStream
Expand source code
def synthesize(
    self,
    text: str,
    *,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> tts.ChunkedStream:
    """
    Build a chunked synthesis stream for the given text.

    Args:
        text (str): The text to synthesize.
        conn_options (APIConnectOptions, optional): Connection options passed to the
            stream unchanged.

    Returns:
        tts.ChunkedStream: A stream bound to this TTS instance and ``text``.
    """
    stream = ChunkedStream(tts=self, input_text=text, conn_options=conn_options)
    return stream
def update_options(self,
*,
voice: NotGivenOr[str] = NOT_GIVEN,
model: NotGivenOr[str] = NOT_GIVEN,
encoding: NotGivenOr[Encoding] = NOT_GIVEN,
bit_rate: NotGivenOr[int] = NOT_GIVEN,
sample_rate: NotGivenOr[int] = NOT_GIVEN,
pitch: NotGivenOr[float] = NOT_GIVEN,
speaking_rate: NotGivenOr[float] = NOT_GIVEN,
temperature: NotGivenOr[float] = NOT_GIVEN) -> None
Expand source code
def update_options(
    self,
    *,
    voice: NotGivenOr[str] = NOT_GIVEN,
    model: NotGivenOr[str] = NOT_GIVEN,
    encoding: NotGivenOr[Encoding] = NOT_GIVEN,
    bit_rate: NotGivenOr[int] = NOT_GIVEN,
    sample_rate: NotGivenOr[int] = NOT_GIVEN,
    pitch: NotGivenOr[float] = NOT_GIVEN,
    speaking_rate: NotGivenOr[float] = NOT_GIVEN,
    temperature: NotGivenOr[float] = NOT_GIVEN,
) -> None:
    """
    Update the TTS configuration options.

    Only the arguments that are actually given are written to the stored
    options; everything else keeps its current value.

    Args:
        voice (str, optional): The voice to use.
        model (str, optional): The Inworld model to use.
        encoding (str, optional): The encoding to use.
        bit_rate (int, optional): Bits per second of the audio.
        sample_rate (int, optional): The audio sample rate in Hz.
        pitch (float, optional): The pitch of the voice.
        speaking_rate (float, optional): The speed of the voice.
        temperature (float, optional): Determines the degree of randomness when sampling
            audio tokens to generate the response.
    """
    requested = {
        "voice": voice,
        "model": model,
        "encoding": encoding,
        "bit_rate": bit_rate,
        "sample_rate": sample_rate,
        "pitch": pitch,
        "speaking_rate": speaking_rate,
        "temperature": temperature,
    }
    # Same order as the parameters; values that were not given are skipped.
    for option_name, new_value in requested.items():
        if utils.is_given(new_value):
            setattr(self._opts, option_name, new_value)
Update the TTS configuration options.
Args
voice (str, optional): The voice to use.
model (str, optional): The Inworld model to use.
encoding (str, optional): The encoding to use.
bit_rate (int, optional): Bits per second of the audio.
sample_rate (int, optional): The audio sample rate in Hz.
pitch (float, optional): The pitch of the voice.
speaking_rate (float, optional): The speed of the voice.
temperature (float, optional): Determines the degree of randomness when sampling audio tokens to generate the response. Defaults to 0.8.
Inherited members