Module livekit.plugins.lmnt
LMNT plugin for LiveKit Agents
See https://docs.livekit.io/agents/integrations/tts/lmnt/ for more information.
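For orientation, here is a minimal construction sketch. It assumes the plugin is installed alongside LiveKit Agents and that TTS is re-exported at the package root (as with other LiveKit plugins); the arguments simply mirror the documented defaults of the TTS class below.

from livekit.plugins import lmnt

# Mirrors the documented defaults of lmnt.TTS.
# LMNT_API_KEY is read from the environment when api_key is omitted;
# the constructor raises ValueError if neither is provided.
tts = lmnt.TTS(
    model="blizzard",
    voice="leah",
    format="mp3",
    sample_rate=24000,
    temperature=1.0,
    top_p=0.8,
)

The resulting instance is typically handed to an agent session as its TTS component, or used directly via synthesize() as shown later in this page.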
Classes
class ChunkedStream (*,
tts: TTS,
input_text: str,
conn_options: APIConnectOptions)
class ChunkedStream(tts.ChunkedStream):
    """Synthesize text to speech in chunks."""

    def __init__(
        self,
        *,
        tts: TTS,
        input_text: str,
        conn_options: APIConnectOptions,
    ) -> None:
        super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
        self._tts = tts
        self._opts = replace(tts._opts)

    async def _run(self, output_emitter: tts.AudioEmitter) -> None:
        data = {
            "text": self._input_text,
            "voice": self._opts.voice,
            "language": self._opts.language,
            "sample_rate": self._opts.sample_rate,
            "model": self._opts.model,
            "format": self._opts.format,
            "temperature": self._opts.temperature,
            "top_p": self._opts.top_p,
        }
        try:
            async with self._tts._ensure_session().post(
                LMNT_BASE_URL,
                headers={
                    "Content-Type": "application/json",
                    "X-API-Key": self._opts.api_key,
                },
                json=data,
                timeout=aiohttp.ClientTimeout(
                    total=30,
                    sock_connect=self._conn_options.timeout,
                ),
            ) as resp:
                resp.raise_for_status()
                output_emitter.initialize(
                    request_id=utils.shortuuid(),
                    sample_rate=self._opts.sample_rate,
                    num_channels=NUM_CHANNELS,
                    mime_type=MIME_TYPE[self._opts.format],
                )

                async for data, _ in resp.content.iter_chunks():
                    output_emitter.push(data)

                output_emitter.flush()
        except asyncio.TimeoutError:
            raise APITimeoutError() from None
        except aiohttp.ClientResponseError as e:
            raise APIStatusError(
                message=e.message,
                status_code=e.status,
                request_id=None,
                body=None,
            ) from None
        except Exception as e:
            raise APIConnectionError() from e
Synthesize text to speech in chunks.
Ancestors
- livekit.agents.tts.tts.ChunkedStream
- abc.ABC
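ChunkedStream instances are not constructed directly in application code; they are returned by TTS.synthesize(). The sketch below is a hedged example of consuming one outside an agent job: it passes an explicit aiohttp session (otherwise the plugin pulls one from the framework's job context) and assumes the async-iteration interface of the livekit.agents base class, where each yielded event carries an audio frame.

import asyncio

import aiohttp

from livekit.plugins import lmnt


async def main() -> None:
    # An explicit session keeps the example self-contained outside an agent job.
    async with aiohttp.ClientSession() as session:
        tts = lmnt.TTS(http_session=session)  # api_key falls back to LMNT_API_KEY

        # synthesize() returns a ChunkedStream; iterating it is assumed to yield
        # audio events from the livekit.agents TTS base class, each with a frame.
        async for event in tts.synthesize("Hello from LMNT"):
            print(f"got {event.frame.samples_per_channel} samples per channel")


asyncio.run(main())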
class TTS (*,
model: LMNTModels = 'blizzard',
voice: str = 'leah',
language: LMNTLanguages | None = None,
format: LMNTAudioFormats = 'mp3',
sample_rate: LMNTSampleRate = 24000,
api_key: str | None = None,
http_session: aiohttp.ClientSession | None = None,
temperature: float = 1.0,
top_p: float = 0.8)
class TTS(tts.TTS):
    """
    Text-to-Speech (TTS) plugin for LMNT.
    """

    def __init__(
        self,
        *,
        model: LMNTModels = "blizzard",
        voice: str = "leah",
        language: LMNTLanguages | None = None,
        format: LMNTAudioFormats = "mp3",
        sample_rate: LMNTSampleRate = 24000,
        api_key: str | None = None,
        http_session: aiohttp.ClientSession | None = None,
        temperature: float = 1.0,
        top_p: float = 0.8,
    ) -> None:
        """
        Create a new instance of LMNT TTS.

        See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes

        Args:
            model: The model to use for synthesis. Default is "blizzard".
                Learn more at: https://docs.lmnt.com/guides/models
            voice: The voice ID to use. Default is "leah".
                Find more amazing voices at https://app.lmnt.com/
            language: Two-letter ISO 639-1 language code. Defaults to None.
                See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes#body-language
            format: Output file format. Options: aac, mp3, mulaw, raw, wav. Default is "mp3".
            sample_rate: Output sample rate in Hz. Default is 24000.
                See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes#body-sample-rate
            api_key: API key for authentication. Defaults to the LMNT_API_KEY environment variable.
            http_session: Optional aiohttp ClientSession. A new session is created if not provided.
            temperature: Influences how expressive and emotionally varied the speech becomes.
                Lower values (like 0.3) create more neutral, consistent speaking styles.
                Higher values (like 1.0) allow for more dynamic emotional range and speaking
                styles. Default is 1.0.
            top_p: Controls the stability of the generated speech. A lower value (like 0.3)
                produces more consistent, reliable speech. A higher value (like 0.9) gives more
                flexibility in how words are spoken, but might occasionally produce unusual
                intonations or speech patterns. Default is 0.8.
        """
        super().__init__(
            capabilities=tts.TTSCapabilities(streaming=False),
            sample_rate=sample_rate,
            num_channels=NUM_CHANNELS,
        )
        api_key = api_key or os.environ.get("LMNT_API_KEY")
        if not api_key:
            raise ValueError(
                "LMNT API key is required. "
                "Set it via environment variable or pass it as an argument."
            )

        if not language:
            language = "auto" if model == "blizzard" else "en"

        self._opts = _TTSOptions(
            model=model,
            sample_rate=sample_rate,
            num_channels=NUM_CHANNELS,
            language=language,
            voice=voice,
            format=format,
            api_key=api_key,
            temperature=temperature,
            top_p=top_p,
        )
        self._session = http_session

    def synthesize(
        self,
        text: str,
        *,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> ChunkedStream:
        return ChunkedStream(
            tts=self,
            input_text=text,
            conn_options=conn_options,
        )

    def update_options(
        self,
        *,
        model: NotGivenOr[LMNTModels] = NOT_GIVEN,
        voice: NotGivenOr[str] = NOT_GIVEN,
        language: NotGivenOr[LMNTLanguages] = NOT_GIVEN,
        format: NotGivenOr[LMNTAudioFormats] = NOT_GIVEN,
        sample_rate: NotGivenOr[LMNTSampleRate] = NOT_GIVEN,
        temperature: NotGivenOr[float] = NOT_GIVEN,
        top_p: NotGivenOr[float] = NOT_GIVEN,
    ) -> None:
        """
        Update the TTS options.

        Args:
            model: The model to use for synthesis.
                Learn more at: https://docs.lmnt.com/guides/models
            voice: The voice ID to update.
            language: Two-letter ISO 639-1 code.
                See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes#body-language
            format: Audio output format. Options: aac, mp3, mulaw, raw, wav.
            sample_rate: Output sample rate in Hz.
            temperature: Controls the expressiveness of the speech. A number between 0.0 and 1.0.
            top_p: Controls the stability of the generated speech. A number between 0.0 and 1.0.
        """
        if is_given(model):
            self._opts.model = model
        if is_given(voice):
            self._opts.voice = voice
        if is_given(language):
            self._opts.language = language
        if is_given(format):
            self._opts.format = format
        if is_given(sample_rate):
            self._opts.sample_rate = sample_rate
        if is_given(temperature):
            self._opts.temperature = temperature
        if is_given(top_p):
            self._opts.top_p = top_p

    def _ensure_session(self) -> aiohttp.ClientSession:
        if not self._session:
            self._session = utils.http_context.http_session()
        return self._session
Text-to-Speech (TTS) plugin for LMNT.
Create a new instance of LMNT TTS.
See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes
Args
model
- The model to use for synthesis. Default is "blizzard". Learn more at: https://docs.lmnt.com/guides/models
voice
- The voice ID to use. Default is "leah". Find more amazing voices at https://app.lmnt.com/
language
- Two-letter ISO 639-1 language code. Defaults to None, in which case the constructor resolves it to "auto" for the "blizzard" model and to "en" otherwise. See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes#body-language
format
- Output file format. Options: aac, mp3, mulaw, raw, wav. Default is "mp3".
sample_rate
- Output sample rate in Hz. Default is 24000. See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes#body-sample-rate
api_key
- API key for authentication. Defaults to the LMNT_API_KEY environment variable.
http_session
- Optional aiohttp ClientSession. A new session is created if not provided.
temperature
- Influences how expressive and emotionally varied the speech becomes. Lower values (like 0.3) create more neutral, consistent speaking styles. Higher values (like 1.0) allow for more dynamic emotional range and speaking styles. Default is 1.0.
top_p
- Controls the stability of the generated speech. A lower value (like 0.3) produces more consistent, reliable speech. A higher value (like 0.9) gives more flexibility in how words are spoken, but might occasionally produce unusual intonations or speech patterns. Default is 0.8.
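One behavior worth noting from the source above: when language is left unset, the constructor resolves it to "auto" for the "blizzard" model and to "en" for other models. The short sketch below pins the language explicitly and trades expressiveness for stability; the specific values are illustrative, and LMNT_API_KEY is assumed to be set in the environment.

from livekit.plugins import lmnt

tts = lmnt.TTS(
    language="en",    # skip the "auto" fallback used for the blizzard model
    format="wav",     # any of: aac, mp3, mulaw, raw, wav
    temperature=0.3,  # more neutral, consistent delivery
    top_p=0.3,        # more stable pronunciation
)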
Ancestors
- livekit.agents.tts.tts.TTS
- abc.ABC
- EventEmitter
- typing.Generic
Methods
def synthesize(self,
text: str,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) -> livekit.plugins.lmnt.tts.ChunkedStream
def synthesize(
    self,
    text: str,
    *,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> ChunkedStream:
    return ChunkedStream(
        tts=self,
        input_text=text,
        conn_options=conn_options,
    )
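A hedged usage sketch: the default APIConnectOptions shown in the signature can be overridden per call, for example to tighten the timeout. This assumes APIConnectOptions is importable from livekit.agents, and that `tts` is an lmnt.TTS instance as constructed earlier; the field names match the defaults printed above.

from livekit.agents import APIConnectOptions

# Only this call uses the tighter retry/timeout settings; other calls keep the defaults.
stream = tts.synthesize(
    "Welcome to the session.",
    conn_options=APIConnectOptions(max_retry=1, retry_interval=1.0, timeout=5.0),
)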
def update_options(self,
*,
model: NotGivenOr[LMNTModels] = NOT_GIVEN,
voice: NotGivenOr[str] = NOT_GIVEN,
language: NotGivenOr[LMNTLanguages] = NOT_GIVEN,
format: NotGivenOr[LMNTAudioFormats] = NOT_GIVEN,
sample_rate: NotGivenOr[LMNTSampleRate] = NOT_GIVEN,
temperature: NotGivenOr[float] = NOT_GIVEN,
top_p: NotGivenOr[float] = NOT_GIVEN) -> None
def update_options(
    self,
    *,
    model: NotGivenOr[LMNTModels] = NOT_GIVEN,
    voice: NotGivenOr[str] = NOT_GIVEN,
    language: NotGivenOr[LMNTLanguages] = NOT_GIVEN,
    format: NotGivenOr[LMNTAudioFormats] = NOT_GIVEN,
    sample_rate: NotGivenOr[LMNTSampleRate] = NOT_GIVEN,
    temperature: NotGivenOr[float] = NOT_GIVEN,
    top_p: NotGivenOr[float] = NOT_GIVEN,
) -> None:
    """
    Update the TTS options.

    Args:
        model: The model to use for synthesis.
            Learn more at: https://docs.lmnt.com/guides/models
        voice: The voice ID to update.
        language: Two-letter ISO 639-1 code.
            See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes#body-language
        format: Audio output format. Options: aac, mp3, mulaw, raw, wav.
        sample_rate: Output sample rate in Hz.
        temperature: Controls the expressiveness of the speech. A number between 0.0 and 1.0.
        top_p: Controls the stability of the generated speech. A number between 0.0 and 1.0.
    """
    if is_given(model):
        self._opts.model = model
    if is_given(voice):
        self._opts.voice = voice
    if is_given(language):
        self._opts.language = language
    if is_given(format):
        self._opts.format = format
    if is_given(sample_rate):
        self._opts.sample_rate = sample_rate
    if is_given(temperature):
        self._opts.temperature = temperature
    if is_given(top_p):
        self._opts.top_p = top_p
Update the TTS options.
Args
model
- The model to use for synthesis. Learn more at: https://docs.lmnt.com/guides/models
voice
- The voice ID to switch to.
language
- Two-letter ISO 639-1 code. See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes#body-language
format
- Audio output format. Options: aac, mp3, mulaw, raw, wav.
sample_rate
- Output sample rate in Hz.
temperature
- Controls the expressiveness of the speech. A number between 0.0 and 1.0.
top_p
- Controls the stability of the generated speech. A number between 0.0 and 1.0.
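Options can be changed on an existing instance without recreating it; fields left as NOT_GIVEN keep their current values, and the new settings apply to subsequent synthesize() calls. A small sketch (the instance name is illustrative):

# Only the fields passed here change; everything else keeps its current value.
tts.update_options(
    voice="leah",
    temperature=0.4,  # more neutral delivery
    top_p=0.3,        # more consistent pronunciation
)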
Inherited members