Module livekit.plugins.lmnt

LMNT plugin for LiveKit Agents

See https://docs.livekit.io/agents/integrations/tts/lmnt/ for more information.

Classes

class ChunkedStream (*,
tts: TTS,
input_text: str,
conn_options: APIConnectOptions)
Expand source code
class ChunkedStream(tts.ChunkedStream):
    """Synthesize text to speech in chunks.

    Posts the full input text to the LMNT speech-bytes endpoint and streams
    the returned audio chunks into the provided ``AudioEmitter``.
    """

    def __init__(
        self,
        *,
        tts: TTS,
        input_text: str,
        conn_options: APIConnectOptions,
    ) -> None:
        super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
        self._tts = tts
        # Snapshot the parent's options so a later update_options() call on
        # the TTS instance cannot mutate an in-flight request.
        self._opts = replace(tts._opts)

    async def _run(self, output_emitter: tts.AudioEmitter) -> None:
        # Request payload for LMNT's synthesize-speech-bytes endpoint.
        payload = {
            "text": self._input_text,
            "voice": self._opts.voice,
            "language": self._opts.language,
            "sample_rate": self._opts.sample_rate,
            "model": self._opts.model,
            "format": self._opts.format,
            "temperature": self._opts.temperature,
            "top_p": self._opts.top_p,
        }

        try:
            async with self._tts._ensure_session().post(
                LMNT_BASE_URL,
                headers={
                    "Content-Type": "application/json",
                    "X-API-Key": self._opts.api_key,
                },
                json=payload,
                # NOTE(review): total is hard-coded to 30s while only the
                # connect phase honors conn_options.timeout — presumably
                # intentional for long syntheses; confirm against callers.
                timeout=aiohttp.ClientTimeout(
                    total=30,
                    sock_connect=self._conn_options.timeout,
                ),
            ) as resp:
                resp.raise_for_status()
                output_emitter.initialize(
                    request_id=utils.shortuuid(),
                    sample_rate=self._opts.sample_rate,
                    num_channels=NUM_CHANNELS,
                    mime_type=MIME_TYPE[self._opts.format],
                )
                # Fix: the loop variable previously reused the name `data`,
                # shadowing the request payload dict above.
                async for chunk, _ in resp.content.iter_chunks():
                    output_emitter.push(chunk)

                output_emitter.flush()
        except asyncio.TimeoutError:
            raise APITimeoutError() from None
        except aiohttp.ClientResponseError as e:
            raise APIStatusError(
                message=e.message,
                status_code=e.status,
                request_id=None,
                body=None,
            ) from None
        except Exception as e:
            raise APIConnectionError() from e

Synthesize text to speech in chunks.

Ancestors

  • livekit.agents.tts.tts.ChunkedStream
  • abc.ABC
class TTS (*,
model: LMNTModels = 'blizzard',
voice: str = 'leah',
language: LMNTLanguages | None = None,
format: LMNTAudioFormats = 'mp3',
sample_rate: LMNTSampleRate = 24000,
api_key: str | None = None,
http_session: aiohttp.ClientSession | None = None,
temperature: float = 1.0,
top_p: float = 0.8)
Expand source code
class TTS(tts.TTS):
    """Text-to-Speech (TTS) plugin for LMNT."""

    def __init__(
        self,
        *,
        model: LMNTModels = "blizzard",
        voice: str = "leah",
        language: LMNTLanguages | None = None,
        format: LMNTAudioFormats = "mp3",
        sample_rate: LMNTSampleRate = 24000,
        api_key: str | None = None,
        http_session: aiohttp.ClientSession | None = None,
        temperature: float = 1.0,
        top_p: float = 0.8,
    ) -> None:
        """
        Create a new instance of LMNT TTS.

        See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes

        Args:
            model: Synthesis model, "blizzard" by default.
                Learn more at: https://docs.lmnt.com/guides/models
            voice: Voice ID, "leah" by default. Browse voices at https://app.lmnt.com/
            language: Two-letter ISO 639-1 code; when omitted, "auto" is used
                for the "blizzard" model and "en" otherwise.
                See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes#body-language
            format: Output audio format (aac, mp3, mulaw, raw, wav). Default "mp3".
            sample_rate: Output sample rate in Hz, 24000 by default.
                See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes#body-sample-rate
            api_key: LMNT API key; falls back to the LMNT_API_KEY environment variable.
            http_session: Optional aiohttp ClientSession; one is created lazily if absent.
            temperature: Expressiveness control — lower values (e.g. 0.3) give
                neutral, consistent delivery; higher values (e.g. 1.0) allow a
                wider emotional range. Default 1.0.
            top_p: Stability control — lower values (e.g. 0.3) give more
                consistent speech; higher values (e.g. 0.9) allow more varied
                phrasing at the cost of occasional odd intonation. Default 0.8.
        """
        super().__init__(
            capabilities=tts.TTSCapabilities(streaming=False),
            sample_rate=sample_rate,
            num_channels=NUM_CHANNELS,
        )
        key = api_key or os.environ.get("LMNT_API_KEY")
        if not key:
            raise ValueError(
                "LMNT API key is required. "
                "Set it via environment variable or pass it as an argument."
            )

        # Resolve the language default per model when none was supplied.
        lang = language
        if not lang:
            lang = "auto" if model == "blizzard" else "en"

        self._opts = _TTSOptions(
            model=model,
            sample_rate=sample_rate,
            num_channels=NUM_CHANNELS,
            language=lang,
            voice=voice,
            format=format,
            api_key=key,
            temperature=temperature,
            top_p=top_p,
        )
        self._session = http_session

    def synthesize(
        self,
        text: str,
        *,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> ChunkedStream:
        """Return a chunked synthesis stream for *text*."""
        return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)

    def update_options(
        self,
        *,
        model: NotGivenOr[LMNTModels] = NOT_GIVEN,
        voice: NotGivenOr[str] = NOT_GIVEN,
        language: NotGivenOr[LMNTLanguages] = NOT_GIVEN,
        format: NotGivenOr[LMNTAudioFormats] = NOT_GIVEN,
        sample_rate: NotGivenOr[LMNTSampleRate] = NOT_GIVEN,
        temperature: NotGivenOr[float] = NOT_GIVEN,
        top_p: NotGivenOr[float] = NOT_GIVEN,
    ) -> None:
        """
        Update the TTS options.

        Only arguments that were explicitly given are applied; everything
        else keeps its current value.

        Args:
            model: Synthesis model. Learn more at: https://docs.lmnt.com/guides/models
            voice: Voice ID to switch to.
            language: Two-letter ISO 639-1 code.
                See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes#body-language
            format: Audio output format (aac, mp3, mulaw, raw, wav).
            sample_rate: Output sample rate in Hz.
            temperature: Expressiveness of the speech, between 0.0 and 1.0.
            top_p: Stability of the generated speech, between 0.0 and 1.0.
        """
        candidates = {
            "model": model,
            "voice": voice,
            "language": language,
            "format": format,
            "sample_rate": sample_rate,
            "temperature": temperature,
            "top_p": top_p,
        }
        for field_name, value in candidates.items():
            if is_given(value):
                setattr(self._opts, field_name, value)

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the configured session, creating a shared one on first use."""
        if not self._session:
            self._session = utils.http_context.http_session()

        return self._session

Text-to-Speech (TTS) plugin for LMNT.

Create a new instance of LMNT TTS.

See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes

Args

model
The model to use for synthesis. Default is "blizzard". Learn more at: https://docs.lmnt.com/guides/models
voice
The voice ID to use. Default is "leah". Find more amazing voices at https://app.lmnt.com/
language
Two-letter ISO 639-1 language code. Defaults to None. See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes#body-language
format
Output file format. Options: aac, mp3, mulaw, raw, wav. Default is "mp3".
sample_rate
Output sample rate in Hz. Default is 24000. See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes#body-sample-rate
api_key
API key for authentication. Defaults to the LMNT_API_KEY environment variable.
http_session
Optional aiohttp ClientSession. A new session is created if not provided.
temperature
Influences how expressive and emotionally varied the speech becomes. Lower values (like 0.3) create more neutral, consistent speaking styles. Higher values (like 1.0) allow for more dynamic emotional range and speaking styles. Default is 1.0.
top_p
Controls the stability of the generated speech. A lower value (like 0.3) produces more consistent, reliable speech. A higher value (like 0.9) gives more flexibility in how words are spoken, but might occasionally produce unusual intonations or speech patterns. Default is 0.8.

Ancestors

  • livekit.agents.tts.tts.TTS
  • abc.ABC
  • EventEmitter
  • typing.Generic

Methods

def synthesize(self,
text: str,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.lmnt.tts.ChunkedStream
Expand source code
def synthesize(
    self,
    text: str,
    *,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> ChunkedStream:
    """Create and return a chunked synthesis stream for *text*."""
    stream = ChunkedStream(tts=self, input_text=text, conn_options=conn_options)
    return stream
def update_options(self,
*,
model: NotGivenOr[LMNTModels] = NOT_GIVEN,
voice: NotGivenOr[str] = NOT_GIVEN,
language: NotGivenOr[LMNTLanguages] = NOT_GIVEN,
format: NotGivenOr[LMNTAudioFormats] = NOT_GIVEN,
sample_rate: NotGivenOr[LMNTSampleRate] = NOT_GIVEN,
temperature: NotGivenOr[float] = NOT_GIVEN,
top_p: NotGivenOr[float] = NOT_GIVEN) ‑> None
Expand source code
def update_options(
    self,
    *,
    model: NotGivenOr[LMNTModels] = NOT_GIVEN,
    voice: NotGivenOr[str] = NOT_GIVEN,
    language: NotGivenOr[LMNTLanguages] = NOT_GIVEN,
    format: NotGivenOr[LMNTAudioFormats] = NOT_GIVEN,
    sample_rate: NotGivenOr[LMNTSampleRate] = NOT_GIVEN,
    temperature: NotGivenOr[float] = NOT_GIVEN,
    top_p: NotGivenOr[float] = NOT_GIVEN,
) -> None:
    """
    Update the TTS options.

    Only explicitly supplied arguments are applied; the rest are untouched.

    Args:
        model: The model to use for synthesis. Learn more at: https://docs.lmnt.com/guides/models
        voice: The voice ID to update.
        language: Two-letter ISO 639-1 code.
            See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes#body-language
        format: Audio output format. Options: aac, mp3, mulaw, raw, wav.
        sample_rate: Output sample rate in Hz.
        temperature: Controls the expressiveness of the speech. A number between 0.0 and 1.0.
        top_p: Controls the stability of the generated speech. A number between 0.0 and 1.0.
    """
    candidates = {
        "model": model,
        "voice": voice,
        "language": language,
        "format": format,
        "sample_rate": sample_rate,
        "temperature": temperature,
        "top_p": top_p,
    }
    for field_name, value in candidates.items():
        if is_given(value):
            setattr(self._opts, field_name, value)

Update the TTS options.

Args

model
The model to use for synthesis. Learn more at: https://docs.lmnt.com/guides/models
voice
The voice ID to update.
language
Two-letter ISO 639-1 code. See: https://docs.lmnt.com/api-reference/speech/synthesize-speech-bytes#body-language
format
Audio output format. Options: aac, mp3, mulaw, raw, wav.
sample_rate
Output sample rate in Hz.
temperature
Controls the expressiveness of the speech. A number between 0.0 and 1.0.
top_p
Controls the stability of the generated speech. A number between 0.0 and 1.0.

Inherited members