Module livekit.plugins.mistralai

LiveKit plugin for Mistral AI models. Supports Chat, STT, and TTS models.

Classes

class LLM (client: Mistral | None = None,
api_key: NotGivenOr[str] = NOT_GIVEN,
model: NotGivenOr[ChatModels | str] = NOT_GIVEN,
max_completion_tokens: NotGivenOr[int] = NOT_GIVEN,
temperature: NotGivenOr[float] = NOT_GIVEN)
Expand source code
class LLM(llm.LLM):
    """Mistral AI chat-completion LLM for LiveKit agents."""

    def __init__(
        self,
        client: Mistral | None = None,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        model: NotGivenOr[ChatModels | str] = NOT_GIVEN,
        max_completion_tokens: NotGivenOr[int] = NOT_GIVEN,
        temperature: NotGivenOr[float] = NOT_GIVEN,
    ) -> None:
        """
        Create a new instance of MistralAI LLM.

        Args:
            client: Optional pre-configured MistralAI client instance.
            api_key: Your Mistral AI API key. If not provided, will use the MISTRAL_API_KEY environment variable.
            model: The Mistral AI model to use for completions, default is "ministral-8b-latest".
            max_completion_tokens: The max. number of tokens the LLM can output.
            temperature: The temperature to use the LLM with.
        """
        super().__init__()
        # Any option the caller left unset falls back to the plugin default
        # (model) or to None (forwarded to the API only when set).
        self._opts = _LLMOptions(
            model=model if is_given(model) else DEFAULT_MODEL,
            max_completion_tokens=(
                max_completion_tokens if is_given(max_completion_tokens) else None
            ),
            temperature=temperature if is_given(temperature) else None,
        )

        # A pre-built client wins; otherwise an API key (argument or env var)
        # is mandatory to construct one.
        secret = api_key if is_given(api_key) else os.environ.get("MISTRAL_API_KEY")
        if not client and not secret:
            raise ValueError("Mistral AI API key is required. Set MISTRAL_API_KEY or pass api_key")
        self._client = client or Mistral(api_key=secret)

    @property
    def model(self) -> str:
        # Identifier of the currently configured completion model.
        return self._opts.model

    @property
    def provider(self) -> str:
        # Constant provider tag for this plugin.
        return "MistralAI"

    def update_options(
        self,
        *,
        model: NotGivenOr[ChatModels | str] = NOT_GIVEN,
        max_completion_tokens: NotGivenOr[int] = NOT_GIVEN,
        temperature: NotGivenOr[float] = NOT_GIVEN,
    ) -> None:
        """
        Update the LLM options.

        Args:
            model: The model to use for completions
            max_completion_tokens: The max. number of tokens the LLM can output.
            temperature: The temperature to use the LLM with.
        """
        # Only explicitly supplied options overwrite the stored ones.
        for attr_name, value in (
            ("model", model),
            ("max_completion_tokens", max_completion_tokens),
            ("temperature", temperature),
        ):
            if is_given(value):
                setattr(self._opts, attr_name, value)

    def chat(
        self,
        *,
        chat_ctx: ChatContext,
        tools: list[llm.Tool] | None = None,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
        parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
        tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
        response_format: NotGivenOr[type[llm_utils.ResponseFormatT]] = NOT_GIVEN,
        extra_kwargs: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
    ) -> LLMStream:
        """Start a streaming chat completion with the configured model.

        Request kwargs are layered: caller-supplied ``extra_kwargs`` first,
        then the explicit per-call arguments, then the stored instance
        options (``max_tokens``, ``temperature``) — later layers override
        earlier ones on key collision.
        """
        request_args: dict[str, Any] = dict(extra_kwargs) if is_given(extra_kwargs) else {}

        for key, value in (
            ("parallel_tool_calls", parallel_tool_calls),
            ("tool_choice", tool_choice),
            ("response_format", response_format),
        ):
            if is_given(value):
                request_args[key] = value

        if self._opts.max_completion_tokens is not None:
            request_args["max_tokens"] = self._opts.max_completion_tokens
        if self._opts.temperature is not None:
            request_args["temperature"] = self._opts.temperature

        return LLMStream(
            self,
            model=self._opts.model,
            client=self._client,
            chat_ctx=chat_ctx,
            tools=tools if tools is not None else [],
            conn_options=conn_options,
            extra_kwargs=request_args,
        )

Helper class that provides a standard way to create an ABC using inheritance.

Create a new instance of MistralAI LLM.

Args

client
Optional pre-configured MistralAI client instance.
api_key
Your Mistral AI API key. If not provided, will use the MISTRAL_API_KEY environment variable.
model
The Mistral AI model to use for completions, default is "ministral-8b-latest".
max_completion_tokens
The max. number of tokens the LLM can output.
temperature
The temperature to use the LLM with.

Ancestors

  • livekit.agents.llm.llm.LLM
  • abc.ABC
  • EventEmitter
  • typing.Generic

Instance variables

prop model : str
Expand source code
@property
def model(self) -> str:
    return self._opts.model

Get the model name/identifier for this LLM instance.

Returns

The model name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their model information.

prop provider : str
Expand source code
@property
def provider(self) -> str:
    return "MistralAI"

Get the provider name/identifier for this LLM instance.

Returns

The provider name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their provider information.

Methods

def chat(self,
*,
chat_ctx: ChatContext,
tools: list[llm.Tool] | None = None,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0),
parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
response_format: NotGivenOr[type[llm_utils.ResponseFormatT]] = NOT_GIVEN,
extra_kwargs: NotGivenOr[dict[str, Any]] = NOT_GIVEN) ‑> livekit.plugins.mistralai.llm.LLMStream
Expand source code
def chat(
    self,
    *,
    chat_ctx: ChatContext,
    tools: list[llm.Tool] | None = None,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
    tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
    response_format: NotGivenOr[type[llm_utils.ResponseFormatT]] = NOT_GIVEN,
    extra_kwargs: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
) -> LLMStream:
    extra: dict[str, Any] = {}

    if is_given(extra_kwargs):
        extra.update(extra_kwargs)
    if is_given(parallel_tool_calls):
        extra["parallel_tool_calls"] = parallel_tool_calls
    if is_given(tool_choice):
        extra["tool_choice"] = tool_choice
    if is_given(response_format):
        extra["response_format"] = response_format
    if self._opts.max_completion_tokens is not None:
        extra["max_tokens"] = self._opts.max_completion_tokens
    if self._opts.temperature is not None:
        extra["temperature"] = self._opts.temperature

    return LLMStream(
        self,
        model=self._opts.model,
        client=self._client,
        chat_ctx=chat_ctx,
        tools=tools or [],
        conn_options=conn_options,
        extra_kwargs=extra,
    )
def update_options(self,
*,
model: NotGivenOr[ChatModels | str] = NOT_GIVEN,
max_completion_tokens: NotGivenOr[int] = NOT_GIVEN,
temperature: NotGivenOr[float] = NOT_GIVEN) ‑> None
Expand source code
def update_options(
    self,
    *,
    model: NotGivenOr[ChatModels | str] = NOT_GIVEN,
    max_completion_tokens: NotGivenOr[int] = NOT_GIVEN,
    temperature: NotGivenOr[float] = NOT_GIVEN,
) -> None:
    """
    Update the LLM options.

    Args:
        model: The model to use for completions
        max_completion_tokens: The max. number of tokens the LLM can output.
        temperature: The temperature to use the LLM with.
    """
    if is_given(model):
        self._opts.model = model
    if is_given(max_completion_tokens):
        self._opts.max_completion_tokens = max_completion_tokens
    if is_given(temperature):
        self._opts.temperature = temperature

Update the LLM options.

Args

model
The model to use for completions
max_completion_tokens
The max. number of tokens the LLM can output.
temperature
The temperature to use the LLM with.

Inherited members

class STT (client: Mistral | None = None,
api_key: NotGivenOr[str] = NOT_GIVEN,
model: NotGivenOr[str] = NOT_GIVEN,
language: NotGivenOr[str] = NOT_GIVEN,
context_bias: NotGivenOr[list[str]] = NOT_GIVEN,
target_streaming_delay_ms: NotGivenOr[int] = NOT_GIVEN,
vad: vad.VAD | None = None)
Expand source code
class STT(stt.STT):
    """Mistral AI speech-to-text for LiveKit agents.

    Supports both batch transcription models (offline recognize) and
    Voxtral realtime models (websocket streaming with interim results);
    the mode is derived from the model name via ``_is_realtime``.
    """

    def __init__(
        self,
        client: Mistral | None = None,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        model: NotGivenOr[str] = NOT_GIVEN,
        language: NotGivenOr[str] = NOT_GIVEN,
        context_bias: NotGivenOr[list[str]] = NOT_GIVEN,
        target_streaming_delay_ms: NotGivenOr[int] = NOT_GIVEN,
        vad: vad.VAD | None = None,
    ):
        """
        Create a new instance of MistralAI STT.

        Args:
            client: Optional pre-configured MistralAI client instance.
            api_key: Your Mistral AI API key. If not provided, will use the MISTRAL_API_KEY environment variable.
            model: The Mistral AI model to use for transcription, default is batch "voxtral-mini-latest".
            language: The optional language code to use for better transcription accuracy if language is already known (e.g., "fr" for French).
                Only used with batch models.
            context_bias: Up to 100 words or phrases to guide the model toward good spelling or names or domain-specific vocabulary.
                Only used with batch models.
            target_streaming_delay_ms: Target streaming delay in milliseconds for realtime mode. Only used with realtime models.
            vad: Voice Activity Detector used to trigger audio flush for realtime models (which lack server-side endpointing).
                When not provided, Silero VAD is auto-loaded with default settings. Only used with realtime models.
        """
        # Resolve NOT_GIVEN options to concrete values before building _STTOptions.
        resolved_model = model if is_given(model) else DEFAULT_MODEL
        resolved_language = LanguageCode(language) if is_given(language) else None
        resolved_context_bias = context_bias if is_given(context_bias) else None
        resolved_target_streaming_delay_ms = (
            target_streaming_delay_ms if is_given(target_streaming_delay_ms) else None
        )
        # Realtime models stream with interim results; batch models only
        # support offline (non-streaming) recognition.
        is_realtime = _is_realtime(resolved_model)
        super().__init__(
            capabilities=stt.STTCapabilities(
                streaming=is_realtime,
                interim_results=is_realtime,
                aligned_transcript=False,
                offline_recognize=not is_realtime,
            )
        )
        self._opts = _STTOptions(
            model=resolved_model,
            language=resolved_language,
            context_bias=resolved_context_bias,
            target_streaming_delay_ms=resolved_target_streaming_delay_ms,
        )

        # Realtime models have no server-side endpointing, so a local VAD is
        # required to decide when to flush audio; auto-load Silero if the
        # caller did not supply one.
        if is_realtime and vad is None:
            try:
                from livekit.plugins.silero import VAD as SileroVAD

                vad = SileroVAD.load()
            except ImportError as e:
                raise ImportError(
                    "livekit-plugins-silero is required for Voxtral realtime models (no server-side endpointing)."
                ) from e
        self._vad = vad

        mistral_api_key = api_key if is_given(api_key) else os.environ.get("MISTRAL_API_KEY")
        if not client and not mistral_api_key:
            raise ValueError("Mistral AI API key is required. Set MISTRAL_API_KEY or pass api_key")
        self._client = client or Mistral(api_key=mistral_api_key)
        # Track live streams weakly so update_options() can signal them to
        # reconnect without keeping them alive.
        self._streams: weakref.WeakSet[SpeechStream] = weakref.WeakSet()
        # Websocket connection pool for realtime transcription sessions.
        self._pool = utils.ConnectionPool[RealtimeConnection](
            connect_cb=self._connect_ws,
            close_cb=self._close_ws,
        )

    async def _connect_ws(self, timeout: float) -> RealtimeConnection:
        """Open a realtime transcription websocket, bounded by `timeout` seconds."""
        rt = RealtimeTranscription(self._client.sdk_configuration)
        return await asyncio.wait_for(
            rt.connect(
                model=self._opts.model,
                target_streaming_delay_ms=self._opts.target_streaming_delay_ms,
            ),
            timeout=timeout,
        )

    async def _close_ws(self, conn: RealtimeConnection) -> None:
        """Close a pooled realtime connection and stop its keepalive task."""
        await conn.close()
        # NOTE(review): reaches into the SDK connection's private websocket to
        # cancel its keepalive task, which conn.close() apparently does not —
        # confirm this is still needed after SDK upgrades.
        ws = conn._websocket
        if ws.keepalive_task is not None:
            ws.keepalive_task.cancel()

    def prewarm(self) -> None:
        # Pre-opening a websocket only makes sense for realtime models.
        if _is_realtime(self._opts.model):
            self._pool.prewarm()

    async def aclose(self) -> None:
        # Closing the pool tears down all pooled realtime connections.
        await self._pool.aclose()

    @property
    def model(self) -> str:
        # Identifier of the currently configured transcription model.
        return self._opts.model

    @property
    def provider(self) -> str:
        # Constant provider tag for this plugin.
        return "MistralAI"

    def update_options(
        self,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        context_bias: NotGivenOr[list[str]] = NOT_GIVEN,
        target_streaming_delay_ms: NotGivenOr[int] = NOT_GIVEN,
    ) -> None:
        """
        Update the STT options.

        Args:
            language: The optional language code to use for better transcription accuracy if language is already known (e.g., "fr" for French).
                Only used with batch models.
            context_bias: Up to 100 words or phrases to guide the model toward good spelling or names or domain-specific vocabulary.
                Only used with batch models.
            target_streaming_delay_ms: Target streaming delay in milliseconds for realtime mode. Only used with realtime models.
        """
        if is_given(language):
            self._opts.language = LanguageCode(language)
        if is_given(context_bias):
            self._opts.context_bias = context_bias
        if is_given(target_streaming_delay_ms):
            self._opts.target_streaming_delay_ms = target_streaming_delay_ms
            # The delay is fixed per websocket session: drop pooled
            # connections and ask live streams to reconnect with the new value.
            self._pool.invalidate()
            for stream in self._streams:
                stream._reconnect_event.set()

    async def _recognize_impl(
        self,
        buffer: AudioBuffer,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions,
    ) -> stt.SpeechEvent:
        """Batch-transcribe `buffer` and return a single FINAL_TRANSCRIPT event."""
        try:
            # NOTE(review): a per-call language override is persisted into
            # self._opts, affecting subsequent calls — confirm this is intended.
            if is_given(language):
                self._opts.language = LanguageCode(language)
            data = rtc.combine_audio_frames(buffer).to_wav_bytes()

            resp = await self._client.audio.transcriptions.complete_async(
                model=self._opts.model,
                file={"content": data, "file_name": "audio.wav"},
                language=self._opts.language if self._opts.language else None,
                context_bias=self._opts.context_bias if self._opts.context_bias else None,
                # Segment timestamps are only requested when no language is
                # pinned — presumably an API constraint; verify against the
                # Mistral transcription API docs.
                timestamp_granularities=["segment"] if not self._opts.language else None,
            )

            return stt.SpeechEvent(
                type=stt.SpeechEventType.FINAL_TRANSCRIPT,
                alternatives=[
                    stt.SpeechData(
                        text=resp.text,
                        # Prefer the language detected by the API; fall back to
                        # the configured one, then to an empty code.
                        language=LanguageCode(resp.language)
                        if resp.language
                        else self._opts.language or LanguageCode(""),
                        start_time=resp.segments[0].start if resp.segments else 0,
                        end_time=resp.segments[-1].end if resp.segments else 0,
                        words=[
                            TimedString(
                                text=segment.text,
                                start_time=segment.start,
                                end_time=segment.end,
                            )
                            for segment in resp.segments
                        ]
                        if resp.segments
                        else None,
                    ),
                ],
            )

        except SDKError as e:
            # Map SDK timeout-ish statuses to the framework's timeout error.
            if e.status_code in (408, 504):
                raise APITimeoutError() from e
            raise APIStatusError(e.message, status_code=e.status_code, body=e.body) from e
        except Exception as e:
            raise APIConnectionError() from e

    def stream(
        self,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> SpeechStream:
        """Open a realtime transcription stream backed by the connection pool."""
        stream = SpeechStream(
            stt=self,
            pool=self._pool,
            conn_options=conn_options,
            language=language,
            vad_instance=self._vad,
        )
        # Register weakly so update_options() can trigger a reconnect.
        self._streams.add(stream)
        return stream

Helper class that provides a standard way to create an ABC using inheritance.

Create a new instance of MistralAI STT.

Args

client
Optional pre-configured MistralAI client instance.
api_key
Your Mistral AI API key. If not provided, will use the MISTRAL_API_KEY environment variable.
model
The Mistral AI model to use for transcription, default is batch "voxtral-mini-latest".
language
The optional language code to use for better transcription accuracy if language is already known (e.g., "fr" for French). Only used with batch models.
context_bias
Up to 100 words or phrases to guide the model toward good spelling or names or domain-specific vocabulary. Only used with batch models.
target_streaming_delay_ms
Target streaming delay in milliseconds for realtime mode. Only used with realtime models.
vad
Voice Activity Detector used to trigger audio flush for realtime models (which lack server-side endpointing). When not provided, Silero VAD is auto-loaded with default settings. Only used with realtime models.

Ancestors

  • livekit.agents.stt.stt.STT
  • abc.ABC
  • EventEmitter
  • typing.Generic

Instance variables

prop model : str
Expand source code
@property
def model(self) -> str:
    return self._opts.model

Get the model name/identifier for this STT instance.

Returns

The model name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their model information.

prop provider : str
Expand source code
@property
def provider(self) -> str:
    return "MistralAI"

Get the provider name/identifier for this STT instance.

Returns

The provider name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their provider information.

Methods

async def aclose(self) ‑> None
Expand source code
async def aclose(self) -> None:
    await self._pool.aclose()

Close the STT, and every stream/requests associated with it

def prewarm(self) ‑> None
Expand source code
def prewarm(self) -> None:
    if _is_realtime(self._opts.model):
        self._pool.prewarm()

Pre-warm connection to the STT service

def stream(self,
*,
language: NotGivenOr[str] = NOT_GIVEN,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.mistralai.stt.SpeechStream
Expand source code
def stream(
    self,
    *,
    language: NotGivenOr[str] = NOT_GIVEN,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> SpeechStream:
    stream = SpeechStream(
        stt=self,
        pool=self._pool,
        conn_options=conn_options,
        language=language,
        vad_instance=self._vad,
    )
    self._streams.add(stream)
    return stream
def update_options(self,
*,
language: NotGivenOr[str] = NOT_GIVEN,
context_bias: NotGivenOr[list[str]] = NOT_GIVEN,
target_streaming_delay_ms: NotGivenOr[int] = NOT_GIVEN) ‑> None
Expand source code
def update_options(
    self,
    *,
    language: NotGivenOr[str] = NOT_GIVEN,
    context_bias: NotGivenOr[list[str]] = NOT_GIVEN,
    target_streaming_delay_ms: NotGivenOr[int] = NOT_GIVEN,
) -> None:
    """
    Update the STT options.

    Args:
        language: The optional language code to use for better transcription accuracy if language is already known (e.g., "fr" for French).
            Only used with batch models.
        context_bias: Up to 100 words or phrases to guide the model toward good spelling or names or domain-specific vocabulary.
            Only used with batch models.
        target_streaming_delay_ms: Target streaming delay in milliseconds for realtime mode. Only used with realtime models.
    """
    if is_given(language):
        self._opts.language = LanguageCode(language)
    if is_given(context_bias):
        self._opts.context_bias = context_bias
    if is_given(target_streaming_delay_ms):
        self._opts.target_streaming_delay_ms = target_streaming_delay_ms
        self._pool.invalidate()
        for stream in self._streams:
            stream._reconnect_event.set()

Update the STT options.

Args

language
The optional language code to use for better transcription accuracy if language is already known (e.g., "fr" for French). Only used with batch models.
context_bias
Up to 100 words or phrases to guide the model toward good spelling or names or domain-specific vocabulary. Only used with batch models.
target_streaming_delay_ms
Target streaming delay in milliseconds for realtime mode. Only used with realtime models.

Inherited members

class TTS (client: Mistral | None = None,
api_key: NotGivenOr[str] = NOT_GIVEN,
model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
voice: NotGivenOr[TTSVoices | str] = NOT_GIVEN,
ref_audio: NotGivenOr[str] = NOT_GIVEN,
response_format: NotGivenOr[RESPONSE_FORMAT] = NOT_GIVEN)
Expand source code
class TTS(tts.TTS):
    """Mistral AI text-to-speech for LiveKit agents (non-streaming synthesis)."""

    def __init__(
        self,
        client: Mistral | None = None,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
        voice: NotGivenOr[TTSVoices | str] = NOT_GIVEN,
        ref_audio: NotGivenOr[str] = NOT_GIVEN,
        response_format: NotGivenOr[RESPONSE_FORMAT] = NOT_GIVEN,
    ) -> None:
        """
        Create a new instance of MistralAI TTS.

        Args:
            client: Optional pre-configured MistralAI client instance.
            api_key: Your Mistral AI API key. If not provided, will use the MISTRAL_API_KEY environment variable.
            model: The Mistral AI model to use for text-to-speech, default is "voxtral-mini-tts-latest".
            voice: The voice ID to use for synthesis. Mutually exclusive with ``ref_audio``. Defaults to ``en_paul_neutral`` when neither is given.
            ref_audio: Base64-encoded audio sample (3–25 s) for zero-shot voice cloning. Mutually exclusive with ``voice``.
            response_format: The audio format of synthesized speech, between ``mp3``, ``wav``, ``pcm``, ``opus`` or ``flac``. Defaults to ``pcm``.
        """
        if is_given(voice) and is_given(ref_audio):
            raise ValueError("Only one of 'voice' or 'ref_audio' may be provided, not both")

        # The default voice applies only when neither a voice ID nor a
        # cloning sample was supplied; ref_audio implies voice=None.
        if is_given(voice):
            chosen_voice = voice
        elif is_given(ref_audio):
            chosen_voice = None
        else:
            chosen_voice = DEFAULT_VOICE

        super().__init__(
            capabilities=tts.TTSCapabilities(streaming=False),
            sample_rate=SAMPLE_RATE,
            num_channels=NUM_CHANNELS,
        )
        self._opts = _TTSOptions(
            model=model if is_given(model) else DEFAULT_MODEL,
            voice=chosen_voice,
            ref_audio=ref_audio if is_given(ref_audio) else None,
            response_format=(
                response_format if is_given(response_format) else DEFAULT_RESPONSE_FORMAT
            ),
        )

        # A pre-built client wins; otherwise an API key (argument or env var)
        # is mandatory to construct one.
        secret = api_key if is_given(api_key) else os.environ.get("MISTRAL_API_KEY")
        if not client and not secret:
            raise ValueError("Mistral AI API key is required. Set MISTRAL_API_KEY or pass api_key")
        self._client = client or Mistral(api_key=secret)

    @property
    def model(self) -> str:
        # Identifier of the currently configured TTS model.
        return self._opts.model

    @property
    def provider(self) -> str:
        # Constant provider tag for this plugin.
        return "MistralAI"

    def update_options(
        self,
        *,
        model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
        voice: NotGivenOr[TTSVoices | str] = NOT_GIVEN,
        ref_audio: NotGivenOr[str] = NOT_GIVEN,
        response_format: NotGivenOr[RESPONSE_FORMAT] = NOT_GIVEN,
    ) -> None:
        """
        Update the TTS options.

        Args:
            model: The model to use for text-to-speech. Clears ``ref_audio``.
            voice: The voice ID to use for synthesis.
            ref_audio: Base64-encoded audio sample for zero-shot voice cloning. Clears ``voice``.
            response_format: The audio format of synthesized speech, between ``mp3``, ``wav``, ``pcm``, ``opus`` or ``flac``. Defaults to ``pcm``.
        """
        if is_given(voice) and is_given(ref_audio):
            raise ValueError("Only one of 'voice' or 'ref_audio' may be provided, not both")
        if is_given(model):
            self._opts.model = model
        # voice and ref_audio are mutually exclusive: setting one resets the other.
        if is_given(voice):
            self._opts.voice, self._opts.ref_audio = voice, None
        if is_given(ref_audio):
            self._opts.ref_audio, self._opts.voice = ref_audio, None
        if is_given(response_format):
            self._opts.response_format = response_format

    def synthesize(
        self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> tts.ChunkedStream:
        """Synthesize `text` in one request, returned as a chunked audio stream."""
        return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)

Helper class that provides a standard way to create an ABC using inheritance.

Create a new instance of MistralAI TTS.

Args

client
Optional pre-configured MistralAI client instance.
api_key
Your Mistral AI API key. If not provided, will use the MISTRAL_API_KEY environment variable.
model
The Mistral AI model to use for text-to-speech, default is "voxtral-mini-tts-latest".
voice
The voice ID to use for synthesis. Mutually exclusive with ref_audio. Defaults to en_paul_neutral when neither is given.
ref_audio
Base64-encoded audio sample (3–25 s) for zero-shot voice cloning. Mutually exclusive with voice.
response_format
The audio format of synthesized speech, between mp3, wav, pcm, opus or flac. Defaults to pcm.

Ancestors

  • livekit.agents.tts.tts.TTS
  • abc.ABC
  • EventEmitter
  • typing.Generic

Instance variables

prop model : str
Expand source code
@property
def model(self) -> str:
    return self._opts.model

Get the model name/identifier for this TTS instance.

Returns

The model name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their model information.

prop provider : str
Expand source code
@property
def provider(self) -> str:
    return "MistralAI"

Get the provider name/identifier for this TTS instance.

Returns

The provider name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their provider information.

Methods

def synthesize(self,
text: str,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.agents.tts.tts.ChunkedStream
Expand source code
def synthesize(
    self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
) -> tts.ChunkedStream:
    return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)
def update_options(self,
*,
model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
voice: NotGivenOr[TTSVoices | str] = NOT_GIVEN,
ref_audio: NotGivenOr[str] = NOT_GIVEN,
response_format: NotGivenOr[RESPONSE_FORMAT] = NOT_GIVEN) ‑> None
Expand source code
def update_options(
    self,
    *,
    model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
    voice: NotGivenOr[TTSVoices | str] = NOT_GIVEN,
    ref_audio: NotGivenOr[str] = NOT_GIVEN,
    response_format: NotGivenOr[RESPONSE_FORMAT] = NOT_GIVEN,
) -> None:
    """
    Update the TTS options.

    Args:
        model: The model to use for text-to-speech. Clears ``ref_audio``.
        voice: The voice ID to use for synthesis.
        ref_audio: Base64-encoded audio sample for zero-shot voice cloning. Clears ``voice``.
        response_format: The audio format of synthesized speech, between ``mp3``, ``wav``, ``pcm``, ``opus`` or ``flac``. Defaults to ``pcm``.
    """
    if is_given(voice) and is_given(ref_audio):
        raise ValueError("Only one of 'voice' or 'ref_audio' may be provided, not both")
    if is_given(model):
        self._opts.model = model
    if is_given(voice):
        self._opts.voice = voice
        self._opts.ref_audio = None
    if is_given(ref_audio):
        self._opts.ref_audio = ref_audio
        self._opts.voice = None
    if is_given(response_format):
        self._opts.response_format = response_format

Update the TTS options.

Args

model
The model to use for text-to-speech. Clears ref_audio.
voice
The voice ID to use for synthesis.
ref_audio
Base64-encoded audio sample for zero-shot voice cloning. Clears voice.
response_format
The audio format of synthesized speech, between mp3, wav, pcm, opus or flac. Defaults to pcm.

Inherited members