Module `livekit.plugins.gnani`

Gnani Vachana plugin for LiveKit Agents

Support for speech-to-text and text-to-speech with Gnani's Vachana platform.

Vachana provides high-accuracy STT and low-latency TTS for Indian languages, including multilingual and code-switching scenarios.

See https://docs.livekit.io/agents/integrations/stt/gnani/ for more information.

Classes

class STT (*, language: GnaniSTTLanguages | str = 'en-IN', api_key: str | None = None, sample_rate: int = 16000, base_url: str = 'https://api.vachana.ai', preferred_language: str | None = None, format: GnaniSTTFormat = 'verbatim', itn_native_numerals: bool = False, use_streaming: bool = True, **kwargs: Any)

Expand source code

class STT(stt.STT):
    """Gnani Vachana Speech-to-Text implementation.

    Provides speech-to-text functionality using Gnani's Vachana platform.
    Supports REST recognition and real-time streaming via WebSocket.

    Args:
        language: BCP-47 language code (e.g. "hi-IN", "en-IN").
        api_key: Gnani API key (falls back to GNANI_API_KEY env var).
        sample_rate: Audio sample rate for streaming (8000, 16000, 44100, or 48000).
        base_url: Vachana API base URL. A trailing slash is tolerated for
            REST recognition.
        preferred_language: Force single-language model for this code.
        format: "verbatim" (default) or "transcribe" (enables ITN).
        itn_native_numerals: Render digits in native script when format="transcribe".
        use_streaming: When True (default), transcribe over the WebSocket stream
            (wss://.../stt/v3/stream). When False, use REST recognition
            (POST /stt/v3), which requires a local VAD — LiveKit wraps the STT
            with ``stt.StreamAdapter`` automatically.
        **kwargs: Not used directly; handed to ``_check_deprecated_args``.

    Raises:
        ValueError: If no API key is available, or if ``sample_rate`` is not
            one of ``STREAM_SUPPORTED_SAMPLE_RATES``.
    """

    def __init__(
        self,
        *,
        language: GnaniSTTLanguages | str = "en-IN",
        api_key: str | None = None,
        sample_rate: int = SAMPLE_RATE_16K,
        base_url: str = GNANI_STT_BASE_URL,
        preferred_language: str | None = None,
        format: GnaniSTTFormat = "verbatim",
        itn_native_numerals: bool = False,
        use_streaming: bool = True,
        **kwargs: Any,
    ) -> None:
        super().__init__(
            capabilities=stt.STTCapabilities(
                streaming=use_streaming,
                interim_results=False,
                aligned_transcript=False,
            )
        )

        _check_deprecated_args(kwargs)

        # An explicit key wins; the environment variable is only a fallback.
        self._api_key = api_key or os.environ.get("GNANI_API_KEY")
        if not self._api_key:
            raise ValueError(
                "Gnani API key is required. "
                "Provide it directly or set GNANI_API_KEY environment variable."
            )

        if sample_rate not in STREAM_SUPPORTED_SAMPLE_RATES:
            allowed = ", ".join(str(r) for r in STREAM_SUPPORTED_SAMPLE_RATES)
            raise ValueError(f"sample_rate must be one of {allowed}, got {sample_rate}")

        self._opts = GnaniSTTOptions(
            api_key=self._api_key,
            language=language,
            sample_rate=sample_rate,
            base_url=base_url,
            preferred_language=preferred_language,
            format=format,
            itn_native_numerals=itn_native_numerals,
            use_streaming=use_streaming,
        )
        # Created lazily by _ensure_session().
        self._session: aiohttp.ClientSession | None = None

    @property
    def model(self) -> str:
        """Return the fixed model identifier reported by this STT."""
        return "vachana-stt-v3"

    @property
    def provider(self) -> str:
        """Return the provider name reported by this STT."""
        return "Gnani"

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the cached HTTP session, fetching it on first use."""
        if not self._session:
            self._session = utils.http_context.http_session()
        return self._session

    @staticmethod
    def _single_attempt(conn_options: APIConnectOptions) -> APIConnectOptions:
        """Return a copy of ``conn_options`` with ``max_retry`` forced to 0.

        The retry interval and timeout are carried over unchanged.
        """
        return APIConnectOptions(
            max_retry=0,
            retry_interval=conn_options.retry_interval,
            timeout=conn_options.timeout,
        )

    async def recognize(
        self,
        buffer: AudioBuffer,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> stt.SpeechEvent:
        """Recognize ``buffer`` via the base implementation with retries disabled."""
        return await super().recognize(
            buffer,
            language=language,
            conn_options=self._single_attempt(conn_options),
        )

    async def _recognize_impl(
        self,
        buffer: AudioBuffer,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> stt.SpeechEvent:
        """Send ``buffer`` as a WAV upload to ``POST {base_url}/stt/v3``.

        Args:
            buffer: Audio to transcribe; its frames are combined into one WAV payload.
            language: Per-call language override; falls back to the configured language.
            conn_options: Supplies the request timeout.

        Returns:
            A ``FINAL_TRANSCRIPT`` speech event holding the returned transcript.

        Raises:
            APIStatusError: If the service answers with a non-200 status.
            APITimeoutError: If the request times out.
            APIConnectionError: For any other failure while performing the request.
        """
        lang = language if is_given(language) else self._opts.language

        wav_bytes = rtc.combine_audio_frames(buffer).to_wav_bytes()

        form_data = aiohttp.FormData()
        form_data.add_field("audio_file", wav_bytes, filename="audio.wav", content_type="audio/wav")
        form_data.add_field("language_code", lang)
        form_data.add_field("format", self._opts.format)

        # Optional fields are only sent when explicitly configured.
        if self._opts.preferred_language is not None:
            form_data.add_field("preferred_language", self._opts.preferred_language)
        if self._opts.itn_native_numerals:
            form_data.add_field("itn_native_numerals", "true")

        headers: dict[str, str] = {
            "X-API-Key-ID": self._opts.api_key,
        }

        # Drop any trailing slash so a base_url like "https://host/" does not
        # produce a "//stt/v3" request path.
        api_base = self._opts.base_url.rstrip("/")

        try:
            async with self._ensure_session().post(
                url=f"{api_base}/stt/v3",
                data=form_data,
                headers=headers,
                timeout=aiohttp.ClientTimeout(
                    total=conn_options.timeout,
                    sock_connect=conn_options.timeout,
                ),
            ) as res:
                if res.status != 200:
                    error_text = await res.text()
                    logger.error("Gnani STT API error: %s - %s", res.status, error_text)
                    raise APIStatusError(
                        message=f"Gnani STT API Error ({res.status}): {error_text}",
                        status_code=res.status,
                        body=error_text,
                    )

                response_json = await res.json()
                # `or ""` also covers an explicit JSON null, which .get()'s
                # default would let through as None.
                transcript = response_json.get("transcript") or ""
                request_id = response_json.get("request_id") or ""

                return stt.SpeechEvent(
                    type=stt.SpeechEventType.FINAL_TRANSCRIPT,
                    request_id=request_id,
                    alternatives=[
                        stt.SpeechData(
                            language=LanguageCode(lang),
                            text=transcript,
                            # No confidence is read from the response; a
                            # fixed value is reported.
                            confidence=1.0,
                        )
                    ],
                )

        except asyncio.TimeoutError as e:
            raise APITimeoutError("Gnani STT API request timed out") from e
        except (APIStatusError, APIConnectionError, APITimeoutError):
            # Already in the framework's error vocabulary; pass through untouched.
            raise
        except Exception as e:
            raise APIConnectionError(f"Gnani STT error: {e}") from e

    def stream(
        self,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> SpeechStream:
        """Open a recognition stream.

        With ``use_streaming`` disabled the call is delegated to the base class.
        Otherwise a WebSocket ``SpeechStream`` is created from a copy of the
        options (so a ``language`` override stays local to that stream) with
        retries disabled.
        """
        if not self._opts.use_streaming:
            return cast(
                "SpeechStream",
                super().stream(language=language, conn_options=conn_options),
            )

        opts = replace(self._opts)
        if is_given(language):
            opts.language = language
        return SpeechStream(
            stt=self,
            opts=opts,
            conn_options=self._single_attempt(conn_options),
        )

    async def aclose(self) -> None:
        """Do nothing; this class performs no cleanup on close."""
        pass

Gnani Vachana Speech-to-Text implementation.

Provides speech-to-text functionality using Gnani's Vachana platform. Supports REST recognition and real-time streaming via WebSocket.

Args

language: BCP-47 language code (e.g. "hi-IN", "en-IN").
api_key: Gnani API key (falls back to GNANI_API_KEY env var).
sample_rate: Audio sample rate for streaming (8000, 16000, 44100, or 48000).
base_url: Vachana API base URL.
preferred_language: Force single-language model for this code.
format: "verbatim" (default) or "transcribe" (enables ITN).
itn_native_numerals: Render digits in native script when format="transcribe".
use_streaming: When True (default), transcribe over the WebSocket stream (wss://…/stt/v3/stream). When False, use REST recognition (POST /stt/v3), which requires a local VAD — LiveKit wraps the STT with stt.StreamAdapter automatically.

Ancestors

livekit.agents.stt.stt.STT
abc.ABC
EventEmitter
typing.Generic

Instance variables

prop model : str

Expand source code

@property
def model(self) -> str:
    """Return the fixed model identifier reported by this STT."""
    return "vachana-stt-v3"

Get the model name/identifier for this STT instance.

Returns

The model name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their model information.

prop provider : str

Expand source code

@property
def provider(self) -> str:
    """Return the provider name reported by this STT."""
    return "Gnani"

Get the provider name/identifier for this STT instance.

Returns

The provider name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their provider information.

Methods

async def aclose(self) ‑> None

Expand source code

async def aclose(self) -> None:
    """Do nothing; this STT performs no cleanup on close."""
    return None

Close the STT and every stream and request associated with it.

async def recognize(self, buffer: AudioBuffer, *, language: NotGivenOr[str] = NOT_GIVEN, conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> stt.SpeechEvent

Expand source code

async def recognize(
    self,
    buffer: AudioBuffer,
    *,
    language: NotGivenOr[str] = NOT_GIVEN,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> stt.SpeechEvent:
    """Recognize ``buffer`` through the base class with retries disabled.

    The caller's ``conn_options`` are first passed through
    ``_single_attempt`` and the result is handed to the base ``recognize``.
    """
    no_retry_options = self._single_attempt(conn_options)
    return await super().recognize(buffer, language=language, conn_options=no_retry_options)

def stream(self, *, language: NotGivenOr[str] = NOT_GIVEN, conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.gnani.stt.SpeechStream

Expand source code

def stream(
    self,
    *,
    language: NotGivenOr[str] = NOT_GIVEN,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> SpeechStream:
    """Open a recognition stream.

    When ``use_streaming`` is enabled, a ``SpeechStream`` is built from a copy
    of the configured options (with ``language`` applied if given) and
    single-attempt connection options. Otherwise the base class ``stream`` is
    called with the arguments unchanged.
    """
    if self._opts.use_streaming:
        # Work on a copy so the per-stream language override does not leak
        # back into the shared options.
        stream_opts = replace(self._opts)
        if is_given(language):
            stream_opts.language = language
        return SpeechStream(
            stt=self,
            opts=stream_opts,
            conn_options=self._single_attempt(conn_options),
        )

    fallback = super().stream(language=language, conn_options=conn_options)
    return cast("SpeechStream", fallback)

Inherited members

EventEmitter:
- emit
- off
- on
- once

class SpeechStream (*, stt: STT, opts: GnaniSTTOptions, conn_options: APIConnectOptions)

Expand source code

class SpeechStream(stt.RecognizeStream):
    """WebSocket-based streaming STT for Gnani Vachana.

    Connects to wss://api.vachana.ai/stt/v3/stream and sends raw PCM audio
    in 1024-byte chunks (512 samples, 16-bit mono).

    Args:
        stt: The owning ``STT`` instance.
        opts: Options for this stream (language, sample rate, API key, ...).
        conn_options: Connection options; ``timeout`` bounds the WebSocket
            opening handshake and the wait for the first server message.
    """

    def __init__(
        self,
        *,
        stt: STT,
        opts: GnaniSTTOptions,
        conn_options: APIConnectOptions,
    ) -> None:
        super().__init__(
            stt=stt,
            conn_options=conn_options,
            sample_rate=opts.sample_rate,
        )
        self._opts = opts
        # Keep the caller's timeout so the handshake honours it instead of a
        # hard-coded value (the REST path already uses conn_options.timeout).
        self._connect_timeout = conn_options.timeout

    def _build_ws_url(self) -> str:
        """Derive the streaming WebSocket URL from the configured base URL.

        ``https://`` maps to ``wss://`` and ``http://`` to ``ws://``; a base
        URL without a scheme is treated as a host and given ``wss://``. A
        trailing slash on the base URL is ignored.
        """
        base = self._opts.base_url.rstrip("/")
        if base.startswith("https://"):
            ws_base = "wss://" + base[len("https://") :]
        elif base.startswith("http://"):
            ws_base = "ws://" + base[len("http://") :]
        else:
            ws_base = "wss://" + base
        return f"{ws_base}/stt/v3/stream"

    async def _run(self) -> None:
        """Run one streaming session: connect, then pump audio and events.

        Raises:
            APIConnectionError: If the socket closes unexpectedly or any other
                non-API error occurs.
            APITimeoutError: If connecting or the initial handshake times out.
            APIStatusError: If the server reports a stream error.
        """
        import websockets

        ws_url = self._build_ws_url()
        headers: dict[str, str] = {
            "x-api-key-id": self._opts.api_key,
            "lang_code": self._opts.language,
            "x-sample-rate": str(self._opts.sample_rate),
        }
        # Optional headers are only sent when they differ from the defaults.
        if self._opts.format != "verbatim":
            headers["x-format"] = self._opts.format
        if self._opts.preferred_language is not None:
            headers["preferred_language"] = self._opts.preferred_language
        if self._opts.itn_native_numerals:
            headers["itn_native_numerals"] = "true"

        try:
            async with websockets.connect(
                ws_url,
                additional_headers=headers,
                open_timeout=self._connect_timeout,
                ping_interval=20,
                ping_timeout=20,
                close_timeout=10,
            ) as ws:
                # The first message is expected to be a "connected" notice; an
                # unexpected one is logged but does not abort the session.
                connected_msg = await asyncio.wait_for(ws.recv(), timeout=self._connect_timeout)
                connected_data = json.loads(connected_msg)
                if connected_data.get("type") != "connected":
                    logger.warning("Unexpected first message from Gnani STT: %s", connected_data)

                send_task = asyncio.create_task(self._send_audio(ws), name="gnani-stt-send")
                recv_task = asyncio.create_task(self._recv_messages(ws), name="gnani-stt-recv")

                try:
                    # Wait for send to finish; if recv errors first, propagate it.
                    done, _ = await asyncio.wait(
                        [send_task, recv_task],
                        return_when=asyncio.FIRST_COMPLETED,
                    )
                    for task in done:
                        task.result()

                    # All audio is sent: give the receiver a short grace
                    # period for trailing messages. shield() keeps the
                    # wait_for timeout from cancelling recv_task itself.
                    if send_task.done() and not recv_task.done():
                        with contextlib.suppress(asyncio.TimeoutError):
                            await asyncio.wait_for(asyncio.shield(recv_task), timeout=1.0)
                finally:
                    await utils.aio.gracefully_cancel(send_task, recv_task)

        except websockets.exceptions.ConnectionClosed as e:
            raise APIConnectionError(f"Gnani STT WebSocket closed unexpectedly: {e}") from e
        except asyncio.TimeoutError as e:
            raise APITimeoutError("Gnani STT WebSocket connection timed out") from e
        except (APIConnectionError, APIStatusError, APITimeoutError):
            raise
        except Exception as e:
            raise APIConnectionError(f"Gnani STT WebSocket error: {e}") from e

    async def _send_audio(self, ws: Any) -> None:
        """Forward input audio to ``ws`` in ``STREAM_CHUNK_BYTES``-sized messages.

        Frames are accumulated in a buffer and full chunks are sent as soon as
        they are available. A flush sentinel, or the end of the input channel,
        sends whatever partial chunk remains.
        """
        audio_buffer = bytearray()

        async for data in self._input_ch:
            if isinstance(data, self._FlushSentinel):
                if audio_buffer:
                    await ws.send(bytes(audio_buffer))
                    audio_buffer.clear()
                continue

            frame: rtc.AudioFrame = data
            raw_pcm = frame.data.tobytes()
            audio_buffer.extend(raw_pcm)

            while len(audio_buffer) >= STREAM_CHUNK_BYTES:
                chunk = bytes(audio_buffer[:STREAM_CHUNK_BYTES])
                # Remove the sent prefix in place rather than rebinding to a
                # sliced copy of the remainder on every chunk.
                del audio_buffer[:STREAM_CHUNK_BYTES]
                await ws.send(chunk)

        if audio_buffer:
            await ws.send(bytes(audio_buffer))

    async def _recv_messages(self, ws: Any) -> None:
        """Translate server messages into speech events on the event channel.

        Binary messages, empty transcripts and ``processing`` notices are
        ignored.

        Raises:
            APIStatusError: If the server sends an ``error`` message.
            APIConnectionError: For any other failure while receiving or
                decoding messages.
        """
        try:
            async for msg in ws:
                if isinstance(msg, bytes):
                    continue

                data = json.loads(msg)
                msg_type = data.get("type", "")

                if msg_type == "transcript":
                    text = data.get("text", "")
                    if not text:
                        continue

                    self._event_ch.send_nowait(
                        stt.SpeechEvent(
                            type=stt.SpeechEventType.FINAL_TRANSCRIPT,
                            request_id=data.get("segment_id", ""),
                            alternatives=[
                                stt.SpeechData(
                                    language=LanguageCode(self._opts.language),
                                    text=text,
                                    confidence=1.0,
                                )
                            ],
                        )
                    )

                elif msg_type in ("speech_start", "vad_start"):
                    self._event_ch.send_nowait(
                        stt.SpeechEvent(
                            type=stt.SpeechEventType.START_OF_SPEECH,
                        )
                    )

                elif msg_type in ("speech_end", "vad_end"):
                    self._event_ch.send_nowait(
                        stt.SpeechEvent(
                            type=stt.SpeechEventType.END_OF_SPEECH,
                        )
                    )

                elif msg_type == "processing":
                    pass

                elif msg_type == "error":
                    error_msg = data.get("message", "Unknown error")
                    logger.error("Gnani STT stream error: %s", error_msg)
                    raise APIStatusError(
                        message=f"Gnani STT stream error: {error_msg}",
                        status_code=500,
                        body=error_msg,
                    )

        except asyncio.CancelledError:
            raise
        except (APIStatusError, APIConnectionError, APITimeoutError):
            raise
        except Exception as e:
            raise APIConnectionError(f"Error receiving Gnani STT messages: {e}") from e

WebSocket-based streaming STT for Gnani Vachana.

Connects to wss://api.vachana.ai/stt/v3/stream and sends raw PCM audio in 1024-byte chunks (512 samples, 16-bit mono).

Args

sample_rate: int or None, optional. The desired sample rate for the audio input. If specified, the audio input will be automatically resampled to match the given sample rate before being processed for speech-to-text. If not provided (None), the input will retain its original sample rate.

Ancestors

livekit.agents.stt.stt.RecognizeStream
abc.ABC

class SynthesizeStream (*, tts: TTS, conn_options: APIConnectOptions)

Expand source code

class SynthesizeStream(tts.SynthesizeStream):
    """WebSocket-based streaming TTS — wss://api.vachana.ai/api/v1/tts.

    Args:
        tts: The owning ``TTS`` instance; its options are copied at
            construction time.
        conn_options: Connection options forwarded to the base class.
    """

    def __init__(self, *, tts: TTS, conn_options: APIConnectOptions):
        super().__init__(tts=tts, conn_options=conn_options)
        self._tts: TTS = tts
        # Copy the options so this stream keeps the values it was created with.
        self._opts = replace(tts._opts)

    def _build_ws_url(self) -> str:
        """Derive the TTS WebSocket URL from the configured base URL.

        ``https://`` maps to ``wss://`` and ``http://`` to ``ws://``; a base
        URL without a scheme is treated as a host and given ``wss://``. A
        trailing slash on the base URL is ignored.
        """
        base = self._opts.base_url.rstrip("/")
        if base.startswith("https://"):
            ws_base = "wss://" + base[len("https://") :]
        elif base.startswith("http://"):
            ws_base = "ws://" + base[len("http://") :]
        else:
            ws_base = "wss://" + base
        return f"{ws_base}/api/v1/tts"

    async def _run(self, output_emitter: tts.AudioEmitter) -> None:
        """Synthesize the buffered input text over one WebSocket connection.

        Text is collected from the input channel until the first flush
        sentinel (or the end of the channel) and sent as a single request.
        Audio arrives either as binary frames or as base64 inside JSON
        ``audio`` / ``complete`` messages and is pushed to ``output_emitter``.

        Raises:
            APIStatusError: If the server sends an ``error`` message.
            APITimeoutError: If the WebSocket times out.
            APIConnectionError: If the socket closes or any other error occurs.
        """
        import websockets

        request_id = utils.shortuuid()
        output_emitter.initialize(
            request_id=request_id,
            sample_rate=self._tts.sample_rate,
            num_channels=self._tts.num_channels,
            mime_type=_mime_type(self._opts),
            stream=True,
        )

        text_parts: list[str] = []
        async for data in self._input_ch:
            if isinstance(data, str):
                text_parts.append(data)
            elif isinstance(data, self._FlushSentinel):
                break

        full_text = "".join(text_parts).strip()
        if not full_text:
            # Nothing to synthesize; no segment is started.
            return

        segment_id = utils.shortuuid()
        output_emitter.start_segment(segment_id=segment_id)

        try:
            ws_url = self._build_ws_url()
            async with websockets.connect(
                ws_url,
                additional_headers=_build_headers(self._opts),
                ping_interval=20,
                ping_timeout=20,
                close_timeout=10,
            ) as ws:
                request_body = _build_payload(self._opts, full_text)
                await ws.send(json.dumps(request_body))

                self._mark_started()

                async for msg in ws:
                    if isinstance(msg, bytes):
                        output_emitter.push(msg)
                        continue

                    payload = json.loads(msg)
                    msg_type = payload.get("type", "")

                    if msg_type in ("audio", "complete"):
                        # `or {}` covers both a missing "data" key and an
                        # explicit JSON null, so .get() below cannot hit None.
                        inner = payload.get("data") or {}
                        audio_b64 = inner.get("audio", "")
                        if audio_b64:
                            output_emitter.push(base64.b64decode(audio_b64))
                        if msg_type == "complete":
                            break

                    elif msg_type == "error":
                        error_msg = payload.get("message", "Unknown error")
                        logger.error("Gnani TTS WS error: %s", error_msg)
                        raise APIStatusError(
                            message=f"Gnani TTS stream error: {error_msg}",
                            status_code=500,
                            body=error_msg,
                        )

        except websockets.exceptions.ConnectionClosed as e:
            raise APIConnectionError(f"Gnani TTS WebSocket closed: {e}") from e
        except asyncio.TimeoutError as e:
            raise APITimeoutError("Gnani TTS WebSocket timed out") from e
        except (APIStatusError, APIConnectionError, APITimeoutError):
            raise
        except Exception as e:
            raise APIConnectionError(f"Gnani TTS WebSocket error: {e}") from e

        # Only flush/close the segment on the success path. On error the
        # per-attempt output_emitter is discarded before retry, so flushing or
        # ending the segment here would only add an unnecessary final frame.
        output_emitter.flush()
        output_emitter.end_segment()

WebSocket-based streaming TTS — wss://api.vachana.ai/api/v1/tts.

Ancestors

livekit.agents.tts.tts.SynthesizeStream
abc.ABC

class TTS (*, voice: GnaniTTSVoices | str = 'Pranav', model: GnaniTTSModels | str = 'timbre-v2.0', language: GnaniTTSLanguages | str | None = None, sample_rate: int = 16000, num_channels: int = 1, encoding: GnaniTTSEncodings | str = 'linear_pcm', container: GnaniTTSContainers | str = 'wav', bitrate: GnaniTTSBitrates | str | None = None, api_key: str | None = None, base_url: str = 'https://api.vachana.ai', synthesize_method: GnaniTTSSynthesizeMethod = 'rest', **kwargs: Any)

Expand source code

class TTS(tts.TTS):
    """Text-to-speech backed by Gnani's Vachana platform.

    One-shot synthesis can go through REST, SSE, or WebSocket depending on
    ``synthesize_method``; ``stream()`` always returns a WebSocket-based
    ``SynthesizeStream``.

    Args:
        voice: Voice to use for synthesis
            (see https://docs.gnani.ai/api/TTS/tts-sse#available-voices).
        model: TTS model name (default: timbre-v2.0; also: timbre-v2.5).
        language: BCP-47 language code for timbre-v2.5 only (e.g. "hi-IN").
        sample_rate: Output sample rate; must be one of ``SUPPORTED_SAMPLE_RATES``.
        num_channels: Number of output channels, passed to the base class and
            stored in the options.
        encoding: Audio encoding (linear_pcm or oggopus).
        container: Audio container format (raw, mp3, wav, ogg).
        bitrate: Optional bitrate value stored in the options.
        api_key: Gnani API key (falls back to GNANI_API_KEY env var).
        base_url: Vachana API base URL.
        synthesize_method: Synthesis mode — "rest", "sse", or "websocket".
        **kwargs: Not used directly; handed to ``_check_deprecated_tts_args``.

    Raises:
        ValueError: If ``sample_rate`` is unsupported or no API key is available.
    """

    def __init__(
        self,
        *,
        voice: GnaniTTSVoices | str = "Pranav",
        model: GnaniTTSModels | str = DEFAULT_MODEL,
        language: GnaniTTSLanguages | str | None = None,
        sample_rate: int = 16000,
        num_channels: int = 1,
        encoding: GnaniTTSEncodings | str = "linear_pcm",
        container: GnaniTTSContainers | str = "wav",
        bitrate: GnaniTTSBitrates | str | None = None,
        api_key: str | None = None,
        base_url: str = GNANI_TTS_BASE_URL,
        synthesize_method: GnaniTTSSynthesizeMethod = "rest",
        **kwargs: Any,
    ) -> None:
        _check_deprecated_tts_args(kwargs)

        # Reject unsupported rates before the base class is initialised.
        if sample_rate not in SUPPORTED_SAMPLE_RATES:
            rate_error = f"sample_rate must be one of {SUPPORTED_SAMPLE_RATES}, got {sample_rate}"
            raise ValueError(rate_error)

        streaming_caps = tts.TTSCapabilities(streaming=True)
        super().__init__(
            capabilities=streaming_caps,
            sample_rate=sample_rate,
            num_channels=num_channels,
        )

        # An explicit key takes precedence over the environment variable.
        self._api_key = api_key if api_key else os.environ.get("GNANI_API_KEY")
        if not self._api_key:
            raise ValueError(
                "Gnani API key is required. "
                "Provide it directly or set GNANI_API_KEY environment variable."
            )

        self._session: aiohttp.ClientSession | None = None
        self._opts = GnaniTTSOptions(
            api_key=self._api_key,
            base_url=base_url,
            synthesize_method=synthesize_method,
            voice=voice,
            model=model,
            language=language,
            sample_rate=sample_rate,
            num_channels=num_channels,
            encoding=encoding,
            container=container,
            bitrate=bitrate,
        )

    @property
    def model(self) -> str:
        """Return the currently configured TTS model name."""
        return self._opts.model

    @property
    def provider(self) -> str:
        """Return the provider name reported by this TTS."""
        return "Gnani"

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the cached HTTP session, fetching it on first use."""
        session = self._session
        if not session:
            session = self._session = utils.http_context.http_session()
        return session

    def synthesize(
        self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> tts.ChunkedStream:
        """Create a chunked stream for ``text`` using the configured method.

        "sse" and "websocket" select their dedicated stream classes; any other
        value uses the REST implementation.
        """
        method = self._opts.synthesize_method
        if method == "sse":
            stream_cls = SSEChunkedStream
        elif method == "websocket":
            stream_cls = WebSocketChunkedStream
        else:
            stream_cls = RESTChunkedStream
        return stream_cls(tts=self, input_text=text, conn_options=conn_options)

    def stream(
        self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> SynthesizeStream:
        """Create a ``SynthesizeStream`` bound to this TTS."""
        synth_stream = SynthesizeStream(tts=self, conn_options=conn_options)
        return synth_stream

    def update_options(
        self,
        *,
        voice: GnaniTTSVoices | str | None = None,
        model: GnaniTTSModels | str | None = None,
        language: GnaniTTSLanguages | str | None = None,
        **kwargs: Any,
    ) -> None:
        """Update voice, model and/or language on the stored options.

        Arguments left as ``None`` keep their current value. Extra keyword
        arguments are handed to ``_check_deprecated_tts_args``.
        """
        _check_deprecated_tts_args(kwargs, caller="TTS.update_options")

        requested = (("voice", voice), ("model", model), ("language", language))
        for attr_name, new_value in requested:
            if new_value is not None:
                setattr(self._opts, attr_name, new_value)

    async def aclose(self) -> None:
        """Do nothing; this class performs no cleanup on close."""
        return None

Gnani Vachana Text-to-Speech implementation.

Provides text-to-speech functionality using Gnani's Vachana platform. Supports REST, SSE, and WebSocket synthesis modes.

Args

voice: Voice to use for synthesis (see https://docs.gnani.ai/api/TTS/tts-sse#available-voices).
model: TTS model name (default: timbre-v2.0; also: timbre-v2.5).
language: BCP-47 language code for timbre-v2.5 only (e.g. "hi-IN").
sample_rate: Audio output sample rate (8000-44100).
encoding: Audio encoding (linear_pcm or oggopus).
container: Audio container format (raw, mp3, wav, ogg).
api_key: Gnani API key (falls back to GNANI_API_KEY env var).
base_url: Vachana API base URL.
synthesize_method: Synthesis mode — "rest", "sse", or "websocket".

Ancestors

livekit.agents.tts.tts.TTS
abc.ABC
EventEmitter
typing.Generic

Instance variables

prop model : str

Expand source code

@property
def model(self) -> str:
    """Return the currently configured TTS model name."""
    configured = self._opts.model
    return configured

Get the model name/identifier for this TTS instance.

Returns

The model name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their model information.

prop provider : str

Expand source code

@property
def provider(self) -> str:
    """Return the provider name reported by this TTS."""
    return "Gnani"

Get the provider name/identifier for this TTS instance.

Returns

The provider name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their provider information.

Methods

async def aclose(self) ‑> None

Expand source code

async def aclose(self) -> None:
    """Do nothing; this TTS performs no cleanup on close."""
    return None

def stream(self, *, conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.gnani.tts.SynthesizeStream

Expand source code

def stream(
    self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
) -> SynthesizeStream:
    """Create a ``SynthesizeStream`` bound to this TTS with ``conn_options``."""
    synth_stream = SynthesizeStream(tts=self, conn_options=conn_options)
    return synth_stream

def synthesize(self, text: str, *, conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.agents.tts.tts.ChunkedStream

Expand source code

def synthesize(
    self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
) -> tts.ChunkedStream:
    """Create a chunked stream for ``text`` using the configured method.

    "sse" and "websocket" select their dedicated stream classes; any other
    value of ``synthesize_method`` uses the REST implementation.
    """
    method = self._opts.synthesize_method
    if method == "sse":
        stream_cls = SSEChunkedStream
    elif method == "websocket":
        stream_cls = WebSocketChunkedStream
    else:
        stream_cls = RESTChunkedStream
    return stream_cls(tts=self, input_text=text, conn_options=conn_options)

def update_options(self, *, voice: GnaniTTSVoices | str | None = None, model: GnaniTTSModels | str | None = None, language: GnaniTTSLanguages | str | None = None, **kwargs: Any) ‑> None

Expand source code

def update_options(
    self,
    *,
    voice: GnaniTTSVoices | str | None = None,
    model: GnaniTTSModels | str | None = None,
    language: GnaniTTSLanguages | str | None = None,
    **kwargs: Any,
) -> None:
    """Update voice, model and/or language on the stored options.

    Arguments left as ``None`` keep their current value. Extra keyword
    arguments are handed to ``_check_deprecated_tts_args``.
    """
    _check_deprecated_tts_args(kwargs, caller="TTS.update_options")

    requested = (("voice", voice), ("model", model), ("language", language))
    for attr_name, new_value in requested:
        if new_value is not None:
            setattr(self._opts, attr_name, new_value)

Inherited members

EventEmitter:
- emit
- off
- on
- once