Module `livekit.plugins.rtzr`

RTZR plugin for LiveKit Agents

See Streaming STT docs at: https://developers.rtzr.ai/docs/en/

Environment variables used:

- `RTZR_CLIENT_ID` / `RTZR_CLIENT_SECRET` for authentication (required)

Classes

class STT (*, model: str = 'sommers_ko', language: str = 'ko', sample_rate: int = 8000, domain: str = 'CALL', epd_time: float = 0.3, noise_threshold: float = 0.6, active_threshold: float = 0.8, use_punctuation: bool = False, http_session: aiohttp.ClientSession | None = None)

Expand source code

class STT(stt.STT):
    """Streaming speech-to-text backed by the RTZR WebSocket API.

    Authentication and the WebSocket connection are delegated to
    ``RTZROpenAPIClient``. Audio frames are sent to the
    ``/v1/transcribe:streaming`` endpoint; the server performs endpoint
    detection (EPD) and marks final messages with ``final=true``. A stream is
    finalized by sending the string ``EOS``.
    """

    def __init__(
        self,
        *,
        model: str = "sommers_ko",
        language: str = "ko",
        sample_rate: int = 8000,
        domain: str = "CALL",
        epd_time: float = 0.3,
        noise_threshold: float = 0.60,
        active_threshold: float = 0.80,
        use_punctuation: bool = False,
        http_session: aiohttp.ClientSession | None = None,
    ) -> None:
        """Create the STT instance and its RTZR client.

        All recognition settings are bundled into a single ``_STTOptions``
        object stored on ``self._params``.

        Args:
            model: Stored as the ``model_name`` option.
            language: Stored as the ``language`` option.
            sample_rate: Stored as the ``sample_rate`` option
                (presumably in Hz -- confirm against the RTZR docs).
            domain: Stored as the ``domain`` option.
            epd_time: Stored as the ``epd_time`` option (presumably the
                endpoint-detection time in seconds -- confirm).
            noise_threshold: Stored as the ``noise_threshold`` option.
            active_threshold: Stored as the ``active_threshold`` option.
            use_punctuation: Stored as the ``use_punctuation`` option.
            http_session: Optional ``aiohttp`` session handed to
                ``RTZROpenAPIClient``.
        """
        capabilities = stt.STTCapabilities(streaming=True, interim_results=True)
        super().__init__(capabilities=capabilities)

        options = _STTOptions(
            model_name=model,
            language=language,
            sample_rate=sample_rate,
            domain=domain,
            epd_time=epd_time,
            noise_threshold=noise_threshold,
            active_threshold=active_threshold,
            use_punctuation=use_punctuation,
        )
        self._params = options
        self._client = RTZROpenAPIClient(http_session=http_session)

    def stream(
        self,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> SpeechStream:
        """Open a new streaming recognition session.

        Args:
            language: Accepted, but not forwarded to the created
                ``SpeechStream``.
            conn_options: Connection options passed to the ``SpeechStream``.

        Returns:
            A ``SpeechStream`` bound to this STT instance.
        """
        speech_stream = SpeechStream(stt=self, conn_options=conn_options)
        return speech_stream

    async def _recognize_impl(
        self,
        buffer: utils.AudioBuffer,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> stt.SpeechEvent:
        """Reject single-shot recognition; only streaming is available.

        Raises:
            NotImplementedError: Always.
        """
        message = "Single-shot recognition is not supported; use stream()."
        raise NotImplementedError(message)

    async def aclose(self) -> None:
        """Shut down the RTZR client held by this instance."""
        rtzr_client = self._client
        await rtzr_client.close()

RTZR Streaming STT over WebSocket.

Uses `RTZROpenAPIClient` for authentication and the WebSocket connection. Audio frames are streamed to the `/v1/transcribe:streaming` endpoint. The server performs endpoint detection (EPD); final messages carry `final=true`. The stream is finalized by sending the string `EOS`.

Ancestors

livekit.agents.stt.stt.STT
abc.ABC
EventEmitter
typing.Generic

Methods

async def aclose(self) ‑> None

Expand source code

async def aclose(self) -> None:
    """Shut down the RTZR client held by this instance."""
    rtzr_client = self._client
    await rtzr_client.close()

Close the RTZR client and clean up resources.

def stream(self, *, language: NotGivenOr[str] = NOT_GIVEN, conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.rtzr.stt.SpeechStream

Expand source code

def stream(
    self,
    *,
    language: NotGivenOr[str] = NOT_GIVEN,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> SpeechStream:
    """Open a new streaming recognition session.

    Args:
        language: Accepted, but not forwarded to the created
            ``SpeechStream``.
        conn_options: Connection options passed to the ``SpeechStream``.

    Returns:
        A ``SpeechStream`` bound to this STT instance.
    """
    speech_stream = SpeechStream(stt=self, conn_options=conn_options)
    return speech_stream

Inherited members

EventEmitter:
- emit
- off
- on
- once