Module livekit.plugins.soniox

Soniox plugin for LiveKit Agents

See https://docs.livekit.io/agents/integrations/stt/soniox/ for more information.

Classes

class STT (*,
api_key: str | None = None,
base_url: str = 'wss://stt-rt.soniox.com/transcribe-websocket',
http_session: aiohttp.ClientSession | None = None,
vad: vad.VAD | None = None,
params: STTOptions | None = None)
Expand source code
class STT(stt.STT):
    """Soniox-backed streaming Speech-to-Text service.

    Talks to the Soniox Speech-to-Text API for real-time transcription, with
    support for multiple languages, custom context, speaker diarization, and
    more. Only streaming recognition is available; use :meth:`stream` to
    obtain a session.

    API reference: https://soniox.com/docs/speech-to-text/api-reference/websocket-api
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        base_url: str = BASE_URL,
        http_session: aiohttp.ClientSession | None = None,
        vad: vad.VAD | None = None,
        params: STTOptions | None = None,
    ):
        """Set up the Soniox Speech-to-Text service.

        Args:
            api_key: Soniox API key. When omitted (or empty), the value of the
                ``SONIOX_API_KEY`` environment variable is used instead.
            base_url: Base URL of the Soniox Speech-to-Text API. Defaults to the
                ``BASE_URL`` constant defined in this module.
            http_session: Optional ``aiohttp.ClientSession`` to use for requests.
            vad: Optional Voice Activity Detection (VAD) instance. When given,
                ``vad.stream()`` is called here and the resulting stream is kept
                on the instance.
            params: Additional configuration such as model, language hints,
                context and speaker diarization. A default ``STTOptions()`` is
                used when omitted.
        """
        capabilities = stt.STTCapabilities(streaming=True, interim_results=True)
        super().__init__(capabilities=capabilities)

        # An explicit, non-empty key wins; otherwise fall back to the environment.
        if not api_key:
            api_key = os.getenv("SONIOX_API_KEY")
        self._api_key = api_key

        self._base_url = base_url
        self._http_session = http_session

        # The VAD stream is opened only when a VAD was supplied.
        vad_stream = None
        if vad:
            vad_stream = vad.stream()
        self._vad_stream = vad_stream

        self._params = params if params else STTOptions()

    async def _recognize_impl(
        self,
        buffer: utils.AudioBuffer,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions,
    ) -> stt.SpeechEvent:
        """Reject non-streaming recognition.

        Single-frame recognition is not supported by the Soniox Speech-to-Text
        API, so this method never returns a result.

        Raises:
            NotImplementedError: Always.
        """
        message = "Soniox Speech-to-Text API does not support single frame recognition"
        raise NotImplementedError(message)

    def stream(
        self,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> SpeechStream:
        """Open a new LiveKit streaming speech-to-text session.

        Args:
            language: Not used by this method; only ``conn_options`` is passed on.
            conn_options: Connection options handed to the new ``SpeechStream``.

        Returns:
            A ``SpeechStream`` bound to this STT instance.
        """
        session = SpeechStream(stt=self, conn_options=conn_options)
        return session

Speech-to-Text service using Soniox Speech-to-Text API.

This service connects to Soniox Speech-to-Text API for real-time transcription with support for multiple languages, custom context, speaker diarization, and more.

For complete API documentation, see: https://soniox.com/docs/speech-to-text/api-reference/websocket-api

Initialize an instance of the Soniox Speech-to-Text API service.

Args

api_key
Soniox API key. If not provided, the SONIOX_API_KEY environment variable is used.
base_url
Base URL for the Soniox Speech-to-Text API; defaults to BASE_URL defined in this module.
http_session
Optional aiohttp.ClientSession to use for requests.
vad
If passed, enable Voice Activity Detection (VAD) for audio frames.
params
Additional configuration parameters, such as model, language hints, context and speaker diarization.

Ancestors

  • livekit.agents.stt.stt.STT
  • abc.ABC
  • EventEmitter
  • typing.Generic

Methods

def stream(self,
*,
language: NotGivenOr[str] = NOT_GIVEN,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.soniox.stt.SpeechStream
Expand source code
def stream(
    self,
    *,
    language: NotGivenOr[str] = NOT_GIVEN,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> SpeechStream:
    """Open a new LiveKit streaming speech-to-text session.

    Args:
        language: Not used by this method; only ``conn_options`` is passed on.
        conn_options: Connection options handed to the new ``SpeechStream``.

    Returns:
        A ``SpeechStream`` bound to this STT instance.
    """
    session = SpeechStream(stt=self, conn_options=conn_options)
    return session

Return a new LiveKit streaming speech-to-text session.

Inherited members

class STTOptions (model: str | None = 'stt-rt-preview',
language_hints: list[str] | None = None,
context: str | None = None,
num_channels: int = 1,
sample_rate: int = 16000,
enable_language_identification: bool = True,
enable_non_final_tokens: bool = True,
max_non_final_tokens_duration_ms: int | None = None,
client_reference_id: str | None = None)
Expand source code
@dataclass
class STTOptions:
    """Configuration options for Soniox Speech-to-Text service."""

    model: str | None = "stt-rt-preview"
    language_hints: list[str] | None = None
    context: str | None = None

    num_channels: int = 1
    sample_rate: int = 16000

    enable_language_identification: bool = True

    enable_non_final_tokens: bool = True
    max_non_final_tokens_duration_ms: int | None = None

    client_reference_id: str | None = None

Configuration options for Soniox Speech-to-Text service.

Instance variables

var client_reference_id : str | None
var context : str | None
var enable_language_identification : bool
var enable_non_final_tokens : bool
var language_hints : list[str] | None
var max_non_final_tokens_duration_ms : int | None
var model : str | None
var num_channels : int
var sample_rate : int