Module `livekit.plugins.soniox`

Soniox plugin for LiveKit Agents

See https://docs.livekit.io/agents/integrations/stt/soniox/ for more information.

Classes

class ContextGeneralItem (key: str, value: str)

Expand source code

@dataclass
class ContextGeneralItem:
    """A single key/value entry for the ``general`` list of a ``ContextObject``."""

    key: str
    value: str

ContextGeneralItem(key: 'str', value: 'str')

Instance variables

var key : str
var value : str

class ContextObject (general: list[ContextGeneralItem] | None = None, text: str | None = None, terms: list[str] | None = None, translation_terms: list[ContextTranslationTerm] | None = None)

Expand source code

@dataclass
class ContextObject:
    """Context object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.

    All fields are optional and default to ``None``.

    Learn more about context in the documentation:
    https://soniox.com/docs/stt/concepts/context
    """

    # Key/value items (see ContextGeneralItem).
    general: list[ContextGeneralItem] | None = None
    # Context given as a single string; exact semantics are defined by the Soniox
    # context documentation linked above.
    text: str | None = None
    # Plain list of term strings; presumably vocabulary to bias recognition toward —
    # confirm against the Soniox context documentation.
    terms: list[str] | None = None
    # Source/target term pairs (see ContextTranslationTerm).
    translation_terms: list[ContextTranslationTerm] | None = None

Context object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.

Learn more about context in the documentation: https://soniox.com/docs/stt/concepts/context

Instance variables

var general : list[livekit.plugins.soniox.stt.ContextGeneralItem] | None
var terms : list[str] | None
var text : str | None
var translation_terms : list[livekit.plugins.soniox.stt.ContextTranslationTerm] | None

class ContextTranslationTerm (source: str, target: str)

Expand source code

@dataclass
class ContextTranslationTerm:
    """A source/target pair for the ``translation_terms`` list of a ``ContextObject``."""

    # NOTE(review): presumably `source` is the term as spoken and `target` its desired
    # translation — confirm against the Soniox context documentation.
    source: str
    target: str

ContextTranslationTerm(source: 'str', target: 'str')

Instance variables

var source : str
var target : str

class STT (*, api_key: str | None = None, base_url: str = 'wss://stt-rt.soniox.com/transcribe-websocket', http_session: aiohttp.ClientSession | None = None, vad: vad.VAD | None = None, params: STTOptions | None = None)

Expand source code

class STT(stt.STT):
    """Speech-to-Text service using Soniox Speech-to-Text API.

    This service connects to Soniox Speech-to-Text API for real-time transcription
    with support for multiple languages, custom context, speaker diarization,
    and more.

    Only streaming recognition is supported; use :meth:`stream` to open a session.

    For complete API documentation, see: https://soniox.com/docs/speech-to-text/api-reference/websocket-api
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        base_url: str = BASE_URL,
        http_session: aiohttp.ClientSession | None = None,
        vad: vad.VAD | None = None,
        params: STTOptions | None = None,
    ):
        """Initialize instance of Soniox Speech-to-Text API service.

        Args:
            api_key: Soniox API key. If not provided, the SONIOX_API_KEY environment
                variable is used. No validation is performed here.
            base_url: Base URL for Soniox Speech-to-Text API; defaults to BASE_URL defined
                in this module.
            http_session: Optional aiohttp.ClientSession to use for requests.
            vad: If passed, enable Voice Activity Detection (VAD) for audio frames. A VAD
                stream is created once, at construction time, via ``vad.stream()``.
            params: Additional configuration parameters, such as model, language hints,
                context and speaker diarization. Defaults to ``STTOptions()``.
        """
        super().__init__(capabilities=stt.STTCapabilities(streaming=True, interim_results=True))

        # An explicit key takes precedence over the environment variable.
        self._api_key = api_key or os.getenv("SONIOX_API_KEY")
        self._base_url = base_url
        self._http_session = http_session
        self._vad_stream = vad.stream() if vad else None
        self._params = params or STTOptions()

    @property
    def model(self) -> str:
        """Return the configured Soniox model name, or ``"unknown"`` if none is set."""
        # STTOptions.model is typed ``str | None``; keep the base-class fallback value
        # so the property always returns a string.
        return self._params.model or "unknown"

    @property
    def provider(self) -> str:
        """Return the provider name for this STT instance (always ``"Soniox"``)."""
        return "Soniox"

    async def _recognize_impl(
        self,
        buffer: utils.AudioBuffer,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions,
    ) -> stt.SpeechEvent:
        """Reject non-streaming recognition, which this plugin does not implement.

        Raises:
            NotImplementedError: Always; use :meth:`stream` instead.
        """
        raise NotImplementedError(
            "Soniox Speech-to-Text API does not support single frame recognition"
        )

    def stream(
        self,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> SpeechStream:
        """Return a new LiveKit streaming speech-to-text session.

        Args:
            language: Accepted but not used by this method.
            conn_options: Connection options passed to the new ``SpeechStream``.

        Returns:
            A ``SpeechStream`` created with ``stt=self`` and ``conn_options``.
        """
        return SpeechStream(
            stt=self,
            conn_options=conn_options,
        )

Speech-to-Text service using Soniox Speech-to-Text API.

This service connects to Soniox Speech-to-Text API for real-time transcription with support for multiple languages, custom context, speaker diarization, and more.

For complete API documentation, see: https://soniox.com/docs/speech-to-text/api-reference/websocket-api

Initialize instance of Soniox Speech-to-Text API service.

Args

api_key: Soniox API key. If not provided, the SONIOX_API_KEY environment variable is used.
base_url: Base URL for Soniox Speech-to-Text API; defaults to BASE_URL defined in this module.
http_session: Optional aiohttp.ClientSession to use for requests.
vad: If passed, enable Voice Activity Detection (VAD) for audio frames.
params: Additional configuration parameters, such as model, language hints, context and speaker diarization.

Ancestors

livekit.agents.stt.stt.STT
abc.ABC
EventEmitter
typing.Generic

Instance variables

prop model : str

Expand source code

@property
def model(self) -> str:
    """Return the configured Soniox model name, or ``"unknown"`` if none is set."""
    # STTOptions.model is typed ``str | None``; fall back to the base-class
    # placeholder so the property always returns a string.
    return self._params.model or "unknown"

Get the model name/identifier for this STT instance.

Returns

The model name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their model information.

prop provider : str

Expand source code

@property
def provider(self) -> str:
    """Return the provider name for this STT instance (always ``"Soniox"``)."""
    return "Soniox"

Get the provider name/identifier for this STT instance.

Returns

The provider name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their provider information.

Methods

def stream(self, *, language: NotGivenOr[str] = NOT_GIVEN, conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.soniox.stt.SpeechStream

Expand source code

def stream(
    self,
    *,
    language: NotGivenOr[str] = NOT_GIVEN,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> SpeechStream:
    """Open a new streaming speech-to-text session bound to this STT instance.

    Args:
        language: Accepted but not used by this method.
        conn_options: Connection options passed to the new ``SpeechStream``.

    Returns:
        A ``SpeechStream`` created with ``stt=self`` and ``conn_options``.
    """
    session = SpeechStream(stt=self, conn_options=conn_options)
    return session

Return a new LiveKit streaming speech-to-text session.

Inherited members

EventEmitter:
- emit
- off
- on
- once

class STTOptions (model: str | None = 'stt-rt-preview', language_hints: list[str] | None = None, context: ContextObject | str | None = None, num_channels: int = 1, sample_rate: int = 16000, enable_speaker_diarization: bool = False, enable_language_identification: bool = True, client_reference_id: str | None = None)

Expand source code

@dataclass
class STTOptions:
    """Configuration options for Soniox Speech-to-Text service.

    Every field has a default, so ``STTOptions()`` is a valid configuration.
    """

    # Soniox model identifier; may be None.
    model: str | None = "stt-rt-preview"

    # Optional list of language hints; presumably language codes — confirm the
    # expected format against the Soniox API reference.
    language_hints: list[str] | None = None
    # Custom context, either a structured ContextObject or a plain string.
    context: ContextObject | str | None = None

    # NOTE(review): presumably describe the audio sent to the API (channel count and
    # sample rate in Hz) — confirm against the code that builds the request.
    num_channels: int = 1
    sample_rate: int = 16000

    # Speaker diarization is off by default; language identification is on by default.
    enable_speaker_diarization: bool = False
    enable_language_identification: bool = True

    # Optional caller-supplied identifier; semantics are defined by the Soniox API.
    client_reference_id: str | None = None

Configuration options for Soniox Speech-to-Text service.

Instance variables

var client_reference_id : str | None
var context : livekit.plugins.soniox.stt.ContextObject | str | None
var enable_language_identification : bool
var enable_speaker_diarization : bool
var language_hints : list[str] | None
var model : str | None
var num_channels : int
var sample_rate : int