Module livekit.plugins.soniox

Soniox plugin for LiveKit Agents

See https://docs.livekit.io/agents/integrations/stt/soniox/ for more information.

Classes

class ContextGeneralItem (key: str, value: str)
Expand source code
@dataclass
class ContextGeneralItem:
    # A single key/value pair of strings. `ContextObject.general` is typed as a
    # list of these items.
    key: str
    value: str

ContextGeneralItem(key: 'str', value: 'str')

Instance variables

var key : str
var value : str
class ContextObject (general: list[ContextGeneralItem] | None = None,
text: str | None = None,
terms: list[str] | None = None,
translation_terms: list[ContextTranslationTerm] | None = None)
Expand source code
@dataclass
class ContextObject:
    """Context object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.

    Learn more about context in the documentation:
    https://soniox.com/docs/stt/concepts/context
    """

    # Every field is optional and defaults to None.

    # Key/value entries (see ContextGeneralItem).
    general: list[ContextGeneralItem] | None = None
    # Free-form context string.
    text: str | None = None
    # List of plain string terms.
    # NOTE(review): presumably vocabulary the model should favor — confirm
    # against the Soniox context documentation linked above.
    terms: list[str] | None = None
    # Source/target string pairs (see ContextTranslationTerm).
    translation_terms: list[ContextTranslationTerm] | None = None

Context object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.

Learn more about context in the documentation: https://soniox.com/docs/stt/concepts/context

Instance variables

var general : list[livekit.plugins.soniox.stt.ContextGeneralItem] | None
var terms : list[str] | None
var text : str | None
var translation_terms : list[livekit.plugins.soniox.stt.ContextTranslationTerm] | None
class ContextTranslationTerm (source: str, target: str)
Expand source code
@dataclass
class ContextTranslationTerm:
    # A source/target pair of strings. `ContextObject.translation_terms` is
    # typed as a list of these items.
    # NOTE(review): presumably `source` is the original term and `target` its
    # desired translation — confirm against the Soniox context documentation.
    source: str
    target: str

ContextTranslationTerm(source: 'str', target: 'str')

Instance variables

var source : str
var target : str
class STT (*,
api_key: str | None = None,
base_url: str = 'wss://stt-rt.soniox.com/transcribe-websocket',
http_session: aiohttp.ClientSession | None = None,
vad: vad.VAD | None = None,
params: STTOptions | None = None)
Expand source code
class STT(stt.STT):
    """Speech-to-Text service using Soniox Speech-to-Text API.

    This service connects to Soniox Speech-to-Text API for real-time transcription
    with support for multiple languages, custom context, speaker diarization,
    and more.

    For complete API documentation, see: https://soniox.com/docs/speech-to-text/api-reference/websocket-api
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        base_url: str = BASE_URL,
        http_session: aiohttp.ClientSession | None = None,
        vad: vad.VAD | None = None,
        params: STTOptions | None = None,
    ):
        """Initialize instance of Soniox Speech-to-Text API service.

        Args:
            api_key: Soniox API key. If not provided, the SONIOX_API_KEY environment
                variable is used.
            base_url: Base URL for Soniox Speech-to-Text API, defaults to BASE_URL defined in
                this module.
            http_session: Optional aiohttp.ClientSession to use for requests.
            vad: If passed, enable Voice Activity Detection (VAD) for audio frames.
            params: Additional configuration parameters, such as model, language hints, context and
                speaker diarization.
        """
        super().__init__(capabilities=stt.STTCapabilities(streaming=True, interim_results=True))

        # The key is not validated here: if neither the argument nor the
        # environment variable is set, `_api_key` ends up as None.
        self._api_key = api_key or os.getenv("SONIOX_API_KEY")
        self._base_url = base_url
        self._http_session = http_session
        # The VAD stream is created once, at construction time, and stored on
        # the STT instance.
        self._vad_stream = vad.stream() if vad else None
        self._params = params or STTOptions()

    @property
    def model(self) -> str:
        """Return the configured Soniox model name, or "unknown" if none is set."""
        # `STTOptions.model` is typed `str | None`; fall back so the property
        # always yields a string.
        return self._params.model or "unknown"

    @property
    def provider(self) -> str:
        """Return the provider name for this STT instance."""
        return "Soniox"

    async def _recognize_impl(
        self,
        buffer: utils.AudioBuffer,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions,
    ) -> stt.SpeechEvent:
        """Raise error since single-frame recognition is not supported
        by Soniox Speech-to-Text API.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "Soniox Speech-to-Text API does not support single frame recognition"
        )

    def stream(
        self,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> SpeechStream:
        """Return a new LiveKit streaming speech-to-text session."""
        # `language` is not passed to the session; only this instance and the
        # connection options are forwarded.
        return SpeechStream(
            stt=self,
            conn_options=conn_options,
        )

Speech-to-Text service using Soniox Speech-to-Text API.

This service connects to Soniox Speech-to-Text API for real-time transcription with support for multiple languages, custom context, speaker diarization, and more.

For complete API documentation, see: https://soniox.com/docs/speech-to-text/api-reference/websocket-api

Initialize instance of Soniox Speech-to-Text API service.

Args

api_key
Soniox API key. If not provided, the SONIOX_API_KEY environment variable is used.
base_url
Base URL for Soniox Speech-to-Text API; defaults to BASE_URL defined in this module.
http_session
Optional aiohttp.ClientSession to use for requests.
vad
If passed, enable Voice Activity Detection (VAD) for audio frames.
params
Additional configuration parameters, such as model, language hints, context and speaker diarization.

Ancestors

  • livekit.agents.stt.stt.STT
  • abc.ABC
  • EventEmitter
  • typing.Generic

Instance variables

prop model : str
Expand source code
@property
def model(self) -> str:
    """Return the configured Soniox model name, falling back to "unknown".

    The name is read from the ``model`` field of the options stored on
    ``self._params``. When that field is unset (``None``), ``"unknown"`` is
    returned so the property always yields a string.
    """
    return self._params.model or "unknown"

Get the model name/identifier for this STT instance.

Returns

The model name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their model information.

prop provider : str
Expand source code
@property
def provider(self) -> str:
    """Name of the speech-to-text provider behind this instance (always "Soniox")."""
    vendor: str = "Soniox"
    return vendor

Get the provider name/identifier for this STT instance.

Returns

The provider name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their provider information.

Methods

def stream(self,
*,
language: NotGivenOr[str] = NOT_GIVEN,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.soniox.stt.SpeechStream
Expand source code
def stream(
    self,
    *,
    language: NotGivenOr[str] = NOT_GIVEN,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> SpeechStream:
    """Open a new streaming speech-to-text session bound to this STT instance.

    Args:
        language: Not used by this method; it is not passed to the created session.
        conn_options: Connection options handed to the created session.

    Returns:
        A new SpeechStream constructed with this instance and ``conn_options``.
    """
    session_kwargs = {"stt": self, "conn_options": conn_options}
    return SpeechStream(**session_kwargs)

Return a new LiveKit streaming speech-to-text session.

Inherited members

class STTOptions (model: str | None = 'stt-rt-preview',
language_hints: list[str] | None = None,
context: ContextObject | str | None = None,
num_channels: int = 1,
sample_rate: int = 16000,
enable_speaker_diarization: bool = False,
enable_language_identification: bool = True,
client_reference_id: str | None = None)
Expand source code
@dataclass
class STTOptions:
    """Configuration options for Soniox Speech-to-Text service."""

    # Model identifier; may be None. Defaults to "stt-rt-preview".
    model: str | None = "stt-rt-preview"

    # Optional list of language hint strings.
    # NOTE(review): presumably language codes such as "en" — confirm against
    # the Soniox API reference.
    language_hints: list[str] | None = None
    # Either a structured ContextObject or a plain string. ContextObject's own
    # docstring says it targets context_version 2 models (stt-rt-v3-preview
    # and higher).
    context: ContextObject | str | None = None

    # Audio format settings; defaults are 1 channel and 16000.
    # NOTE(review): sample_rate is presumably in Hz — confirm.
    num_channels: int = 1
    sample_rate: int = 16000

    # Feature toggles: diarization is off by default, language identification
    # is on by default.
    enable_speaker_diarization: bool = False
    enable_language_identification: bool = True

    # Optional caller-supplied reference string; defaults to None.
    client_reference_id: str | None = None

Configuration options for Soniox Speech-to-Text service.

Instance variables

var client_reference_id : str | None
var context : livekit.plugins.soniox.stt.ContextObject | str | None
var enable_language_identification : bool
var enable_speaker_diarization : bool
var language_hints : list[str] | None
var model : str | None
var num_channels : int
var sample_rate : int