Module livekit.plugins.soniox

Soniox plugin for LiveKit Agents

See https://docs.livekit.io/agents/integrations/stt/soniox/ for more information.

Classes

class ContextGeneralItem (key: str, value: str)
Expand source code
@dataclass
class ContextGeneralItem:
    """A single key/value entry for the ``general`` list of a ``ContextObject``."""

    # Name of the entry. NOTE(review): which keys are meaningful is defined by
    # the Soniox context documentation, not by this class — confirm there.
    key: str
    # Text associated with ``key``.
    value: str

ContextGeneralItem(key: 'str', value: 'str')

Instance variables

var key : str
var value : str
class ContextObject (general: list[ContextGeneralItem] | None = None,
text: str | None = None,
terms: list[str] | None = None,
translation_terms: list[ContextTranslationTerm] | None = None)
Expand source code
@dataclass
class ContextObject:
    """Context object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.

    Every field is optional and defaults to ``None``.

    Learn more about context in the documentation:
    https://soniox.com/docs/stt/concepts/context
    """

    # Key/value entries (see ``ContextGeneralItem``).
    general: list[ContextGeneralItem] | None = None
    # Free-form context text.
    text: str | None = None
    # Plain list of term strings. NOTE(review): presumably vocabulary to bias
    # recognition towards — confirm against the Soniox context documentation.
    terms: list[str] | None = None
    # Source/target pairs (see ``ContextTranslationTerm``).
    translation_terms: list[ContextTranslationTerm] | None = None

Context object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.

Learn more about context in the documentation: https://soniox.com/docs/stt/concepts/context

Instance variables

var general : list[livekit.plugins.soniox.stt.ContextGeneralItem] | None
var terms : list[str] | None
var text : str | None
var translation_terms : list[livekit.plugins.soniox.stt.ContextTranslationTerm] | None
class ContextTranslationTerm (source: str, target: str)
Expand source code
@dataclass
class ContextTranslationTerm:
    """A source/target string pair for ``ContextObject.translation_terms``."""

    # NOTE(review): presumably the term as it occurs in the source language —
    # confirm against the Soniox context documentation.
    source: str
    # NOTE(review): presumably the rendering to use for ``source`` in the
    # translated output — confirm against the Soniox context documentation.
    target: str

ContextTranslationTerm(source: 'str', target: 'str')

Instance variables

var source : str
var target : str
class STT (*,
api_key: str | None = None,
base_url: str = 'wss://stt-rt.soniox.com/transcribe-websocket',
http_session: aiohttp.ClientSession | None = None,
params: STTOptions | None = None)
Expand source code
class STT(stt.STT):
    """Real-time Speech-to-Text backed by the Soniox Speech-to-Text API.

    Transcription is streaming-only: open a session with :meth:`stream`.
    Configuration such as model, language hints, custom context and speaker
    diarization is supplied through the ``STTOptions`` passed as ``params``.

    Full API documentation: https://soniox.com/docs/stt/api-reference/websocket-api
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        base_url: str = BASE_URL,
        http_session: aiohttp.ClientSession | None = None,
        params: STTOptions | None = None,
    ):
        """Create a Soniox Speech-to-Text service.

        Args:
            api_key: Soniox API key. When empty or omitted, the value of the
                SONIOX_API_KEY environment variable is used instead.
            base_url: Soniox Speech-to-Text API endpoint; defaults to the
                module-level BASE_URL.
            http_session: Optional aiohttp.ClientSession to use for requests.
            params: Service configuration (model, language hints, context,
                speaker diarization, ...). A default ``STTOptions()`` is used
                when omitted.
        """
        capabilities = stt.STTCapabilities(
            streaming=True,
            interim_results=True,
            aligned_transcript=False,
        )
        super().__init__(capabilities=capabilities)

        # An explicitly passed key wins; the environment is only a fallback.
        self._api_key = api_key if api_key else os.getenv("SONIOX_API_KEY")
        self._base_url = base_url
        self._http_session = http_session
        self._params = params if params else STTOptions()

    @property
    def model(self) -> str:
        """Model name taken from the configured options."""
        options = self._params
        return options.model

    @property
    def provider(self) -> str:
        """Fixed provider name for this plugin."""
        return "Soniox"

    async def _recognize_impl(
        self,
        buffer: utils.AudioBuffer,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions,
    ) -> stt.SpeechEvent:
        """Always fail: recognition is only available through :meth:`stream`.

        Raises:
            NotImplementedError: On every call.
        """
        message = "Soniox Speech-to-Text API does not support single frame recognition"
        raise NotImplementedError(message)

    def stream(
        self,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> SpeechStream:
        """Open a streaming speech-to-text session bound to this instance.

        Args:
            language: Accepted but not forwarded to the created stream.
            conn_options: Connection options handed to the stream.

        Returns:
            A new ``SpeechStream``.
        """
        session = SpeechStream(stt=self, conn_options=conn_options)
        return session

Speech-to-Text service using Soniox Speech-to-Text API.

This service connects to Soniox Speech-to-Text API for real-time transcription with support for multiple languages, custom context, speaker diarization, and more.

For complete API documentation, see: https://soniox.com/docs/stt/api-reference/websocket-api

Initialize instance of Soniox Speech-to-Text API service.

Args

api_key
Soniox API key. If not provided, the SONIOX_API_KEY environment variable is used.
base_url
Base URL for the Soniox Speech-to-Text API; defaults to BASE_URL defined in this module.
http_session
Optional aiohttp.ClientSession to use for requests.
params
Additional configuration parameters, such as model, language hints, context and speaker diarization.

Ancestors

  • livekit.agents.stt.stt.STT
  • abc.ABC
  • EventEmitter
  • typing.Generic

Instance variables

prop model : str
Expand source code
@property
def model(self) -> str:
    """Return the model name stored in this instance's configured options."""
    return self._params.model

Get the model name/identifier for this STT instance.

Returns

The model name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their model information.

prop provider : str
Expand source code
@property
def provider(self) -> str:
    """Return the fixed provider name, ``"Soniox"``."""
    return "Soniox"

Get the provider name/identifier for this STT instance.

Returns

The provider name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their provider information.

Methods

def stream(self,
*,
language: NotGivenOr[str] = NOT_GIVEN,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.soniox.stt.SpeechStream
Expand source code
def stream(
    self,
    *,
    language: NotGivenOr[str] = NOT_GIVEN,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> SpeechStream:
    """Open a streaming speech-to-text session bound to this instance.

    Args:
        language: Accepted but not forwarded to the created stream.
        conn_options: Connection options handed to the stream.

    Returns:
        A new ``SpeechStream``.
    """
    session = SpeechStream(stt=self, conn_options=conn_options)
    return session

Return a new LiveKit streaming speech-to-text session.

Inherited members

class STTOptions (model: str = 'stt-rt-v3',
language_hints: list[str] | None = None,
language_hints_strict: bool = False,
context: ContextObject | str | None = None,
num_channels: int = 1,
sample_rate: int = 16000,
enable_speaker_diarization: bool = False,
enable_language_identification: bool = True,
client_reference_id: str | None = None)
Expand source code
@dataclass
class STTOptions:
    """Configuration options for Soniox Speech-to-Text service.

    Every field has a default, so ``STTOptions()`` is a valid configuration;
    ``STT`` falls back to it when no ``params`` are passed.
    """

    # Model identifier; exposed through ``STT.model``.
    model: str = "stt-rt-v3"

    # Optional list of language hints. NOTE(review): presumably language codes
    # such as "en" — confirm against the Soniox API reference.
    language_hints: list[str] | None = None
    # NOTE(review): presumably restricts recognition to ``language_hints`` when
    # True — confirm against the Soniox API reference.
    language_hints_strict: bool = False
    # Optional context: a structured ``ContextObject`` or a plain string.
    context: ContextObject | str | None = None

    # Audio format of the input. NOTE(review): sample_rate is presumably in Hz
    # — confirm against the caller that pushes audio.
    num_channels: int = 1
    sample_rate: int = 16000

    # Speaker diarization is off by default; language identification is on.
    enable_speaker_diarization: bool = False
    enable_language_identification: bool = True

    # Optional caller-chosen identifier. NOTE(review): presumably passed through
    # to Soniox for request tracking — confirm.
    client_reference_id: str | None = None

Configuration options for Soniox Speech-to-Text service.

Instance variables

var client_reference_id : str | None
var context : livekit.plugins.soniox.stt.ContextObject | str | None
var enable_language_identification : bool
var enable_speaker_diarization : bool
var language_hints : list[str] | None
var language_hints_strict : bool
var model : str
var num_channels : int
var sample_rate : int