Module livekit.plugins.soniox
Soniox plugin for LiveKit Agents
See https://docs.livekit.io/agents/integrations/stt/soniox/ for more information.
Classes
class ContextGeneralItem (key: str, value: str)-
Expand source code
@dataclass
class ContextGeneralItem:
    key: str
    value: str

ContextGeneralItem(key: 'str', value: 'str')
Instance variables
var key : str
var value : str
class ContextObject (general: list[ContextGeneralItem] | None = None,
text: str | None = None,
terms: list[str] | None = None,
translation_terms: list[ContextTranslationTerm] | None = None)-
Expand source code
@dataclass
class ContextObject:
    """Context object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.

    Learn more about context in the documentation: https://soniox.com/docs/stt/concepts/context
    """

    general: list[ContextGeneralItem] | None = None
    text: str | None = None
    terms: list[str] | None = None
    translation_terms: list[ContextTranslationTerm] | None = None

Context object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.
Learn more about context in the documentation: https://soniox.com/docs/stt/concepts/context
Instance variables
var general : list[livekit.plugins.soniox.stt.ContextGeneralItem] | None
var terms : list[str] | None
var text : str | None
var translation_terms : list[livekit.plugins.soniox.stt.ContextTranslationTerm] | None
class ContextTranslationTerm (source: str, target: str)-
Expand source code
@dataclass
class ContextTranslationTerm:
    source: str
    target: str

ContextTranslationTerm(source: 'str', target: 'str')
Instance variables
var source : str
var target : str
class STT (*,
api_key: str | None = None,
base_url: str = 'wss://stt-rt.soniox.com/transcribe-websocket',
http_session: aiohttp.ClientSession | None = None,
params: STTOptions | None = None)-
Expand source code
class STT(stt.STT):
    """Speech-to-Text service using Soniox Speech-to-Text API.

    This service connects to Soniox Speech-to-Text API for real-time transcription with support for multiple languages, custom context, speaker diarization, and more.

    For complete API documentation, see: https://soniox.com/docs/stt/api-reference/websocket-api
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        base_url: str = BASE_URL,
        http_session: aiohttp.ClientSession | None = None,
        params: STTOptions | None = None,
    ):
        """Initialize instance of Soniox Speech-to-Text API service.

        Args:
            api_key: Soniox API key, if not provided, will look for SONIOX_API_KEY env variable.
            base_url: Base URL for Soniox Speech-to-Text API, default to BASE_URL defined in this module.
            http_session: Optional aiohttp.ClientSession to use for requests.
            params: Additional configuration parameters, such as model, language hints, context and speaker diarization.
        """
        params = params or STTOptions()
        super().__init__(
            capabilities=stt.STTCapabilities(
                streaming=True,
                interim_results=True,
                aligned_transcript="chunk",
                offline_recognize=False,
                diarization=params.enable_speaker_diarization,
            )
        )

        self._api_key = api_key or os.getenv("SONIOX_API_KEY")
        if not self._api_key:
            raise ValueError("Soniox API key is required. Set SONIOX_API_KEY or pass api_key")
        self._base_url = base_url
        self._http_session = http_session
        self._params = params

    @property
    def model(self) -> str:
        return self._params.model

    @property
    def provider(self) -> str:
        return "Soniox"

    async def _recognize_impl(
        self,
        buffer: utils.AudioBuffer,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions,
    ) -> stt.SpeechEvent:
        """Raise error since single-frame recognition is not supported by Soniox Speech-to-Text API."""
        raise NotImplementedError(
            "Soniox Speech-to-Text API does not support single frame recognition"
        )

    def stream(
        self,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> SpeechStream:
        """Return a new LiveKit streaming speech-to-text session."""
        return SpeechStream(
            stt=self,
            conn_options=conn_options,
        )

Speech-to-Text service using Soniox Speech-to-Text API.
This service connects to Soniox Speech-to-Text API for real-time transcription with support for multiple languages, custom context, speaker diarization, and more.
For complete API documentation, see: https://soniox.com/docs/stt/api-reference/websocket-api
Initialize instance of Soniox Speech-to-Text API service.
Args
api_key- Soniox API key, if not provided, will look for SONIOX_API_KEY env variable.
base_url- Base URL for Soniox Speech-to-Text API, default to BASE_URL defined in this module.
http_session- Optional aiohttp.ClientSession to use for requests.
params- Additional configuration parameters, such as model, language hints, context and speaker diarization.
Ancestors
- livekit.agents.stt.stt.STT
- abc.ABC
- EventEmitter
- typing.Generic
Instance variables
prop model : str-
Expand source code
@property
def model(self) -> str:
    return self._params.model

Get the model name/identifier for this STT instance.
Returns
The model name if available, "unknown" otherwise.
Note
Plugins should override this property to provide their model information.
prop provider : str-
Expand source code
@property
def provider(self) -> str:
    return "Soniox"

Get the provider name/identifier for this STT instance.
Returns
The provider name if available, "unknown" otherwise.
Note
Plugins should override this property to provide their provider information.
Methods
def stream(self,
*,
language: NotGivenOr[str] = NOT_GIVEN,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.soniox.stt.SpeechStream-
Expand source code
def stream(
    self,
    *,
    language: NotGivenOr[str] = NOT_GIVEN,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> SpeechStream:
    """Return a new LiveKit streaming speech-to-text session."""
    return SpeechStream(
        stt=self,
        conn_options=conn_options,
    )

Return a new LiveKit streaming speech-to-text session.
Inherited members
class STTOptions (model: str = 'stt-rt-v4',
language_hints: list[str] | None = None,
language_hints_strict: bool = False,
context: ContextObject | str | None = None,
num_channels: int = 1,
sample_rate: int = 16000,
enable_speaker_diarization: bool = False,
enable_language_identification: bool = True,
max_endpoint_delay_ms: int = 500,
client_reference_id: str | None = None,
translation: TranslationConfig | None = None)-
Expand source code
@dataclass
class STTOptions:
    """Configuration options for Soniox Speech-to-Text service."""

    model: str = "stt-rt-v4"
    language_hints: list[str] | None = None
    language_hints_strict: bool = False
    context: ContextObject | str | None = None
    num_channels: int = 1
    sample_rate: int = 16000
    enable_speaker_diarization: bool = False
    enable_language_identification: bool = True
    max_endpoint_delay_ms: int = 500
    """Maximum delay in milliseconds between speech cessation and endpoint detection. Range: 500–3000. See: https://soniox.com/docs/stt/rt/endpoint-detection"""
    client_reference_id: str | None = None
    translation: TranslationConfig | None = None

    def __post_init__(self) -> None:
        if not (500 <= self.max_endpoint_delay_ms <= 3000):
            raise ValueError("max_endpoint_delay_ms must be between 500 and 3000")

Configuration options for Soniox Speech-to-Text service.
Instance variables
var client_reference_id : str | None
var context : livekit.plugins.soniox.stt.ContextObject | str | None
var enable_language_identification : bool
var enable_speaker_diarization : bool
var language_hints : list[str] | None
var language_hints_strict : bool
var max_endpoint_delay_ms : int-
Maximum delay in milliseconds between speech cessation and endpoint detection. Range: 500–3000. See: https://soniox.com/docs/stt/rt/endpoint-detection
var model : str
var num_channels : int
var sample_rate : int
var translation : livekit.plugins.soniox.stt.TranslationConfig | None
class TranslationConfig (type: "Literal['one_way', 'two_way']",
target_language: str | None = None,
language_a: str | None = None,
language_b: str | None = None)-
Expand source code
@dataclass
class TranslationConfig:
    """Translation configuration for the Soniox Speech-to-Text API.

    See: https://soniox.com/docs/stt/api-reference/websocket-api
    """

    type: Literal["one_way", "two_way"]
    target_language: str | None = None
    """Target language for one-way translation."""
    language_a: str | None = None
    """First language for two-way translation."""
    language_b: str | None = None
    """Second language for two-way translation."""

    def __post_init__(self) -> None:
        if self.type == "one_way" and not self.target_language:
            raise ValueError("target_language is required for one_way translation")
        if self.type == "two_way" and not (self.language_a and self.language_b):
            raise ValueError("language_a and language_b are both required for two_way translation")

Translation configuration for the Soniox Speech-to-Text API.
See: https://soniox.com/docs/stt/api-reference/websocket-api
Instance variables
var language_a : str | None-
First language for two-way translation.
var language_b : str | None-
Second language for two-way translation.
var target_language : str | None-
Target language for one-way translation.
var type : Literal['one_way', 'two_way']