Module livekit.plugins.soniox
Soniox plugin for LiveKit Agents
See https://docs.livekit.io/agents/integrations/stt/soniox/ for more information.
Classes
class STT (*,
api_key: str | None = None,
base_url: str = 'wss://stt-rt.soniox.com/transcribe-websocket',
http_session: aiohttp.ClientSession | None = None,
vad: vad.VAD | None = None,
params: STTOptions | None = None)-
Expand source code
class STT(stt.STT):
    """Streaming speech-to-text backed by the Soniox Speech-to-Text API.

    The service talks to the Soniox real-time API and is registered with the
    base class as streaming-capable with interim results. Non-streaming
    (single buffer) recognition is not available; ``_recognize_impl`` always
    raises.

    API reference:
    https://soniox.com/docs/speech-to-text/api-reference/websocket-api
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        base_url: str = BASE_URL,
        http_session: aiohttp.ClientSession | None = None,
        vad: vad.VAD | None = None,
        params: STTOptions | None = None,
    ):
        """Set up the Soniox Speech-to-Text service.

        Args:
            api_key: Soniox API key. When falsy, the ``SONIOX_API_KEY``
                environment variable is used instead (which may also be unset,
                leaving the stored key as ``None``).
            base_url: Endpoint of the Soniox Speech-to-Text API. Defaults to
                the module-level ``BASE_URL``.
            http_session: Optional ``aiohttp.ClientSession`` stored for later
                use by requests.
            vad: Optional Voice Activity Detection instance. When truthy, its
                ``stream()`` is called once here and the result is kept on the
                instance.
            params: Additional configuration (model, language hints, context,
                ...). A default ``STTOptions()`` is used when omitted.
        """
        capabilities = stt.STTCapabilities(streaming=True, interim_results=True)
        super().__init__(capabilities=capabilities)

        env_api_key = os.getenv("SONIOX_API_KEY")
        self._api_key = api_key or env_api_key
        self._base_url = base_url
        self._http_session = http_session

        # NOTE(review): one VAD stream is opened per STT instance, not per
        # speech stream; how it is consumed is not visible in this class.
        if vad:
            self._vad_stream = vad.stream()
        else:
            self._vad_stream = None

        self._params = params if params else STTOptions()

    async def _recognize_impl(
        self,
        buffer: utils.AudioBuffer,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions,
    ) -> stt.SpeechEvent:
        """Reject single-buffer recognition, which Soniox does not offer.

        Raises:
            NotImplementedError: Always; use ``stream()`` instead.
        """
        message = "Soniox Speech-to-Text API does not support single frame recognition"
        raise NotImplementedError(message)

    def stream(
        self,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> SpeechStream:
        """Create a new LiveKit streaming speech-to-text session.

        Args:
            language: Accepted for interface compatibility; it is not
                forwarded to the created ``SpeechStream``.
            conn_options: Connection options handed to the ``SpeechStream``.

        Returns:
            A ``SpeechStream`` bound to this STT instance.
        """
        speech_stream = SpeechStream(stt=self, conn_options=conn_options)
        return speech_stream
Speech-to-Text service using Soniox Speech-to-Text API.
This service connects to Soniox Speech-to-Text API for real-time transcription with support for multiple languages, custom context, speaker diarization, and more.
For complete API documentation, see: https://soniox.com/docs/speech-to-text/api-reference/websocket-api
Initialize instance of Soniox Speech-to-Text API service.
Args
api_key
- Soniox API key. If not provided, the SONIOX_API_KEY environment variable is used.
base_url
- Base URL for the Soniox Speech-to-Text API; defaults to BASE_URL defined in this module.
http_session
- Optional aiohttp.ClientSession to use for requests.
vad
- If passed, enables Voice Activity Detection (VAD) for audio frames.
params
- Additional configuration parameters, such as model, language hints, context and speaker diarization.
Ancestors
- livekit.agents.stt.stt.STT
- abc.ABC
- EventEmitter
- typing.Generic
Methods
def stream(self,
*,
language: NotGivenOr[str] = NOT_GIVEN,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.soniox.stt.SpeechStream-
Expand source code
def stream(
    self,
    *,
    language: NotGivenOr[str] = NOT_GIVEN,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> SpeechStream:
    """Create a new LiveKit streaming speech-to-text session.

    Args:
        language: Accepted for interface compatibility; it is not forwarded
            to the created ``SpeechStream``.
        conn_options: Connection options handed to the ``SpeechStream``.

    Returns:
        A ``SpeechStream`` bound to this STT instance.
    """
    speech_stream = SpeechStream(stt=self, conn_options=conn_options)
    return speech_stream
Return a new LiveKit streaming speech-to-text session.
Inherited members
class STTOptions (model: str | None = 'stt-rt-preview',
language_hints: list[str] | None = None,
context: str | None = None,
num_channels: int = 1,
sample_rate: int = 16000,
enable_language_identification: bool = True,
enable_non_final_tokens: bool = True,
max_non_final_tokens_duration_ms: int | None = None,
client_reference_id: str | None = None)-
Expand source code
@dataclass
class STTOptions:
    """Configuration options for Soniox Speech-to-Text service.

    Every field has a default, so ``STTOptions()`` is a usable configuration.
    Field order is part of the positional ``__init__`` signature generated by
    ``@dataclass``. How each value is transmitted to the service is not shown
    in this block; see the Soniox WebSocket API reference for exact semantics.
    """

    # Model name; defaults to the "stt-rt-preview" model.
    model: str | None = "stt-rt-preview"

    # Optional language hints; None by default.
    # NOTE(review): presumably language codes - confirm against the API docs.
    language_hints: list[str] | None = None
    # Optional custom context string; None by default.
    context: str | None = None

    # Audio format defaults: one channel at 16000.
    # NOTE(review): sample_rate is presumably in Hz - confirm.
    num_channels: int = 1
    sample_rate: int = 16000

    # Feature toggles, both on by default.
    enable_language_identification: bool = True
    enable_non_final_tokens: bool = True
    # Optional limit in milliseconds (per the field name); None by default.
    max_non_final_tokens_duration_ms: int | None = None

    # Optional caller-supplied reference id; None by default.
    client_reference_id: str | None = None
Configuration options for Soniox Speech-to-Text service.
Instance variables
var client_reference_id : str | None
var context : str | None
var enable_language_identification : bool
var enable_non_final_tokens : bool
var language_hints : list[str] | None
var max_non_final_tokens_duration_ms : int | None
var model : str | None
var num_channels : int
var sample_rate : int