Module livekit.agents.inference.vad
Classes
class VAD (*,
model: VADModels = 'silero',
min_speech_duration: float = 0.05,
min_silence_duration: float = 0.25,
prefix_padding_duration: float = 0.5,
max_buffered_speech: float = 60.0,
activation_threshold: float = 0.5,
deactivation_threshold: NotGivenOr[float] = NOT_GIVEN)-
Expand source code
class VAD(vad.VAD):
    """Voice activity detector that runs on ``livekit-local-inference``.

    The native model is a singleton which is loaded a single time, when the
    module is imported (the pybind11 ``.so`` constructor takes care of it).
    Every stream allocates LSTM/context state of its own.
    """

    def __init__(
        self,
        *,
        model: VADModels = "silero",
        min_speech_duration: float = 0.05,
        min_silence_duration: float = 0.25,
        prefix_padding_duration: float = 0.5,
        max_buffered_speech: float = 60.0,
        activation_threshold: float = 0.5,
        deactivation_threshold: NotGivenOr[float] = NOT_GIVEN,
    ) -> None:
        """Validate the arguments and store them as this VAD's options.

        Args:
            model: Model identifier; ``"silero"`` is the only accepted value.
            min_speech_duration: Stored unchanged on the options object.
            min_silence_duration: Stored unchanged on the options object.
            prefix_padding_duration: Stored unchanged on the options object.
            max_buffered_speech: Stored unchanged on the options object.
            activation_threshold: Stored unchanged on the options object.
            deactivation_threshold: Stored unchanged when given; otherwise
                ``max(activation_threshold - 0.15, 0.01)`` is used.

        Raises:
            ValueError: If ``model`` is not ``"silero"``, or if a given
                ``deactivation_threshold`` is less than or equal to 0.
        """
        # The base class is initialised before any validation runs.
        super().__init__(capabilities=vad.VADCapabilities(update_interval=0.032))

        if model != "silero":
            raise ValueError(f"Unknown VAD model: {model!r}. Supported: 'silero'.")

        if is_given(deactivation_threshold):
            if deactivation_threshold <= 0:
                raise ValueError("deactivation_threshold must be greater than 0")
            effective_deactivation = deactivation_threshold
        else:
            # No explicit value: derive one from the activation threshold,
            # never letting it drop below 0.01.
            effective_deactivation = max(activation_threshold - 0.15, 0.01)

        self._model = model
        self._opts = _VADOptions(
            min_speech_duration=min_speech_duration,
            min_silence_duration=min_silence_duration,
            prefix_padding_duration=prefix_padding_duration,
            max_buffered_speech=max_buffered_speech,
            activation_threshold=activation_threshold,
            deactivation_threshold=effective_deactivation,
        )
        # Weak references only: this set does not keep a stream alive.
        self._streams: weakref.WeakSet[_VADStream] = weakref.WeakSet()

    @property
    def model(self) -> str:
        """Model identifier that was passed to the constructor."""
        return self._model

    @property
    def provider(self) -> str:
        """Name of the inference provider; always the same constant string."""
        return "livekit-local-inference"

    def stream(self) -> vad.VADStream:
        """Create a new stream, register it with this VAD and return it."""
        # Hand the stream a private copy of the options instead of the shared
        # object. _VADStream.update_options() has to see the previous
        # max_buffered_speech before overwriting it; with a shared dataclass,
        # VAD.update_options() would already have changed the value and the
        # stream could never notice that it grew.
        opts_snapshot = replace(self._opts)
        new_stream = _VADStream(self, opts_snapshot)
        self._streams.add(new_stream)
        return new_stream

    def update_options(
        self,
        *,
        min_speech_duration: NotGivenOr[float] = NOT_GIVEN,
        min_silence_duration: NotGivenOr[float] = NOT_GIVEN,
        prefix_padding_duration: NotGivenOr[float] = NOT_GIVEN,
        max_buffered_speech: NotGivenOr[float] = NOT_GIVEN,
        activation_threshold: NotGivenOr[float] = NOT_GIVEN,
        deactivation_threshold: NotGivenOr[float] = NOT_GIVEN,
    ) -> None:
        """Apply option overrides to this VAD and to every tracked stream.

        An argument left as ``NOT_GIVEN`` leaves the stored option untouched.
        All six arguments, given or not, are forwarded to each stream's own
        ``update_options()``.
        """
        # NOTE(review): unlike __init__, a deactivation_threshold <= 0 is not
        # rejected here -- confirm whether that is intentional.
        requested = {
            "min_speech_duration": min_speech_duration,
            "min_silence_duration": min_silence_duration,
            "prefix_padding_duration": prefix_padding_duration,
            "max_buffered_speech": max_buffered_speech,
            "activation_threshold": activation_threshold,
            "deactivation_threshold": deactivation_threshold,
        }

        # Update this object's own options first, in declaration order.
        for option_name, option_value in requested.items():
            if is_given(option_value):
                setattr(self._opts, option_name, option_value)

        # Then pass the same request on to each stream that is still alive.
        for live_stream in self._streams:
            live_stream.update_options(**requested)

    @property
    def min_silence_duration(self) -> float | None:
        """Current ``min_silence_duration`` value from the stored options."""
        return self._opts.min_silence_duration


# [page text] Voice Activity Detection backed by
livekit-local-inference. The native model singleton is loaded once at module import (via the pybind11
.so constructor); each stream allocates its own per-instance LSTM/context state.
Ancestors
- VAD
- abc.ABC
- EventEmitter
- typing.Generic
Instance variables
prop min_silence_duration : float | None-
Expand source code
@property def min_silence_duration(self) -> float | None: return self._opts.min_silence_duration prop model : str-
Expand source code
@property
def model(self) -> str:
    """Model identifier stored on this instance as ``_model``."""
    model_name = self._model
    return model_name


# [page heading] prop provider : str-
Expand source code
@property
def provider(self) -> str:
    """Name of the inference provider; always the same constant string."""
    provider_name = "livekit-local-inference"
    return provider_name
Methods
def stream(self) ‑> VADStream-
Expand source code
def stream(self) -> vad.VADStream:
    """Create a new stream, register it with this VAD and return it."""
    # Hand the stream a private copy of the options instead of the shared
    # object. _VADStream.update_options() has to see the previous
    # max_buffered_speech before overwriting it; with a shared dataclass,
    # VAD.update_options() would already have changed the value and the
    # stream could never notice that it grew.
    opts_snapshot = replace(self._opts)
    new_stream = _VADStream(self, opts_snapshot)
    self._streams.add(new_stream)
    return new_stream


# [page heading] def update_options(self,
*,
min_speech_duration: NotGivenOr[float] = NOT_GIVEN,
min_silence_duration: NotGivenOr[float] = NOT_GIVEN,
prefix_padding_duration: NotGivenOr[float] = NOT_GIVEN,
max_buffered_speech: NotGivenOr[float] = NOT_GIVEN,
activation_threshold: NotGivenOr[float] = NOT_GIVEN,
deactivation_threshold: NotGivenOr[float] = NOT_GIVEN) ‑> None-
Expand source code
def update_options(
    self,
    *,
    min_speech_duration: NotGivenOr[float] = NOT_GIVEN,
    min_silence_duration: NotGivenOr[float] = NOT_GIVEN,
    prefix_padding_duration: NotGivenOr[float] = NOT_GIVEN,
    max_buffered_speech: NotGivenOr[float] = NOT_GIVEN,
    activation_threshold: NotGivenOr[float] = NOT_GIVEN,
    deactivation_threshold: NotGivenOr[float] = NOT_GIVEN,
) -> None:
    """Apply option overrides to this VAD and to every tracked stream.

    An argument left as ``NOT_GIVEN`` leaves the stored option untouched.
    All six arguments, given or not, are forwarded to each stream's own
    ``update_options()``.
    """
    # NOTE(review): the constructor shown on this page rejects a
    # deactivation_threshold <= 0; no such check is made here -- confirm
    # whether that is intentional.
    requested = {
        "min_speech_duration": min_speech_duration,
        "min_silence_duration": min_silence_duration,
        "prefix_padding_duration": prefix_padding_duration,
        "max_buffered_speech": max_buffered_speech,
        "activation_threshold": activation_threshold,
        "deactivation_threshold": deactivation_threshold,
    }

    # Update this object's own options first, in declaration order.
    for option_name, option_value in requested.items():
        if is_given(option_value):
            setattr(self._opts, option_name, option_value)

    # Then pass the same request on to each stream held in self._streams.
    for live_stream in self._streams:
        live_stream.update_options(**requested)
Inherited members