Module livekit.agents.stt.stt
Classes
class STT (*, capabilities: STTCapabilities)
-
Abstract base class for speech-to-text (STT) implementations. Subclasses must implement recognize(); stream() raises NotImplementedError unless a subclass overrides it.
Expand source code
class STT(ABC):
    """Abstract base class for speech-to-text (STT) implementations.

    Subclasses must implement ``recognize``. ``stream`` is optional: the
    default implementation raises ``NotImplementedError``.
    """

    def __init__(self, *, capabilities: STTCapabilities) -> None:
        """Store the capabilities object exposed through ``capabilities``."""
        self._capabilities = capabilities

    @property
    def capabilities(self) -> STTCapabilities:
        """The capabilities object given to the constructor."""
        return self._capabilities

    @abstractmethod
    async def recognize(
        self, buffer: AudioBuffer, *, language: str | None = None
    ) -> SpeechEvent:
        """Recognize the audio in ``buffer`` and return the resulting event.

        ``language`` is an optional language hint; how it is interpreted is
        up to the concrete implementation.
        """
        pass

    def stream(self, *, language: str | None = None) -> "SpeechStream":
        """Open a streaming recognition session.

        The base implementation always raises ``NotImplementedError``;
        implementations that support streaming override this method.
        """
        raise NotImplementedError(
            "streaming is not supported by this STT, please use a different STT or use a StreamAdapter"
        )

    async def aclose(self) -> None:
        """Close the STT and every stream/request associated with it.

        The base implementation does nothing.
        """
        pass
Ancestors
- abc.ABC
Subclasses
- StreamAdapter
- livekit.plugins.azure.stt.STT
- livekit.plugins.deepgram.stt.STT
- livekit.plugins.google.stt.STT
- livekit.plugins.openai.stt.STT
Instance variables
prop capabilities : STTCapabilities
-
Expand source code
@property
def capabilities(self) -> STTCapabilities:
    """The capabilities object stored on this instance as ``_capabilities``."""
    return self._capabilities
Methods
async def aclose(self) ‑> None
-
Close the STT and every stream/request associated with it
async def recognize(self, buffer: AudioBuffer, *, language: str | None = None) ‑> SpeechEvent
def stream(self, *, language: str | None = None) ‑> SpeechStream
class STTCapabilities (streaming: bool, interim_results: bool)
-
STTCapabilities(streaming: 'bool', interim_results: 'bool')
Expand source code
@dataclass
class STTCapabilities:
    """Feature flags describing an STT implementation."""

    # presumably: whether the STT provides a working stream() -- confirm with implementers
    streaming: bool
    # presumably: whether interim (non-final) transcripts are emitted -- confirm
    interim_results: bool
Class variables
var interim_results : bool
var streaming : bool
class SpeechData (language: str, text: str, start_time: float = 0.0, end_time: float = 0.0, confidence: float = 0.0)
-
SpeechData(language: 'str', text: 'str', start_time: 'float' = 0.0, end_time: 'float' = 0.0, confidence: 'float' = 0.0)
Expand source code
@dataclass
class SpeechData:
    """A single transcription result: the recognized text plus its metadata."""

    language: str
    text: str
    # Timing fields default to 0.0 (unit not specified in this module).
    start_time: float = 0.0
    end_time: float = 0.0
    # Expected range: [0, 1]
    confidence: float = 0.0
Class variables
var confidence : float
var end_time : float
var language : str
var start_time : float
var text : str
class SpeechEvent (type: SpeechEventType, alternatives: List[SpeechData] = <factory>)
-
SpeechEvent(type: 'SpeechEventType', alternatives: 'List[SpeechData]' = <factory>)
Expand source code
@dataclass
class SpeechEvent:
    """An event produced by an STT: its type plus candidate transcriptions."""

    type: SpeechEventType
    # A fresh empty list is created per instance when no alternatives are given.
    alternatives: List[SpeechData] = field(default_factory=list)
Class variables
var alternatives : List[SpeechData]
var type : SpeechEventType
class SpeechEventType (*args, **kwds)
-
str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str
Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.
Expand source code
@unique
class SpeechEventType(str, Enum):
    """Kinds of speech events an STT can emit.

    Members are also ``str`` instances, so each one compares equal to its
    string value.
    """

    START_OF_SPEECH = "start_of_speech"
    """indicate the start of speech.
    If the STT doesn't support this event, it will be emitted at the same time
    as the first INTERIM_TRANSCRIPT"""
    INTERIM_TRANSCRIPT = "interim_transcript"
    """interim transcript, useful for real-time transcription"""
    FINAL_TRANSCRIPT = "final_transcript"
    """final transcript, emitted when the STT is confident enough that a certain
    portion of speech will not change"""
    END_OF_SPEECH = "end_of_speech"
    """indicate the end of speech, emitted when the user stops speaking"""
Ancestors
- builtins.str
- enum.Enum
Class variables
var END_OF_SPEECH
-
indicate the end of speech, emitted when the user stops speaking
var FINAL_TRANSCRIPT
-
final transcript, emitted when the STT is confident enough that a certain portion of speech will not change
var INTERIM_TRANSCRIPT
-
interim transcript, useful for real-time transcription
var START_OF_SPEECH
-
indicate the start of speech. If the STT doesn't support this event, it will be emitted at the same time as the first INTERIM_TRANSCRIPT
class SpeechStream
-
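Abstract base class for a streaming recognition session: audio frames are pushed in with push_frame(), and SpeechEvent objects are read back by asynchronously iterating over the stream.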
Expand source code
class SpeechStream(ABC):
    """Abstract base class for a streaming speech recognition session.

    Audio frames are pushed in with ``push_frame`` and ``SpeechEvent`` objects
    are read back by asynchronously iterating over the stream. Subclasses
    implement ``_main_task``, which runs as a background task for the
    lifetime of the stream.
    """

    class _FlushSentinel:
        """Marker placed on the input channel by ``flush``."""

        pass

    def __init__(self):
        """Create the input/event channels and start ``_main_task``.

        ``asyncio.create_task`` is called here, so the stream has to be
        constructed while an event loop is running.
        """
        self._input_ch = aio.Chan[Union[rtc.AudioFrame, SpeechStream._FlushSentinel]]()
        self._event_ch = aio.Chan[SpeechEvent]()
        self._task = asyncio.create_task(self._main_task())
        # Once the task is done (for any reason), close the event channel.
        self._task.add_done_callback(lambda _: self._event_ch.close())

    @abstractmethod
    async def _main_task(self) -> None:
        """Coroutine run as the stream's background task.

        It is declared ``async`` because ``__init__`` passes its result
        directly to ``asyncio.create_task``, which requires a coroutine.
        """
        ...

    def push_frame(self, frame: rtc.AudioFrame) -> None:
        """Push audio to be recognized.

        Raises ``RuntimeError`` if the input has ended or the stream is closed.
        """
        self._check_input_not_ended()
        self._check_not_closed()
        self._input_ch.send_nowait(frame)

    def flush(self) -> None:
        """Mark the end of the current segment.

        Raises ``RuntimeError`` if the input has ended or the stream is closed.
        """
        self._check_input_not_ended()
        self._check_not_closed()
        self._input_ch.send_nowait(self._FlushSentinel())

    def end_input(self) -> None:
        """Mark the end of input, no more audio will be pushed.

        Flushes the current segment first, then closes the input channel.
        """
        self.flush()
        self._input_ch.close()

    async def aclose(self) -> None:
        """Close this stream immediately.

        Closes the input channel, cancels the background task through
        ``aio.gracefully_cancel`` and closes the event channel.
        """
        self._input_ch.close()
        await aio.gracefully_cancel(self._task)
        self._event_ch.close()

    async def __anext__(self) -> SpeechEvent:
        """Return the next event from the event channel."""
        return await self._event_ch.__anext__()

    def __aiter__(self) -> AsyncIterator[SpeechEvent]:
        """Return ``self``; the stream is its own async iterator."""
        return self

    def _check_not_closed(self) -> None:
        """Raise ``RuntimeError`` if the event channel is already closed."""
        if self._event_ch.closed:
            cls = type(self)
            raise RuntimeError(f"{cls.__module__}.{cls.__name__} is closed")

    def _check_input_not_ended(self) -> None:
        """Raise ``RuntimeError`` if the input channel is already closed."""
        if self._input_ch.closed:
            cls = type(self)
            raise RuntimeError(f"{cls.__module__}.{cls.__name__} input ended")
Ancestors
- abc.ABC
Subclasses
- StreamAdapterWrapper
- livekit.plugins.azure.stt.SpeechStream
- livekit.plugins.deepgram.stt.SpeechStream
- livekit.plugins.google.stt.SpeechStream
Methods
async def aclose(self) ‑> None
-
Close this stream immediately
def end_input(self) ‑> None
-
Mark the end of input, no more audio will be pushed
def flush(self) ‑> None
-
Mark the end of the current segment
def push_frame(self, frame: rtc.AudioFrame) ‑> None
-
Push audio to be recognized