Module livekit.plugins.aws.stt
Classes
class STT (*,
speech_region: str = 'us-east-1',
api_key: str | None = None,
api_secret: str | None = None,
sample_rate: int = 48000,
language: str = 'en-US',
encoding: str = 'pcm',
vocabulary_name: Optional[str] = None,
session_id: Optional[str] = None,
vocab_filter_method: Optional[str] = None,
vocab_filter_name: Optional[str] = None,
show_speaker_label: Optional[bool] = None,
enable_channel_identification: Optional[bool] = None,
number_of_channels: Optional[int] = None,
enable_partial_results_stabilization: Optional[bool] = None,
partial_results_stability: Optional[str] = None,
language_model_name: Optional[str] = None)
Expand source code
class STT(stt.STT):
    def __init__(
        self,
        *,
        speech_region: str = "us-east-1",
        api_key: str | None = None,
        api_secret: str | None = None,
        sample_rate: int = 48000,
        language: str = "en-US",
        encoding: str = "pcm",
        vocabulary_name: Optional[str] = None,
        session_id: Optional[str] = None,
        vocab_filter_method: Optional[str] = None,
        vocab_filter_name: Optional[str] = None,
        show_speaker_label: Optional[bool] = None,
        enable_channel_identification: Optional[bool] = None,
        number_of_channels: Optional[int] = None,
        enable_partial_results_stabilization: Optional[bool] = None,
        partial_results_stability: Optional[str] = None,
        language_model_name: Optional[str] = None,
    ):
        super().__init__(
            capabilities=stt.STTCapabilities(streaming=True, interim_results=True)
        )
        self._api_key, self._api_secret = _get_aws_credentials(
            api_key, api_secret, speech_region
        )
        self._config = STTOptions(
            speech_region=speech_region,
            language=language,
            sample_rate=sample_rate,
            encoding=encoding,
            vocabulary_name=vocabulary_name,
            session_id=session_id,
            vocab_filter_method=vocab_filter_method,
            vocab_filter_name=vocab_filter_name,
            show_speaker_label=show_speaker_label,
            enable_channel_identification=enable_channel_identification,
            number_of_channels=number_of_channels,
            enable_partial_results_stabilization=enable_partial_results_stabilization,
            partial_results_stability=partial_results_stability,
            language_model_name=language_model_name,
        )

    async def _recognize_impl(
        self,
        buffer: utils.AudioBuffer,
        *,
        language: str | None,
        conn_options: APIConnectOptions,
    ) -> stt.SpeechEvent:
        raise NotImplementedError(
            "Amazon Transcribe does not support single frame recognition"
        )

    def stream(
        self,
        *,
        language: str | None = None,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> "SpeechStream":
        return SpeechStream(
            stt=self,
            conn_options=conn_options,
            opts=self._config,
        )

Streaming speech-to-text backed by Amazon Transcribe. Only streaming recognition with interim results is supported; single-frame recognition (_recognize_impl) raises NotImplementedError.
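A minimal usage sketch (not part of the generated reference): it assumes STT is importable from this module and that AWS credentials are resolvable either via api_key/api_secret or the default AWS credential chain. All argument values shown are illustrative, since every parameter has a default.

from livekit.plugins.aws.stt import STT

# Illustrative values; credentials are resolved from the environment when
# api_key / api_secret are omitted.
transcriber = STT(
    speech_region="us-east-1",
    language="en-US",
    sample_rate=48000,
    encoding="pcm",
)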
Ancestors
- STT
- abc.ABC
- EventEmitter
- typing.Generic
Methods
def stream(self,
*,
language: str | None = None,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) -> SpeechStream
Expand source code
def stream(
    self,
    *,
    language: str | None = None,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> "SpeechStream":
    return SpeechStream(
        stt=self,
        conn_options=conn_options,
        opts=self._config,
    )
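A sketch of feeding audio into the returned stream. It assumes the push_frame()/end_input() interface of the livekit-agents RecognizeStream base class, which is not shown on this page; frames stands in for any source of rtc.AudioFrame.

from livekit import rtc
from livekit.plugins.aws.stt import STT

async def feed_audio(transcriber: STT, frames: list[rtc.AudioFrame]) -> None:
    # stream() builds a SpeechStream from the options captured at construction.
    speech_stream = transcriber.stream(language="en-US")

    # Assumed base-class API (livekit-agents RecognizeStream):
    # push audio in, then signal that no more audio is coming.
    for frame in frames:
        speech_stream.push_frame(frame)
    speech_stream.end_input()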
Inherited members
class STTOptions (speech_region: str,
sample_rate: int,
language: str,
encoding: str,
vocabulary_name: Optional[str],
session_id: Optional[str],
vocab_filter_method: Optional[str],
vocab_filter_name: Optional[str],
show_speaker_label: Optional[bool],
enable_channel_identification: Optional[bool],
number_of_channels: Optional[int],
enable_partial_results_stabilization: Optional[bool],
partial_results_stability: Optional[str],
language_model_name: Optional[str])
Expand source code
@dataclass
class STTOptions:
    speech_region: str
    sample_rate: int
    language: str
    encoding: str
    vocabulary_name: Optional[str]
    session_id: Optional[str]
    vocab_filter_method: Optional[str]
    vocab_filter_name: Optional[str]
    show_speaker_label: Optional[bool]
    enable_channel_identification: Optional[bool]
    number_of_channels: Optional[int]
    enable_partial_results_stabilization: Optional[bool]
    partial_results_stability: Optional[str]
    language_model_name: Optional[str]

Configuration captured by STT and forwarded by SpeechStream to the Amazon Transcribe streaming client.
Instance variables
var enable_channel_identification : bool | None
var enable_partial_results_stabilization : bool | None
var encoding : str
var language : str
var language_model_name : str | None
var number_of_channels : int | None
var partial_results_stability : str | None
var sample_rate : int
var session_id : str | None
var show_speaker_label : bool | None
var speech_region : str
var vocab_filter_method : str | None
var vocab_filter_name : str | None
var vocabulary_name : str | None
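STTOptions mirrors the keyword arguments of STT.__init__ and is normally constructed for you by the STT constructor. A sketch of building one directly, for example when instantiating SpeechStream yourself; all values are illustrative and the fields have no defaults, so every one must be supplied.

from livekit.plugins.aws.stt import STTOptions

opts = STTOptions(
    speech_region="us-east-1",
    sample_rate=48000,
    language="en-US",
    encoding="pcm",
    vocabulary_name=None,
    session_id=None,
    vocab_filter_method=None,
    vocab_filter_name=None,
    show_speaker_label=None,
    enable_channel_identification=None,
    number_of_channels=None,
    enable_partial_results_stabilization=None,
    partial_results_stability=None,
    language_model_name=None,
)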
class SpeechStream (stt: STT,
opts: STTOptions,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0))
Expand source code
class SpeechStream(stt.SpeechStream):
    def __init__(
        self,
        stt: STT,
        opts: STTOptions,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> None:
        super().__init__(
            stt=stt, conn_options=conn_options, sample_rate=opts.sample_rate
        )
        self._opts = opts
        self._client = TranscribeStreamingClient(region=self._opts.speech_region)

    async def _run(self) -> None:
        stream = await self._client.start_stream_transcription(
            language_code=self._opts.language,
            media_sample_rate_hz=self._opts.sample_rate,
            media_encoding=self._opts.encoding,
            vocabulary_name=self._opts.vocabulary_name,
            session_id=self._opts.session_id,
            vocab_filter_method=self._opts.vocab_filter_method,
            vocab_filter_name=self._opts.vocab_filter_name,
            show_speaker_label=self._opts.show_speaker_label,
            enable_channel_identification=self._opts.enable_channel_identification,
            number_of_channels=self._opts.number_of_channels,
            enable_partial_results_stabilization=self._opts.enable_partial_results_stabilization,
            partial_results_stability=self._opts.partial_results_stability,
            language_model_name=self._opts.language_model_name,
        )

        @utils.log_exceptions(logger=logger)
        async def input_generator():
            async for frame in self._input_ch:
                if isinstance(frame, rtc.AudioFrame):
                    await stream.input_stream.send_audio_event(
                        audio_chunk=frame.data.tobytes()
                    )
            await stream.input_stream.end_stream()

        @utils.log_exceptions(logger=logger)
        async def handle_transcript_events():
            async for event in stream.output_stream:
                if isinstance(event, TranscriptEvent):
                    self._process_transcript_event(event)

        tasks = [
            asyncio.create_task(input_generator()),
            asyncio.create_task(handle_transcript_events()),
        ]
        try:
            await asyncio.gather(*tasks)
        finally:
            await utils.aio.gracefully_cancel(*tasks)

    def _process_transcript_event(self, transcript_event: TranscriptEvent):
        stream = transcript_event.transcript.results
        for resp in stream:
            if resp.start_time and resp.start_time == 0.0:
                self._event_ch.send_nowait(
                    stt.SpeechEvent(type=stt.SpeechEventType.START_OF_SPEECH)
                )

            if resp.end_time and resp.end_time > 0.0:
                if resp.is_partial:
                    self._event_ch.send_nowait(
                        stt.SpeechEvent(
                            type=stt.SpeechEventType.INTERIM_TRANSCRIPT,
                            alternatives=[
                                _streaming_recognize_response_to_speech_data(resp)
                            ],
                        )
                    )
                else:
                    self._event_ch.send_nowait(
                        stt.SpeechEvent(
                            type=stt.SpeechEventType.FINAL_TRANSCRIPT,
                            alternatives=[
                                _streaming_recognize_response_to_speech_data(resp)
                            ],
                        )
                    )

            if not resp.is_partial:
                self._event_ch.send_nowait(
                    stt.SpeechEvent(type=stt.SpeechEventType.END_OF_SPEECH)
                )

Streaming recognition session that forwards audio frames to Amazon Transcribe and emits SpeechEvents for interim and final transcripts.
Args:
    sample_rate : int or None, optional
        The desired sample rate for the audio input. If specified, the audio input
        will be automatically resampled to match the given sample rate before being
        processed for Speech-to-Text. If not provided (None), the input will retain
        its original sample rate.
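As _process_transcript_event above shows, partial Transcribe results surface as INTERIM_TRANSCRIPT events and completed results as FINAL_TRANSCRIPT followed by END_OF_SPEECH. A sketch of a consumer loop; it assumes the base class's async-iteration interface and the livekit-agents SpeechEventType enum, and that each alternative exposes a text field:

from livekit.agents import stt as agents_stt
from livekit.plugins.aws.stt import SpeechStream

async def print_transcripts(speech_stream: SpeechStream) -> None:
    # Assumed base-class API: the stream is an async iterator of SpeechEvents.
    async for event in speech_stream:
        if event.type == agents_stt.SpeechEventType.INTERIM_TRANSCRIPT:
            # Partial, still-stabilizing hypothesis from Transcribe.
            print("interim:", event.alternatives[0].text)
        elif event.type == agents_stt.SpeechEventType.FINAL_TRANSCRIPT:
            # Final result for the completed segment.
            print("final:", event.alternatives[0].text)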
Ancestors
- RecognizeStream
- abc.ABC
Inherited members