Module livekit.plugins.clova
Clova plugin for LiveKit Agents
See https://docs.livekit.io/agents/integrations/stt/clova/ for more information.
Classes
class STT (*,
language: ClovaSttLanguages | str = 'en-US',
secret: NotGivenOr[str] = NOT_GIVEN,
invoke_url: NotGivenOr[str] = NOT_GIVEN,
http_session: aiohttp.ClientSession | None = None,
threshold: float = 0.5)-
Expand source code
class STT(stt.STT):
    """Speech-to-text client that uploads audio to a Clova Speech endpoint.

    Each recognition call merges the supplied audio frames, resamples them to
    ``CLOVA_INPUT_SAMPLE_RATE``, wraps them in a mono 16-bit WAV container and
    posts them as multipart form data to ``<invoke_url>/recognizer/upload``.
    """

    def __init__(
        self,
        *,
        language: ClovaSttLanguages | str = "en-US",
        secret: NotGivenOr[str] = NOT_GIVEN,
        invoke_url: NotGivenOr[str] = NOT_GIVEN,
        http_session: aiohttp.ClientSession | None = None,
        threshold: float = 0.5,
    ):
        """
        Create a new instance of Clova STT.

        ``secret`` and ``invoke_url`` must be set, either using arguments or by setting the
        ``CLOVA_STT_SECRET_KEY`` and ``CLOVA_STT_INVOKE_URL`` environmental variables, respectively.

        Args:
            language: Language to recognize. Values found in
                ``clova_languages_mapping`` are translated; anything else is
                passed through unchanged.
            secret: API key sent in the ``X-CLOVASPEECH-API-KEY`` header.
            invoke_url: Base URL the recognition path is appended to.
            http_session: Optional session to reuse; when omitted one is
                obtained lazily from ``utils.http_context``.
            threshold: Minimum ``confidence`` a response must report to be
                accepted.

        Raises:
            ValueError: If the secret key or the invoke URL is available
                neither as an argument nor in the environment.
        """
        super().__init__(capabilities=STTCapabilities(streaming=False, interim_results=True))
        self._secret = secret if is_given(secret) else os.environ.get("CLOVA_STT_SECRET_KEY")
        self._invoke_url = (
            invoke_url if is_given(invoke_url) else os.environ.get("CLOVA_STT_INVOKE_URL")
        )
        self._language = clova_languages_mapping.get(language, language)
        self._session = http_session

        if self._secret is None:
            raise ValueError(
                "Clova STT secret key is required. It should be set with env CLOVA_STT_SECRET_KEY"
            )
        # Fail fast: without this check a missing URL would only surface at
        # request time as the literal string "None/recognizer/upload".
        if self._invoke_url is None:
            raise ValueError(
                "Clova STT invoke URL is required. It should be set with env CLOVA_STT_INVOKE_URL"
            )

        self.threshold = threshold

    def update_options(self, *, language: NotGivenOr[str] = NOT_GIVEN) -> None:
        """Change the default recognition language; a no-op when not given."""
        if is_given(language):
            self._language = clova_languages_mapping.get(language, language)

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the configured HTTP session, creating one on first use."""
        if not self._session:
            self._session = utils.http_context.http_session()
        return self._session

    def url_builder(self, process_method: ClovaSpeechAPIType = "recognizer/upload") -> str:
        """Return the invoke URL joined with ``process_method`` by a single ``/``."""
        return f"{self._invoke_url}/{process_method}"

    async def _recognize_impl(
        self,
        buffer: AudioBuffer,
        *,
        language: NotGivenOr[ClovaSttLanguages | str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> stt.SpeechEvent:
        """Upload ``buffer`` for synchronous recognition and return the transcript.

        Args:
            buffer: Audio frames to transcribe.
            language: Optional language for this call only; the instance
                default set at construction / via ``update_options`` is left
                untouched.
            conn_options: Supplies the socket-connect timeout for the request.

        Returns:
            A speech event carrying the recognized text.

        Raises:
            ValueError: If the response has no text, contains an ``error``
                key, or reports a confidence lower than ``self.threshold``.
            APITimeoutError: If the request times out.
            APIStatusError: If aiohttp raises ``ClientResponseError``.
        """
        # Resolve the language into a local so a per-call override does not
        # leak into later calls (or race with concurrent ones).
        request_language = (
            clova_languages_mapping.get(language, language)
            if is_given(language)
            else self._language
        )

        try:
            url = self.url_builder()
            payload = json.dumps({"language": request_language, "completion": "sync"})

            buffer = merge_frames(buffer)
            buffer_bytes = resample_audio(
                buffer.data.tobytes(), buffer.sample_rate, CLOVA_INPUT_SAMPLE_RATE
            )

            io_buffer = io.BytesIO()
            with wave.open(io_buffer, "wb") as wav:
                wav.setnchannels(1)
                wav.setsampwidth(2)  # 16-bit
                wav.setframerate(CLOVA_INPUT_SAMPLE_RATE)
                wav.writeframes(buffer_bytes)
            # Rewind so the upload reads the WAV from its first byte.
            io_buffer.seek(0)

            headers = {"X-CLOVASPEECH-API-KEY": self._secret}
            form_data = aiohttp.FormData()
            form_data.add_field("params", payload)
            form_data.add_field("media", io_buffer, filename="audio.wav", content_type="audio/wav")

            # perf_counter is monotonic, so the logged duration cannot be
            # skewed by wall-clock adjustments.
            start = time.perf_counter()
            async with self._ensure_session().post(
                url,
                data=form_data,
                headers=headers,
                timeout=aiohttp.ClientTimeout(
                    total=30,
                    sock_connect=conn_options.timeout,
                ),
            ) as response:
                response_data = await response.json()
                elapsed = time.perf_counter() - start

                text = response_data.get("text")
                confidence = response_data.get("confidence")
                logger.info("%s | %s | total_seconds: %s", text, confidence, elapsed)

                if not text or "error" in response_data:
                    raise ValueError(f"Unexpected response: {response_data}")
                # Only apply the threshold when a score is present; comparing
                # None with a float would raise TypeError.
                if confidence is not None and confidence < self.threshold:
                    raise ValueError(
                        f"Confidence: {confidence} is bellow threshold {self.threshold}. Skipping."
                    )

                logger.info("final event: %s", response_data)
                return self._transcription_to_speech_event(text=text, language=request_language)

        except asyncio.TimeoutError as e:
            raise APITimeoutError() from e
        except aiohttp.ClientResponseError as e:
            raise APIStatusError(
                message=e.message,
                status_code=e.status,
                request_id=None,
                body=None,
            ) from e

    def _transcription_to_speech_event(
        self,
        text: str,
        event_type: SpeechEventType = stt.SpeechEventType.INTERIM_TRANSCRIPT,
        *,
        language: str | None = None,
    ) -> stt.SpeechEvent:
        """Wrap ``text`` in a single-alternative speech event.

        Args:
            text: Recognized transcript.
            event_type: Event type to emit.
            language: Language to tag the result with; defaults to the
                instance language when omitted.
        """
        return stt.SpeechEvent(
            type=event_type,
            alternatives=[
                stt.SpeechData(
                    text=text,
                    language=self._language if language is None else language,
                )
            ],
        )
Helper class that provides a standard way to create an ABC using inheritance.
Create a new instance of Clova STT.
secret and invoke_url must be set, either using arguments or by setting the CLOVA_STT_SECRET_KEY and CLOVA_STT_INVOKE_URL environment variables, respectively.
Ancestors
- livekit.agents.stt.stt.STT
- abc.ABC
- EventEmitter
- typing.Generic
Methods
def update_options(self, *, language: NotGivenOr[str] = NOT_GIVEN) ‑> None
-
Expand source code
def update_options(self, *, language: NotGivenOr[str] = NOT_GIVEN) -> None:
    """Replace the stored recognition language when one is supplied.

    The value is looked up in ``clova_languages_mapping``; if it has no entry
    there it is stored unchanged. Omitting ``language`` leaves the current
    setting as is.
    """
    if not is_given(language):
        return
    resolved = clova_languages_mapping.get(language, language)
    self._language = resolved
def url_builder(self, process_method: ClovaSpeechAPIType = 'recognizer/upload') ‑> str
-
Expand source code
def url_builder(self, process_method: ClovaSpeechAPIType = "recognizer/upload") -> str:
    """Build the request URL for ``process_method``.

    The stored invoke URL and the method path are concatenated with a single
    ``/`` between them; no other normalization is applied.
    """
    base_url = self._invoke_url
    return f"{base_url}/{process_method}"
Inherited members