Module `livekit.plugins.clova`

Clova plugin for LiveKit Agents

See https://docs.livekit.io/agents/integrations/stt/clova/ for more information.

Classes

class STT (*, language: ClovaSttLanguages | str = 'en-US', secret: NotGivenOr[str] = NOT_GIVEN, invoke_url: NotGivenOr[str] = NOT_GIVEN, http_session: aiohttp.ClientSession | None = None, threshold: float = 0.5)

Expand source code

class STT(stt.STT):
    """Speech-to-text client for the Clova Speech recognition API.

    Recognition is non-streaming: every call uploads the whole audio buffer as a
    16-bit mono WAV file to the ``recognizer/upload`` endpoint with
    ``"completion": "sync"`` and waits for the response.
    """

    def __init__(
        self,
        *,
        language: ClovaSttLanguages | str = "en-US",
        secret: NotGivenOr[str] = NOT_GIVEN,
        invoke_url: NotGivenOr[str] = NOT_GIVEN,
        http_session: aiohttp.ClientSession | None = None,
        threshold: float = 0.5,
    ):
        """
        Create a new instance of Clova STT.

        ``secret`` and ``invoke_url`` must be set, either using arguments or by setting the
        ``CLOVA_STT_SECRET_KEY`` and ``CLOVA_STT_INVOKE_URL`` environment variables, respectively.

        Args:
            language: Language to recognize. Looked up in ``clova_languages_mapping``;
                values without a mapping are sent unchanged.
            secret: Clova Speech API key, sent in the ``X-CLOVASPEECH-API-KEY`` header.
            invoke_url: Base URL of the Clova Speech endpoint.
            http_session: Optional ``aiohttp`` session to reuse. When omitted, the
                session from ``utils.http_context`` is used on first request.
            threshold: Minimum confidence a transcription must reach; results
                below it are rejected with ``ValueError``.

        Raises:
            ValueError: If the secret key or the invoke URL is missing.
        """

        super().__init__(
            capabilities=STTCapabilities(
                streaming=False, interim_results=True, aligned_transcript=False
            )
        )
        clova_secret = secret if is_given(secret) else os.environ.get("CLOVA_STT_SECRET_KEY")
        clova_invoke_url = (
            invoke_url if is_given(invoke_url) else os.environ.get("CLOVA_STT_INVOKE_URL")
        )
        if not clova_secret:
            raise ValueError(
                "Clova STT secret key is required. It should be set with env CLOVA_STT_SECRET_KEY"
            )
        # Fail fast: without this check a missing URL would only surface later as
        # a request to the literal string "None/recognizer/upload".
        if not clova_invoke_url:
            raise ValueError(
                "Clova STT invoke URL is required. It should be set with env CLOVA_STT_INVOKE_URL"
            )
        self._secret = clova_secret
        self._invoke_url = clova_invoke_url
        self._language = clova_languages_mapping.get(language, language)
        self._session = http_session
        self.threshold = threshold

    @property
    def model(self) -> str:
        """Model identifier; this plugin reports ``"unknown"``."""
        return "unknown"

    @property
    def provider(self) -> str:
        """Provider identifier, always ``"Clova"``."""
        return "Clova"

    def update_options(self, *, language: NotGivenOr[str] = NOT_GIVEN) -> None:
        """Change the recognition language used by subsequent requests.

        Args:
            language: New language; mapped through ``clova_languages_mapping``
                when a mapping exists. Left unchanged when not given.
        """
        if is_given(language):
            self._language = clova_languages_mapping.get(language, language)

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the HTTP session, fetching the shared one on first use."""
        if not self._session:
            self._session = utils.http_context.http_session()
        return self._session

    def url_builder(self, process_method: ClovaSpeechAPIType = "recognizer/upload") -> str:
        """Return the full endpoint URL for ``process_method`` under the invoke URL."""
        return f"{self._invoke_url}/{process_method}"

    async def _recognize_impl(
        self,
        buffer: AudioBuffer,
        *,
        language: NotGivenOr[ClovaSttLanguages | str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> stt.SpeechEvent:
        """Upload ``buffer`` to Clova and return the recognized text as a speech event.

        Args:
            buffer: Audio to transcribe; frames are merged and resampled to
                ``CLOVA_INPUT_SAMPLE_RATE`` before upload.
            language: Optional language override. Note that it is stored on the
                instance, so it also applies to later calls.
            conn_options: Connection options; ``timeout`` is used as the socket
                connect timeout.

        Raises:
            ValueError: If the response is malformed, reports an error, has no
                text or confidence, or the confidence is below ``threshold``.
            APITimeoutError: If the request times out.
            APIStatusError: If aiohttp reports a response error.
        """
        try:
            url = self.url_builder()
            if is_given(language):
                self._language = clova_languages_mapping.get(language, language)
            payload = json.dumps({"language": self._language, "completion": "sync"})

            buffer = merge_frames(buffer)
            buffer_bytes = resample_audio(
                buffer.data.tobytes(), buffer.sample_rate, CLOVA_INPUT_SAMPLE_RATE
            )

            # Wrap the raw PCM samples in an in-memory WAV container for upload.
            io_buffer = io.BytesIO()
            with wave.open(io_buffer, "wb") as wav:
                wav.setnchannels(1)
                wav.setsampwidth(2)  # 16-bit
                wav.setframerate(CLOVA_INPUT_SAMPLE_RATE)
                wav.writeframes(buffer_bytes)
            io_buffer.seek(0)

            headers = {"X-CLOVASPEECH-API-KEY": self._secret}
            form_data = aiohttp.FormData()
            form_data.add_field("params", payload)
            form_data.add_field("media", io_buffer, filename="audio.wav", content_type="audio/wav")
            # Monotonic clock: wall-clock adjustments must not skew the measured duration.
            start = time.perf_counter()
            async with self._ensure_session().post(
                url,
                data=form_data,
                headers=headers,
                timeout=aiohttp.ClientTimeout(
                    total=30,
                    sock_connect=conn_options.timeout,
                ),
            ) as response:
                response_data = await response.json()
                elapsed = time.perf_counter() - start
                if not isinstance(response_data, dict):
                    raise ValueError(f"Unexpected response: {response_data}")
                text = response_data.get("text")
                confidence = response_data.get("confidence")
                logger.info("%s | %s | total_seconds: %s", text, confidence, elapsed)
                if not text or "error" in response_data:
                    raise ValueError(f"Unexpected response: {response_data}")
                # A missing confidence cannot be compared against the threshold
                # (``None < float`` raises TypeError), so reject it explicitly.
                if confidence is None:
                    raise ValueError(f"Unexpected response (no confidence): {response_data}")
                if confidence < self.threshold:
                    raise ValueError(
                        f"Confidence: {confidence} is below threshold {self.threshold}. Skipping."
                    )
                logger.info("final event: %s", response_data)
                return self._transcription_to_speech_event(text=text)

        except asyncio.TimeoutError as e:
            raise APITimeoutError() from e
        except aiohttp.ClientResponseError as e:
            raise APIStatusError(
                message=e.message,
                status_code=e.status,
                request_id=None,
                body=None,
            ) from e

    def _transcription_to_speech_event(
        self,
        text: str,
        event_type: SpeechEventType = stt.SpeechEventType.INTERIM_TRANSCRIPT,
    ) -> stt.SpeechEvent:
        """Wrap ``text`` in a ``SpeechEvent`` tagged with the current language.

        NOTE(review): the default event type is ``INTERIM_TRANSCRIPT`` even though
        the caller logs the result as the "final event" — confirm this is intended.
        """
        return stt.SpeechEvent(
            type=event_type,
            alternatives=[stt.SpeechData(text=text, language=self._language)],
        )

Speech-to-text (STT) client for the Clova Speech recognition API.

Create a new instance of Clova STT.

`secret` and `invoke_url` must be set, either using arguments or by setting the `CLOVA_STT_SECRET_KEY` and `CLOVA_STT_INVOKE_URL` environment variables, respectively.

Ancestors

livekit.agents.stt.stt.STT
abc.ABC
EventEmitter
typing.Generic

Instance variables

prop model : str

Expand source code

@property
def model(self) -> str:
    """Model identifier; this plugin reports the placeholder ``"unknown"``."""
    return "unknown"

Get the model name/identifier for this STT instance.

Returns

The model name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their model information.

prop provider : str

Expand source code

@property
def provider(self) -> str:
    """Provider identifier for this STT implementation, always ``"Clova"``."""
    return "Clova"

Get the provider name/identifier for this STT instance.

Returns

The provider name if available, "unknown" otherwise.

Note

Plugins should override this property to provide their provider information.

Methods

def update_options(self, *, language: NotGivenOr[str] = NOT_GIVEN) ‑> None

Expand source code

def update_options(self, *, language: NotGivenOr[str] = NOT_GIVEN) -> None:
    """Change the recognition language used by subsequent requests.

    The value is looked up in ``clova_languages_mapping``; when no mapping
    exists it is stored unchanged. Nothing happens if ``language`` is not given.
    """
    if not is_given(language):
        return
    mapped_language = clova_languages_mapping.get(language, language)
    self._language = mapped_language

def url_builder(self, process_method: ClovaSpeechAPIType = 'recognizer/upload') ‑> str

Expand source code

def url_builder(self, process_method: ClovaSpeechAPIType = "recognizer/upload") -> str:
    """Return the endpoint URL: the configured invoke URL, a slash, then ``process_method``."""
    base_url = self._invoke_url
    return f"{base_url}/{process_method}"

Inherited members

EventEmitter:
- emit
- off
- on
- once