Module livekit.agents.tokenize.utils
Functions
def replace_words(*, text: str | AsyncIterable[str], replacements: dict[str, str]) -> str | AsyncIterable[str]
Expand source code
from __future__ import annotations

from typing import AsyncIterable

from . import _basic_word, tokenizer


def replace_words(
    *,
    text: str | AsyncIterable[str],
    replacements: dict[str, str],
) -> str | AsyncIterable[str]:
    """
    Replace words in the given (async) text.

    Matching is case-insensitive; the replacement string is inserted as
    provided, and trailing punctuation on the original word is preserved.

    Args:
        text: text to replace words in
        replacements: dictionary of words to replace
    """
    replacements = {k.lower(): v for k, v in replacements.items()}

    def _process_words(text, words):
        offset = 0
        processed_index = 0
        for word, start_index, end_index in words:
            # Strip trailing punctuation so e.g. "world!" still matches "world".
            no_punctuation = word.rstrip("".join(tokenizer.PUNCTUATIONS))
            punctuation_off = len(word) - len(no_punctuation)
            replacement = replacements.get(no_punctuation.lower())
            if replacement:
                text = (
                    text[: start_index + offset]
                    + replacement
                    + text[end_index + offset - punctuation_off :]
                )
                # Track how much the string grew or shrank so the original
                # word indices can still be mapped onto the edited text.
                offset += len(replacement) - len(word) + punctuation_off

            processed_index = end_index + offset

        return text, processed_index

    if isinstance(text, str):
        words = _basic_word.split_words(text, ignore_punctuation=False)
        text, _ = _process_words(text, words)
        return text
    else:

        async def _replace_words():
            buffer = ""
            async for chunk in text:
                buffer += chunk
                words = _basic_word.split_words(buffer, ignore_punctuation=False)

                # Keep the last word in the buffer: it may still be incomplete.
                if len(words) <= 1:
                    continue

                buffer, processed_index = _process_words(buffer, words[:-1])
                yield buffer[:processed_index]
                buffer = buffer[processed_index:]

            if buffer:
                words = _basic_word.split_words(buffer, ignore_punctuation=False)
                buffer, _ = _process_words(buffer, words)
                yield buffer

        return _replace_words()
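A quick sketch of the synchronous path (the inputs here are illustrative):

from livekit.agents.tokenize.utils import replace_words

# Matching ignores case, and trailing punctuation survives the swap:
print(replace_words(text="Hello world!", replacements={"hello": "hey", "world": "planet"}))
# -> "hey planet!"

Note that the replacement string is inserted as given: "hey" stays lowercase even though the original "Hello" was capitalized.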
Replace words in the given (async) text. Matching is case-insensitive; the replacement string is inserted as provided, and trailing punctuation on the original word is preserved.
Args
text
- Text (or async stream of text) to replace words in.
replacements
- Dictionary mapping words to their replacements.
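When given an async iterable, the function buffers incoming chunks and only processes words once they are known to be complete, yielding rewritten segments as they become safe to emit. A minimal usage sketch of that streaming path (the chunked input simulates, say, an LLM token stream; the split points are arbitrary):

import asyncio
from typing import AsyncIterable

from livekit.agents.tokenize.utils import replace_words


async def fake_stream() -> AsyncIterable[str]:
    # Words may be split across chunk boundaries; the buffer handles this.
    for piece in ["Hel", "lo wor", "ld!"]:
        yield piece


async def main() -> None:
    stream = replace_words(
        text=fake_stream(), replacements={"hello": "hey", "world": "planet"}
    )
    out = ""
    async for segment in stream:
        out += segment
    print(out)  # -> "hey planet!"


asyncio.run(main())

Because the last word in the buffer may still be arriving, it is held back until either more text comes in or the stream ends, so a replacement is never applied to a word prefix.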