Module livekit.agents.tokenize

Sub-modules

livekit.agents.tokenize.basic
livekit.agents.tokenize.token_stream
livekit.agents.tokenize.tokenizer
livekit.agents.tokenize.utils

Classes

class BufferedSentenceStream (*, tokenizer: TokenizeCallable, min_token_len: int, min_ctx_len: int)

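Sentence stream that combines BufferedTokenStream with the SentenceStream interface. Its constructor takes keyword-only arguments and forwards tokenizer (as tokenize_fnc), min_token_len and min_ctx_len to the parent initializer.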

Expand source code
class BufferedSentenceStream(BufferedTokenStream, SentenceStream):
    """Sentence stream built on ``BufferedTokenStream`` and ``SentenceStream``.

    The constructor does no work of its own: it forwards its keyword-only
    arguments through ``super().__init__``, renaming ``tokenizer`` to
    ``tokenize_fnc`` on the way.

    Args:
        tokenizer: Callable forwarded as ``tokenize_fnc``.
        min_token_len: Forwarded unchanged.
            NOTE(review): presumably the minimum length of an emitted
            token -- confirm against BufferedTokenStream.
        min_ctx_len: Forwarded unchanged.
            NOTE(review): presumably the minimum amount of buffered text
            required before tokenizing -- confirm against
            BufferedTokenStream.
    """

    def __init__(
        self,
        *,
        tokenizer: TokenizeCallable,
        min_token_len: int,
        min_ctx_len: int,
    ) -> None:
        # Cooperative call: resolved through the MRO, where
        # BufferedTokenStream is the first base.
        super().__init__(
            tokenize_fnc=tokenizer,
            min_token_len=min_token_len,
            min_ctx_len=min_ctx_len,
        )

Ancestors

class BufferedWordStream (*, tokenizer: TokenizeCallable, min_token_len: int, min_ctx_len: int)

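Word stream that combines BufferedTokenStream with the WordStream interface. Its constructor takes keyword-only arguments and forwards tokenizer (as tokenize_fnc), min_token_len and min_ctx_len to the parent initializer.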

Expand source code
class BufferedWordStream(BufferedTokenStream, WordStream):
    """Word stream built on ``BufferedTokenStream`` and ``WordStream``.

    The constructor does no work of its own: it forwards its keyword-only
    arguments through ``super().__init__``, renaming ``tokenizer`` to
    ``tokenize_fnc`` on the way.

    Args:
        tokenizer: Callable forwarded as ``tokenize_fnc``.
        min_token_len: Forwarded unchanged.
            NOTE(review): presumably the minimum length of an emitted
            token -- confirm against BufferedTokenStream.
        min_ctx_len: Forwarded unchanged.
            NOTE(review): presumably the minimum amount of buffered text
            required before tokenizing -- confirm against
            BufferedTokenStream.
    """

    def __init__(
        self,
        *,
        tokenizer: TokenizeCallable,
        min_token_len: int,
        min_ctx_len: int,
    ) -> None:
        # Cooperative call: resolved through the MRO, where
        # BufferedTokenStream is the first base.
        super().__init__(
            tokenize_fnc=tokenizer,
            min_token_len=min_token_len,
            min_ctx_len=min_ctx_len,
        )

Ancestors

class SentenceStream

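Abstract base class for an asynchronous stream of sentence tokens. Subclasses implement push_text, flush, end_input and aclose; iterating the stream with "async for" yields TokenData items read from an internal channel.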

Expand source code
class SentenceStream(ABC):
    """Abstract async-iterable stream of ``TokenData`` items.

    Concrete subclasses must implement ``push_text``, ``flush``,
    ``end_input`` and ``aclose``. Iteration reads from the internal event
    channel created in ``__init__``.
    """

    def __init__(self) -> None:
        # Channel that ``__anext__`` reads tokens from.
        self._event_ch = aio.Chan[TokenData]()

    @abstractmethod
    def push_text(self, text: str) -> None:
        """Accept a piece of input text; implemented by subclasses."""

    @abstractmethod
    def flush(self) -> None:
        """Flush the stream; exact semantics are defined by the subclass."""

    @abstractmethod
    def end_input(self) -> None:
        """Signal the end of input; implemented by subclasses."""

    @abstractmethod
    async def aclose(self) -> None:
        """Asynchronously close the stream; implemented by subclasses."""

    def __aiter__(self) -> AsyncIterator[TokenData]:
        """Return the stream itself, which is its own async iterator."""
        return self

    async def __anext__(self) -> TokenData:
        """Await and return the next item from the event channel."""
        next_item = await self._event_ch.__anext__()
        return next_item

    def _do_close(self) -> None:
        """Close the underlying event channel."""
        self._event_ch.close()

    def _check_not_closed(self) -> None:
        """Raise ``RuntimeError`` if the event channel is already closed."""
        if not self._event_ch.closed:
            return

        cls = type(self)
        raise RuntimeError(f"{cls.__module__}.{cls.__name__} is closed")

Ancestors

  • abc.ABC

Subclasses

Methods

async def aclose(self) ‑> None
def end_input(self) ‑> None
def flush(self) ‑> None
def push_text(self, text: str) ‑> None
class SentenceTokenizer

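Abstract base class for sentence tokenizers. Subclasses implement tokenize, which returns a list of strings for a given text, and stream, which returns a SentenceStream; both accept an optional keyword-only language argument.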

Expand source code
class SentenceTokenizer(ABC):
    """Abstract interface for sentence tokenizers.

    Subclasses provide a one-shot ``tokenize`` and a ``stream`` factory.
    """

    @abstractmethod
    def tokenize(self, text: str, *, language: str | None = None) -> list[str]:
        """Return the tokens of ``text`` as a list of strings.

        Args:
            text: Text to tokenize.
            language: Optional language hint; how it is used is up to the
                subclass.
        """

    @abstractmethod
    def stream(self, *, language: str | None = None) -> "SentenceStream":
        """Return a ``SentenceStream``.

        Args:
            language: Optional language hint; how it is used is up to the
                subclass.
        """

Ancestors

  • abc.ABC

Subclasses

Methods

def stream(self, *, language: str | None = None) ‑> SentenceStream
def tokenize(self, text: str, *, language: str | None = None) ‑> list[str]
class TokenData (segment_id: str = '', token: str = '')

TokenData(segment_id: 'str' = '', token: 'str' = '')

Expand source code
@dataclass
class TokenData:
    """A token string paired with a segment identifier.

    Both fields default to the empty string.
    """

    # NOTE(review): presumably identifies the segment the token belongs
    # to -- confirm with the code that produces TokenData.
    segment_id: str = ""
    # The token text.
    token: str = ""

Class variables

var segment_id : str
var token : str
class WordStream

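Abstract base class for an asynchronous stream of word tokens. Subclasses implement push_text, flush, end_input and aclose; iterating the stream with "async for" yields TokenData items read from an internal channel.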

Expand source code
class WordStream(ABC):
    """Abstract async-iterable stream of ``TokenData`` items.

    Concrete subclasses must implement ``push_text``, ``flush``,
    ``end_input`` and ``aclose``. Iteration reads from the internal event
    channel created in ``__init__``.
    """

    def __init__(self) -> None:
        # Channel that ``__anext__`` reads tokens from.
        self._event_ch = aio.Chan[TokenData]()

    @abstractmethod
    def push_text(self, text: str) -> None:
        """Accept a piece of input text; implemented by subclasses."""

    @abstractmethod
    def flush(self) -> None:
        """Flush the stream; exact semantics are defined by the subclass."""

    @abstractmethod
    def end_input(self) -> None:
        """Signal the end of input; implemented by subclasses."""

    @abstractmethod
    async def aclose(self) -> None:
        """Asynchronously close the stream; implemented by subclasses."""

    def __aiter__(self) -> AsyncIterator[TokenData]:
        """Return the stream itself, which is its own async iterator."""
        return self

    async def __anext__(self) -> TokenData:
        """Await and return the next item from the event channel."""
        next_item = await self._event_ch.__anext__()
        return next_item

    def _do_close(self) -> None:
        """Close the underlying event channel."""
        self._event_ch.close()

    def _check_not_closed(self) -> None:
        """Raise ``RuntimeError`` if the event channel is already closed."""
        if not self._event_ch.closed:
            return

        cls = type(self)
        raise RuntimeError(f"{cls.__module__}.{cls.__name__} is closed")

Ancestors

  • abc.ABC

Subclasses

Methods

async def aclose(self) ‑> None
def end_input(self) ‑> None
def flush(self) ‑> None
def push_text(self, text: str) ‑> None
class WordTokenizer

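Abstract base class for word tokenizers. Subclasses implement tokenize, which returns a list of strings for a given text, and stream, which returns a WordStream; both accept an optional keyword-only language argument. The concrete format_words method joins a list of words with single spaces.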

Expand source code
class WordTokenizer(ABC):
    """Abstract interface for word tokenizers.

    Subclasses provide a one-shot ``tokenize`` and a ``stream`` factory;
    ``format_words`` has a default implementation.
    """

    @abstractmethod
    def tokenize(self, text: str, *, language: str | None = None) -> list[str]:
        """Return the tokens of ``text`` as a list of strings.

        Args:
            text: Text to tokenize.
            language: Optional language hint; how it is used is up to the
                subclass.
        """

    @abstractmethod
    def stream(self, *, language: str | None = None) -> "WordStream":
        """Return a ``WordStream``.

        Args:
            language: Optional language hint; how it is used is up to the
                subclass.
        """

    def format_words(self, words: list[str]) -> str:
        """Join ``words`` into one string separated by single spaces.

        An empty list yields the empty string.
        """
        separator = " "
        return separator.join(words)

Ancestors

  • abc.ABC

Subclasses

Methods

def format_words(self, words: list[str]) ‑> str
def stream(self, *, language: str | None = None) ‑> WordStream
def tokenize(self, text: str, *, language: str | None = None) ‑> list[str]