Module livekit.agents.tokenize.blingfire
Classes
class SentenceTokenizer (*,
min_sentence_len: int = 20,
stream_context_len: int = 10,
retain_format: bool = False)-
Expand source code
class SentenceTokenizer(tokenizer.SentenceTokenizer):
    """Sentence tokenizer usable on whole strings or on a buffered stream.

    The keyword arguments given at construction are kept in a
    ``_TokenizerOptions`` object and forwarded to ``_split_sentences`` (for
    :meth:`tokenize`) and to ``token_stream.BufferedSentenceStream`` (for
    :meth:`stream`).
    """

    def __init__(
        self,
        *,
        min_sentence_len: int = 20,
        stream_context_len: int = 10,
        retain_format: bool = False,
    ) -> None:
        """Record the tokenizer settings.

        Args:
            min_sentence_len: Passed to ``_split_sentences`` as
                ``min_sentence_len`` and to the stream as ``min_token_len``.
            stream_context_len: Passed to the stream as ``min_ctx_len``.
            retain_format: Passed to ``_split_sentences`` as ``retain_format``.
        """
        options = _TokenizerOptions(
            min_sentence_len=min_sentence_len,
            stream_context_len=stream_context_len,
            retain_format=retain_format,
        )
        self._config = options

    def tokenize(self, text: str, *, language: str | None = None) -> list[str]:
        """Split ``text`` and return the first element of every split result.

        Args:
            text: The string handed to ``_split_sentences``.
            language: Accepted for interface compatibility; this
                implementation does not read it.

        Returns:
            A list built from item ``[0]`` of each entry that
            ``_split_sentences`` yields, in the same order.
        """
        cfg = self._config
        sentences: list[str] = []
        for entry in _split_sentences(
            text,
            min_sentence_len=cfg.min_sentence_len,
            retain_format=cfg.retain_format,
        ):
            # Each entry is indexable; only its first item is kept.
            sentences.append(entry[0])
        return sentences

    def stream(self, *, language: str | None = None) -> tokenizer.SentenceStream:
        """Create a buffered sentence stream bound to this tokenizer's settings.

        Args:
            language: Accepted for interface compatibility; this
                implementation does not read it.

        Returns:
            A ``token_stream.BufferedSentenceStream`` whose ``tokenizer``
            callable is ``_split_sentences`` pre-bound with the configured
            ``min_sentence_len`` and ``retain_format``.
        """
        cfg = self._config
        split_fn = functools.partial(
            _split_sentences,
            min_sentence_len=cfg.min_sentence_len,
            retain_format=cfg.retain_format,
        )
        return token_stream.BufferedSentenceStream(
            tokenizer=split_fn,
            min_token_len=cfg.min_sentence_len,
            min_ctx_len=cfg.stream_context_len,
        )
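Sentence tokenizer: `tokenize` splits a complete string into a list of sentences, and `stream` returns a buffered sentence stream for incremental input. Note: the generic description "Helper class that provides a standard way to create an ABC using inheritance." is the docstring inherited from `abc.ABC`, not a description of this class.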
Ancestors
- livekit.agents.tokenize.tokenizer.SentenceTokenizer
- abc.ABC
Methods
def stream(self, *, language: str | None = None) ‑> livekit.agents.tokenize.tokenizer.SentenceStream
-
Expand source code
def stream(self, *, language: str | None = None) -> tokenizer.SentenceStream:
    """Create a buffered sentence stream bound to this tokenizer's settings.

    Args:
        language: Accepted for interface compatibility; this implementation
            does not read it.

    Returns:
        A ``token_stream.BufferedSentenceStream`` whose ``tokenizer`` callable
        is ``_split_sentences`` pre-bound with the configured
        ``min_sentence_len`` and ``retain_format``.
    """
    cfg = self._config
    split_fn = functools.partial(
        _split_sentences,
        min_sentence_len=cfg.min_sentence_len,
        retain_format=cfg.retain_format,
    )
    return token_stream.BufferedSentenceStream(
        tokenizer=split_fn,
        min_token_len=cfg.min_sentence_len,
        min_ctx_len=cfg.stream_context_len,
    )
def tokenize(self, text: str, *, language: str | None = None) ‑> list[str]
-
Expand source code
def tokenize(self, text: str, *, language: str | None = None) -> list[str]:
    """Split ``text`` and return the first element of every split result.

    Args:
        text: The string handed to ``_split_sentences``.
        language: Accepted for interface compatibility; this implementation
            does not read it.

    Returns:
        A list built from item ``[0]`` of each entry that ``_split_sentences``
        yields, in the same order.
    """
    cfg = self._config
    sentences: list[str] = []
    for entry in _split_sentences(
        text,
        min_sentence_len=cfg.min_sentence_len,
        retain_format=cfg.retain_format,
    ):
        # Each entry is indexable; only its first item is kept.
        sentences.append(entry[0])
    return sentences