Module livekit.plugins.elevenlabs

Classes

class TTS (*,
voice: Voice = Voice(id='EXAVITQu4vr4xnSDxMaL', name='Bella', category='premade', settings=VoiceSettings(stability=0.71, similarity_boost=0.5, style=0.0, speed=1.0, use_speaker_boost=True)),
model: TTSModels | str = 'eleven_flash_v2_5',
encoding: NotGivenOr[TTSEncoding] = NOT_GIVEN,
api_key: NotGivenOr[str] = NOT_GIVEN,
base_url: NotGivenOr[str] = NOT_GIVEN,
streaming_latency: NotGivenOr[int] = NOT_GIVEN,
inactivity_timeout: int = 300,
word_tokenizer: NotGivenOr[tokenize.WordTokenizer] = NOT_GIVEN,
enable_ssml_parsing: bool = False,
chunk_length_schedule: NotGivenOr[list[int]] = NOT_GIVEN,
http_session: aiohttp.ClientSession | None = None,
language: NotGivenOr[str] = NOT_GIVEN)
class TTS(tts.TTS):
    def __init__(
        self,
        *,
        voice: Voice = DEFAULT_VOICE,
        model: TTSModels | str = "eleven_flash_v2_5",
        encoding: NotGivenOr[TTSEncoding] = NOT_GIVEN,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        base_url: NotGivenOr[str] = NOT_GIVEN,
        streaming_latency: NotGivenOr[int] = NOT_GIVEN,
        inactivity_timeout: int = WS_INACTIVITY_TIMEOUT,
        word_tokenizer: NotGivenOr[tokenize.WordTokenizer] = NOT_GIVEN,
        enable_ssml_parsing: bool = False,
        chunk_length_schedule: NotGivenOr[list[int]] = NOT_GIVEN,  # range is [50, 500]
        http_session: aiohttp.ClientSession | None = None,
        language: NotGivenOr[str] = NOT_GIVEN,
    ) -> None:
        """
        Create a new instance of ElevenLabs TTS.

        Args:
            voice (Voice): Voice configuration. Defaults to `DEFAULT_VOICE`.
            model (TTSModels | str): TTS model to use. Defaults to "eleven_flash_v2_5".
            api_key (str | None): ElevenLabs API key. Can be set via argument or `ELEVEN_API_KEY` environment variable.
            base_url (str | None): Custom base URL for the API. Optional.
            streaming_latency (int): Deprecated. Latency optimization level; 0 (default) disables it, 4 applies maximum latency optimizations.
            inactivity_timeout (int): Inactivity timeout in seconds for the websocket connection. Defaults to 300.
            word_tokenizer (tokenize.WordTokenizer): Tokenizer for processing text. Defaults to basic WordTokenizer.
            enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
            chunk_length_schedule (list[int]): Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
            http_session (aiohttp.ClientSession | None): Custom HTTP session for API requests. Optional.
            language (NotGivenOr[str]): Language code for the TTS model. As of 10/24/24, only valid for "eleven_turbo_v2_5".
        """  # noqa: E501

        if not is_given(chunk_length_schedule):
            chunk_length_schedule = [80, 120, 200, 260]

        if not is_given(encoding):
            encoding = _DefaultEncoding

        super().__init__(
            capabilities=tts.TTSCapabilities(
                streaming=True,
            ),
            sample_rate=_sample_rate_from_format(encoding),
            num_channels=1,
        )

        elevenlabs_api_key = api_key if is_given(api_key) else os.environ.get("ELEVEN_API_KEY")
        if not elevenlabs_api_key:
            raise ValueError(
                "ElevenLabs API key is required, either as argument or set ELEVEN_API_KEY environmental variable"  # noqa: E501
            )

        if not is_given(word_tokenizer):
            word_tokenizer = tokenize.basic.WordTokenizer(
                ignore_punctuation=False  # punctuation can help for intonation
            )

        self._opts = _TTSOptions(
            voice=voice,
            model=model,
            api_key=elevenlabs_api_key,
            base_url=base_url if is_given(base_url) else API_BASE_URL_V1,
            encoding=encoding,
            sample_rate=self.sample_rate,
            streaming_latency=streaming_latency,
            word_tokenizer=word_tokenizer,
            chunk_length_schedule=chunk_length_schedule,
            enable_ssml_parsing=enable_ssml_parsing,
            language=language,
            inactivity_timeout=inactivity_timeout,
        )
        self._session = http_session
        self._streams = weakref.WeakSet[SynthesizeStream]()

    def _ensure_session(self) -> aiohttp.ClientSession:
        if not self._session:
            self._session = utils.http_context.http_session()

        return self._session

    async def list_voices(self) -> list[Voice]:
        async with self._ensure_session().get(
            f"{self._opts.base_url}/voices",
            headers={AUTHORIZATION_HEADER: self._opts.api_key},
        ) as resp:
            return _dict_to_voices_list(await resp.json())

    def update_options(
        self,
        *,
        voice: NotGivenOr[Voice] = NOT_GIVEN,
        model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
        language: NotGivenOr[str] = NOT_GIVEN,
    ) -> None:
        """
        Args:
            voice (NotGivenOr[Voice]): Voice configuration.
            model (NotGivenOr[TTSModels | str]): TTS model to use.
            language (NotGivenOr[str]): Language code for the TTS model.
        """
        if is_given(model):
            self._opts.model = model
        if is_given(voice):
            self._opts.voice = voice
        if is_given(language):
            self._opts.language = language

    def synthesize(
        self,
        text: str,
        *,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> ChunkedStream:
        return ChunkedStream(
            tts=self,
            input_text=text,
            conn_options=conn_options,
            opts=self._opts,
            session=self._ensure_session(),
        )

    def stream(
        self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> SynthesizeStream:
        stream = SynthesizeStream(
            tts=self,
            conn_options=conn_options,
            opts=self._opts,
            session=self._ensure_session(),
        )
        self._streams.add(stream)
        return stream

    async def aclose(self) -> None:
        for stream in list(self._streams):
            await stream.aclose()
        self._streams.clear()
        await super().aclose()


Create a new instance of ElevenLabs TTS.

Args

voice : Voice
Voice configuration. Defaults to DEFAULT_VOICE.
model : TTSModels | str
TTS model to use. Defaults to "eleven_flash_v2_5".
api_key : str | None
ElevenLabs API key. Can be set via argument or ELEVEN_API_KEY environment variable.
base_url : str | None
Custom base URL for the API. Optional.
streaming_latency : int
Deprecated. Latency optimization level; 0 (default) disables it, 4 applies maximum latency optimizations.
inactivity_timeout : int
Inactivity timeout in seconds for the websocket connection. Defaults to 300.
word_tokenizer : tokenize.WordTokenizer
Tokenizer for processing text. Defaults to basic WordTokenizer.
enable_ssml_parsing : bool
Enable SSML parsing for input text. Defaults to False.
chunk_length_schedule : list[int]
Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
http_session : aiohttp.ClientSession | None
Custom HTTP session for API requests. Optional.
language : NotGivenOr[str]
Language code for the TTS model. As of 10/24/24, only valid for "eleven_turbo_v2_5".
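
A minimal construction sketch, assuming the package is imported as livekit.plugins.elevenlabs as the module path above indicates; the parameter values are illustrative, not required:

import os

from livekit.plugins import elevenlabs

# The key can be passed explicitly or picked up from ELEVEN_API_KEY.
tts = elevenlabs.TTS(
    model="eleven_flash_v2_5",
    api_key=os.environ["ELEVEN_API_KEY"],
    language="en",
)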

Ancestors

  • livekit.agents.tts.tts.TTS
  • abc.ABC
  • EventEmitter
  • typing.Generic

Methods

async def aclose(self) ‑> None
async def aclose(self) -> None:
    for stream in list(self._streams):
        await stream.aclose()
    self._streams.clear()
    await super().aclose()
async def list_voices(self) ‑> list[livekit.plugins.elevenlabs.tts.Voice]
async def list_voices(self) -> list[Voice]:
    async with self._ensure_session().get(
        f"{self._opts.base_url}/voices",
        headers={AUTHORIZATION_HEADER: self._opts.api_key},
    ) as resp:
        return _dict_to_voices_list(await resp.json())
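
For example, assuming a TTS instance constructed as above, the available voices can be fetched and inspected like this (a sketch; the printed fields come from the Voice dataclass below):

async def print_voices(tts: elevenlabs.TTS) -> None:
    # Issues a GET against {base_url}/voices using the shared HTTP session
    for voice in await tts.list_voices():
        print(voice.id, voice.name, voice.category)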
def stream(self,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.elevenlabs.tts.SynthesizeStream
def stream(
    self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
) -> SynthesizeStream:
    stream = SynthesizeStream(
        tts=self,
        conn_options=conn_options,
        opts=self._opts,
        session=self._ensure_session(),
    )
    self._streams.add(stream)
    return stream
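
A usage sketch for streaming synthesis. The push/flush interface of SynthesizeStream is not reproduced in this section, so the calls below assume the standard livekit-agents TTS stream API (push_text, end_input, async iteration over synthesized audio events):

async def speak(tts: elevenlabs.TTS) -> None:
    stream = tts.stream()
    stream.push_text("Hello from ElevenLabs.")  # assumed SynthesizeStream method
    stream.end_input()                          # assumed: signals no more input text
    async for event in stream:                  # assumed: yields events carrying an audio frame
        handle_audio(event.frame)               # handle_audio is a hypothetical consumer
    await stream.aclose()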
def synthesize(self,
text: str,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.elevenlabs.tts.ChunkedStream
def synthesize(
    self,
    text: str,
    *,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> ChunkedStream:
    return ChunkedStream(
        tts=self,
        input_text=text,
        conn_options=conn_options,
        opts=self._opts,
        session=self._ensure_session(),
    )
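
By contrast with stream(), synthesize() takes the full text up front and returns a ChunkedStream. A sketch, assuming the ChunkedStream is consumed by async iteration as in other livekit-agents TTS plugins:

async def synthesize_once(tts: elevenlabs.TTS, text: str) -> None:
    chunked = tts.synthesize(text)
    async for event in chunked:    # assumed: yields synthesized audio events
        handle_audio(event.frame)  # handle_audio is a hypothetical consumer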
def update_options(self,
*,
voice: NotGivenOr[Voice] = NOT_GIVEN,
model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
language: NotGivenOr[str] = NOT_GIVEN) ‑> None
def update_options(
    self,
    *,
    voice: NotGivenOr[Voice] = NOT_GIVEN,
    model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
    language: NotGivenOr[str] = NOT_GIVEN,
) -> None:
    """
    Args:
        voice (NotGivenOr[Voice]): Voice configuration.
        model (NotGivenOr[TTSModels | str]): TTS model to use.
        language (NotGivenOr[str]): Language code for the TTS model.
    """
    if is_given(model):
        self._opts.model = model
    if is_given(voice):
        self._opts.voice = voice
    if is_given(language):
        self._opts.language = language

Args

voice : NotGivenOr[Voice]
Voice configuration.
model : NotGivenOr[TTSModels | str]
TTS model to use.
language : NotGivenOr[str]
Language code for the TTS model.
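
Only the options that are explicitly passed are changed; anything left as NOT_GIVEN keeps its current value. For example (some_voice is a hypothetical Voice, e.g. one returned by list_voices()):

tts.update_options(
    voice=some_voice,
    model="eleven_turbo_v2_5",
    # language is omitted, so the current language setting is preserved
)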

Inherited members

class Voice (id: str,
name: str,
category: str,
settings: NotGivenOr[VoiceSettings] = NOT_GIVEN)
@dataclass
class Voice:
    id: str
    name: str
    category: str
    settings: NotGivenOr[VoiceSettings] = NOT_GIVEN

Voice(id: 'str', name: 'str', category: 'str', settings: 'NotGivenOr[VoiceSettings]' = NOT_GIVEN)

Instance variables

var category : str
var id : str
var name : str
var settings : livekit.plugins.elevenlabs.tts.VoiceSettings | livekit.agents.types.NotGiven
class VoiceSettings (stability: float,
similarity_boost: float,
style: NotGivenOr[float] = NOT_GIVEN,
speed: NotGivenOr[float] = NOT_GIVEN,
use_speaker_boost: NotGivenOr[bool] = NOT_GIVEN)
@dataclass
class VoiceSettings:
    stability: float  # [0.0 - 1.0]
    similarity_boost: float  # [0.0 - 1.0]
    style: NotGivenOr[float] = NOT_GIVEN  # [0.0 - 1.0]
    speed: NotGivenOr[float] = NOT_GIVEN  # [0.8 - 1.2]
    use_speaker_boost: NotGivenOr[bool] = NOT_GIVEN

VoiceSettings(stability: 'float', similarity_boost: 'float', style: 'NotGivenOr[float]' = NOT_GIVEN, speed: 'NotGivenOr[float]' = NOT_GIVEN, use_speaker_boost: 'NotGivenOr[bool]' = NOT_GIVEN)

Instance variables

var similarity_boost : float
var speed : float | livekit.agents.types.NotGiven
var stability : float
var style : float | livekit.agents.types.NotGiven
var use_speaker_boost : bool | livekit.agents.types.NotGiven
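
A sketch of building a custom voice configuration from these two dataclasses; the id, name, and category values below are taken from the DEFAULT_VOICE shown in the TTS signature, and the settings stay within the documented ranges:

settings = elevenlabs.VoiceSettings(
    stability=0.71,        # [0.0 - 1.0]
    similarity_boost=0.5,  # [0.0 - 1.0]
    style=0.0,             # [0.0 - 1.0]
    speed=1.0,             # [0.8 - 1.2]
    use_speaker_boost=True,
)
voice = elevenlabs.Voice(
    id="EXAVITQu4vr4xnSDxMaL",
    name="Bella",
    category="premade",
    settings=settings,
)
tts.update_options(voice=voice)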