Module `livekit.plugins.elevenlabs`

Classes

class TTS (*, voice_id: str = 'EXAVITQu4vr4xnSDxMaL', voice_settings: NotGivenOr[VoiceSettings] = NOT_GIVEN, model: TTSModels | str = 'eleven_flash_v2_5', encoding: NotGivenOr[TTSEncoding] = NOT_GIVEN, api_key: NotGivenOr[str] = NOT_GIVEN, base_url: NotGivenOr[str] = NOT_GIVEN, streaming_latency: NotGivenOr[int] = NOT_GIVEN, inactivity_timeout: int = 300, word_tokenizer: NotGivenOr[tokenize.WordTokenizer] = NOT_GIVEN, enable_ssml_parsing: bool = False, chunk_length_schedule: NotGivenOr[list[int]] = NOT_GIVEN, http_session: aiohttp.ClientSession | None = None, language: NotGivenOr[str] = NOT_GIVEN)

Expand source code

class TTS(tts.TTS):
    def __init__(
        self,
        *,
        voice_id: str = DEFAULT_VOICE_ID,
        voice_settings: NotGivenOr[VoiceSettings] = NOT_GIVEN,
        model: TTSModels | str = "eleven_flash_v2_5",
        encoding: NotGivenOr[TTSEncoding] = NOT_GIVEN,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        base_url: NotGivenOr[str] = NOT_GIVEN,
        streaming_latency: NotGivenOr[int] = NOT_GIVEN,
        inactivity_timeout: int = WS_INACTIVITY_TIMEOUT,
        word_tokenizer: NotGivenOr[tokenize.WordTokenizer] = NOT_GIVEN,
        enable_ssml_parsing: bool = False,
        chunk_length_schedule: NotGivenOr[list[int]] = NOT_GIVEN,  # range is [50, 500]
        http_session: aiohttp.ClientSession | None = None,
        language: NotGivenOr[str] = NOT_GIVEN,
    ) -> None:
        """
        Create a new instance of ElevenLabs TTS.

        Args:
            voice_id (str): Voice ID. Defaults to `DEFAULT_VOICE_ID`.
            voice_settings (NotGivenOr[VoiceSettings]): Voice settings.
            model (TTSModels | str): TTS model to use. Defaults to "eleven_turbo_v2_5".
            api_key (NotGivenOr[str]): ElevenLabs API key. Can be set via argument or `ELEVEN_API_KEY` environment variable.
            base_url (NotGivenOr[str]): Custom base URL for the API. Optional.
            streaming_latency (NotGivenOr[int]): Optimize for streaming latency, defaults to 0 - disabled. 4 for max latency optimizations. deprecated
            inactivity_timeout (int): Inactivity timeout in seconds for the websocket connection. Defaults to 300.
            word_tokenizer (NotGivenOr[tokenize.WordTokenizer]): Tokenizer for processing text. Defaults to basic WordTokenizer.
            enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
            chunk_length_schedule (NotGivenOr[list[int]]): Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
            http_session (aiohttp.ClientSession | None): Custom HTTP session for API requests. Optional.
            language (NotGivenOr[str]): Language code for the TTS model, as of 10/24/24 only valid for "eleven_turbo_v2_5".
        """  # noqa: E501

        if not is_given(chunk_length_schedule):
            chunk_length_schedule = [80, 120, 200, 260]

        if not is_given(encoding):
            encoding = _DefaultEncoding

        super().__init__(
            capabilities=tts.TTSCapabilities(
                streaming=True,
            ),
            sample_rate=_sample_rate_from_format(encoding),
            num_channels=1,
        )

        elevenlabs_api_key = api_key if is_given(api_key) else os.environ.get("ELEVEN_API_KEY")
        if not elevenlabs_api_key:
            raise ValueError(
                "ElevenLabs API key is required, either as argument or set ELEVEN_API_KEY environmental variable"  # noqa: E501
            )

        if not is_given(word_tokenizer):
            word_tokenizer = tokenize.basic.WordTokenizer(
                ignore_punctuation=False  # punctuation can help for intonation
            )

        self._opts = _TTSOptions(
            voice_id=voice_id,
            voice_settings=voice_settings,
            model=model,
            api_key=elevenlabs_api_key,
            base_url=base_url if is_given(base_url) else API_BASE_URL_V1,
            encoding=encoding,
            sample_rate=self.sample_rate,
            streaming_latency=streaming_latency,
            word_tokenizer=word_tokenizer,
            chunk_length_schedule=chunk_length_schedule,
            enable_ssml_parsing=enable_ssml_parsing,
            language=language,
            inactivity_timeout=inactivity_timeout,
        )
        self._session = http_session
        self._streams = weakref.WeakSet[SynthesizeStream]()

    def _ensure_session(self) -> aiohttp.ClientSession:
        if not self._session:
            self._session = utils.http_context.http_session()

        return self._session

    async def list_voices(self) -> list[Voice]:
        async with self._ensure_session().get(
            f"{self._opts.base_url}/voices",
            headers={AUTHORIZATION_HEADER: self._opts.api_key},
        ) as resp:
            return _dict_to_voices_list(await resp.json())

    def update_options(
        self,
        *,
        voice_id: NotGivenOr[str] = NOT_GIVEN,
        voice_settings: NotGivenOr[VoiceSettings] = NOT_GIVEN,
        model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
        language: NotGivenOr[str] = NOT_GIVEN,
    ) -> None:
        """
        Args:
            voice_id (NotGivenOr[str]): Voice ID.
            voice_settings (NotGivenOr[VoiceSettings]): Voice settings.
            model (NotGivenOr[TTSModels | str]): TTS model to use.
            language (NotGivenOr[str]): Language code for the TTS model.
        """
        if is_given(model):
            self._opts.model = model
        if is_given(voice_id):
            self._opts.voice_id = voice_id
        if is_given(voice_settings):
            self._opts.voice_settings = voice_settings
        if is_given(language):
            self._opts.language = language

    def synthesize(
        self,
        text: str,
        *,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> ChunkedStream:
        return ChunkedStream(
            tts=self,
            input_text=text,
            conn_options=conn_options,
            opts=self._opts,
            session=self._ensure_session(),
        )

    def stream(
        self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> SynthesizeStream:
        stream = SynthesizeStream(
            tts=self,
            conn_options=conn_options,
            opts=self._opts,
            session=self._ensure_session(),
        )
        self._streams.add(stream)
        return stream

    async def aclose(self) -> None:
        for stream in list(self._streams):
            await stream.aclose()
        self._streams.clear()
        await super().aclose()

Helper class that provides a standard way to create an ABC using inheritance.

Create a new instance of ElevenLabs TTS.

Args

voice_id : str: Voice ID. Defaults to DEFAULT_VOICE_ID.
voice_settings : NotGivenOr[VoiceSettings]: Voice settings.
model : TTSModels | str: TTS model to use. Defaults to "eleven_turbo_v2_5".
api_key : NotGivenOr[str]: ElevenLabs API key. Can be set via argument or ELEVEN_API_KEY environment variable.
base_url : NotGivenOr[str]: Custom base URL for the API. Optional.
streaming_latency : NotGivenOr[int]: Optimize for streaming latency, defaults to 0 - disabled. 4 for max latency optimizations. deprecated
inactivity_timeout : int: Inactivity timeout in seconds for the websocket connection. Defaults to 300.
word_tokenizer : NotGivenOr[tokenize.WordTokenizer]: Tokenizer for processing text. Defaults to basic WordTokenizer.
enable_ssml_parsing : bool: Enable SSML parsing for input text. Defaults to False.
chunk_length_schedule : NotGivenOr[list[int]]: Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
http_session : aiohttp.ClientSession | None: Custom HTTP session for API requests. Optional.
language : NotGivenOr[str]: Language code for the TTS model, as of 10/24/24 only valid for "eleven_turbo_v2_5".

Ancestors

livekit.agents.tts.tts.TTS
abc.ABC
EventEmitter
typing.Generic

Methods

async def aclose(self) ‑> None

Expand source code

async def aclose(self) -> None:
    for stream in list(self._streams):
        await stream.aclose()
    self._streams.clear()
    await super().aclose()

async def list_voices(self) ‑> list[livekit.plugins.elevenlabs.tts.Voice]

Expand source code

async def list_voices(self) -> list[Voice]:
    async with self._ensure_session().get(
        f"{self._opts.base_url}/voices",
        headers={AUTHORIZATION_HEADER: self._opts.api_key},
    ) as resp:
        return _dict_to_voices_list(await resp.json())

def stream(self, *, conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.elevenlabs.tts.SynthesizeStream

Expand source code

def stream(
    self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
) -> SynthesizeStream:
    stream = SynthesizeStream(
        tts=self,
        conn_options=conn_options,
        opts=self._opts,
        session=self._ensure_session(),
    )
    self._streams.add(stream)
    return stream

def synthesize(self, text: str, *, conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.elevenlabs.tts.ChunkedStream

Expand source code

def synthesize(
    self,
    text: str,
    *,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> ChunkedStream:
    return ChunkedStream(
        tts=self,
        input_text=text,
        conn_options=conn_options,
        opts=self._opts,
        session=self._ensure_session(),
    )

def update_options(self, *, voice_id: NotGivenOr[str] = NOT_GIVEN, voice_settings: NotGivenOr[VoiceSettings] = NOT_GIVEN, model: NotGivenOr[TTSModels | str] = NOT_GIVEN, language: NotGivenOr[str] = NOT_GIVEN) ‑> None

Expand source code

def update_options(
    self,
    *,
    voice_id: NotGivenOr[str] = NOT_GIVEN,
    voice_settings: NotGivenOr[VoiceSettings] = NOT_GIVEN,
    model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
    language: NotGivenOr[str] = NOT_GIVEN,
) -> None:
    """
    Args:
        voice_id (NotGivenOr[str]): Voice ID.
        voice_settings (NotGivenOr[VoiceSettings]): Voice settings.
        model (NotGivenOr[TTSModels | str]): TTS model to use.
        language (NotGivenOr[str]): Language code for the TTS model.
    """
    if is_given(model):
        self._opts.model = model
    if is_given(voice_id):
        self._opts.voice_id = voice_id
    if is_given(voice_settings):
        self._opts.voice_settings = voice_settings
    if is_given(language):
        self._opts.language = language

Args

voice_id : NotGivenOr[str]: Voice ID.
voice_settings : NotGivenOr[VoiceSettings]: Voice settings.
model : NotGivenOr[TTSModels | str]: TTS model to use.
language : NotGivenOr[str]: Language code for the TTS model.

Inherited members

EventEmitter:
- emit
- off
- on
- once

class Voice (id: str, name: str, category: str)

Expand source code

@dataclass
class Voice:
    id: str
    name: str
    category: str

Voice(id: 'str', name: 'str', category: 'str')

Instance variables

var category : str
var id : str
var name : str

class VoiceSettings (stability: float, similarity_boost: float, style: NotGivenOr[float] = NOT_GIVEN, speed: NotGivenOr[float] = NOT_GIVEN, use_speaker_boost: NotGivenOr[bool] = NOT_GIVEN)

Expand source code

@dataclass
class VoiceSettings:
    stability: float  # [0.0 - 1.0]
    similarity_boost: float  # [0.0 - 1.0]
    style: NotGivenOr[float] = NOT_GIVEN  # [0.0 - 1.0]
    speed: NotGivenOr[float] = NOT_GIVEN  # [0.8 - 1.2]
    use_speaker_boost: NotGivenOr[bool] = NOT_GIVEN

VoiceSettings(stability: 'float', similarity_boost: 'float', style: 'NotGivenOr[float]' = NOT_GIVEN, speed: 'NotGivenOr[float]' = NOT_GIVEN, use_speaker_boost: 'NotGivenOr[bool]' = NOT_GIVEN)

Instance variables

var similarity_boost : float
var speed : float | livekit.agents.types.NotGiven
var stability : float
var style : float | livekit.agents.types.NotGiven
var use_speaker_boost : bool | livekit.agents.types.NotGiven