Module livekit.plugins.elevenlabs

Classes

class TTS (*, voice: Voice = Voice(id='EXAVITQu4vr4xnSDxMaL', name='Bella', category='premade', settings=VoiceSettings(stability=0.71, similarity_boost=0.5, style=0.0, use_speaker_boost=True)), model: TTSModels | str = 'eleven_turbo_v2_5', api_key: str | None = None, base_url: str | None = None, encoding: TTSEncoding = 'mp3_22050_32', streaming_latency: int = 3, word_tokenizer: tokenize.WordTokenizer = <livekit.agents.tokenize.basic.WordTokenizer object>, enable_ssml_parsing: bool = False, chunk_length_schedule: list[int] = [80, 120, 200, 260], http_session: aiohttp.ClientSession | None = None, model_id: TTSModels | str | None = None, language: str | None = None)

ElevenLabs text-to-speech client, built on the abstract tts.TTS base class.

Create a new instance of ElevenLabs TTS.

Args

voice : Voice
Voice configuration. Defaults to DEFAULT_VOICE.
model : TTSModels | str
TTS model to use. Defaults to "eleven_turbo_v2_5".
api_key : str | None
ElevenLabs API key. Can be set via argument or ELEVEN_API_KEY environment variable.
base_url : str | None
Custom base URL for the API. Optional.
encoding : TTSEncoding
Audio encoding format. Defaults to "mp3_22050_32".
streaming_latency : int
Latency optimization level forwarded to the ElevenLabs streaming API (0-4; higher values reduce latency). Defaults to 3.
word_tokenizer : tokenize.WordTokenizer
Tokenizer for processing text. Defaults to basic WordTokenizer.
enable_ssml_parsing : bool
Enable SSML parsing for input text. Defaults to False.
chunk_length_schedule : list[int]
Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
http_session : aiohttp.ClientSession | None
Custom HTTP session for API requests. Optional.
language : str | None
Language code for the TTS model, as of 10/24/24 only valid for "eleven_turbo_v2_5". Optional.
Expand source code
class TTS(tts.TTS):
    """ElevenLabs text-to-speech client built on the abstract ``tts.TTS`` base class."""

    def __init__(
        self,
        *,
        voice: Voice = DEFAULT_VOICE,
        model: TTSModels | str = "eleven_turbo_v2_5",
        api_key: str | None = None,
        base_url: str | None = None,
        encoding: TTSEncoding = "mp3_22050_32",
        streaming_latency: int = 3,
        word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer(
            ignore_punctuation=False  # punctuation can help for intonation
        ),
        enable_ssml_parsing: bool = False,
        chunk_length_schedule: list[int] | None = None,  # values in [50, 500]
        http_session: aiohttp.ClientSession | None = None,
        # deprecated
        model_id: TTSModels | str | None = None,
        language: str | None = None,
    ) -> None:
        """
        Create a new instance of ElevenLabs TTS.

        Args:
            voice (Voice): Voice configuration. Defaults to `DEFAULT_VOICE`.
            model (TTSModels | str): TTS model to use. Defaults to "eleven_turbo_v2_5".
            api_key (str | None): ElevenLabs API key. Can be set via argument or `ELEVEN_API_KEY` environment variable.
            base_url (str | None): Custom base URL for the API. Optional.
            encoding (TTSEncoding): Audio encoding format. Defaults to "mp3_22050_32".
            streaming_latency (int): Latency optimization level forwarded to the ElevenLabs
                streaming API (0-4; higher values reduce latency — see the ElevenLabs API
                docs). Defaults to 3.
            word_tokenizer (tokenize.WordTokenizer): Tokenizer for processing text. Defaults to basic WordTokenizer.
            enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
            chunk_length_schedule (list[int] | None): Schedule for chunk lengths, values ranging
                from 50 to 500. Defaults to [80, 120, 200, 260].
            http_session (aiohttp.ClientSession | None): Custom HTTP session for API requests. Optional.
            model_id (TTSModels | str | None): Deprecated alias for `model`; will be removed in 1.5.0.
            language (str | None): Language code for the TTS model, as of 10/24/24 only valid for "eleven_turbo_v2_5". Optional.

        Raises:
            ValueError: If no API key is provided via argument or environment variable.
        """

        super().__init__(
            capabilities=tts.TTSCapabilities(
                streaming=True,
            ),
            # sample rate is implied by the encoding string (e.g. "mp3_22050_32" -> 22050 Hz)
            sample_rate=_sample_rate_from_format(encoding),
            num_channels=1,
        )

        if model_id is not None:
            logger.warning(
                "model_id is deprecated and will be removed in 1.5.0, use model instead",
            )
            model = model_id

        # A mutable list used directly as a parameter default would be shared across
        # every TTS instance; None is the sentinel for "use the standard schedule".
        if chunk_length_schedule is None:
            chunk_length_schedule = [80, 120, 200, 260]

        api_key = api_key or os.environ.get("ELEVEN_API_KEY")
        if not api_key:
            raise ValueError(
                "ElevenLabs API key is required, either as argument or set ELEVEN_API_KEY environmental variable"
            )

        self._opts = _TTSOptions(
            voice=voice,
            model=model,
            api_key=api_key,
            base_url=base_url or API_BASE_URL_V1,
            encoding=encoding,
            sample_rate=self.sample_rate,
            streaming_latency=streaming_latency,
            word_tokenizer=word_tokenizer,
            chunk_length_schedule=chunk_length_schedule,
            enable_ssml_parsing=enable_ssml_parsing,
            language=language,
        )
        self._session = http_session

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the HTTP session, lazily borrowing one from the shared http context."""
        if not self._session:
            self._session = utils.http_context.http_session()

        return self._session

    async def list_voices(self) -> List[Voice]:
        """Fetch the voices available to this API key from the `/voices` endpoint."""
        async with self._ensure_session().get(
            f"{self._opts.base_url}/voices",
            headers={AUTHORIZATION_HEADER: self._opts.api_key},
        ) as resp:
            return _dict_to_voices_list(await resp.json())

    def update_options(
        self,
        *,
        voice: Voice | None = None,
        model: TTSModels | str | None = None,
    ) -> None:
        """
        Update the voice and/or model used by subsequent synthesis requests.

        Args:
            voice (Voice | None): New voice configuration, or None to keep the current voice.
            model (TTSModels | str | None): New TTS model, or None to keep the current model.
        """
        # The previous defaults (DEFAULT_VOICE / "eleven_turbo_v2_5") were always
        # truthy, so the `x or self._opts.x` fallback could never keep the current
        # value and update_options(model=...) silently reset the voice. None now
        # explicitly means "leave unchanged", matching the fallback's evident intent.
        if model is not None:
            self._opts.model = model
        if voice is not None:
            self._opts.voice = voice

    def synthesize(self, text: str) -> "ChunkedStream":
        """Synthesize `text` in a single request, returning a chunked audio stream."""
        return ChunkedStream(self, text, self._opts, self._ensure_session())

    def stream(self) -> "SynthesizeStream":
        """Open a stream for incremental text input and audio output."""
        return SynthesizeStream(self, self._ensure_session(), self._opts)

Ancestors

Methods

async def list_voices(self) ‑> List[livekit.plugins.elevenlabs.tts.Voice]
def stream(self) ‑> livekit.plugins.elevenlabs.tts.SynthesizeStream
def synthesize(self, text: str) ‑> livekit.plugins.elevenlabs.tts.ChunkedStream
def update_options(self, *, voice: Voice = Voice(id='EXAVITQu4vr4xnSDxMaL', name='Bella', category='premade', settings=VoiceSettings(stability=0.71, similarity_boost=0.5, style=0.0, use_speaker_boost=True)), model: TTSModels | str = 'eleven_turbo_v2_5') ‑> None

Args

voice : Voice
Voice configuration. Defaults to DEFAULT_VOICE.
model : TTSModels | str
TTS model to use. Defaults to "eleven_turbo_v2_5".

Inherited members

class Voice (id: str, name: str, category: str, settings: VoiceSettings | None = None)

Voice(id: 'str', name: 'str', category: 'str', settings: 'VoiceSettings | None' = None)

Expand source code
@dataclass
class Voice:
    """A single ElevenLabs voice, as returned by the `/voices` endpoint.

    Instances are typically obtained from `TTS.list_voices()` or supplied as the
    `voice` argument when constructing `TTS`.
    """

    # Unique voice identifier assigned by ElevenLabs (e.g. "EXAVITQu4vr4xnSDxMaL").
    id: str
    # Human-readable display name (e.g. "Bella").
    name: str
    # Voice category string, e.g. "premade" — full value set defined by the API.
    category: str
    # Optional synthesis tuning; None defers to the voice's server-side defaults.
    settings: VoiceSettings | None = None

Class variables

var category : str
var id : str
var name : str
var settings : livekit.plugins.elevenlabs.tts.VoiceSettings | None
class VoiceSettings (stability: float, similarity_boost: float, style: float | None = None, use_speaker_boost: bool | None = False)

VoiceSettings(stability: 'float', similarity_boost: 'float', style: 'float | None' = None, use_speaker_boost: 'bool | None' = False)

Expand source code
@dataclass
class VoiceSettings:
    """Per-voice synthesis tuning parameters forwarded to the ElevenLabs API.

    All float fields are normalized to the [0.0, 1.0] range; semantics of each
    knob are defined by the ElevenLabs voice-settings API.
    """

    stability: float  # [0.0 - 1.0]
    similarity_boost: float  # [0.0 - 1.0]
    style: float | None = None  # [0.0 - 1.0]; None omits the field from the request — TODO confirm
    use_speaker_boost: bool | None = False

Class variables

var similarity_boost : float
var stability : float
var style : float | None
var use_speaker_boost : bool | None