Module livekit.plugins.elevenlabs

Classes

class TTS (*,
voice: Voice = Voice(id='EXAVITQu4vr4xnSDxMaL', name='Bella', category='premade', settings=VoiceSettings(stability=0.71, similarity_boost=0.5, style=0.0, use_speaker_boost=True)),
model: TTSModels | str = 'eleven_turbo_v2_5',
api_key: str | None = None,
base_url: str | None = None,
encoding: TTSEncoding = 'mp3_22050_32',
streaming_latency: int = 3,
word_tokenizer: tokenize.WordTokenizer = <livekit.agents.tokenize.basic.WordTokenizer object>,
enable_ssml_parsing: bool = False,
chunk_length_schedule: list[int] = [80, 120, 200, 260],
http_session: aiohttp.ClientSession | None = None,
model_id: TTSModels | str | None = None,
language: str | None = None)
Expand source code
class TTS(tts.TTS):
    """ElevenLabs text-to-speech engine.

    Holds one shared options object and hands it to the ``ChunkedStream``
    (one-shot) and ``SynthesizeStream`` (incremental) objects it creates.
    """

    def __init__(
        self,
        *,
        voice: Voice = DEFAULT_VOICE,
        model: TTSModels | str = "eleven_turbo_v2_5",
        api_key: str | None = None,
        base_url: str | None = None,
        encoding: TTSEncoding = "mp3_22050_32",
        streaming_latency: int = 3,
        word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer(
            ignore_punctuation=False  # punctuation can help for intonation
        ),
        enable_ssml_parsing: bool = False,
        chunk_length_schedule: list[int] = [80, 120, 200, 260],  # range is [50, 500]
        http_session: aiohttp.ClientSession | None = None,
        # deprecated
        model_id: TTSModels | str | None = None,
        language: str | None = None,
    ) -> None:
        """
        Create a new instance of ElevenLabs TTS.

        Args:
            voice (Voice): Voice configuration. Defaults to `DEFAULT_VOICE`.
            model (TTSModels | str): TTS model to use. Defaults to "eleven_turbo_v2_5".
            api_key (str | None): ElevenLabs API key. Can be set via argument or `ELEVEN_API_KEY` environment variable.
            base_url (str | None): Custom base URL for the API. Falls back to `API_BASE_URL_V1` when omitted or empty.
            encoding (TTSEncoding): Audio encoding format. Defaults to "mp3_22050_32". The sample rate is derived from it.
            streaming_latency (int): Streaming latency setting stored in the options. Defaults to 3.
                NOTE(review): previously described as "latency in seconds"; this looks like it maps to
                ElevenLabs' `optimize_streaming_latency` level rather than a duration - confirm.
            word_tokenizer (tokenize.WordTokenizer): Tokenizer for processing text. Defaults to basic WordTokenizer
                that keeps punctuation.
            enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
            chunk_length_schedule (list[int]): Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
                The sequence is copied, so later changes to the caller's list do not affect this instance.
            http_session (aiohttp.ClientSession | None): Custom HTTP session for API requests. Optional; when
                omitted a session is obtained lazily on first use.
            model_id (TTSModels | str | None): Deprecated alias for `model`. When given it overrides `model`
                and a deprecation warning is logged.
            language (str | None): Language code for the TTS model, as of 10/24/24 only valid for "eleven_turbo_v2_5". Optional.

        Raises:
            ValueError: If no API key is passed and `ELEVEN_API_KEY` is unset or empty.
        """

        super().__init__(
            capabilities=tts.TTSCapabilities(
                streaming=True,
            ),
            sample_rate=_sample_rate_from_format(encoding),
            num_channels=1,
        )

        if model_id is not None:
            logger.warning(
                "model_id is deprecated and will be removed in 1.5.0, use model instead",
            )
            model = model_id

        api_key = api_key or os.environ.get("ELEVEN_API_KEY")
        if not api_key:
            raise ValueError(
                "ElevenLabs API key is required, either as argument or set ELEVEN_API_KEY environmental variable"
            )

        self._opts = _TTSOptions(
            voice=voice,
            model=model,
            api_key=api_key,
            base_url=base_url or API_BASE_URL_V1,
            encoding=encoding,
            sample_rate=self.sample_rate,
            streaming_latency=streaming_latency,
            word_tokenizer=word_tokenizer,
            # Copy: the default list is created once at definition time and would
            # otherwise be shared (and mutable) across every instance.
            chunk_length_schedule=list(chunk_length_schedule),
            enable_ssml_parsing=enable_ssml_parsing,
            language=language,
        )
        self._session = http_session

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the HTTP session, fetching one from `utils.http_context` on first use."""
        if not self._session:
            self._session = utils.http_context.http_session()

        return self._session

    async def list_voices(self) -> List[Voice]:
        """Fetch `{base_url}/voices` and convert the JSON payload into `Voice` objects.

        Raises:
            aiohttp.ClientResponseError: If the server answers with an HTTP error status.
        """
        async with self._ensure_session().get(
            f"{self._opts.base_url}/voices",
            headers={AUTHORIZATION_HEADER: self._opts.api_key},
        ) as resp:
            # Fail with the real HTTP error instead of handing an error payload
            # (e.g. an auth failure body) to the voices parser.
            resp.raise_for_status()
            return _dict_to_voices_list(await resp.json())

    def update_options(
        self,
        *,
        voice: Voice | None = None,
        model: TTSModels | str | None = None,
        language: str | None = None,
    ) -> None:
        """
        Update selected synthesis options in place; omitted options keep their current value.

        Args:
            voice (Voice | None): New voice configuration. Unchanged when omitted.
            model (TTSModels | str | None): New TTS model. Unchanged when omitted.
            language (str | None): New language code for the TTS model. Unchanged when omitted.
        """
        # The defaults used to be DEFAULT_VOICE / "eleven_turbo_v2_5"; both are
        # truthy, so updating a single option silently reset the other two.
        # Falsy values (None, "") still leave the current setting untouched.
        if voice:
            self._opts.voice = voice
        if model:
            self._opts.model = model
        if language:
            self._opts.language = language

    def synthesize(
        self,
        text: str,
        *,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> "ChunkedStream":
        """Create a `ChunkedStream` for `text` using this instance's options and HTTP session."""
        return ChunkedStream(
            tts=self,
            input_text=text,
            conn_options=conn_options,
            opts=self._opts,
            session=self._ensure_session(),
        )

    def stream(
        self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> "SynthesizeStream":
        """Create a `SynthesizeStream` using this instance's options and HTTP session."""
        return SynthesizeStream(
            tts=self,
            conn_options=conn_options,
            opts=self._opts,
            session=self._ensure_session(),
        )

Helper class that provides a standard way to create an ABC using inheritance.

Create a new instance of ElevenLabs TTS.

Args

voice : Voice
Voice configuration. Defaults to DEFAULT_VOICE.
model : TTSModels | str
TTS model to use. Defaults to "eleven_turbo_v2_5".
api_key : str | None
ElevenLabs API key. Can be set via argument or ELEVEN_API_KEY environment variable.
base_url : str | None
Custom base URL for the API. Optional.
encoding : TTSEncoding
Audio encoding format. Defaults to "mp3_22050_32".
streaming_latency : int
Latency in seconds for streaming. Defaults to 3.
word_tokenizer : tokenize.WordTokenizer
Tokenizer for processing text. Defaults to basic WordTokenizer.
enable_ssml_parsing : bool
Enable SSML parsing for input text. Defaults to False.
chunk_length_schedule : list[int]
Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
http_session : aiohttp.ClientSession | None
Custom HTTP session for API requests. Optional.
language : str | None
Language code for the TTS model, as of 10/24/24 only valid for "eleven_turbo_v2_5". Optional.

Ancestors

Methods

async def list_voices(self) ‑> List[livekit.plugins.elevenlabs.tts.Voice]
Expand source code
async def list_voices(self) -> List[Voice]:
    """Fetch `{base_url}/voices` and convert the JSON payload into `Voice` objects.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP error status.
    """
    async with self._ensure_session().get(
        f"{self._opts.base_url}/voices",
        headers={AUTHORIZATION_HEADER: self._opts.api_key},
    ) as resp:
        # Fail with the real HTTP error instead of handing an error payload
        # (e.g. an auth failure body) to the voices parser.
        resp.raise_for_status()
        return _dict_to_voices_list(await resp.json())
def stream(self,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=5.0, timeout=10.0)) ‑> livekit.plugins.elevenlabs.tts.SynthesizeStream
Expand source code
def stream(
    self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
) -> "SynthesizeStream":
    """Open an incremental synthesis stream tied to this TTS instance.

    Args:
        conn_options: Connection options forwarded unchanged to the stream.

    Returns:
        A `SynthesizeStream` that receives this instance, its options object
        (the same object, not a copy) and its HTTP session.
    """
    http = self._ensure_session()
    current_opts = self._opts
    return SynthesizeStream(
        tts=self, conn_options=conn_options, opts=current_opts, session=http
    )
def synthesize(self,
text: str,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=5.0, timeout=10.0)) ‑> livekit.plugins.elevenlabs.tts.ChunkedStream
Expand source code
def synthesize(
    self,
    text: str,
    *,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> "ChunkedStream":
    """Start a one-shot synthesis of `text`.

    Args:
        text: The text to synthesize, passed on as `input_text`.
        conn_options: Connection options forwarded unchanged to the stream.

    Returns:
        A `ChunkedStream` that receives this instance, its options object
        (the same object, not a copy) and its HTTP session.
    """
    http = self._ensure_session()
    current_opts = self._opts
    return ChunkedStream(
        tts=self,
        input_text=text,
        conn_options=conn_options,
        opts=current_opts,
        session=http,
    )
def update_options(self,
*,
voice: Voice = Voice(id='EXAVITQu4vr4xnSDxMaL', name='Bella', category='premade', settings=VoiceSettings(stability=0.71, similarity_boost=0.5, style=0.0, use_speaker_boost=True)),
model: TTSModels | str = 'eleven_turbo_v2_5',
language: str | None = None) ‑> None
Expand source code
def update_options(
    self,
    *,
    voice: Voice | None = None,
    model: TTSModels | str | None = None,
    language: str | None = None,
) -> None:
    """
    Update selected synthesis options in place; omitted options keep their current value.

    Args:
        voice (Voice | None): New voice configuration. Unchanged when omitted.
        model (TTSModels | str | None): New TTS model. Unchanged when omitted.
        language (str | None): New language code for the TTS model. Unchanged when omitted.
    """
    # The defaults used to be DEFAULT_VOICE / "eleven_turbo_v2_5"; both are
    # truthy, so updating a single option silently reset the other two.
    # Falsy values (None, "") still leave the current setting untouched.
    if voice:
        self._opts.voice = voice
    if model:
        self._opts.model = model
    if language:
        self._opts.language = language

Args

voice : Voice
Voice configuration. Defaults to DEFAULT_VOICE.
model : TTSModels | str
TTS model to use. Defaults to "eleven_turbo_v2_5".
language : str | None
Language code for the TTS model. Optional.

Inherited members

class Voice (id: str,
name: str,
category: str,
settings: VoiceSettings | None = None)
Expand source code
@dataclass
class Voice:
    """Describes one ElevenLabs voice: identifier, display name, category and optional settings."""

    id: str  # voice identifier (the default voice uses 'EXAVITQu4vr4xnSDxMaL')
    name: str  # display name (the default voice is 'Bella')
    category: str  # category label (the default voice is 'premade')
    settings: VoiceSettings | None = None  # optional tuning values; None when not provided

Voice(id: 'str', name: 'str', category: 'str', settings: 'VoiceSettings | None' = None)

Class variables

var category : str
var id : str
var name : str
var settings : livekit.plugins.elevenlabs.tts.VoiceSettings | None
class VoiceSettings (stability: float,
similarity_boost: float,
style: float | None = None,
use_speaker_boost: bool | None = False)
Expand source code
@dataclass
class VoiceSettings:
    """Tuning values that can be attached to a `Voice` via its `settings` field.

    The bracketed ranges are the documented valid intervals; nothing in this
    class enforces them.
    """

    stability: float  # [0.0 - 1.0], required
    similarity_boost: float  # [0.0 - 1.0], required
    style: float | None = None  # [0.0 - 1.0], optional
    use_speaker_boost: bool | None = False  # optional flag, off by default

VoiceSettings(stability: 'float', similarity_boost: 'float', style: 'float | None' = None, use_speaker_boost: 'bool | None' = False)

Class variables

var similarity_boost : float
var stability : float
var style : float | None
var use_speaker_boost : bool | None