Module livekit.plugins.elevenlabs
Classes
class TTS (*,
voice_id: str = 'EXAVITQu4vr4xnSDxMaL',
voice_settings: NotGivenOr[VoiceSettings] = NOT_GIVEN,
model: TTSModels | str = 'eleven_flash_v2_5',
encoding: NotGivenOr[TTSEncoding] = NOT_GIVEN,
api_key: NotGivenOr[str] = NOT_GIVEN,
base_url: NotGivenOr[str] = NOT_GIVEN,
streaming_latency: NotGivenOr[int] = NOT_GIVEN,
inactivity_timeout: int = 300,
word_tokenizer: NotGivenOr[tokenize.WordTokenizer] = NOT_GIVEN,
enable_ssml_parsing: bool = False,
chunk_length_schedule: NotGivenOr[list[int]] = NOT_GIVEN,
http_session: aiohttp.ClientSession | None = None,
language: NotGivenOr[str] = NOT_GIVEN)-
Expand source code
class TTS(tts.TTS):
    """Text-to-speech implementation backed by the ElevenLabs API (streaming-capable)."""

    def __init__(
        self,
        *,
        voice_id: str = DEFAULT_VOICE_ID,
        voice_settings: NotGivenOr[VoiceSettings] = NOT_GIVEN,
        model: TTSModels | str = "eleven_flash_v2_5",
        encoding: NotGivenOr[TTSEncoding] = NOT_GIVEN,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        base_url: NotGivenOr[str] = NOT_GIVEN,
        streaming_latency: NotGivenOr[int] = NOT_GIVEN,
        inactivity_timeout: int = WS_INACTIVITY_TIMEOUT,
        word_tokenizer: NotGivenOr[tokenize.WordTokenizer] = NOT_GIVEN,
        enable_ssml_parsing: bool = False,
        chunk_length_schedule: NotGivenOr[list[int]] = NOT_GIVEN,  # range is [50, 500]
        http_session: aiohttp.ClientSession | None = None,
        language: NotGivenOr[str] = NOT_GIVEN,
    ) -> None:
        """
        Create a new instance of ElevenLabs TTS.

        Args:
            voice_id (str): Voice ID. Defaults to `DEFAULT_VOICE_ID`.
            voice_settings (NotGivenOr[VoiceSettings]): Voice settings.
            model (TTSModels | str): TTS model to use. Defaults to "eleven_flash_v2_5".
            encoding (NotGivenOr[TTSEncoding]): Output audio encoding. Defaults to the plugin's default encoding; the sample rate is derived from it.
            api_key (NotGivenOr[str]): ElevenLabs API key. Can be set via argument or `ELEVEN_API_KEY` environment variable.
            base_url (NotGivenOr[str]): Custom base URL for the API. Optional.
            streaming_latency (NotGivenOr[int]): Optimize for streaming latency. Defaults to 0 (disabled); 4 applies the maximum latency optimizations. Deprecated.
            inactivity_timeout (int): Inactivity timeout in seconds for the websocket connection. Defaults to 300.
            word_tokenizer (NotGivenOr[tokenize.WordTokenizer]): Tokenizer for processing text. Defaults to basic WordTokenizer.
            enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
            chunk_length_schedule (NotGivenOr[list[int]]): Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
            http_session (aiohttp.ClientSession | None): Custom HTTP session for API requests. Optional.
            language (NotGivenOr[str]): Language code for the TTS model, as of 10/24/24 only valid for "eleven_turbo_v2_5".

        Raises:
            ValueError: If no API key is passed and `ELEVEN_API_KEY` is unset or empty.
        """  # noqa: E501

        if not is_given(chunk_length_schedule):
            chunk_length_schedule = [80, 120, 200, 260]

        if not is_given(encoding):
            encoding = _DefaultEncoding

        # The base class needs the sample rate up front, so the encoding has to
        # be resolved before calling it.
        super().__init__(
            capabilities=tts.TTSCapabilities(
                streaming=True,
            ),
            sample_rate=_sample_rate_from_format(encoding),
            num_channels=1,
        )

        # An explicit argument takes precedence over the environment variable.
        elevenlabs_api_key = api_key if is_given(api_key) else os.environ.get("ELEVEN_API_KEY")
        if not elevenlabs_api_key:
            raise ValueError(
                "ElevenLabs API key is required, either as argument or set ELEVEN_API_KEY environmental variable"  # noqa: E501
            )

        if not is_given(word_tokenizer):
            word_tokenizer = tokenize.basic.WordTokenizer(
                ignore_punctuation=False  # punctuation can help for intonation
            )

        self._opts = _TTSOptions(
            voice_id=voice_id,
            voice_settings=voice_settings,
            model=model,
            api_key=elevenlabs_api_key,
            base_url=base_url if is_given(base_url) else API_BASE_URL_V1,
            encoding=encoding,
            sample_rate=self.sample_rate,
            streaming_latency=streaming_latency,
            word_tokenizer=word_tokenizer,
            chunk_length_schedule=chunk_length_schedule,
            enable_ssml_parsing=enable_ssml_parsing,
            language=language,
            inactivity_timeout=inactivity_timeout,
        )
        self._session = http_session
        # Streams are tracked weakly so this set does not keep them alive.
        self._streams = weakref.WeakSet[SynthesizeStream]()

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the HTTP session, falling back to `utils.http_context.http_session()` when none was supplied."""  # noqa: E501
        if not self._session:
            self._session = utils.http_context.http_session()

        return self._session

    async def list_voices(self) -> list[Voice]:
        """
        Fetch `{base_url}/voices` and convert the JSON response into `Voice` objects.

        The API key is sent in the `AUTHORIZATION_HEADER` request header.
        """
        # NOTE(review): the HTTP status is not checked before the body is parsed.
        async with self._ensure_session().get(
            f"{self._opts.base_url}/voices",
            headers={AUTHORIZATION_HEADER: self._opts.api_key},
        ) as resp:
            return _dict_to_voices_list(await resp.json())

    def update_options(
        self,
        *,
        voice_id: NotGivenOr[str] = NOT_GIVEN,
        voice_settings: NotGivenOr[VoiceSettings] = NOT_GIVEN,
        model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
        language: NotGivenOr[str] = NOT_GIVEN,
    ) -> None:
        """
        Update the stored options; arguments that are not given are left unchanged.

        Args:
            voice_id (NotGivenOr[str]): Voice ID.
            voice_settings (NotGivenOr[VoiceSettings]): Voice settings.
            model (NotGivenOr[TTSModels | str]): TTS model to use.
            language (NotGivenOr[str]): Language code for the TTS model.
        """
        if is_given(model):
            self._opts.model = model
        if is_given(voice_id):
            self._opts.voice_id = voice_id
        if is_given(voice_settings):
            self._opts.voice_settings = voice_settings
        if is_given(language):
            self._opts.language = language

    def synthesize(
        self,
        text: str,
        *,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> ChunkedStream:
        """Create a `ChunkedStream` for `text` using this instance's options and HTTP session."""
        return ChunkedStream(
            tts=self,
            input_text=text,
            conn_options=conn_options,
            opts=self._opts,
            session=self._ensure_session(),
        )

    def stream(
        self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> SynthesizeStream:
        """Create a `SynthesizeStream` and register it so that `aclose` can close it."""
        stream = SynthesizeStream(
            tts=self,
            conn_options=conn_options,
            opts=self._opts,
            session=self._ensure_session(),
        )
        self._streams.add(stream)
        return stream

    async def aclose(self) -> None:
        """Close every tracked stream, forget them, then close the base class."""
        # Iterate over a snapshot: the weak set may change while awaiting.
        for stream in list(self._streams):
            await stream.aclose()

        self._streams.clear()
        await super().aclose()
Helper class that provides a standard way to create an ABC using inheritance.
Create a new instance of ElevenLabs TTS.
Args
voice_id
:str
- Voice ID. Defaults to
DEFAULT_VOICE_ID
. voice_settings
:NotGivenOr[VoiceSettings]
- Voice settings.
model
:TTSModels | str
- TTS model to use. Defaults to "eleven_flash_v2_5".
api_key
:NotGivenOr[str]
- ElevenLabs API key. Can be set via argument or
ELEVEN_API_KEY
environment variable. base_url
:NotGivenOr[str]
- Custom base URL for the API. Optional.
streaming_latency
:NotGivenOr[int]
- Optimize for streaming latency. Defaults to 0 (disabled); 4 applies the maximum latency optimizations. Deprecated.
inactivity_timeout
:int
- Inactivity timeout in seconds for the websocket connection. Defaults to 300.
word_tokenizer
:NotGivenOr[tokenize.WordTokenizer]
- Tokenizer for processing text. Defaults to basic WordTokenizer.
enable_ssml_parsing
:bool
- Enable SSML parsing for input text. Defaults to False.
chunk_length_schedule
:NotGivenOr[list[int]]
- Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
http_session
:aiohttp.ClientSession | None
- Custom HTTP session for API requests. Optional.
language
:NotGivenOr[str]
- Language code for the TTS model, as of 10/24/24 only valid for "eleven_turbo_v2_5".
Ancestors
- livekit.agents.tts.tts.TTS
- abc.ABC
- EventEmitter
- typing.Generic
Methods
async def aclose(self) ‑> None
-
Expand source code
async def aclose(self) -> None:
    """Close every tracked stream, empty the tracking set, then close the base class."""
    # Work from a snapshot so the set is not iterated while awaiting.
    open_streams = tuple(self._streams)
    for active in open_streams:
        await active.aclose()

    self._streams.clear()
    await super().aclose()
async def list_voices(self) ‑> list[livekit.plugins.elevenlabs.tts.Voice]
-
Expand source code
async def list_voices(self) -> list[Voice]:
    """Request `{base_url}/voices` and convert the JSON body into `Voice` objects."""
    session = self._ensure_session()
    voices_url = f"{self._opts.base_url}/voices"
    auth_headers = {AUTHORIZATION_HEADER: self._opts.api_key}

    async with session.get(voices_url, headers=auth_headers) as resp:
        payload = await resp.json()
        return _dict_to_voices_list(payload)
def stream(self,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.elevenlabs.tts.SynthesizeStream-
Expand source code
def stream(
    self,
    *,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> SynthesizeStream:
    """Create a `SynthesizeStream`, register it in the tracked set, and return it."""
    new_stream = SynthesizeStream(
        tts=self,
        conn_options=conn_options,
        opts=self._opts,
        session=self._ensure_session(),
    )
    # Registered so that aclose() can find and close it.
    self._streams.add(new_stream)
    return new_stream
def synthesize(self,
text: str,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.plugins.elevenlabs.tts.ChunkedStream-
Expand source code
def synthesize(
    self,
    text: str,
    *,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> ChunkedStream:
    """Create a `ChunkedStream` for `text` using this instance's options and HTTP session."""
    chunked = ChunkedStream(
        tts=self,
        input_text=text,
        conn_options=conn_options,
        opts=self._opts,
        session=self._ensure_session(),
    )
    return chunked
def update_options(self,
*,
voice_id: NotGivenOr[str] = NOT_GIVEN,
voice_settings: NotGivenOr[VoiceSettings] = NOT_GIVEN,
model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
language: NotGivenOr[str] = NOT_GIVEN) ‑> None-
Expand source code
def update_options(
    self,
    *,
    voice_id: NotGivenOr[str] = NOT_GIVEN,
    voice_settings: NotGivenOr[VoiceSettings] = NOT_GIVEN,
    model: NotGivenOr[TTSModels | str] = NOT_GIVEN,
    language: NotGivenOr[str] = NOT_GIVEN,
) -> None:
    """
    Overwrite the stored options that were passed; omitted ones keep their value.

    Args:
        voice_id (NotGivenOr[str]): Voice ID.
        voice_settings (NotGivenOr[VoiceSettings]): Voice settings.
        model (NotGivenOr[TTSModels | str]): TTS model to use.
        language (NotGivenOr[str]): Language code for the TTS model.
    """
    # Checked and applied in this order: model, voice_id, voice_settings, language.
    candidates = (
        ("model", model),
        ("voice_id", voice_id),
        ("voice_settings", voice_settings),
        ("language", language),
    )
    for option_name, option_value in candidates:
        if is_given(option_value):
            setattr(self._opts, option_name, option_value)
Args
voice_id
:NotGivenOr[str]
- Voice ID.
voice_settings
:NotGivenOr[VoiceSettings]
- Voice settings.
model
:NotGivenOr[TTSModels | str]
- TTS model to use.
language
:NotGivenOr[str]
- Language code for the TTS model.
Inherited members
class Voice (id: str, name: str, category: str)
-
Expand source code
# Plain record describing one voice; `TTS.list_voices` returns a list of these.
# Documented with comments rather than a docstring so the auto-generated
# dataclass `__doc__` is left as-is.
@dataclass
class Voice:
    id: str  # presumably the value accepted as `voice_id` by TTS -- confirm
    name: str
    category: str
Voice(id: 'str', name: 'str', category: 'str')
Instance variables
var category : str
var id : str
var name : str
class VoiceSettings (stability: float,
similarity_boost: float,
style: NotGivenOr[float] = NOT_GIVEN,
speed: NotGivenOr[float] = NOT_GIVEN,
use_speaker_boost: NotGivenOr[bool] = NOT_GIVEN)-
Expand source code
# Voice tuning parameters passed to TTS through its `voice_settings` argument.
# `stability` and `similarity_boost` are required; the remaining fields default
# to NOT_GIVEN. Bracketed values are the documented ranges for each field.
@dataclass
class VoiceSettings:
    stability: float  # [0.0 - 1.0]
    similarity_boost: float  # [0.0 - 1.0]
    style: NotGivenOr[float] = NOT_GIVEN  # [0.0 - 1.0]
    speed: NotGivenOr[float] = NOT_GIVEN  # [0.8 - 1.2]
    use_speaker_boost: NotGivenOr[bool] = NOT_GIVEN
VoiceSettings(stability: 'float', similarity_boost: 'float', style: 'NotGivenOr[float]' = NOT_GIVEN, speed: 'NotGivenOr[float]' = NOT_GIVEN, use_speaker_boost: 'NotGivenOr[bool]' = NOT_GIVEN)
Instance variables
var similarity_boost : float
var speed : float | livekit.agents.types.NotGiven
var stability : float
var style : float | livekit.agents.types.NotGiven
var use_speaker_boost : bool | livekit.agents.types.NotGiven