Module livekit.plugins.elevenlabs
Classes
class TTS (*, voice: Voice = Voice(id='EXAVITQu4vr4xnSDxMaL', name='Bella', category='premade', settings=VoiceSettings(stability=0.71, similarity_boost=0.5, style=0.0, use_speaker_boost=True)), model: TTSModels | str = 'eleven_turbo_v2_5', api_key: str | None = None, base_url: str | None = None, encoding: TTSEncoding = 'mp3_22050_32', streaming_latency: int = 3, word_tokenizer: tokenize.WordTokenizer = <livekit.agents.tokenize.basic.WordTokenizer object>, enable_ssml_parsing: bool = False, chunk_length_schedule: list[int] = [80, 120, 200, 260], http_session: aiohttp.ClientSession | None = None, model_id: TTSModels | str | None = None, language: str | None = None)
Create a new instance of ElevenLabs TTS.
Args
voice : Voice
- Voice configuration. Defaults to DEFAULT_VOICE.
model : TTSModels | str
- TTS model to use. Defaults to "eleven_turbo_v2_5".
api_key : str | None
- ElevenLabs API key. Can be set via this argument or the ELEVEN_API_KEY environment variable.
base_url : str | None
- Custom base URL for the API. Optional.
encoding : TTSEncoding
- Audio encoding format. Defaults to "mp3_22050_32".
streaming_latency : int
- Latency in seconds for streaming. Defaults to 3.
word_tokenizer : tokenize.WordTokenizer
- Tokenizer for processing text. Defaults to a basic WordTokenizer that keeps punctuation, since punctuation can help intonation.
enable_ssml_parsing : bool
- Enable SSML parsing for input text. Defaults to False.
chunk_length_schedule : list[int]
- Schedule for chunk lengths; each value must be in the range [50, 500]. Defaults to [80, 120, 200, 260].
http_session : aiohttp.ClientSession | None
- Custom HTTP session for API requests. Optional.
model_id : TTSModels | str | None
- Deprecated alias for model; a warning is logged and it will be removed in 1.5.0.
language : str | None
- Language code for the TTS model; as of 10/24/24 only valid for "eleven_turbo_v2_5". Optional.
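A minimal construction sketch, assuming the plugin is installed as livekit-plugins-elevenlabs and ELEVEN_API_KEY is exported in the environment; every keyword shown maps to a constructor parameter documented above, and the values are illustrative rather than recommendations.

```python
from livekit.plugins import elevenlabs

# Reads ELEVEN_API_KEY from the environment; pass api_key=... to override.
tts = elevenlabs.TTS(
    model="eleven_turbo_v2_5",
    language="en",  # as of 10/24/24, only honored by "eleven_turbo_v2_5"
    streaming_latency=3,
    enable_ssml_parsing=False,
    chunk_length_schedule=[80, 120, 200, 260],  # each value must lie within [50, 500]
)
```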
```python
class TTS(tts.TTS):
    def __init__(
        self,
        *,
        voice: Voice = DEFAULT_VOICE,
        model: TTSModels | str = "eleven_turbo_v2_5",
        api_key: str | None = None,
        base_url: str | None = None,
        encoding: TTSEncoding = "mp3_22050_32",
        streaming_latency: int = 3,
        word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer(
            ignore_punctuation=False  # punctuation can help for intonation
        ),
        enable_ssml_parsing: bool = False,
        chunk_length_schedule: list[int] = [80, 120, 200, 260],  # range is [50, 500]
        http_session: aiohttp.ClientSession | None = None,
        # deprecated
        model_id: TTSModels | str | None = None,
        language: str | None = None,
    ) -> None:
        """
        Create a new instance of ElevenLabs TTS.

        Args:
            voice (Voice): Voice configuration. Defaults to `DEFAULT_VOICE`.
            model (TTSModels | str): TTS model to use. Defaults to "eleven_turbo_v2_5".
            api_key (str | None): ElevenLabs API key. Can be set via argument or `ELEVEN_API_KEY` environment variable.
            base_url (str | None): Custom base URL for the API. Optional.
            encoding (TTSEncoding): Audio encoding format. Defaults to "mp3_22050_32".
            streaming_latency (int): Latency in seconds for streaming. Defaults to 3.
            word_tokenizer (tokenize.WordTokenizer): Tokenizer for processing text. Defaults to basic WordTokenizer.
            enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
            chunk_length_schedule (list[int]): Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
            http_session (aiohttp.ClientSession | None): Custom HTTP session for API requests. Optional.
            language (str | None): Language code for the TTS model, as of 10/24/24 only valid for "eleven_turbo_v2_5". Optional.
        """
        super().__init__(
            capabilities=tts.TTSCapabilities(
                streaming=True,
            ),
            sample_rate=_sample_rate_from_format(encoding),
            num_channels=1,
        )

        if model_id is not None:
            logger.warning(
                "model_id is deprecated and will be removed in 1.5.0, use model instead",
            )
            model = model_id

        api_key = api_key or os.environ.get("ELEVEN_API_KEY")
        if not api_key:
            raise ValueError(
                "ElevenLabs API key is required, either as argument or set ELEVEN_API_KEY environmental variable"
            )

        self._opts = _TTSOptions(
            voice=voice,
            model=model,
            api_key=api_key,
            base_url=base_url or API_BASE_URL_V1,
            encoding=encoding,
            sample_rate=self.sample_rate,
            streaming_latency=streaming_latency,
            word_tokenizer=word_tokenizer,
            chunk_length_schedule=chunk_length_schedule,
            enable_ssml_parsing=enable_ssml_parsing,
            language=language,
        )
        self._session = http_session

    def _ensure_session(self) -> aiohttp.ClientSession:
        if not self._session:
            self._session = utils.http_context.http_session()
        return self._session

    async def list_voices(self) -> List[Voice]:
        async with self._ensure_session().get(
            f"{self._opts.base_url}/voices",
            headers={AUTHORIZATION_HEADER: self._opts.api_key},
        ) as resp:
            return _dict_to_voices_list(await resp.json())

    def update_options(
        self,
        *,
        voice: Voice = DEFAULT_VOICE,
        model: TTSModels | str = "eleven_turbo_v2_5",
    ) -> None:
        """
        Args:
            voice (Voice): Voice configuration. Defaults to `DEFAULT_VOICE`.
            model (TTSModels | str): TTS model to use. Defaults to "eleven_turbo_v2_5".
        """
        self._opts.model = model or self._opts.model
        self._opts.voice = voice or self._opts.voice

    def synthesize(self, text: str) -> "ChunkedStream":
        return ChunkedStream(self, text, self._opts, self._ensure_session())

    def stream(self) -> "SynthesizeStream":
        return SynthesizeStream(self, self._ensure_session(), self._opts)
```
Ancestors
- TTS
- abc.ABC
- EventEmitter
- typing.Generic
Methods
async def list_voices(self) -> List[livekit.plugins.elevenlabs.tts.Voice]
def stream(self) -> livekit.plugins.elevenlabs.tts.SynthesizeStream
def synthesize(self, text: str) -> livekit.plugins.elevenlabs.tts.ChunkedStream
def update_options(self, *, voice: Voice = Voice(id='EXAVITQu4vr4xnSDxMaL', name='Bella', category='premade', settings=VoiceSettings(stability=0.71, similarity_boost=0.5, style=0.0, use_speaker_boost=True)), model: TTSModels | str = 'eleven_turbo_v2_5') -> None
Args
voice : Voice
- Voice configuration. Defaults to DEFAULT_VOICE.
model : TTSModels | str
- TTS model to use. Defaults to "eleven_turbo_v2_5".
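A short sketch of the methods above, assuming an asyncio entry point and an explicitly supplied aiohttp session (outside of an agents job context there may be no default HTTP session to fall back on). The voice picked from list_voices() is arbitrary and only for illustration.

```python
import asyncio

import aiohttp

from livekit.plugins import elevenlabs


async def main() -> None:
    async with aiohttp.ClientSession() as session:
        # ELEVEN_API_KEY is read from the environment when api_key is not given.
        tts = elevenlabs.TTS(http_session=session)

        # Enumerate the voices available to this API key.
        voices = await tts.list_voices()
        print([f"{v.name} ({v.id})" for v in voices])

        # Swap voice and model on the existing instance; later calls to
        # synthesize()/stream() pick up the new options.
        if voices:
            tts.update_options(voice=voices[0], model="eleven_turbo_v2_5")


asyncio.run(main())
```

synthesize() and stream() return ChunkedStream and SynthesizeStream objects that are consumed through the livekit-agents tts.TTS interface, which this page does not document.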
Inherited members
class Voice (id: str, name: str, category: str, settings: VoiceSettings | None = None)
Voice(id: 'str', name: 'str', category: 'str', settings: 'VoiceSettings | None' = None)
```python
@dataclass
class Voice:
    id: str
    name: str
    category: str
    settings: VoiceSettings | None = None
```
Class variables
var category : str
var id : str
var name : str
var settings : livekit.plugins.elevenlabs.tts.VoiceSettings | None
class VoiceSettings (stability: float, similarity_boost: float, style: float | None = None, use_speaker_boost: bool | None = False)
VoiceSettings(stability: 'float', similarity_boost: 'float', style: 'float | None' = None, use_speaker_boost: 'bool | None' = False)
```python
@dataclass
class VoiceSettings:
    stability: float  # [0.0 - 1.0]
    similarity_boost: float  # [0.0 - 1.0]
    style: float | None = None  # [0.0 - 1.0]
    use_speaker_boost: bool | None = False
```
Class variables
var similarity_boost : float
var stability : float
var style : float | None
var use_speaker_boost : bool | None
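A sketch of constructing the dataclasses above and handing them to TTS. The voice ID, name, and category are placeholders for illustration; substitute values returned by list_voices() or taken from your ElevenLabs account.

```python
from livekit.plugins import elevenlabs
from livekit.plugins.elevenlabs import Voice, VoiceSettings

# Placeholder ID/name/category; replace with a real voice.
narrator = Voice(
    id="your-voice-id",
    name="Narrator",
    category="cloned",
    settings=VoiceSettings(
        stability=0.5,          # [0.0 - 1.0]
        similarity_boost=0.75,  # [0.0 - 1.0]
        style=0.0,              # [0.0 - 1.0], optional
        use_speaker_boost=True,
    ),
)

tts = elevenlabs.TTS(voice=narrator)
```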