Module livekit.plugins.hume
Hume AI TTS plugin for LiveKit Agents
See https://docs.livekit.io/agents/integrations/tts/hume/ for more information.
Sub-modules
livekit.plugins.hume.log
Classes
class AudioFormat (*args, **kwds)
-
Expand source code
class AudioFormat(str, Enum):
    """Audio format for the synthesized speech.

    Members subclass ``str``, so each one compares equal to its plain string
    value (for example ``AudioFormat.mp3 == "mp3"``).
    """

    mp3 = "mp3"
    wav = "wav"
    pcm = "pcm"
Audio format for the synthesized speech.
Ancestors
- builtins.str
- enum.Enum
Class variables
var mp3
var pcm
var wav
class TTS (*,
api_key: str | None = None,
voice: VoiceById | VoiceByName | None = {'name': 'Male English Actor', 'provider': <VoiceProvider.hume: 'HUME_AI'>},
description: str | None = None,
speed: float | None = None,
trailing_silence: float | None = None,
context: str | list[Utterance] | None = None,
instant_mode: NotGivenOr[bool] = NOT_GIVEN,
audio_format: AudioFormat = AudioFormat.mp3,
base_url: str = 'https://api.hume.ai',
http_session: aiohttp.ClientSession | None = None)-
Expand source code
class TTS(tts.TTS):
    """Hume AI text-to-speech client for LiveKit Agents.

    The client is registered as non-streaming (``streaming=False``); call
    ``synthesize`` with a complete piece of text to obtain a ``ChunkedStream``.
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        voice: VoiceById | VoiceByName | None = DEFAULT_VOICE,
        description: str | None = None,
        speed: float | None = None,
        trailing_silence: float | None = None,
        context: str | list[Utterance] | None = None,
        instant_mode: NotGivenOr[bool] = NOT_GIVEN,
        audio_format: AudioFormat = AudioFormat.mp3,
        base_url: str = DEFAULT_BASE_URL,
        http_session: aiohttp.ClientSession | None = None,
    ):
        """Initialize the Hume AI TTS client.

        Options will be used for all future synthesis (until updated with update_options).

        Args:
            api_key: Hume AI API key. If not provided, will look for HUME_API_KEY environment
                variable.
            voice: A voice from the voice library specified by name or id.
            description: Natural language instructions describing how the synthesized speech
                should sound (≤1000 characters).
            speed: Speed multiplier for the synthesized speech (≥0.25, ≤3.0, default: 1.0).
            trailing_silence: Duration of trailing silence (in seconds) to add to each utterance
                (≥0, ≤5.0, default: 0.35).
            context: Optional context for synthesis, either as text or list of utterances.
            instant_mode: Whether to use instant mode. Defaults to True if voice specified,
                False otherwise. Requires a voice to be specified when enabled.
            audio_format: Output audio format (mp3, wav, or pcm). Defaults to mp3.
            base_url: Base URL for Hume AI API. Defaults to https://api.hume.ai
            http_session: Optional aiohttp ClientSession to use for requests.

        Raises:
            ValueError: If no API key is available, or if ``instant_mode`` is explicitly
                enabled while ``voice`` is None.
        """
        super().__init__(
            capabilities=tts.TTSCapabilities(streaming=False),
            sample_rate=SUPPORTED_SAMPLE_RATE,
            num_channels=1,
        )
        key = api_key or os.environ.get("HUME_API_KEY")
        if not key:
            raise ValueError("Hume API key is required via api_key or HUME_API_KEY env var")

        has_voice = voice is not None

        # Default instant_mode is True if a voice is specified, otherwise False
        # (Hume API requires a voice for instant mode)
        if not is_given(instant_mode):
            resolved_instant_mode = has_voice
        elif instant_mode and not has_voice:
            raise ValueError("Hume TTS: instant_mode cannot be enabled without specifying a voice")
        else:
            resolved_instant_mode = instant_mode

        self._opts = _TTSOptions(
            api_key=key,
            voice=voice,
            description=description,
            speed=speed,
            trailing_silence=trailing_silence,
            context=context,
            instant_mode=resolved_instant_mode,
            audio_format=audio_format,
            base_url=base_url,
        )
        self._session = http_session

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the HTTP session, lazily falling back to the shared http_context session."""
        if not self._session:
            self._session = utils.http_context.http_session()
        return self._session

    def update_options(
        self,
        *,
        description: NotGivenOr[str | None] = NOT_GIVEN,
        speed: NotGivenOr[float | None] = NOT_GIVEN,
        voice: NotGivenOr[VoiceById | VoiceByName | None] = NOT_GIVEN,
        trailing_silence: NotGivenOr[float | None] = NOT_GIVEN,
        context: NotGivenOr[str | list[Utterance] | None] = NOT_GIVEN,
        instant_mode: NotGivenOr[bool] = NOT_GIVEN,
        audio_format: NotGivenOr[AudioFormat] = NOT_GIVEN,
    ) -> None:
        """Update TTS options used for all future synthesis (until updated again).

        Only the arguments that are explicitly passed are changed. The resulting
        voice / instant-mode combination is validated before any option is modified,
        so a rejected call leaves the current options untouched.

        Args:
            voice: A voice from the voice library specified by name or id.
            description: Natural language instructions describing how the synthesized speech
                should sound (≤1000 characters).
            speed: Speed multiplier for the synthesized speech (≥0.25, ≤3.0, default: 1.0).
            trailing_silence: Duration of trailing silence (in seconds) to add to each utterance.
            context: Optional context for synthesis, either as text or list of utterances.
            instant_mode: Whether to use instant mode. Requires a voice to be set.
            audio_format: Output audio format (mp3, wav, or pcm).

        Raises:
            ValueError: If, once the update is applied, instant mode would be enabled
                while no voice is set. Pass ``instant_mode=False`` together with
                ``voice=None`` to clear the voice.
        """
        # Resolve the values that would be in effect after this call, so the
        # invariant enforced by __init__ (instant mode needs a voice) also holds
        # after an update.
        effective_voice = voice if is_given(voice) else self._opts.voice
        effective_instant_mode = (
            instant_mode if is_given(instant_mode) else self._opts.instant_mode
        )
        if effective_instant_mode and effective_voice is None:
            raise ValueError("Hume TTS: instant_mode cannot be enabled without specifying a voice")

        if is_given(description):
            self._opts.description = description
        if is_given(speed):
            self._opts.speed = speed
        if is_given(voice):
            self._opts.voice = voice  # type: ignore
        if is_given(trailing_silence):
            self._opts.trailing_silence = trailing_silence
        if is_given(context):
            self._opts.context = context  # type: ignore
        if is_given(instant_mode):
            self._opts.instant_mode = instant_mode
        if is_given(audio_format):
            self._opts.audio_format = audio_format

    def synthesize(
        self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> tts.ChunkedStream:
        """Create a ``ChunkedStream`` that synthesizes ``text`` with this client.

        Args:
            text: Text to synthesize.
            conn_options: Connection options passed through to the stream.

        Returns:
            A ``ChunkedStream`` bound to this TTS instance.
        """
        return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)
Hume AI text-to-speech (TTS) client for LiveKit Agents.
Initialize the Hume AI TTS client. Options will be used for all future synthesis (until updated with update_options).
Args
api_key
- Hume AI API key. If not provided, will look for HUME_API_KEY environment variable.
voice
- A voice from the voice library specified by name or id.
description
- Natural language instructions describing how the synthesized speech should sound (≤1000 characters).
speed
- Speed multiplier for the synthesized speech (≥0.25, ≤3.0, default: 1.0).
trailing_silence
- Duration of trailing silence (in seconds) to add to each utterance (≥0, ≤5.0, default: 0.35).
context
- Optional context for synthesis, either as text or list of utterances.
instant_mode
- Whether to use instant mode. Defaults to True if voice specified, False otherwise. Requires a voice to be specified when enabled.
audio_format
- Output audio format (mp3, wav, or pcm). Defaults to mp3.
base_url
- Base URL for Hume AI API. Defaults to https://api.hume.ai
http_session
- Optional aiohttp ClientSession to use for requests.
Ancestors
- livekit.agents.tts.tts.TTS
- abc.ABC
- EventEmitter
- typing.Generic
Methods
def synthesize(self,
text: str,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.agents.tts.tts.ChunkedStream-
Expand source code
def synthesize(
    self,
    text: str,
    *,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> tts.ChunkedStream:
    """Create a ``ChunkedStream`` that synthesizes ``text`` with this client.

    Args:
        text: Text to synthesize.
        conn_options: Connection options passed through to the stream.

    Returns:
        A ``ChunkedStream`` bound to this TTS instance.
    """
    stream = ChunkedStream(tts=self, input_text=text, conn_options=conn_options)
    return stream
def update_options(self,
*,
description: NotGivenOr[str | None] = NOT_GIVEN,
speed: NotGivenOr[float | None] = NOT_GIVEN,
voice: NotGivenOr[VoiceById | VoiceByName | None] = NOT_GIVEN,
trailing_silence: NotGivenOr[float | None] = NOT_GIVEN,
context: NotGivenOr[str | list[Utterance] | None] = NOT_GIVEN,
instant_mode: NotGivenOr[bool] = NOT_GIVEN,
audio_format: NotGivenOr[AudioFormat] = NOT_GIVEN) ‑> None-
Expand source code
def update_options(
    self,
    *,
    description: NotGivenOr[str | None] = NOT_GIVEN,
    speed: NotGivenOr[float | None] = NOT_GIVEN,
    voice: NotGivenOr[VoiceById | VoiceByName | None] = NOT_GIVEN,
    trailing_silence: NotGivenOr[float | None] = NOT_GIVEN,
    context: NotGivenOr[str | list[Utterance] | None] = NOT_GIVEN,
    instant_mode: NotGivenOr[bool] = NOT_GIVEN,
    audio_format: NotGivenOr[AudioFormat] = NOT_GIVEN,
) -> None:
    """Update TTS options used for all future synthesis (until updated again).

    Only the arguments that are explicitly passed are changed. The resulting
    voice / instant-mode combination is validated before any option is modified,
    so a rejected call leaves the current options untouched.

    Args:
        voice: A voice from the voice library specified by name or id.
        description: Natural language instructions describing how the synthesized speech
            should sound (≤1000 characters).
        speed: Speed multiplier for the synthesized speech (≥0.25, ≤3.0, default: 1.0).
        trailing_silence: Duration of trailing silence (in seconds) to add to each utterance.
        context: Optional context for synthesis, either as text or list of utterances.
        instant_mode: Whether to use instant mode. Requires a voice to be set.
        audio_format: Output audio format (mp3, wav, or pcm).

    Raises:
        ValueError: If, once the update is applied, instant mode would be enabled
            while no voice is set. Pass ``instant_mode=False`` together with
            ``voice=None`` to clear the voice.
    """
    # Resolve the values that would be in effect after this call, so the
    # invariant enforced by __init__ (instant mode needs a voice) also holds
    # after an update.
    effective_voice = voice if is_given(voice) else self._opts.voice
    effective_instant_mode = (
        instant_mode if is_given(instant_mode) else self._opts.instant_mode
    )
    if effective_instant_mode and effective_voice is None:
        raise ValueError("Hume TTS: instant_mode cannot be enabled without specifying a voice")

    if is_given(description):
        self._opts.description = description
    if is_given(speed):
        self._opts.speed = speed
    if is_given(voice):
        self._opts.voice = voice  # type: ignore
    if is_given(trailing_silence):
        self._opts.trailing_silence = trailing_silence
    if is_given(context):
        self._opts.context = context  # type: ignore
    if is_given(instant_mode):
        self._opts.instant_mode = instant_mode
    if is_given(audio_format):
        self._opts.audio_format = audio_format
Update TTS options used for all future synthesis (until updated again)
Args
voice
- A voice from the voice library specified by name or id.
description
- Natural language instructions describing how the synthesized speech should sound (≤1000 characters).
speed
- Speed multiplier for the synthesized speech (≥0.25, ≤3.0, default: 1.0).
trailing_silence
- Duration of trailing silence (in seconds) to add to each utterance.
context
- Optional context for synthesis, either as text or list of utterances.
instant_mode
- Whether to use instant mode.
audio_format
- Output audio format (mp3, wav, or pcm).
Inherited members
class Utterance (*args, **kwargs)
-
Expand source code
class Utterance(TypedDict, total=False):
    """Utterance for TTS synthesis.

    Declared with ``total=False``, so every key may be omitted.
    """

    # Text content of the utterance.
    text: str
    # Per-utterance settings; the names match the corresponding TTS options
    # (presumably with the same meaning -- confirm against the Hume API).
    description: str | None
    speed: float | None
    voice: VoiceById | VoiceByName | None
    trailing_silence: float | None
Utterance for TTS synthesis.
Ancestors
- builtins.dict
Class variables
var description : str | None
var speed : float | None
var text : str
var trailing_silence : float | None
var voice : livekit.plugins.hume.tts.VoiceById | livekit.plugins.hume.tts.VoiceByName | None
class VoiceById (*args, **kwargs)
-
Expand source code
class VoiceById(TypedDict, total=False):
    """Voice selected by ``id``, optionally qualified by ``provider``.

    Declared with ``total=False``, so type checkers treat both keys as optional.
    """

    id: str
    provider: VoiceProvider | None
Dictionary identifying a voice by its id, with an optional provider. All keys are optional (total=False).
Ancestors
- builtins.dict
Class variables
var id : str
var provider : livekit.plugins.hume.tts.VoiceProvider | None
class VoiceByName (*args, **kwargs)
-
Expand source code
class VoiceByName(TypedDict, total=False):
    """Voice selected by ``name``, optionally qualified by ``provider``.

    Declared with ``total=False``, so type checkers treat both keys as optional.
    """

    name: str
    provider: VoiceProvider | None
Dictionary identifying a voice by its name, with an optional provider. All keys are optional (total=False).
Ancestors
- builtins.dict
Class variables
var name : str
var provider : livekit.plugins.hume.tts.VoiceProvider | None
class VoiceProvider (*args, **kwds)
-
Expand source code
class VoiceProvider(str, Enum):
    """Voice provider for the voice library.

    Members subclass ``str``, so each one compares equal to its plain string
    value (for example ``VoiceProvider.hume == "HUME_AI"``).
    """

    hume = "HUME_AI"
    custom = "CUSTOM_VOICE"
Voice provider for the voice library.
Ancestors
- builtins.str
- enum.Enum
Class variables
var custom
var hume