Module livekit.plugins.hume
Hume AI TTS plugin for LiveKit Agents
See https://docs.livekit.io/agents/integrations/tts/hume/ for more information.
Sub-modules
livekit.plugins.hume.log
Classes
class AudioFormat (*args, **kwds)-
Expand source code
class AudioFormat(str, Enum):
    """Audio format for the synthesized speech."""

    mp3 = "mp3"
    wav = "wav"
    pcm = "pcm"

Audio format for the synthesized speech.
Ancestors
- builtins.str
- enum.Enum
Class variables
var mp3
var pcm
var wav
class TTS (*,
api_key: str | None = None,
voice: VoiceById | VoiceByName | None = {'name': 'Male English Actor', 'provider': <VoiceProvider.hume: 'HUME_AI'>},
model_version: ModelVersion | None = '1',
description: str | None = None,
speed: float | None = None,
trailing_silence: float | None = None,
context: str | list[Utterance] | None = None,
instant_mode: NotGivenOr[bool] = NOT_GIVEN,
audio_format: AudioFormat = AudioFormat.mp3,
base_url: str = 'https://api.hume.ai',
http_session: aiohttp.ClientSession | None = None)-
Expand source code
class TTS(tts.TTS):
    def __init__(
        self,
        *,
        api_key: str | None = None,
        voice: VoiceById | VoiceByName | None = DEFAULT_VOICE,
        model_version: ModelVersion | None = "1",
        description: str | None = None,
        speed: float | None = None,
        trailing_silence: float | None = None,
        context: str | list[Utterance] | None = None,
        instant_mode: NotGivenOr[bool] = NOT_GIVEN,
        audio_format: AudioFormat = AudioFormat.mp3,
        base_url: str = DEFAULT_BASE_URL,
        http_session: aiohttp.ClientSession | None = None,
    ):
        """Initialize the Hume AI TTS client.

        Options will be used for all future synthesis (until updated with update_options).

        Args:
            api_key: Hume AI API key. If not provided, will look for HUME_API_KEY environment variable.
            voice: A voice from the voice library specified by name or id.
            model_version: Specifies which version of Octave to use. See Hume's documentation for
                details on model version differences: https://dev.hume.ai/docs/text-to-speech-tts/overview.
            description: Natural language instructions describing how the synthesized speech
                should sound (≤1000 characters).
            speed: Speed multiplier for the synthesized speech (≥0.5, ≤2.0, default: 1.0).
            trailing_silence: Duration of trailing silence (in seconds) to add to each utterance
                (≥0, ≤5.0, default: 0.35).
            context: Optional context for synthesis, either as text or list of utterances.
            instant_mode: Whether to use instant mode. Defaults to True if voice specified,
                False otherwise. Requires a voice to be specified when enabled.
            audio_format: Output audio format (mp3, wav, or pcm). Defaults to mp3.
            base_url: Base URL for Hume AI API. Defaults to https://api.hume.ai
            http_session: Optional aiohttp ClientSession to use for requests.
        """
        super().__init__(
            capabilities=tts.TTSCapabilities(streaming=False),
            sample_rate=SUPPORTED_SAMPLE_RATE,
            num_channels=1,
        )
        key = api_key or os.environ.get("HUME_API_KEY")
        if not key:
            raise ValueError("Hume API key is required via api_key or HUME_API_KEY env var")

        has_voice = voice is not None

        # Default instant_mode is True if a voice is specified, otherwise False
        # (Hume API requires a voice for instant mode)
        if not is_given(instant_mode):
            resolved_instant_mode = has_voice
        elif instant_mode and not has_voice:
            raise ValueError("Hume TTS: instant_mode cannot be enabled without specifying a voice")
        else:
            resolved_instant_mode = instant_mode

        self._opts = _TTSOptions(
            api_key=key,
            voice=voice,
            model_version=model_version,
            description=description,
            speed=speed,
            trailing_silence=trailing_silence,
            context=context,
            instant_mode=resolved_instant_mode,
            audio_format=audio_format,
            base_url=base_url,
        )
        self._session = http_session

    @property
    def model(self) -> str:
        return "Octave"

    @property
    def provider(self) -> str:
        return "Hume"

    def _ensure_session(self) -> aiohttp.ClientSession:
        if not self._session:
            self._session = utils.http_context.http_session()

        return self._session

    def update_options(
        self,
        *,
        description: NotGivenOr[str | None] = NOT_GIVEN,
        speed: NotGivenOr[float | None] = NOT_GIVEN,
        voice: NotGivenOr[VoiceById | VoiceByName | None] = NOT_GIVEN,
        trailing_silence: NotGivenOr[float | None] = NOT_GIVEN,
        context: NotGivenOr[str | list[Utterance] | None] = NOT_GIVEN,
        instant_mode: NotGivenOr[bool] = NOT_GIVEN,
        audio_format: NotGivenOr[AudioFormat] = NOT_GIVEN,
    ) -> None:
        """Update TTS options used for all future synthesis (until updated again)

        Args:
            voice: A voice from the voice library specified by name or id.
            description: Natural language instructions describing how the synthesized speech
                should sound (≤1000 characters).
            speed: Speed multiplier for the synthesized speech (≥0.5, ≤2.0, default: 1.0).
            trailing_silence: Duration of trailing silence (in seconds) to add to each utterance.
            context: Optional context for synthesis, either as text or list of utterances.
            instant_mode: Whether to use instant mode.
            audio_format: Output audio format (mp3, wav, or pcm).
        """
        if is_given(description):
            self._opts.description = description
        if is_given(speed):
            self._opts.speed = speed
        if is_given(voice):
            self._opts.voice = voice  # type: ignore
        if is_given(trailing_silence):
            self._opts.trailing_silence = trailing_silence
        if is_given(context):
            self._opts.context = context  # type: ignore
        if is_given(instant_mode):
            self._opts.instant_mode = instant_mode
        if is_given(audio_format):
            self._opts.audio_format = audio_format

    def synthesize(
        self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> tts.ChunkedStream:
        return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)

Helper class that provides a standard way to create an ABC using inheritance.
Initialize the Hume AI TTS client. Options will be used for all future synthesis (until updated with update_options).
Args
api_key- Hume AI API key. If not provided, will look for HUME_API_KEY environment variable.
voice- A voice from the voice library specified by name or id.
model_version- Specifies which version of Octave to use. See Hume's documentation for details on model version differences: https://dev.hume.ai/docs/text-to-speech-tts/overview.
description- Natural language instructions describing how the synthesized speech should sound (≤1000 characters).
speed- Speed multiplier for the synthesized speech (≥0.5, ≤2.0, default: 1.0).
trailing_silence- Duration of trailing silence (in seconds) to add to each utterance (≥0, ≤5.0, default: 0.35).
context- Optional context for synthesis, either as text or list of utterances.
instant_mode- Whether to use instant mode. Defaults to True if voice specified, False otherwise. Requires a voice to be specified when enabled.
audio_format- Output audio format (mp3, wav, or pcm). Defaults to mp3.
base_url- Base URL for Hume AI API. Defaults to https://api.hume.ai
http_session- Optional aiohttp ClientSession to use for requests.
Ancestors
- livekit.agents.tts.tts.TTS
- abc.ABC
- EventEmitter
- typing.Generic
Instance variables
prop model : str-
Expand source code
@property
def model(self) -> str:
    return "Octave"

Get the model name/identifier for this TTS instance.
Returns
The model name if available, "unknown" otherwise.
Note
Plugins should override this property to provide their model information.
prop provider : str-
Expand source code
@property
def provider(self) -> str:
    return "Hume"

Get the provider name/identifier for this TTS instance.
Returns
The provider name if available, "unknown" otherwise.
Note
Plugins should override this property to provide their provider information.
Methods
def synthesize(self,
text: str,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.agents.tts.tts.ChunkedStream-
Expand source code
def synthesize(
    self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
) -> tts.ChunkedStream:
    return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)

def update_options(self,
*,
description: NotGivenOr[str | None] = NOT_GIVEN,
speed: NotGivenOr[float | None] = NOT_GIVEN,
voice: NotGivenOr[VoiceById | VoiceByName | None] = NOT_GIVEN,
trailing_silence: NotGivenOr[float | None] = NOT_GIVEN,
context: NotGivenOr[str | list[Utterance] | None] = NOT_GIVEN,
instant_mode: NotGivenOr[bool] = NOT_GIVEN,
audio_format: NotGivenOr[AudioFormat] = NOT_GIVEN) ‑> None-
Expand source code
def update_options(
    self,
    *,
    description: NotGivenOr[str | None] = NOT_GIVEN,
    speed: NotGivenOr[float | None] = NOT_GIVEN,
    voice: NotGivenOr[VoiceById | VoiceByName | None] = NOT_GIVEN,
    trailing_silence: NotGivenOr[float | None] = NOT_GIVEN,
    context: NotGivenOr[str | list[Utterance] | None] = NOT_GIVEN,
    instant_mode: NotGivenOr[bool] = NOT_GIVEN,
    audio_format: NotGivenOr[AudioFormat] = NOT_GIVEN,
) -> None:
    """Update TTS options used for all future synthesis (until updated again)

    Args:
        voice: A voice from the voice library specified by name or id.
        description: Natural language instructions describing how the synthesized speech
            should sound (≤1000 characters).
        speed: Speed multiplier for the synthesized speech (≥0.5, ≤2.0, default: 1.0).
        trailing_silence: Duration of trailing silence (in seconds) to add to each utterance.
        context: Optional context for synthesis, either as text or list of utterances.
        instant_mode: Whether to use instant mode.
        audio_format: Output audio format (mp3, wav, or pcm).
    """
    if is_given(description):
        self._opts.description = description
    if is_given(speed):
        self._opts.speed = speed
    if is_given(voice):
        self._opts.voice = voice  # type: ignore
    if is_given(trailing_silence):
        self._opts.trailing_silence = trailing_silence
    if is_given(context):
        self._opts.context = context  # type: ignore
    if is_given(instant_mode):
        self._opts.instant_mode = instant_mode
    if is_given(audio_format):
        self._opts.audio_format = audio_format

Update TTS options used for all future synthesis (until updated again)
Args
voice- A voice from the voice library specified by name or id.
description- Natural language instructions describing how the synthesized speech should sound (≤1000 characters).
speed- Speed multiplier for the synthesized speech (≥0.5, ≤2.0, default: 1.0).
trailing_silence- Duration of trailing silence (in seconds) to add to each utterance.
context- Optional context for synthesis, either as text or list of utterances.
instant_mode- Whether to use instant mode.
audio_format- Output audio format (mp3, wav, or pcm).
Inherited members
class Utterance (*args, **kwargs)-
Expand source code
class Utterance(TypedDict, total=False):
    """Utterance for TTS synthesis."""

    text: str
    description: str | None
    speed: float | None
    voice: VoiceById | VoiceByName | None
    trailing_silence: float | None

Utterance for TTS synthesis.
Ancestors
- builtins.dict
Class variables
var description : str | None
var speed : float | None
var text : str
var trailing_silence : float | None
var voice : livekit.plugins.hume.tts.VoiceById | livekit.plugins.hume.tts.VoiceByName | None
class VoiceById (*args, **kwargs)-
Expand source code
class VoiceById(TypedDict, total=False):
    id: str
    provider: VoiceProvider | None

dict() -> new empty dictionary
dict(mapping) -> new dictionary initialized from a mapping object's (key, value) pairs
dict(iterable) -> new dictionary initialized as if via:
    d = {}
    for k, v in iterable:
        d[k] = v
dict(**kwargs) -> new dictionary initialized with the name=value pairs in the keyword argument list. For example: dict(one=1, two=2)
Ancestors
- builtins.dict
Class variables
var id : str
var provider : livekit.plugins.hume.tts.VoiceProvider | None
class VoiceByName (*args, **kwargs)-
Expand source code
class VoiceByName(TypedDict, total=False):
    name: str
    provider: VoiceProvider | None

dict() -> new empty dictionary
dict(mapping) -> new dictionary initialized from a mapping object's (key, value) pairs
dict(iterable) -> new dictionary initialized as if via:
    d = {}
    for k, v in iterable:
        d[k] = v
dict(**kwargs) -> new dictionary initialized with the name=value pairs in the keyword argument list. For example: dict(one=1, two=2)
Ancestors
- builtins.dict
Class variables
var name : str
var provider : livekit.plugins.hume.tts.VoiceProvider | None
class VoiceProvider (*args, **kwds)-
Expand source code
class VoiceProvider(str, Enum):
    """Voice provider for the voice library."""

    hume = "HUME_AI"
    custom = "CUSTOM_VOICE"

Voice provider for the voice library.
Ancestors
- builtins.str
- enum.Enum
Class variables
var custom
var hume