Module livekit.plugins.hume

Hume AI TTS plugin for LiveKit Agents

See https://docs.livekit.io/agents/integrations/tts/hume/ for more information.

Sub-modules

livekit.plugins.hume.log

Classes

class AudioFormat (*args, **kwds)
Expand source code
class AudioFormat(str, Enum):
    """Audio format for the synthesized speech."""

    # The str mixin makes every member a plain string equal to its value,
    # so a member can be used wherever the raw format name is expected.
    mp3 = "mp3"
    wav = "wav"
    pcm = "pcm"

Audio format for the synthesized speech.

Ancestors

  • builtins.str
  • enum.Enum

Class variables

var mp3
var pcm
var wav
class TTS (*,
api_key: str | None = None,
voice: VoiceById | VoiceByName | None = {'name': 'Male English Actor', 'provider': <VoiceProvider.hume: 'HUME_AI'>},
description: str | None = None,
speed: float | None = None,
trailing_silence: float | None = None,
context: str | list[Utterance] | None = None,
instant_mode: NotGivenOr[bool] = NOT_GIVEN,
audio_format: AudioFormat = AudioFormat.mp3,
base_url: str = 'https://api.hume.ai',
http_session: aiohttp.ClientSession | None = None)
Expand source code
class TTS(tts.TTS):
    """Hume AI text-to-speech client for LiveKit Agents (non-streaming synthesis)."""

    def __init__(
        self,
        *,
        api_key: str | None = None,
        voice: VoiceById | VoiceByName | None = DEFAULT_VOICE,
        description: str | None = None,
        speed: float | None = None,
        trailing_silence: float | None = None,
        context: str | list[Utterance] | None = None,
        instant_mode: NotGivenOr[bool] = NOT_GIVEN,
        audio_format: AudioFormat = AudioFormat.mp3,
        base_url: str = DEFAULT_BASE_URL,
        http_session: aiohttp.ClientSession | None = None,
    ):
        """Initialize the Hume AI TTS client. Options will be used for all future synthesis
        (until updated with update_options).

        Args:
            api_key: Hume AI API key. If not provided, will look for HUME_API_KEY environment
                variable.
            voice: A voice from the voice library specified by name or id.
            description: Natural language instructions describing how the synthesized speech
                should sound (≤1000 characters).
            speed: Speed multiplier for the synthesized speech (≥0.25, ≤3.0, default: 1.0).
            trailing_silence: Duration of trailing silence (in seconds) to add to each utterance
                (≥0, ≤5.0, default: 0.35).
            context: Optional context for synthesis, either as text or list of utterances.
            instant_mode: Whether to use instant mode. Defaults to True if voice specified,
                False otherwise. Requires a voice to be specified when enabled.
            audio_format: Output audio format (mp3, wav, or pcm). Defaults to mp3.
            base_url: Base URL for Hume AI API. Defaults to https://api.hume.ai
            http_session: Optional aiohttp ClientSession to use for requests.

        Raises:
            ValueError: If no API key is available, or if instant_mode is enabled without
                a voice.
        """
        super().__init__(
            capabilities=tts.TTSCapabilities(streaming=False),
            sample_rate=SUPPORTED_SAMPLE_RATE,
            num_channels=1,
        )
        key = api_key or os.environ.get("HUME_API_KEY")
        if not key:
            raise ValueError("Hume API key is required via api_key or HUME_API_KEY env var")

        has_voice = voice is not None

        # Default instant_mode is True if a voice is specified, otherwise False
        # (Hume API requires a voice for instant mode)
        if not is_given(instant_mode):
            resolved_instant_mode = has_voice
        elif instant_mode and not has_voice:
            raise ValueError("Hume TTS: instant_mode cannot be enabled without specifying a voice")
        else:
            resolved_instant_mode = instant_mode

        self._opts = _TTSOptions(
            api_key=key,
            voice=voice,
            description=description,
            speed=speed,
            trailing_silence=trailing_silence,
            context=context,
            instant_mode=resolved_instant_mode,
            audio_format=audio_format,
            base_url=base_url,
        )
        self._session = http_session

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the HTTP session, falling back to the shared http_context session."""
        if not self._session:
            self._session = utils.http_context.http_session()

        return self._session

    def update_options(
        self,
        *,
        description: NotGivenOr[str | None] = NOT_GIVEN,
        speed: NotGivenOr[float | None] = NOT_GIVEN,
        voice: NotGivenOr[VoiceById | VoiceByName | None] = NOT_GIVEN,
        trailing_silence: NotGivenOr[float | None] = NOT_GIVEN,
        context: NotGivenOr[str | list[Utterance] | None] = NOT_GIVEN,
        instant_mode: NotGivenOr[bool] = NOT_GIVEN,
        audio_format: NotGivenOr[AudioFormat] = NOT_GIVEN,
    ) -> None:
        """Update TTS options used for all future synthesis (until updated again)

        Args:
            voice: A voice from the voice library specified by name or id.
            description: Natural language instructions describing how the synthesized speech
                should sound (≤1000 characters).
            speed: Speed multiplier for the synthesized speech (≥0.25, ≤3.0, default: 1.0).
            trailing_silence: Duration of trailing silence (in seconds) to add to each utterance.
            context: Optional context for synthesis, either as text or list of utterances.
            instant_mode: Whether to use instant mode. Requires a voice to be specified when
                enabled.
            audio_format: Output audio format (mp3, wav, or pcm).

        Raises:
            ValueError: If the update would leave instant_mode enabled without a voice.
                No option is changed in that case.
        """
        # Enforce the same voice/instant_mode rule as the constructor. The resulting pair is
        # checked before anything is assigned so a rejected call leaves the options untouched.
        if is_given(voice) or is_given(instant_mode):
            next_voice = voice if is_given(voice) else self._opts.voice
            next_instant_mode = instant_mode if is_given(instant_mode) else self._opts.instant_mode
            if next_instant_mode and next_voice is None:
                raise ValueError(
                    "Hume TTS: instant_mode cannot be enabled without specifying a voice"
                )

        if is_given(description):
            self._opts.description = description
        if is_given(speed):
            self._opts.speed = speed
        if is_given(voice):
            self._opts.voice = voice  # type: ignore
        if is_given(trailing_silence):
            self._opts.trailing_silence = trailing_silence
        if is_given(context):
            self._opts.context = context  # type: ignore
        if is_given(instant_mode):
            self._opts.instant_mode = instant_mode
        if is_given(audio_format):
            self._opts.audio_format = audio_format

    def synthesize(
        self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> tts.ChunkedStream:
        """Return a ChunkedStream that synthesizes ``text`` with this TTS instance."""
        return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)

Hume AI text-to-speech (TTS) client for LiveKit Agents. Synthesis is non-streaming: synthesize() returns a ChunkedStream for the given text.

Initialize the Hume AI TTS client. Options will be used for all future synthesis (until updated with update_options).

Args

api_key
Hume AI API key. If not provided, will look for HUME_API_KEY environment variable.
voice
A voice from the voice library specified by name or id.
description
Natural language instructions describing how the synthesized speech should sound (≤1000 characters).
speed
Speed multiplier for the synthesized speech (≥0.25, ≤3.0, default: 1.0).
trailing_silence
Duration of trailing silence (in seconds) to add to each utterance (≥0, ≤5.0, default: 0.35).
context
Optional context for synthesis, either as text or list of utterances.
instant_mode
Whether to use instant mode. Defaults to True if voice specified, False otherwise. Requires a voice to be specified when enabled.
audio_format
Output audio format (mp3, wav, or pcm). Defaults to mp3.
base_url
Base URL for Hume AI API. Defaults to https://api.hume.ai
http_session
Optional aiohttp ClientSession to use for requests.

Ancestors

  • livekit.agents.tts.tts.TTS
  • abc.ABC
  • EventEmitter
  • typing.Generic

Methods

def synthesize(self,
text: str,
*,
conn_options: APIConnectOptions = APIConnectOptions(max_retry=3, retry_interval=2.0, timeout=10.0)) ‑> livekit.agents.tts.tts.ChunkedStream
Expand source code
def synthesize(
    self,
    text: str,
    *,
    conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> tts.ChunkedStream:
    """Return a ChunkedStream that synthesizes ``text`` with this TTS instance.

    Args:
        text: Text passed to the stream as its input text.
        conn_options: Connection options forwarded to the stream.
    """
    stream = ChunkedStream(tts=self, input_text=text, conn_options=conn_options)
    return stream
def update_options(self,
*,
description: NotGivenOr[str | None] = NOT_GIVEN,
speed: NotGivenOr[float | None] = NOT_GIVEN,
voice: NotGivenOr[VoiceById | VoiceByName | None] = NOT_GIVEN,
trailing_silence: NotGivenOr[float | None] = NOT_GIVEN,
context: NotGivenOr[str | list[Utterance] | None] = NOT_GIVEN,
instant_mode: NotGivenOr[bool] = NOT_GIVEN,
audio_format: NotGivenOr[AudioFormat] = NOT_GIVEN) ‑> None
Expand source code
def update_options(
    self,
    *,
    description: NotGivenOr[str | None] = NOT_GIVEN,
    speed: NotGivenOr[float | None] = NOT_GIVEN,
    voice: NotGivenOr[VoiceById | VoiceByName | None] = NOT_GIVEN,
    trailing_silence: NotGivenOr[float | None] = NOT_GIVEN,
    context: NotGivenOr[str | list[Utterance] | None] = NOT_GIVEN,
    instant_mode: NotGivenOr[bool] = NOT_GIVEN,
    audio_format: NotGivenOr[AudioFormat] = NOT_GIVEN,
) -> None:
    """Update TTS options used for all future synthesis (until updated again)

    Args:
        voice: A voice from the voice library specified by name or id.
        description: Natural language instructions describing how the synthesized speech
            should sound (≤1000 characters).
        speed: Speed multiplier for the synthesized speech (≥0.25, ≤3.0, default: 1.0).
        trailing_silence: Duration of trailing silence (in seconds) to add to each utterance.
        context: Optional context for synthesis, either as text or list of utterances.
        instant_mode: Whether to use instant mode. Requires a voice to be specified when
            enabled.
        audio_format: Output audio format (mp3, wav, or pcm).

    Raises:
        ValueError: If the update would leave instant_mode enabled without a voice.
            No option is changed in that case.
    """
    # Instant mode needs a voice. The resulting voice/instant_mode pair is checked before
    # anything is assigned so a rejected call leaves the options untouched.
    if is_given(voice) or is_given(instant_mode):
        next_voice = voice if is_given(voice) else self._opts.voice
        next_instant_mode = instant_mode if is_given(instant_mode) else self._opts.instant_mode
        if next_instant_mode and next_voice is None:
            raise ValueError("Hume TTS: instant_mode cannot be enabled without specifying a voice")

    if is_given(description):
        self._opts.description = description
    if is_given(speed):
        self._opts.speed = speed
    if is_given(voice):
        self._opts.voice = voice  # type: ignore
    if is_given(trailing_silence):
        self._opts.trailing_silence = trailing_silence
    if is_given(context):
        self._opts.context = context  # type: ignore
    if is_given(instant_mode):
        self._opts.instant_mode = instant_mode
    if is_given(audio_format):
        self._opts.audio_format = audio_format

Update TTS options used for all future synthesis (until updated again)

Args

voice
A voice from the voice library specified by name or id.
description
Natural language instructions describing how the synthesized speech should sound (≤1000 characters).
speed
Speed multiplier for the synthesized speech (≥0.25, ≤3.0, default: 1.0).
trailing_silence
Duration of trailing silence (in seconds) to add to each utterance.
context
Optional context for synthesis, either as text or list of utterances.
instant_mode
Whether to use instant mode.
audio_format
Output audio format (mp3, wav, or pcm).

Inherited members

class Utterance (*args, **kwargs)
Expand source code
class Utterance(TypedDict, total=False):
    """Utterance for TTS synthesis."""

    # total=False: every key below is optional for type checkers.
    # NOTE(review): apart from `text`, the keys share names and types with the
    # TTS constructor options; presumably they carry the same meaning for a
    # single utterance — confirm against the request-building code.
    text: str
    description: str | None
    speed: float | None
    voice: VoiceById | VoiceByName | None
    trailing_silence: float | None

Utterance for TTS synthesis.

Ancestors

  • builtins.dict

Class variables

var description : str | None
var speed : float | None
var text : str
var trailing_silence : float | None
var voice : livekit.plugins.hume.tts.VoiceById | livekit.plugins.hume.tts.VoiceByName | None
class VoiceById (*args, **kwargs)
Expand source code
class VoiceById(TypedDict, total=False):
    """Voice reference that identifies a voice-library voice by its ``id``."""

    # total=False: both keys are optional for type checkers.
    id: str
    provider: VoiceProvider | None

Voice reference that identifies a voice from the voice library by its id, optionally qualified by a provider. This is a TypedDict (a plain dict at runtime) declared with total=False, so every key is optional.

Ancestors

  • builtins.dict

Class variables

var id : str
var provider : livekit.plugins.hume.tts.VoiceProvider | None
class VoiceByName (*args, **kwargs)
Expand source code
class VoiceByName(TypedDict, total=False):
    """Voice reference that identifies a voice-library voice by its ``name``."""

    # total=False: both keys are optional for type checkers.
    name: str
    provider: VoiceProvider | None

Voice reference that identifies a voice from the voice library by its name, optionally qualified by a provider. This is a TypedDict (a plain dict at runtime) declared with total=False, so every key is optional.

Ancestors

  • builtins.dict

Class variables

var name : str
var provider : livekit.plugins.hume.tts.VoiceProvider | None
class VoiceProvider (*args, **kwds)
Expand source code
class VoiceProvider(str, Enum):
    """Voice provider for the voice library."""

    # Members are str instances, so they compare equal to the raw strings below.
    hume = "HUME_AI"
    # NOTE(review): presumably selects user-created voices rather than Hume's
    # own voice library — confirm against the Hume API documentation.
    custom = "CUSTOM_VOICE"

Voice provider for the voice library.

Ancestors

  • builtins.str
  • enum.Enum

Class variables

var custom
var hume