Module livekit.agents.voice.background_audio

Classes

class AudioConfig (source: AudioSource,
volume: float = 1.0,
probability: float = 1.0)
Expand source code
class AudioConfig(NamedTuple):
    """
    Definition for the audio to be played in the background

    Args:
        volume: The volume of the audio (0.0-1.0)
        probability: The probability of the audio being played, when multiple
            AudioConfigs are provided (0.0-1.0)
    """

    source: AudioSource
    volume: float = 1.0
    probability: float = 1.0

Definition for the audio to be played in the background

Args

source
The audio to play: a BuiltinAudioClip, a file path (str), or an AsyncIterator of rtc.AudioFrame
volume
The volume of the audio (0.0-1.0)
probability
The probability of the audio being played, when multiple AudioConfigs are provided (0.0-1.0)

Ancestors

  • builtins.tuple

Instance variables

var probability : float
The probability of the audio being played, when multiple AudioConfigs are provided (0.0-1.0). Alias for field number 2.

var source : AsyncIterator[AudioFrame] | str | BuiltinAudioClip
The audio source to play: a BuiltinAudioClip, a file path (str), or an AsyncIterator of rtc.AudioFrame. Alias for field number 0.

var volume : float
The volume of the audio (0.0-1.0). Alias for field number 1.
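
For illustration, a hedged sketch of how these fields combine (the clip choices and variable names below are arbitrary):

from livekit.agents.voice.background_audio import AudioConfig, BuiltinAudioClip

# A single clip at half volume; probability defaults to 1.0.
quiet_typing = AudioConfig(BuiltinAudioClip.KEYBOARD_TYPING, volume=0.5)

# A weighted list: each probability is the chance that entry is picked when
# playback starts. Since 0.6 + 0.2 = 0.8 < 1.0, there is a 20% chance that
# nothing plays at all (silence).
thinking_sounds = [
    AudioConfig(BuiltinAudioClip.KEYBOARD_TYPING, volume=0.8, probability=0.6),
    AudioConfig(BuiltinAudioClip.KEYBOARD_TYPING2, volume=0.8, probability=0.2),
]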

class BackgroundAudioPlayer (*,
ambient_sound: NotGivenOr[AudioSource | AudioConfig | list[AudioConfig] | None] = NOT_GIVEN,
thinking_sound: NotGivenOr[AudioSource | AudioConfig | list[AudioConfig] | None] = NOT_GIVEN,
stream_timeout_ms: int = 200)
Expand source code
class BackgroundAudioPlayer:
    def __init__(
        self,
        *,
        ambient_sound: NotGivenOr[AudioSource | AudioConfig | list[AudioConfig] | None] = NOT_GIVEN,
        thinking_sound: NotGivenOr[
            AudioSource | AudioConfig | list[AudioConfig] | None
        ] = NOT_GIVEN,
        stream_timeout_ms: int = 200,
    ) -> None:
        """
        Initializes the BackgroundAudio component with optional ambient and thinking sounds.

        This component creates and publishes a continuous audio track to a LiveKit room while managing
        the playback of ambient and agent “thinking” sounds. It supports three types of audio sources:
        - A BuiltinAudioClip enum value, which will use a pre-defined sound from the package resources
        - A file path (string) pointing to an audio file, which can be looped.
        - An AsyncIterator that yields rtc.AudioFrame

        When a list (or AudioConfig) is supplied, the component considers each sound’s volume and probability:
        - The probability value determines the chance that a particular sound is selected for playback.
        - A total probability below 1.0 means there is a chance no sound will be selected (resulting in silence).

        Args:
            ambient_sound (NotGivenOr[Union[AudioSource, AudioConfig, List[AudioConfig], None]], optional):
                The ambient sound to be played continuously. For file paths, the sound will be looped.
                For AsyncIterator sources, ensure the iterator is infinite or looped.

            thinking_sound (NotGivenOr[Union[AudioSource, AudioConfig, List[AudioConfig], None]], optional):
                The sound to be played when the associated agent enters a “thinking” state. This can be a single
                sound source or a list of AudioConfig objects (with volume and probability settings).

        """  # noqa: E501

        self._ambient_sound = ambient_sound if is_given(ambient_sound) else None
        self._thinking_sound = thinking_sound if is_given(thinking_sound) else None

        self._audio_source = rtc.AudioSource(48000, 1, queue_size_ms=_AUDIO_SOURCE_BUFFER_MS)
        self._audio_mixer = rtc.AudioMixer(
            48000, 1, blocksize=4800, capacity=1, stream_timeout_ms=stream_timeout_ms
        )
        self._publication: rtc.LocalTrackPublication | None = None
        self._lock = asyncio.Lock()

        self._republish_task: asyncio.Task[None] | None = None  # republish the task on reconnect
        self._mixer_atask: asyncio.Task[None] | None = None

        self._play_tasks: list[asyncio.Task[None]] = []

        self._ambient_handle: PlayHandle | None = None
        self._thinking_handle: PlayHandle | None = None

    def _select_sound_from_list(self, sounds: list[AudioConfig]) -> AudioConfig | None:
        """
        Selects a sound from a list of BackgroundSound based on their probabilities.
        Returns None if no sound is selected (when sum of probabilities < 1.0).
        """
        total_probability = sum(sound.probability for sound in sounds)
        if total_probability <= 0:
            return None

        if total_probability < 1.0 and random.random() > total_probability:
            return None

        normalize_factor = 1.0 if total_probability <= 1.0 else total_probability
        r = random.random() * min(total_probability, 1.0)
        cumulative = 0.0

        for sound in sounds:
            if sound.probability <= 0:
                continue

            norm_prob = sound.probability / normalize_factor
            cumulative += norm_prob

            if r <= cumulative:
                return sound

        return sounds[-1]

    def _normalize_sound_source(
        self, source: AudioSource | AudioConfig | list[AudioConfig] | None
    ) -> tuple[AudioSource, float] | None:
        if source is None:
            return None

        if isinstance(source, BuiltinAudioClip):
            return self._normalize_builtin_audio(source), 1.0
        elif isinstance(source, list):
            selected = self._select_sound_from_list(cast(list[AudioConfig], source))
            if selected is None:
                return None
            return selected.source, selected.volume
        elif isinstance(source, AudioConfig):
            return self._normalize_builtin_audio(source.source), source.volume

        return source, 1.0

    def _normalize_builtin_audio(self, source: AudioSource) -> AsyncIterator[rtc.AudioFrame] | str:
        if isinstance(source, BuiltinAudioClip):
            return source.path()
        else:
            return source

    def play(
        self,
        audio: AudioSource | AudioConfig | list[AudioConfig],
        *,
        loop: bool = False,
    ) -> PlayHandle:
        """
        Plays an audio once or in a loop.

        Args:
            audio (Union[AudioSource, AudioConfig, List[AudioConfig]]):
                The audio to play. Can be:
                - A string pointing to a file path
                - An AsyncIterator that yields `rtc.AudioFrame`
                - An AudioConfig object with volume and probability
                - A list of AudioConfig objects, where one will be selected based on probability

                If a string is provided and `loop` is True, the sound will be looped.
                If an AsyncIterator is provided, it is played until exhaustion (and cannot be looped
                automatically).
            loop (bool, optional):
                Whether to loop the audio. Only applicable if `audio` is a string or contains strings.
                Defaults to False.

        Returns:
            PlayHandle: An object representing the playback handle. This can be
            awaited or stopped manually.
        """  # noqa: E501
        if not self._mixer_atask:
            raise RuntimeError("BackgroundAudio is not started")

        normalized = self._normalize_sound_source(audio)
        if normalized is None:
            play_handle = PlayHandle()
            play_handle._mark_playout_done()
            return play_handle

        sound_source, volume = normalized

        if loop and isinstance(sound_source, AsyncIterator):
            raise ValueError(
                "Looping sound via AsyncIterator is not supported. Use a string file path or your own 'infinite' AsyncIterator with loop=False"  # noqa: E501
            )

        play_handle = PlayHandle()
        task = asyncio.create_task(self._play_task(play_handle, sound_source, volume, loop))
        task.add_done_callback(lambda _: self._play_tasks.remove(task))
        task.add_done_callback(lambda _: play_handle._mark_playout_done())
        self._play_tasks.append(task)
        return play_handle

    async def start(
        self,
        *,
        room: rtc.Room,
        agent_session: NotGivenOr[AgentSession] = NOT_GIVEN,
        track_publish_options: NotGivenOr[rtc.TrackPublishOptions] = NOT_GIVEN,
    ) -> None:
        """
        Starts the background audio system, publishing the audio track
        and beginning playback of any configured ambient sound.

        If `ambient_sound` is provided (and contains file paths), they will loop
        automatically. If `ambient_sound` contains AsyncIterators, they are assumed
        to be already infinite or looped.

        Args:
            room (rtc.Room):
                The LiveKit Room object where the audio track will be published.
            agent_session (NotGivenOr[AgentSession], optional):
                The session object used to track the agent's state (e.g., "thinking").
                Required if `thinking_sound` is provided.
            track_publish_options (NotGivenOr[rtc.TrackPublishOptions], optional):
                Options used when publishing the audio track. If not given, defaults will
                be used.
        """
        async with self._lock:
            self._room = room
            self._agent_session = agent_session or None
            self._track_publish_options = track_publish_options or None

            if cli.CLI_ARGUMENTS is not None and cli.CLI_ARGUMENTS.console:
                logger.warning(
                    "Background audio is not supported in console mode. Audio will not be played."
                )

            await self._publish_track()

            self._mixer_atask = asyncio.create_task(self._run_mixer_task())
            self._room.on("reconnected", self._on_reconnected)

            if self._agent_session:
                self._agent_session.on("agent_state_changed", self._agent_state_changed)

            if self._ambient_sound:
                normalized = self._normalize_sound_source(
                    cast(Union[AudioSource, AudioConfig, list[AudioConfig]], self._ambient_sound)
                )
                if normalized:
                    sound_source, volume = normalized
                    selected_sound = AudioConfig(sound_source, volume)
                    if isinstance(sound_source, str):
                        self._ambient_handle = self.play(selected_sound, loop=True)
                    else:
                        self._ambient_handle = self.play(selected_sound)

    async def aclose(self) -> None:
        """
        Gracefully closes the background audio system, canceling all ongoing
        playback tasks and unpublishing the audio track.
        """
        async with self._lock:
            if not self._mixer_atask:
                return  # not started

            await cancel_and_wait(*self._play_tasks)

            if self._republish_task:
                await cancel_and_wait(self._republish_task)

            await cancel_and_wait(self._mixer_atask)
            self._mixer_atask = None

            await self._audio_mixer.aclose()
            await self._audio_source.aclose()

            if self._agent_session:
                self._agent_session.off("agent_state_changed", self._agent_state_changed)

            self._room.off("reconnected", self._on_reconnected)

            with contextlib.suppress(Exception):
                if self._publication is not None:
                    await self._room.local_participant.unpublish_track(self._publication.sid)

    def _on_reconnected(self) -> None:
        if self._republish_task:
            self._republish_task.cancel()

        self._publication = None
        self._republish_task = asyncio.create_task(self._republish_track_task())

    def _agent_state_changed(self, ev: AgentStateChangedEvent) -> None:
        if not self._thinking_sound:
            return

        if ev.new_state == "thinking":
            if self._thinking_handle and not self._thinking_handle.done():
                return

            assert self._thinking_sound is not None
            self._thinking_handle = self.play(
                cast(Union[AudioSource, AudioConfig, list[AudioConfig]], self._thinking_sound)
            )

        elif self._thinking_handle:
            self._thinking_handle.stop()

    @log_exceptions(logger=logger)
    async def _play_task(
        self, play_handle: PlayHandle, sound: AudioSource, volume: float, loop: bool
    ) -> None:
        if isinstance(sound, BuiltinAudioClip):
            sound = sound.path()

        if isinstance(sound, str):
            if loop:
                sound = _loop_audio_frames(sound)
            else:
                sound = audio_frames_from_file(sound)

        async def _gen_wrapper() -> AsyncGenerator[rtc.AudioFrame, None]:
            async for frame in sound:
                if volume != 1.0:
                    data = np.frombuffer(frame.data, dtype=np.int16).astype(np.float32)
                    data *= 10 ** (np.log10(volume))
                    np.clip(data, -32768, 32767, out=data)
                    yield rtc.AudioFrame(
                        data=data.astype(np.int16).tobytes(),
                        sample_rate=frame.sample_rate,
                        num_channels=frame.num_channels,
                        samples_per_channel=frame.samples_per_channel,
                    )
                else:
                    yield frame

            # TODO(theomonnom): the wait_for_playout() may be innaccurate by 400ms
            play_handle._mark_playout_done()

        gen = _gen_wrapper()
        try:
            self._audio_mixer.add_stream(gen)
            await play_handle.wait_for_playout()  # wait for playout or interruption
        finally:
            self._audio_mixer.remove_stream(gen)
            play_handle._mark_playout_done()

            await asyncio.sleep(0)
            if play_handle._stop_fut.done():
                await gen.aclose()

    @log_exceptions(logger=logger)
    async def _run_mixer_task(self) -> None:
        async for frame in self._audio_mixer:
            await self._audio_source.capture_frame(frame)

    async def _publish_track(self) -> None:
        if self._publication is not None:
            return

        track = rtc.LocalAudioTrack.create_audio_track("background_audio", self._audio_source)
        self._publication = await self._room.local_participant.publish_track(
            track, self._track_publish_options or rtc.TrackPublishOptions()
        )

    @log_exceptions(logger=logger)
    async def _republish_track_task(self) -> None:
        # used to republish the track on agent reconnect
        async with self._lock:
            await self._publish_track()

Initializes the BackgroundAudio component with optional ambient and thinking sounds.

This component creates and publishes a continuous audio track to a LiveKit room while managing the playback of ambient and agent “thinking” sounds. It supports three types of audio sources:

  • A BuiltinAudioClip enum value, which will use a pre-defined sound from the package resources
  • A file path (string) pointing to an audio file, which can be looped
  • An AsyncIterator that yields rtc.AudioFrame

When a list (or AudioConfig) is supplied, the component considers each sound’s volume and probability:

  • The probability value determines the chance that a particular sound is selected for playback.
  • A total probability below 1.0 means there is a chance no sound will be selected (resulting in silence).

Args

ambient_sound (NotGivenOr[Union[AudioSource, AudioConfig, List[AudioConfig], None]], optional): The ambient sound to be played continuously. For file paths, the sound will be looped. For AsyncIterator sources, ensure the iterator is infinite or looped.

thinking_sound (NotGivenOr[Union[AudioSource, AudioConfig, List[AudioConfig], None]], optional): The sound to be played when the associated agent enters a “thinking” state. This can be a single sound source or a list of AudioConfig objects (with volume and probability settings).
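
For illustration, a minimal sketch of a configuration combining both options (build_player is a hypothetical helper and the clip choices are arbitrary):

from livekit.agents.voice.background_audio import (
    AudioConfig,
    BackgroundAudioPlayer,
    BuiltinAudioClip,
)

def build_player() -> BackgroundAudioPlayer:
    # Ambient office noise, plus randomized typing while the agent is "thinking".
    return BackgroundAudioPlayer(
        ambient_sound=AudioConfig(BuiltinAudioClip.OFFICE_AMBIENCE, volume=0.8),
        thinking_sound=[
            AudioConfig(BuiltinAudioClip.KEYBOARD_TYPING, volume=0.8, probability=0.6),
            AudioConfig(BuiltinAudioClip.KEYBOARD_TYPING2, volume=0.8, probability=0.2),
        ],
    )

A plain file path or an AsyncIterator of rtc.AudioFrame may be passed in place of the built-in clips; the player only begins producing audio once start() is called.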

Methods

async def aclose(self) ‑> None
Expand source code
async def aclose(self) -> None:
    """
    Gracefully closes the background audio system, canceling all ongoing
    playback tasks and unpublishing the audio track.
    """
    async with self._lock:
        if not self._mixer_atask:
            return  # not started

        await cancel_and_wait(*self._play_tasks)

        if self._republish_task:
            await cancel_and_wait(self._republish_task)

        await cancel_and_wait(self._mixer_atask)
        self._mixer_atask = None

        await self._audio_mixer.aclose()
        await self._audio_source.aclose()

        if self._agent_session:
            self._agent_session.off("agent_state_changed", self._agent_state_changed)

        self._room.off("reconnected", self._on_reconnected)

        with contextlib.suppress(Exception):
            if self._publication is not None:
                await self._room.local_participant.unpublish_track(self._publication.sid)

Gracefully closes the background audio system, canceling all ongoing playback tasks and unpublishing the audio track.
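
A hedged sketch of a typical shutdown path (the sleep merely stands in for the job's real work):

import asyncio

from livekit.agents.voice.background_audio import BackgroundAudioPlayer

async def run_then_close(player: BackgroundAudioPlayer) -> None:
    try:
        await asyncio.sleep(3600)  # placeholder for the agent's actual work
    finally:
        # Cancels playback tasks, closes the mixer/audio source and unpublishes the track.
        await player.aclose()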

def play(self,
audio: AudioSource | AudioConfig | list[AudioConfig],
*,
loop: bool = False) ‑> PlayHandle
Expand source code
def play(
    self,
    audio: AudioSource | AudioConfig | list[AudioConfig],
    *,
    loop: bool = False,
) -> PlayHandle:
    """
    Plays an audio once or in a loop.

    Args:
        audio (Union[AudioSource, AudioConfig, List[AudioConfig]]):
            The audio to play. Can be:
            - A string pointing to a file path
            - An AsyncIterator that yields `rtc.AudioFrame`
            - An AudioConfig object with volume and probability
            - A list of AudioConfig objects, where one will be selected based on probability

            If a string is provided and `loop` is True, the sound will be looped.
            If an AsyncIterator is provided, it is played until exhaustion (and cannot be looped
            automatically).
        loop (bool, optional):
            Whether to loop the audio. Only applicable if `audio` is a string or contains strings.
            Defaults to False.

    Returns:
        PlayHandle: An object representing the playback handle. This can be
        awaited or stopped manually.
    """  # noqa: E501
    if not self._mixer_atask:
        raise RuntimeError("BackgroundAudio is not started")

    normalized = self._normalize_sound_source(audio)
    if normalized is None:
        play_handle = PlayHandle()
        play_handle._mark_playout_done()
        return play_handle

    sound_source, volume = normalized

    if loop and isinstance(sound_source, AsyncIterator):
        raise ValueError(
            "Looping sound via AsyncIterator is not supported. Use a string file path or your own 'infinite' AsyncIterator with loop=False"  # noqa: E501
        )

    play_handle = PlayHandle()
    task = asyncio.create_task(self._play_task(play_handle, sound_source, volume, loop))
    task.add_done_callback(lambda _: self._play_tasks.remove(task))
    task.add_done_callback(lambda _: play_handle._mark_playout_done())
    self._play_tasks.append(task)
    return play_handle

Plays an audio once or in a loop.

Args

audio (Union[AudioSource, AudioConfig, List[AudioConfig]]): The audio to play. Can be:

  • A string pointing to a file path
  • An AsyncIterator that yields rtc.AudioFrame
  • An AudioConfig object with volume and probability
  • A list of AudioConfig objects, where one will be selected based on probability

If a string is provided and loop is True, the sound will be looped. If an AsyncIterator is provided, it is played until exhaustion (and cannot be looped automatically).

loop (bool, optional): Whether to loop the audio. Only applicable if audio is a string or contains strings. Defaults to False.

Returns

PlayHandle
An object representing the playback handle. This can be awaited or stopped manually.
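
A brief, non-authoritative sketch of both modes (the file paths are hypothetical, and player is assumed to have been started already):

from livekit.agents.voice.background_audio import (
    AudioConfig,
    BackgroundAudioPlayer,
    PlayHandle,
)

async def play_once(player: BackgroundAudioPlayer) -> None:
    # Play a local file a single time and wait for playout to finish.
    handle = player.play("/path/to/chime.ogg")  # hypothetical path
    await handle  # equivalent to: await handle.wait_for_playout()

def start_rain_loop(player: BackgroundAudioPlayer) -> PlayHandle:
    # Loop a file-based sound at reduced volume until stop() is called on the handle.
    return player.play(AudioConfig("/path/to/rain.ogg", volume=0.5), loop=True)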

async def start(self,
*,
room: rtc.Room,
agent_session: NotGivenOr[AgentSession] = NOT_GIVEN,
track_publish_options: NotGivenOr[rtc.TrackPublishOptions] = NOT_GIVEN) ‑> None
Expand source code
async def start(
    self,
    *,
    room: rtc.Room,
    agent_session: NotGivenOr[AgentSession] = NOT_GIVEN,
    track_publish_options: NotGivenOr[rtc.TrackPublishOptions] = NOT_GIVEN,
) -> None:
    """
    Starts the background audio system, publishing the audio track
    and beginning playback of any configured ambient sound.

    If `ambient_sound` is provided (and contains file paths), they will loop
    automatically. If `ambient_sound` contains AsyncIterators, they are assumed
    to be already infinite or looped.

    Args:
        room (rtc.Room):
            The LiveKit Room object where the audio track will be published.
        agent_session (NotGivenOr[AgentSession], optional):
            The session object used to track the agent's state (e.g., "thinking").
            Required if `thinking_sound` is provided.
        track_publish_options (NotGivenOr[rtc.TrackPublishOptions], optional):
            Options used when publishing the audio track. If not given, defaults will
            be used.
    """
    async with self._lock:
        self._room = room
        self._agent_session = agent_session or None
        self._track_publish_options = track_publish_options or None

        if cli.CLI_ARGUMENTS is not None and cli.CLI_ARGUMENTS.console:
            logger.warning(
                "Background audio is not supported in console mode. Audio will not be played."
            )

        await self._publish_track()

        self._mixer_atask = asyncio.create_task(self._run_mixer_task())
        self._room.on("reconnected", self._on_reconnected)

        if self._agent_session:
            self._agent_session.on("agent_state_changed", self._agent_state_changed)

        if self._ambient_sound:
            normalized = self._normalize_sound_source(
                cast(Union[AudioSource, AudioConfig, list[AudioConfig]], self._ambient_sound)
            )
            if normalized:
                sound_source, volume = normalized
                selected_sound = AudioConfig(sound_source, volume)
                if isinstance(sound_source, str):
                    self._ambient_handle = self.play(selected_sound, loop=True)
                else:
                    self._ambient_handle = self.play(selected_sound)

Starts the background audio system, publishing the audio track and beginning playback of any configured ambient sound.

If ambient_sound is provided (and contains file paths), they will loop automatically. If ambient_sound contains AsyncIterators, they are assumed to be already infinite or looped.

Args

room (rtc.Room): The LiveKit Room object where the audio track will be published.

agent_session (NotGivenOr[AgentSession], optional): The session object used to track the agent's state (e.g., "thinking"). Required if thinking_sound is provided.

track_publish_options (NotGivenOr[rtc.TrackPublishOptions], optional): Options used when publishing the audio track. If not given, defaults will be used.
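
A non-authoritative sketch of wiring the player into a session; how the rtc.Room and AgentSession are obtained (here taken as parameters) and the AgentSession import path are assumptions about a typical livekit.agents setup:

from livekit import rtc
from livekit.agents import AgentSession  # import path assumed (top-level re-export)

from livekit.agents.voice.background_audio import BackgroundAudioPlayer, BuiltinAudioClip

async def attach_background_audio(room: rtc.Room, session: AgentSession) -> BackgroundAudioPlayer:
    player = BackgroundAudioPlayer(
        ambient_sound=BuiltinAudioClip.OFFICE_AMBIENCE,
        thinking_sound=BuiltinAudioClip.KEYBOARD_TYPING,
    )
    # agent_session is required here because thinking_sound is set: the player
    # subscribes to "agent_state_changed" to start/stop the thinking sound.
    await player.start(room=room, agent_session=session)
    return player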

class BuiltinAudioClip (*args, **kwds)
Expand source code
class BuiltinAudioClip(enum.Enum):
    OFFICE_AMBIENCE = "office-ambience.ogg"
    KEYBOARD_TYPING = "keyboard-typing.ogg"
    KEYBOARD_TYPING2 = "keyboard-typing2.ogg"

    def path(self) -> str:
        file_path = files("livekit.agents.resources") / self.value
        return str(_resource_stack.enter_context(as_file(file_path)))

An enumeration of audio clips bundled with the package resources. A member can be passed directly as an audio source (for example as ambient_sound or thinking_sound), or resolved to a file path on disk via path().

Ancestors

  • enum.Enum

Class variables

var KEYBOARD_TYPING
var KEYBOARD_TYPING2
var OFFICE_AMBIENCE

Methods

def path(self) ‑> str
Expand source code
def path(self) -> str:
    file_path = files("livekit.agents.resources") / self.value
    return str(_resource_stack.enter_context(as_file(file_path)))
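
path() returns the filesystem path of the bundled audio file for this clip. A short sketch, for illustration, of the two ways a member is typically used:

from livekit.agents.voice.background_audio import BackgroundAudioPlayer, BuiltinAudioClip

# Resolve the bundled resource to a concrete file path on disk.
ambience_path = BuiltinAudioClip.OFFICE_AMBIENCE.path()

# More commonly, pass the member directly as an audio source.
player = BackgroundAudioPlayer(ambient_sound=BuiltinAudioClip.OFFICE_AMBIENCE)
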
class PlayHandle
Expand source code
class PlayHandle:
    def __init__(self) -> None:
        self._done_fut = asyncio.Future[None]()
        self._stop_fut = asyncio.Future[None]()

    def done(self) -> bool:
        """
        Returns True if the sound has finished playing.
        """
        return self._done_fut.done()

    def stop(self) -> None:
        """
        Stops the sound from playing.
        """
        if self.done():
            return

        with contextlib.suppress(asyncio.InvalidStateError):
            self._stop_fut.set_result(None)
            self._mark_playout_done()  # TODO(theomonnom): move this to _play_task

    async def wait_for_playout(self) -> None:
        """
        Waits for the sound to finish playing.
        """
        await asyncio.shield(self._done_fut)

    def __await__(self) -> Generator[Any, None, PlayHandle]:
        async def _await_impl() -> PlayHandle:
            await self.wait_for_playout()
            return self

        return _await_impl().__await__()

    def _mark_playout_done(self) -> None:
        with contextlib.suppress(asyncio.InvalidStateError):
            self._done_fut.set_result(None)

Methods

def done(self) ‑> bool
Expand source code
def done(self) -> bool:
    """
    Returns True if the sound has finished playing.
    """
    return self._done_fut.done()

Returns True if the sound has finished playing.

def stop(self) ‑> None
Expand source code
def stop(self) -> None:
    """
    Stops the sound from playing.
    """
    if self.done():
        return

    with contextlib.suppress(asyncio.InvalidStateError):
        self._stop_fut.set_result(None)
        self._mark_playout_done()  # TODO(theomonnom): move this to _play_task

Stops the sound from playing.

async def wait_for_playout(self) ‑> None
Expand source code
async def wait_for_playout(self) -> None:
    """
    Waits for the sound to finish playing.
    """
    await asyncio.shield(self._done_fut)

Waits for the sound to finish playing.
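
A hedged sketch of working with a handle (the sleep stands in for the agent's real work, and player is assumed to have been started):

import asyncio

from livekit.agents.voice.background_audio import (
    BackgroundAudioPlayer,
    BuiltinAudioClip,
    PlayHandle,
)

async def typing_for_a_while(player: BackgroundAudioPlayer) -> None:
    handle: PlayHandle = player.play(BuiltinAudioClip.KEYBOARD_TYPING)

    await asyncio.sleep(2.0)  # placeholder for real work

    if not handle.done():
        handle.stop()  # interrupt playback early
    else:
        await handle.wait_for_playout()  # already finished; returns immediately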