Module livekit.agents.beta
Sub-modules
- livekit.agents.beta.tools
- livekit.agents.beta.toolsets
- livekit.agents.beta.workflows
Classes
class EndCallTool (*,
extra_description: str = '',
delete_room: bool = True,
end_instructions: str | None = 'say goodbye to the user',
on_tool_called: collections.abc.Callable[[livekit.agents.llm.tool_context.Toolset.ToolCalledEvent], collections.abc.Awaitable[None]] | None = None,
on_tool_completed: collections.abc.Callable[[livekit.agents.llm.tool_context.Toolset.ToolCompletedEvent], collections.abc.Awaitable[None]] | None = None)-
Expand source code
class EndCallTool(Toolset): def __init__( self, *, extra_description: str = "", delete_room: bool = True, end_instructions: str | None = "say goodbye to the user", on_tool_called: Callable[[Toolset.ToolCalledEvent], Awaitable[None]] | None = None, on_tool_completed: Callable[[Toolset.ToolCompletedEvent], Awaitable[None]] | None = None, ): """ This tool allows the agent to end the call and disconnect from the room. Args: extra_description: Additional description to add to the end call tool. delete_room: Whether to delete the room when the user ends the call. deleting the room disconnects all remote users, including SIP callers. end_instructions: Tool output to the LLM for generating the tool response. on_tool_called: Callback to call when the tool is called. on_tool_completed: Callback to call when the tool is completed. """ end_call_tool = function_tool( self._end_call, name="end_call", description=f"{END_CALL_DESCRIPTION}\n{extra_description}", ) super().__init__(id="end_call", tools=[end_call_tool]) self._delete_room = delete_room self._extra_description = extra_description self._end_instructions = end_instructions self._on_tool_called = on_tool_called self._on_tool_completed = on_tool_completed self._shutdown_session_task: asyncio.Task[None] | None = None async def _end_call(self, ctx: RunContext) -> Any | None: logger.debug("end_call tool called") llm_v = ctx.session.current_agent._get_activity_or_raise().llm def _on_speech_done(_: SpeechHandle) -> None: if ( not isinstance(llm_v, RealtimeModel) or not llm_v.capabilities.auto_tool_reply_generation ): # tool reply will reuse the same speech handle, so we can shutdown the session # directly after this speech handle is done ctx.session.shutdown() else: self._shutdown_session_task = asyncio.create_task( self._delayed_session_shutdown(ctx) ) ctx.speech_handle.add_done_callback(_on_speech_done) ctx.session.once("close", self._on_session_close) if self._on_tool_called: await 
self._on_tool_called(Toolset.ToolCalledEvent(ctx=ctx, arguments={})) completed_ev = Toolset.ToolCompletedEvent(ctx=ctx, output=self._end_instructions) if self._on_tool_completed: await self._on_tool_completed(completed_ev) return completed_ev.output async def _delayed_session_shutdown(self, ctx: RunContext) -> None: """Shutdown the session after the tool reply is played out""" speech_created_fut = asyncio.Future[SpeechHandle]() @ctx.session.once("speech_created") def _on_speech_created(ev: SpeechCreatedEvent) -> None: if not speech_created_fut.done(): speech_created_fut.set_result(ev.speech_handle) try: speech_handle = await asyncio.wait_for(speech_created_fut, timeout=5.0) await speech_handle except asyncio.TimeoutError: logger.warning("tool reply timed out, shutting down session") finally: ctx.session.off("speech_created", _on_speech_created) ctx.session.shutdown() def _on_session_close(self, ev: CloseEvent) -> None: """Close the job process when AgentSession is closed""" if self._shutdown_session_task: # cleanup self._shutdown_session_task.cancel() self._shutdown_session_task = None job_ctx = get_job_context() if self._delete_room: async def _on_shutdown() -> None: logger.info("deleting the room because the user ended the call") await job_ctx.delete_room() job_ctx.add_shutdown_callback(_on_shutdown) # shutdown the job process job_ctx.shutdown(reason=ev.reason.value)This tool allows the agent to end the call and disconnect from the room.
Args
extra_description- Additional description to add to the end call tool.
delete_room- Whether to delete the room when the user ends the call. Deleting the room disconnects all remote users, including SIP callers.
end_instructions- Tool output to the LLM for generating the tool response.
on_tool_called- Callback to call when the tool is called.
on_tool_completed- Callback to call when the tool is completed.
Ancestors
- livekit.agents.llm.tool_context.Toolset
class Instructions (audio: str, *, text: str | None = None)-
Expand source code
class Instructions(str): """Instructions that adapt based on the user's input modality (audio vs. text). ``str(self)`` is what providers see when treating this as a plain string. By default it equals the ``audio`` variant; after :meth:`as_modality` it equals the chosen variant. ``_audio_variant`` and ``_text_variant`` are always preserved so :meth:`as_modality` can be called again for a different modality (e.g., when the same ``ChatContext`` is reused across tool-call turns). """ _audio_variant: str _text_variant: str | None def __new__( cls, audio: str, *, text: str | None = None, _represent: str | None = None ) -> Instructions: """Create an Instructions object. Args: audio: The audio (voice) variant. text: The text variant. Falls back to ``audio`` when omitted. """ instance = super().__new__(cls, _represent if _represent is not None else audio) instance._audio_variant = audio instance._text_variant = text return instance @property def audio(self) -> str: """The audio (voice) variant of the instructions.""" return self._audio_variant @property def text(self) -> str: """The text variant of the instructions. Falls back to the audio variant when no text variant was provided. 
""" return self._text_variant if self._text_variant is not None else self._audio_variant def format(self, *args: object, **kwargs: object) -> Instructions: """Format the instructions with the given keyword arguments.""" any_instructions = any(isinstance(arg, Instructions) for arg in args) or any( isinstance(v, Instructions) for v in kwargs.values() ) if any_instructions: audio_args = tuple(arg.audio if isinstance(arg, Instructions) else arg for arg in args) text_args = tuple(arg.text if isinstance(arg, Instructions) else arg for arg in args) audio_kwargs = { k: v.audio if isinstance(v, Instructions) else v for k, v in kwargs.items() } text_kwargs = { k: v.text if isinstance(v, Instructions) else v for k, v in kwargs.items() } else: audio_args = text_args = args audio_kwargs = text_kwargs = kwargs return Instructions( audio=self.audio.format(*audio_args, **audio_kwargs), text=( self.text.format(*text_args, **text_kwargs) if any_instructions or self._text_variant is not None else None ), _represent=str(self).format(*args, **kwargs), ) def as_modality(self, modality: Literal["audio", "text"]) -> Instructions: """Return a copy whose ``str`` value is the correct variant for *modality*. Both ``_audio_variant`` and ``_text_variant`` are preserved so this can be called again for a different modality (e.g. across tool-call turns). 
""" return Instructions( audio=self._audio_variant, text=self._text_variant, _represent=self.audio if modality == "audio" else self.text, ) def __add__(self, other: object) -> Instructions: """Concatenate, propagating both variants and the current str value.""" if isinstance(other, Instructions): has_text = self._text_variant is not None or other._text_variant is not None return Instructions( audio=self.audio + other.audio, text=(self.text + other.text) if has_text else None, _represent=str(self) + str(other), ) if isinstance(other, str): return Instructions( audio=self.audio + other, text=(self._text_variant + other) if self._text_variant is not None else None, _represent=str(self) + other, ) raise TypeError(f"Cannot add Instructions and {type(other)}") def __radd__(self, other: object) -> Instructions: """Support ``plain_str + Instructions``, propagating both variants.""" if isinstance(other, str): return Instructions( audio=other + self.audio, text=(other + self._text_variant) if self._text_variant is not None else None, _represent=other + str(self), ) raise TypeError(f"Cannot add {type(other)} and Instructions") def __repr__(self) -> str: return f"Instructions({str(self)!r})" @classmethod def __get_pydantic_core_schema__(cls, source_type: Any, handler: Any) -> Any: from pydantic_core import core_schema def validate_python(v: Any) -> Instructions: if isinstance(v, Instructions): return v if isinstance(v, dict) and v.get("type") == "instructions": return cls(v["audio"], text=v.get("text")) raise ValueError(f"Cannot convert {type(v)!r} to Instructions") def validate_json(v: Any) -> Instructions: if isinstance(v, dict) and v.get("type") == "instructions": return cls(v["audio"], text=v.get("text")) raise ValueError(f"Cannot convert {type(v)!r} to Instructions") def serialize(v: Instructions) -> dict[str, Any]: d: dict[str, Any] = {"type": "instructions", "audio": v.audio} if v._text_variant is not None: d["text"] = v._text_variant return d return 
core_schema.json_or_python_schema( python_schema=core_schema.no_info_plain_validator_function(validate_python), json_schema=core_schema.no_info_plain_validator_function(validate_json), serialization=core_schema.plain_serializer_function_ser_schema( serialize, info_arg=False ), )Instructions that adapt based on the user's input modality (audio vs. text).
`str(self)` is what providers see when treating this as a plain string. By default it equals the `audio` variant; after `as_modality()` it equals the chosen variant.
`_audio_variant` and `_text_variant` are always preserved so `as_modality()` can be called again for a different modality (e.g., when the same `ChatContext` is reused across tool-call turns).
Ancestors
- builtins.str
Instance variables
prop audio : str-
Expand source code
@property def audio(self) -> str: """The audio (voice) variant of the instructions.""" return self._audio_variantThe audio (voice) variant of the instructions.
prop text : str-
Expand source code
@property def text(self) -> str: """The text variant of the instructions. Falls back to the audio variant when no text variant was provided. """ return self._text_variant if self._text_variant is not None else self._audio_variantThe text variant of the instructions.
Falls back to the audio variant when no text variant was provided.
Methods
def as_modality(self, modality: "Literal['audio', 'text']") ‑> livekit.agents.llm.chat_context.Instructions-
Expand source code
def as_modality(self, modality: Literal["audio", "text"]) -> Instructions: """Return a copy whose ``str`` value is the correct variant for *modality*. Both ``_audio_variant`` and ``_text_variant`` are preserved so this can be called again for a different modality (e.g. across tool-call turns). """ return Instructions( audio=self._audio_variant, text=self._text_variant, _represent=self.audio if modality == "audio" else self.text, )Return a copy whose
`str` value is the correct variant for modality.
Both `_audio_variant` and `_text_variant` are preserved so this can be called again for a different modality (e.g. across tool-call turns).
def format(self, *args: object, **kwargs: object) ‑> livekit.agents.llm.chat_context.Instructions-
Expand source code
def format(self, *args: object, **kwargs: object) -> Instructions: """Format the instructions with the given keyword arguments.""" any_instructions = any(isinstance(arg, Instructions) for arg in args) or any( isinstance(v, Instructions) for v in kwargs.values() ) if any_instructions: audio_args = tuple(arg.audio if isinstance(arg, Instructions) else arg for arg in args) text_args = tuple(arg.text if isinstance(arg, Instructions) else arg for arg in args) audio_kwargs = { k: v.audio if isinstance(v, Instructions) else v for k, v in kwargs.items() } text_kwargs = { k: v.text if isinstance(v, Instructions) else v for k, v in kwargs.items() } else: audio_args = text_args = args audio_kwargs = text_kwargs = kwargs return Instructions( audio=self.audio.format(*audio_args, **audio_kwargs), text=( self.text.format(*text_args, **text_kwargs) if any_instructions or self._text_variant is not None else None ), _represent=str(self).format(*args, **kwargs), )Format the instructions with the given keyword arguments.