Module livekit.plugins.google.beta.realtime

Sub-modules

livekit.plugins.google.beta.realtime.api_proto
livekit.plugins.google.beta.realtime.realtime_api

Classes

class RealtimeModel (*,
instructions: NotGivenOr[str] = NOT_GIVEN,
model: NotGivenOr[LiveAPIModels | str] = NOT_GIVEN,
api_key: NotGivenOr[str] = NOT_GIVEN,
voice: Voice | str = 'Puck',
language: NotGivenOr[str] = NOT_GIVEN,
modalities: NotGivenOr[list[Modality]] = NOT_GIVEN,
vertexai: NotGivenOr[bool] = NOT_GIVEN,
project: NotGivenOr[str] = NOT_GIVEN,
location: NotGivenOr[str] = NOT_GIVEN,
candidate_count: int = 1,
temperature: NotGivenOr[float] = NOT_GIVEN,
max_output_tokens: NotGivenOr[int] = NOT_GIVEN,
top_p: NotGivenOr[float] = NOT_GIVEN,
top_k: NotGivenOr[int] = NOT_GIVEN,
presence_penalty: NotGivenOr[float] = NOT_GIVEN,
frequency_penalty: NotGivenOr[float] = NOT_GIVEN,
input_audio_transcription: NotGivenOr[AudioTranscriptionConfig | None] = NOT_GIVEN,
output_audio_transcription: NotGivenOr[AudioTranscriptionConfig | None] = NOT_GIVEN,
image_encode_options: NotGivenOr[images.EncodeOptions] = NOT_GIVEN)
class RealtimeModel(llm.RealtimeModel):
    def __init__(
        self,
        *,
        instructions: NotGivenOr[str] = NOT_GIVEN,
        model: NotGivenOr[LiveAPIModels | str] = NOT_GIVEN,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        voice: Voice | str = "Puck",
        language: NotGivenOr[str] = NOT_GIVEN,
        modalities: NotGivenOr[list[Modality]] = NOT_GIVEN,
        vertexai: NotGivenOr[bool] = NOT_GIVEN,
        project: NotGivenOr[str] = NOT_GIVEN,
        location: NotGivenOr[str] = NOT_GIVEN,
        candidate_count: int = 1,
        temperature: NotGivenOr[float] = NOT_GIVEN,
        max_output_tokens: NotGivenOr[int] = NOT_GIVEN,
        top_p: NotGivenOr[float] = NOT_GIVEN,
        top_k: NotGivenOr[int] = NOT_GIVEN,
        presence_penalty: NotGivenOr[float] = NOT_GIVEN,
        frequency_penalty: NotGivenOr[float] = NOT_GIVEN,
        input_audio_transcription: NotGivenOr[AudioTranscriptionConfig | None] = NOT_GIVEN,
        output_audio_transcription: NotGivenOr[AudioTranscriptionConfig | None] = NOT_GIVEN,
        image_encode_options: NotGivenOr[images.EncodeOptions] = NOT_GIVEN,
    ) -> None:
        """
        Initializes a RealtimeModel instance for interacting with Google's Realtime API.

        Environment Requirements:
        - For VertexAI: Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of the service account key file or use any of the other Google Cloud auth methods.
        The Google Cloud project and location can be set via `project` and `location` arguments or the environment variables
        `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION`. By default, the project is inferred from the service account key file,
        and the location defaults to "us-central1".
        - For Google Gemini API: Set the `api_key` argument or the `GOOGLE_API_KEY` environment variable.

        Args:
            instructions (str, optional): Initial system instructions for the model. Defaults to "".
            api_key (str, optional): Google Gemini API key. If None, will attempt to read from the environment variable GOOGLE_API_KEY.
            modalities (list[Modality], optional): Modalities to use, such as ["TEXT", "AUDIO"]. Defaults to ["AUDIO"].
            model (str, optional): The name of the model to use. Defaults to "gemini-2.0-flash-live-001" or "gemini-2.0-flash-exp" (vertexai).
            voice (api_proto.Voice, optional): Voice setting for audio outputs. Defaults to "Puck".
            language (str, optional): The language (BCP-47 code) to use for the API. Supported languages: https://ai.google.dev/gemini-api/docs/live#supported-languages
            temperature (float, optional): Sampling temperature for response generation. Defaults to 0.8.
            vertexai (bool, optional): Whether to use VertexAI for the API. Defaults to False.
            project (str, optional): The Google Cloud project id to use for the API (VertexAI only). Defaults to None.
            location (str, optional): The Google Cloud location to use for the API (VertexAI only). Defaults to None.
            candidate_count (int, optional): The number of candidate responses to generate. Defaults to 1.
            top_p (float, optional): The top-p value for response generation.
            top_k (int, optional): The top-k value for response generation.
            presence_penalty (float, optional): The presence penalty for response generation.
            frequency_penalty (float, optional): The frequency penalty for response generation.
            input_audio_transcription (AudioTranscriptionConfig | None, optional): The configuration for input audio transcription. Defaults to AudioTranscriptionConfig().
            output_audio_transcription (AudioTranscriptionConfig | None, optional): The configuration for output audio transcription. Defaults to AudioTranscriptionConfig().
            image_encode_options (images.EncodeOptions, optional): The configuration for image encoding. Defaults to DEFAULT_ENCODE_OPTIONS.

        Raises:
            ValueError: If the API key is required but not found.
        """  # noqa: E501
        if not is_given(input_audio_transcription):
            input_audio_transcription = AudioTranscriptionConfig()
        if not is_given(output_audio_transcription):
            output_audio_transcription = AudioTranscriptionConfig()

        super().__init__(
            capabilities=llm.RealtimeCapabilities(
                message_truncation=False,
                turn_detection=True,
                user_transcription=input_audio_transcription is not None,
                auto_tool_reply_generation=True,
            )
        )

        if not is_given(model):
            if vertexai:
                model = "gemini-2.0-flash-exp"
            else:
                model = "gemini-2.0-flash-live-001"

        gemini_api_key = api_key if is_given(api_key) else os.environ.get("GOOGLE_API_KEY")
        gcp_project = project if is_given(project) else os.environ.get("GOOGLE_CLOUD_PROJECT")
        gcp_location = (
            location
            if is_given(location)
            else os.environ.get("GOOGLE_CLOUD_LOCATION") or "us-central1"
        )
        use_vertexai = (
            vertexai
            if is_given(vertexai)
            else os.environ.get("GOOGLE_GENAI_USE_VERTEXAI", "0").lower() in ["true", "1"]
        )

        if use_vertexai:
            if not gcp_project or not gcp_location:
                raise ValueError(
                    "Project is required for VertexAI via project kwarg or GOOGLE_CLOUD_PROJECT environment variable"  # noqa: E501
                )
            gemini_api_key = None  # VertexAI does not require an API key
        else:
            gcp_project = None
            gcp_location = None
            if not gemini_api_key:
                raise ValueError(
                    "API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable"  # noqa: E501
                )

        self._opts = _RealtimeOptions(
            model=model,
            api_key=gemini_api_key,
            voice=voice,
            response_modalities=modalities,
            vertexai=use_vertexai,
            project=gcp_project,
            location=gcp_location,
            candidate_count=candidate_count,
            temperature=temperature,
            max_output_tokens=max_output_tokens,
            top_p=top_p,
            top_k=top_k,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            instructions=instructions,
            input_audio_transcription=input_audio_transcription,
            output_audio_transcription=output_audio_transcription,
            language=language,
            image_encode_options=image_encode_options,
        )

        self._sessions = weakref.WeakSet[RealtimeSession]()

    def session(self) -> RealtimeSession:
        sess = RealtimeSession(self)
        self._sessions.add(sess)
        return sess

    def update_options(
        self, *, voice: NotGivenOr[str] = NOT_GIVEN, temperature: NotGivenOr[float] = NOT_GIVEN
    ) -> None:
        if is_given(voice):
            self._opts.voice = voice

        if is_given(temperature):
            self._opts.temperature = temperature

        for sess in self._sessions:
            sess.update_options(voice=self._opts.voice, temperature=self._opts.temperature)

    async def aclose(self) -> None:
        pass

Initializes a RealtimeModel instance for interacting with Google's Realtime API.

Environment Requirements:

  • For VertexAI: Set the GOOGLE_APPLICATION_CREDENTIALS environment variable to the path of the service account key file, or use any of the other Google Cloud auth methods. The Google Cloud project and location can be set via the project and location arguments or the environment variables GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION. By default, the project is inferred from the service account key file, and the location defaults to "us-central1".
  • For Google Gemini API: Set the api_key argument or the GOOGLE_API_KEY environment variable.

Args

instructions : str, optional
Initial system instructions for the model. Defaults to "".
api_key : str, optional
Google Gemini API key. If None, will attempt to read from the environment variable GOOGLE_API_KEY.
modalities : list[Modality], optional
Modalities to use, such as ["TEXT", "AUDIO"]. Defaults to ["AUDIO"].
model : str, optional
The name of the model to use. Defaults to "gemini-2.0-flash-live-001" or "gemini-2.0-flash-exp" (vertexai).
voice : api_proto.Voice, optional
Voice setting for audio outputs. Defaults to "Puck".
language : str, optional
The language (BCP-47 code) to use for the API. Supported languages: https://ai.google.dev/gemini-api/docs/live#supported-languages
temperature : float, optional
Sampling temperature for response generation. Defaults to 0.8.
vertexai : bool, optional
Whether to use VertexAI for the API. Defaults to False.
project : str, optional
The Google Cloud project id to use for the API (VertexAI only). Defaults to None.
location : str, optional
The Google Cloud location to use for the API (VertexAI only). Defaults to None.
candidate_count : int, optional
The number of candidate responses to generate. Defaults to 1.
top_p : float, optional
The top-p value for response generation.
top_k : int, optional
The top-k value for response generation.
presence_penalty : float, optional
The presence penalty for response generation.
frequency_penalty : float, optional
The frequency penalty for response generation.
input_audio_transcription : AudioTranscriptionConfig | None, optional
The configuration for input audio transcription. Defaults to AudioTranscriptionConfig().
output_audio_transcription : AudioTranscriptionConfig | None, optional
The configuration for output audio transcription. Defaults to AudioTranscriptionConfig().
image_encode_options : images.EncodeOptions, optional
The configuration for image encoding. Defaults to DEFAULT_ENCODE_OPTIONS.

Raises

ValueError
If the API key is required but not found.
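A minimal usage sketch, assuming the livekit-agents framework and this plugin are installed; the key, project, and instruction strings below are placeholders:

from livekit.plugins.google.beta.realtime import RealtimeModel

# Gemini API: the key can also come from the GOOGLE_API_KEY environment variable.
model = RealtimeModel(
    instructions="You are a helpful voice assistant.",
    api_key="YOUR_GOOGLE_API_KEY",
    voice="Puck",
    temperature=0.8,
)

# VertexAI: authenticate via GOOGLE_APPLICATION_CREDENTIALS (or another Google Cloud
# auth method) and supply the project/location here or via GOOGLE_CLOUD_PROJECT and
# GOOGLE_CLOUD_LOCATION.
vertex_model = RealtimeModel(
    vertexai=True,
    project="my-gcp-project",
    location="us-central1",
)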

Ancestors

  • livekit.agents.llm.realtime.RealtimeModel

Methods

async def aclose(self) ‑> None
async def aclose(self) -> None:
    pass
def session(self) ‑> RealtimeSession
def session(self) -> RealtimeSession:
    sess = RealtimeSession(self)
    self._sessions.add(sess)
    return sess
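A short sketch of creating a session from a configured model (the model variable from the constructor example above). The model keeps only a weak reference to each session, so later update_options calls reach every session that is still alive:

sess = model.session()  # returns a new RealtimeSession tracked by this RealtimeModel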
def update_options(self,
*,
voice: NotGivenOr[str] = NOT_GIVEN,
temperature: NotGivenOr[float] = NOT_GIVEN) ‑> None
def update_options(
    self, *, voice: NotGivenOr[str] = NOT_GIVEN, temperature: NotGivenOr[float] = NOT_GIVEN
) -> None:
    if is_given(voice):
        self._opts.voice = voice

    if is_given(temperature):
        self._opts.temperature = temperature

    for sess in self._sessions:
        sess.update_options(voice=self._opts.voice, temperature=self._opts.temperature)
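A small sketch of adjusting options after construction; the new voice and temperature are forwarded to every live session created from this model (the voice name "Charon" is only an illustrative value, use any voice supported by the API):

model.update_options(voice="Charon", temperature=0.6)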