Module livekit.plugins.google.beta.realtime

Sub-modules

livekit.plugins.google.beta.realtime.api_proto
livekit.plugins.google.beta.realtime.realtime_api

Classes

class RealtimeModel (*,
instructions: NotGivenOr[str] = NOT_GIVEN,
model: NotGivenOr[LiveAPIModels | str] = NOT_GIVEN,
api_key: NotGivenOr[str] = NOT_GIVEN,
voice: Voice | str = 'Puck',
language: NotGivenOr[str] = NOT_GIVEN,
modalities: NotGivenOr[list[Modality]] = NOT_GIVEN,
vertexai: NotGivenOr[bool] = NOT_GIVEN,
project: NotGivenOr[str] = NOT_GIVEN,
location: NotGivenOr[str] = NOT_GIVEN,
candidate_count: int = 1,
temperature: NotGivenOr[float] = NOT_GIVEN,
max_output_tokens: NotGivenOr[int] = NOT_GIVEN,
top_p: NotGivenOr[float] = NOT_GIVEN,
top_k: NotGivenOr[int] = NOT_GIVEN,
presence_penalty: NotGivenOr[float] = NOT_GIVEN,
frequency_penalty: NotGivenOr[float] = NOT_GIVEN,
input_audio_transcription: NotGivenOr[AudioTranscriptionConfig | None] = NOT_GIVEN,
output_audio_transcription: NotGivenOr[AudioTranscriptionConfig | None] = NOT_GIVEN,
image_encode_options: NotGivenOr[images.EncodeOptions] = NOT_GIVEN)
class RealtimeModel(llm.RealtimeModel):
    def __init__(
        self,
        *,
        instructions: NotGivenOr[str] = NOT_GIVEN,
        model: NotGivenOr[LiveAPIModels | str] = NOT_GIVEN,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        voice: Voice | str = "Puck",
        language: NotGivenOr[str] = NOT_GIVEN,
        modalities: NotGivenOr[list[Modality]] = NOT_GIVEN,
        vertexai: NotGivenOr[bool] = NOT_GIVEN,
        project: NotGivenOr[str] = NOT_GIVEN,
        location: NotGivenOr[str] = NOT_GIVEN,
        candidate_count: int = 1,
        temperature: NotGivenOr[float] = NOT_GIVEN,
        max_output_tokens: NotGivenOr[int] = NOT_GIVEN,
        top_p: NotGivenOr[float] = NOT_GIVEN,
        top_k: NotGivenOr[int] = NOT_GIVEN,
        presence_penalty: NotGivenOr[float] = NOT_GIVEN,
        frequency_penalty: NotGivenOr[float] = NOT_GIVEN,
        input_audio_transcription: NotGivenOr[AudioTranscriptionConfig | None] = NOT_GIVEN,
        output_audio_transcription: NotGivenOr[AudioTranscriptionConfig | None] = NOT_GIVEN,
        image_encode_options: NotGivenOr[images.EncodeOptions] = NOT_GIVEN,
    ) -> None:
        """
        Initializes a RealtimeModel instance for interacting with Google's Realtime API.

        Environment Requirements:
        - For VertexAI: Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of the service account key file or use any of the other Google Cloud auth methods.
        The Google Cloud project and location can be set via `project` and `location` arguments or the environment variables
        `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION`. By default, the project is inferred from the service account key file,
        and the location defaults to "us-central1".
        - For Google Gemini API: Set the `api_key` argument or the `GOOGLE_API_KEY` environment variable.

        Args:
            instructions (str, optional): Initial system instructions for the model. Defaults to "".
            api_key (str, optional): Google Gemini API key. If None, will attempt to read from the environment variable GOOGLE_API_KEY.
            modalities (list[Modality], optional): Modalities to use, such as ["TEXT", "AUDIO"]. Defaults to ["AUDIO"].
            model (str, optional): The name of the model to use. Defaults to "gemini-2.0-flash-live-001" or "gemini-2.0-flash-exp" (vertexai).
            voice (api_proto.Voice, optional): Voice setting for audio outputs. Defaults to "Puck".
            language (str, optional): The language (BCP-47 code) to use for the API. Supported languages: https://ai.google.dev/gemini-api/docs/live#supported-languages
            temperature (float, optional): Sampling temperature for response generation. Defaults to 0.8.
            vertexai (bool, optional): Whether to use VertexAI for the API. Defaults to False.
            project (str, optional): The Google Cloud project id to use for the API (VertexAI only). Defaults to None.
            location (str, optional): The Google Cloud location to use for the API (VertexAI only). Defaults to None.
            candidate_count (int, optional): The number of candidate responses to generate. Defaults to 1.
            top_p (float, optional): The top-p value for response generation.
            top_k (int, optional): The top-k value for response generation.
            presence_penalty (float, optional): The presence penalty for response generation.
            frequency_penalty (float, optional): The frequency penalty for response generation.
            input_audio_transcription (AudioTranscriptionConfig | None, optional): The configuration for input audio transcription. Defaults to AudioTranscriptionConfig().
            output_audio_transcription (AudioTranscriptionConfig | None, optional): The configuration for output audio transcription. Defaults to AudioTranscriptionConfig().
            image_encode_options (images.EncodeOptions, optional): The configuration for image encoding. Defaults to DEFAULT_ENCODE_OPTIONS.

        Raises:
            ValueError: If the API key is required but not found.
        """  # noqa: E501
        if not is_given(input_audio_transcription):
            input_audio_transcription = AudioTranscriptionConfig()
        if not is_given(output_audio_transcription):
            output_audio_transcription = AudioTranscriptionConfig()

        super().__init__(
            capabilities=llm.RealtimeCapabilities(
                message_truncation=False,
                turn_detection=True,
                user_transcription=input_audio_transcription is not None,
                auto_tool_reply_generation=True,
            )
        )

        if not is_given(model):
            if vertexai:
                model = "gemini-2.0-flash-exp"
            else:
                model = "gemini-2.0-flash-live-001"

        gemini_api_key = api_key if is_given(api_key) else os.environ.get("GOOGLE_API_KEY")
        gcp_project = project if is_given(project) else os.environ.get("GOOGLE_CLOUD_PROJECT")
        gcp_location = (
            location
            if is_given(location)
            else os.environ.get("GOOGLE_CLOUD_LOCATION") or "us-central1"
        )
        use_vertexai = (
            vertexai
            if is_given(vertexai)
            else os.environ.get("GOOGLE_GENAI_USE_VERTEXAI", "0").lower() in ["true", "1"]
        )

        if use_vertexai:
            if not gcp_project or not gcp_location:
                raise ValueError(
                    "Project is required for VertexAI via project kwarg or GOOGLE_CLOUD_PROJECT environment variable"  # noqa: E501
                )
            gemini_api_key = None  # VertexAI does not require an API key
        else:
            gcp_project = None
            gcp_location = None
            if not gemini_api_key:
                raise ValueError(
                    "API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable"  # noqa: E501
                )

        self._opts = _RealtimeOptions(
            model=model,
            api_key=gemini_api_key,
            voice=voice,
            response_modalities=modalities,
            vertexai=use_vertexai,
            project=gcp_project,
            location=gcp_location,
            candidate_count=candidate_count,
            temperature=temperature,
            max_output_tokens=max_output_tokens,
            top_p=top_p,
            top_k=top_k,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            instructions=instructions,
            input_audio_transcription=input_audio_transcription,
            output_audio_transcription=output_audio_transcription,
            language=language,
            image_encode_options=image_encode_options,
        )

        self._sessions = weakref.WeakSet[RealtimeSession]()

    def session(self) -> RealtimeSession:
        sess = RealtimeSession(self)
        self._sessions.add(sess)
        return sess

    def update_options(
        self, *, voice: NotGivenOr[str] = NOT_GIVEN, temperature: NotGivenOr[float] = NOT_GIVEN
    ) -> None:
        if is_given(voice):
            self._opts.voice = voice

        if is_given(temperature):
            self._opts.temperature = temperature

        for sess in self._sessions:
            sess.update_options(voice=self._opts.voice, temperature=self._opts.temperature)

    async def aclose(self) -> None:
        pass

Initializes a RealtimeModel instance for interacting with Google's Realtime API.

Environment Requirements:

  • For VertexAI: Set the GOOGLE_APPLICATION_CREDENTIALS environment variable to the path of the service account key file, or use any of the other Google Cloud auth methods. The Google Cloud project and location can be set via the project and location arguments or the environment variables GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION. By default, the project is inferred from the service account key file, and the location defaults to "us-central1".
  • For Google Gemini API: Set the api_key argument or the GOOGLE_API_KEY environment variable.

Args

instructions : str, optional
Initial system instructions for the model. Defaults to "".
api_key : str, optional
Google Gemini API key. If None, will attempt to read from the environment variable GOOGLE_API_KEY.
modalities : list[Modality], optional
Modalities to use, such as ["TEXT", "AUDIO"]. Defaults to ["AUDIO"].
model : str, optional
The name of the model to use. Defaults to "gemini-2.0-flash-live-001" or "gemini-2.0-flash-exp" (vertexai).
voice : api_proto.Voice, optional
Voice setting for audio outputs. Defaults to "Puck".
language : str, optional
The language (BCP-47 code) to use for the API. Supported languages: https://ai.google.dev/gemini-api/docs/live#supported-languages
temperature : float, optional
Sampling temperature for response generation. Defaults to 0.8.
vertexai : bool, optional
Whether to use VertexAI for the API. Defaults to False.
project : str, optional
The Google Cloud project id to use for the API (VertexAI only). Defaults to None.
location : str, optional
The Google Cloud location to use for the API (VertexAI only). Defaults to None.
candidate_count : int, optional
The number of candidate responses to generate. Defaults to 1.
top_p : float, optional
The top-p value for response generation.
top_k : int, optional
The top-k value for response generation.
presence_penalty : float, optional
The presence penalty for response generation.
frequency_penalty : float, optional
The frequency penalty for response generation.
input_audio_transcription : AudioTranscriptionConfig | None, optional
The configuration for input audio transcription. Defaults to AudioTranscriptionConfig().
output_audio_transcription : AudioTranscriptionConfig | None, optional
The configuration for output audio transcription. Defaults to AudioTranscriptionConfig().
image_encode_options : images.EncodeOptions, optional
The configuration for image encoding. Defaults to DEFAULT_ENCODE_OPTIONS.

Raises

ValueError
If the API key is required but not found.
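A minimal usage sketch, assuming the livekit-agents framework and this plugin are installed; the key, project, and instruction strings below are placeholders:

from livekit.plugins.google.beta.realtime import RealtimeModel

# Gemini API: the key can also come from the GOOGLE_API_KEY environment variable.
model = RealtimeModel(
    instructions="You are a helpful voice assistant.",
    api_key="YOUR_GOOGLE_API_KEY",
    voice="Puck",
    temperature=0.8,
)

# VertexAI: authenticate via GOOGLE_APPLICATION_CREDENTIALS (or another Google Cloud
# auth method) and supply the project/location here or via GOOGLE_CLOUD_PROJECT and
# GOOGLE_CLOUD_LOCATION.
vertex_model = RealtimeModel(
    vertexai=True,
    project="my-gcp-project",
    location="us-central1",
)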

Ancestors

  • livekit.agents.llm.realtime.RealtimeModel

Methods

async def aclose(self) ‑> None
async def aclose(self) -> None:
    pass
def session(self) ‑> RealtimeSession
def session(self) -> RealtimeSession:
    sess = RealtimeSession(self)
    self._sessions.add(sess)
    return sess
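A short sketch of creating a session from a configured model (the model variable from the constructor example above). The model keeps only a weak reference to each session, so later update_options calls reach every session that is still alive:

sess = model.session()  # returns a new RealtimeSession tracked by this RealtimeModel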
def update_options(self,
*,
voice: NotGivenOr[str] = NOT_GIVEN,
temperature: NotGivenOr[float] = NOT_GIVEN) ‑> None
def update_options(
    self, *, voice: NotGivenOr[str] = NOT_GIVEN, temperature: NotGivenOr[float] = NOT_GIVEN
) -> None:
    if is_given(voice):
        self._opts.voice = voice

    if is_given(temperature):
        self._opts.temperature = temperature

    for sess in self._sessions:
        sess.update_options(voice=self._opts.voice, temperature=self._opts.temperature)
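A small sketch of adjusting options after construction; the new voice and temperature are forwarded to every live session created from this model (the voice name "Charon" is only an illustrative value, use any voice supported by the API):

model.update_options(voice="Charon", temperature=0.6)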