Module livekit.plugins.google.beta.realtime

Sub-modules

livekit.plugins.google.beta.realtime.api_proto
livekit.plugins.google.beta.realtime.realtime_api
livekit.plugins.google.beta.realtime.transcriber

Classes

class RealtimeModel (*,
instructions: str | None = None,
model: LiveAPIModels | str = 'gemini-2.0-flash-exp',
api_key: str | None = None,
api_version: str = 'v1alpha',
voice: Voice | str = 'Puck',
modalities: list[Modality] = [Modality.AUDIO],
enable_user_audio_transcription: bool = True,
enable_agent_audio_transcription: bool = True,
vertexai: bool = False,
project: str | None = None,
location: str | None = None,
candidate_count: int = 1,
temperature: float | None = None,
max_output_tokens: int | None = None,
top_p: float | None = None,
top_k: int | None = None,
presence_penalty: float | None = None,
frequency_penalty: float | None = None,
loop: asyncio.AbstractEventLoop | None = None)
Expand source code
class RealtimeModel:
    def __init__(
        self,
        *,
        instructions: str | None = None,
        model: LiveAPIModels | str = "gemini-2.0-flash-exp",
        api_key: str | None = None,
        api_version: str = "v1alpha",
        voice: Voice | str = "Puck",
        modalities: list[Modality] = [Modality.AUDIO],
        enable_user_audio_transcription: bool = True,
        enable_agent_audio_transcription: bool = True,
        vertexai: bool = False,
        project: str | None = None,
        location: str | None = None,
        candidate_count: int = 1,
        temperature: float | None = None,
        max_output_tokens: int | None = None,
        top_p: float | None = None,
        top_k: int | None = None,
        presence_penalty: float | None = None,
        frequency_penalty: float | None = None,
        loop: asyncio.AbstractEventLoop | None = None,
    ):
        """
        Initializes a RealtimeModel instance for interacting with Google's Realtime API.

        Environment Requirements:
        - For VertexAI: Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of the service account key file.
        The Google Cloud project and location can be set via `project` and `location` arguments or the environment variables
        `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION`. By default, the project is inferred from the service account key file,
        and the location defaults to "us-central1".
        - For Google Gemini API: Set the `api_key` argument or the `GOOGLE_API_KEY` environment variable.

        Args:
            instructions (str, optional): Initial system instructions for the model. Defaults to None.
            api_key (str or None, optional): Google Gemini API key. If None, will attempt to read from the environment variable GOOGLE_API_KEY.
            api_version (str, optional): The version of the API to use. Defaults to "v1alpha".
            modalities (list[Modality], optional): Modalities to use, such as ["TEXT", "AUDIO"]. Defaults to ["AUDIO"].
            model (str or None, optional): The name of the model to use. Defaults to "gemini-2.0-flash-exp".
            voice (api_proto.Voice, optional): Voice setting for audio outputs. Defaults to "Puck".
            enable_user_audio_transcription (bool, optional): Whether to enable user audio transcription. Defaults to True.
            enable_agent_audio_transcription (bool, optional): Whether to enable agent audio transcription. Defaults to True.
            temperature (float, optional): Sampling temperature for response generation. Defaults to None, in which case the API's default is used.
            vertexai (bool, optional): Whether to use VertexAI for the API. Defaults to False.
            project (str or None, optional): The Google Cloud project id to use for the API (VertexAI only). Defaults to None.
            location (str or None, optional): The Google Cloud location to use for the API (VertexAI only). Defaults to None.
            candidate_count (int, optional): The number of candidate responses to generate. Defaults to 1.
            max_output_tokens (int or None, optional): The maximum number of output tokens to generate. Defaults to None.
            top_p (float, optional): The top-p value for response generation.
            top_k (int, optional): The top-k value for response generation.
            presence_penalty (float, optional): The presence penalty for response generation.
            frequency_penalty (float, optional): The frequency penalty for response generation.
            loop (asyncio.AbstractEventLoop or None, optional): Event loop to use for async operations. If None, the current event loop is used.

        Raises:
            ValueError: If the API key is not provided and cannot be found in environment variables.
        """
        super().__init__()
        self._capabilities = Capabilities(
            supports_truncate=False,
            input_audio_sample_rate=16000,
        )
        self._model = model
        self._loop = loop or asyncio.get_event_loop()
        self._api_key = api_key or os.environ.get("GOOGLE_API_KEY")
        self._project = project or os.environ.get("GOOGLE_CLOUD_PROJECT")
        self._location = location or os.environ.get("GOOGLE_CLOUD_LOCATION")
        if vertexai:
            if not self._project or not self._location:
                raise ValueError(
                    "Project and location are required for VertexAI either via project and location or GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION environment variables"
                )
            self._api_key = None  # VertexAI does not require an API key

        else:
            self._project = None
            self._location = None
            if not self._api_key:
                raise ValueError(
                    "API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable"
                )

        instructions_content = (
            Content(parts=[Part(text=instructions)]) if instructions else None
        )

        self._rt_sessions: list[GeminiRealtimeSession] = []
        self._opts = ModelOptions(
            model=model,
            api_version=api_version,
            api_key=self._api_key,
            voice=voice,
            enable_user_audio_transcription=enable_user_audio_transcription,
            enable_agent_audio_transcription=enable_agent_audio_transcription,
            response_modalities=modalities,
            vertexai=vertexai,
            project=self._project,
            location=self._location,
            candidate_count=candidate_count,
            temperature=temperature,
            max_output_tokens=max_output_tokens,
            top_p=top_p,
            top_k=top_k,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            instructions=instructions_content,
        )

    @property
    def sessions(self) -> list[GeminiRealtimeSession]:
        return self._rt_sessions

    @property
    def capabilities(self) -> Capabilities:
        return self._capabilities

    def session(
        self,
        *,
        chat_ctx: llm.ChatContext | None = None,
        fnc_ctx: llm.FunctionContext | None = None,
    ) -> GeminiRealtimeSession:
        session = GeminiRealtimeSession(
            opts=self._opts,
            chat_ctx=chat_ctx or llm.ChatContext(),
            fnc_ctx=fnc_ctx,
            loop=self._loop,
        )
        self._rt_sessions.append(session)

        return session

    async def aclose(self) -> None:
        for session in self._rt_sessions:
            await session.aclose()

Initializes a RealtimeModel instance for interacting with Google's Realtime API.

Environment Requirements:

- For VertexAI: Set the GOOGLE_APPLICATION_CREDENTIALS environment variable to the path of the service account key file. The Google Cloud project and location can be set via the project and location arguments or the environment variables GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION. By default, the project is inferred from the service account key file, and the location defaults to "us-central1".
- For Google Gemini API: Set the api_key argument or the GOOGLE_API_KEY environment variable.
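
A minimal configuration sketch of the two paths above. The import path follows this module's name; the API key, key-file path, and project id are placeholders:

import os

from livekit.plugins.google.beta.realtime import RealtimeModel

# Path 1: Google Gemini API. Authenticate with an API key.
os.environ["GOOGLE_API_KEY"] = "<your-api-key>"  # or pass api_key=... directly
gemini_model = RealtimeModel()

# Path 2: VertexAI. Authenticate with a service account key file and give the
# project/location explicitly (or via GOOGLE_CLOUD_PROJECT / GOOGLE_CLOUD_LOCATION).
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account.json"
vertex_model = RealtimeModel(
    vertexai=True,
    project="my-gcp-project",  # placeholder project id
    location="us-central1",
)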

Args

instructions : str, optional
Initial system instructions for the model. Defaults to None.
api_key : str or None, optional
Google Gemini API key. If None, will attempt to read from the environment variable GOOGLE_API_KEY.
api_version : str, optional
The version of the API to use. Defaults to "v1alpha".
modalities : list[Modality], optional
Modalities to use, such as ["TEXT", "AUDIO"]. Defaults to ["AUDIO"].
model : str or None, optional
The name of the model to use. Defaults to "gemini-2.0-flash-exp".
voice : api_proto.Voice, optional
Voice setting for audio outputs. Defaults to "Puck".
enable_user_audio_transcription : bool, optional
Whether to enable user audio transcription. Defaults to True.
enable_agent_audio_transcription : bool, optional
Whether to enable agent audio transcription. Defaults to True.
temperature : float, optional
Sampling temperature for response generation. Defaults to None, in which case the API's default is used.
vertexai : bool, optional
Whether to use VertexAI for the API. Defaults to False.
project : str or None, optional
The Google Cloud project id to use for the API (VertexAI only). Defaults to None.
location : str or None, optional
The Google Cloud location to use for the API (VertexAI only). Defaults to None.
candidate_count : int, optional
The number of candidate responses to generate. Defaults to 1.
max_output_tokens : int or None, optional
The maximum number of output tokens to generate. Defaults to None.
top_p : float, optional
The top-p value for response generation.
top_k : int, optional
The top-k value for response generation.
presence_penalty : float, optional
The presence penalty for response generation.
frequency_penalty : float, optional
The frequency penalty for response generation.
loop : asyncio.AbstractEventLoop or None, optional
Event loop to use for async operations. If None, the current event loop is used.

Raises

ValueError
If the API key is not provided and cannot be found in environment variables.
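
For reference, a hedged construction sketch; the prompt and sampling values below are illustrative assumptions, not recommended settings:

from livekit.plugins.google.beta.realtime import RealtimeModel

# Assumes GOOGLE_API_KEY is set; see the environment requirements above.
model = RealtimeModel(
    instructions="You are a helpful voice assistant.",  # illustrative prompt
    voice="Puck",
    temperature=0.8,         # illustrative value
    max_output_tokens=1024,  # illustrative value
    enable_user_audio_transcription=True,
    enable_agent_audio_transcription=True,
)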

Instance variables

prop capabilities : Capabilities
Expand source code
@property
def capabilities(self) -> Capabilities:
    return self._capabilities
prop sessions : list[GeminiRealtimeSession]
Expand source code
@property
def sessions(self) -> list[GeminiRealtimeSession]:
    return self._rt_sessions

Methods

async def aclose(self) -> None
Expand source code
async def aclose(self) -> None:
    for session in self._rt_sessions:
        await session.aclose()
def session(self,
*,
chat_ctx: llm.ChatContext | None = None,
fnc_ctx: llm.FunctionContext | None = None) -> GeminiRealtimeSession
Expand source code
def session(
    self,
    *,
    chat_ctx: llm.ChatContext | None = None,
    fnc_ctx: llm.FunctionContext | None = None,
) -> GeminiRealtimeSession:
    session = GeminiRealtimeSession(
        opts=self._opts,
        chat_ctx=chat_ctx or llm.ChatContext(),
        fnc_ctx=fnc_ctx,
        loop=self._loop,
    )
    self._rt_sessions.append(session)

    return session
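
A short usage sketch tying session() and aclose() together. It assumes the llm module is the one from livekit.agents that this plugin builds on, and that credentials are already configured as described above:

import asyncio

from livekit.agents import llm
from livekit.plugins.google.beta.realtime import RealtimeModel


async def main() -> None:
    model = RealtimeModel()  # assumes GOOGLE_API_KEY is set in the environment

    # Each call to session() creates a GeminiRealtimeSession and tracks it in model.sessions.
    session = model.session(
        chat_ctx=llm.ChatContext(),  # start from an empty chat context
        fnc_ctx=None,                # no function-calling context in this sketch
    )

    # ... exchange audio/text with the Realtime API through `session` ...

    # aclose() closes every session created by this model.
    await model.aclose()


asyncio.run(main())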