Module livekit.plugins.google.beta.realtime
Sub-modules
livekit.plugins.google.beta.realtime.api_proto
livekit.plugins.google.beta.realtime.realtime_api
livekit.plugins.google.beta.realtime.transcriber
Classes
class RealtimeModel (*,
instructions: str | None = None,
model: LiveAPIModels | str = 'gemini-2.0-flash-exp',
api_key: str | None = None,
api_version: str = 'v1alpha',
voice: Voice | str = 'Puck',
modalities: list[Modality] = [Modality.AUDIO],
enable_user_audio_transcription: bool = True,
enable_agent_audio_transcription: bool = True,
vertexai: bool = False,
project: str | None = None,
location: str | None = None,
candidate_count: int = 1,
temperature: float | None = None,
max_output_tokens: int | None = None,
top_p: float | None = None,
top_k: int | None = None,
presence_penalty: float | None = None,
frequency_penalty: float | None = None,
loop: asyncio.AbstractEventLoop | None = None)
class RealtimeModel:
    def __init__(
        self,
        *,
        instructions: str | None = None,
        model: LiveAPIModels | str = "gemini-2.0-flash-exp",
        api_key: str | None = None,
        api_version: str = "v1alpha",
        voice: Voice | str = "Puck",
        modalities: list[Modality] = [Modality.AUDIO],
        enable_user_audio_transcription: bool = True,
        enable_agent_audio_transcription: bool = True,
        vertexai: bool = False,
        project: str | None = None,
        location: str | None = None,
        candidate_count: int = 1,
        temperature: float | None = None,
        max_output_tokens: int | None = None,
        top_p: float | None = None,
        top_k: int | None = None,
        presence_penalty: float | None = None,
        frequency_penalty: float | None = None,
        loop: asyncio.AbstractEventLoop | None = None,
    ):
        """
        Initializes a RealtimeModel instance for interacting with Google's Realtime API.

        Environment Requirements:
        - For VertexAI: Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of the
          service account key file. The Google Cloud project and location can be set via the `project`
          and `location` arguments or the environment variables `GOOGLE_CLOUD_PROJECT` and
          `GOOGLE_CLOUD_LOCATION`. By default, the project is inferred from the service account key file,
          and the location defaults to "us-central1".
        - For Google Gemini API: Set the `api_key` argument or the `GOOGLE_API_KEY` environment variable.

        Args:
            instructions (str, optional): Initial system instructions for the model. Defaults to "".
            api_key (str or None, optional): Google Gemini API key. If None, will attempt to read from the environment variable GOOGLE_API_KEY.
            api_version (str, optional): The version of the API to use. Defaults to "v1alpha".
            modalities (list[Modality], optional): Modalities to use, such as ["TEXT", "AUDIO"]. Defaults to ["AUDIO"].
            model (str or None, optional): The name of the model to use. Defaults to "gemini-2.0-flash-exp".
            voice (api_proto.Voice, optional): Voice setting for audio outputs. Defaults to "Puck".
            enable_user_audio_transcription (bool, optional): Whether to enable user audio transcription. Defaults to True.
            enable_agent_audio_transcription (bool, optional): Whether to enable agent audio transcription. Defaults to True.
            temperature (float, optional): Sampling temperature for response generation. Defaults to 0.8.
            vertexai (bool, optional): Whether to use VertexAI for the API. Defaults to False.
            project (str or None, optional): The project id to use for the API (for VertexAI). Defaults to None.
            location (str or None, optional): The location to use for the API (for VertexAI). Defaults to None.
            candidate_count (int, optional): The number of candidate responses to generate. Defaults to 1.
            top_p (float, optional): The top-p value for response generation.
            top_k (int, optional): The top-k value for response generation.
            presence_penalty (float, optional): The presence penalty for response generation.
            frequency_penalty (float, optional): The frequency penalty for response generation.
            loop (asyncio.AbstractEventLoop or None, optional): Event loop to use for async operations. If None, the current event loop is used.

        Raises:
            ValueError: If the API key is not provided and cannot be found in environment variables.
        """
        super().__init__()
        self._capabilities = Capabilities(
            supports_truncate=False,
            input_audio_sample_rate=16000,
        )
        self._model = model
        self._loop = loop or asyncio.get_event_loop()
        self._api_key = api_key or os.environ.get("GOOGLE_API_KEY")
        self._project = project or os.environ.get("GOOGLE_CLOUD_PROJECT")
        self._location = location or os.environ.get("GOOGLE_CLOUD_LOCATION")
        if vertexai:
            if not self._project or not self._location:
                raise ValueError(
                    "Project and location are required for VertexAI either via project and location or GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION environment variables"
                )
            self._api_key = None  # VertexAI does not require an API key
        else:
            self._project = None
            self._location = None
            if not self._api_key:
                raise ValueError(
                    "API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable"
                )

        instructions_content = (
            Content(parts=[Part(text=instructions)]) if instructions else None
        )

        self._rt_sessions: list[GeminiRealtimeSession] = []
        self._opts = ModelOptions(
            model=model,
            api_version=api_version,
            api_key=self._api_key,
            voice=voice,
            enable_user_audio_transcription=enable_user_audio_transcription,
            enable_agent_audio_transcription=enable_agent_audio_transcription,
            response_modalities=modalities,
            vertexai=vertexai,
            project=self._project,
            location=self._location,
            candidate_count=candidate_count,
            temperature=temperature,
            max_output_tokens=max_output_tokens,
            top_p=top_p,
            top_k=top_k,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            instructions=instructions_content,
        )

    @property
    def sessions(self) -> list[GeminiRealtimeSession]:
        return self._rt_sessions

    @property
    def capabilities(self) -> Capabilities:
        return self._capabilities

    def session(
        self,
        *,
        chat_ctx: llm.ChatContext | None = None,
        fnc_ctx: llm.FunctionContext | None = None,
    ) -> GeminiRealtimeSession:
        session = GeminiRealtimeSession(
            opts=self._opts,
            chat_ctx=chat_ctx or llm.ChatContext(),
            fnc_ctx=fnc_ctx,
            loop=self._loop,
        )
        self._rt_sessions.append(session)
        return session

    async def aclose(self) -> None:
        for session in self._rt_sessions:
            await session.aclose()
Initializes a RealtimeModel instance for interacting with Google's Realtime API.
Environment Requirements:
- For VertexAI: Set the GOOGLE_APPLICATION_CREDENTIALS environment variable to the path of the service account key file. The Google Cloud project and location can be set via the project and location arguments or the environment variables GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION. By default, the project is inferred from the service account key file, and the location defaults to "us-central1".
- For Google Gemini API: Set the api_key argument or the GOOGLE_API_KEY environment variable.
Args
instructions : str, optional
    Initial system instructions for the model. Defaults to "".
api_key : str or None, optional
    Google Gemini API key. If None, will attempt to read from the environment variable GOOGLE_API_KEY.
api_version : str, optional
    The version of the API to use. Defaults to "v1alpha".
modalities : list[Modality], optional
    Modalities to use, such as ["TEXT", "AUDIO"]. Defaults to ["AUDIO"].
model : str or None, optional
    The name of the model to use. Defaults to "gemini-2.0-flash-exp".
voice : api_proto.Voice, optional
    Voice setting for audio outputs. Defaults to "Puck".
enable_user_audio_transcription : bool, optional
    Whether to enable user audio transcription. Defaults to True.
enable_agent_audio_transcription : bool, optional
    Whether to enable agent audio transcription. Defaults to True.
temperature : float, optional
    Sampling temperature for response generation. Defaults to 0.8.
vertexai : bool, optional
    Whether to use VertexAI for the API. Defaults to False.
project : str or None, optional
    The project id to use for the API (for VertexAI). Defaults to None.
location : str or None, optional
    The location to use for the API (for VertexAI). Defaults to None.
candidate_count : int, optional
    The number of candidate responses to generate. Defaults to 1.
top_p : float, optional
    The top-p value for response generation.
top_k : int, optional
    The top-k value for response generation.
presence_penalty : float, optional
    The presence penalty for response generation.
frequency_penalty : float, optional
    The frequency penalty for response generation.
loop : asyncio.AbstractEventLoop or None, optional
    Event loop to use for async operations. If None, the current event loop is used.
Raises
ValueError
    If the API key is not provided and cannot be found in environment variables.
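A minimal usage sketch (assuming the plugin is importable as livekit.plugins.google; the parameter values and the project id below are illustrative, not required defaults):

from livekit.plugins.google.beta.realtime import RealtimeModel

# Gemini API: api_key falls back to the GOOGLE_API_KEY environment variable if not passed.
model = RealtimeModel(
    instructions="You are a helpful voice assistant.",
    voice="Puck",
    temperature=0.8,
)

# VertexAI: no API key is used; credentials come from GOOGLE_APPLICATION_CREDENTIALS,
# and project/location come from the arguments below or from
# GOOGLE_CLOUD_PROJECT / GOOGLE_CLOUD_LOCATION.
vertex_model = RealtimeModel(
    vertexai=True,
    project="my-gcp-project",  # hypothetical project id
    location="us-central1",
)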
Instance variables
prop capabilities : Capabilities
@property
def capabilities(self) -> Capabilities:
    return self._capabilities
prop sessions : list[GeminiRealtimeSession]
@property
def sessions(self) -> list[GeminiRealtimeSession]:
    return self._rt_sessions
Methods
async def aclose(self) ‑> None
async def aclose(self) -> None:
    for session in self._rt_sessions:
        await session.aclose()
def session(self,
*,
chat_ctx: llm.ChatContext | None = None,
fnc_ctx: llm.FunctionContext | None = None) ‑> GeminiRealtimeSession
def session(
    self,
    *,
    chat_ctx: llm.ChatContext | None = None,
    fnc_ctx: llm.FunctionContext | None = None,
) -> GeminiRealtimeSession:
    session = GeminiRealtimeSession(
        opts=self._opts,
        chat_ctx=chat_ctx or llm.ChatContext(),
        fnc_ctx=fnc_ctx,
        loop=self._loop,
    )
    self._rt_sessions.append(session)
    return session
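A sketch of how session() and aclose() are typically used together (this assumes llm refers to livekit.agents.llm, as in the source above; chat_ctx and fnc_ctx are both optional):

from livekit.agents import llm
from livekit.plugins.google.beta.realtime import RealtimeModel

async def run(model: RealtimeModel) -> None:
    # Start a realtime session with a fresh chat context; fnc_ctx could carry tool definitions.
    session = model.session(chat_ctx=llm.ChatContext())

    # ... push audio frames to / consume responses from the session here ...

    # Closing the model closes every session it created.
    await model.aclose()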