Module livekit.plugins.google.beta.realtime
Sub-modules
livekit.plugins.google.beta.realtime.api_proto
livekit.plugins.google.beta.realtime.realtime_api
Classes
class RealtimeModel (*,
instructions: NotGivenOr[str] = NOT_GIVEN,
model: NotGivenOr[LiveAPIModels | str] = NOT_GIVEN,
api_key: NotGivenOr[str] = NOT_GIVEN,
voice: Voice | str = 'Puck',
language: NotGivenOr[str] = NOT_GIVEN,
modalities: NotGivenOr[list[Modality]] = NOT_GIVEN,
vertexai: NotGivenOr[bool] = NOT_GIVEN,
project: NotGivenOr[str] = NOT_GIVEN,
location: NotGivenOr[str] = NOT_GIVEN,
candidate_count: int = 1,
temperature: NotGivenOr[float] = NOT_GIVEN,
max_output_tokens: NotGivenOr[int] = NOT_GIVEN,
top_p: NotGivenOr[float] = NOT_GIVEN,
top_k: NotGivenOr[int] = NOT_GIVEN,
presence_penalty: NotGivenOr[float] = NOT_GIVEN,
frequency_penalty: NotGivenOr[float] = NOT_GIVEN,
input_audio_transcription: NotGivenOr[AudioTranscriptionConfig | None] = NOT_GIVEN,
output_audio_transcription: NotGivenOr[AudioTranscriptionConfig | None] = NOT_GIVEN,
image_encode_options: NotGivenOr[images.EncodeOptions] = NOT_GIVEN) -> None
Expand source code
class RealtimeModel(llm.RealtimeModel):
    """Realtime LLM adapter for Google's Gemini Live API (Gemini API or Vertex AI)."""

    def __init__(
        self,
        *,
        instructions: NotGivenOr[str] = NOT_GIVEN,
        model: NotGivenOr[LiveAPIModels | str] = NOT_GIVEN,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        voice: Voice | str = "Puck",
        language: NotGivenOr[str] = NOT_GIVEN,
        modalities: NotGivenOr[list[Modality]] = NOT_GIVEN,
        vertexai: NotGivenOr[bool] = NOT_GIVEN,
        project: NotGivenOr[str] = NOT_GIVEN,
        location: NotGivenOr[str] = NOT_GIVEN,
        candidate_count: int = 1,
        temperature: NotGivenOr[float] = NOT_GIVEN,
        max_output_tokens: NotGivenOr[int] = NOT_GIVEN,
        top_p: NotGivenOr[float] = NOT_GIVEN,
        top_k: NotGivenOr[int] = NOT_GIVEN,
        presence_penalty: NotGivenOr[float] = NOT_GIVEN,
        frequency_penalty: NotGivenOr[float] = NOT_GIVEN,
        input_audio_transcription: NotGivenOr[AudioTranscriptionConfig | None] = NOT_GIVEN,
        output_audio_transcription: NotGivenOr[AudioTranscriptionConfig | None] = NOT_GIVEN,
        image_encode_options: NotGivenOr[images.EncodeOptions] = NOT_GIVEN,
    ) -> None:
        """
        Initializes a RealtimeModel instance for interacting with Google's Realtime API.

        Environment Requirements:
        - For VertexAI: Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path
          of the service account key file or use any of the other Google Cloud auth methods. The
          Google Cloud project and location can be set via `project` and `location` arguments or
          the environment variables `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION`. By default,
          the project is inferred from the service account key file, and the location defaults to
          "us-central1".
        - For Google Gemini API: Set the `api_key` argument or the `GOOGLE_API_KEY` environment
          variable.

        Args:
            instructions (str, optional): Initial system instructions for the model. Defaults to "".
            model (str, optional): The name of the model to use. Defaults to
                "gemini-2.0-flash-live-001", or "gemini-2.0-flash-exp" when using VertexAI.
            api_key (str, optional): Google Gemini API key. If None, will attempt to read from the
                environment variable GOOGLE_API_KEY.
            voice (api_proto.Voice, optional): Voice setting for audio outputs. Defaults to "Puck".
            language (str, optional): The language (BCP-47 code) to use for the API.
                Supported languages: https://ai.google.dev/gemini-api/docs/live#supported-languages
            modalities (list[Modality], optional): Modalities to use, such as ["TEXT", "AUDIO"].
                Defaults to ["AUDIO"].
            vertexai (bool, optional): Whether to use VertexAI for the API. Defaults to False;
                may also be enabled via the GOOGLE_GENAI_USE_VERTEXAI environment variable.
            project (str, optional): The project id to use for the API (VertexAI only).
                Defaults to None.
            location (str, optional): The location to use for the API (VertexAI only).
                Defaults to "us-central1".
            candidate_count (int, optional): The number of candidate responses to generate.
                Defaults to 1.
            temperature (float, optional): Sampling temperature for response generation.
                Defaults to 0.8.
            max_output_tokens (int, optional): Maximum number of output tokens.
            top_p (float, optional): The top-p value for response generation.
            top_k (int, optional): The top-k value for response generation.
            presence_penalty (float, optional): The presence penalty for response generation.
            frequency_penalty (float, optional): The frequency penalty for response generation.
            input_audio_transcription (AudioTranscriptionConfig | None, optional): The
                configuration for input audio transcription. Defaults to
                AudioTranscriptionConfig(); pass None explicitly to disable.
            output_audio_transcription (AudioTranscriptionConfig | None, optional): The
                configuration for output audio transcription. Defaults to
                AudioTranscriptionConfig().
            image_encode_options (images.EncodeOptions, optional): The configuration for image
                encoding. Defaults to DEFAULT_ENCODE_OPTIONS.

        Raises:
            ValueError: If the API key is required but not found.
        """  # noqa: E501
        # Transcription is enabled by default; an explicit None disables it.
        if not is_given(input_audio_transcription):
            input_audio_transcription = AudioTranscriptionConfig()
        if not is_given(output_audio_transcription):
            output_audio_transcription = AudioTranscriptionConfig()

        super().__init__(
            capabilities=llm.RealtimeCapabilities(
                message_truncation=False,
                turn_detection=True,
                user_transcription=input_audio_transcription is not None,
                auto_tool_reply_generation=True,
            )
        )

        # Resolve credentials/backend from kwargs first, then the environment.
        gemini_api_key = api_key if is_given(api_key) else os.environ.get("GOOGLE_API_KEY")
        gcp_project = project if is_given(project) else os.environ.get("GOOGLE_CLOUD_PROJECT")
        gcp_location = (
            location
            if is_given(location)
            else os.environ.get("GOOGLE_CLOUD_LOCATION") or "us-central1"
        )
        use_vertexai = (
            vertexai
            if is_given(vertexai)
            else os.environ.get("GOOGLE_GENAI_USE_VERTEXAI", "0").lower() in ["true", "1"]
        )

        # FIX: select the default model from the *resolved* backend flag. The
        # original checked the raw `vertexai` kwarg here (falsy when NOT_GIVEN),
        # so enabling VertexAI only via GOOGLE_GENAI_USE_VERTEXAI still picked
        # the non-VertexAI default model.
        if not is_given(model):
            model = "gemini-2.0-flash-exp" if use_vertexai else "gemini-2.0-flash-live-001"

        if use_vertexai:
            if not gcp_project or not gcp_location:
                raise ValueError(
                    "Project is required for VertexAI via project kwarg or GOOGLE_CLOUD_PROJECT environment variable"  # noqa: E501
                )
            gemini_api_key = None  # VertexAI does not require an API key
        else:
            gcp_project = None
            gcp_location = None
            if not gemini_api_key:
                raise ValueError(
                    "API key is required for Google API either via api_key or GOOGLE_API_KEY environment variable"  # noqa: E501
                )

        self._opts = _RealtimeOptions(
            model=model,
            api_key=gemini_api_key,
            voice=voice,
            response_modalities=modalities,
            vertexai=use_vertexai,
            project=gcp_project,
            location=gcp_location,
            candidate_count=candidate_count,
            temperature=temperature,
            max_output_tokens=max_output_tokens,
            top_p=top_p,
            top_k=top_k,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            instructions=instructions,
            input_audio_transcription=input_audio_transcription,
            output_audio_transcription=output_audio_transcription,
            language=language,
            image_encode_options=image_encode_options,
        )
        # Weak references only: sessions clean themselves up when dropped.
        self._sessions = weakref.WeakSet[RealtimeSession]()

    def session(self) -> RealtimeSession:
        """Create a new RealtimeSession bound to this model and track it."""
        sess = RealtimeSession(self)
        self._sessions.add(sess)
        return sess

    def update_options(
        self, *, voice: NotGivenOr[str] = NOT_GIVEN, temperature: NotGivenOr[float] = NOT_GIVEN
    ) -> None:
        """Update voice/temperature and propagate the options to all live sessions."""
        if is_given(voice):
            self._opts.voice = voice
        if is_given(temperature):
            self._opts.temperature = temperature
        for sess in self._sessions:
            sess.update_options(voice=self._opts.voice, temperature=self._opts.temperature)

    async def aclose(self) -> None:
        """No-op: the model holds no resources requiring explicit shutdown."""
        pass
Initializes a RealtimeModel instance for interacting with Google's Realtime API.
Environment Requirements: - For VertexAI: Set the
GOOGLE_APPLICATION_CREDENTIALS
environment variable to the path of the service account key file or use any of the other Google Cloud auth methods. The Google Cloud project and location can be set via project
and location
arguments or the environment variables GOOGLE_CLOUD_PROJECT
and GOOGLE_CLOUD_LOCATION
. By default, the project is inferred from the service account key file, and the location defaults to "us-central1". - For Google Gemini API: Set the api_key
argument or the GOOGLE_API_KEY
environment variable.
Args
instructions
:str
, optional- Initial system instructions for the model. Defaults to "".
api_key
:str
, optional- Google Gemini API key. If None, will attempt to read from the environment variable GOOGLE_API_KEY.
modalities
:list[Modality]
, optional- Modalities to use, such as ["TEXT", "AUDIO"]. Defaults to ["AUDIO"].
model
:str
, optional- The name of the model to use. Defaults to "gemini-2.0-flash-live-001" or "gemini-2.0-flash-exp" (vertexai).
voice
:api_proto.Voice
, optional- Voice setting for audio outputs. Defaults to "Puck".
language
:str
, optional- The language (BCP-47 code) to use for the API. Supported languages - https://ai.google.dev/gemini-api/docs/live#supported-languages
temperature
:float
, optional- Sampling temperature for response generation. Defaults to 0.8.
vertexai
:bool
, optional- Whether to use VertexAI for the API. Defaults to False.
project (str, optional): The project id to use for the API. Defaults to None. (for vertexai)
location (str, optional): The location to use for the API. Defaults to None. (for vertexai)
candidate_count
:int
, optional- The number of candidate responses to generate. Defaults to 1.
top_p
:float
, optional- The top-p value for response generation
top_k
:int
, optional- The top-k value for response generation
presence_penalty
:float
, optional- The presence penalty for response generation
frequency_penalty
:float
, optional- The frequency penalty for response generation
input_audio_transcription
:AudioTranscriptionConfig | None
, optional- The configuration for input audio transcription. Defaults to AudioTranscriptionConfig().
output_audio_transcription
:AudioTranscriptionConfig | None
, optional- The configuration for output audio transcription. Defaults to AudioTranscriptionConfig().
image_encode_options
:images.EncodeOptions
, optional- The configuration for image encoding. Defaults to DEFAULT_ENCODE_OPTIONS.
Raises
ValueError
- If the API key is required but not found.
Ancestors
- livekit.agents.llm.realtime.RealtimeModel
Methods
async def aclose(self) ‑> None
-
Expand source code
async def aclose(self) -> None:
    """No-op: the model holds no resources requiring explicit shutdown."""
    pass
def session(self) ‑> RealtimeSession
-
Expand source code
def session(self) -> RealtimeSession:
    """Create a new RealtimeSession bound to this model and register it for option updates."""
    new_session = RealtimeSession(self)
    self._sessions.add(new_session)
    return new_session
def update_options(self,
*,
voice: NotGivenOr[str] = NOT_GIVEN,
temperature: NotGivenOr[float] = NOT_GIVEN) ‑> None
Expand source code
def update_options(
    self, *, voice: NotGivenOr[str] = NOT_GIVEN, temperature: NotGivenOr[float] = NOT_GIVEN
) -> None:
    """Apply any provided option overrides, then push the current voice and
    temperature to every live session."""
    # Only overwrite options the caller actually supplied.
    for attr_name, new_value in (("voice", voice), ("temperature", temperature)):
        if is_given(new_value):
            setattr(self._opts, attr_name, new_value)
    for active_session in self._sessions:
        active_session.update_options(
            voice=self._opts.voice, temperature=self._opts.temperature
        )