This example shows how to build a simple voice-to-voice translator: listen in English, translate with an LLM, and speak the result in French with ElevenLabs TTS. Instead of using LiveKit Inference, this example uses agent plugins to connect directly to Deepgram (speech-to-text), OpenAI (LLM), and ElevenLabs (text-to-speech).
Prerequisites
- Add a `.env` file in this directory with your credentials:
  LIVEKIT_URL=your_livekit_url
  LIVEKIT_API_KEY=your_api_key
  LIVEKIT_API_SECRET=your_api_secret
  OPENAI_API_KEY=your_api_key
  ELEVENLABS_API_KEY=your_api_key
  DEEPGRAM_API_KEY=your_api_key
- Install dependencies:
  pip install "livekit-agents[silero,openai,elevenlabs,deepgram]" python-dotenv
Load environment, logging, and define an AgentServer
Load your .env, set up logging to trace translation events, and create the AgentServer instance used by the rest of the example.
import logging

from dotenv import load_dotenv
from livekit.agents import JobContext, JobProcess, AgentServer, cli, Agent, AgentSession
from livekit.plugins import openai, silero, deepgram, elevenlabs

# Load credentials from the local .env file into the environment.
load_dotenv()

# Named logger for this example, emitting INFO and above.
logger = logging.getLogger("pipeline-translator")
logger.setLevel(logging.INFO)

# Create the agent server.
server = AgentServer()
Define the translation agent
Keep the agent lightweight with focused instructions: always translate from English to French and respond only with the translation.
import logging

from dotenv import load_dotenv
from livekit.agents import JobContext, JobProcess, AgentServer, cli, Agent, AgentSession
from livekit.plugins import openai, silero, deepgram, elevenlabs

# Load credentials from the local .env file into the environment.
load_dotenv()

# Named logger for this example, emitting INFO and above.
logger = logging.getLogger("pipeline-translator")
logger.setLevel(logging.INFO)

# Create the agent server.
server = AgentServer()


class TranslatorAgent(Agent):
    """Agent instructed to translate the user's English speech into French."""

    def __init__(self) -> None:
        """Configure the agent with translation-only instructions."""
        super().__init__(
            instructions="""
                You are a translator. You translate the user's speech from English to French.
                Every message you receive, translate it directly into French.
                Do not respond with anything else but the translation.
            """
        )

    async def on_enter(self):
        """Ask the session to generate a reply when the agent enters."""
        self.session.generate_reply()
Prewarm VAD for faster connections
Preload the VAD model once per process to reduce connection latency.
import logging

from dotenv import load_dotenv
from livekit.agents import JobContext, JobProcess, AgentServer, cli, Agent, AgentSession
from livekit.plugins import openai, silero, deepgram, elevenlabs

# Load credentials from the local .env file into the environment.
load_dotenv()

# Named logger for this example, emitting INFO and above.
logger = logging.getLogger("pipeline-translator")
logger.setLevel(logging.INFO)

# Create the agent server.
server = AgentServer()


class TranslatorAgent(Agent):
    """Agent instructed to translate the user's English speech into French."""

    def __init__(self) -> None:
        """Configure the agent with translation-only instructions."""
        super().__init__(
            instructions="""
                You are a translator. You translate the user's speech from English to French.
                Every message you receive, translate it directly into French.
                Do not respond with anything else but the translation.
            """
        )

    async def on_enter(self):
        """Ask the session to generate a reply when the agent enters."""
        self.session.generate_reply()


def prewarm(proc: JobProcess):
    """Load the Silero VAD model and store it in the process userdata under "vad"."""
    proc.userdata["vad"] = silero.VAD.load()


# Register prewarm as the server's setup function.
server.setup_fnc = prewarm
Define the rtc session with translation pipeline
Create the session with Deepgram STT, OpenAI LLM, and ElevenLabs multilingual TTS for French output.
import logging

from dotenv import load_dotenv
from livekit.agents import JobContext, JobProcess, AgentServer, cli, Agent, AgentSession
from livekit.plugins import openai, silero, deepgram, elevenlabs

# Load credentials from the local .env file into the environment.
load_dotenv()

# Named logger for this example, emitting INFO and above.
logger = logging.getLogger("pipeline-translator")
logger.setLevel(logging.INFO)

# Create the agent server.
server = AgentServer()


class TranslatorAgent(Agent):
    """Agent instructed to translate the user's English speech into French."""

    def __init__(self) -> None:
        """Configure the agent with translation-only instructions."""
        super().__init__(
            instructions="""
                You are a translator. You translate the user's speech from English to French.
                Every message you receive, translate it directly into French.
                Do not respond with anything else but the translation.
            """
        )

    async def on_enter(self):
        """Ask the session to generate a reply when the agent enters."""
        self.session.generate_reply()


def prewarm(proc: JobProcess):
    """Load the Silero VAD model and store it in the process userdata under "vad"."""
    proc.userdata["vad"] = silero.VAD.load()


# Register prewarm as the server's setup function.
server.setup_fnc = prewarm


@server.rtc_session()
async def entrypoint(ctx: JobContext):
    """Start a translation session in the job's room, then connect."""
    # Attach the room name to the job's log context fields.
    ctx.log_context_fields = {"room": ctx.room.name}

    # Pipeline: Deepgram STT -> OpenAI LLM -> ElevenLabs multilingual TTS,
    # reusing the VAD instance stored by prewarm.
    session = AgentSession(
        stt=deepgram.STT(),
        llm=openai.LLM(),
        tts=elevenlabs.TTS(model="eleven_multilingual_v2"),
        vad=ctx.proc.userdata["vad"],
        preemptive_generation=True,
    )

    await session.start(agent=TranslatorAgent(), room=ctx.room)
    await ctx.connect()
import logging

from dotenv import load_dotenv
from livekit.agents import JobContext, JobProcess, AgentServer, cli, Agent, AgentSession
from livekit.plugins import openai, silero, deepgram, elevenlabs

# Load credentials from the local .env file into the environment.
load_dotenv()

# Named logger for this example, emitting INFO and above.
logger = logging.getLogger("pipeline-translator")
logger.setLevel(logging.INFO)

# Create the agent server.
server = AgentServer()


class TranslatorAgent(Agent):
    """Agent instructed to translate the user's English speech into French."""

    def __init__(self) -> None:
        """Configure the agent with translation-only instructions."""
        super().__init__(
            instructions="""
                You are a translator. You translate the user's speech from English to French.
                Every message you receive, translate it directly into French.
                Do not respond with anything else but the translation.
            """
        )

    async def on_enter(self):
        """Ask the session to generate a reply when the agent enters."""
        self.session.generate_reply()


def prewarm(proc: JobProcess):
    """Load the Silero VAD model and store it in the process userdata under "vad"."""
    proc.userdata["vad"] = silero.VAD.load()


# Register prewarm as the server's setup function.
server.setup_fnc = prewarm


@server.rtc_session()
async def entrypoint(ctx: JobContext):
    """Start a translation session in the job's room, then connect."""
    # Attach the room name to the job's log context fields.
    ctx.log_context_fields = {"room": ctx.room.name}

    # Pipeline: Deepgram STT -> OpenAI LLM -> ElevenLabs multilingual TTS,
    # reusing the VAD instance stored by prewarm.
    session = AgentSession(
        stt=deepgram.STT(),
        llm=openai.LLM(),
        tts=elevenlabs.TTS(model="eleven_multilingual_v2"),
        vad=ctx.proc.userdata["vad"],
        preemptive_generation=True,
    )

    await session.start(agent=TranslatorAgent(), room=ctx.room)
    await ctx.connect()


if __name__ == "__main__":
    # Hand the server to the LiveKit agents CLI.
    cli.run_app(server)
Run it
python pipeline_translator.py console
How it works
- Deepgram handles English speech-to-text transcription.
- OpenAI generates a French translation from the transcript.
- ElevenLabs multilingual TTS speaks the translated text in French.
- Silero VAD controls turn-taking between user and agent.
- The agent triggers an initial response on entry so the user hears French output immediately.
Full example
import logging

from dotenv import load_dotenv
from livekit.agents import JobContext, JobProcess, AgentServer, cli, Agent, AgentSession
from livekit.plugins import openai, silero, deepgram, elevenlabs

# Load credentials from the local .env file into the environment.
load_dotenv()

# Named logger for this example, emitting INFO and above.
logger = logging.getLogger("pipeline-translator")
logger.setLevel(logging.INFO)


class TranslatorAgent(Agent):
    """Agent instructed to translate the user's English speech into French."""

    def __init__(self) -> None:
        """Configure the agent with translation-only instructions."""
        super().__init__(
            instructions="""
                You are a translator. You translate the user's speech from English to French.
                Every message you receive, translate it directly into French.
                Do not respond with anything else but the translation.
            """
        )

    async def on_enter(self):
        """Ask the session to generate a reply when the agent enters."""
        self.session.generate_reply()


# Create the agent server.
server = AgentServer()


def prewarm(proc: JobProcess):
    """Load the Silero VAD model and store it in the process userdata under "vad"."""
    proc.userdata["vad"] = silero.VAD.load()


# Register prewarm as the server's setup function.
server.setup_fnc = prewarm


@server.rtc_session()
async def entrypoint(ctx: JobContext):
    """Start a translation session in the job's room, then connect."""
    # Attach the room name to the job's log context fields.
    ctx.log_context_fields = {"room": ctx.room.name}

    # Pipeline: Deepgram STT -> OpenAI LLM -> ElevenLabs multilingual TTS,
    # reusing the VAD instance stored by prewarm.
    session = AgentSession(
        stt=deepgram.STT(),
        llm=openai.LLM(),
        tts=elevenlabs.TTS(model="eleven_multilingual_v2"),
        vad=ctx.proc.userdata["vad"],
        preemptive_generation=True,
    )

    await session.start(agent=TranslatorAgent(), room=ctx.room)
    await ctx.connect()


if __name__ == "__main__":
    # Hand the server to the LiveKit agents CLI.
    cli.run_app(server)