LiveKit docs › Multimodality › Images & video › Images

---

# Images

> Add images to your agent's context, receive images from the frontend, and send images back to users.

## Overview

LiveKit Agents supports images as both input and output. On the input side, you can add images to your agent's chat context, receive images from the frontend, or sample video frames. On the output side, you can send images to the frontend using [byte streams](https://docs.livekit.io/transport/data/byte-streams.md).

## Image input

The agent's [chat context](https://docs.livekit.io/agents/logic/chat-context.md) supports images as well as text. You can add as many images as you want to the chat context, but keep in mind that larger context windows contribute to slower response times.

To add an image to the chat context, create an `ImageContent` object and include it in a chat message. The image content can be a base64 data URL, an external URL, or a frame from a [video track](https://docs.livekit.io/transport/media.md).

### Load into initial context

The following example shows an agent initialized with an image at startup. This example uses an external URL, but you can modify it to load a local file using a base64 data URL instead:

**Filename: `agent.py`**

```python
async def entrypoint(ctx: JobContext):
    """Start an agent session whose chat context is pre-loaded with an image.

    Must be a coroutine function because it awaits ``session.start``.
    """
    # ctx.connect, etc.

    session = AgentSession(
        # ... stt, tts, llm, etc.
    )

    # Seed the conversation with a user message that mixes text and an image.
    initial_ctx = ChatContext()
    initial_ctx.add_message(
        role="user",
        content=[
            "Here is a picture of me",
            ImageContent(image="https://example.com/image.jpg"),
        ],
    )

    await session.start(
        room=ctx.room,
        agent=Agent(
            instructions="You are a helpful voice AI assistant.",
            chat_ctx=initial_ctx,
        ),
        # ... room_options, etc.
    )

```

**Filename: `Required imports`**

```python
from livekit.agents.llm import ImageContent
from livekit.agents import Agent, AgentSession, ChatContext, JobContext

```

**Filename: `agent.ts`**

```typescript
export default defineAgent({
  // Job entry point: pre-loads the chat context with an image, then starts the session.
  entry: async (ctx: JobContext) => {
    // await ctx.connect(), etc

    // Start from an empty context and add a single user turn with text plus an image URL.
    const seededCtx = llm.ChatContext.empty();
    const picture = llm.createImageContent({ image: 'https://example.com/image.jpg' });
    seededCtx.addMessage({ role: 'user', content: ['Here is a picture of me', picture] });

    // The agent carries the seeded context into the session.
    const agent = new voice.Agent({
      instructions: 'You are a helpful voice AI assistant.',
      chatCtx: seededCtx,
    });

    const session = new voice.AgentSession({
      // ... stt, tts, llm, etc.
    });

    await session.start({
      room: ctx.room,
      agent,
      // ... inputOptions, etc.
    });
  },
});

```

**Filename: `Required imports`**

```typescript
import { type JobContext, defineAgent, llm, voice } from '@livekit/agents';

```

> 🔥 **LLM provider support for external URLs**
> 
> Not every provider supports external image URLs. Consult their documentation for details.

### Upload from frontend

To upload an image from your frontend app, use the [sendFile method](https://docs.livekit.io/transport/data/byte-streams.md#sending-files) of the LiveKit SDK. Add a byte stream handler to your agent to receive the image data and add it to the chat context. Here is a simple agent capable of receiving images from the user on the byte stream topic `"images"`:

**Filename: `agent.py`**

```python
class Assistant(Agent):
    """Voice agent that adds images received on the ``"images"`` byte stream topic to its chat context."""

    def __init__(self) -> None:
        self._tasks = [] # Prevent garbage collection of running tasks
        super().__init__(instructions="You are a helpful voice AI assistant.")

    async def on_enter(self):
        """Register the byte stream handler for incoming images."""
        def _image_received_handler(reader, participant_identity):
            # The handler itself is synchronous, so the async read runs in a task.
            task = asyncio.create_task(
                self._image_received(reader, participant_identity)
            )
            self._tasks.append(task)
            task.add_done_callback(lambda t: self._tasks.remove(t))

        # Add the handler when the agent joins
        get_job_context().room.register_byte_stream_handler("images", _image_received_handler)

    async def _image_received(self, reader, participant_identity):
        """Read the whole stream, then append the image to the chat context as a user message."""
        # Accumulate into a mutable buffer; `bytes += chunk` would re-copy
        # everything received so far on every chunk.
        image_bytes = bytearray()
        async for chunk in reader:
            image_bytes.extend(chunk)

        chat_ctx = self.chat_ctx.copy()

        # Encode the image to base64 and add it to the chat context
        chat_ctx.add_message(
            role="user",
            content=[
                ImageContent(
                    image=f"data:image/png;base64,{base64.b64encode(image_bytes).decode('utf-8')}"
                )
            ],
        )
        await self.update_chat_ctx(chat_ctx)

```

**Filename: `Required imports`**

```python
import asyncio
import base64
from livekit.agents import Agent, get_job_context
from livekit.agents.llm import ImageContent

```

**Filename: `agent.ts`**

```typescript
/** Voice agent that adds images received on the `images` byte stream topic to its chat context. */
class Assistant extends voice.Agent {
  private tasks: Set<Task<void>> = new Set(); // Prevent garbage collection of running tasks

  constructor() {
    super({
      instructions: 'You are a helpful voice AI assistant.',
    });
  }

  async onEnter(): Promise<void> {
    // Register byte stream handler for receiving images
    getJobContext().room.registerByteStreamHandler('images', async (stream: ByteStreamReader) => {
      const task = Task.from((controller) => this.imageReceived(stream, controller));
      this.tasks.add(task);

      // Stop tracking the task once it settles
      task.result.finally(() => {
        this.tasks.delete(task);
      });
    });
  }

  /**
   * Reads the whole stream and appends the image to the chat context as a user message.
   * Returns early without updating the context if the controller is aborted.
   */
  private async imageReceived(
    stream: ByteStreamReader,
    controller: AbortController,
  ): Promise<void> {
    const chunks: Uint8Array[] = [];

    // Read all chunks from the stream
    for await (const chunk of stream) {
      if (controller.signal.aborted) return;
      chunks.push(chunk);
    }

    // Combine all chunks into a single buffer and encode it as base64
    const base64Image = Buffer.concat(chunks).toString('base64');

    const chatCtx = this.chatCtx.copy();

    // Add the image to the chat context as a base64 data URL
    const imageContent = llm.createImageContent({
      image: `data:image/png;base64,${base64Image}`,
      inferenceDetail: 'auto',
    });

    chatCtx.addMessage({
      role: 'user',
      content: [imageContent],
    });

    if (controller.signal.aborted) return;
    await this.updateChatCtx(chatCtx);
  }
}

```

**Filename: `Required imports`**

```typescript
import { Task, getJobContext, llm, voice } from '@livekit/agents';
import type { ByteStreamReader } from '@livekit/rtc-node';

```

### Inference detail

If your LLM provider supports it, you can set the `inference_detail` parameter (`inferenceDetail` in Node.js) to `"high"` or `"low"` to control token usage and inference quality. The default is `"auto"`, which uses the provider's default.

## Image output

Your agent can send images to the frontend using [byte streams](https://docs.livekit.io/transport/data/byte-streams.md). Use this to share generated images, diagrams, screenshots, or any other visual content from your agent to the user.

To send an image, use the `send_file` method (`sendFile` in Node.js) on the room's local participant. The frontend receives the image by registering a byte stream handler for the same topic.

### Send an image from your agent

**Filename: `agent.py`**

```python
class Assistant(Agent):
    """Voice agent that sends an image file to the frontend in ``on_enter``."""

    def __init__(self) -> None:
        super().__init__(instructions="You are a helpful voice AI assistant.")

    async def on_enter(self):
        # Files are sent through the room's local participant
        sender = get_job_context().room.local_participant

        # Send an image file to the frontend on the "agent-images" topic
        await sender.send_file(file_path="path/to/image.png", topic="agent-images")

```

**Filename: `Required imports`**

```python
from livekit.agents import Agent, get_job_context

```

**Filename: `agent.ts`**

```typescript
/** Voice agent that sends an image file to the frontend in `onEnter`. */
class Assistant extends voice.Agent {
  constructor() {
    super({
      instructions: 'You are a helpful voice AI assistant.',
    });
  }

  async onEnter(): Promise<void> {
    const { localParticipant } = getJobContext().room;

    // Fail with a clear message instead of relying on a non-null assertion
    if (!localParticipant) {
      throw new Error('Cannot send image: the room has no local participant');
    }

    // Send an image file to the frontend
    await localParticipant.sendFile('path/to/image.png', {
      topic: 'agent-images',
    });
  }
}

```

**Filename: `Required imports`**

```typescript
import { getJobContext, voice } from '@livekit/agents';

```

### Receive images in your frontend

Register a byte stream handler in your frontend to receive images from the agent:

**JavaScript**:

```typescript
room.registerByteStreamHandler('agent-images', async (reader, participantInfo) => {
  // Wrap the received data in a Blob tagged with the stream's MIME type
  const data = await reader.readAll();
  const blob = new Blob(data, { type: reader.info.mimeType });
  const url = URL.createObjectURL(blob);

  // Display the image in your UI
  const img = document.createElement('img');
  // Release the object URL once the image has loaded (or failed) so the blob is not leaked
  img.onload = () => URL.revokeObjectURL(url);
  img.onerror = () => URL.revokeObjectURL(url);
  img.src = url;
  document.body.appendChild(img);
});

```

---

**Swift**:

```swift
try await room.registerByteStreamHandler(for: "agent-images") { reader, participantIdentity in
    // Read the full payload before touching the UI
    let imageData = try await reader.readAll()

    // Display the image in your UI (dispatched to the main queue)
    DispatchQueue.main.async {
        let imageView = UIImageView(image: UIImage(data: imageData))
        self.view.addSubview(imageView)
    }
}

```

---

**Android**:

```kotlin
room.registerByteStreamHandler("agent-images") { reader, participantIdentity ->
    myCoroutineScope.launch {
        // Join the received chunks into one contiguous byte array
        val imageBytes = reader.readAll().fold(ByteArray(0)) { joined, part -> joined + part }

        // Display the image in your UI
        withContext(Dispatchers.Main) {
            imageView.setImageBitmap(
                BitmapFactory.decodeByteArray(imageBytes, 0, imageBytes.size)
            )
        }
    }
}

```

---

**Flutter**:

```dart
room.registerByteStreamHandler('agent-images',
    (ByteStreamReader reader, String participantIdentity) async {
  // Flatten the received chunks into a single list of bytes
  final received = await reader.readAll();
  final flattened = <int>[for (final part in received) ...part];

  // Display the image in your UI
  setState(() {
    imageBytes = Uint8List.fromList(flattened);
  });
});

```

For full details on byte streams, see [Sending files & bytes](https://docs.livekit.io/transport/data/byte-streams.md).

---

This document was rendered at 2026-06-07T11:35:37.976Z.
For the latest version of this document, see [https://docs.livekit.io/agents/multimodality/vision/images.md](https://docs.livekit.io/agents/multimodality/vision/images.md).

To explore all LiveKit documentation, see [llms.txt](https://docs.livekit.io/llms.txt).