Module livekit.rtc.video_frame

Classes

class VideoFrame (width: int, height: int, type: int, data: Union[bytes, bytearray, memoryview])

Represents a video frame with associated metadata and pixel data.

This class provides methods to access video frame properties such as width, height, and pixel format, as well as methods for manipulating and converting video frames.

Initializes a new VideoFrame instance.

Args

width : int
The width of the video frame in pixels.
height : int
The height of the video frame in pixels.
type : proto_video.VideoBufferType.ValueType
The format type of the video frame data (e.g., RGBA, BGRA, RGB24, etc.).
data : Union[bytes, bytearray, memoryview]
The raw pixel data for the video frame.
Expand source code
class VideoFrame:
    """
    Represents a video frame with associated metadata and pixel data.

    This class provides methods to access video frame properties such as width, height,
    and pixel format, as well as methods for manipulating and converting video frames.
    """

    def __init__(
        self,
        width: int,
        height: int,
        type: proto_video.VideoBufferType.ValueType,
        data: Union[bytes, bytearray, memoryview],
    ) -> None:
        """
        Initializes a new VideoFrame instance.

        Args:
            width (int): The width of the video frame in pixels.
            height (int): The height of the video frame in pixels.
            type (proto_video.VideoBufferType.ValueType): The format type of the video frame data
                (e.g., RGBA, BGRA, RGB24, etc.).
            data (Union[bytes, bytearray, memoryview]): The raw pixel data for the video frame.
        """
        self._width = width
        self._height = height
        self._type = type
        # The pixel data is copied into a bytearray owned by this frame, so the
        # frame does not depend on the lifetime of the caller's buffer.
        self._data = bytearray(data)

    @property
    def width(self) -> int:
        """
        Returns the width of the video frame in pixels.

        Returns:
            int: The width of the video frame.
        """
        return self._width

    @property
    def height(self) -> int:
        """
        Returns the height of the video frame in pixels.

        Returns:
            int: The height of the video frame.
        """
        return self._height

    @property
    def type(self) -> proto_video.VideoBufferType.ValueType:
        """
        Returns the pixel format type of the video frame.

        Returns:
            proto_video.VideoBufferType.ValueType: The format type of the video frame data
            (e.g., RGBA, BGRA, RGB24, etc.).
        """
        return self._type

    @property
    def data(self) -> memoryview:
        """
        Returns a memoryview of the raw pixel data for the video frame.

        Returns:
            memoryview: The raw pixel data of the video frame as a memoryview object.
        """
        return memoryview(self._data)

    @staticmethod
    def _from_owned_info(owned_info: proto_video.OwnedVideoBuffer) -> "VideoFrame":
        """
        Builds a VideoFrame by copying the pixel data described by an owned FFI buffer.

        Args:
            owned_info (proto_video.OwnedVideoBuffer): The buffer description (``info``)
                together with the FFI handle (``handle``) that owns it.

        Returns:
            VideoFrame: A new frame holding its own copy of the pixel data.
        """
        info = owned_info.info
        data_len = _get_plane_length(info.type, info.width, info.height)
        cdata = (ctypes.c_uint8 * data_len).from_address(info.data_ptr)
        # The constructor copies `data` into its own bytearray, so pass a
        # zero-copy view of the native memory instead of copying it twice.
        frame = VideoFrame(
            width=info.width,
            height=info.height,
            type=info.type,
            data=memoryview(cdata),
        )
        # NOTE(review): the handle is wrapped and dropped right away, presumably so
        # the FFI side can release the native buffer now that the pixels have been
        # copied -- confirm against FfiHandle's disposal semantics.
        FfiHandle(owned_info.handle.id)
        return frame

    def _proto_info(self) -> proto_video.VideoBufferInfo:
        """
        Describes this frame as a ``VideoBufferInfo`` message pointing at its pixel data.

        Returns:
            proto_video.VideoBufferInfo: Message carrying the frame's dimensions, type,
            data address, and plane components. ``stride`` is set only for the packed
            4-byte (ARGB/ABGR/RGBA/BGRA) and 3-byte (RGB24) formats.
        """
        info = proto_video.VideoBufferInfo()
        addr = get_address(self.data)
        info.components.extend(
            _get_plane_infos(addr, self.type, self.width, self.height)
        )
        info.width = self.width
        info.height = self.height
        info.type = self.type
        info.data_ptr = addr

        if self.type in [
            proto_video.VideoBufferType.ARGB,
            proto_video.VideoBufferType.ABGR,
            proto_video.VideoBufferType.RGBA,
            proto_video.VideoBufferType.BGRA,
        ]:
            # Four bytes per pixel.
            info.stride = self.width * 4
        elif self.type == proto_video.VideoBufferType.RGB24:
            # Three bytes per pixel.
            info.stride = self.width * 3

        return info

    def get_plane(self, plane_nth: int) -> Optional[memoryview]:
        """
        Returns the memoryview of a specific plane in the video frame, based on its index.

        Some video formats (e.g., I420, NV12) contain multiple planes (Y, U, V channels).
        This method allows access to individual planes by index.

        Args:
            plane_nth (int): The index of the plane to retrieve (starting from 0).

        Returns:
            Optional[memoryview]: A memoryview of the specified plane's data, or None if
            the index is out of bounds for the format.
        """
        plane_infos = _get_plane_infos(
            get_address(self.data), self.type, self.width, self.height
        )
        if plane_nth >= len(plane_infos):
            return None

        plane_info = plane_infos[plane_nth]
        # NOTE(review): the view is built from a raw address, so it does not appear
        # to hold a reference to this frame's buffer -- callers should keep the
        # frame alive while using the returned memoryview; confirm.
        cdata = (ctypes.c_uint8 * plane_info.size).from_address(plane_info.data_ptr)
        return memoryview(cdata)

    def convert(
        self, type: proto_video.VideoBufferType.ValueType, *, flip_y: bool = False
    ) -> "VideoFrame":
        """
        Converts the current video frame to a different format type, optionally flipping
        the frame vertically.

        Args:
            type (proto_video.VideoBufferType.ValueType): The target format type to convert to
                (e.g., RGBA, I420).
            flip_y (bool, optional): If True, the frame will be flipped vertically. Defaults to False.

        Returns:
            VideoFrame: A new VideoFrame object in the specified format.

        Raises:
            Exception: If there is an error during the conversion process.

        Example:
            Convert a frame from RGBA to I420 format:

            >>> frame = VideoFrame(width=1920, height=1080, type=proto_video.VideoBufferType.RGBA, data=raw_data)
            >>> converted_frame = frame.convert(proto_video.VideoBufferType.I420)
            >>> print(converted_frame.type)
            VideoBufferType.I420

        Example:
            Convert a frame from BGRA to RGB24 format and flip it vertically:

            >>> frame = VideoFrame(width=1280, height=720, type=proto_video.VideoBufferType.BGRA, data=raw_data)
            >>> converted_frame = frame.convert(proto_video.VideoBufferType.RGB24, flip_y=True)
            >>> print(converted_frame.type)
            VideoBufferType.RGB24
            >>> print(converted_frame.width, converted_frame.height)
            1280 720
        """
        req = proto.FfiRequest()
        req.video_convert.flip_y = flip_y
        req.video_convert.dst_type = type
        req.video_convert.buffer.CopyFrom(self._proto_info())
        resp = FfiClient.instance.request(req)
        if resp.video_convert.error:
            raise Exception(resp.video_convert.error)

        return VideoFrame._from_owned_info(resp.video_convert.buffer)

    def __repr__(self) -> str:
        return f"rtc.VideoFrame(width={self.width}, height={self.height}, type={self.type})"

Instance variables

prop data : memoryview

Returns a memoryview of the raw pixel data for the video frame.

Returns

memoryview
The raw pixel data of the video frame as a memoryview object.
Expand source code
@property
def data(self) -> memoryview:
    """
    Exposes the frame's raw pixel bytes as a memoryview.

    Returns:
        memoryview: A view over the frame's stored pixel data.
    """
    pixels = self._data
    return memoryview(pixels)
prop height : int

Returns the height of the video frame in pixels.

Returns

int
The height of the video frame.
Expand source code
@property
def height(self) -> int:
    """
    Gives the vertical size of the video frame.

    Returns:
        int: The frame height, in pixels.
    """
    frame_height = self._height
    return frame_height
prop type : int

Returns the pixel format type of the video frame.

Returns

int
The format type of the video frame data (a proto_video.VideoBufferType value, e.g., RGBA, BGRA, RGB24).
Expand source code
@property
def type(self) -> proto_video.VideoBufferType.ValueType:
    """
    Returns the pixel format type of the video frame.

    Returns:
        proto_video.VideoBufferType.ValueType: The format type of the video frame data
        (e.g., RGBA, BGRA, RGB24, etc.).
    """
    return self._type
prop width : int

Returns the width of the video frame in pixels.

Returns

int
The width of the video frame.
Expand source code
@property
def width(self) -> int:
    """
    Gives the horizontal size of the video frame.

    Returns:
        int: The frame width, in pixels.
    """
    frame_width = self._width
    return frame_width

Methods

def convert(self, type: int, *, flip_y: bool = False) ‑> VideoFrame

Converts the current video frame to a different format type, optionally flipping the frame vertically.

Args

type : proto_video.VideoBufferType.ValueType
The target format type to convert to (e.g., RGBA, I420).
flip_y : bool, optional
If True, the frame will be flipped vertically. Defaults to False.

Returns

VideoFrame
A new VideoFrame object in the specified format.

Raises

Exception
If there is an error during the conversion process.

Example

Convert a frame from RGBA to I420 format:

>>> frame = VideoFrame(width=1920, height=1080, type=proto_video.VideoBufferType.RGBA, data=raw_data)
>>> converted_frame = frame.convert(proto_video.VideoBufferType.I420)
>>> print(converted_frame.type)
VideoBufferType.I420

Example

Convert a frame from BGRA to RGB24 format and flip it vertically:

>>> frame = VideoFrame(width=1280, height=720, type=proto_video.VideoBufferType.BGRA, data=raw_data)
>>> converted_frame = frame.convert(proto_video.VideoBufferType.RGB24, flip_y=True)
>>> print(converted_frame.type)
VideoBufferType.RGB24
>>> print(converted_frame.width, converted_frame.height)
1280 720
def get_plane(self, plane_nth: int) ‑> Optional[memoryview]

Returns the memoryview of a specific plane in the video frame, based on its index.

Some video formats (e.g., I420, NV12) contain multiple planes (Y, U, V channels). This method allows access to individual planes by index.

Args

plane_nth : int
The index of the plane to retrieve (starting from 0).

Returns

Optional[memoryview]
A memoryview of the specified plane's data, or None if the index is out of bounds for the format.