From fda4962713be468ee3e80c6b298f7baf64e46e96 Mon Sep 17 00:00:00 2001 From: Kim Minjong Date: Wed, 4 Feb 2026 16:27:50 +0900 Subject: [PATCH 1/9] Impl __dlpack__, keep cuda memory --- av/codec/hwaccel.pxd | 1 + av/codec/hwaccel.py | 10 ++ av/dlpack.pxd | 40 ++++++ av/hwcontext.pxd | 12 ++ av/video/codeccontext.py | 3 + av/video/frame.py | 30 +++- av/video/plane.py | 241 ++++++++++++++++++++++++++++++++- av/video/reformatter.py | 17 +++ include/libavcodec/avcodec.pxd | 1 + include/libavutil/avutil.pxd | 1 + 10 files changed, 346 insertions(+), 10 deletions(-) create mode 100644 av/dlpack.pxd create mode 100644 av/hwcontext.pxd diff --git a/av/codec/hwaccel.pxd b/av/codec/hwaccel.pxd index 46efdaf3b..41a6bdc86 100644 --- a/av/codec/hwaccel.pxd +++ b/av/codec/hwaccel.pxd @@ -19,3 +19,4 @@ cdef class HWAccel: cdef public bint allow_software_fallback cdef public dict options cdef public int flags + cdef public str output_format diff --git a/av/codec/hwaccel.py b/av/codec/hwaccel.py index ffa196e48..124716bd4 100644 --- a/av/codec/hwaccel.py +++ b/av/codec/hwaccel.py @@ -110,6 +110,7 @@ def __init__( allow_software_fallback=True, options=None, flags=None, + output_format="sw", ): if isinstance(device_type, HWDeviceType): self._device_type = device_type @@ -120,6 +121,14 @@ def __init__( else: raise ValueError("Unknown type for device_type") + if output_format is None: + output_format = "sw" + if isinstance(output_format, str): + output_format = output_format.lower() + if output_format not in {"sw", "hw"}: + raise ValueError("output_format must be 'sw' or 'hw'") + self.output_format = output_format + self._device = device self.allow_software_fallback = allow_software_fallback self.options = {} if not options else dict(options) @@ -165,6 +174,7 @@ def create(self, codec: Codec): device=self._device, allow_software_fallback=self.allow_software_fallback, options=self.options, + output_format=self.output_format, ) ret._initialize_hw_context(codec) return ret diff --git a/av/dlpack.pxd b/av/dlpack.pxd new file mode 100644 index 000000000..f3cdf857b --- /dev/null +++ b/av/dlpack.pxd @@ -0,0 +1,40 @@ +from libc.stdint cimport int64_t, uint8_t, uint16_t, uint64_t + +cdef enum DLDeviceType: + kDLCPU = 1 + kDLCUDA = 2 + +cdef enum DLDataTypeCode: + kDLInt = 0 + kDLUInt = 1 + kDLFloat = 2 + kDLBfloat = 4 + kDLComplex = 5 + kDLBool = 6 + +cdef struct DLDevice: + int device_type + int device_id + +cdef struct DLDataType: + uint8_t code + uint8_t bits + uint16_t lanes + +cdef struct DLTensor: + void* data + DLDevice device + int ndim + DLDataType dtype + int64_t* shape + int64_t* strides + uint64_t byte_offset + +cdef struct DLManagedTensor + +ctypedef void (*DLManagedTensorDeleter)(DLManagedTensor*) nogil + +cdef struct DLManagedTensor: + DLTensor dl_tensor + void* manager_ctx + DLManagedTensorDeleter deleter diff --git a/av/hwcontext.pxd b/av/hwcontext.pxd new file mode 100644 index 000000000..859c76b0f --- /dev/null +++ b/av/hwcontext.pxd @@ -0,0 +1,12 @@ +cimport libav as lib + +cdef extern from "libavutil/hwcontext.h": + ctypedef struct AVHWFramesContext: + const void *av_class + lib.AVBufferRef *device_ref + void *device_ctx + void *hwctx + lib.AVPixelFormat format + lib.AVPixelFormat sw_format + int width + int height diff --git a/av/video/codeccontext.py b/av/video/codeccontext.py index b7842a7d5..aa7c49560 100644 --- a/av/video/codeccontext.py +++ b/av/video/codeccontext.py @@ -127,6 +127,9 @@ def _transfer_hwframe(self, frame: Frame): # need to transfer. return frame + if self.hwaccel_ctx.output_format == "hw": + return frame + frame_sw: Frame = self._alloc_next_frame() err_check(lib.av_hwframe_transfer_data(frame_sw.ptr, frame.ptr, 0)) # TODO: Is there anything else to transfer? diff --git a/av/video/frame.py b/av/video/frame.py index 1320afad3..8253c3fb2 100644 --- a/av/video/frame.py +++ b/av/video/frame.py @@ -3,6 +3,7 @@ import cython from cython.cimports.av.error import err_check +from cython.cimports.av.hwcontext import AVHWFramesContext from cython.cimports.av.sidedata.sidedata import get_display_rotation from cython.cimports.av.utils import check_ndarray from cython.cimports.av.video.format import get_pix_fmt, get_video_format @@ -261,12 +262,19 @@ def planes(self): # We need to detect which planes actually exist, but also constrain ourselves to # the maximum plane count (as determined only by VideoFrames so far), in case # the library implementation does not set the last plane to NULL. + fmt = self.format + if self.ptr.hw_frames_ctx: + frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( + cython.pointer[AVHWFramesContext], self.ptr.hw_frames_ctx.data + ) + fmt = get_video_format(frames_ctx.sw_format, self.ptr.width, self.ptr.height) + max_plane_count: cython.int = 0 - for i in range(self.format.ptr.nb_components): - count = self.format.ptr.comp[i].plane + 1 + for i in range(fmt.ptr.nb_components): + count = fmt.ptr.comp[i].plane + 1 if max_plane_count < count: max_plane_count = count - if self.format.name == "pal8": + if fmt.name == "pal8": max_plane_count = 2 plane_count: cython.int = 0 @@ -446,7 +454,21 @@ def to_ndarray(self, channel_last=False, **kwargs): .. note:: For ``gbrp`` formats, channels are flipped to RGB order. """ - frame: VideoFrame = self.reformat(**kwargs) + kwargs2 = dict(kwargs) + if self.ptr.hw_frames_ctx and "format" not in kwargs2: + frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( + cython.pointer[AVHWFramesContext], self.ptr.hw_frames_ctx.data + ) + kwargs2["format"] = get_video_format( + frames_ctx.sw_format, self.ptr.width, self.ptr.height + ).name + + frame: VideoFrame = self.reformat(**kwargs2) + if frame.ptr.hw_frames_ctx: + raise ValueError( + "Cannot convert a hardware frame to numpy directly. " + "Specify a software format (e.g. format='rgb24') or decode with HWAccel(output_format='sw')." + ) import numpy as np diff --git a/av/video/plane.py b/av/video/plane.py index 495a9de4c..fb735dedc 100644 --- a/av/video/plane.py +++ b/av/video/plane.py @@ -1,25 +1,48 @@ import cython +import cython.cimports.libav as lib +from cython.cimports.av.buffer import Buffer +from cython.cimports.av.dlpack import DLManagedTensor, kDLCUDA, kDLUInt +from cython.cimports.av.error import err_check +from cython.cimports.av.hwcontext import AVHWFramesContext +from cython.cimports.av.video.format import get_pix_fmt, get_video_format from cython.cimports.av.video.frame import VideoFrame +from cython.cimports.cpython import PyBUF_WRITABLE, PyBuffer_FillInfo +from cython.cimports.cpython.buffer import Py_buffer +from cython.cimports.cpython.pycapsule import ( + PyCapsule_GetPointer, + PyCapsule_IsValid, + PyCapsule_New, +) +from cython.cimports.cpython.ref import PyObject +from cython.cimports.libc.stdint import int64_t +from cython.cimports.libc.stdlib import free, malloc @cython.cclass class VideoPlane(Plane): def __cinit__(self, frame: VideoFrame, index: cython.int): # The palette plane has no associated component or linesize; set fields manually - if frame.format.name == "pal8" and index == 1: + fmt = frame.format + if frame.ptr.hw_frames_ctx: + frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( + cython.pointer[AVHWFramesContext], frame.ptr.hw_frames_ctx.data + ) + fmt = get_video_format(frames_ctx.sw_format, frame.ptr.width, frame.ptr.height) + + if fmt.name == "pal8" and index == 1: self.width = 256 self.height = 1 self.buffer_size = 256 * 4 return - for i in range(frame.format.ptr.nb_components): - if frame.format.ptr.comp[i].plane == index: - component = frame.format.components[i] + for i in range(fmt.ptr.nb_components): + if fmt.ptr.comp[i].plane == index: + component = fmt.components[i] self.width = component.width self.height = component.height break - else: # nobreak - raise RuntimeError(f"could not find plane {index} of {frame.format!r}") + else: + raise RuntimeError(f"could not find plane {index} of {fmt!r}") # Sometimes, linesize is negative (and that is meaningful). We are only # insisting that the buffer size be based on the extent of linesize, and @@ -38,3 +61,209 @@ def line_size(self): :type: int """ return self.frame.ptr.linesize[self.index] + + @cython.cfunc + def _buffer_writable(self) -> cython.bint: + if self.frame.ptr.hw_frames_ctx: + return False + return True + + def __getbuffer__(self, view: cython.pointer[Py_buffer], flags: cython.int): + if self.frame.ptr.hw_frames_ctx: + raise TypeError( + "Hardware frame planes do not support the Python buffer protocol. " + "Use DLPack (__dlpack__) or download to a software frame." + ) + if flags & PyBUF_WRITABLE and not self._buffer_writable(): + raise ValueError("buffer is not writable") + PyBuffer_FillInfo(view, self, self._buffer_ptr(), self._buffer_size(), 0, flags) + + def __dlpack_device__(self): + if not self.frame.ptr.hw_frames_ctx: + raise TypeError("DLPack export is only supported for hardware frames") + if cython.cast(lib.AVPixelFormat, self.frame.ptr.format) != get_pix_fmt(b"cuda"): + raise NotImplementedError("DLPack export is only implemented for CUDA hw frames") + return (kDLCUDA, 0) + + def __dlpack__(self, stream=None): + if not self.frame.ptr.hw_frames_ctx: + raise TypeError("DLPack export is only supported for hardware frames") + if cython.cast(lib.AVPixelFormat, self.frame.ptr.format) != get_pix_fmt(b"cuda"): + raise NotImplementedError("DLPack export is only implemented for CUDA hw frames") + + frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( + cython.pointer[AVHWFramesContext], self.frame.ptr.hw_frames_ctx.data + ) + sw_fmt = frames_ctx.sw_format + + line_size = self.line_size + if line_size < 0: + raise NotImplementedError("negative linesize is not supported for DLPack export") + + nv12 = get_pix_fmt(b"nv12") + p010le = get_pix_fmt(b"p010le") + p016le = get_pix_fmt(b"p016le") + + ndim: cython.int + bits: cython.int + itemsize: cython.int + + s0: int64_t + s1: int64_t + s2: int64_t + st0: int64_t + st1: int64_t + st2: int64_t + + if sw_fmt == nv12: + itemsize = 1 + bits = 8 + if self.index == 0: + ndim = 2 + s0 = self.frame.ptr.height + s1 = self.frame.ptr.width + st0 = line_size + st1 = 1 + elif self.index == 1: + ndim = 3 + s0 = self.frame.ptr.height // 2 + s1 = self.frame.ptr.width // 2 + s2 = 2 + st0 = line_size + st1 = 2 + st2 = 1 + else: + raise ValueError("invalid plane index for NV12") + elif sw_fmt == p010le or sw_fmt == p016le: + itemsize = 2 + bits = 16 + if line_size % itemsize: + raise ValueError("linesize is not aligned to dtype") + if self.index == 0: + ndim = 2 + s0 = self.frame.ptr.height + s1 = self.frame.ptr.width + st0 = line_size // itemsize + st1 = 1 + elif self.index == 1: + ndim = 3 + s0 = self.frame.ptr.height // 2 + s1 = self.frame.ptr.width // 2 + s2 = 2 + st0 = line_size // itemsize + st1 = 2 + st2 = 1 + else: + raise ValueError("invalid plane index for P010/P016") + else: + raise NotImplementedError("unsupported sw_format for DLPack export") + + frame_ref: cython.pointer[lib.AVFrame] = lib.av_frame_alloc() + if frame_ref == cython.NULL: + raise MemoryError("av_frame_alloc() failed") + err_check(lib.av_frame_ref(frame_ref, self.frame.ptr)) + + shape = cython.cast(cython.pointer[int64_t], malloc(ndim * cython.sizeof(int64_t))) + strides = cython.cast(cython.pointer[int64_t], malloc(ndim * cython.sizeof(int64_t))) + if shape == cython.NULL or strides == cython.NULL: + if shape != cython.NULL: + free(shape) + if strides != cython.NULL: + free(strides) + lib.av_frame_free(cython.address(frame_ref)) + raise MemoryError("malloc() failed") + + if ndim == 2: + shape[0] = s0 + shape[1] = s1 + strides[0] = st0 + strides[1] = st1 + else: + shape[0] = s0 + shape[1] = s1 + shape[2] = s2 + strides[0] = st0 + strides[1] = st1 + strides[2] = st2 + + ctx = cython.cast(cython.pointer[cython.p_void], malloc(3 * cython.sizeof(cython.p_void))) + if ctx == cython.NULL: + free(shape) + free(strides) + lib.av_frame_free(cython.address(frame_ref)) + raise MemoryError("malloc() failed") + + ctx[0] = cython.cast(cython.p_void, frame_ref) + ctx[1] = cython.cast(cython.p_void, shape) + ctx[2] = cython.cast(cython.p_void, strides) + + managed = cython.cast(cython.pointer[DLManagedTensor], malloc(cython.sizeof(DLManagedTensor))) + if managed == cython.NULL: + free(ctx) + free(shape) + free(strides) + lib.av_frame_free(cython.address(frame_ref)) + raise MemoryError("malloc() failed") + + managed.dl_tensor.data = cython.cast(cython.p_void, frame_ref.data[self.index]) + managed.dl_tensor.device.device_type = kDLCUDA + managed.dl_tensor.device.device_id = 0 + managed.dl_tensor.ndim = ndim + managed.dl_tensor.dtype.code = kDLUInt + managed.dl_tensor.dtype.bits = bits + managed.dl_tensor.dtype.lanes = 1 + managed.dl_tensor.shape = shape + managed.dl_tensor.strides = strides + managed.dl_tensor.byte_offset = 0 + managed.manager_ctx = cython.cast(cython.p_void, ctx) + managed.deleter = _dlpack_managed_tensor_deleter + + try: + capsule = PyCapsule_New(cython.cast(cython.p_void, managed), b"dltensor", _dlpack_capsule_destructor) + except Exception: + _dlpack_managed_tensor_deleter(managed) + raise + + return capsule + + +@cython.cfunc +@cython.nogil +@cython.exceptval(check=False) +def _dlpack_managed_tensor_deleter(managed: cython.pointer[DLManagedTensor]) -> cython.void: + ctx: cython.pointer[cython.p_void] + frame_ref: cython.pointer[lib.AVFrame] + shape: cython.pointer[int64_t] + strides: cython.pointer[int64_t] + + if managed == cython.NULL: + return + + ctx = cython.cast(cython.pointer[cython.p_void], managed.manager_ctx) + if ctx != cython.NULL: + frame_ref = cython.cast(cython.pointer[lib.AVFrame], ctx[0]) + shape = cython.cast(cython.pointer[int64_t], ctx[1]) + strides = cython.cast(cython.pointer[int64_t], ctx[2]) + + if frame_ref != cython.NULL: + lib.av_frame_free(cython.address(frame_ref)) + if shape != cython.NULL: + free(shape) + if strides != cython.NULL: + free(strides) + free(ctx) + + free(managed) + + +@cython.cfunc +@cython.exceptval(check=False) +def _dlpack_capsule_destructor(capsule: object) -> cython.void: + managed: cython.pointer[DLManagedTensor] + if PyCapsule_IsValid(capsule, b"dltensor"): + managed = cython.cast( + cython.pointer[DLManagedTensor], + PyCapsule_GetPointer(capsule, b"dltensor"), + ) + if managed != cython.NULL: + managed.deleter(managed) diff --git a/av/video/reformatter.py b/av/video/reformatter.py index 786543744..5a30d4d9b 100644 --- a/av/video/reformatter.py +++ b/av/video/reformatter.py @@ -185,6 +185,23 @@ def _reformat( src_format = cython.cast(lib.AVPixelFormat, frame.ptr.format) # Shortcut! + if frame.ptr.hw_frames_ctx: + if ( + dst_format == src_format + and width == frame.ptr.width + and height == frame.ptr.height + and dst_colorspace == src_colorspace + and src_color_range == dst_color_range + ): + return frame + + frame_sw = alloc_video_frame() + err_check(lib.av_hwframe_transfer_data(frame_sw.ptr, frame.ptr, 0)) + frame_sw.pts = frame.pts + frame_sw._init_user_attributes() + frame = frame_sw + src_format = cython.cast(lib.AVPixelFormat, frame.ptr.format) + if ( dst_format == src_format and width == frame.ptr.width diff --git a/include/libavcodec/avcodec.pxd b/include/libavcodec/avcodec.pxd index b75769945..cfb474f4a 100644 --- a/include/libavcodec/avcodec.pxd +++ b/include/libavcodec/avcodec.pxd @@ -367,6 +367,7 @@ cdef extern from "libavcodec/avcodec.h" nogil: AVBufferRef *opaque_ref AVChannelLayout ch_layout int64_t duration + AVBufferRef *hw_frames_ctx cdef struct AVPacket: void *buf diff --git a/include/libavutil/avutil.pxd b/include/libavutil/avutil.pxd index 9d8486e1a..fe9085826 100644 --- a/include/libavutil/avutil.pxd +++ b/include/libavutil/avutil.pxd @@ -164,6 +164,7 @@ cdef extern from "libavutil/error.h" nogil: cdef extern from "libavutil/frame.h" nogil: cdef AVFrame* av_frame_alloc() cdef void av_frame_free(AVFrame**) + cdef int av_frame_ref(AVFrame *dst, const AVFrame *src) cdef void av_frame_unref(AVFrame *frame) cdef int av_frame_make_writable(AVFrame *frame) cdef int av_frame_copy_props(AVFrame *dst, const AVFrame *src) From aaa90dbb2766db3f629adf15a048cce5b6376722 Mon Sep 17 00:00:00 2001 From: Kim Minjong Date: Wed, 4 Feb 2026 15:08:48 +0000 Subject: [PATCH 2/9] Impl VideoFrame.from_dlpack --- av/dlpack.pxd | 2 +- av/hwcontext.pxd | 5 +- av/video/frame.py | 293 ++++++++++++++++++++++++++++++++- av/video/frame.pyi | 9 + include/libavcodec/avcodec.pxd | 8 +- 5 files changed, 311 insertions(+), 6 deletions(-) diff --git a/av/dlpack.pxd b/av/dlpack.pxd index f3cdf857b..3bbfb0ad5 100644 --- a/av/dlpack.pxd +++ b/av/dlpack.pxd @@ -32,7 +32,7 @@ cdef struct DLTensor: cdef struct DLManagedTensor -ctypedef void (*DLManagedTensorDeleter)(DLManagedTensor*) nogil +ctypedef void (*DLManagedTensorDeleter)(DLManagedTensor*) noexcept nogil cdef struct DLManagedTensor: DLTensor dl_tensor diff --git a/av/hwcontext.pxd b/av/hwcontext.pxd index 859c76b0f..8ff9a2ea4 100644 --- a/av/hwcontext.pxd +++ b/av/hwcontext.pxd @@ -1,6 +1,6 @@ cimport libav as lib -cdef extern from "libavutil/hwcontext.h": +cdef extern from "libavutil/hwcontext.h" nogil: ctypedef struct AVHWFramesContext: const void *av_class lib.AVBufferRef *device_ref @@ -10,3 +10,6 @@ cdef extern from "libavutil/hwcontext.h": lib.AVPixelFormat sw_format int width int height + + lib.AVBufferRef *av_hwframe_ctx_alloc(lib.AVBufferRef *device_ref) + int av_hwframe_ctx_init(lib.AVBufferRef *ref) diff --git a/av/video/frame.py b/av/video/frame.py index 8253c3fb2..08dc16973 100644 --- a/av/video/frame.py +++ b/av/video/frame.py @@ -2,13 +2,133 @@ from enum import IntEnum import cython +import cython.cimports.libav as lib +from cython.cimports.av.dictionary import Dictionary +from cython.cimports.av.dlpack import DLManagedTensor, kDLCUDA, kDLUInt from cython.cimports.av.error import err_check -from cython.cimports.av.hwcontext import AVHWFramesContext +from cython.cimports.av.hwcontext import ( + AVHWFramesContext, + av_hwframe_ctx_alloc, + av_hwframe_ctx_init, +) from cython.cimports.av.sidedata.sidedata import get_display_rotation from cython.cimports.av.utils import check_ndarray from cython.cimports.av.video.format import get_pix_fmt, get_video_format from cython.cimports.av.video.plane import VideoPlane -from cython.cimports.libc.stdint import uint8_t +from cython.cimports.cpython.exc import PyErr_Clear +from cython.cimports.cpython.pycapsule import ( + PyCapsule_GetPointer, + PyCapsule_IsValid, + PyCapsule_SetName, +) +from cython.cimports.libc.stdint import int64_t, uint8_t + + +_cuda_device_ctx_cache = {} +_cuda_frames_ctx_cache = {} + +@cython.cfunc +def _consume_dlpack(obj: object, stream: object) -> cython.pointer[DLManagedTensor]: + capsule: object + managed: cython.pointer[DLManagedTensor] + + if hasattr(obj, "__dlpack__"): + capsule = obj.__dlpack__() if stream is None else obj.__dlpack__(stream=stream) + else: + capsule = obj + + if not PyCapsule_IsValid(capsule, b"dltensor"): + PyErr_Clear() + raise TypeError("expected a DLPack capsule or an object implementing __dlpack__") + + managed = cython.cast( + cython.pointer[DLManagedTensor], + PyCapsule_GetPointer(capsule, b"dltensor"), + ) + if managed == cython.NULL: + raise ValueError("PyCapsule_GetPointer returned NULL") + + if PyCapsule_SetName(capsule, b"used_dltensor") != 0: + raise RuntimeError("PyCapsule_SetName failed") + + return managed + +@cython.cfunc +@cython.nogil +@cython.exceptval(check=False) +def _dlpack_avbuffer_free( + opaque: cython.p_void, + data: cython.pointer[uint8_t], +) -> cython.void: + managed: cython.pointer[DLManagedTensor] = cython.cast( + cython.pointer[DLManagedTensor], opaque + ) + if managed != cython.NULL: + managed.deleter(managed) + +@cython.cfunc +def _get_cuda_device_ctx(device_id: cython.int) -> cython.pointer[lib.AVBufferRef]: + cached = _cuda_device_ctx_cache.get(device_id) + if cached is not None: + return cython.cast( + cython.pointer[lib.AVBufferRef], + cython.cast(cython.size_t, cached), + ) + + device_ref: cython.pointer[lib.AVBufferRef] = cython.NULL + device_bytes = str(device_id).encode() + c_device: cython.p_char = device_bytes + options: Dictionary = Dictionary({"primary_ctx": "1"}) + + err_check( + lib.av_hwdevice_ctx_create( + cython.address(device_ref), + lib.AV_HWDEVICE_TYPE_CUDA, + c_device, + options.ptr, + 0, + ) + ) + + _cuda_device_ctx_cache[device_id] = cython.cast(cython.size_t, device_ref) + return device_ref + +@cython.cfunc +def _get_cuda_frames_ctx( + device_id: cython.int, + sw_fmt: lib.AVPixelFormat, + width: cython.int, + height: cython.int, +) -> cython.pointer[lib.AVBufferRef]: + key = (device_id, int(sw_fmt), int(width), int(height)) + cached = _cuda_frames_ctx_cache.get(key) + if cached is not None: + return cython.cast( + cython.pointer[lib.AVBufferRef], + cython.cast(cython.size_t, cached), + ) + + device_ref = _get_cuda_device_ctx(device_id) + frames_ref = av_hwframe_ctx_alloc(device_ref) + if frames_ref == cython.NULL: + raise MemoryError("av_hwframe_ctx_alloc() failed") + + try: + frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( + cython.pointer[AVHWFramesContext], frames_ref.data + ) + frames_ctx.format = get_pix_fmt(b"cuda") + frames_ctx.sw_format = sw_fmt + frames_ctx.width = width + frames_ctx.height = height + err_check(av_hwframe_ctx_init(frames_ref)) + except Exception: + lib.av_buffer_unref(cython.address(frames_ref)) + raise + + _cuda_frames_ctx_cache[key] = cython.cast(cython.size_t, frames_ref) + return frames_ref + _cinit_bypass_sentinel = object() @@ -1201,3 +1321,172 @@ def from_bytes( else: raise NotImplementedError(f"Format '{format}' is not supported.") return frame + + @staticmethod + def from_dlpack( + planes, + format: str = "nv12", + width: int = 0, + height: int = 0, + stream=None, + device_id: int | None = None, + ): + if not isinstance(planes, (tuple, list)): + planes = (planes,) + + if len(planes) != 2: + raise ValueError("from_dlpack currently supports 2-plane formats only (nv12/p010le/p016le)") + + sw_fmt: lib.AVPixelFormat = get_pix_fmt(format) + nv12 = get_pix_fmt(b"nv12") + p010le = get_pix_fmt(b"p010le") + p016le = get_pix_fmt(b"p016le") + + if sw_fmt not in {nv12, p010le, p016le}: + raise NotImplementedError("from_dlpack supports nv12, p010le, p016le only") + + expected_bits = 8 if sw_fmt == nv12 else 16 + itemsize = 1 if expected_bits == 8 else 2 + + m0: cython.pointer[DLManagedTensor] = cython.NULL + m1: cython.pointer[DLManagedTensor] = cython.NULL + frame: VideoFrame = None + + try: + m0 = _consume_dlpack(planes[0], stream) + m1 = _consume_dlpack(planes[1], stream) + + if m0.dl_tensor.device.device_type != kDLCUDA or m1.dl_tensor.device.device_type != kDLCUDA: + raise TypeError("only CUDA DLPack tensors are supported") + + dev0 = m0.dl_tensor.device.device_id + dev1 = m1.dl_tensor.device.device_id + if dev0 != dev1: + raise ValueError("plane tensors must be on the same CUDA device") + + if device_id is None: + device_id = dev0 + elif device_id != dev0: + raise ValueError("device_id does not match the DLPack tensor device_id") + + if ( + m0.dl_tensor.dtype.code != kDLUInt + or m0.dl_tensor.dtype.bits != expected_bits + or m0.dl_tensor.dtype.lanes != 1 + ): + raise TypeError("unexpected dtype for plane 0") + + if ( + m1.dl_tensor.dtype.code != kDLUInt + or m1.dl_tensor.dtype.bits != expected_bits + or m1.dl_tensor.dtype.lanes != 1 + ): + raise TypeError("unexpected dtype for plane 1") + + if m0.dl_tensor.ndim != 2: + raise ValueError("plane 0 must be 2D (H, W)") + + y_h = cython.cast(int64_t, m0.dl_tensor.shape[0]) + y_w = cython.cast(int64_t, m0.dl_tensor.shape[1]) + + if width == 0 and height == 0: + width = cython.cast(int, y_w) + height = cython.cast(int, y_h) + elif width == 0 or height == 0: + raise ValueError("either specify both width/height or neither") + else: + if y_w != width or y_h != height: + raise ValueError("plane 0 shape does not match width/height") + + if width % 2 or height % 2: + raise ValueError("width/height must be even for nv12/p010le/p016le") + + if m0.dl_tensor.strides != cython.NULL: + if m0.dl_tensor.strides[1] != 1: + raise ValueError("plane 0 must be contiguous in the last dimension") + y_pitch_elems = cython.cast(int64_t, m0.dl_tensor.strides[0]) + else: + y_pitch_elems = cython.cast(int64_t, width) + + y_linesize = cython.cast(int, y_pitch_elems * itemsize) + y_size = cython.cast(int, y_linesize * height) + + uv_ndim = m1.dl_tensor.ndim + uv_h_expected = height // 2 + + if uv_ndim == 2: + uv_h = cython.cast(int, m1.dl_tensor.shape[0]) + uv_w = cython.cast(int, m1.dl_tensor.shape[1]) + if uv_h != uv_h_expected or uv_w != width: + raise ValueError("plane 1 must have shape (H/2, W) for 2D UV") + if m1.dl_tensor.strides != cython.NULL: + if m1.dl_tensor.strides[1] != 1: + raise ValueError("plane 1 must be contiguous in the last dimension") + uv_pitch_elems = cython.cast(int64_t, m1.dl_tensor.strides[0]) + else: + uv_pitch_elems = cython.cast(int64_t, uv_w) + elif uv_ndim == 3: + uv_h = cython.cast(int, m1.dl_tensor.shape[0]) + uv_w2 = cython.cast(int, m1.dl_tensor.shape[1]) + uv_c = cython.cast(int, m1.dl_tensor.shape[2]) + if uv_h != uv_h_expected or uv_w2 != (width // 2) or uv_c != 2: + raise ValueError("plane 1 must have shape (H/2, W/2, 2) for 3D UV") + if m1.dl_tensor.strides != cython.NULL: + if m1.dl_tensor.strides[2] != 1 or m1.dl_tensor.strides[1] != 2: + raise ValueError("unexpected UV plane strides for (H/2, W/2, 2)") + uv_pitch_elems = cython.cast(int64_t, m1.dl_tensor.strides[0]) + else: + uv_pitch_elems = cython.cast(int64_t, width) + else: + raise ValueError("plane 1 must be 2D or 3D") + + uv_linesize = cython.cast(int, uv_pitch_elems * itemsize) + uv_size = cython.cast(int, uv_linesize * (height // 2)) + + frames_ref = _get_cuda_frames_ctx(device_id, sw_fmt, width, height) + + frame = alloc_video_frame() + frame.ptr.width = width + frame.ptr.height = height + frame.ptr.format = get_pix_fmt(b"cuda") + + frame.ptr.hw_frames_ctx = lib.av_buffer_ref(frames_ref) + if frame.ptr.hw_frames_ctx == cython.NULL: + raise MemoryError("av_buffer_ref(hw_frames_ctx) failed") + + y_ptr = cython.cast(cython.pointer[uint8_t], m0.dl_tensor.data) + cython.cast( + cython.size_t, m0.dl_tensor.byte_offset + ) + uv_ptr = cython.cast(cython.pointer[uint8_t], m1.dl_tensor.data) + cython.cast( + cython.size_t, m1.dl_tensor.byte_offset + ) + + frame.ptr.buf[0] = lib.av_buffer_create( + y_ptr, y_size, _dlpack_avbuffer_free, cython.cast(cython.p_void, m0), 0 + ) + if frame.ptr.buf[0] == cython.NULL: + raise MemoryError("av_buffer_create failed for plane 0") + frame.ptr.data[0] = y_ptr + frame.ptr.linesize[0] = y_linesize + m0 = cython.NULL + + frame.ptr.buf[1] = lib.av_buffer_create( + uv_ptr, uv_size, _dlpack_avbuffer_free, cython.cast(cython.p_void, m1), 0 + ) + if frame.ptr.buf[1] == cython.NULL: + raise MemoryError("av_buffer_create failed for plane 1") + frame.ptr.data[1] = uv_ptr + frame.ptr.linesize[1] = uv_linesize + m1 = cython.NULL + + frame._init_user_attributes() + return frame + + except Exception: + if frame is not None: + lib.av_frame_unref(frame.ptr) + if m0 != cython.NULL: + m0.deleter(m0) + if m1 != cython.NULL: + m1.deleter(m1) + raise diff --git a/av/video/frame.pyi b/av/video/frame.pyi index a7575e3bd..ed0104202 100644 --- a/av/video/frame.pyi +++ b/av/video/frame.pyi @@ -84,3 +84,12 @@ class VideoFrame(Frame): flip_horizontal: bool = False, flip_vertical: bool = False, ) -> VideoFrame: ... + @staticmethod + def from_dlpack( + planes: object | tuple[object, ...], + format: str = "nv12", + width: int = 0, + height: int = 0, + stream: int | None = None, + device_id: int | None = None, + ) -> "VideoFrame": ... diff --git a/include/libavcodec/avcodec.pxd b/include/libavcodec/avcodec.pxd index cfb474f4a..1029e92c2 100644 --- a/include/libavcodec/avcodec.pxd +++ b/include/libavcodec/avcodec.pxd @@ -353,8 +353,12 @@ cdef extern from "libavcodec/avcodec.h" nogil: int64_t pkt_dts void *opaque int sample_rate - int nb_side_data + AVBufferRef *buf[4] + AVBufferRef **extended_buf + int nb_extended_buf + AVFrameSideData **side_data + int nb_side_data int flags AVColorRange color_range AVColorPrimaries color_primaries @@ -364,10 +368,10 @@ cdef extern from "libavcodec/avcodec.h" nogil: AVDictionary *metadata int decode_error_flags + AVBufferRef *hw_frames_ctx AVBufferRef *opaque_ref AVChannelLayout ch_layout int64_t duration - AVBufferRef *hw_frames_ctx cdef struct AVPacket: void *buf From 56dd2dc18cd7f48fa3f5505702ff282686c0c418 Mon Sep 17 00:00:00 2001 From: Kim Minjong Date: Wed, 4 Feb 2026 16:35:07 +0000 Subject: [PATCH 3/9] Impl minimal support device_id --- av/_hwdevice_registry.py | 12 ++++++ av/codec/hwaccel.py | 21 ++++++++++ av/video/frame.py | 74 ++++++++++++++++++++++++---------- av/video/frame.pyi | 1 + av/video/plane.py | 56 +++++++++++++++++-------- include/libavcodec/avcodec.pxd | 2 +- 6 files changed, 128 insertions(+), 38 deletions(-) create mode 100644 av/_hwdevice_registry.py diff --git a/av/_hwdevice_registry.py b/av/_hwdevice_registry.py new file mode 100644 index 000000000..7557a6a02 --- /dev/null +++ b/av/_hwdevice_registry.py @@ -0,0 +1,12 @@ +_cuda_hwdevice_data_ptr_to_device_id: dict[int, int] = {} + + +def register_cuda_hwdevice_data_ptr(hwdevice_data_ptr: int, device_id: int) -> None: + if hwdevice_data_ptr: + _cuda_hwdevice_data_ptr_to_device_id[int(hwdevice_data_ptr)] = int(device_id) + + +def lookup_cuda_device_id(hwdevice_data_ptr: int) -> int: + if not hwdevice_data_ptr: + return 0 + return _cuda_hwdevice_data_ptr_to_device_id.get(int(hwdevice_data_ptr), 0) diff --git a/av/codec/hwaccel.py b/av/codec/hwaccel.py index 124716bd4..cea0eb1e0 100644 --- a/av/codec/hwaccel.py +++ b/av/codec/hwaccel.py @@ -8,6 +8,8 @@ from cython.cimports.av.error import err_check from cython.cimports.av.video.format import get_video_format +import av._hwdevice_registry as _hwreg + class HWDeviceType(IntEnum): none = lib.AV_HWDEVICE_TYPE_NONE @@ -112,6 +114,9 @@ def __init__( flags=None, output_format="sw", ): + if isinstance(device, int): + device = str(device) + if isinstance(device_type, HWDeviceType): self._device_type = device_type elif isinstance(device_type, str): @@ -131,7 +136,10 @@ def __init__( self._device = device self.allow_software_fallback = allow_software_fallback + self.options = {} if not options else dict(options) + if self._device_type == HWDeviceType.cuda and self.output_format == "hw": + self.options.setdefault("primary_ctx", "1") self.flags = 0 if not flags else flags self.ptr = cython.NULL self.config = None @@ -164,6 +172,19 @@ def _initialize_hw_context(self, codec: Codec): ) ) + if config.ptr.device_type == lib.AV_HWDEVICE_TYPE_CUDA: + device_id = 0 + if self._device: + try: + device_id = int(self._device) + except ValueError: + device_id = 0 + + _hwreg.register_cuda_hwdevice_data_ptr( + cython.cast(cython.size_t, self.ptr.data), + device_id, + ) + def create(self, codec: Codec): """Create a new hardware accelerator context with the given codec""" if self.ptr: diff --git a/av/video/frame.py b/av/video/frame.py index 08dc16973..58a9835ec 100644 --- a/av/video/frame.py +++ b/av/video/frame.py @@ -4,7 +4,7 @@ import cython import cython.cimports.libav as lib from cython.cimports.av.dictionary import Dictionary -from cython.cimports.av.dlpack import DLManagedTensor, kDLCUDA, kDLUInt +from cython.cimports.av.dlpack import DLManagedTensor, kDLCUDA, kDLUInt, kDLCPU from cython.cimports.av.error import err_check from cython.cimports.av.hwcontext import ( AVHWFramesContext, @@ -23,6 +23,7 @@ ) from cython.cimports.libc.stdint import int64_t, uint8_t +import av._hwdevice_registry as _hwreg _cuda_device_ctx_cache = {} _cuda_frames_ctx_cache = {} @@ -67,8 +68,12 @@ def _dlpack_avbuffer_free( managed.deleter(managed) @cython.cfunc -def _get_cuda_device_ctx(device_id: cython.int) -> cython.pointer[lib.AVBufferRef]: - cached = _cuda_device_ctx_cache.get(device_id) +def _get_cuda_device_ctx( + device_id: cython.int, + primary_ctx: cython.bint, +) -> cython.pointer[lib.AVBufferRef]: + key = (int(device_id), int(primary_ctx)) + cached = _cuda_device_ctx_cache.get(key) if cached is not None: return cython.cast( cython.pointer[lib.AVBufferRef], @@ -78,7 +83,7 @@ def _get_cuda_device_ctx(device_id: cython.int) -> cython.pointer[lib.AVBufferRe device_ref: cython.pointer[lib.AVBufferRef] = cython.NULL device_bytes = str(device_id).encode() c_device: cython.p_char = device_bytes - options: Dictionary = Dictionary({"primary_ctx": "1"}) + options: Dictionary = Dictionary({"primary_ctx": "1" if primary_ctx else "0"}) err_check( lib.av_hwdevice_ctx_create( @@ -90,17 +95,23 @@ def _get_cuda_device_ctx(device_id: cython.int) -> cython.pointer[lib.AVBufferRe ) ) - _cuda_device_ctx_cache[device_id] = cython.cast(cython.size_t, device_ref) + _hwreg.register_cuda_hwdevice_data_ptr( + cython.cast(cython.size_t, device_ref.data), + device_id, + ) + + _cuda_device_ctx_cache[key] = cython.cast(cython.size_t, device_ref) return device_ref @cython.cfunc def _get_cuda_frames_ctx( device_id: cython.int, + primary_ctx: cython.bint, sw_fmt: lib.AVPixelFormat, width: cython.int, height: cython.int, ) -> cython.pointer[lib.AVBufferRef]: - key = (device_id, int(sw_fmt), int(width), int(height)) + key = (int(device_id), int(primary_ctx), int(sw_fmt), int(width), int(height)) cached = _cuda_frames_ctx_cache.get(key) if cached is not None: return cython.cast( @@ -108,7 +119,7 @@ def _get_cuda_frames_ctx( cython.cast(cython.size_t, cached), ) - device_ref = _get_cuda_device_ctx(device_id) + device_ref = _get_cuda_device_ctx(device_id, primary_ctx) frames_ref = av_hwframe_ctx_alloc(device_ref) if frames_ref == cython.NULL: raise MemoryError("av_hwframe_ctx_alloc() failed") @@ -1330,6 +1341,7 @@ def from_dlpack( height: int = 0, stream=None, device_id: int | None = None, + primary_ctx: bool = True, ): if not isinstance(planes, (tuple, list)): planes = (planes,) @@ -1356,18 +1368,30 @@ def from_dlpack( m0 = _consume_dlpack(planes[0], stream) m1 = _consume_dlpack(planes[1], stream) - if m0.dl_tensor.device.device_type != kDLCUDA or m1.dl_tensor.device.device_type != kDLCUDA: - raise TypeError("only CUDA DLPack tensors are supported") + dev_type0 = m0.dl_tensor.device.device_type + dev_type1 = m1.dl_tensor.device.device_type + if dev_type0 != dev_type1: + raise ValueError("plane tensors must have the same device_type") + if dev_type0 not in {kDLCUDA, kDLCPU}: + raise NotImplementedError("only CPU and CUDA DLPack tensors are supported") dev0 = m0.dl_tensor.device.device_id dev1 = m1.dl_tensor.device.device_id if dev0 != dev1: raise ValueError("plane tensors must be on the same CUDA device") - - if device_id is None: - device_id = dev0 - elif device_id != dev0: - raise ValueError("device_id does not match the DLPack tensor device_id") + if dev_type0 == kDLCUDA: + if dev0 != dev1: + raise ValueError("plane tensors must be on the same CUDA device") + if device_id is None: + device_id = dev0 + elif device_id != dev0: + raise ValueError("device_id does not match the DLPack tensor device_id") + else: + if device_id not in (None, 0): + raise ValueError("device_id must be 0 for CPU tensors") + device_id = 0 + if dev_type0 == kDLCPU and (dev0 != 0 or dev1 != 0): + raise ValueError("CPU DLPack tensors must have device_id == 0") if ( m0.dl_tensor.dtype.code != kDLUInt @@ -1443,16 +1467,24 @@ def from_dlpack( uv_linesize = cython.cast(int, uv_pitch_elems * itemsize) uv_size = cython.cast(int, uv_linesize * (height // 2)) - frames_ref = _get_cuda_frames_ctx(device_id, sw_fmt, width, height) - frame = alloc_video_frame() frame.ptr.width = width frame.ptr.height = height - frame.ptr.format = get_pix_fmt(b"cuda") - - frame.ptr.hw_frames_ctx = lib.av_buffer_ref(frames_ref) - if frame.ptr.hw_frames_ctx == cython.NULL: - raise MemoryError("av_buffer_ref(hw_frames_ctx) failed") + if dev_type0 == kDLCUDA: + if primary_ctx is None: + primary_ctx = True + if not isinstance(primary_ctx, (bool, int)): + raise TypeError("primary_ctx must be a bool") + primary_ctx = bool(primary_ctx) + + frames_ref = _get_cuda_frames_ctx(device_id, primary_ctx, sw_fmt, width, height) + + frame.ptr.format = get_pix_fmt(b"cuda") + frame.ptr.hw_frames_ctx = lib.av_buffer_ref(frames_ref) + if frame.ptr.hw_frames_ctx == cython.NULL: + raise MemoryError("av_buffer_ref(hw_frames_ctx) failed") + else: + frame.ptr.format = sw_fmt y_ptr = cython.cast(cython.pointer[uint8_t], m0.dl_tensor.data) + cython.cast( cython.size_t, m0.dl_tensor.byte_offset diff --git a/av/video/frame.pyi b/av/video/frame.pyi index ed0104202..0102b1472 100644 --- a/av/video/frame.pyi +++ b/av/video/frame.pyi @@ -92,4 +92,5 @@ class VideoFrame(Frame): height: int = 0, stream: int | None = None, device_id: int | None = None, + primary_ctx: bool = True, ) -> "VideoFrame": ... diff --git a/av/video/plane.py b/av/video/plane.py index fb735dedc..997dfd09e 100644 --- a/av/video/plane.py +++ b/av/video/plane.py @@ -1,7 +1,7 @@ import cython import cython.cimports.libav as lib from cython.cimports.av.buffer import Buffer -from cython.cimports.av.dlpack import DLManagedTensor, kDLCUDA, kDLUInt +from cython.cimports.av.dlpack import DLManagedTensor, kDLCPU, kDLCUDA, kDLUInt from cython.cimports.av.error import err_check from cython.cimports.av.hwcontext import AVHWFramesContext from cython.cimports.av.video.format import get_pix_fmt, get_video_format @@ -17,6 +17,8 @@ from cython.cimports.libc.stdint import int64_t from cython.cimports.libc.stdlib import free, malloc +import av._hwdevice_registry as _hwreg + @cython.cclass class VideoPlane(Plane): @@ -79,22 +81,44 @@ def __getbuffer__(self, view: cython.pointer[Py_buffer], flags: cython.int): PyBuffer_FillInfo(view, self, self._buffer_ptr(), self._buffer_size(), 0, flags) def __dlpack_device__(self): - if not self.frame.ptr.hw_frames_ctx: - raise TypeError("DLPack export is only supported for hardware frames") - if cython.cast(lib.AVPixelFormat, self.frame.ptr.format) != get_pix_fmt(b"cuda"): - raise NotImplementedError("DLPack export is only implemented for CUDA hw frames") - return (kDLCUDA, 0) + if self.frame.ptr.hw_frames_ctx: + if cython.cast(lib.AVPixelFormat, self.frame.ptr.format) != get_pix_fmt(b"cuda"): + raise NotImplementedError("DLPack export is only implemented for CUDA hw frames") + + frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( + cython.pointer[AVHWFramesContext], self.frame.ptr.hw_frames_ctx.data + ) + device_id = _hwreg.lookup_cuda_device_id( + cython.cast(cython.size_t, frames_ctx.device_ref.data) + ) + return (kDLCUDA, device_id) + + return (kDLCPU, 0) def __dlpack__(self, stream=None): - if not self.frame.ptr.hw_frames_ctx: - raise TypeError("DLPack export is only supported for hardware frames") - if cython.cast(lib.AVPixelFormat, self.frame.ptr.format) != get_pix_fmt(b"cuda"): - raise NotImplementedError("DLPack export is only implemented for CUDA hw frames") + if self.frame.ptr.buf[0] == cython.NULL: + raise TypeError("DLPack export requires a refcounted AVFrame (frame.buf[0] is NULL)") - frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( - cython.pointer[AVHWFramesContext], self.frame.ptr.hw_frames_ctx.data - ) - sw_fmt = frames_ctx.sw_format + device_type: cython.int + device_id: cython.int + sw_fmt: lib.AVPixelFormat + + if self.frame.ptr.hw_frames_ctx: + if cython.cast(lib.AVPixelFormat, self.frame.ptr.format) != get_pix_fmt(b"cuda"): + raise NotImplementedError("DLPack export is only implemented for CUDA hw frames") + + frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( + cython.pointer[AVHWFramesContext], self.frame.ptr.hw_frames_ctx.data + ) + sw_fmt = frames_ctx.sw_format + device_type = kDLCUDA + device_id = _hwreg.lookup_cuda_device_id( + cython.cast(cython.size_t, frames_ctx.device_ref.data) + ) + else: + sw_fmt = cython.cast(lib.AVPixelFormat, self.frame.ptr.format) + device_type = kDLCPU + device_id = 0 line_size = self.line_size if line_size < 0: @@ -206,8 +230,8 @@ def __dlpack__(self, stream=None): raise MemoryError("malloc() failed") managed.dl_tensor.data = cython.cast(cython.p_void, frame_ref.data[self.index]) - managed.dl_tensor.device.device_type = kDLCUDA - managed.dl_tensor.device.device_id = 0 + managed.dl_tensor.device.device_type = device_type + managed.dl_tensor.device.device_id = device_id managed.dl_tensor.ndim = ndim managed.dl_tensor.dtype.code = kDLUInt managed.dl_tensor.dtype.bits = bits diff --git a/include/libavcodec/avcodec.pxd b/include/libavcodec/avcodec.pxd index 1029e92c2..d005938ba 100644 --- a/include/libavcodec/avcodec.pxd +++ b/include/libavcodec/avcodec.pxd @@ -353,7 +353,7 @@ cdef extern from "libavcodec/avcodec.h" nogil: int64_t pkt_dts void *opaque int sample_rate - AVBufferRef *buf[4] + AVBufferRef *buf[8] AVBufferRef **extended_buf int nb_extended_buf From 9426057e799e603a551d14852f6f75d422280d44 Mon Sep 17 00:00:00 2001 From: Kim Minjong Date: Thu, 5 Feb 2026 01:44:22 +0900 Subject: [PATCH 4/9] ruff / isort --- av/dlpack.pxd | 1 + av/hwcontext.pxd | 1 + av/video/frame.py | 56 +++++++++++++++++++++++++++++++++-------------- av/video/plane.py | 54 ++++++++++++++++++++++++++++++++++----------- 4 files changed, 83 insertions(+), 29 deletions(-) diff --git a/av/dlpack.pxd b/av/dlpack.pxd index 3bbfb0ad5..5bcefd3bd 100644 --- a/av/dlpack.pxd +++ b/av/dlpack.pxd @@ -1,5 +1,6 @@ from libc.stdint cimport int64_t, uint8_t, uint16_t, uint64_t + cdef enum DLDeviceType: kDLCPU = 1 kDLCUDA = 2 diff --git a/av/hwcontext.pxd b/av/hwcontext.pxd index 8ff9a2ea4..27d1ef2b6 100644 --- a/av/hwcontext.pxd +++ b/av/hwcontext.pxd @@ -1,5 +1,6 @@ cimport libav as lib + cdef extern from "libavutil/hwcontext.h" nogil: ctypedef struct AVHWFramesContext: const void *av_class diff --git a/av/video/frame.py b/av/video/frame.py index 58a9835ec..833181c4e 100644 --- a/av/video/frame.py +++ b/av/video/frame.py @@ -4,7 +4,7 @@ import cython import cython.cimports.libav as lib from cython.cimports.av.dictionary import Dictionary -from cython.cimports.av.dlpack import DLManagedTensor, kDLCUDA, kDLUInt, kDLCPU +from cython.cimports.av.dlpack import DLManagedTensor, kDLCPU, kDLCUDA, kDLUInt from cython.cimports.av.error import err_check from cython.cimports.av.hwcontext import ( AVHWFramesContext, @@ -28,6 +28,7 @@ _cuda_device_ctx_cache = {} _cuda_frames_ctx_cache = {} + @cython.cfunc def _consume_dlpack(obj: object, stream: object) -> cython.pointer[DLManagedTensor]: capsule: object @@ -40,7 +41,9 @@ def _consume_dlpack(obj: object, stream: object) -> cython.pointer[DLManagedTens if not PyCapsule_IsValid(capsule, b"dltensor"): PyErr_Clear() - raise TypeError("expected a DLPack capsule or an object implementing __dlpack__") + raise TypeError( + "expected a DLPack capsule or an object implementing __dlpack__" + ) managed = cython.cast( cython.pointer[DLManagedTensor], @@ -54,6 +57,7 @@ def _consume_dlpack(obj: object, stream: object) -> cython.pointer[DLManagedTens return managed + @cython.cfunc @cython.nogil @cython.exceptval(check=False) @@ -67,6 +71,7 @@ def _dlpack_avbuffer_free( if managed != cython.NULL: managed.deleter(managed) + @cython.cfunc def _get_cuda_device_ctx( device_id: cython.int, @@ -103,6 +108,7 @@ def _get_cuda_device_ctx( _cuda_device_ctx_cache[key] = cython.cast(cython.size_t, device_ref) return device_ref + @cython.cfunc def _get_cuda_frames_ctx( device_id: cython.int, @@ -398,7 +404,9 @@ def planes(self): frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( cython.pointer[AVHWFramesContext], self.ptr.hw_frames_ctx.data ) - fmt = get_video_format(frames_ctx.sw_format, self.ptr.width, self.ptr.height) + fmt = get_video_format( + frames_ctx.sw_format, self.ptr.width, self.ptr.height + ) max_plane_count: cython.int = 0 for i in range(fmt.ptr.nb_components): @@ -1347,7 +1355,9 @@ def from_dlpack( planes = (planes,) if len(planes) != 2: - raise ValueError("from_dlpack currently supports 2-plane formats only (nv12/p010le/p016le)") + raise ValueError( + "from_dlpack currently supports 2-plane formats only (nv12/p010le/p016le)" + ) sw_fmt: lib.AVPixelFormat = get_pix_fmt(format) nv12 = get_pix_fmt(b"nv12") @@ -1373,7 +1383,9 @@ def from_dlpack( if dev_type0 != dev_type1: raise ValueError("plane tensors must have the same device_type") if dev_type0 not in {kDLCUDA, kDLCPU}: - raise NotImplementedError("only CPU and CUDA DLPack tensors are supported") + raise NotImplementedError( + "only CPU and CUDA DLPack tensors are supported" + ) dev0 = m0.dl_tensor.device.device_id dev1 = m1.dl_tensor.device.device_id @@ -1385,7 +1397,9 @@ def from_dlpack( if device_id is None: device_id = dev0 elif device_id != dev0: - raise ValueError("device_id does not match the DLPack tensor device_id") + raise ValueError( + "device_id does not match the DLPack tensor device_id" + ) else: if device_id not in (None, 0): raise ValueError("device_id must be 0 for CPU tensors") @@ -1445,7 +1459,9 @@ def from_dlpack( raise ValueError("plane 1 must have shape (H/2, W) for 2D UV") if m1.dl_tensor.strides != cython.NULL: if m1.dl_tensor.strides[1] != 1: - raise ValueError("plane 1 must be contiguous in the last dimension") + raise ValueError( + "plane 1 must be contiguous in the last dimension" + ) uv_pitch_elems = cython.cast(int64_t, m1.dl_tensor.strides[0]) else: uv_pitch_elems = cython.cast(int64_t, uv_w) @@ -1457,7 +1473,9 @@ def from_dlpack( raise ValueError("plane 1 must have shape (H/2, W/2, 2) for 3D UV") if m1.dl_tensor.strides != cython.NULL: if m1.dl_tensor.strides[2] != 1 or m1.dl_tensor.strides[1] != 2: - raise ValueError("unexpected UV plane strides for (H/2, W/2, 2)") + raise ValueError( + "unexpected UV plane strides for (H/2, W/2, 2)" + ) uv_pitch_elems = cython.cast(int64_t, m1.dl_tensor.strides[0]) else: uv_pitch_elems = cython.cast(int64_t, width) @@ -1477,7 +1495,9 @@ def from_dlpack( raise TypeError("primary_ctx must be a bool") primary_ctx = bool(primary_ctx) - frames_ref = _get_cuda_frames_ctx(device_id, primary_ctx, sw_fmt, width, height) + frames_ref = _get_cuda_frames_ctx( + device_id, primary_ctx, sw_fmt, width, height + ) frame.ptr.format = get_pix_fmt(b"cuda") frame.ptr.hw_frames_ctx = lib.av_buffer_ref(frames_ref) @@ -1486,12 +1506,12 @@ def from_dlpack( else: frame.ptr.format = sw_fmt - y_ptr = cython.cast(cython.pointer[uint8_t], m0.dl_tensor.data) + cython.cast( - cython.size_t, m0.dl_tensor.byte_offset - ) - uv_ptr = cython.cast(cython.pointer[uint8_t], m1.dl_tensor.data) + cython.cast( - cython.size_t, m1.dl_tensor.byte_offset - ) + y_ptr = cython.cast( + cython.pointer[uint8_t], m0.dl_tensor.data + ) + cython.cast(cython.size_t, m0.dl_tensor.byte_offset) + uv_ptr = cython.cast( + cython.pointer[uint8_t], m1.dl_tensor.data + ) + cython.cast(cython.size_t, m1.dl_tensor.byte_offset) frame.ptr.buf[0] = lib.av_buffer_create( y_ptr, y_size, _dlpack_avbuffer_free, cython.cast(cython.p_void, m0), 0 @@ -1503,7 +1523,11 @@ def from_dlpack( m0 = cython.NULL frame.ptr.buf[1] = lib.av_buffer_create( - uv_ptr, uv_size, _dlpack_avbuffer_free, cython.cast(cython.p_void, m1), 0 + uv_ptr, + uv_size, + _dlpack_avbuffer_free, + cython.cast(cython.p_void, m1), + 0, ) if frame.ptr.buf[1] == cython.NULL: raise MemoryError("av_buffer_create failed for plane 1") diff --git a/av/video/plane.py b/av/video/plane.py index 997dfd09e..543ed3843 100644 --- a/av/video/plane.py +++ b/av/video/plane.py @@ -29,7 +29,9 @@ def __cinit__(self, frame: VideoFrame, index: cython.int): frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( cython.pointer[AVHWFramesContext], frame.ptr.hw_frames_ctx.data ) - fmt = get_video_format(frames_ctx.sw_format, frame.ptr.width, frame.ptr.height) + fmt = get_video_format( + frames_ctx.sw_format, frame.ptr.width, frame.ptr.height + ) if fmt.name == "pal8" and index == 1: self.width = 256 @@ -82,8 +84,12 @@ def __getbuffer__(self, view: cython.pointer[Py_buffer], flags: cython.int): def __dlpack_device__(self): if self.frame.ptr.hw_frames_ctx: - if cython.cast(lib.AVPixelFormat, self.frame.ptr.format) != get_pix_fmt(b"cuda"): - raise NotImplementedError("DLPack export is only implemented for CUDA hw frames") + if cython.cast(lib.AVPixelFormat, self.frame.ptr.format) != get_pix_fmt( + b"cuda" + ): + raise NotImplementedError( + "DLPack export is only implemented for CUDA hw frames" + ) frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( cython.pointer[AVHWFramesContext], self.frame.ptr.hw_frames_ctx.data @@ -97,15 +103,21 @@ def __dlpack_device__(self): def __dlpack__(self, stream=None): if self.frame.ptr.buf[0] == cython.NULL: - raise TypeError("DLPack export requires a refcounted AVFrame (frame.buf[0] is NULL)") + raise TypeError( + "DLPack export requires a refcounted AVFrame (frame.buf[0] is NULL)" + ) device_type: cython.int device_id: cython.int sw_fmt: lib.AVPixelFormat if self.frame.ptr.hw_frames_ctx: - if cython.cast(lib.AVPixelFormat, self.frame.ptr.format) != get_pix_fmt(b"cuda"): - raise NotImplementedError("DLPack export is only implemented for CUDA hw frames") + if cython.cast(lib.AVPixelFormat, self.frame.ptr.format) != get_pix_fmt( + b"cuda" + ): + raise NotImplementedError( + "DLPack export is only implemented for CUDA hw frames" + ) frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( cython.pointer[AVHWFramesContext], self.frame.ptr.hw_frames_ctx.data @@ -122,7 +134,9 @@ def __dlpack__(self, stream=None): line_size = self.line_size if line_size < 0: - raise NotImplementedError("negative linesize is not supported for DLPack export") + raise NotImplementedError( + "negative linesize is not supported for DLPack export" + ) nv12 = get_pix_fmt(b"nv12") p010le = get_pix_fmt(b"p010le") @@ -187,8 +201,12 @@ def __dlpack__(self, stream=None): raise MemoryError("av_frame_alloc() failed") err_check(lib.av_frame_ref(frame_ref, self.frame.ptr)) - shape = cython.cast(cython.pointer[int64_t], malloc(ndim * cython.sizeof(int64_t))) - strides = cython.cast(cython.pointer[int64_t], malloc(ndim * cython.sizeof(int64_t))) + shape = cython.cast( + cython.pointer[int64_t], malloc(ndim * cython.sizeof(int64_t)) + ) + strides = cython.cast( + cython.pointer[int64_t], malloc(ndim * cython.sizeof(int64_t)) + ) if shape == cython.NULL or strides == cython.NULL: if shape != cython.NULL: free(shape) @@ -210,7 +228,9 @@ def __dlpack__(self, stream=None): strides[1] = st1 strides[2] = st2 - ctx = cython.cast(cython.pointer[cython.p_void], malloc(3 * cython.sizeof(cython.p_void))) + ctx = cython.cast( + cython.pointer[cython.p_void], malloc(3 * cython.sizeof(cython.p_void)) + ) if ctx == cython.NULL: free(shape) free(strides) @@ -221,7 +241,9 @@ def __dlpack__(self, stream=None): ctx[1] = cython.cast(cython.p_void, shape) ctx[2] = cython.cast(cython.p_void, strides) - managed = cython.cast(cython.pointer[DLManagedTensor], malloc(cython.sizeof(DLManagedTensor))) + managed = cython.cast( + cython.pointer[DLManagedTensor], malloc(cython.sizeof(DLManagedTensor)) + ) if managed == cython.NULL: free(ctx) free(shape) @@ -243,7 +265,11 @@ def __dlpack__(self, stream=None): managed.deleter = _dlpack_managed_tensor_deleter try: - capsule = PyCapsule_New(cython.cast(cython.p_void, managed), b"dltensor", _dlpack_capsule_destructor) + capsule = PyCapsule_New( + cython.cast(cython.p_void, managed), + b"dltensor", + _dlpack_capsule_destructor, + ) except Exception: _dlpack_managed_tensor_deleter(managed) raise @@ -254,7 +280,9 @@ def __dlpack__(self, stream=None): @cython.cfunc @cython.nogil @cython.exceptval(check=False) -def _dlpack_managed_tensor_deleter(managed: cython.pointer[DLManagedTensor]) -> cython.void: +def _dlpack_managed_tensor_deleter( + managed: cython.pointer[DLManagedTensor], +) -> cython.void: ctx: cython.pointer[cython.p_void] frame_ref: cython.pointer[lib.AVFrame] shape: cython.pointer[int64_t] From b22e7a5e81c67bf07f2efb5a0706ccc259ff7b0a Mon Sep 17 00:00:00 2001 From: Kim Minjong Date: Thu, 5 Feb 2026 11:16:20 +0900 Subject: [PATCH 5/9] Merge av/hwcontext.pxd into include/libavutil/avutil.pxd --- av/hwcontext.pxd | 16 ---------------- av/video/frame.py | 21 ++++++++------------- av/video/plane.py | 13 ++++++------- include/libavcodec/avcodec.pxd | 3 ++- include/libavutil/avutil.pxd | 23 ++++++++++++++++++++++- include/libswscale/swscale.pxd | 1 + 6 files changed, 39 insertions(+), 38 deletions(-) delete mode 100644 av/hwcontext.pxd diff --git a/av/hwcontext.pxd b/av/hwcontext.pxd deleted file mode 100644 index 27d1ef2b6..000000000 --- a/av/hwcontext.pxd +++ /dev/null @@ -1,16 +0,0 @@ -cimport libav as lib - - -cdef extern from "libavutil/hwcontext.h" nogil: - ctypedef struct AVHWFramesContext: - const void *av_class - lib.AVBufferRef *device_ref - void *device_ctx - void *hwctx - lib.AVPixelFormat format - lib.AVPixelFormat sw_format - int width - int height - - lib.AVBufferRef *av_hwframe_ctx_alloc(lib.AVBufferRef *device_ref) - int av_hwframe_ctx_init(lib.AVBufferRef *ref) diff --git a/av/video/frame.py b/av/video/frame.py index 833181c4e..97cd351c1 100644 --- a/av/video/frame.py +++ b/av/video/frame.py @@ -6,11 +6,6 @@ from cython.cimports.av.dictionary import Dictionary from cython.cimports.av.dlpack import DLManagedTensor, kDLCPU, kDLCUDA, kDLUInt from cython.cimports.av.error import err_check -from cython.cimports.av.hwcontext import ( - AVHWFramesContext, - av_hwframe_ctx_alloc, - av_hwframe_ctx_init, -) from cython.cimports.av.sidedata.sidedata import get_display_rotation from cython.cimports.av.utils import check_ndarray from cython.cimports.av.video.format import get_pix_fmt, get_video_format @@ -126,19 +121,19 @@ def _get_cuda_frames_ctx( ) device_ref = _get_cuda_device_ctx(device_id, primary_ctx) - frames_ref = av_hwframe_ctx_alloc(device_ref) + frames_ref = lib.av_hwframe_ctx_alloc(device_ref) if frames_ref == cython.NULL: raise MemoryError("av_hwframe_ctx_alloc() failed") try: - frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( - cython.pointer[AVHWFramesContext], frames_ref.data + frames_ctx: cython.pointer[lib.AVHWFramesContext] = cython.cast( + cython.pointer[lib.AVHWFramesContext], frames_ref.data ) frames_ctx.format = get_pix_fmt(b"cuda") frames_ctx.sw_format = sw_fmt frames_ctx.width = width frames_ctx.height = height - err_check(av_hwframe_ctx_init(frames_ref)) + err_check(lib.av_hwframe_ctx_init(frames_ref)) except Exception: lib.av_buffer_unref(cython.address(frames_ref)) raise @@ -401,8 +396,8 @@ def planes(self): # the library implementation does not set the last plane to NULL. fmt = self.format if self.ptr.hw_frames_ctx: - frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( - cython.pointer[AVHWFramesContext], self.ptr.hw_frames_ctx.data + frames_ctx: cython.pointer[lib.AVHWFramesContext] = cython.cast( + cython.pointer[lib.AVHWFramesContext], self.ptr.hw_frames_ctx.data ) fmt = get_video_format( frames_ctx.sw_format, self.ptr.width, self.ptr.height @@ -595,8 +590,8 @@ def to_ndarray(self, channel_last=False, **kwargs): """ kwargs2 = dict(kwargs) if self.ptr.hw_frames_ctx and "format" not in kwargs2: - frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( - cython.pointer[AVHWFramesContext], self.ptr.hw_frames_ctx.data + frames_ctx: cython.pointer[lib.AVHWFramesContext] = cython.cast( + cython.pointer[lib.AVHWFramesContext], self.ptr.hw_frames_ctx.data ) kwargs2["format"] = get_video_format( frames_ctx.sw_format, self.ptr.width, self.ptr.height diff --git a/av/video/plane.py b/av/video/plane.py index 543ed3843..27569447c 100644 --- a/av/video/plane.py +++ b/av/video/plane.py @@ -3,7 +3,6 @@ from cython.cimports.av.buffer import Buffer from cython.cimports.av.dlpack import DLManagedTensor, kDLCPU, kDLCUDA, kDLUInt from cython.cimports.av.error import err_check -from cython.cimports.av.hwcontext import AVHWFramesContext from cython.cimports.av.video.format import get_pix_fmt, get_video_format from cython.cimports.av.video.frame import VideoFrame from cython.cimports.cpython import PyBUF_WRITABLE, PyBuffer_FillInfo @@ -26,8 +25,8 @@ def __cinit__(self, frame: VideoFrame, index: cython.int): # The palette plane has no associated component or linesize; set fields manually fmt = frame.format if frame.ptr.hw_frames_ctx: - frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( - cython.pointer[AVHWFramesContext], frame.ptr.hw_frames_ctx.data + frames_ctx: cython.pointer[lib.AVHWFramesContext] = cython.cast( + cython.pointer[lib.AVHWFramesContext], frame.ptr.hw_frames_ctx.data ) fmt = get_video_format( frames_ctx.sw_format, frame.ptr.width, frame.ptr.height @@ -91,8 +90,8 @@ def __dlpack_device__(self): "DLPack export is only implemented for CUDA hw frames" ) - frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( - cython.pointer[AVHWFramesContext], self.frame.ptr.hw_frames_ctx.data + frames_ctx: cython.pointer[lib.AVHWFramesContext] = cython.cast( + cython.pointer[lib.AVHWFramesContext], self.frame.ptr.hw_frames_ctx.data ) device_id = _hwreg.lookup_cuda_device_id( cython.cast(cython.size_t, frames_ctx.device_ref.data) @@ -119,8 +118,8 @@ def __dlpack__(self, stream=None): "DLPack export is only implemented for CUDA hw frames" ) - frames_ctx: cython.pointer[AVHWFramesContext] = cython.cast( - cython.pointer[AVHWFramesContext], self.frame.ptr.hw_frames_ctx.data + frames_ctx: cython.pointer[lib.AVHWFramesContext] = cython.cast( + cython.pointer[lib.AVHWFramesContext], self.frame.ptr.hw_frames_ctx.data ) sw_fmt = frames_ctx.sw_format device_type = kDLCUDA diff --git a/include/libavcodec/avcodec.pxd b/include/libavcodec/avcodec.pxd index d005938ba..a0a79df8d 100644 --- a/include/libavcodec/avcodec.pxd +++ b/include/libavcodec/avcodec.pxd @@ -1,4 +1,5 @@ -from libc.stdint cimport int64_t, uint16_t, uint32_t, uint8_t +from libc.stdint cimport int64_t, uint8_t, uint16_t, uint32_t + cdef extern from "libavcodec/packet.h" nogil: const AVPacketSideData *av_packet_side_data_get( diff --git a/include/libavutil/avutil.pxd b/include/libavutil/avutil.pxd index fe9085826..ffee28a38 100644 --- a/include/libavutil/avutil.pxd +++ b/include/libavutil/avutil.pxd @@ -1,4 +1,12 @@ -from libc.stdint cimport int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t +from libc.stdint cimport ( + int16_t, + int32_t, + int64_t, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) cdef extern from "libavutil/audio_fifo.h" nogil: @@ -186,12 +194,25 @@ cdef extern from "libavutil/hwcontext.h" nogil: AV_HWDEVICE_TYPE_VULKAN AV_HWDEVICE_TYPE_D3D12VA + ctypedef struct AVHWFramesContext: + const void *av_class + AVBufferRef *device_ref + void *device_ctx + void *hwctx + AVPixelFormat format + AVPixelFormat sw_format + int width + int height + cdef int av_hwdevice_ctx_create(AVBufferRef **device_ctx, AVHWDeviceType type, const char *device, AVDictionary *opts, int flags) cdef AVHWDeviceType av_hwdevice_find_type_by_name(const char *name) cdef const char *av_hwdevice_get_type_name(AVHWDeviceType type) cdef AVHWDeviceType av_hwdevice_iterate_types(AVHWDeviceType prev) cdef int av_hwframe_transfer_data(AVFrame *dst, const AVFrame *src, int flags) + cdef AVBufferRef *av_hwframe_ctx_alloc(AVBufferRef *device_ref) + cdef int av_hwframe_ctx_init(AVBufferRef *ref) + cdef extern from "libavutil/imgutils.h" nogil: cdef int av_image_alloc( uint8_t *pointers[4], diff --git a/include/libswscale/swscale.pxd b/include/libswscale/swscale.pxd index ffc0eb6b0..ca84dceef 100644 --- a/include/libswscale/swscale.pxd +++ b/include/libswscale/swscale.pxd @@ -1,5 +1,6 @@ from libc.stdint cimport uint8_t + cdef extern from "libswscale/swscale.h" nogil: cdef int swscale_version() cdef char* swscale_configuration() From f713f95887653301c9fca31f8d5630c6eeda9ab9 Mon Sep 17 00:00:00 2001 From: Kim Minjong Date: Thu, 5 Feb 2026 11:38:39 +0900 Subject: [PATCH 6/9] Move av/dlpack.pxd to include/dlpack.pxd --- av/video/frame.py | 2 +- av/video/plane.py | 2 +- {av => include}/dlpack.pxd | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename {av => include}/dlpack.pxd (100%) diff --git a/av/video/frame.py b/av/video/frame.py index 97cd351c1..560fdc795 100644 --- a/av/video/frame.py +++ b/av/video/frame.py @@ -4,7 +4,6 @@ import cython import cython.cimports.libav as lib from cython.cimports.av.dictionary import Dictionary -from cython.cimports.av.dlpack import DLManagedTensor, kDLCPU, kDLCUDA, kDLUInt from cython.cimports.av.error import err_check from cython.cimports.av.sidedata.sidedata import get_display_rotation from cython.cimports.av.utils import check_ndarray @@ -16,6 +15,7 @@ PyCapsule_IsValid, PyCapsule_SetName, ) +from cython.cimports.dlpack import DLManagedTensor, kDLCPU, kDLCUDA, kDLUInt from cython.cimports.libc.stdint import int64_t, uint8_t import av._hwdevice_registry as _hwreg diff --git a/av/video/plane.py b/av/video/plane.py index 27569447c..953782608 100644 --- a/av/video/plane.py +++ b/av/video/plane.py @@ -1,7 +1,6 @@ import cython import cython.cimports.libav as lib from cython.cimports.av.buffer import Buffer -from cython.cimports.av.dlpack import DLManagedTensor, kDLCPU, kDLCUDA, kDLUInt from cython.cimports.av.error import err_check from cython.cimports.av.video.format import get_pix_fmt, get_video_format from cython.cimports.av.video.frame import VideoFrame @@ -13,6 +12,7 @@ PyCapsule_New, ) from cython.cimports.cpython.ref import PyObject +from cython.cimports.dlpack import DLManagedTensor, kDLCPU, kDLCUDA, kDLUInt from cython.cimports.libc.stdint import int64_t from cython.cimports.libc.stdlib import free, malloc diff --git a/av/dlpack.pxd b/include/dlpack.pxd similarity index 100% rename from av/dlpack.pxd rename to include/dlpack.pxd From d87422ab4fae79261e4e2bbb497b8e26ea91880c Mon Sep 17 00:00:00 2001 From: Kim Minjong Date: Thu, 5 Feb 2026 13:43:43 +0900 Subject: [PATCH 7/9] Add tests/test_dlpack.py --- tests/test_dlpack.py | 502 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 502 insertions(+) create mode 100644 tests/test_dlpack.py diff --git a/tests/test_dlpack.py b/tests/test_dlpack.py new file mode 100644 index 000000000..8af2fd8a3 --- /dev/null +++ b/tests/test_dlpack.py @@ -0,0 +1,502 @@ +import gc + +import numpy +import pytest + +import av +from av import VideoFrame +from av.codec.hwaccel import HWAccel + +from .common import assertNdarraysEqual, fate_png + + +def _make_u8(shape: tuple[int, ...]) -> numpy.ndarray: + return numpy.arange(int(numpy.prod(shape)), dtype=numpy.uint8).reshape(shape) + + +def _make_u16(shape: tuple[int, ...]) -> numpy.ndarray: + return numpy.arange(int(numpy.prod(shape)), dtype=numpy.uint16).reshape(shape) + + +def _plane_to_2d(plane, height: int, width: int, dtype) -> numpy.ndarray: + itemsize = numpy.dtype(dtype).itemsize + assert plane.line_size % itemsize == 0 + pitch_elems = plane.line_size // itemsize + arr = numpy.frombuffer(memoryview(plane), dtype=dtype).reshape(height, pitch_elems) + return arr[:, :width] + + +def _get_cuda_backend(): + try: + import cupy # type: ignore + + try: + if cupy.cuda.runtime.getDeviceCount() > 0: + return ("cupy", cupy) + except Exception: + pass + except Exception: + pass + + try: + import torch # type: ignore + + if torch.cuda.is_available(): + return ("torch", torch) + except Exception: + pass + + return None + + +def test_hwdevice_registry_register_and_lookup() -> None: + import av._hwdevice_registry as hwreg + + ptr = 0x1234_5678_9ABC_DEF0 + hwreg.register_cuda_hwdevice_data_ptr(ptr, 7) + assert hwreg.lookup_cuda_device_id(ptr) == 7 + assert hwreg.lookup_cuda_device_id(0) == 0 + assert hwreg.lookup_cuda_device_id(ptr + 1) == 0 + + +def test_hwaccel_output_format_validation_and_primary_ctx() -> None: + hw = HWAccel(device_type="cuda", output_format=None) + assert hw.output_format == "sw" + assert "primary_ctx" not in hw.options + + hw = HWAccel(device_type="cuda", output_format="hw") + assert hw.output_format == "hw" + assert hw.options.get("primary_ctx") == "1" + + hw = HWAccel(device_type="cuda", output_format="hw", options={"primary_ctx": "0"}) + assert hw.options.get("primary_ctx") == "0" + + hw = HWAccel(device_type="cuda", device=0, output_format="hw") + assert hw.output_format == "hw" + + with pytest.raises(ValueError, match="output_format must be 'sw' or 'hw'"): + HWAccel(device_type="cuda", output_format="invalid") # type: ignore[arg-type] + + +def test_video_frame_from_dlpack_nv12_cpu_basic_zero_copy_and_lifetime() -> None: + width, height = 64, 48 + y = _make_u8((height, width)) + uv = _make_u8((height // 2, width // 2, 2)) + + frame = VideoFrame.from_dlpack((y, uv), format="nv12") + + assert frame.format.name == "nv12" + assert frame.width == width + assert frame.height == height + assert len(frame.planes) == 2 + assert frame.planes[0].width == width + assert frame.planes[0].height == height + assert frame.planes[1].width == width // 2 + assert frame.planes[1].height == height // 2 + assert frame.planes[0].line_size == width + assert frame.planes[1].line_size == width + + y_plane = _plane_to_2d(frame.planes[0], height, width, numpy.uint8) + uv_plane = _plane_to_2d(frame.planes[1], height // 2, width, numpy.uint8) + assertNdarraysEqual(y_plane, y) + assertNdarraysEqual(uv_plane, uv.reshape(height // 2, width)) + + y[0, 0] = 123 + uv[0, 0, 0] = 11 + uv[0, 0, 1] = 22 + + expected_y_bytes = y.tobytes() + expected_uv_bytes = uv.reshape(height // 2, width).tobytes() + + assert memoryview(frame.planes[0])[0] == 123 + assert memoryview(frame.planes[1])[0] == 11 + assert memoryview(frame.planes[1])[1] == 22 + + del y + del uv + gc.collect() + + assert bytes(frame.planes[0]) == expected_y_bytes + assert bytes(frame.planes[1]) == expected_uv_bytes + + +def test_video_frame_from_dlpack_nv12_cpu_with_pitch_and_dlpack_export() -> None: + width, height = 64, 48 + pad = 16 + + y_base = _make_u8((height, width + pad)) + y = y_base[:, :width] + uv_base = _make_u8((height // 2, (width + pad) // 2, 2)) + uv = uv_base[:, : width // 2, :] + + frame = VideoFrame.from_dlpack((y, uv), format="nv12") + + assert frame.planes[0].line_size == width + pad + assert frame.planes[1].line_size == width + pad + assert frame.planes[0].buffer_size == (width + pad) * height + assert frame.planes[1].buffer_size == (width + pad) * (height // 2) + + y_plane = _plane_to_2d(frame.planes[0], height, width, numpy.uint8) + uv_plane = _plane_to_2d(frame.planes[1], height // 2, width, numpy.uint8) + assertNdarraysEqual(y_plane, y) + assertNdarraysEqual(uv_plane, uv.reshape(height // 2, width)) + + assert frame.planes[0].__dlpack_device__() == (1, 0) + + y_dl = numpy.from_dlpack(frame.planes[0]) + uv_dl = numpy.from_dlpack(frame.planes[1]) + + assert y_dl.shape == (height, width) + assert y_dl.dtype == numpy.uint8 + assert y_dl.strides == (width + pad, 1) + assertNdarraysEqual(y_dl, y) + + assert uv_dl.shape == (height // 2, width // 2, 2) + assert uv_dl.dtype == numpy.uint8 + assert uv_dl.strides == (width + pad, 2, 1) + assertNdarraysEqual(uv_dl, uv) + + expected_y = numpy.array(y, copy=True) + expected_uv = numpy.array(uv, copy=True) + + del frame + del y + del uv + del y_base + del uv_base + gc.collect() + + assertNdarraysEqual(y_dl, expected_y) + assertNdarraysEqual(uv_dl, expected_uv) + + +def test_video_frame_from_dlpack_nv12_cpu_accepts_uv_2d() -> None: + width, height = 64, 48 + y = _make_u8((height, width)) + uv2d = _make_u8((height // 2, width)) + + frame = VideoFrame.from_dlpack((y, uv2d), format="nv12") + + uv_plane = _plane_to_2d(frame.planes[1], height // 2, width, numpy.uint8) + assertNdarraysEqual(uv_plane, uv2d) + + uv_dl = numpy.from_dlpack(frame.planes[1]) + assert uv_dl.shape == (height // 2, width // 2, 2) + assertNdarraysEqual(uv_dl, uv2d.reshape(height // 2, width // 2, 2)) + + +def test_video_frame_from_dlpack_accepts_video_plane_objects() -> None: + width, height = 64, 48 + y = _make_u8((height, width)) + uv = _make_u8((height // 2, width // 2, 2)) + + frame1 = VideoFrame.from_dlpack((y, uv), format="nv12") + frame2 = VideoFrame.from_dlpack((frame1.planes[0], frame1.planes[1]), format="nv12") + + assert bytes(frame2.planes[0]) == bytes(frame1.planes[0]) + assert bytes(frame2.planes[1]) == bytes(frame1.planes[1]) + + +@pytest.mark.parametrize("fmt", ["p010le", "p016le"]) +def test_video_frame_from_dlpack_p010_p016_cpu(fmt: str) -> None: + width, height = 64, 48 + y = _make_u16((height, width)) + uv = _make_u16((height // 2, width // 2, 2)) + + frame = VideoFrame.from_dlpack((y, uv), format=fmt) + + assert frame.format.name == fmt + assert len(frame.planes) == 2 + assert frame.planes[0].line_size == width * 2 + assert frame.planes[1].line_size == width * 2 + + y_plane = _plane_to_2d(frame.planes[0], height, width, numpy.uint16) + uv_plane = _plane_to_2d(frame.planes[1], height // 2, width, numpy.uint16) + assertNdarraysEqual(y_plane, y) + assertNdarraysEqual(uv_plane, uv.reshape(height // 2, width)) + + y_dl = numpy.from_dlpack(frame.planes[0]) + uv_dl = numpy.from_dlpack(frame.planes[1]) + + assert y_dl.dtype == numpy.uint16 + assert y_dl.shape == (height, width) + assert y_dl.strides == (width * 2, 2) + assertNdarraysEqual(y_dl, y) + + assert uv_dl.dtype == numpy.uint16 + assert uv_dl.shape == (height // 2, width // 2, 2) + assert uv_dl.strides == (width * 2, 4, 2) + assertNdarraysEqual(uv_dl, uv) + + +def test_video_plane_dlpack_export_keeps_frame_alive_after_gc() -> None: + container = av.open(fate_png()) + frame = next(container.decode(video=0)) + frame_nv12 = frame.reformat(format="nv12") + + width = frame_nv12.width + height = frame_nv12.height + line_size = frame_nv12.planes[0].line_size + expected = _plane_to_2d(frame_nv12.planes[0], height, width, numpy.uint8).copy() + + y_dl = numpy.from_dlpack(frame_nv12.planes[0]) + assert y_dl.shape == (height, width) + assert y_dl.strides == (line_size, 1) + + del frame_nv12 + del frame + del container + gc.collect() + + assertNdarraysEqual(y_dl, expected) + + +def test_video_plane_dlpack_unsupported_format_raises() -> None: + rgb = numpy.zeros((16, 16, 3), dtype=numpy.uint8) + frame = VideoFrame.from_ndarray(rgb, format="rgb24") + assert frame.planes[0].__dlpack_device__() == (1, 0) + + with pytest.raises( + NotImplementedError, match="unsupported sw_format for DLPack export" + ): + frame.planes[0].__dlpack__() + + +def test_video_frame_from_dlpack_requires_two_planes() -> None: + y = numpy.zeros((4, 4), dtype=numpy.uint8) + with pytest.raises(ValueError, match="2-plane"): + VideoFrame.from_dlpack(y, format="nv12") + + +def test_video_frame_from_dlpack_rejects_unsupported_format() -> None: + width, height = 64, 48 + y = numpy.zeros((height, width), dtype=numpy.uint8) + uv = numpy.zeros((height // 2, width // 2, 2), dtype=numpy.uint8) + + with pytest.raises(NotImplementedError, match="supports nv12, p010le, p016le only"): + VideoFrame.from_dlpack((y, uv), format="yuv420p") + + +def test_video_frame_from_dlpack_rejects_device_id_for_cpu() -> None: + width, height = 64, 48 + y = numpy.zeros((height, width), dtype=numpy.uint8) + uv = numpy.zeros((height // 2, width // 2, 2), dtype=numpy.uint8) + + with pytest.raises(ValueError, match="device_id must be 0 for CPU tensors"): + VideoFrame.from_dlpack((y, uv), format="nv12", device_id=1) + + +def test_video_frame_from_dlpack_requires_both_width_height_or_neither() -> None: + width, height = 64, 48 + y = numpy.zeros((height, width), dtype=numpy.uint8) + uv = numpy.zeros((height // 2, width // 2, 2), dtype=numpy.uint8) + + with pytest.raises(ValueError, match="either specify both width/height or neither"): + VideoFrame.from_dlpack((y, uv), format="nv12", width=width, height=0) + + +def test_video_frame_from_dlpack_rejects_plane0_shape_mismatch_with_width_height() -> ( + None +): + width, height = 64, 48 + y = numpy.zeros((height, width), dtype=numpy.uint8) + uv = numpy.zeros((height // 2, width // 2, 2), dtype=numpy.uint8) + + with pytest.raises(ValueError, match="plane 0 shape does not match width/height"): + VideoFrame.from_dlpack((y, uv), format="nv12", width=width + 2, height=height) + + +def test_video_frame_from_dlpack_rejects_odd_dimensions() -> None: + width, height = 63, 48 + y = numpy.zeros((height, width), dtype=numpy.uint8) + uv = numpy.zeros((height // 2, width), dtype=numpy.uint8) + + with pytest.raises(ValueError, match="width/height must be even"): + VideoFrame.from_dlpack((y, uv), format="nv12") + + +def test_video_frame_from_dlpack_rejects_noncontiguous_plane0_last_dim() -> None: + width, height = 64, 48 + y_full = numpy.zeros((height, width * 2), dtype=numpy.uint8) + y = y_full[:, ::2] + uv = numpy.zeros((height // 2, width // 2, 2), dtype=numpy.uint8) + + with pytest.raises( + ValueError, match="plane 0 must be contiguous in the last dimension" + ): + VideoFrame.from_dlpack((y, uv), format="nv12") + + +def test_video_frame_from_dlpack_rejects_noncontiguous_uv_plane_last_dim_2d() -> None: + width, height = 64, 48 + y = numpy.zeros((height, width), dtype=numpy.uint8) + uv_full = numpy.zeros((height // 2, width * 2), dtype=numpy.uint8) + uv = uv_full[:, ::2] + + with pytest.raises( + ValueError, match="plane 1 must be contiguous in the last dimension" + ): + VideoFrame.from_dlpack((y, uv), format="nv12") + + +def test_video_frame_from_dlpack_rejects_unexpected_uv_strides_3d() -> None: + width, height = 64, 48 + y = numpy.zeros((height, width), dtype=numpy.uint8) + uv_full = numpy.zeros((height // 2, width // 2, 4), dtype=numpy.uint8) + uv = uv_full[:, :, :2] + + with pytest.raises(ValueError, match="unexpected UV plane strides"): + VideoFrame.from_dlpack((y, uv), format="nv12") + + +def test_video_frame_from_dlpack_rejects_wrong_dtype_plane0() -> None: + width, height = 64, 48 + y = numpy.zeros((height, width), dtype=numpy.uint16) + uv = numpy.zeros((height // 2, width // 2, 2), dtype=numpy.uint8) + + with pytest.raises(TypeError, match="unexpected dtype for plane 0"): + VideoFrame.from_dlpack((y, uv), format="nv12") + + +def test_video_frame_from_dlpack_rejects_wrong_dtype_plane1() -> None: + width, height = 64, 48 + y = numpy.zeros((height, width), dtype=numpy.uint8) + uv = numpy.zeros((height // 2, width // 2, 2), dtype=numpy.uint16) + + with pytest.raises(TypeError, match="unexpected dtype for plane 1"): + VideoFrame.from_dlpack((y, uv), format="nv12") + + +def test_video_frame_from_dlpack_p010le_requires_uint16() -> None: + width, height = 64, 48 + y = numpy.zeros((height, width), dtype=numpy.uint8) + uv = numpy.zeros((height // 2, width // 2, 2), dtype=numpy.uint8) + + with pytest.raises(TypeError, match="unexpected dtype for plane 0"): + VideoFrame.from_dlpack((y, uv), format="p010le") + + +def test_video_frame_from_dlpack_rejects_plane0_ndim_not_2() -> None: + y = numpy.zeros((4, 4, 1), dtype=numpy.uint8) + uv = numpy.zeros((2, 4), dtype=numpy.uint8) + + with pytest.raises(ValueError, match="plane 0 must be 2D"): + VideoFrame.from_dlpack((y, uv), format="nv12", width=4, height=4) + + +def test_video_frame_from_dlpack_rejects_plane1_ndim_not_2_or_3() -> None: + width, height = 64, 48 + y = numpy.zeros((height, width), dtype=numpy.uint8) + uv = numpy.zeros((height // 2, width, 1, 1), dtype=numpy.uint8) + + with pytest.raises(ValueError, match="plane 1 must be 2D or 3D"): + VideoFrame.from_dlpack((y, uv), format="nv12") + + +def test_video_frame_from_dlpack_reusing_capsule_raises_typeerror() -> None: + width, height = 64, 48 + y = numpy.zeros((height, width), dtype=numpy.uint8) + uv = numpy.zeros((height // 2, width // 2, 2), dtype=numpy.uint8) + + cap0 = y.__dlpack__() + cap1 = uv.__dlpack__() + + VideoFrame.from_dlpack((cap0, cap1), format="nv12", width=width, height=height) + + with pytest.raises(TypeError, match="expected a DLPack capsule"): + VideoFrame.from_dlpack((cap0, cap1), format="nv12", width=width, height=height) + + +def test_video_frame_from_dlpack_invalid_plane_object_raises_typeerror() -> None: + with pytest.raises(TypeError, match="expected a DLPack capsule"): + VideoFrame.from_dlpack((object(), object()), format="nv12", width=64, height=48) + + +def test_video_frame_from_dlpack_cuda_hw_frame_behavior_if_available() -> None: + backend = _get_cuda_backend() + if backend is None: + pytest.skip("CUDA backend (cupy/torch) not available.") + + width, height = 64, 48 + name, mod = backend + + try: + if name == "cupy": + try: + ndev = int(mod.cuda.runtime.getDeviceCount()) + except Exception: + ndev = 1 + + device_id = 1 if ndev > 1 else 0 + with mod.cuda.Device(device_id): + y = mod.arange(height * width, dtype=mod.uint8).reshape(height, width) + uv = mod.arange( + (height // 2) * (width // 2) * 2, dtype=mod.uint8 + ).reshape(height // 2, width // 2, 2) + expected_device = y.__dlpack_device__() + frame = VideoFrame.from_dlpack((y, uv), format="nv12") + + assert frame.format.name == "cuda" + assert len(frame.planes) == 2 + + with pytest.raises( + TypeError, match="Hardware frame planes do not support" + ): + memoryview(frame.planes[0]) + + assert frame.planes[0].__dlpack_device__() == expected_device + + cap_y = frame.planes[0].__dlpack__() + if hasattr(mod, "fromDlpack"): + y2 = mod.fromDlpack(cap_y) + else: + y2 = mod.from_dlpack(cap_y) + + assert y2.shape == y.shape + assert mod.all(y2 == y).item() + + with pytest.raises( + ValueError, + match="Cannot convert a hardware frame to numpy directly", + ): + frame.to_ndarray(format="cuda") + + else: + try: + ndev = int(mod.cuda.device_count()) + except Exception: + ndev = 1 + + device_id = 1 if ndev > 1 else 0 + device = f"cuda:{device_id}" + + y = mod.arange(height * width, device=device, dtype=mod.uint8).reshape( + height, width + ) + uv = mod.arange( + (height // 2) * (width // 2) * 2, device=device, dtype=mod.uint8 + ).reshape(height // 2, width // 2, 2) + + expected_device = y.__dlpack_device__() + frame = VideoFrame.from_dlpack((y, uv), format="nv12") + + assert frame.format.name == "cuda" + assert len(frame.planes) == 2 + + with pytest.raises(TypeError, match="Hardware frame planes do not support"): + memoryview(frame.planes[0]) + + assert frame.planes[0].__dlpack_device__() == expected_device + + cap_y = frame.planes[0].__dlpack__() + y2 = mod.utils.dlpack.from_dlpack(cap_y) + + assert tuple(y2.shape) == tuple(y.shape) + assert mod.equal(y2, y) + + with pytest.raises( + ValueError, match="Cannot convert a hardware frame to numpy directly" + ): + frame.to_ndarray(format="cuda") + except av.FFmpegError as e: + pytest.skip(f"CUDA hwcontext not available in this build/runtime: {e}") From 0af7dcfa8155a84a7c174ae4337ced26c30c1ad8 Mon Sep 17 00:00:00 2001 From: Kim Minjong Date: Thu, 5 Feb 2026 13:44:21 +0900 Subject: [PATCH 8/9] Fix interfaces --- av/codec/hwaccel.py | 2 +- av/codec/hwaccel.pyi | 8 ++++++-- av/video/plane.py | 4 +++- av/video/plane.pyi | 4 ++++ 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/av/codec/hwaccel.py b/av/codec/hwaccel.py index cea0eb1e0..e717bb070 100644 --- a/av/codec/hwaccel.py +++ b/av/codec/hwaccel.py @@ -112,7 +112,7 @@ def __init__( allow_software_fallback=True, options=None, flags=None, - output_format="sw", + output_format=None, ): if isinstance(device, int): device = str(device) diff --git a/av/codec/hwaccel.pyi b/av/codec/hwaccel.pyi index 8bdc0a6e0..549cc7eaf 100644 --- a/av/codec/hwaccel.pyi +++ b/av/codec/hwaccel.pyi @@ -1,5 +1,5 @@ from enum import IntEnum -from typing import cast +from typing import Literal, cast from av.codec.codec import Codec from av.video.format import VideoFormat @@ -37,13 +37,17 @@ class HWConfig: def is_supported(self) -> bool: ... class HWAccel: + output_format: Literal["sw", "hw"] + options: dict[str, object] + def __init__( self, device_type: str | HWDeviceType, - device: str | None = None, + device: str | int | None = None, allow_software_fallback: bool = False, options: dict[str, object] | None = None, flags: int | None = None, + output_format: Literal["sw", "hw"] | None = None, ) -> None: ... def create(self, codec: Codec) -> HWAccel: ... diff --git a/av/video/plane.py b/av/video/plane.py index 953782608..771ffe031 100644 --- a/av/video/plane.py +++ b/av/video/plane.py @@ -1,3 +1,5 @@ +from typing import Any + import cython import cython.cimports.libav as lib from cython.cimports.av.buffer import Buffer @@ -100,7 +102,7 @@ def __dlpack_device__(self): return (kDLCPU, 0) - def __dlpack__(self, stream=None): + def __dlpack__(self, stream: int | Any | None = None): if self.frame.ptr.buf[0] == cython.NULL: raise TypeError( "DLPack export requires a refcounted AVFrame (frame.buf[0] is NULL)" diff --git a/av/video/plane.pyi b/av/video/plane.pyi index e4a0a206c..fcbf8e6ed 100644 --- a/av/video/plane.pyi +++ b/av/video/plane.pyi @@ -1,3 +1,5 @@ +from types import CapsuleType + from av.plane import Plane from .frame import VideoFrame @@ -9,3 +11,5 @@ class VideoPlane(Plane): buffer_size: int def __init__(self, frame: VideoFrame, index: int) -> None: ... + def __dlpack_device__(self) -> tuple[int, int]: ... + def __dlpack__(self, *, stream: int | None = None) -> CapsuleType: ... From a4a03ae2b2570699c6fed8b4f7d6a0c7c1bf8fb1 Mon Sep 17 00:00:00 2001 From: Kim Minjong Date: Thu, 5 Feb 2026 17:44:57 +0900 Subject: [PATCH 9/9] Create VideoFrame using av_frame_get_buffer instead of av_image_alloc --- av/video/frame.py | 59 +++++++++++++++++++++++++----------- include/libavutil/avutil.pxd | 1 + 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/av/video/frame.py b/av/video/frame.py index 560fdc795..7914b1aa6 100644 --- a/av/video/frame.py +++ b/av/video/frame.py @@ -15,6 +15,7 @@ PyCapsule_IsValid, PyCapsule_SetName, ) +from cython.cimports.cpython.ref import Py_DECREF, Py_INCREF, PyObject from cython.cimports.dlpack import DLManagedTensor, kDLCPU, kDLCUDA, kDLUInt from cython.cimports.libc.stdint import int64_t, uint8_t @@ -67,6 +68,18 @@ def _dlpack_avbuffer_free( managed.deleter(managed) +@cython.cfunc +@cython.nogil +@cython.exceptval(check=False) +def _numpy_avbuffer_free( + opaque: cython.p_void, + data: cython.pointer[uint8_t], +) -> cython.void: + if opaque != cython.NULL: + with cython.gil: + Py_DECREF(cython.cast(object, opaque)) + + @cython.cfunc def _get_cuda_device_ctx( device_id: cython.int, @@ -355,10 +368,7 @@ def _init(self, format: lib.AVPixelFormat, width: cython.uint, height: cython.ui # We enforce aligned buffers, otherwise `sws_scale` can perform # poorly or even cause out-of-bounds reads and writes. if width and height: - res = lib.av_image_alloc( - self.ptr.data, self.ptr.linesize, width, height, format, 16 - ) - self._buffer = self.ptr.data[0] + res = lib.av_frame_get_buffer(self.ptr, 16) if res: err_check(res) @@ -376,7 +386,7 @@ def _init_user_attributes(self): def __dealloc__(self): # The `self._buffer` member is only set if *we* allocated the buffer in `_init`, # as opposed to a buffer allocated by a decoder. - lib.av_freep(cython.address(self._buffer)) + lib.av_frame_unref(self.ptr) # Let go of the reference from the numpy buffers if we made one self._np_buffer = None @@ -1015,9 +1025,9 @@ def from_numpy_buffer(array, format="rgb24", width=0): return frame def _image_fill_pointers_numpy(self, buffer, width, height, linesizes, format): - c_format: lib.AVPixelFormat - c_ptr: cython.pointer[uint8_t] - c_data: cython.size_t + c_data: cython.size_t = buffer.ctypes.data + c_ptr: cython.pointer[uint8_t] = cython.cast(cython.pointer[uint8_t], c_data) + c_format: lib.AVPixelFormat = get_pix_fmt(format) # If you want to use the numpy notation, then you need to include the following lines at the top of the file: # cimport numpy as cnp @@ -1038,26 +1048,41 @@ def _image_fill_pointers_numpy(self, buffer, width, height, linesizes, format): c_data = buffer.ctypes.data c_ptr = cython.cast(cython.pointer[uint8_t], c_data) c_format = get_pix_fmt(format) - lib.av_freep(cython.address(self._buffer)) + lib.av_frame_unref(self.ptr) + self._np_buffer = None # Hold on to a reference for the numpy buffer so that it doesn't get accidentally garbage collected - self._np_buffer = buffer self.ptr.format = c_format self.ptr.width = width self.ptr.height = height for i, linesize in enumerate(linesizes): self.ptr.linesize[i] = linesize - res = lib.av_image_fill_pointers( - self.ptr.data, - cython.cast(lib.AVPixelFormat, self.ptr.format), - self.ptr.height, + required = err_check( + lib.av_image_fill_pointers( + self.ptr.data, + cython.cast(lib.AVPixelFormat, self.ptr.format), + self.ptr.height, + c_ptr, + self.ptr.linesize, + ) + ) + + py_buf = cython.cast(object, buffer) + Py_INCREF(py_buf) + + self.ptr.buf[0] = lib.av_buffer_create( c_ptr, - self.ptr.linesize, + required, + _numpy_avbuffer_free, + cython.cast(cython.p_void, py_buf), + 0, ) + if self.ptr.buf[0] == cython.NULL: + Py_DECREF(py_buf) + raise MemoryError("av_buffer_create failed") - if res: - err_check(res) + self._np_buffer = buffer self._init_user_attributes() @staticmethod diff --git a/include/libavutil/avutil.pxd b/include/libavutil/avutil.pxd index ffee28a38..30de30720 100644 --- a/include/libavutil/avutil.pxd +++ b/include/libavutil/avutil.pxd @@ -174,6 +174,7 @@ cdef extern from "libavutil/frame.h" nogil: cdef void av_frame_free(AVFrame**) cdef int av_frame_ref(AVFrame *dst, const AVFrame *src) cdef void av_frame_unref(AVFrame *frame) + cdef int av_frame_get_buffer(AVFrame *frame, int align) cdef int av_frame_make_writable(AVFrame *frame) cdef int av_frame_copy_props(AVFrame *dst, const AVFrame *src) cdef AVFrameSideData* av_frame_get_side_data(AVFrame *frame, AVFrameSideDataType type)