-
Notifications
You must be signed in to change notification settings - Fork 426
Closed
Description
wrong behavior
When converting from YUV to RGB, from RGB to YUV, or from YUV to YUV:
- the color_range and colorspace are not automatically set based on the formatter args
- when the src and dst colorspace are the same, lib.sws_setColorspaceDetails is not called, so whatever default values are in the context get used, usually bt601+pc
- the reformatter does not respect the color range of the source frame at all.
Line 119 in 5f0e312
c_src_color_range: cython.int = _resolve_enum_value(
This results in completely wrong color values when the source video is bt709 and the user expects the same color space after pixel format conversion.
The bug is on
Line 196 in 5f0e312
| if src_colorspace != dst_colorspace or src_color_range != dst_color_range: |
where it skips lib.sws_setColorspaceDetails completely when the color range and color space are the same, which is the wrong way to use sws_scale.
One should always configure it.
The bug was apparently introduced by commit 72621f2.
reproduce
Run the following code:
import warnings
import av
import numpy as np
import subprocess
from av.video.reformatter import ColorRange, Colorspace, Interpolation
def _plane_to_array(plane, rows, row_bytes):
    """Return the first ``row_bytes`` bytes of each row of *plane* as uint8.

    The plane buffer is viewed as ``rows`` rows of ``plane.line_size`` bytes
    each; anything stored past ``row_bytes`` in a row is dropped.
    """
    stride = plane.line_size
    flat = np.frombuffer(memoryview(plane), dtype=np.uint8)
    return flat.reshape(rows, stride)[:, :row_bytes]


def compare_frame_to_ndarray(raw_ndarray, av_frame):
    """Compare a raw ndarray (YUV444p or RGB24) to a PyAV VideoFrame.

    The comparison is done sample by sample on the absolute difference.

    Args:
        raw_ndarray (np.ndarray):
            - YUV444p: shape (3, H, W)
            - RGB24: shape (H, W, 3)
        av_frame (av.video.frame.VideoFrame):
            VideoFrame in matching format: 'yuv444p' or 'rgb24'.

    Returns:
        dict: summary with 'mean_diff' (float), 'max_diff' (int),
        'num_mismatches' (int) and 'total_pixels' (int, the number of
        samples in ``raw_ndarray``).

    Raises:
        ValueError: if the frame format is unsupported, the array is empty
            or has the wrong layout, or the array and frame sizes differ.
    """
    fmt = av_frame.format.name
    if fmt not in ("yuv444p", "rgb24"):
        raise ValueError(f"Unsupported PyAV frame format: {fmt}")

    # Validate the full layout, not just one axis: a 2-D array would
    # otherwise broadcast against the planes and give a meaningless result.
    if fmt == "yuv444p":
        if raw_ndarray.ndim != 3 or raw_ndarray.shape[0] != 3:
            raise ValueError("YUV ndarray must have shape (3, H, W)")
        height, width = raw_ndarray.shape[1:]
    else:
        if raw_ndarray.ndim != 3 or raw_ndarray.shape[2] != 3:
            raise ValueError("RGB ndarray must have shape (H, W, 3)")
        height, width = raw_ndarray.shape[:2]

    if raw_ndarray.size == 0:
        raise ValueError("raw_ndarray must not be empty")

    # Without this check a wider array could silently be compared against
    # the bytes stored after the visible part of each row.
    if (av_frame.height, av_frame.width) != (height, width):
        raise ValueError(
            f"Size mismatch: ndarray is {width}x{height}, "
            f"frame is {av_frame.width}x{av_frame.height}"
        )

    if fmt == "yuv444p":
        # One full-resolution plane per channel, in the same order as the
        # first axis of raw_ndarray.
        diffs = [
            np.abs(
                _plane_to_array(plane, height, width).astype(int)
                - expected.astype(int)
            )
            for plane, expected in zip(av_frame.planes, raw_ndarray)
        ]
    else:
        # Packed RGB: a single plane holding 3 bytes per pixel.
        packed = _plane_to_array(av_frame.planes[0], height, width * 3)
        actual = packed.reshape(height, width, 3)
        diffs = [np.abs(actual.astype(int) - raw_ndarray.astype(int))]

    total_diff = 0
    max_diff = 0
    num_mismatch = 0
    for diff in diffs:
        total_diff += int(diff.sum())
        max_diff = max(max_diff, int(diff.max()))
        num_mismatch += int(np.count_nonzero(diff))

    total_pixels = int(raw_ndarray.size)
    return {
        # Plain Python numbers throughout, so the summary prints and
        # serializes uniformly.
        "mean_diff": float(total_diff / total_pixels),
        "max_diff": max_diff,
        "num_mismatches": num_mismatch,
        "total_pixels": total_pixels,
    }
import numpy as np
def load_yuv_raw(file_path, width, height, format="yuv444p"):
    """Load one raw planar YUV frame from disk as a NumPy array.

    The file is expected to hold exactly three full-resolution planes
    stored back to back in the order Y, U, V.

    Args:
        file_path (str): path to the raw YUV file
        width (int): frame width
        height (int): frame height
        format (str): pixel format, currently only supports 'yuv444p'

    Returns:
        np.ndarray: shape (3, H, W), dtype=np.uint8, planes in order (Y, U, V)

    Raises:
        ValueError: if ``format`` is not 'yuv444p' or the file size does
            not equal ``3 * width * height`` bytes.
    """
    if format != "yuv444p":
        raise ValueError("Currently only 'yuv444p' is supported")

    yuv = np.fromfile(file_path, dtype=np.uint8)

    expected_size = 3 * width * height
    if yuv.size != expected_size:
        raise ValueError(f"Expected {expected_size} bytes, got {yuv.size}")

    # The planes are contiguous and equally sized, so one reshape yields the
    # (Y, U, V) stack without slicing each plane and copying them together.
    return yuv.reshape(3, height, width)
def numpy_to_av_frame_rgb(
    rgb_np,
):
    """Convert a NumPy RGB array to a PyAV VideoFrame (rgb24).

    No colorspace or color range is set on the returned frame; the
    reproduction treats RGB as having neither concept.

    Args:
        rgb_np (np.ndarray): shape (H, W, 3), dtype=uint8

    Returns:
        av.video.frame.VideoFrame: rgb24 frame filled with the array data

    Raises:
        ValueError: if ``rgb_np`` is not a 3-D array with 3 channels or its
            dtype is not uint8.
    """
    if rgb_np.ndim != 3 or rgb_np.shape[2] != 3:
        raise ValueError("Input NumPy array must have 3 channels (H, W, 3)")
    # Rows are copied as raw bytes below, so any other dtype would either be
    # reinterpreted silently (int8) or fail with an obscure buffer error.
    if rgb_np.dtype != np.uint8:
        raise ValueError("Input NumPy array must have dtype uint8")
    H, W = rgb_np.shape[:2]

    frame_rgb = av.VideoFrame(width=W, height=H, format="rgb24")

    # Fill the single packed plane row by row: each stored row is
    # ``line_size`` bytes long, which may exceed the W * 3 bytes of pixel data.
    plane = frame_rgb.planes[0]
    buf = memoryview(plane)
    stride = plane.line_size
    row_bytes = W * 3
    for y, row in enumerate(rgb_np):
        start = y * stride
        buf[start:start + row_bytes] = row.tobytes()
    return frame_rgb
def check_color_metrics(frame, range: "ColorRange", space: "Colorspace"):
    """Warn when a frame's color metadata differs from the expected values.

    Args:
        frame: object exposing ``color_range`` and ``colorspace`` attributes.
        range: expected value of ``frame.color_range``.
        space: expected value of ``frame.colorspace``.

    Nothing is returned and nothing is raised; each mismatch is reported
    through ``warnings.warn``.
    """
    # stacklevel=2 attributes the warning to the call site of this helper,
    # which is the line a reader actually needs to look at.
    if frame.color_range != range:
        warnings.warn(f"{frame.color_range=} should be {range}", stacklevel=2)
    if frame.colorspace != space:
        warnings.warn(f"{frame.colorspace=} should be {space}", stacklevel=2)
# Build the test image and dump it to disk as raw packed rgb24 bytes.
W, H = 256, 256
# 1. Create RGB gradient (uint8, packed RGB)
x = np.linspace(0, 255, W, dtype=np.uint8)
y = np.linspace(0, 255, H, dtype=np.uint8)
# R repeats the x ramp on every row, so it varies along the width.
R = np.tile(x, (H, 1))
# G repeats the y ramp on every column, so it varies along the height.
G = np.tile(y[:, None], (1, W))
# B is the inverse of R.
B = 255 - R
rgb_np = np.stack([R, G, B], axis=2) # (H, W, 3)
# Raw dump with no header; this file is the input of the ffmpeg call.
rgb_np.tofile("/tmp/gradient.rgb")
# 2. Produce the reference YUV data with the ffmpeg command-line tool:
# read the raw rgb24 dump, convert it with the scale filter (neighbor flag,
# bt709 output matrix, pc output range) to yuv444p, and write it as raw video.
# check_call raises CalledProcessError if ffmpeg exits with a non-zero status.
subprocess.check_call([
"ffmpeg",
"-hide_banner",
"-loglevel", "error",
"-y", # overwrite output
"-f", "rawvideo",
"-pix_fmt", "rgb24",
"-s", f"{W}x{H}",
"-i", "/tmp/gradient.rgb",
"-vf", "scale=sws_flags=neighbor:out_color_matrix=bt709:out_range=pc,format=yuv444p",
"-f", "rawvideo",
"/tmp/gradient.yuv"
])
# 3. Load the ffmpeg result as the reference (3, H, W) array.
yuv_planes = load_yuv_raw("/tmp/gradient.yuv", W, H)
print(f"{yuv_planes.shape=}")
# 4. Wrap the same RGB data in a PyAV frame and check the copy is lossless.
frame_rgb = numpy_to_av_frame_rgb(
rgb_np=rgb_np
)
print(">>>rgb raw vs rgb frame")
print(compare_frame_to_ndarray(rgb_np, frame_rgb))
print("<<<<<<<<<<")
# 5. RGB -> YUV through PyAV, requesting the same matrix/range as the
# ffmpeg command (ITU709 + JPEG).
frame_yuv = frame_rgb.reformat(
format="yuv444p",
interpolation=Interpolation.POINT,
dst_colorspace=Colorspace.ITU709,
dst_color_range=ColorRange.JPEG
)
# Warns if the output frame's metadata does not carry the requested values.
check_color_metrics(
frame_yuv,
range=ColorRange.JPEG,
space=Colorspace.ITU709
)
print(">>>yuv raw vs converted yuv frame")
print(compare_frame_to_ndarray(yuv_planes, frame_yuv))
print("<<<<<<<<<<")
# fix the color metrics
frame_yuv.color_range = ColorRange.JPEG
frame_yuv.colorspace = Colorspace.ITU709
# 6. YUV -> RGB relying only on the metadata set on the frame just above
# (no explicit src/dst arguments).
frame_rgb_rt = frame_yuv.reformat(
format="rgb24"
)
print(">>>rgb raw vs yuv->rgb converted frame")
print(compare_frame_to_ndarray(rgb_np, frame_rgb_rt))
print("<<<<<<<<<<")
# 7. YUV -> RGB again, now passing the source colorspace/range explicitly
# and a destination colorspace that differs from the source one.
frame_rgb_rt2 = frame_yuv.reformat(
format="rgb24",
interpolation=Interpolation.POINT,
src_colorspace=Colorspace.ITU709,
dst_colorspace=Colorspace.DEFAULT,
src_color_range=ColorRange.JPEG
)
print(">>>rgb raw vs yuv->rgb converted frame")
print(compare_frame_to_ndarray(rgb_np, frame_rgb_rt2))
print("<<<<<<<<<<")
The output:
yuv_planes.shape=(3, 256, 256)
>>>rgb raw vs rgb frame
{'mean_diff': np.float64(0.0), 'max_diff': 0, 'num_mismatches': 0, 'total_pixels': 196608}
<<<<<<<<<<
>>>yuv raw vs converted yuv frame
{'mean_diff': np.float64(0.3345896402994792), 'max_diff': 1, 'num_mismatches': 65783, 'total_pixels': 196608}
<<<<<<<<<<
>>>rgb raw vs yuv->rgb converted frame
{'mean_diff': np.float64(9.679051717122396), 'max_diff': 39, 'num_mismatches': 188812, 'total_pixels': 196608}
<<<<<<<<<<
>>>rgb raw vs yuv->rgb converted frame
{'mean_diff': np.float64(0.6810099283854166), 'max_diff': 2, 'num_mismatches': 114061, 'total_pixels': 196608}
<<<<<<<<<<
/tmp/ipykernel_1203989/4113189771.py:146: UserWarning: frame.color_range=0 should be 2
warnings.warn(f"{frame.color_range=} should be {range}")
/tmp/ipykernel_1203989/4113189771.py:148: UserWarning: frame.colorspace=2 should be 1
warnings.warn(f"{frame.colorspace=} should be {space}")
workaround
The workaround only exists for rgb<->yuv, since rgb does not care about the color matrix and color range.
- One needs to manually set the source color range for all conversion functions like to_rgb, to_ndarray, etc.
- One needs to set the dst colorspace to some different value when converting to rgb.
This does not work for yuv->yuv, because usually we want exactly the same range and colorspace, and then setColorspaceDetails is skipped, which results in using the default context values.
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels