Skip to content

reformat wrongly skips sws_setColorspaceDetails, resulting in completely wrong color when the source video is BT709 with full color range #2149

@braindevices

Description

@braindevices

wrong behavior

when converting from yuv to rgb, rgb to yuv, or yuv to yuv:

  1. the color_range and colorspace are not automatically set based on the reformatter args
  2. when the src and dst colorspace are the same, lib.sws_setColorspaceDetails is not called, resulting in whatever default values are in the context being used, usually bt601+pc
  3. the reformatter does not respect the color range of the source frame at all.
    c_src_color_range: cython.int = _resolve_enum_value(

This results in completely wrong color values when the source video is BT709 and the user expects the same color space after pixel format conversion.

The bug is on

if src_colorspace != dst_colorspace or src_color_range != dst_color_range:

where it skips lib.sws_setColorspaceDetails completely when the color range and color space are the same, which is the wrong way to use sws_scale.
One should always configure it.
The bug was apparently introduced by commit 72621f2

reproduce

run following code

import warnings
import av
import numpy as np
import subprocess
from av.video.reformatter import ColorRange, Colorspace, Interpolation

def compare_frame_to_ndarray(raw_ndarray, av_frame):
    """
    Compare a raw ndarray (YUV444p or RGB24) to a PyAV VideoFrame pixel by pixel.

    Args:
        raw_ndarray (np.ndarray):
            - YUV444p: shape (3, H, W)
            - RGB24: shape (H, W, 3)
        av_frame (av.video.frame.VideoFrame):
            VideoFrame in the matching format: 'yuv444p' or 'rgb24'

    Returns:
        dict: summary with 'mean_diff', 'max_diff', 'num_mismatches', 'total_pixels'
    """
    fmt = av_frame.format.name
    if fmt.startswith("yuv"):
        H, W = raw_ndarray.shape[-2:]
    else:
        H, W = raw_ndarray.shape[:2]

    total_diff = 0
    max_diff = 0
    num_mismatch = 0

    if fmt == "yuv444p":
        if raw_ndarray.shape[0] != 3:
            raise ValueError("YUV ndarray must have shape (3, H, W)")
        # Diff each plane (Y, U, V) against the matching reference plane;
        # the slice strips any per-row stride padding in the frame buffer.
        for idx, plane in enumerate(av_frame.planes):
            row_bytes = plane.line_size
            data = np.frombuffer(memoryview(plane), dtype=np.uint8)
            valid = data.reshape(H, row_bytes)[:, :W]
            delta = np.abs(valid.astype(int) - raw_ndarray[idx].astype(int))
            total_diff += delta.sum()
            max_diff = max(max_diff, delta.max())
            num_mismatch += np.count_nonzero(delta)

    elif fmt == "rgb24":
        if raw_ndarray.shape != (H, W, 3):
            raise ValueError("RGB ndarray must have shape (H, W, 3)")
        # rgb24 is packed into a single plane; drop stride padding, then
        # unpack each row into (W, 3) triplets.
        plane = av_frame.planes[0]
        row_bytes = plane.line_size
        data = np.frombuffer(memoryview(plane), dtype=np.uint8)
        valid = data.reshape(H, row_bytes)[:, :W * 3].reshape(H, W, 3)
        delta = np.abs(valid.astype(int) - raw_ndarray.astype(int))
        total_diff += delta.sum()
        max_diff = delta.max()
        num_mismatch += np.count_nonzero(delta)

    else:
        raise ValueError(f"Unsupported PyAV frame format: {fmt}")

    total_pixels = raw_ndarray.size

    return {
        "mean_diff": total_diff / total_pixels,
        "max_diff": int(max_diff),
        "num_mismatches": int(num_mismatch),
        "total_pixels": int(total_pixels),
    }


import numpy as np

def load_yuv_raw(file_path, width, height, format="yuv444p"):
    """
    Load a raw YUV frame from disk and reshape it into a NumPy array.

    Args:
        file_path (str): path to the raw YUV file
        width (int): frame width
        height (int): frame height
        format (str): pixel format, currently only supports 'yuv444p'

    Returns:
        np.ndarray: shape (3, H, W), dtype=np.uint8, planes in order (Y, U, V)
    """
    if format != "yuv444p":
        raise ValueError("Currently only 'yuv444p' is supported")

    data = np.fromfile(file_path, dtype=np.uint8)

    # yuv444p stores three full-resolution planes back to back,
    # so the file must hold exactly 3 * W * H bytes.
    expected_size = 3 * width * height
    if data.size != expected_size:
        raise ValueError(f"Expected {expected_size} bytes, got {data.size}")

    # A single reshape splits the flat buffer into the (Y, U, V) planes,
    # equivalent to slicing each plane out and stacking them.
    return data.reshape(3, height, width)

def numpy_to_av_frame_rgb(
        rgb_np,
        # RGB does not have the colorspace/range concept, hence no such args:
        # colorspace=Colorspace.DEFAULT,
        # color_range=ColorRange.JPEG
    ):
    """
    Convert a NumPy RGB array to a PyAV VideoFrame (rgb24).

    Args:
        rgb_np (np.ndarray): shape (H, W, 3), dtype=uint8

    Returns:
        av.video.frame.VideoFrame: rgb24 frame filled with the array data
    """
    H, W, C = rgb_np.shape
    if C != 3:
        raise ValueError("Input NumPy array must have 3 channels (H, W, 3)")

    # Allocate an empty frame; its plane may be wider than W*3 due to stride.
    frame_rgb = av.VideoFrame(width=W, height=H, format="rgb24")

    # Copy row by row so any stride padding in the plane is left untouched.
    plane = frame_rgb.planes[0]
    view = memoryview(plane)
    stride = plane.line_size
    row_len = W * 3
    for row in range(H):
        offset = row * stride
        view[offset:offset + row_len] = rgb_np[row].tobytes()

    return frame_rgb


def check_color_metrics(frame, range: ColorRange, space: Colorspace):
    """Warn when the frame's color metadata differs from the expected values."""
    # NOTE: `range` shadows the builtin, but the keyword names are part of the
    # call sites below, so they are kept unchanged.
    expected = {"color_range": range, "colorspace": space}
    if frame.color_range != expected["color_range"]:
        warnings.warn(f"{frame.color_range=} should be {range}")
    if frame.colorspace != expected["colorspace"]:
        warnings.warn(f"{frame.colorspace=} should be {space}")

# --- Reproduction script ---------------------------------------------------
# Builds a synthetic RGB gradient, converts it to BT.709 full-range yuv444p
# with ffmpeg as the reference, then performs the analogous conversions with
# PyAV and compares the results pixel by pixel.

W, H = 256, 256

# 1. Create RGB gradient (uint8, packed RGB)
x = np.linspace(0, 255, W, dtype=np.uint8)
y = np.linspace(0, 255, H, dtype=np.uint8)

R = np.tile(x, (H, 1))           # red ramps left -> right
G = np.tile(y[:, None], (1, W))  # green ramps top -> bottom
B = 255 - R                      # blue is the inverse of red

rgb_np = np.stack([R, G, B], axis=2)  # (H, W, 3)
rgb_np.tofile("/tmp/gradient.rgb") 

# 2. Reference conversion: ffmpeg rgb24 -> yuv444p with BT.709 matrix and
#    full ("pc") range, nearest-neighbor scaling to avoid resampling error.
subprocess.check_call([
    "ffmpeg",
    "-hide_banner",
    "-loglevel", "error",
    "-y",                   # overwrite output
    "-f", "rawvideo",
    "-pix_fmt", "rgb24",
    "-s", f"{W}x{H}",
    "-i", "/tmp/gradient.rgb",
    "-vf", "scale=sws_flags=neighbor:out_color_matrix=bt709:out_range=pc,format=yuv444p",
    "-f", "rawvideo",
    "/tmp/gradient.yuv"
])

yuv_planes = load_yuv_raw("/tmp/gradient.yuv", W, H)
print(f"{yuv_planes.shape=}")

# 3. Sanity check: the frame built from the array must match it exactly.
frame_rgb = numpy_to_av_frame_rgb(
    rgb_np=rgb_np
)
print(">>>rgb raw vs rgb frame")
print(compare_frame_to_ndarray(rgb_np, frame_rgb))
print("<<<<<<<<<<")

# 4. PyAV rgb24 -> yuv444p with explicit BT.709 + full range; this should
#    match the ffmpeg reference, and the result frame should carry the
#    requested metadata (per this report it does not — warnings fire).
frame_yuv = frame_rgb.reformat(
    format="yuv444p",
    interpolation=Interpolation.POINT,
    dst_colorspace=Colorspace.ITU709,
    dst_color_range=ColorRange.JPEG
)
check_color_metrics(
    frame_yuv,
    range=ColorRange.JPEG,
    space=Colorspace.ITU709
)
print(">>>yuv raw vs converted yuv frame")
print(compare_frame_to_ndarray(yuv_planes, frame_yuv))
print("<<<<<<<<<<")
# fix the color metrics
frame_yuv.color_range = ColorRange.JPEG
frame_yuv.colorspace = Colorspace.ITU709
# 5. Round trip back to rgb24 with no explicit args. Per this report the
#    frame's own colorspace/range are not respected, giving large errors
#    (mean_diff ~9.7 in the output below).
frame_rgb_rt = frame_yuv.reformat(
    format="rgb24"
)
print(">>>rgb raw vs yuv->rgb converted frame")
print(compare_frame_to_ndarray(rgb_np, frame_rgb_rt))
print("<<<<<<<<<<")

# 6. Workaround: spell out src colorspace/range and force dst_colorspace to a
#    *different* value so that sws_setColorspaceDetails is actually called.
frame_rgb_rt2 = frame_yuv.reformat(
    format="rgb24",
    interpolation=Interpolation.POINT,
    src_colorspace=Colorspace.ITU709,
    dst_colorspace=Colorspace.DEFAULT,
    src_color_range=ColorRange.JPEG
)
print(">>>rgb raw vs yuv->rgb converted frame")
print(compare_frame_to_ndarray(rgb_np, frame_rgb_rt2))
print("<<<<<<<<<<")

the output

yuv_planes.shape=(3, 256, 256)
>>>rgb raw vs rgb frame
{'mean_diff': np.float64(0.0), 'max_diff': 0, 'num_mismatches': 0, 'total_pixels': 196608}
<<<<<<<<<<
>>>yuv raw vs converted yuv frame
{'mean_diff': np.float64(0.3345896402994792), 'max_diff': 1, 'num_mismatches': 65783, 'total_pixels': 196608}
<<<<<<<<<<
>>>rgb raw vs yuv->rgb converted frame
{'mean_diff': np.float64(9.679051717122396), 'max_diff': 39, 'num_mismatches': 188812, 'total_pixels': 196608}
<<<<<<<<<<
>>>rgb raw vs yuv->rgb converted frame
{'mean_diff': np.float64(0.6810099283854166), 'max_diff': 2, 'num_mismatches': 114061, 'total_pixels': 196608}
<<<<<<<<<<
/tmp/ipykernel_1203989/4113189771.py:146: UserWarning: frame.color_range=0 should be 2
  warnings.warn(f"{frame.color_range=} should be {range}")
/tmp/ipykernel_1203989/4113189771.py:148: UserWarning: frame.colorspace=2 should be 1
  warnings.warn(f"{frame.colorspace=} should be {space}")

workaround

The workaround only exists for rgb<->yuv, since RGB does not have a color matrix or color range.

  • One needs to manually set the source color range for all conversion functions like to_rgb, to_ndarray, etc.
  • One needs to set the dst colorspace to any different value when converting to rgb.

This does not work for yuv->yuv, because usually we want exactly the same range and colorspace; setColorspaceDetails is then skipped, resulting in the default context values being used.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions