From 39db621505b31fe217eed504cf4419912d5ae2cc Mon Sep 17 00:00:00 2001 From: Manuel Candales Date: Fri, 30 Jan 2026 19:25:33 -0500 Subject: [PATCH 1/6] Update [ghstack-poisoned] --- .github/workflows/metal.yml | 24 + backends/apple/metal/tests/run_metal_test.sh | 126 +++ backends/apple/metal/tests/test_modules.py | 817 +++++++++++++++++++ 3 files changed, 967 insertions(+) create mode 100755 backends/apple/metal/tests/run_metal_test.sh create mode 100644 backends/apple/metal/tests/test_modules.py diff --git a/.github/workflows/metal.yml b/.github/workflows/metal.yml index 1e0ad2f9587..63466f36abb 100644 --- a/.github/workflows/metal.yml +++ b/.github/workflows/metal.yml @@ -28,6 +28,30 @@ jobs: PYTHON_EXECUTABLE=python CMAKE_ARGS="-DEXECUTORCH_BUILD_METAL=ON" ${CONDA_RUN} --no-capture-output ./install_executorch.sh echo "::endgroup::" + test-metal-modules: + name: test-metal-backend-modules + uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + with: + runner: macos-m2-stable + python-version: '3.11' + submodules: 'recursive' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 120 + script: | + set -eux + + echo "::group::Setup ExecuTorch" + PYTHON_EXECUTABLE=python ${CONDA_RUN} ./install_executorch.sh + echo "::endgroup::" + + echo "::group::Build Metal Runtime" + ${CONDA_RUN} backends/apple/metal/tests/run_metal_test.sh --build + echo "::endgroup::" + + echo "::group::Run Metal Backend Module Tests" + ${CONDA_RUN} python -m unittest backends.apple.metal.tests.test_modules.TestMetalBackendModules + echo "::endgroup::" + export-model-metal-artifact: name: export-model-metal-artifact # Skip this job if the pull request is from a fork (HuggingFace secrets are not available) diff --git a/backends/apple/metal/tests/run_metal_test.sh b/backends/apple/metal/tests/run_metal_test.sh new file mode 100755 index 00000000000..95c0cb1c6a7 --- /dev/null +++ b/backends/apple/metal/tests/run_metal_test.sh @@ -0,0 +1,126 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Script to build and run Metal backend tests +# Usage: +# ./run_metal_test.sh --build # Build the Metal runtime +# ./run_metal_test.sh --run # Run inference with given model files +# ./run_metal_test.sh --check-build # Check if runtime is already built + +set -e # Exit on any error + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +EXECUTORCH_ROOT="$(cd "$SCRIPT_DIR/../../../.." && pwd)" +BUILD_DIR="$EXECUTORCH_ROOT/cmake-out" +EXECUTOR_RUNNER="$BUILD_DIR/executor_runner" + +# Function to check if Metal runtime is built +check_build() { + if [[ -f "$EXECUTOR_RUNNER" ]]; then + echo "true" + return 0 + else + echo "false" + return 1 + fi +} + +# Function to build the Metal runtime +build_runtime() { + echo "Building Metal runtime..." 
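+    # What follows, in order: refuse to run on non-macOS hosts, configure
+    # CMake with the Metal/AOTI options, build with all available cores, and
+    # verify that executor_runner was actually produced.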
+ + # Check if we're on macOS + if [[ "$(uname)" != "Darwin" ]]; then + echo "Error: Metal backend is only supported on macOS" + exit 1 + fi + + # Create build directory + mkdir -p "$BUILD_DIR" + cd "$BUILD_DIR" + + # CMake configuration for Metal backend + CMAKE_ARGS="-DEXECUTORCH_BUILD_METAL=ON \ + -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \ + -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ + -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ + -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ + -DAOTI_METAL=ON \ + -DEXECUTORCH_LOG_LEVEL=Info \ + -DCMAKE_BUILD_TYPE=Release" + + echo "Running cmake..." + eval cmake $CMAKE_ARGS "$EXECUTORCH_ROOT" + + echo "Building..." + cmake --build . -j$(sysctl -n hw.ncpu) + + cd "$EXECUTORCH_ROOT" + + if [[ -f "$EXECUTOR_RUNNER" ]]; then + echo "Build successful: $EXECUTOR_RUNNER" + else + echo "Error: Build failed - executor_runner not found" + exit 1 + fi +} + +# Function to run inference +run_inference() { + local pte_path="$1" + local ptd_path="$2" + + if [[ ! -f "$EXECUTOR_RUNNER" ]]; then + echo "Error: executor_runner not found at $EXECUTOR_RUNNER" + echo "Run '$0 --build' first to build the Metal runtime" + exit 1 + fi + + if [[ ! -f "$pte_path" ]]; then + echo "Error: PTE file not found: $pte_path" + exit 1 + fi + + if [[ ! -f "$ptd_path" ]]; then + echo "Error: PTD file not found: $ptd_path" + exit 1 + fi + + echo "Running inference..." + echo " PTE: $pte_path" + echo " PTD: $ptd_path" + + "$EXECUTOR_RUNNER" --model_path "$pte_path" --data_path "$ptd_path" +} + +# Parse command line arguments +case "$1" in + --build) + build_runtime + ;; + --run) + if [[ -z "$2" ]] || [[ -z "$3" ]]; then + echo "Usage: $0 --run " + exit 1 + fi + run_inference "$2" "$3" + ;; + --check-build) + check_build + ;; + *) + echo "Metal Backend Test Runner" + echo "" + echo "Usage:" + echo " $0 --build Build the Metal runtime" + echo " $0 --run Run inference with given model files" + echo " $0 --check-build Check if runtime is already built" + exit 1 + ;; +esac diff --git a/backends/apple/metal/tests/test_modules.py b/backends/apple/metal/tests/test_modules.py new file mode 100644 index 00000000000..424d736b3b7 --- /dev/null +++ b/backends/apple/metal/tests/test_modules.py @@ -0,0 +1,817 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +Unit tests for Metal backend modules. + +These tests export and run various model modules through the Metal backend +to verify that the export and execution pipeline works correctly. + +These tests require MPS to be available. On systems without MPS support, +the export tests will be skipped. 
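+
+Typical local invocation, mirroring the CI job in .github/workflows/metal.yml:
+
+    backends/apple/metal/tests/run_metal_test.sh --build
+    python -m unittest backends.apple.metal.tests.test_modules.TestMetalBackendModules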
+""" + +import os +import platform +import subprocess +import tempfile +import unittest +from pathlib import Path +from typing import Any, Dict, Optional, Tuple + +import numpy as np +import torch +from executorch.backends.apple.metal.metal_backend import MetalBackend +from executorch.backends.apple.metal.metal_partitioner import MetalPartitioner +from executorch.exir import to_edge_transform_and_lower +from torch import nn +from torch.export import export +from torch.nn.attention import SDPBackend + + +# Check if MPS is available for export tests +MPS_AVAILABLE = torch.backends.mps.is_available() +IS_MACOS = platform.system() == "Darwin" +SKIP_EXPORT_TESTS = not MPS_AVAILABLE +SKIP_REASON = "MPS not available - Metal export tests require MPS support" + +# Paths +TESTS_DIR = Path(__file__).parent +EXECUTORCH_ROOT = TESTS_DIR.parent.parent.parent.parent +BUILD_DIR = EXECUTORCH_ROOT / "cmake-out" +EXECUTOR_RUNNER = BUILD_DIR / "executor_runner" +RUN_METAL_TEST_SCRIPT = TESTS_DIR / "run_metal_test.sh" + +# Check if executor_runner is built +EXECUTOR_RUNNER_AVAILABLE = EXECUTOR_RUNNER.exists() +SKIP_RUNTIME_TESTS = not EXECUTOR_RUNNER_AVAILABLE or SKIP_EXPORT_TESTS +SKIP_RUNTIME_REASON = ( + "executor_runner not built - run 'backends/apple/metal/tests/run_metal_test.sh --build'" + if not EXECUTOR_RUNNER_AVAILABLE + else SKIP_REASON +) + +# Data types to test +DTYPES = [torch.float32, torch.bfloat16] + +# Map dtype to short name for test method naming +DTYPE_NAMES = { + torch.float32: "float32", + torch.bfloat16: "bfloat16", +} + +# Registry mapping model names to their configurations +MODULE_REGISTRY: Dict[str, Dict[str, Any]] = {} + + +# ============================================================================= +# Model Definitions +# ============================================================================= + + +class Add(nn.Module): + def forward(self, x: torch.Tensor, y: torch.Tensor): + return x + y + +MODULE_REGISTRY["add"] = { + "model_class": Add, + "input_shapes": [(10,), (10,)], + "description": "Simple tensor addition model", +} + + +# ------------------------------------------------------------------------- +# Matrix Multiplication Modules +# ------------------------------------------------------------------------- + +class Mm(nn.Module): + def forward(self, x: torch.Tensor, y: torch.Tensor): + return x.mm(y) + +MODULE_REGISTRY["mm"] = { + "model_class": Mm, + "input_shapes": [(3, 4), (4, 5)], + "description": "Simple mm layer model", +} + +# ------------------------------------------------------------------------- +class MmWeights(nn.Module): + def __init__(self): + super().__init__() + self.weight = nn.Parameter(torch.arange(20, dtype=torch.float).reshape(4, 5)) + + def forward(self, x: torch.Tensor): + return x.mm(self.weight) + +MODULE_REGISTRY["mm_weights"] = { + "model_class": MmWeights, + "input_shapes": [(3, 4)], + "description": "Matrix multiplication with weight parameter", +} + +# ------------------------------------------------------------------------- +class TwoMm(nn.Module): + def __init__(self): + super().__init__() + self.left_weight = nn.Parameter( + torch.arange(20, dtype=torch.float).reshape(4, 5) + ) + self.right_weight = nn.Parameter( + torch.arange(42, dtype=torch.float).reshape(6, 7) + ) + + def forward(self, x: torch.Tensor): + return self.left_weight.mm(x).mm(self.right_weight) + +MODULE_REGISTRY["two_mm"] = { + "model_class": TwoMm, + "input_shapes": [(5, 6)], + "description": "Two consecutive matrix multiplications", +} + +# 
------------------------------------------------------------------------- +class ElementwiseMmReduction(nn.Module): + def forward(self, x: torch.Tensor, y: torch.Tensor): + x1 = x.sin() + x + y2 = y.cos() + 3 + z = x1.mm(y2) + return z + z.sum() + +MODULE_REGISTRY["elementwise_mm_reduction"] = { + "model_class": ElementwiseMmReduction, + "input_shapes": [(11, 45), (45, 8)], + "description": "Combining mm with elementwise and reduction ops", +} + + +# ------------------------------------------------------------------------- +# Linear Modules +# ------------------------------------------------------------------------- + +class LinearNoBias(nn.Module): + def __init__(self): + super().__init__() + self.linear = nn.Linear(7, 101, bias=False) + + def forward(self, x: torch.Tensor): + return self.linear(x) + +MODULE_REGISTRY["linear_nobias"] = { + "model_class": LinearNoBias, + "input_shapes": [(127, 7)], + "description": "Simple linear layer model with no bias", +} + + +# ------------------------------------------------------------------------- +# Convolution Modules +# ------------------------------------------------------------------------- + +class SingleConv2d(nn.Module): + def __init__(self): + super().__init__() + self.conv = nn.Conv2d( + in_channels=3, out_channels=5, kernel_size=3, stride=1, padding=1 + ) + + def forward(self, x: torch.Tensor): + return self.conv(x) + +MODULE_REGISTRY["conv2d"] = { + "model_class": SingleConv2d, + "input_shapes": [(4, 3, 8, 8)], + "description": "Single Conv2d layer model", +} + +# ------------------------------------------------------------------------- +class DepthwiseConv(nn.Module): + def __init__(self): + super().__init__() + self.conv = nn.Conv2d( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + padding=1, + dilation=1, + groups=32, + bias=False, + ) + + def forward(self, x): + return self.conv(x) + +MODULE_REGISTRY["depthwise_conv"] = { + "model_class": DepthwiseConv, + "input_shapes": [(1, 32, 112, 112)], + "description": "Single Depthwise Conv2d layer model", +} + +# ------------------------------------------------------------------------- +class SmallConv1d(nn.Module): + def __init__(self): + super().__init__() + self.conv = nn.Conv1d( + in_channels=8, + out_channels=6, + kernel_size=3, + stride=1, + padding=1, + dilation=1, + groups=1, + bias=False, + ) + + def forward(self, x): + return self.conv(x) + +MODULE_REGISTRY["small_conv1d"] = { + "model_class": SmallConv1d, + "input_shapes": [(1, 8, 5)], + "description": "Conv1d layer with 8 input channels, 6 output channels", +} + +# ------------------------------------------------------------------------- +class MockConv1d(nn.Module): + def __init__(self): + super().__init__() + self.conv = nn.Conv1d( + in_channels=80, + out_channels=384, + kernel_size=3, + stride=1, + padding=1, + dilation=1, + groups=1, + bias=True, + ) + + def forward(self, x): + return self.conv(x) + +MODULE_REGISTRY["conv1d"] = { + "model_class": MockConv1d, + "input_shapes": [(1, 80, 3000)], + "description": "Conv1d layer with 80 input channels, 384 output channels", +} + +# ------------------------------------------------------------------------- +class VoxtralConv1d(nn.Module): + def __init__(self): + super().__init__() + self.conv = nn.Conv1d( + in_channels=128, + out_channels=1280, + kernel_size=3, + stride=1, + padding=1, + dilation=1, + groups=1, + bias=False, + ) + + def forward(self, x): + return self.conv(x) + +MODULE_REGISTRY["voxtral_conv1d"] = { + "model_class": VoxtralConv1d, + "input_shapes": 
[(10, 128, 3000)], + "description": "Conv1d layer with 128 input channels, 1280 output channels", +} + + +# ------------------------------------------------------------------------- +# Attention (SDPA) Modules +# ------------------------------------------------------------------------- + +class SimpleSDPA(nn.Module): + """Minimal SDPA test model.""" + + def forward( + self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor + ) -> torch.Tensor: + output = torch.nn.functional.scaled_dot_product_attention( + query, key, value, dropout_p=0.0, is_causal=False + ) + return output + +MODULE_REGISTRY["sdpa"] = { + "model_class": SimpleSDPA, + "input_shapes": [(2, 4, 16, 64), (2, 4, 16, 64), (2, 4, 16, 64)], + "description": "Simple Scaled Dot Product Attention model", +} + +# ------------------------------------------------------------------------- +class AddSDPA(nn.Module): + """SDPA model with Q, K, V as parameters that adds input to SDPA output.""" + + def __init__(self, batch_size=2, num_heads=4, seq_len=16, head_dim=64): + super().__init__() + self.query = nn.Parameter( + torch.randn(batch_size, num_heads, seq_len, head_dim) + ) + self.key = nn.Parameter(torch.randn(batch_size, num_heads, seq_len, head_dim)) + self.value = nn.Parameter( + torch.randn(batch_size, num_heads, seq_len, head_dim) + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + sdpa_output = torch.nn.functional.scaled_dot_product_attention( + self.query, self.key, self.value, dropout_p=0.0, is_causal=False + ) + return sdpa_output + x + +MODULE_REGISTRY["add_sdpa"] = { + "model_class": AddSDPA, + "input_shapes": [(2, 4, 16, 64)], + "description": "SDPA model with Q,K,V as parameters that adds input to output", +} + +# ------------------------------------------------------------------------- +class BaseAddStridedSDPA(nn.Module): + """SDPA model with strided Q, K, V parameters.""" + + def __init__(self, q_size, k_size, v_size, q_stride, k_stride, v_stride, attn_mask_size=None): + super().__init__() + self.q_size = q_size + self.k_size = k_size + self.v_size = v_size + self.q_stride = q_stride + self.k_stride = k_stride + self.v_stride = v_stride + self.attn_mask_size = attn_mask_size + + self.query = nn.Parameter(torch.randn(q_size)) + self.key = nn.Parameter(torch.randn(k_size)) + self.value = nn.Parameter(torch.randn(v_size)) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + query = torch.as_strided(self.query, size=self.q_size, stride=self.q_stride) + key = torch.as_strided(self.key, size=self.k_size, stride=self.k_stride) + value = torch.as_strided(self.value, size=self.v_size, stride=self.v_stride) + attn_mask = None + if self.attn_mask_size: + attn_mask = torch.zeros(self.attn_mask_size) + + sdpa_output = torch.nn.functional.scaled_dot_product_attention( + query, key, value, attn_mask, dropout_p=0.0, is_causal=False, scale=1.0 + ) + return sdpa_output + x + +# ------------------------------------------------------------------------- +class AddStridedSDPA(BaseAddStridedSDPA): + def __init__(self): + super().__init__( + q_size=(10, 20, 1500, 64), + k_size=(10, 20, 1500, 64), + v_size=(10, 20, 1500, 64), + q_stride=(1920000, 64, 1280, 1), + k_stride=(1920000, 64, 1280, 1), + v_stride=(1920000, 64, 1280, 1), + ) + +MODULE_REGISTRY["audio_encoder_sdpa1"] = { + "model_class": AddStridedSDPA, + "input_shapes": [(10, 20, 1500, 64)], + "description": "Audio Encoder model with strided SDPA", +} + +# ------------------------------------------------------------------------- +class 
AddStridedSDPA1(BaseAddStridedSDPA): + def __init__(self): + super().__init__( + q_size=(1, 20, 1, 64), + k_size=(1, 20, 1500, 64), + v_size=(1, 20, 1500, 64), + q_stride=(1280, 64, 1280, 1), + k_stride=(1920000, 64, 1280, 1), + v_stride=(1920000, 64, 1280, 1), + ) + +MODULE_REGISTRY["whisper_strided_sdpa1"] = { + "model_class": AddStridedSDPA1, + "input_shapes": [(1, 20, 1, 64)], + "description": "Whisper-like strided SDPA variant 1", +} + +# ------------------------------------------------------------------------- +class AddStridedSDPA2(BaseAddStridedSDPA): + def __init__(self): + super().__init__( + q_size=(1, 20, 1, 64), + k_size=(1, 20, 1024, 64), + v_size=(1, 20, 1024, 64), + q_stride=(1280, 64, 1280, 1), + k_stride=(1310720, 65536, 64, 1), + v_stride=(1310720, 65536, 64, 1), + attn_mask_size=(1, 1, 1, 1024), + ) + +MODULE_REGISTRY["whisper_strided_sdpa2"] = { + "model_class": AddStridedSDPA2, + "input_shapes": [(1, 20, 1, 64)], + "description": "Whisper-like strided SDPA variant 2", +} + + +# ------------------------------------------------------------------------- +# Normalization Modules +# ------------------------------------------------------------------------- + +class BatchNorm(nn.Module): + def __init__(self): + super().__init__() + self.bn = nn.BatchNorm2d(num_features=16) + + def forward(self, x): + return self.bn(x) + +MODULE_REGISTRY["batchnorm"] = { + "model_class": BatchNorm, + "input_shapes": [(1, 16, 32, 32)], + "description": "Single BatchNorm2d layer model", +} + + +# ------------------------------------------------------------------------- +# Block/Composite Modules +# ------------------------------------------------------------------------- + +class SingleResNetBlock(nn.Module): + def __init__(self, in_channels=64, out_channels=64, stride=1): + super().__init__() + self.conv1 = nn.Conv2d( + in_channels, + out_channels, + kernel_size=3, + stride=stride, + padding=1, + bias=False, + ) + self.bn1 = nn.BatchNorm2d(out_channels) + self.relu = nn.ReLU(inplace=True) + self.conv2 = nn.Conv2d( + out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False + ) + self.bn2 = nn.BatchNorm2d(out_channels) + + self.skip_connection = None + if stride != 1 or in_channels != out_channels: + self.skip_connection = nn.Sequential( + nn.Conv2d( + in_channels, out_channels, kernel_size=1, stride=stride, bias=False + ), + nn.BatchNorm2d(out_channels), + ) + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.skip_connection is not None: + identity = self.skip_connection(x) + + out += identity + out = self.relu(out) + + return out + +MODULE_REGISTRY["single_resnet_block"] = { + "model_class": SingleResNetBlock, + "input_shapes": [(1, 64, 8, 8)], + "description": "Single ResNet block with skip connection", +} + + +# ============================================================================= +# Helper Functions +# ============================================================================= + + +def get_model_and_inputs( + model_name: str, dtype: torch.dtype = torch.float32 +) -> Tuple[nn.Module, Tuple[torch.Tensor, ...]]: + """Get model and example inputs based on model name.""" + if model_name not in MODULE_REGISTRY: + available_models = ", ".join(MODULE_REGISTRY.keys()) + raise ValueError( + f"Unsupported model: {model_name}. 
Available models: {available_models}" + ) + + model_config = MODULE_REGISTRY[model_name] + model_class = model_config["model_class"] + input_shapes = model_config["input_shapes"] + + model = model_class().eval() + if dtype is not None: + model = model.to(dtype) + + example_inputs = tuple( + torch.randn(*shape, dtype=dtype) for shape in input_shapes + ) + + return model, example_inputs + + +def export_model_to_metal( + model: nn.Module, example_inputs: Tuple[torch.Tensor, ...] +) -> Any: + """Export model through the Metal backend pipeline.""" + method_name = "forward" + + with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad(): + aten_dialect = export(model, example_inputs, strict=False) + + edge_program = to_edge_transform_and_lower( + aten_dialect, + partitioner=[ + MetalPartitioner( + [MetalBackend.generate_method_name_compile_spec(method_name)] + ) + ], + ) + + executorch_program = edge_program.to_executorch() + return executorch_program + + +def export_model_to_files( + model: nn.Module, + example_inputs: Tuple[torch.Tensor, ...], + output_dir: Path, + model_name: str, +) -> Tuple[Path, Path, torch.Tensor]: + """ + Export model to .pte and .ptd files, and compute expected output. + + Returns: + Tuple of (pte_path, ptd_path, expected_output) + """ + # Compute expected output using all-ones input (matching export_aoti_metal.py) + all_ones_input = tuple(torch.ones_like(inp) for inp in example_inputs) + with torch.no_grad(): + expected_output = model(*all_ones_input) + + # Export to executorch + executorch_program = export_model_to_metal(model, example_inputs) + + # Save .pte file + pte_path = output_dir / f"{model_name}.pte" + with open(pte_path, "wb") as f: + f.write(executorch_program.buffer) + + # Save .ptd file (tensor data) + executorch_program.write_tensor_data_to_file(str(output_dir)) + ptd_path = output_dir / "aoti_metal_blob.ptd" + + return pte_path, ptd_path, expected_output + + +def run_executor_runner(pte_path: Path, ptd_path: Path) -> bool: + """ + Run the executor_runner binary with the given model files. + + Returns: + True if execution succeeded, False otherwise. + """ + if not EXECUTOR_RUNNER.exists(): + raise RuntimeError( + f"executor_runner not found at {EXECUTOR_RUNNER}. " + f"Run '{RUN_METAL_TEST_SCRIPT} --build' to build." + ) + + cmd = [ + str(EXECUTOR_RUNNER), + "--model_path", str(pte_path), + "--data_path", str(ptd_path), + ] + + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=60, + cwd=str(EXECUTORCH_ROOT), + ) + return result.returncode == 0 + except subprocess.TimeoutExpired: + return False + except Exception: + return False + + +def read_output_file(filepath: Path) -> Optional[np.ndarray]: + """Read comma-separated output values from a file.""" + try: + with open(filepath, "r") as f: + content = f.read().strip() + if not content: + return None + values = [float(x.strip()) for x in content.split(",") if x.strip()] + return np.array(values) + except (FileNotFoundError, ValueError): + return None + + +def compare_outputs( + expected: torch.Tensor, + runtime_output_file: Path, + atol: float = 1e-5, + rtol: float = 1e-5, +) -> Tuple[bool, Optional[float], Optional[float]]: + """ + Compare expected PyTorch output with runtime output from file. 
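+
+    The relative difference is computed against a symmetric denominator,
+    max(|runtime|, |expected|, eps), so the result does not depend on which
+    argument is treated as the reference.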
+ + Returns: + Tuple of (is_close, max_atol, max_rtol) + """ + runtime_values = read_output_file(runtime_output_file) + if runtime_values is None: + return False, None, None + + # Flatten expected output + if isinstance(expected, tuple): + expected_values = np.concatenate([t.flatten().numpy() for t in expected]) + else: + expected_values = expected.flatten().numpy() + + if len(runtime_values) != len(expected_values): + return False, None, None + + # Calculate tolerances + abs_diff = np.abs(runtime_values - expected_values) + max_atol_val = np.max(abs_diff) + + eps = 1e-8 + denominator = np.maximum( + np.maximum(np.abs(runtime_values), np.abs(expected_values)), eps + ) + rel_diff = abs_diff / denominator + max_rtol_val = np.max(rel_diff) + + is_close = np.allclose(runtime_values, expected_values, atol=atol, rtol=rtol) + + return is_close, max_atol_val, max_rtol_val + + +# ============================================================================= +# Test Class +# ============================================================================= + + +class TestMetalBackendModules(unittest.TestCase): + """ + Test Metal backend modules export and execution. + + Each test exports a model through the Metal backend and verifies: + 1. The export process completes without errors + 2. The exported program has non-zero buffer size + 3. The runtime output matches the expected PyTorch output + """ + + def _test_module_export( + self, model_name: str, dtype: torch.dtype = torch.float32 + ) -> None: + """Generic test for module export.""" + if SKIP_EXPORT_TESTS: + self.skipTest(SKIP_REASON) + + model, example_inputs = get_model_and_inputs(model_name, dtype=dtype) + + # Verify model forward pass works before export + with torch.no_grad(): + model_output = model(*example_inputs) + + self.assertIsNotNone( + model_output, + f"{model_name} ({DTYPE_NAMES[dtype]}): Forward pass returned None", + ) + + # Export to Metal backend + executorch_program = export_model_to_metal(model, example_inputs) + + self.assertIsNotNone( + executorch_program, + f"{model_name} ({DTYPE_NAMES[dtype]}): Export returned None", + ) + self.assertGreater( + len(executorch_program.buffer), + 0, + f"{model_name} ({DTYPE_NAMES[dtype]}): Exported buffer is empty", + ) + + def _test_module_output_consistency( + self, model_name: str, dtype: torch.dtype = torch.float32 + ) -> None: + """ + Test that Metal backend runtime output matches PyTorch output. + + This test: + 1. Exports the model to .pte and .ptd files + 2. Runs the model using executor_runner + 3. 
Compares the runtime output with expected PyTorch output + """ + if SKIP_RUNTIME_TESTS: + self.skipTest(SKIP_RUNTIME_REASON) + + model, example_inputs = get_model_and_inputs(model_name, dtype=dtype) + + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir_path = Path(tmpdir) + + # Create aoti_debug_data directory for output files + debug_dir = tmpdir_path / "aoti_debug_data" + debug_dir.mkdir(exist_ok=True) + + # Export model and get expected output + pte_path, ptd_path, expected_output = export_model_to_files( + model, example_inputs, tmpdir_path, model_name + ) + + self.assertTrue( + pte_path.exists(), + f"{model_name}: PTE file not created at {pte_path}", + ) + self.assertTrue( + ptd_path.exists(), + f"{model_name}: PTD file not created at {ptd_path}", + ) + + # Run executor_runner + success = run_executor_runner(pte_path, ptd_path) + self.assertTrue( + success, + f"{model_name}: executor_runner failed", + ) + + # Compare outputs + runtime_output_file = debug_dir / "final_runtime_output.txt" + + if runtime_output_file.exists(): + is_close, max_atol, max_rtol = compare_outputs( + expected_output, runtime_output_file + ) + + self.assertTrue( + is_close, + f"{model_name} ({DTYPE_NAMES[dtype]}): Output mismatch - max_atol={max_atol}, max_rtol={max_rtol}", + ) + + +# ============================================================================= +# Dynamically generate test methods for each module and dtype in MODULE_REGISTRY +# ============================================================================= + + +def _make_export_test(model_name: str, dtype: torch.dtype): + """Factory function to create an export test method for a given model and dtype.""" + def test_method(self): + self._test_module_export(model_name, dtype) + dtype_name = DTYPE_NAMES[dtype] + test_method.__doc__ = f"Test {model_name} module export with {dtype_name}." + return test_method + + +def _make_output_consistency_test(model_name: str, dtype: torch.dtype): + """Factory function to create an output consistency test method for a given model and dtype.""" + def test_method(self): + self._test_module_output_consistency(model_name, dtype) + dtype_name = DTYPE_NAMES[dtype] + test_method.__doc__ = f"Test {model_name} module output consistency with {dtype_name}." 
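+    # A factory is used here (rather than defining test_method inline in the
+    # registration loop) so that model_name and dtype are bound when the test
+    # is created, avoiding the late-binding closure pitfall where every
+    # generated test would see only the last loop values. The generated
+    # methods are named test_<model>_<dtype>_export and
+    # test_<model>_<dtype>_output_consistency.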
+ return test_method + + +# Add export and output consistency tests for each module and dtype in the registry +for _model_name in MODULE_REGISTRY: + for _dtype in DTYPES: + _dtype_name = DTYPE_NAMES[_dtype] + + # Create export test: test___export + _export_test_name = f"test_{_model_name}_{_dtype_name}_export" + setattr( + TestMetalBackendModules, + _export_test_name, + _make_export_test(_model_name, _dtype), + ) + + # Create output consistency test: test___output_consistency + _consistency_test_name = f"test_{_model_name}_{_dtype_name}_output_consistency" + setattr( + TestMetalBackendModules, + _consistency_test_name, + _make_output_consistency_test(_model_name, _dtype), + ) + + +if __name__ == "__main__": + unittest.main() From 0ed7c5c778087f16c3b06ab9463964f0b1e5287f Mon Sep 17 00:00:00 2001 From: Manuel Candales Date: Fri, 30 Jan 2026 21:59:19 -0500 Subject: [PATCH 2/6] Update [ghstack-poisoned] --- backends/apple/metal/tests/run_metal_test.sh | 2 +- backends/apple/metal/tests/test_modules.py | 166 ++++++++++++++----- 2 files changed, 121 insertions(+), 47 deletions(-) diff --git a/backends/apple/metal/tests/run_metal_test.sh b/backends/apple/metal/tests/run_metal_test.sh index 95c0cb1c6a7..9595cbf0c3d 100755 --- a/backends/apple/metal/tests/run_metal_test.sh +++ b/backends/apple/metal/tests/run_metal_test.sh @@ -56,7 +56,7 @@ build_runtime() { -DCMAKE_BUILD_TYPE=Release" echo "Running cmake..." - eval cmake $CMAKE_ARGS "$EXECUTORCH_ROOT" + cmake $CMAKE_ARGS "$EXECUTORCH_ROOT" echo "Building..." cmake --build . -j$(sysctl -n hw.ncpu) diff --git a/backends/apple/metal/tests/test_modules.py b/backends/apple/metal/tests/test_modules.py index 424d736b3b7..c97298a6bc2 100644 --- a/backends/apple/metal/tests/test_modules.py +++ b/backends/apple/metal/tests/test_modules.py @@ -38,6 +38,9 @@ SKIP_EXPORT_TESTS = not MPS_AVAILABLE SKIP_REASON = "MPS not available - Metal export tests require MPS support" +# Check if running in CI (GitHub Actions) +IS_CI = os.environ.get("GITHUB_ACTIONS") == "true" + # Paths TESTS_DIR = Path(__file__).parent EXECUTORCH_ROOT = TESTS_DIR.parent.parent.parent.parent @@ -45,6 +48,12 @@ EXECUTOR_RUNNER = BUILD_DIR / "executor_runner" RUN_METAL_TEST_SCRIPT = TESTS_DIR / "run_metal_test.sh" +# Test output directory - use current working directory in CI for reliable write access +if IS_CI: + TEST_OUTPUT_BASE_DIR = Path.cwd() / "aoti_debug_data" +else: + TEST_OUTPUT_BASE_DIR = None # Will use tempfile.TemporaryDirectory + # Check if executor_runner is built EXECUTOR_RUNNER_AVAILABLE = EXECUTOR_RUNNER.exists() SKIP_RUNTIME_TESTS = not EXECUTOR_RUNNER_AVAILABLE or SKIP_EXPORT_TESTS @@ -76,6 +85,7 @@ class Add(nn.Module): def forward(self, x: torch.Tensor, y: torch.Tensor): return x + y + MODULE_REGISTRY["add"] = { "model_class": Add, "input_shapes": [(10,), (10,)], @@ -87,16 +97,19 @@ def forward(self, x: torch.Tensor, y: torch.Tensor): # Matrix Multiplication Modules # ------------------------------------------------------------------------- + class Mm(nn.Module): def forward(self, x: torch.Tensor, y: torch.Tensor): return x.mm(y) + MODULE_REGISTRY["mm"] = { "model_class": Mm, "input_shapes": [(3, 4), (4, 5)], "description": "Simple mm layer model", } + # ------------------------------------------------------------------------- class MmWeights(nn.Module): def __init__(self): @@ -106,12 +119,14 @@ def __init__(self): def forward(self, x: torch.Tensor): return x.mm(self.weight) + MODULE_REGISTRY["mm_weights"] = { "model_class": MmWeights, "input_shapes": [(3, 4)], 
"description": "Matrix multiplication with weight parameter", } + # ------------------------------------------------------------------------- class TwoMm(nn.Module): def __init__(self): @@ -126,12 +141,14 @@ def __init__(self): def forward(self, x: torch.Tensor): return self.left_weight.mm(x).mm(self.right_weight) + MODULE_REGISTRY["two_mm"] = { "model_class": TwoMm, "input_shapes": [(5, 6)], "description": "Two consecutive matrix multiplications", } + # ------------------------------------------------------------------------- class ElementwiseMmReduction(nn.Module): def forward(self, x: torch.Tensor, y: torch.Tensor): @@ -140,6 +157,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor): z = x1.mm(y2) return z + z.sum() + MODULE_REGISTRY["elementwise_mm_reduction"] = { "model_class": ElementwiseMmReduction, "input_shapes": [(11, 45), (45, 8)], @@ -151,6 +169,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor): # Linear Modules # ------------------------------------------------------------------------- + class LinearNoBias(nn.Module): def __init__(self): super().__init__() @@ -159,6 +178,7 @@ def __init__(self): def forward(self, x: torch.Tensor): return self.linear(x) + MODULE_REGISTRY["linear_nobias"] = { "model_class": LinearNoBias, "input_shapes": [(127, 7)], @@ -170,6 +190,7 @@ def forward(self, x: torch.Tensor): # Convolution Modules # ------------------------------------------------------------------------- + class SingleConv2d(nn.Module): def __init__(self): super().__init__() @@ -180,12 +201,14 @@ def __init__(self): def forward(self, x: torch.Tensor): return self.conv(x) + MODULE_REGISTRY["conv2d"] = { "model_class": SingleConv2d, "input_shapes": [(4, 3, 8, 8)], "description": "Single Conv2d layer model", } + # ------------------------------------------------------------------------- class DepthwiseConv(nn.Module): def __init__(self): @@ -204,12 +227,14 @@ def __init__(self): def forward(self, x): return self.conv(x) + MODULE_REGISTRY["depthwise_conv"] = { "model_class": DepthwiseConv, "input_shapes": [(1, 32, 112, 112)], "description": "Single Depthwise Conv2d layer model", } + # ------------------------------------------------------------------------- class SmallConv1d(nn.Module): def __init__(self): @@ -228,14 +253,16 @@ def __init__(self): def forward(self, x): return self.conv(x) + MODULE_REGISTRY["small_conv1d"] = { "model_class": SmallConv1d, "input_shapes": [(1, 8, 5)], "description": "Conv1d layer with 8 input channels, 6 output channels", } + # ------------------------------------------------------------------------- -class MockConv1d(nn.Module): +class MediumConv1d(nn.Module): def __init__(self): super().__init__() self.conv = nn.Conv1d( @@ -252,12 +279,14 @@ def __init__(self): def forward(self, x): return self.conv(x) + MODULE_REGISTRY["conv1d"] = { - "model_class": MockConv1d, + "model_class": MediumConv1d, "input_shapes": [(1, 80, 3000)], "description": "Conv1d layer with 80 input channels, 384 output channels", } + # ------------------------------------------------------------------------- class VoxtralConv1d(nn.Module): def __init__(self): @@ -276,6 +305,7 @@ def __init__(self): def forward(self, x): return self.conv(x) + MODULE_REGISTRY["voxtral_conv1d"] = { "model_class": VoxtralConv1d, "input_shapes": [(10, 128, 3000)], @@ -287,6 +317,7 @@ def forward(self, x): # Attention (SDPA) Modules # ------------------------------------------------------------------------- + class SimpleSDPA(nn.Module): """Minimal SDPA test model.""" @@ -298,25 +329,23 @@ def 
forward( ) return output + MODULE_REGISTRY["sdpa"] = { "model_class": SimpleSDPA, "input_shapes": [(2, 4, 16, 64), (2, 4, 16, 64), (2, 4, 16, 64)], "description": "Simple Scaled Dot Product Attention model", } + # ------------------------------------------------------------------------- class AddSDPA(nn.Module): """SDPA model with Q, K, V as parameters that adds input to SDPA output.""" def __init__(self, batch_size=2, num_heads=4, seq_len=16, head_dim=64): super().__init__() - self.query = nn.Parameter( - torch.randn(batch_size, num_heads, seq_len, head_dim) - ) + self.query = nn.Parameter(torch.randn(batch_size, num_heads, seq_len, head_dim)) self.key = nn.Parameter(torch.randn(batch_size, num_heads, seq_len, head_dim)) - self.value = nn.Parameter( - torch.randn(batch_size, num_heads, seq_len, head_dim) - ) + self.value = nn.Parameter(torch.randn(batch_size, num_heads, seq_len, head_dim)) def forward(self, x: torch.Tensor) -> torch.Tensor: sdpa_output = torch.nn.functional.scaled_dot_product_attention( @@ -324,17 +353,21 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: ) return sdpa_output + x + MODULE_REGISTRY["add_sdpa"] = { "model_class": AddSDPA, "input_shapes": [(2, 4, 16, 64)], "description": "SDPA model with Q,K,V as parameters that adds input to output", } + # ------------------------------------------------------------------------- class BaseAddStridedSDPA(nn.Module): """SDPA model with strided Q, K, V parameters.""" - def __init__(self, q_size, k_size, v_size, q_stride, k_stride, v_stride, attn_mask_size=None): + def __init__( + self, q_size, k_size, v_size, q_stride, k_stride, v_stride, attn_mask_size=None + ): super().__init__() self.q_size = q_size self.k_size = k_size @@ -361,6 +394,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: ) return sdpa_output + x + # ------------------------------------------------------------------------- class AddStridedSDPA(BaseAddStridedSDPA): def __init__(self): @@ -373,12 +407,14 @@ def __init__(self): v_stride=(1920000, 64, 1280, 1), ) + MODULE_REGISTRY["audio_encoder_sdpa1"] = { "model_class": AddStridedSDPA, "input_shapes": [(10, 20, 1500, 64)], "description": "Audio Encoder model with strided SDPA", } + # ------------------------------------------------------------------------- class AddStridedSDPA1(BaseAddStridedSDPA): def __init__(self): @@ -391,12 +427,14 @@ def __init__(self): v_stride=(1920000, 64, 1280, 1), ) + MODULE_REGISTRY["whisper_strided_sdpa1"] = { "model_class": AddStridedSDPA1, "input_shapes": [(1, 20, 1, 64)], "description": "Whisper-like strided SDPA variant 1", } + # ------------------------------------------------------------------------- class AddStridedSDPA2(BaseAddStridedSDPA): def __init__(self): @@ -410,6 +448,7 @@ def __init__(self): attn_mask_size=(1, 1, 1, 1024), ) + MODULE_REGISTRY["whisper_strided_sdpa2"] = { "model_class": AddStridedSDPA2, "input_shapes": [(1, 20, 1, 64)], @@ -421,6 +460,7 @@ def __init__(self): # Normalization Modules # ------------------------------------------------------------------------- + class BatchNorm(nn.Module): def __init__(self): super().__init__() @@ -429,6 +469,7 @@ def __init__(self): def forward(self, x): return self.bn(x) + MODULE_REGISTRY["batchnorm"] = { "model_class": BatchNorm, "input_shapes": [(1, 16, 32, 32)], @@ -440,6 +481,7 @@ def forward(self, x): # Block/Composite Modules # ------------------------------------------------------------------------- + class SingleResNetBlock(nn.Module): def __init__(self, in_channels=64, out_channels=64, stride=1): 
super().__init__() @@ -485,6 +527,7 @@ def forward(self, x): return out + MODULE_REGISTRY["single_resnet_block"] = { "model_class": SingleResNetBlock, "input_shapes": [(1, 64, 8, 8)], @@ -515,9 +558,7 @@ def get_model_and_inputs( if dtype is not None: model = model.to(dtype) - example_inputs = tuple( - torch.randn(*shape, dtype=dtype) for shape in input_shapes - ) + example_inputs = tuple(torch.randn(*shape, dtype=dtype) for shape in input_shapes) return model, example_inputs @@ -576,12 +617,13 @@ def export_model_to_files( return pte_path, ptd_path, expected_output -def run_executor_runner(pte_path: Path, ptd_path: Path) -> bool: +def run_executor_runner(pte_path: Path, ptd_path: Path) -> Tuple[bool, Optional[str]]: """ Run the executor_runner binary with the given model files. Returns: - True if execution succeeded, False otherwise. + Tuple of (success, error_message). If success is True, error_message is None. + If success is False, error_message contains details about the failure. """ if not EXECUTOR_RUNNER.exists(): raise RuntimeError( @@ -591,8 +633,10 @@ def run_executor_runner(pte_path: Path, ptd_path: Path) -> bool: cmd = [ str(EXECUTOR_RUNNER), - "--model_path", str(pte_path), - "--data_path", str(ptd_path), + "--model_path", + str(pte_path), + "--data_path", + str(ptd_path), ] try: @@ -603,11 +647,17 @@ def run_executor_runner(pte_path: Path, ptd_path: Path) -> bool: timeout=60, cwd=str(EXECUTORCH_ROOT), ) - return result.returncode == 0 - except subprocess.TimeoutExpired: - return False - except Exception: - return False + if result.returncode == 0: + return True, None + else: + error_msg = ( + f"executor_runner exited with code {result.returncode}\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}" + ) + return False, error_msg + except subprocess.TimeoutExpired as e: + return False, f"executor_runner timed out after 60 seconds: {e}" def read_output_file(filepath: Path) -> Optional[np.ndarray]: @@ -639,11 +689,14 @@ def compare_outputs( if runtime_values is None: return False, None, None - # Flatten expected output + # Flatten expected output and move to CPU for numpy conversion + # (required when tensor is on MPS device) if isinstance(expected, tuple): - expected_values = np.concatenate([t.flatten().numpy() for t in expected]) + expected_values = np.concatenate( + [t.detach().cpu().flatten().numpy() for t in expected] + ) else: - expected_values = expected.flatten().numpy() + expected_values = expected.detach().cpu().flatten().numpy() if len(runtime_values) != len(expected_values): return False, None, None @@ -725,47 +778,62 @@ def _test_module_output_consistency( self.skipTest(SKIP_RUNTIME_REASON) model, example_inputs = get_model_and_inputs(model_name, dtype=dtype) + dtype_name = DTYPE_NAMES[dtype] + test_subdir_name = f"{model_name}_{dtype_name}" - with tempfile.TemporaryDirectory() as tmpdir: - tmpdir_path = Path(tmpdir) - - # Create aoti_debug_data directory for output files - debug_dir = tmpdir_path / "aoti_debug_data" - debug_dir.mkdir(exist_ok=True) + def run_test_in_directory(test_dir: Path) -> None: + """Run the actual test logic in the given directory.""" + # Create model output directory: aoti_debug_data/_/ + model_output_dir = test_dir / test_subdir_name + model_output_dir.mkdir(parents=True, exist_ok=True) # Export model and get expected output pte_path, ptd_path, expected_output = export_model_to_files( - model, example_inputs, tmpdir_path, model_name + model, example_inputs, model_output_dir, model_name ) self.assertTrue( pte_path.exists(), - 
f"{model_name}: PTE file not created at {pte_path}", + f"{model_name} ({dtype_name}): PTE file not created at {pte_path}", ) self.assertTrue( ptd_path.exists(), - f"{model_name}: PTD file not created at {ptd_path}", + f"{model_name} ({dtype_name}): PTD file not created at {ptd_path}", ) # Run executor_runner - success = run_executor_runner(pte_path, ptd_path) + success, error_msg = run_executor_runner(pte_path, ptd_path) self.assertTrue( success, - f"{model_name}: executor_runner failed", + f"{model_name} ({dtype_name}): executor_runner failed\n{error_msg}", ) - # Compare outputs - runtime_output_file = debug_dir / "final_runtime_output.txt" + # Compare outputs - executor_runner writes to aoti_debug_data/ in cwd + # In CI, this is TEST_OUTPUT_BASE_DIR; locally it may vary + runtime_output_file = model_output_dir / "final_runtime_output.txt" - if runtime_output_file.exists(): - is_close, max_atol, max_rtol = compare_outputs( - expected_output, runtime_output_file - ) + self.assertTrue( + runtime_output_file.exists(), + f"{model_name} ({dtype_name}): Runtime output file not created at {runtime_output_file}", + ) - self.assertTrue( - is_close, - f"{model_name} ({DTYPE_NAMES[dtype]}): Output mismatch - max_atol={max_atol}, max_rtol={max_rtol}", - ) + is_close, max_atol, max_rtol = compare_outputs( + expected_output, runtime_output_file + ) + + self.assertTrue( + is_close, + f"{model_name} ({dtype_name}): Output mismatch - max_atol={max_atol}, max_rtol={max_rtol}", + ) + + if IS_CI: + # In CI, use a persistent directory in the current working directory + TEST_OUTPUT_BASE_DIR.mkdir(parents=True, exist_ok=True) + run_test_in_directory(TEST_OUTPUT_BASE_DIR) + else: + # Locally, use a temporary directory that gets cleaned up + with tempfile.TemporaryDirectory() as tmpdir: + run_test_in_directory(Path(tmpdir)) # ============================================================================= @@ -775,8 +843,10 @@ def _test_module_output_consistency( def _make_export_test(model_name: str, dtype: torch.dtype): """Factory function to create an export test method for a given model and dtype.""" + def test_method(self): self._test_module_export(model_name, dtype) + dtype_name = DTYPE_NAMES[dtype] test_method.__doc__ = f"Test {model_name} module export with {dtype_name}." return test_method @@ -784,10 +854,14 @@ def test_method(self): def _make_output_consistency_test(model_name: str, dtype: torch.dtype): """Factory function to create an output consistency test method for a given model and dtype.""" + def test_method(self): self._test_module_output_consistency(model_name, dtype) + dtype_name = DTYPE_NAMES[dtype] - test_method.__doc__ = f"Test {model_name} module output consistency with {dtype_name}." + test_method.__doc__ = ( + f"Test {model_name} module output consistency with {dtype_name}." 
+ ) return test_method From b4310cc6f03ad25fd8e082b5d4995a18eb4e4491 Mon Sep 17 00:00:00 2001 From: Manuel Candales Date: Fri, 30 Jan 2026 22:01:15 -0500 Subject: [PATCH 3/6] Update [ghstack-poisoned] --- .github/workflows/metal.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/metal.yml b/.github/workflows/metal.yml index 63466f36abb..50ab0a70e1c 100644 --- a/.github/workflows/metal.yml +++ b/.github/workflows/metal.yml @@ -54,7 +54,7 @@ jobs: export-model-metal-artifact: name: export-model-metal-artifact - # Skip this job if the pull request is from a fork (HuggingFace secrets are not available) + # Skip this job if the pull request is from a fork (HuggingFace secrets are not available) if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request' uses: pytorch/test-infra/.github/workflows/macos_job.yml@main secrets: inherit From 94c823c8f1effbcf7f8c0bff3aa3db2c0ef570f3 Mon Sep 17 00:00:00 2001 From: Manuel Candales Date: Fri, 30 Jan 2026 23:32:54 -0500 Subject: [PATCH 4/6] Update [ghstack-poisoned] --- backends/apple/metal/tests/run_metal_test.sh | 19 +- backends/apple/metal/tests/test_modules.py | 241 ++++++++++++++++--- 2 files changed, 212 insertions(+), 48 deletions(-) diff --git a/backends/apple/metal/tests/run_metal_test.sh b/backends/apple/metal/tests/run_metal_test.sh index 9595cbf0c3d..0f70c20ea4e 100755 --- a/backends/apple/metal/tests/run_metal_test.sh +++ b/backends/apple/metal/tests/run_metal_test.sh @@ -8,7 +8,7 @@ # Script to build and run Metal backend tests # Usage: # ./run_metal_test.sh --build # Build the Metal runtime -# ./run_metal_test.sh --run # Run inference with given model files +# ./run_metal_test.sh --run # Run inference with given model file # ./run_metal_test.sh --check-build # Check if runtime is already built set -e # Exit on any error @@ -74,7 +74,6 @@ build_runtime() { # Function to run inference run_inference() { local pte_path="$1" - local ptd_path="$2" if [[ ! -f "$EXECUTOR_RUNNER" ]]; then echo "Error: executor_runner not found at $EXECUTOR_RUNNER" @@ -87,16 +86,10 @@ run_inference() { exit 1 fi - if [[ ! -f "$ptd_path" ]]; then - echo "Error: PTD file not found: $ptd_path" - exit 1 - fi - echo "Running inference..." 
echo " PTE: $pte_path" - echo " PTD: $ptd_path" - "$EXECUTOR_RUNNER" --model_path "$pte_path" --data_path "$ptd_path" + "$EXECUTOR_RUNNER" --model_path "$pte_path" } # Parse command line arguments @@ -105,11 +98,11 @@ case "$1" in build_runtime ;; --run) - if [[ -z "$2" ]] || [[ -z "$3" ]]; then - echo "Usage: $0 --run " + if [[ -z "$2" ]]; then + echo "Usage: $0 --run " exit 1 fi - run_inference "$2" "$3" + run_inference "$2" ;; --check-build) check_build @@ -119,7 +112,7 @@ case "$1" in echo "" echo "Usage:" echo " $0 --build Build the Metal runtime" - echo " $0 --run Run inference with given model files" + echo " $0 --run Run inference with given model file" echo " $0 --check-build Check if runtime is already built" exit 1 ;; diff --git a/backends/apple/metal/tests/test_modules.py b/backends/apple/metal/tests/test_modules.py index c97298a6bc2..fc3e2c6d4e8 100644 --- a/backends/apple/metal/tests/test_modules.py +++ b/backends/apple/metal/tests/test_modules.py @@ -72,7 +72,22 @@ torch.bfloat16: "bfloat16", } +# Default tolerances for output comparison by dtype +# bfloat16 has lower precision (7 bits mantissa vs 23 for float32) +DEFAULT_TOLERANCES = { + torch.float32: {"atol": 1e-5, "rtol": 1e-5}, + torch.bfloat16: {"atol": 1e-2, "rtol": 1e-2}, +} + + # Registry mapping model names to their configurations +# Each entry can optionally include: +# - "atol": float - Override absolute tolerance for all dtypes +# - "rtol": float - Override relative tolerance for all dtypes +# - "atol_": float - Override absolute tolerance for specific dtype (e.g., "atol_bfloat16") +# - "rtol_": float - Override relative tolerance for specific dtype (e.g., "rtol_bfloat16") +# - "skip": bool or str - Skip all tests for this module (True to skip, or string with reason) +# - "skip_": bool or str - Skip tests for specific dtype (e.g., "skip_bfloat16") MODULE_REGISTRY: Dict[str, Dict[str, Any]] = {} @@ -206,6 +221,7 @@ def forward(self, x: torch.Tensor): "model_class": SingleConv2d, "input_shapes": [(4, 3, 8, 8)], "description": "Single Conv2d layer model", + "skip": True, } @@ -232,6 +248,7 @@ def forward(self, x): "model_class": DepthwiseConv, "input_shapes": [(1, 32, 112, 112)], "description": "Single Depthwise Conv2d layer model", + "skip": True, } @@ -412,6 +429,8 @@ def __init__(self): "model_class": AddStridedSDPA, "input_shapes": [(10, 20, 1500, 64)], "description": "Audio Encoder model with strided SDPA", + "atol_float32": 1e-4, + "atol_bfloat16": 5e-2, } @@ -532,6 +551,45 @@ def forward(self, x): "model_class": SingleResNetBlock, "input_shapes": [(1, 64, 8, 8)], "description": "Single ResNet block with skip connection", + "skip": True, +} + + +# ------------------------------------------------------------------------- +class TransformerBlock(nn.Module): + def __init__(self, embed_dim=256, num_heads=8, ff_dim=1024, dropout=0.1): + super().__init__() + self.embed_dim = embed_dim + self.num_heads = num_heads + + self.self_attn = nn.MultiheadAttention( + embed_dim=embed_dim, num_heads=num_heads, dropout=dropout, batch_first=True + ) + + self.norm1 = nn.LayerNorm(embed_dim) + self.norm2 = nn.LayerNorm(embed_dim) + + self.ffn = nn.Sequential( + nn.Linear(embed_dim, ff_dim), + nn.ReLU(), + nn.Dropout(dropout), + nn.Linear(ff_dim, embed_dim), + nn.Dropout(dropout), + ) + + def forward(self, x): + attn_output, _ = self.self_attn(x, x, x) + x = self.norm1(x + attn_output) + ff_output = self.ffn(x) + x = self.norm2(x + ff_output) + return x + + +MODULE_REGISTRY["transformer_block"] = { + "model_class": 
TransformerBlock, + "input_shapes": [(4, 32, 256)], + "description": "Single transformer block with multi-head attention and FFN", + "skip": True, } @@ -540,6 +598,59 @@ def forward(self, x): # ============================================================================= +def get_tolerances_for_model( + model_name: str, dtype: torch.dtype +) -> Tuple[float, float]: + """ + Get atol and rtol tolerances for a specific model and dtype. + + Priority order: + 1. Model-specific dtype tolerance (e.g., "atol_bfloat16") + 2. Model-specific general tolerance (e.g., "atol") + 3. Default dtype tolerance from DEFAULT_TOLERANCES + + Returns: + Tuple of (atol, rtol) + """ + model_config = MODULE_REGISTRY.get(model_name, {}) + dtype_name = DTYPE_NAMES.get(dtype, "float32") + default_tols = DEFAULT_TOLERANCES.get(dtype, DEFAULT_TOLERANCES[torch.float32]) + + # Check for dtype-specific override, then general override, then default + atol = model_config.get( + f"atol_{dtype_name}", model_config.get("atol", default_tols["atol"]) + ) + rtol = model_config.get( + f"rtol_{dtype_name}", model_config.get("rtol", default_tols["rtol"]) + ) + + return atol, rtol + + +def should_skip_model(model_name: str, dtype: torch.dtype) -> Tuple[bool, str]: + """ + Check if a model should be skipped for testing. + + Priority order: + 1. Model-specific dtype skip (e.g., "skip_bfloat16") + 2. Model-specific general skip (e.g., "skip") + + Returns: + Tuple of (should_skip, reason) + """ + model_config = MODULE_REGISTRY.get(model_name, {}) + dtype_name = DTYPE_NAMES.get(dtype, "float32") + + # Check for dtype-specific skip first, then general skip + skip_value = model_config.get(f"skip_{dtype_name}", model_config.get("skip", False)) + + if skip_value is True: + return True, f"{model_name} is marked as skipped" + elif isinstance(skip_value, str): + return True, skip_value + return False, "" + + def get_model_and_inputs( model_name: str, dtype: torch.dtype = torch.float32 ) -> Tuple[nn.Module, Tuple[torch.Tensor, ...]]: @@ -605,22 +716,24 @@ def export_model_to_files( # Export to executorch executorch_program = export_model_to_metal(model, example_inputs) - # Save .pte file + # Save .pte file (Metal backend embeds data into the .pte file, no separate .ptd) pte_path = output_dir / f"{model_name}.pte" with open(pte_path, "wb") as f: f.write(executorch_program.buffer) - # Save .ptd file (tensor data) - executorch_program.write_tensor_data_to_file(str(output_dir)) - ptd_path = output_dir / "aoti_metal_blob.ptd" - - return pte_path, ptd_path, expected_output + return pte_path, expected_output -def run_executor_runner(pte_path: Path, ptd_path: Path) -> Tuple[bool, Optional[str]]: +def run_executor_runner( + pte_path: Path, output_path: Path +) -> Tuple[bool, Optional[str]]: """ Run the executor_runner binary with the given model files. + Args: + pte_path: Path to the .pte model file + output_path: Base path for output files (executor_runner will create -0.bin, etc.) + Returns: Tuple of (success, error_message). If success is True, error_message is None. If success is False, error_message contains details about the failure. 
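The binary reader introduced below (read_binary_output_file) recovers float32
values from raw bfloat16 bytes by widening each stored 16-bit pattern into the
high half of a 32-bit word. A minimal standalone sketch of that conversion,
assuming a little-endian host and using an illustrative helper name:

    import numpy as np

    def bf16_bytes_to_float32(raw: bytes) -> np.ndarray:
        # bfloat16 keeps the top 16 bits of an IEEE-754 float32, so shifting
        # the stored bits left by 16 reconstructs an exact float32 value.
        u16 = np.frombuffer(raw, dtype=np.uint16)
        return (u16.astype(np.uint32) << 16).view(np.float32)

    # 1.5 is exactly representable in bfloat16; on a little-endian host its
    # bfloat16 encoding is the top two bytes of the float32 encoding.
    assert bf16_bytes_to_float32(np.float32(1.5).tobytes()[2:])[0] == 1.5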
@@ -635,8 +748,8 @@ def run_executor_runner(pte_path: Path, ptd_path: Path) -> Tuple[bool, Optional[ str(EXECUTOR_RUNNER), "--model_path", str(pte_path), - "--data_path", - str(ptd_path), + "--output_file", + str(output_path), ] try: @@ -660,32 +773,80 @@ def run_executor_runner(pte_path: Path, ptd_path: Path) -> Tuple[bool, Optional[ return False, f"executor_runner timed out after 60 seconds: {e}" -def read_output_file(filepath: Path) -> Optional[np.ndarray]: - """Read comma-separated output values from a file.""" +def read_binary_output_file(filepath: Path, dtype: torch.dtype) -> Optional[np.ndarray]: + """ + Read binary output values from an executor_runner output file. + + Args: + filepath: Path to the binary output file + dtype: The torch dtype to interpret the binary data as + + Returns: + numpy array of values, or None if file doesn't exist or is empty + """ + if not filepath.exists(): + return None + + # Map torch dtype to numpy dtype + dtype_map = { + torch.float32: np.float32, + torch.float16: np.float16, + torch.bfloat16: np.float32, # bfloat16 is read as float32 after conversion + torch.int32: np.int32, + torch.int64: np.int64, + } + + np_dtype = dtype_map.get(dtype, np.float32) + try: - with open(filepath, "r") as f: - content = f.read().strip() - if not content: + with open(filepath, "rb") as f: + binary_data = f.read() + if not binary_data: return None - values = [float(x.strip()) for x in content.split(",") if x.strip()] - return np.array(values) - except (FileNotFoundError, ValueError): + # For bfloat16, the runtime output is in bfloat16 format (2 bytes per element) + # We need to read it as uint16 and convert + if dtype == torch.bfloat16: + # Read as uint16 (2 bytes per element like bfloat16) + values_uint16 = np.frombuffer(binary_data, dtype=np.uint16) + # Convert bfloat16 to float32 by shifting left 16 bits + values_uint32 = values_uint16.astype(np.uint32) << 16 + values = values_uint32.view(np.float32) + else: + values = np.frombuffer(binary_data, dtype=np_dtype) + return values + except (FileNotFoundError, ValueError) as e: + print(f"Error reading binary file {filepath}: {e}") return None def compare_outputs( expected: torch.Tensor, runtime_output_file: Path, - atol: float = 1e-5, - rtol: float = 1e-5, + dtype: torch.dtype, + atol: Optional[float] = None, + rtol: Optional[float] = None, ) -> Tuple[bool, Optional[float], Optional[float]]: """ - Compare expected PyTorch output with runtime output from file. + Compare expected PyTorch output with runtime output from binary file. 
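+
+    Runtime bfloat16 output is up-converted to float32 while it is parsed, and
+    the expected tensor is likewise cast to float32, so the comparison itself
+    always runs in float32.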
+ + Args: + expected: Expected output tensor from PyTorch + runtime_output_file: Path to the binary output file from executor_runner + dtype: The dtype used for the model (needed to parse binary output) + atol: Absolute tolerance for comparison (if None, uses dtype-specific default) + rtol: Relative tolerance for comparison (if None, uses dtype-specific default) Returns: Tuple of (is_close, max_atol, max_rtol) """ - runtime_values = read_output_file(runtime_output_file) + # Use dtype-specific tolerances if not specified + tolerances = DEFAULT_TOLERANCES.get(dtype, DEFAULT_TOLERANCES[torch.float32]) + if atol is None: + atol = tolerances["atol"] + if rtol is None: + rtol = tolerances["rtol"] + + runtime_values = read_binary_output_file(runtime_output_file, dtype) if runtime_values is None: return False, None, None @@ -693,10 +854,10 @@ def compare_outputs( # (required when tensor is on MPS device) if isinstance(expected, tuple): expected_values = np.concatenate( - [t.detach().cpu().flatten().numpy() for t in expected] + [t.detach().cpu().float().flatten().numpy() for t in expected] ) else: - expected_values = expected.detach().cpu().flatten().numpy() + expected_values = expected.detach().cpu().float().flatten().numpy() if len(runtime_values) != len(expected_values): return False, None, None @@ -736,6 +897,11 @@ def _test_module_export( self, model_name: str, dtype: torch.dtype = torch.float32 ) -> None: """Generic test for module export.""" + # Check if this model/dtype combination should be skipped + skip, skip_reason = should_skip_model(model_name, dtype) + if skip: + self.skipTest(skip_reason) + if SKIP_EXPORT_TESTS: self.skipTest(SKIP_REASON) @@ -770,10 +936,15 @@ def _test_module_output_consistency( Test that Metal backend runtime output matches PyTorch output. This test: - 1. Exports the model to .pte and .ptd files + 1. Exports the model to a .pte file 2. Runs the model using executor_runner 3. 
Compares the runtime output with expected PyTorch output
         """
+        # Check if this model/dtype combination should be skipped
+        skip, skip_reason = should_skip_model(model_name, dtype)
+        if skip:
+            self.skipTest(skip_reason)
+
         if SKIP_RUNTIME_TESTS:
             self.skipTest(SKIP_RUNTIME_REASON)
 
@@ -788,7 +959,7 @@ def run_test_in_directory(test_dir: Path) -> None:
             model_output_dir.mkdir(parents=True, exist_ok=True)
 
             # Export model and get expected output
-            pte_path, ptd_path, expected_output = export_model_to_files(
+            pte_path, expected_output = export_model_to_files(
                 model, example_inputs, model_output_dir, model_name
             )
 
@@ -796,29 +967,29 @@ def run_test_in_directory(test_dir: Path) -> None:
                 pte_path.exists(),
                 f"{model_name} ({dtype_name}): PTE file not created at {pte_path}",
             )
-            self.assertTrue(
-                ptd_path.exists(),
-                f"{model_name} ({dtype_name}): PTD file not created at {ptd_path}",
-            )
 
-            # Run executor_runner
-            success, error_msg = run_executor_runner(pte_path, ptd_path)
+            # Run executor_runner with output file
+            output_base_path = model_output_dir / "output"
+            success, error_msg = run_executor_runner(pte_path, output_base_path)
             self.assertTrue(
                 success,
                 f"{model_name} ({dtype_name}): executor_runner failed\n{error_msg}",
             )
 
-            # Compare outputs - executor_runner writes to aoti_debug_data/ in cwd
-            # In CI, this is TEST_OUTPUT_BASE_DIR; locally it may vary
-            runtime_output_file = model_output_dir / "final_runtime_output.txt"
+            # executor_runner writes output files as <output_file>-<index>.bin
+            # For single-output models, this is output-0.bin
+            runtime_output_file = model_output_dir / "output-0.bin"
            self.assertTrue(
                 runtime_output_file.exists(),
                 f"{model_name} ({dtype_name}): Runtime output file not created at {runtime_output_file}",
             )
 
+            # Get model-specific tolerances (with dtype-specific overrides)
+            atol, rtol = get_tolerances_for_model(model_name, dtype)
+
             is_close, max_atol, max_rtol = compare_outputs(
-                expected_output, runtime_output_file
+                expected_output, runtime_output_file, dtype, atol=atol, rtol=rtol
             )
 
             self.assertTrue(

From 31b6f4555bae21d41797d01ac18fa3b888f441f4 Mon Sep 17 00:00:00 2001
From: Manuel Candales
Date: Mon, 2 Feb 2026 16:27:14 -0500
Subject: [PATCH 5/6] Update

[ghstack-poisoned]
---
 backends/apple/metal/tests/test_modules.py | 261 +++------------------
 1 file changed, 29 insertions(+), 232 deletions(-)

diff --git a/backends/apple/metal/tests/test_modules.py b/backends/apple/metal/tests/test_modules.py
index fc3e2c6d4e8..6a5eaeb9a53 100644
--- a/backends/apple/metal/tests/test_modules.py
+++ b/backends/apple/metal/tests/test_modules.py
@@ -50,7 +50,7 @@
 
 # Test output directory - use current working directory in CI for reliable write access
 if IS_CI:
-    TEST_OUTPUT_BASE_DIR = Path.cwd() / "aoti_debug_data"
+    TEST_OUTPUT_BASE_DIR = Path.cwd() / "metal_backend_module_outputs"
 else:
     TEST_OUTPUT_BASE_DIR = None  # Will use tempfile.TemporaryDirectory
 
@@ -126,7 +126,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
 # -------------------------------------------------------------------------
 
 
-class MmWeights(nn.Module):
+class MmWeightParam(nn.Module):
     def __init__(self):
         super().__init__()
         self.weight = nn.Parameter(torch.arange(20, dtype=torch.float).reshape(4, 5))
@@ -135,51 +135,13 @@ def forward(self, x: torch.Tensor):
         return x.mm(self.weight)
 
 
-MODULE_REGISTRY["mm_weights"] = {
-    "model_class": MmWeights,
+MODULE_REGISTRY["mm_weight_param"] = {
+    "model_class": MmWeightParam,
     "input_shapes": [(3, 4)],
     "description": "Matrix multiplication with weight parameter",
 }
 
 
-# 
------------------------------------------------------------------------- -class TwoMm(nn.Module): - def __init__(self): - super().__init__() - self.left_weight = nn.Parameter( - torch.arange(20, dtype=torch.float).reshape(4, 5) - ) - self.right_weight = nn.Parameter( - torch.arange(42, dtype=torch.float).reshape(6, 7) - ) - - def forward(self, x: torch.Tensor): - return self.left_weight.mm(x).mm(self.right_weight) - - -MODULE_REGISTRY["two_mm"] = { - "model_class": TwoMm, - "input_shapes": [(5, 6)], - "description": "Two consecutive matrix multiplications", -} - - -# ------------------------------------------------------------------------- -class ElementwiseMmReduction(nn.Module): - def forward(self, x: torch.Tensor, y: torch.Tensor): - x1 = x.sin() + x - y2 = y.cos() + 3 - z = x1.mm(y2) - return z + z.sum() - - -MODULE_REGISTRY["elementwise_mm_reduction"] = { - "model_class": ElementwiseMmReduction, - "input_shapes": [(11, 45), (45, 8)], - "description": "Combining mm with elementwise and reduction ops", -} - - # ------------------------------------------------------------------------- # Linear Modules # ------------------------------------------------------------------------- @@ -206,54 +168,7 @@ def forward(self, x: torch.Tensor): # ------------------------------------------------------------------------- -class SingleConv2d(nn.Module): - def __init__(self): - super().__init__() - self.conv = nn.Conv2d( - in_channels=3, out_channels=5, kernel_size=3, stride=1, padding=1 - ) - - def forward(self, x: torch.Tensor): - return self.conv(x) - - -MODULE_REGISTRY["conv2d"] = { - "model_class": SingleConv2d, - "input_shapes": [(4, 3, 8, 8)], - "description": "Single Conv2d layer model", - "skip": True, -} - - -# ------------------------------------------------------------------------- -class DepthwiseConv(nn.Module): - def __init__(self): - super().__init__() - self.conv = nn.Conv2d( - in_channels=32, - out_channels=32, - kernel_size=3, - stride=1, - padding=1, - dilation=1, - groups=32, - bias=False, - ) - - def forward(self, x): - return self.conv(x) - - -MODULE_REGISTRY["depthwise_conv"] = { - "model_class": DepthwiseConv, - "input_shapes": [(1, 32, 112, 112)], - "description": "Single Depthwise Conv2d layer model", - "skip": True, -} - - -# ------------------------------------------------------------------------- -class SmallConv1d(nn.Module): +class Conv1dNoBias(nn.Module): def __init__(self): super().__init__() self.conv = nn.Conv1d( @@ -271,15 +186,15 @@ def forward(self, x): return self.conv(x) -MODULE_REGISTRY["small_conv1d"] = { - "model_class": SmallConv1d, +MODULE_REGISTRY["conv1d_nobias"] = { + "model_class": Conv1dNoBias, "input_shapes": [(1, 8, 5)], "description": "Conv1d layer with 8 input channels, 6 output channels", } # ------------------------------------------------------------------------- -class MediumConv1d(nn.Module): +class Conv1dBias(nn.Module): def __init__(self): super().__init__() self.conv = nn.Conv1d( @@ -297,15 +212,15 @@ def forward(self, x): return self.conv(x) -MODULE_REGISTRY["conv1d"] = { - "model_class": MediumConv1d, +MODULE_REGISTRY["conv1d_bias"] = { + "model_class": Conv1dBias, "input_shapes": [(1, 80, 3000)], "description": "Conv1d layer with 80 input channels, 384 output channels", } # ------------------------------------------------------------------------- -class VoxtralConv1d(nn.Module): +class Conv1dVoxtral(nn.Module): def __init__(self): super().__init__() self.conv = nn.Conv1d( @@ -323,8 +238,8 @@ def forward(self, x): return self.conv(x) 
-MODULE_REGISTRY["voxtral_conv1d"] = { - "model_class": VoxtralConv1d, +MODULE_REGISTRY["conv1d_voxtral"] = { + "model_class": Conv1dVoxtral, "input_shapes": [(10, 128, 3000)], "description": "Conv1d layer with 128 input channels, 1280 output channels", } @@ -335,7 +250,7 @@ def forward(self, x): # ------------------------------------------------------------------------- -class SimpleSDPA(nn.Module): +class SDPA(nn.Module): """Minimal SDPA test model.""" def forward( @@ -348,14 +263,14 @@ def forward( MODULE_REGISTRY["sdpa"] = { - "model_class": SimpleSDPA, + "model_class": SDPA, "input_shapes": [(2, 4, 16, 64), (2, 4, 16, 64), (2, 4, 16, 64)], "description": "Simple Scaled Dot Product Attention model", } # ------------------------------------------------------------------------- -class AddSDPA(nn.Module): +class SDPAAdd(nn.Module): """SDPA model with Q, K, V as parameters that adds input to SDPA output.""" def __init__(self, batch_size=2, num_heads=4, seq_len=16, head_dim=64): @@ -371,15 +286,15 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return sdpa_output + x -MODULE_REGISTRY["add_sdpa"] = { - "model_class": AddSDPA, +MODULE_REGISTRY["sdpa_add"] = { + "model_class": SDPAAdd, "input_shapes": [(2, 4, 16, 64)], "description": "SDPA model with Q,K,V as parameters that adds input to output", } # ------------------------------------------------------------------------- -class BaseAddStridedSDPA(nn.Module): +class BaseStridedSDPA(nn.Module): """SDPA model with strided Q, K, V parameters.""" def __init__( @@ -413,7 +328,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # ------------------------------------------------------------------------- -class AddStridedSDPA(BaseAddStridedSDPA): +class SDPAStrided(BaseStridedSDPA): def __init__(self): super().__init__( q_size=(10, 20, 1500, 64), @@ -425,8 +340,8 @@ def __init__(self): ) -MODULE_REGISTRY["audio_encoder_sdpa1"] = { - "model_class": AddStridedSDPA, +MODULE_REGISTRY["sdpa_strided"] = { + "model_class": SDPAStrided, "input_shapes": [(10, 20, 1500, 64)], "description": "Audio Encoder model with strided SDPA", "atol_float32": 1e-4, @@ -435,7 +350,7 @@ def __init__(self): # ------------------------------------------------------------------------- -class AddStridedSDPA1(BaseAddStridedSDPA): +class SDPAStridedBroadcast(BaseStridedSDPA): def __init__(self): super().__init__( q_size=(1, 20, 1, 64), @@ -447,15 +362,15 @@ def __init__(self): ) -MODULE_REGISTRY["whisper_strided_sdpa1"] = { - "model_class": AddStridedSDPA1, +MODULE_REGISTRY["sdpa_strided_broadcast"] = { + "model_class": SDPAStridedBroadcast, "input_shapes": [(1, 20, 1, 64)], "description": "Whisper-like strided SDPA variant 1", } # ------------------------------------------------------------------------- -class AddStridedSDPA2(BaseAddStridedSDPA): +class SDPAStridedBroadcastAttnMask(BaseStridedSDPA): def __init__(self): super().__init__( q_size=(1, 20, 1, 64), @@ -468,131 +383,13 @@ def __init__(self): ) -MODULE_REGISTRY["whisper_strided_sdpa2"] = { - "model_class": AddStridedSDPA2, +MODULE_REGISTRY["sdpa_strided_broadcast_attn_mask"] = { + "model_class": SDPAStridedBroadcastAttnMask, "input_shapes": [(1, 20, 1, 64)], "description": "Whisper-like strided SDPA variant 2", } -# ------------------------------------------------------------------------- -# Normalization Modules -# ------------------------------------------------------------------------- - - -class BatchNorm(nn.Module): - def __init__(self): - super().__init__() - self.bn = nn.BatchNorm2d(num_features=16) - 
def forward(self, x):
-        return self.bn(x)
-
-
-MODULE_REGISTRY["batchnorm"] = {
-    "model_class": BatchNorm,
-    "input_shapes": [(1, 16, 32, 32)],
-    "description": "Single BatchNorm2d layer model",
-}
-
-
-# -------------------------------------------------------------------------
-# Block/Composite Modules
-# -------------------------------------------------------------------------
-
-
-class SingleResNetBlock(nn.Module):
-    def __init__(self, in_channels=64, out_channels=64, stride=1):
-        super().__init__()
-        self.conv1 = nn.Conv2d(
-            in_channels,
-            out_channels,
-            kernel_size=3,
-            stride=stride,
-            padding=1,
-            bias=False,
-        )
-        self.bn1 = nn.BatchNorm2d(out_channels)
-        self.relu = nn.ReLU(inplace=True)
-        self.conv2 = nn.Conv2d(
-            out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False
-        )
-        self.bn2 = nn.BatchNorm2d(out_channels)
-
-        self.skip_connection = None
-        if stride != 1 or in_channels != out_channels:
-            self.skip_connection = nn.Sequential(
-                nn.Conv2d(
-                    in_channels, out_channels, kernel_size=1, stride=stride, bias=False
-                ),
-                nn.BatchNorm2d(out_channels),
-            )
-
-    def forward(self, x):
-        identity = x
-
-        out = self.conv1(x)
-        out = self.bn1(out)
-        out = self.relu(out)
-
-        out = self.conv2(out)
-        out = self.bn2(out)
-
-        if self.skip_connection is not None:
-            identity = self.skip_connection(x)
-
-        out += identity
-        out = self.relu(out)
-
-        return out
-
-
-MODULE_REGISTRY["single_resnet_block"] = {
-    "model_class": SingleResNetBlock,
-    "input_shapes": [(1, 64, 8, 8)],
-    "description": "Single ResNet block with skip connection",
-    "skip": True,
-}
-
-
-# -------------------------------------------------------------------------
-class TransformerBlock(nn.Module):
-    def __init__(self, embed_dim=256, num_heads=8, ff_dim=1024, dropout=0.1):
-        super().__init__()
-        self.embed_dim = embed_dim
-        self.num_heads = num_heads
-
-        self.self_attn = nn.MultiheadAttention(
-            embed_dim=embed_dim, num_heads=num_heads, dropout=dropout, batch_first=True
-        )
-
-        self.norm1 = nn.LayerNorm(embed_dim)
-        self.norm2 = nn.LayerNorm(embed_dim)
-
-        self.ffn = nn.Sequential(
-            nn.Linear(embed_dim, ff_dim),
-            nn.ReLU(),
-            nn.Dropout(dropout),
-            nn.Linear(ff_dim, embed_dim),
-            nn.Dropout(dropout),
-        )
-
-    def forward(self, x):
-        attn_output, _ = self.self_attn(x, x, x)
-        x = self.norm1(x + attn_output)
-        ff_output = self.ffn(x)
-        x = self.norm2(x + ff_output)
-        return x
-
-
-MODULE_REGISTRY["transformer_block"] = {
-    "model_class": TransformerBlock,
-    "input_shapes": [(4, 32, 256)],
-    "description": "Single transformer block with multi-head attention and FFN",
-    "skip": True,
-}
-
-
 # =============================================================================
 # Helper Functions
 # =============================================================================
@@ -954,7 +751,7 @@ def _test_module_output_consistency(
 
         def run_test_in_directory(test_dir: Path) -> None:
             """Run the actual test logic in the given directory."""
-            # Create model output directory: aoti_debug_data/<model_name>_<dtype_name>/
+            # Create model output directory: metal_backend_module_outputs/<model_name>_<dtype_name>/
             model_output_dir = test_dir / test_subdir_name
             model_output_dir.mkdir(parents=True, exist_ok=True)

From 08346599720e32ec8d58bc5a7050603694c97003 Mon Sep 17 00:00:00 2001
From: Manuel Candales
Date: Mon, 2 Feb 2026 17:26:39 -0500
Subject: [PATCH 6/6] Update

[ghstack-poisoned]
---
 backends/apple/metal/tests/test_modules.py | 38 ++++++++++++++++++----
 1 file changed, 31 insertions(+), 7 deletions(-)

diff --git a/backends/apple/metal/tests/test_modules.py 
b/backends/apple/metal/tests/test_modules.py
index 6a5eaeb9a53..59904bb494d 100644
--- a/backends/apple/metal/tests/test_modules.py
+++ b/backends/apple/metal/tests/test_modules.py
@@ -88,6 +88,14 @@
 # - "rtol_<dtype>": float - Override relative tolerance for specific dtype (e.g., "rtol_bfloat16")
 # - "skip": bool or str - Skip all tests for this module (True to skip, or string with reason)
 # - "skip_<dtype>": bool or str - Skip tests for specific dtype (e.g., "skip_bfloat16")
+#
+# Model Parameter Initialization:
+# Model parameters are initialized with their default dtype (typically float32) when the
+# model class is instantiated. The parameters are then converted to the target dtype using
+# model.to(dtype). For example:
+# - nn.Parameter(torch.arange(20, dtype=torch.get_default_dtype())) creates float32 parameters
+# - These are converted to bfloat16 when model.to(torch.bfloat16) is called
+#
 MODULE_REGISTRY: Dict[str, Dict[str, Any]] = {}
 
 
@@ -129,7 +137,9 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
 class MmWeightParam(nn.Module):
     def __init__(self):
         super().__init__()
-        self.weight = nn.Parameter(torch.arange(20, dtype=torch.float).reshape(4, 5))
+        self.weight = nn.Parameter(
+            torch.arange(20, dtype=torch.get_default_dtype()).reshape(4, 5)
+        )
 
     def forward(self, x: torch.Tensor):
         return x.mm(self.weight)
@@ -451,7 +461,18 @@ def should_skip_model(model_name: str, dtype: torch.dtype) -> Tuple[bool, str]:
 def get_model_and_inputs(
     model_name: str, dtype: torch.dtype = torch.float32
 ) -> Tuple[nn.Module, Tuple[torch.Tensor, ...]]:
-    """Get model and example inputs based on model name."""
+    """Get model and example inputs based on model name.
+
+    Note: Model parameters are initialized with their default dtype (typically float32)
+    during model instantiation, then converted to the target dtype using model.to(dtype).
+
+    Args:
+        model_name: Name of the model to create
+        dtype: Target data type for the model (default: torch.float32)
+
+    Returns:
+        Tuple of (model, example_inputs)
+    """
     if model_name not in MODULE_REGISTRY:
         available_models = ", ".join(MODULE_REGISTRY.keys())
         raise ValueError(
@@ -462,7 +483,10 @@ def get_model_and_inputs(
     model_class = model_config["model_class"]
     input_shapes = model_config["input_shapes"]
 
+    # Create model with default parameter dtypes (typically float32)
     model = model_class().eval()
+
+    # Convert model parameters to target dtype if specified
     if dtype is not None:
         model = model.to(dtype)
 
@@ -493,17 +517,17 @@ def export_model_to_metal(
     return executorch_program
 
 
-def export_model_to_files(
+def export_model_to_pte(
     model: nn.Module,
     example_inputs: Tuple[torch.Tensor, ...],
     output_dir: Path,
     model_name: str,
-) -> Tuple[Path, Path, torch.Tensor]:
+) -> Tuple[Path, torch.Tensor]:
     """
-    Export model to .pte and .ptd files, and compute expected output.
+    Export model to a .pte file and compute the expected output.
 
     Returns:
-        Tuple of (pte_path, ptd_path, expected_output)
+        Tuple of (pte_path, expected_output)
     """
     # Compute expected output using all-ones input (matching export_aoti_metal.py)
     all_ones_input = tuple(torch.ones_like(inp) for inp in example_inputs)
@@ -756,7 +780,7 @@ def run_test_in_directory(test_dir: Path) -> None:
             model_output_dir.mkdir(parents=True, exist_ok=True)
 
             # Export model and get expected output
-            pte_path, expected_output = export_model_to_files(
+            pte_path, expected_output = export_model_to_pte(
                 model, example_inputs, model_output_dir, model_name
             )
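
Editorial note on the bfloat16 decoding used in read_binary_output_file above: bfloat16
keeps only the upper 16 bits of an IEEE float32, so widening each raw uint16 word to
uint32 and shifting it left by 16 reconstructs the exact float32 value (with the low
mantissa bits zeroed). A minimal standalone sketch of that round trip, illustrative only
and not part of the patch (the tensor values are arbitrary):

    import numpy as np
    import torch

    # Simulate what the runtime writes for a bfloat16 output: raw bytes, 2 per element.
    t = torch.tensor([1.5, -2.25, 3.0], dtype=torch.bfloat16)
    raw = t.view(torch.uint16).numpy().tobytes()

    # Decode the same way read_binary_output_file does: uint16 -> uint32 << 16 -> float32.
    words = np.frombuffer(raw, dtype=np.uint16)
    decoded = (words.astype(np.uint32) << 16).view(np.float32)

    # The reconstruction is exact for values representable in bfloat16.
    assert np.allclose(decoded, t.float().numpy())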