diff --git a/backends/nxp/backend/neutron_converter_manager.py b/backends/nxp/backend/neutron_converter_manager.py index 77f63176ac8..a89a639b40d 100644 --- a/backends/nxp/backend/neutron_converter_manager.py +++ b/backends/nxp/backend/neutron_converter_manager.py @@ -72,7 +72,9 @@ def verify_target(self, target: str): f"Target `{target}` is not a valid target. Must be one of `{valid_targets}`." ) - def convert(self, tflite_model: bytes, target: str) -> bytes: + def convert( + self, tflite_model: bytes, target: str, fetch_constants_to_sram: bool = False + ) -> bytes: # Neutron converter crashes if we provide invalid target -> verify. self.verify_target(target) @@ -82,6 +84,7 @@ def convert(self, tflite_model: bytes, target: str) -> bytes: cctx.compilationOpts.excludeGraphPasses = ( "HoistSliceAboveTranspose,MergeTranspose" ) + cctx.compilationOpts.fetchConstantsToSRAM = fetch_constants_to_sram # Try to use multiprocessing for isolation, but fall back to direct execution # if the environment doesn't support it (e.g., in sandcastle/build environments) diff --git a/backends/nxp/nxp_backend.py b/backends/nxp/nxp_backend.py index 83c304566d5..b5d508543a9 100644 --- a/backends/nxp/nxp_backend.py +++ b/backends/nxp/nxp_backend.py @@ -46,6 +46,7 @@ def __init__(self): self.operators_not_to_delegate: List[str] = [] self.neutron_converter_flavor = None self.use_neutron_for_format_conversion = True + self.fetch_constants_to_sram = False def _replace_colons(self, operator: str) -> str: """ @@ -60,6 +61,7 @@ def neutron_compile_spec( extra_flags: Optional[str] = None, operators_not_to_delegate: Optional[List[str]] = None, use_neutron_for_format_conversion: bool = True, + fetch_constants_to_sram: bool = False, ): """ Generate compile spec for Neutron NPU @@ -73,6 +75,8 @@ def neutron_compile_spec( use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to ensure that the IO matches the executorch partition, which will be delegated to Neutron. 
+ fetch_constants_to_sram: If True, the Neutron Converter will insert microinstructions to prefetch weights + from FLASH to SRAM. This should be used when the whole model does not fit into SRAM. """ self.neutron_converter_flavor = neutron_converter_flavor @@ -94,6 +98,8 @@ def neutron_compile_spec( self.use_neutron_for_format_conversion = use_neutron_for_format_conversion + self.fetch_constants_to_sram = fetch_constants_to_sram + return self def build(self): @@ -116,6 +122,10 @@ def build(self): "use_neutron_for_format_conversion", f"{self.use_neutron_for_format_conversion}".encode(), ), + CompileSpec( + "fetch_constants_to_sram", + f"{self.fetch_constants_to_sram}".encode(), + ), ] return self.compile_spec @@ -128,6 +138,7 @@ def generate_neutron_compile_spec( extra_flags: Optional[str] = None, operators_not_to_delegate: Optional[List[str]] = None, use_neutron_for_format_conversion: bool = True, + fetch_constants_to_sram: bool = False, ) -> List[CompileSpec]: return ( NeutronCompileSpecBuilder() @@ -137,6 +148,7 @@ def generate_neutron_compile_spec( extra_flags=extra_flags, operators_not_to_delegate=operators_not_to_delegate, use_neutron_for_format_conversion=use_neutron_for_format_conversion, + fetch_constants_to_sram=fetch_constants_to_sram, ) .build() ) @@ -160,6 +172,7 @@ def preprocess( # noqa C901 target = "" neutron_converter_flavor = "" use_neutron_for_format_conversion = None + fetch_constants_to_sram = False for spec in compile_spec: if spec.key == "output_format": output_format = spec.value.decode() @@ -171,6 +184,8 @@ def preprocess( # noqa C901 neutron_converter_flavor = spec.value.decode() if spec.key == "use_neutron_for_format_conversion": use_neutron_for_format_conversion = spec.value.decode() == "True" + if spec.key == "fetch_constants_to_sram": + fetch_constants_to_sram = spec.value.decode() == "True" # Check that the output format is set in the compile spec if not output_format: @@ -209,7 +224,7 @@ def preprocess( # noqa C901 ) neutron_model = 
NeutronConverterManager(neutron_converter_flavor).convert( - tflite_model, target + tflite_model, target, fetch_constants_to_sram ) # Dump the tflite file if logging level is enabled diff --git a/backends/nxp/tests/executorch_pipeline.py b/backends/nxp/tests/executorch_pipeline.py index fa775b860f8..508e12121b4 100644 --- a/backends/nxp/tests/executorch_pipeline.py +++ b/backends/nxp/tests/executorch_pipeline.py @@ -98,6 +98,7 @@ def to_quantized_edge_program( custom_delegation_options=CustomDelegationOptions(), # noqa B008 get_quantizer_fn=None, use_neutron_for_format_conversion=True, + fetch_constants_to_sram=False, ) -> EdgeProgramManager: _neutron_target_spec = NeutronTargetSpec(target, neutron_converter_flavor) if get_quantizer_fn is None: @@ -125,6 +126,7 @@ def to_quantized_edge_program( operators_not_to_delegate=operators_not_to_delegate, neutron_converter_flavor=neutron_converter_flavor, use_neutron_for_format_conversion=use_neutron_for_format_conversion, + fetch_constants_to_sram=fetch_constants_to_sram, ) partitioners = [ NeutronPartitioner( diff --git a/backends/nxp/tests/test_neutron_converter_manager.py b/backends/nxp/tests/test_neutron_converter_manager.py index 5b105d7ef64..0d164c24bf2 100644 --- a/backends/nxp/tests/test_neutron_converter_manager.py +++ b/backends/nxp/tests/test_neutron_converter_manager.py @@ -1,4 +1,4 @@ -# Copyright 2024-2025 NXP +# Copyright 2024-2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -14,7 +14,8 @@
     NeutronConverterManager,
 )
 from executorch.backends.nxp.backend.node_format_inference import NodeFormatInference
-from executorch.backends.nxp.tests.models import Conv2dModule
+from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
+from executorch.backends.nxp.tests.models import Conv2dModule, LinearModule
 
 
 def test_conv2d_neutron_conversion__default_flavor():
@@ -31,7 +32,7 @@ def test_conv2d_neutron_conversion__default_flavor():
     )
 
     neutron_converter_manager = NeutronConverterManager()
-    neutron_model = neutron_converter_manager.convert(tflite_model, "imxrt700")
+    neutron_model = neutron_converter_manager.convert(tflite_model, "imxrt700", False)
 
     assert len(
         neutron_model
@@ -52,8 +53,43 @@ def test__conv2d_neutron_conversion__invalid_flavor():
     )
 
     with pytest.raises(RuntimeError) as excinfo:
-        _ = NeutronConverterManager("bad_flavor").convert(tflite_model, "imxrt700")
+        _ = NeutronConverterManager("bad_flavor").convert(
+            tflite_model, "imxrt700", False
+        )
 
     assert "Neutron Converter module with flavor 'bad_flavor' not found." in str(
         excinfo
     )
+
+
+def test_conv2d_neutron_conversion__prefetching(mocker):
+    """Check that `fetch_constants_to_sram` changes the converted Neutron model.
+
+    Despite `conv2d` in its name, the test runs `LinearModule` through
+    `to_quantized_edge_program` twice (flag on, then off), captures what
+    `NeutronConverterManager.convert` returned each time, and compares the sizes.
+    """
+    model = LinearModule(True)
+    input_shape = (1, 1, 32, 32)
+
+    converter_spy = mocker.spy(NeutronConverterManager, "convert")
+
+    _ = to_quantized_edge_program(
+        model, input_shape, fetch_constants_to_sram=True
+    ).exported_program()
+    # `spy_return` keeps only the latest return value, so confirm the converter ran.
+    calls_with_prefetch = converter_spy.call_count
+    assert calls_with_prefetch > 0, "The Neutron Converter was not called."
+    neutron_model_prefetch = converter_spy.spy_return
+
+    _ = to_quantized_edge_program(
+        model, input_shape, fetch_constants_to_sram=False
+    ).exported_program()
+    assert (
+        converter_spy.call_count > calls_with_prefetch
+    ), "The Neutron Converter was not called for the second export."
+    neutron_model_regular = converter_spy.spy_return
+
+    assert len(neutron_model_prefetch) != len(
+        neutron_model_regular
+    ), "The weight prefetching flag does not make a difference!"
diff --git a/examples/nxp/aot_neutron_compile.py b/examples/nxp/aot_neutron_compile.py index 02e266ae0a8..84ca436f785 100644 --- a/examples/nxp/aot_neutron_compile.py +++ b/examples/nxp/aot_neutron_compile.py @@ -214,6 +214,13 @@ def get_model_and_inputs_from_name(model_name: str): help="The model (including the Neutron backend) will use the channels last dim order, which can result in faster " "inference. The inputs must also be provided in the channels last dim order.", ) + parser.add_argument( + "--fetch_constants_to_sram", + required=False, + default=False, + action="store_true", + help="This feature allows running models which do not fit into SRAM by offloading them to an external memory.", + ) args = parser.parse_args() @@ -291,6 +298,7 @@ def get_model_and_inputs_from_name(model_name: str): args.target, operators_not_to_delegate=args.operators_not_to_delegate, neutron_converter_flavor=args.neutron_converter_flavor, + fetch_constants_to_sram=args.fetch_constants_to_sram, ) partitioners = ( [NeutronPartitioner(compile_spec, neutron_target_spec)] if args.delegate else []