Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion backends/nxp/backend/neutron_converter_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,9 @@ def verify_target(self, target: str):
f"Target `{target}` is not a valid target. Must be one of `{valid_targets}`."
)

def convert(self, tflite_model: bytes, target: str) -> bytes:
def convert(
self, tflite_model: bytes, target: str, fetch_constants_to_sram: bool
) -> bytes:
# Neutron converter crashes if we provide invalid target -> verify.
self.verify_target(target)

Expand All @@ -82,6 +84,7 @@ def convert(self, tflite_model: bytes, target: str) -> bytes:
cctx.compilationOpts.excludeGraphPasses = (
"HoistSliceAboveTranspose,MergeTranspose"
)
cctx.compilationOpts.fetchConstantsToSRAM = fetch_constants_to_sram

# Try to use multiprocessing for isolation, but fall back to direct execution
# if the environment doesn't support it (e.g., in sandcastle/build environments)
Expand Down
17 changes: 16 additions & 1 deletion backends/nxp/nxp_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def __init__(self):
self.operators_not_to_delegate: List[str] = []
self.neutron_converter_flavor = None
self.use_neutron_for_format_conversion = True
self.fetch_constants_to_sram = False

def _replace_colons(self, operator: str) -> str:
"""
Expand All @@ -60,6 +61,7 @@ def neutron_compile_spec(
extra_flags: Optional[str] = None,
operators_not_to_delegate: Optional[List[str]] = None,
use_neutron_for_format_conversion: bool = True,
fetch_constants_to_sram: bool = False,
):
"""
Generate compile spec for Neutron NPU
Expand All @@ -73,6 +75,8 @@ def neutron_compile_spec(
use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to
ensure that the IO matches the executorch partition, which will be
delegated to Neutron.
fetch_constants_to_sram: If True, the Neutron Converter will insert microinstructions to prefetch weights
from FLASH to SRAM. This should be used when the whole model does not fit into SRAM.
"""

self.neutron_converter_flavor = neutron_converter_flavor
Expand All @@ -94,6 +98,8 @@ def neutron_compile_spec(

self.use_neutron_for_format_conversion = use_neutron_for_format_conversion

self.fetch_constants_to_sram = fetch_constants_to_sram

return self

def build(self):
Expand All @@ -116,6 +122,10 @@ def build(self):
"use_neutron_for_format_conversion",
f"{self.use_neutron_for_format_conversion}".encode(),
),
CompileSpec(
"fetch_constants_to_sram",
f"{self.fetch_constants_to_sram}".encode(),
),
]

return self.compile_spec
Expand All @@ -128,6 +138,7 @@ def generate_neutron_compile_spec(
extra_flags: Optional[str] = None,
operators_not_to_delegate: Optional[List[str]] = None,
use_neutron_for_format_conversion: bool = True,
fetch_constants_to_sram: bool = False,
) -> List[CompileSpec]:
return (
NeutronCompileSpecBuilder()
Expand All @@ -137,6 +148,7 @@ def generate_neutron_compile_spec(
extra_flags=extra_flags,
operators_not_to_delegate=operators_not_to_delegate,
use_neutron_for_format_conversion=use_neutron_for_format_conversion,
fetch_constants_to_sram=fetch_constants_to_sram,
)
.build()
)
Expand All @@ -160,6 +172,7 @@ def preprocess( # noqa C901
target = ""
neutron_converter_flavor = ""
use_neutron_for_format_conversion = None
fetch_constants_to_sram = False
for spec in compile_spec:
if spec.key == "output_format":
output_format = spec.value.decode()
Expand All @@ -171,6 +184,8 @@ def preprocess( # noqa C901
neutron_converter_flavor = spec.value.decode()
if spec.key == "use_neutron_for_format_conversion":
use_neutron_for_format_conversion = spec.value.decode() == "True"
if spec.key == "fetch_constants_to_sram":
fetch_constants_to_sram = spec.value.decode() == "True"

# Check that the output format is set in the compile spec
if not output_format:
Expand Down Expand Up @@ -209,7 +224,7 @@ def preprocess( # noqa C901
)

neutron_model = NeutronConverterManager(neutron_converter_flavor).convert(
tflite_model, target
tflite_model, target, fetch_constants_to_sram
)

# Dump the tflite file if logging level is enabled
Expand Down
2 changes: 2 additions & 0 deletions backends/nxp/tests/executorch_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def to_quantized_edge_program(
custom_delegation_options=CustomDelegationOptions(), # noqa B008
get_quantizer_fn=None,
use_neutron_for_format_conversion=True,
fetch_constants_to_sram=False,
) -> EdgeProgramManager:
_neutron_target_spec = NeutronTargetSpec(target, neutron_converter_flavor)
if get_quantizer_fn is None:
Expand Down Expand Up @@ -125,6 +126,7 @@ def to_quantized_edge_program(
operators_not_to_delegate=operators_not_to_delegate,
neutron_converter_flavor=neutron_converter_flavor,
use_neutron_for_format_conversion=use_neutron_for_format_conversion,
fetch_constants_to_sram=fetch_constants_to_sram,
)
partitioners = [
NeutronPartitioner(
Expand Down
31 changes: 27 additions & 4 deletions backends/nxp/tests/test_neutron_converter_manager.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2024-2025 NXP
# Copyright 2024-2026 NXP
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
Expand All @@ -14,7 +14,8 @@
NeutronConverterManager,
)
from executorch.backends.nxp.backend.node_format_inference import NodeFormatInference
from executorch.backends.nxp.tests.models import Conv2dModule
from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
from executorch.backends.nxp.tests.models import Conv2dModule, LinearModule


def test_conv2d_neutron_conversion__default_flavor():
Expand All @@ -31,7 +32,7 @@ def test_conv2d_neutron_conversion__default_flavor():
)

neutron_converter_manager = NeutronConverterManager()
neutron_model = neutron_converter_manager.convert(tflite_model, "imxrt700")
neutron_model = neutron_converter_manager.convert(tflite_model, "imxrt700", False)

assert len(
neutron_model
Expand All @@ -52,8 +53,30 @@ def test__conv2d_neutron_conversion__invalid_flavor():
)

with pytest.raises(RuntimeError) as excinfo:
_ = NeutronConverterManager("bad_flavor").convert(tflite_model, "imxrt700")
_ = NeutronConverterManager("bad_flavor").convert(
tflite_model, "imxrt700", False
)

assert "Neutron Converter module with flavor 'bad_flavor' not found." in str(
excinfo
)


def test_conv2d_neutron_conversion__prefetching(mocker):
    """Check that `fetch_constants_to_sram` changes the size of the converted model.

    The same module is lowered twice through `to_quantized_edge_program`, once with
    the flag enabled and once with it disabled. `NeutronConverterManager.convert` is
    spied on so that the value it returned last can be captured after each run.
    """
    module = LinearModule(True)
    shape = (1, 1, 32, 32)

    convert_spy = mocker.spy(NeutronConverterManager, "convert")

    # Maps the flag value to whatever `convert` returned last for that run.
    converted = {}
    for prefetch_enabled in (True, False):
        edge_program_manager = to_quantized_edge_program(
            module, shape, fetch_constants_to_sram=prefetch_enabled
        )
        edge_program_manager.exported_program()
        converted[prefetch_enabled] = convert_spy.spy_return

    prefetch_size = len(converted[True])
    regular_size = len(converted[False])
    assert (
        prefetch_size != regular_size
    ), "The weight prefetching flag does not make a difference!"
8 changes: 8 additions & 0 deletions examples/nxp/aot_neutron_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,13 @@ def get_model_and_inputs_from_name(model_name: str):
help="The model (including the Neutron backend) will use the channels last dim order, which can result in faster "
"inference. The inputs must also be provided in the channels last dim order.",
)
# Opt-in switch: the value is False unless the flag is given on the command line.
parser.add_argument(
    "--fetch_constants_to_sram",
    action="store_true",
    default=False,
    required=False,
    help="This feature allows running models which do not fit into SRAM by offloading them "
    "to an external memory.",
)

args = parser.parse_args()

Expand Down Expand Up @@ -291,6 +298,7 @@ def get_model_and_inputs_from_name(model_name: str):
args.target,
operators_not_to_delegate=args.operators_not_to_delegate,
neutron_converter_flavor=args.neutron_converter_flavor,
fetch_constants_to_sram=args.fetch_constants_to_sram,
)
partitioners = (
[NeutronPartitioner(compile_spec, neutron_target_spec)] if args.delegate else []
Expand Down
Loading