Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .lintrunner.toml
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ exclude_patterns = [
'extension/llm/tokenizers',
'extension/llm/tokenizers/**',
'examples/cuda',
'examples/nxp',
'kernels/portable',
# File contains @generated
'extension/llm/custom_ops/spinquant/fast_hadamard_transform_special.h',
Expand Down
2 changes: 1 addition & 1 deletion backends/cortex_m/passes/quantized_op_fusion_pass.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,5 +412,5 @@ def call_operator(
case _:
pass

result = super().call_operator(op, args, {}, meta)
result = super().call_operator(op, args, kwargs, meta)
return result
31 changes: 26 additions & 5 deletions backends/cortex_m/quantizer/quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
# LICENSE file in the root directory of this source tree.


from typing import cast, List, Optional
from typing import Callable, cast, Iterator, List, Optional

from executorch.backends.arm.quantizer.arm_quantizer_utils import (
_mark_node_as_quantized,
NodeFinder,
PatternCheck,
PatternQuantizer,
SharedQspecQuantizer,
Expand All @@ -30,7 +31,7 @@
CORTEX_M_QUANTIZER_SUPPORT_DICT,
)
from torch._ops import OpOverload
from torch.fx import GraphModule
from torch.fx import GraphModule, Node
from torchao.quantization.pt2e.quantizer import ComposableQuantizer, Quantizer


Expand All @@ -43,9 +44,27 @@ def mark_node_as_annotated(
_mark_node_as_quantized(node, input_qspec_map, output_qspec, is_quantized)


class FilteredNodeFinder(NodeFinder):
    """NodeFinder decorator that yields only nodes accepted by a predicate.

    When no predicate is supplied, the wrapped finder's nodes are passed
    through unchanged.
    """

    def __init__(
        self, base_finder: NodeFinder, filter_fn: Callable[[Node], bool] | None = None
    ) -> None:
        # Node discovery is delegated to `base_finder`; `filter_fn` (if any)
        # decides which of its nodes are kept.
        self.base_finder = base_finder
        self.filter_fn = filter_fn

    def find_nodes(self, model: GraphModule) -> Iterator[Node]:
        candidates = self.base_finder.find_nodes(model)
        predicate = self.filter_fn
        return candidates if predicate is None else filter(predicate, candidates)


class CortexMQuantizer(ComposableQuantizer):

def __init__(self) -> None:
def __init__(self, filter_fn: Callable[[Node], bool] | None = None) -> None:
conv_targets: set[OpOverload] = set()
for key in CONV_OP_PATTERNS.keys() | CONV_TRANSPOSE_OP_PATTERNS.keys():
conv_targets.update(key)
Expand All @@ -63,12 +82,14 @@ def __init__(self) -> None:
quantizers: List[Quantizer] = [
PatternQuantizer(
INT8_PER_CHANNEL_CONFIG,
node_finder=NodeTargetNodeFinder(list(conv_targets)),
node_finder=FilteredNodeFinder(
NodeTargetNodeFinder(list(conv_targets)), filter_fn
),
pattern_matcher=pattern_matcher,
),
PatternQuantizer(
INT8_PER_TENSOR_CONFIG,
node_finder=GlobalNodeFinder(),
node_finder=FilteredNodeFinder(GlobalNodeFinder(), filter_fn),
pattern_matcher=pattern_matcher,
),
SharedQspecQuantizer(),
Expand Down
3 changes: 3 additions & 0 deletions backends/nxp/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Key into the `node.meta` dictionary. If a node maps this key to `True`:
#   * the node will not be quantized by the NeutronQuantizer, and
#   * it will not be selected for delegation by the NeutronPartitioner.
NXP_NEUTRON_BACKEND_IGNORE = "NXP_NEUTRON_BACKEND_IGNORE"
4 changes: 4 additions & 0 deletions backends/nxp/backend/edge_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ def _is_quantize(node_: Node) -> bool:
]


def is_qdq_op(node: Node) -> bool:
    """Return `True` if `node` is either a quantize or a dequantize operation."""
    return any(check(node) for check in (_is_quantize, _is_dequantize))


def previous_non_qdq_node(node: Node, input_index: int = 0) -> Node | None:
"""Return the first node which is not a `quantize` or `dequantize`, found by traversing the graph backwards
starting with the `node.args[input_index]`,
Expand Down
Empty file.
186 changes: 186 additions & 0 deletions backends/nxp/imxrt700cm/imxrt700cm_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
# Copyright 2026 NXP
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import copy
from typing import Callable

import torch

from executorch.backends.cortex_m.passes import CortexMPassManager
from executorch.backends.nxp.backend.custom_delegation_options import (
CustomDelegationOptions,
)
from executorch.backends.nxp.backend.edge_helper import is_qdq_op
from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
from executorch.backends.nxp.edge_passes.neutron_edge_pass_manager import (
NeutronEdgePassManager,
)
from executorch.backends.nxp.imxrt700cm.imxrt700cm_quantizer import IMXRT700CMQuantizer
from executorch.backends.nxp.neutron_partitioner import (
NeutronPartitioner,
NXP_DELEGATION_TAG,
)
from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec
from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize
from executorch.backends.nxp.tests.executorch_pipeline import (
get_random_calibration_inputs,
ModelInputSpec,
to_model_input_spec,
to_quantized_edge_program,
)
from executorch.exir import EdgeProgramManager, to_edge
from executorch.exir.backend.partitioner import PartitionResult
from executorch.exir.dialects.edge._ops import EdgeOpOverload
from torch.fx import Node


def lower_to_imxrt700cm(
    model: torch.nn.Module,
    input_spec: tuple[ModelInputSpec, ...] | tuple[int, ...] | list[tuple[int, ...]],
    get_calibration_inputs_fn: Callable[
        [tuple[ModelInputSpec, ...]], list[tuple[torch.Tensor, ...]]
    ] = get_random_calibration_inputs,
    target: str = "imxrt700",
    remove_quant_io_ops: bool = False,
    custom_delegation_options: CustomDelegationOptions | None = None,
    use_neutron_for_format_conversion: bool = True,
    use_quant_state_dict: bool = True,
    fetch_constants_to_sram: bool = False,
    dump_kernel_selection_code: bool = False,
) -> EdgeProgramManager:
    """Lower model to hybrid Neutron + Cortex-M backend.

    Pipeline:
        1. Identify nodes not supported by Neutron.
            1.1 Quantize a copy of the model with 1 dummy calibration tensor.
            1.2 Run NeutronPartitioner to mark supported nodes.
            1.3 Extract the names of the aten operators unsupported by Neutron.
        2. Quantize using a hybrid quantizer, which applies NeutronQuantizer to some nodes, and CortexMQuantizer to others.
        3. Lower to edge using Neutron backend.
        4. Run Cortex-M passes on the edge program to replace leftover nodes with Cortex-M operators.

    NOTE: Some Cortex-M operators require the channels last dim order. So the provided `model` and `example_inputs`
           should use the channels last memory format for best results.

    :param custom_delegation_options: Optional delegation overrides. When omitted, a fresh
        `CustomDelegationOptions` is constructed per call.

    TODO (Martin) The Cortex-M backend requires some aten nodes to be preserved in the edge program.
     This is not yet implemented. (EIEX-805)
    """
    # Build the default lazily so that each call gets its own instance. A
    # `= CustomDelegationOptions()` default would be a single mutable object
    # shared across all calls (flake8 B008).
    if custom_delegation_options is None:
        custom_delegation_options = CustomDelegationOptions()

    input_spec = to_model_input_spec(input_spec)

    # Discover the names (stored in node.meta["torch_fn"][0]) of the aten operators which are not supported by Neutron.
    # The Cortex-M backend will be used for these if possible.
    cortex_m_designated_node_identifiers = _get_neutron_unsupported_node_identifiers(
        model,
        input_spec,
        target,
    )

    # Use the standard Neutron lowering pipeline, swapping in the hybrid
    # IMXRT700CMQuantizer so the non-Neutron nodes get Cortex-M annotations.
    edge_program_manager = to_quantized_edge_program(
        model,
        input_spec,
        get_calibration_inputs_fn=get_calibration_inputs_fn,
        target=target,
        remove_quant_io_ops=remove_quant_io_ops,
        custom_delegation_options=custom_delegation_options,
        get_quantizer_fn=lambda: IMXRT700CMQuantizer(
            NeutronTargetSpec(target), cortex_m_designated_node_identifiers
        ),
        use_neutron_for_format_conversion=use_neutron_for_format_conversion,
        use_quant_state_dict=use_quant_state_dict,
        fetch_constants_to_sram=fetch_constants_to_sram,
        dump_kernel_selection_code=dump_kernel_selection_code,
    )

    # Apply Cortex-M passes to replace the remaining nodes with Cortex-M variants where possible.
    pass_manager = CortexMPassManager(edge_program_manager.exported_program())
    edge_program_manager._edge_programs["forward"] = pass_manager.transform()

    return edge_program_manager


def get_non_delegated_nodes(partition_result: PartitionResult) -> list[Node]:
    """Return a list of nodes which were not marked by the NeutronPartitioner for delegation."""

    def _is_compute_op(node: Node) -> bool:
        # Only `call_function` nodes whose target is an edge operator count as
        # compute ops (this excludes getitem, ExecutorchCallDelegate, ...).
        # Quantize/dequantize operations are excluded as well.
        if node.op != "call_function":
            return False
        if not isinstance(node.target, EdgeOpOverload):
            return False
        return not is_qdq_op(node)

    graph_nodes = partition_result.tagged_exported_program.graph.nodes
    return [
        node
        for node in graph_nodes
        if _is_compute_op(node) and NXP_DELEGATION_TAG not in node.meta
    ]


def _get_neutron_unsupported_node_identifiers(
    model: torch.nn.Module,
    input_spec: tuple[ModelInputSpec, ...],
    target: str,
    use_quant_state_dict: bool = False,
) -> set[str]:
    """Identify nodes not supported by Neutron.
    This is done by running quantization with dummy calibration data and then applying the NeutronPartitioner.

    :param model: Input model to analyze.
    :param input_spec: Tuple of objects containing information about the model inputs.
    :param target: Neutron target to use for quantization.
    :param use_quant_state_dict: If `True` the state dict from the quantized model will be used to assess operator
                                  support by the NeutronPartitioner.
    :return: Set of identifiers of nodes (stored in node.meta["torch_fn"][0]) which are not supported by Neutron.
    """
    dummy_inputs = tuple(
        torch.rand(spec.shape, dtype=spec.dtype) for spec in input_spec
    )
    aten_program = torch.export.export(model, dummy_inputs, strict=True)

    # Run the discovery on a deep copy so the caller's program stays untouched.
    program_for_analysis = copy.deepcopy(aten_program)

    # A single dummy sample is sufficient for calibration here: only speed and
    # operator support matter, not quantization quality.
    target_spec = NeutronTargetSpec(target)
    quantized_program = calibrate_and_quantize(
        model=program_for_analysis,
        calibration_inputs=[dummy_inputs],
        quantizer=NeutronQuantizer(target_spec),
    )

    # Partition with Neutron.
    partitioner = NeutronPartitioner(
        generate_neutron_compile_spec(target),
        target_spec,
        post_quantization_state_dict=(
            quantized_program.state_dict() if use_quant_state_dict else None
        ),
    )

    edge_manager = to_edge(
        torch.export.export(quantized_program, dummy_inputs, strict=True)
    )
    edge_manager = edge_manager.transform(NeutronEdgePassManager())
    partition_result = partitioner.partition(edge_manager.exported_program())

    # Map the edge compute operators that were not delegated to Neutron back to
    # their original aten nodes via the "torch_fn" metadata.
    return {
        node.meta["torch_fn"][0]
        for node in get_non_delegated_nodes(partition_result)
        if "torch_fn" in node.meta
    }
104 changes: 104 additions & 0 deletions backends/nxp/imxrt700cm/imxrt700cm_quantizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Copyright 2026 NXP
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import torch

from executorch.backends.cortex_m.quantizer.quantizer import CortexMQuantizer
from executorch.backends.nxp import NXP_NEUTRON_BACKEND_IGNORE
from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
from torch.fx import GraphModule, Node
from torchao.quantization.pt2e.quantizer import Quantizer
from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY


class IMXRT700CMQuantizer(Quantizer):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Quantizers are meant to be composable. A recipe is the right user-facing abstraction to target an SoC with multiple different backends. Take a look at https://github.com/pytorch/executorch/blob/main/export/tests/test_target_recipes.py especially something like get_android_recipe to understand how two or more quantizers / partitioners are encapsulated and made to work together.

In your case, I imagine a target recipe for rt700 with neutron and cortex-m.

Copy link
Collaborator Author

@MartinPavella MartinPavella Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you @digantdesai for the insights. I have looked into it, and recipes definitely look like the right way forward.
I analyzed the state in executorch:

  • To introduce an SoC recipe would require having recipes for both Neutron and Cortex-M backend (both missing). Alternatively the current Cortex-M and Neutron pipelines can be combined into a single recipe but from reuse perspective a base recipe for both backend seems better from my opinion. Our Neutron backend pipeline is currently implemented in

    def to_quantized_edge_program(

  • The Neutron pipeline contains some kernel registration functionality, as only it knows what NPU kernels are required. This would probably require the creation of a new Stage type

    def to_quantized_edge_program(

    Or at least I didn't find any stage providing the functionality to just execute a function based on presence of an option.

  • The QAT appears to not be supported. The QuantizeStage explicitly states it performs post-training quantization. I see that the SourceTransformStage also enables quantization in some way, but it doesn't seem QAT is supported. So perhaps this would require another new Stage type (or modification on an existing stage).

Given this, enabling the RT700 Neutron+Cortex-M backend via a recipe requires changes in multiple backends, and this PR would end up quite large. Can we do this in multiple stages? Such as:

  1. Experimentally, continue with this early implementation introducing the option to combine Cortex-M and Neutron Backends for the i.MXRT700.
  2. Rework the current Neutron lowering pipeline to a recipe, and the same for the Cortex-M backend. Here we would potentially introduce new Stages.
  3. Rework the imxrt700cm lowering to a recipe
  4. Based on consequent discussion extend for QAT training.

For Cortex-M we need to sync with Arm too.
What is your opinion?

"""Hybrid quantizer that uses NeutronQuantizer for Neutron supported ops and CortexMQuantizer for the rest."""

def __init__(
    self,
    target_spec: NeutronTargetSpec,
    cortex_m_designated_node_identifiers: set[str],
):
    """
    :param target_spec: Neutron target specification
    :param cortex_m_designated_node_identifiers: Set of identifiers (stored in node.meta["torch_fn"][0]) of nodes
                                                  that are not supported by Neutron. Nodes not in this set will use
                                                  the NeutronQuantizer. Nodes in this set and nodes skipped by
                                                  NeutronQuantizer will be quantized with the CortexMQuantizer.
    """
    super().__init__()
    self.neutron_quantizer = NeutronQuantizer(target_spec)

    def _needs_cortex_m(node: Node) -> bool:
        # Cortex-M takes the nodes explicitly routed to it, plus any node the
        # NeutronQuantizer left without a quantization annotation.
        if node.meta.get(NXP_NEUTRON_BACKEND_IGNORE, False):
            return True
        return Q_ANNOTATION_KEY not in node.meta

    self.cortex_m_quantizer = CortexMQuantizer(filter_fn=_needs_cortex_m)

    self.target_spec = target_spec
    self.cortex_m_designated_node_identifiers = cortex_m_designated_node_identifiers

def annotate(self, model: GraphModule) -> GraphModule:
    """Annotate `model` with a hybrid Neutron + Cortex-M quantization scheme."""
    # Due to how SharedQuantizationSpecs are used, skipping select ops inside
    # the NeutronQuantizer would cause errors. The strategy is therefore:
    #   1. Let the NeutronQuantizer annotate every node it supports.
    #   2. Mark the nodes destined for the Cortex-M backend and strip their
    #      Neutron annotations. (The mark is also used later by the
    #      NeutronPartitioner.)
    #   3. Run the CortexMQuantizer, whose filter restricts it to exactly the
    #      marked / unannotated nodes.
    model = self.neutron_quantizer.annotate(model)

    self._mark_nodes_to_be_quantized_by_cortex_m_quantizer(
        model, self.cortex_m_designated_node_identifiers
    )

    for node in model.graph.nodes:
        is_marked = node.meta.get(NXP_NEUTRON_BACKEND_IGNORE, False)
        if node.op == "call_function" and is_marked:
            # This node will be quantized by the CortexMQuantizer — drop the
            # Neutron annotation so it does not conflict.
            node.meta.pop(Q_ANNOTATION_KEY, None)
            node.meta.pop("quantizer_matched", None)

    return self.cortex_m_quantizer.annotate(model)

def transform_for_annotation(
    self, model: torch.fx.GraphModule
) -> torch.fx.GraphModule:
    """Run both quantizers' pre-annotation transforms, Neutron first."""
    for quantizer in (self.neutron_quantizer, self.cortex_m_quantizer):
        model = quantizer.transform_for_annotation(model)
    return model

def validate(self, model: GraphModule) -> None:
    """Delegate validation of the annotated `model` to both sub-quantizers."""
    for quantizer in (self.neutron_quantizer, self.cortex_m_quantizer):
        quantizer.validate(model)

# noinspection PyMethodMayBeStatic
def _mark_nodes_to_be_quantized_by_cortex_m_quantizer(
    self, graph: GraphModule, cortex_m_designated_node_identifiers: set[str]
):
    """Mark nodes which were selected to be handled by the Cortex-M backend.

    The mark is `node.meta[NXP_NEUTRON_BACKEND_IGNORE] = True`. Nodes are
    matched by their `node.meta["torch_fn"][0]` identifier.
    """
    for node in graph.graph.nodes:
        torch_fn = node.meta.get("torch_fn")
        if torch_fn is None:
            continue
        if torch_fn[0] in cortex_m_designated_node_identifiers:
            # This node was selected specifically for the Cortex-M backend.
            node.meta[NXP_NEUTRON_BACKEND_IGNORE] = True
Loading
Loading