I am done

2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions


@@ -0,0 +1,301 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# Helper script that will check if the types and operators used in an ONNX model
# are supported by the pre-built ORT Mobile package.
import argparse
import logging
import pathlib
import sys
import onnx
from ..onnx_model_utils import ModelProtoWithShapeInfo, get_opsets_imported
from ..reduced_build_config_parser import parse_config
cpp_to_tensorproto_type = {
"float": 1,
"uint8_t": 2,
"int8_t": 3,
"uint16_t": 4,
"int16_t": 5,
"int32_t": 6,
"int64_t": 7,
"std::string": 8,
"bool": 9,
"MLFloat16": 10,
"double": 11,
"uint32_t": 12,
"uint64_t": 13,
"Complex64": 14, # not supported by ORT
"Complex128": 15, # not supported by ORT
"BFloat16": 16,
}
tensorproto_type_to_cpp = {v: k for k, v in cpp_to_tensorproto_type.items()}
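# Example correspondence (the integer values mirror onnx.TensorProto.DataType):
#   cpp_to_tensorproto_type["float"] == onnx.TensorProto.FLOAT    # 1
#   cpp_to_tensorproto_type["int64_t"] == onnx.TensorProto.INT64  # 7
#   tensorproto_type_to_cpp[onnx.TensorProto.UINT8] == "uint8_t"  # 2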
def check_graph(graph, opsets, required_ops, global_types, special_types, unsupported_ops, logger):
"""
Check the graph and any subgraphs for usage of types or operators which we know are not supported.
:param graph: Graph to process.
:param opsets: Map of domain to opset version that the model imports.
:param required_ops: Operators that are included in the pre-built package.
:param global_types: Types globally enabled in the pre-built package.
:param special_types: Types that are always enabled for a subset of operators and are _usually_ supported but are
not guaranteed to be. We would need to add a lot of infrastructure to know for sure so
currently we treat them as supported.
:param unsupported_ops: Set of unsupported operators that were found.
:param logger: Logger for diagnostic output.
:return: Returns whether the graph uses unsupported operators or types.
"""
has_unsupported_types = False
value_info_map = {vi.name: vi for vi in graph.value_info}
def _is_type_supported(value_info, description):
is_supported = True
type_name = value_info.type.WhichOneof("value")
if type_name == "tensor_type":
t = value_info.type.tensor_type.elem_type
if t not in global_types and t not in special_types:
cpp_type = tensorproto_type_to_cpp[t]
logger.debug(f"Element type {cpp_type} of {description} is not supported.")
is_supported = False
else:
# we don't support sequences, map, sparse tensors, or optional types in the pre-built package
logger.debug(f"Data type {type_name} of {description} is not supported.")
is_supported = False
return is_supported
def _input_output_is_supported(value_info, input_output):
return _is_type_supported(value_info, f"graph {input_output} {value_info.name}")
# node outputs are simpler to check.
# node inputs have a much wider mix of types, some of which come from initializers and most likely are always
# enabled as we generally do type reduction on the user data input to the operator and not the weights/etc. which
# come from initializers.
def _node_output_is_supported(name):
is_supported = True
if name in value_info_map:
vi = value_info_map[name]
is_supported = _is_type_supported(vi, f"node output {name}")
else:
# we don't have type info so ignore
pass
return is_supported
for i in graph.input:
if not _input_output_is_supported(i, "input"):
has_unsupported_types = True
for o in graph.output:
if not _input_output_is_supported(o, "output"):
has_unsupported_types = True
for node in graph.node:
# required_ops are map of [domain][opset] to set of op_type names. '' == ai.onnx
domain = node.domain or "ai.onnx"
# special case Constant as we will convert to an initializer during model load
if domain == "ai.onnx" and node.op_type == "Constant":
continue
# some models don't have complete imports. use 1 as a default as that's valid for custom domains and should
# result in an error for any others. not sure why ONNX or ORT validation allows this though.
opset = opsets.get(domain, 1)
if (
domain not in required_ops
or opset not in required_ops[domain]
or node.op_type not in required_ops[domain][opset]
):
unsupported_ops.add(f"{domain}:{opset}:{node.op_type}")
for output_name in node.output:
if not _node_output_is_supported(output_name):
has_unsupported_types = True
# recurse into subgraph for control flow nodes (Scan/Loop/If)
for attr in node.attribute:
if attr.HasField("g"):
check_graph(attr.g, opsets, required_ops, global_types, special_types, unsupported_ops, logger)
return has_unsupported_types or unsupported_ops
def _get_global_tensorproto_types(op_type_impl_filter, logger: logging.Logger):
"""
Map the globally supported types (C++) to onnx.TensorProto.DataType values used in the model
See https://github.com/onnx/onnx/blob/1faae95520649c93ae8d0b403816938a190f4fa7/onnx/onnx.proto#L485
Additionally return a set of types we special case as being able to generally be considered as supported.
:param op_type_impl_filter: type filter from reduced build configuration parser
:param logger: Logger
:return: tuple of globally enabled types and special cased types
"""
global_cpp_types = op_type_impl_filter.global_type_list()
global_onnx_tensorproto_types = set()
for t in global_cpp_types:
if t in cpp_to_tensorproto_type:
global_onnx_tensorproto_types.add(cpp_to_tensorproto_type[t])
else:
logger.error(f"Error: Unexpected data type of {t} in package build config's globally enabled types.")
sys.exit(-1)
# a subset of operators require int32 and int64 to always be enabled, as those types are used for dimensions in
# shapes and indices.
# additionally we have a number of operators (e.g. Not, Where) that always require the use of bool.
# this _may_ mean values involving these types can be processed, but without adding a lot more code we don't know
# for sure.
special_types = [
cpp_to_tensorproto_type["int32_t"],
cpp_to_tensorproto_type["int64_t"],
cpp_to_tensorproto_type["bool"],
]
return global_onnx_tensorproto_types, special_types
def get_default_config_path():
# get default path to config that was used to create the pre-built package.
script_dir = pathlib.Path(__file__).parent
local_config = script_dir / "mobile_package.required_operators.config"
# if we're running in the ORT python package the file should be local. otherwise assume we're running from the
# ORT repo
if local_config.exists():
default_config_path = local_config
else:
ort_root = script_dir.parents[3]
default_config_path = (
ort_root / "tools" / "ci_build" / "github" / "android" / "mobile_package.required_operators.config"
)
return default_config_path
def run_check_with_model(
model_with_type_info: onnx.ModelProto, mobile_pkg_build_config: pathlib.Path, logger: logging.Logger
):
"""
Check if an ONNX model can be used with the ORT Mobile pre-built package.
:param model_with_type_info: ONNX model that has had ONNX shape inferencing run on it to add type/shape information.
:param mobile_pkg_build_config: Configuration file used to build the ORT Mobile package.
:param logger: Logger for output
:return: True if supported
"""
if not mobile_pkg_build_config:
mobile_pkg_build_config = get_default_config_path()
enable_type_reduction = True
config_path = str(mobile_pkg_build_config.resolve(strict=True))
required_ops, op_type_impl_filter = parse_config(config_path, enable_type_reduction)
global_onnx_tensorproto_types, special_types = _get_global_tensorproto_types(op_type_impl_filter, logger)
# get the opset imports
opsets = get_opsets_imported(model_with_type_info)
# If the ONNX opset of the model is not supported we can recommend using our tools to update that first.
supported_onnx_opsets = set(required_ops["ai.onnx"].keys())
# we have a contrib op that is erroneously in the ai.onnx domain with opset 1. manually remove that incorrect value
supported_onnx_opsets.discard(1)
onnx_opset_model_uses = opsets["ai.onnx"]
if onnx_opset_model_uses not in supported_onnx_opsets:
logger.info(f"Model uses ONNX opset {onnx_opset_model_uses}.")
logger.info(f"The pre-built package only supports ONNX opsets {sorted(supported_onnx_opsets)}.")
logger.info(
"Please try updating the ONNX model opset to a supported version using "
"python -m onnxruntime.tools.onnx_model_utils.update_onnx_opset ..."
)
return False
unsupported_ops = set()
logger.debug(
"Checking if the data types and operators used in the model are supported in the pre-built ORT package..."
)
unsupported = check_graph(
model_with_type_info.graph,
opsets,
required_ops,
global_onnx_tensorproto_types,
special_types,
unsupported_ops,
logger,
)
if unsupported_ops:
logger.info("Unsupported operators:")
for entry in sorted(unsupported_ops):
logger.info(" " + entry) # noqa: G003
if unsupported:
logger.info("\nModel is not supported by the pre-built package due to unsupported types and/or operators.")
logger.info(
"Please see https://onnxruntime.ai/docs/install/#install-on-web-and-mobile for information "
"on what is supported in the pre-built package."
)
logger.info(
"The 'full' ORT package for Android (onnxruntime-android) or iOS (onnxruntime-{objc|c}) could be used, "
"or a custom build of ONNX Runtime will be required if binary size is critical. Please see "
"https://onnxruntime.ai/docs/build/custom.html for details on performing that."
)
else:
logger.info("Model should work with the pre-built package.")
logger.info("---------------\n")
return not unsupported
def run_check(model_path: pathlib.Path, mobile_pkg_build_config: pathlib.Path, logger: logging.Logger):
"""
Check if an ONNX model will be able to be used with the ORT Mobile pre-built package.
:param model_path: Path to ONNX model.
:param mobile_pkg_build_config: Configuration file used to build the ORT Mobile package.
:param logger: Logger for output
:return: True if supported
"""
logger.info(
f"Checking if pre-built ORT Mobile package can be used with {model_path} once model is "
"converted from ONNX to ORT format using onnxruntime.tools.convert_onnx_models_to_ort..."
)
model_file = model_path.resolve(strict=True)
# we need to run shape inferencing to populate that type info for node outputs.
# we will get warnings if the model uses ORT contrib ops (ONNX does not have shape inferencing for those),
# and shape inferencing will be lost downstream of those.
# TODO: add support for checking ORT format model as it will have full type/shape info for all nodes
model_wrapper = ModelProtoWithShapeInfo(model_file)
return run_check_with_model(model_wrapper.model_with_shape_info, mobile_pkg_build_config, logger)
def main():
parser = argparse.ArgumentParser(
description="Check if model can be run using the ONNX Runtime Mobile Pre-Built Package",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
"--config_path",
help="Path to required operators and types configuration used to build the pre-built ORT mobile package.",
required=False,
type=pathlib.Path,
default=get_default_config_path(),
)
parser.add_argument("model_path", help="Path to ONNX model to check", type=pathlib.Path)
args = parser.parse_args()
logger = logging.getLogger("default")
logger.setLevel(logging.INFO)
run_check(args.model_path, args.config_path, logger)
if __name__ == "__main__":
main()
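For context, a minimal sketch of driving this check from Python rather than the command line; it assumes `run_check` and `get_default_config_path` are imported from the script above (the exact package path inside the installed ORT wheel is not shown in this diff):

```python
import logging
import pathlib

# Hedged sketch: run_check and get_default_config_path come from the script above;
# how you import them depends on where the module lives in your installation.
logging.basicConfig(format="%(message)s")
logger = logging.getLogger("ort_mobile_pkg_check")
logger.setLevel(logging.INFO)

is_supported = run_check(
    pathlib.Path("model.onnx"),   # ONNX model to check (hypothetical path)
    get_default_config_path(),    # config used to build the pre-built ORT Mobile package
    logger,
)
print("Model usable with the pre-built package:", is_supported)
```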


@@ -0,0 +1,31 @@
<!--
Keep in sync with doco generated from /docs/execution-providers/CoreML-ExecutionProvider.md on the gh_pages branch
-->
|Operator|Note|
|--------|------|
|ai.onnx:Add||
|ai.onnx:AveragePool|Only 2D Pool is supported currently. 3D and 5D support can be added if needed.|
|ai.onnx:Clip||
|ai.onnx:Concat||
|ai.onnx:Conv|Only 1D/2D Conv is supported.<br/>Bias if provided must be constant.|
|ai.onnx:ConvTranspose|Weight and bias must be constant.<br/>padding_type of SAME_UPPER/SAME_LOWER is not supported.<br/>kernel_shape must have default values.<br/>output_shape is not supported.<br/>output_padding must have default values.|
|ai.onnx:DepthToSpace|If 'mode' is 'CRD' the input must have a fixed shape.|
|ai.onnx:Div||
|ai.onnx:Gemm|Input B must be constant.|
|ai.onnx:GlobalAveragePool|Only 2D Pool is supported currently. 3D and 5D support can be added if needed.|
|ai.onnx:GlobalMaxPool|Only 2D Pool is supported currently. 3D and 5D support can be added if needed.|
|ai.onnx:GridSample|4D input.<br/>'mode' of 'linear' or 'zeros'.<br/>(mode==linear && padding_mode==reflection && align_corners==0) is not supported.|
|ai.onnx:LeakyRelu||
|ai.onnx:MatMul|Only support for transA == 0, alpha == 1.0 and beta == 1.0 is currently implemented.|
|ai.onnx:MaxPool|Only 2D Pool is supported currently. 3D and 5D support can be added if needed.|
|ai.onnx:Mul||
|ai.onnx:Pow|Only supports cases when both inputs are fp32.|
|ai.onnx:Relu||
|ai.onnx:Reshape||
|ai.onnx:Resize|See [resize_op_builder.cc](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/providers/coreml/builders/impl/resize_op_builder.cc) implementation. There are too many permutations to describe the valid combinations.|
|ai.onnx:Slice|starts/ends/axes/steps must be constant initializers.|
|ai.onnx:Split||
|ai.onnx:Sub||
|ai.onnx:Sigmoid||
|ai.onnx:Tanh||
|ai.onnx:Transpose||


@@ -0,0 +1,43 @@
<!--
Keep in sync with doco generated from /docs/execution-providers/CoreML-ExecutionProvider.md on the gh_pages branch
-->
|Operator|Note|
|--------|------|
|ai.onnx:Add||
|ai.onnx:ArgMax||
|ai.onnx:AveragePool|Only 2D Pool is supported.|
|ai.onnx:BatchNormalization||
|ai.onnx:Cast||
|ai.onnx:Clip||
|ai.onnx:Concat||
|ai.onnx:Conv|Only 1D/2D Conv is supported.<br/>Weights and bias should be constant.|
|ai.onnx:DepthToSpace|Only DCR mode DepthToSpace is supported.|
|ai.onnx:Div||
|ai.onnx:Flatten||
|ai.onnx:Gather|Input `indices` with scalar value is not supported.|
|ai.onnx:Gemm|Input B should be constant.|
|ai.onnx:GlobalAveragePool|Only 2D Pool is supported.|
|ai.onnx:GlobalMaxPool|Only 2D Pool is supported.|
|ai.onnx:LeakyRelu||
|ai.onnx:LRN||
|ai.onnx:MatMul|Input B should be constant.|
|ai.onnx:MaxPool|Only 2D Pool is supported.|
|ai.onnx:Mul||
|ai.onnx:Pad|Only constant mode and last two dim padding is supported.<br/>Input pads and constant_value should be constant.<br/>If provided, axes should be constant.|
|ai.onnx:Pow|Only supports cases when both inputs are fp32.|
|ai.onnx:PRelu|Input slope should be constant.<br/>Input slope should either have shape [C, 1, 1] or have 1 element.|
|ai.onnx:Reciprocal||
|ai.onnx:ReduceSum||
|ai.onnx:Relu||
|ai.onnx:Reshape||
|ai.onnx:Resize|4D input.<br/>`coordinate_transformation_mode` == `asymmetric`.<br/>`mode` == `linear` or `nearest`.<br/>`nearest_mode` == `floor`.<br/>`exclude_outside` == false<br/>`scales` or `sizes` must be constant.|
|ai.onnx:Shape|Attribute `start` with non-default value is not supported.<br/>Attribute `end` is not supported.|
|ai.onnx:Sigmoid||
|ai.onnx:Slice|Inputs `starts`, `ends`, `axes`, and `steps` should be constant. Empty slice is not supported.|
|ai.onnx:Softmax||
|ai.onnx:Split|If provided, `splits` should be constant.<br/>The number of outputs must be at least 2.|
|ai.onnx:Squeeze||
|ai.onnx:Sqrt||
|ai.onnx:Sub||
|ai.onnx:Tanh||
|ai.onnx:Transpose||


@@ -0,0 +1,46 @@
# Android package for ORT Mobile operator and type reduction configuration
#
# The list of operators was generated from:
# - the ONNX operators used by the tf2onnx tflite converter
# - the operators used in a set of tflite models from tfhub, the tflite examples, and the mlperf mobile models
# - models were optimized with optimizations set to 'basic', 'extended' and 'all'
# - see the readme file for full details
# allow float, int8, uint8. operators that manipulate shapes or indices have int32 and int64 enabled internally.
!globally_allowed_types;float,int8_t,uint8_t
# ops used by the tf2onnx tflite converter.
ai.onnx;12,13,14,15;Abs,Add,And,ArgMax,ArgMin,AveragePool,Cast,Ceil,Clip,Concat,ConstantOfShape,Conv,ConvTranspose,Cos,CumSum,DepthToSpace,DequantizeLinear,Div,DynamicQuantizeLinear,Elu,Equal,Exp,Expand,Flatten,Floor,Gather,GatherND,Gemm,Greater,GreaterOrEqual,Identity,If,LRN,LeakyRelu,Less,LessOrEqual,Log,LogSoftmax,Loop,MatMul,Max,MaxPool,Mean,Min,Mul,Neg,NonMaxSuppression,NonZero,Not,Or,PRelu,Pad,Pow,QuantizeLinear,Range,Reciprocal,ReduceMax,ReduceMean,ReduceMin,ReduceProd,ReduceSum,Relu,Reshape,Resize,ReverseSequence,Round,ScatterND,Shape,Sigmoid,Sin,Size,Slice,Softmax,SpaceToDepth,Split,Sqrt,Squeeze,Sub,Sum,Tanh,ThresholdedRelu,Tile,TopK,Transpose,Unique,Unsqueeze,Where
# other ops found in test models
ai.onnx;12,13,14,15;Erf,GlobalAveragePool,InstanceNormalization,HardSigmoid,MatMulInteger,QLinearConv,QLinearMatMul
# Control flow ops
# - If and Loop are covered by the tflite converter list
# - Scan tends to be used in speech models (it's more efficient than Loop) so include it for support of those
ai.onnx;12,13,14,15;Scan
# Changed ONNX ops by opset version for the above ops. This list is to provide context as to how much was added
# for each additional opset we support.
#
# opset 13
# Abs,Add,ArgMax,ArgMin,Cast,Ceil,Clip,Concat,DepthToSpace,DequantizeLinear,Div,Equal,Erf,Exp,Expand,Flatten,Floor,
# Gather,GatherND,Gemm,Greater,Identity,If,LRN,Less,Log,LogSoftmax,Loop,MatMul,Max,Mean,Min,Mul,Neg,NonZero,Pad,
# Pow,QuantizeLinear,Reciprocal,ReduceMax,ReduceMean,ReduceMin,ReduceProd,ReduceSum,Relu,Reshape,Resize,
# ScatterND,Shape,Sigmoid,Size,Slice,Softmax,SpaceToDepth,Split,Sqrt,Squeeze,Sub,Sum,Tanh,Tile,Transpose,Unsqueeze
# opset 14
# Add,CumSum,Div,Identity,Mul,Relu,Reshape,Sub
# opset 15
# Pow,Shape
# internal ops added by optimizers
# Note: LayerNormalization is an internal op even though it is (incorrectly) registered in the ONNX domain.
ai.onnx;1;LayerNormalization
com.microsoft;1;DynamicQuantizeMatMul,FusedConv,FusedGemm,FusedMatMul,Gelu,MatMulIntegerToFloat,NhwcMaxPool,QLinearAdd,QLinearAveragePool,QLinearConv,QLinearGlobalAveragePool,QLinearMul,QLinearSigmoid
# NHWC transformer also uses this, so assuming it's valuable enough to include
com.microsoft;1;QLinearLeakyRelu
# Quantized contrib ops that are registered but no usage was found. Excluding for now.
# com.microsoft;1;DynamicQuantizeLSTM,QAttention
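To illustrate how this configuration is consumed, here is a hedged sketch based on the `parse_config` usage in the checker script earlier in this commit; the import path is an assumption, and the structure of `required_ops` (domain -> opset -> set of op names) follows the comments in that script:

```python
from onnxruntime.tools.reduced_build_config_parser import parse_config  # assumed import path

# parse_config returns the required ops plus a type filter when type reduction is enabled.
required_ops, op_type_impl_filter = parse_config(
    "mobile_package.required_operators.config", True  # enable_type_reduction
)
assert "Conv" in required_ops["ai.onnx"][13]
assert "QLinearAdd" in required_ops["com.microsoft"][1]
print(op_type_impl_filter.global_type_list())  # expected: float, int8_t, uint8_t
```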


@@ -0,0 +1,58 @@
<!--
Keep in sync with doco generated from /docs/execution-providers/NNAPI-ExecutionProvider.md on the gh_pages branch
-->
|Operator|Note|
|--------|------|
|ai.onnx:Abs||
|ai.onnx:Add||
|ai.onnx:AveragePool|Only 2D Pool is supported.|
|ai.onnx:BatchNormalization||
|ai.onnx:Cast||
|ai.onnx:Clip||
|ai.onnx:Concat||
|ai.onnx:Conv|Only 2D Conv is supported.<br/>Weights and bias should be constant.|
|ai.onnx:DepthToSpace|Only DCR mode DepthToSpace is supported.|
|ai.onnx:DequantizeLinear|All quantization scales and zero points should be constant.|
|ai.onnx:Div||
|ai.onnx:Elu||
|ai.onnx:Exp||
|ai.onnx:Flatten||
|ai.onnx:Floor||
|ai.onnx:Gather|Input indices should be constant if not int32 type.|
|ai.onnx:Gemm|If input B is not constant, transB should be 1.|
|ai.onnx:GlobalAveragePool|Only 2D Pool is supported.|
|ai.onnx:GlobalMaxPool|Only 2D Pool is supported.|
|ai.onnx:Identity||
|ai.onnx:LeakyRelu||
|ai.onnx:Log||
|ai.onnx:LRN||
|ai.onnx:MatMul||
|ai.onnx:MaxPool|Only 2D Pool is supported.|
|ai.onnx:Max||
|ai.onnx:Min||
|ai.onnx:Mul||
|ai.onnx:Neg||
|ai.onnx:Pad|Only constant mode Pad is supported.<br/>Input pads and constant_value should be constant.<br/>Input pads values should be non-negative.|
|ai.onnx:Pow||
|ai.onnx:PRelu||
|ai.onnx:QLinearConv|Only 2D Conv is supported.<br/>Weights and bias should be constant.<br/>All quantization scales and zero points should be constant.|
|ai.onnx:QLinearMatMul|All quantization scales and zero points should be constant.|
|ai.onnx:QuantizeLinear|All quantization scales and zero points should be constant.|
|ai.onnx:ReduceMean||
|ai.onnx:Relu||
|ai.onnx:Reshape||
|ai.onnx:Resize|Only 2D Resize is supported.|
|ai.onnx:Sigmoid||
|ai.onnx:Sin||
|ai.onnx:Slice||
|ai.onnx:Softmax||
|ai.onnx:Split|Number of splits must evenly divide split axis size.<br/>Input split should be constant if provided.|
|ai.onnx:Sqrt||
|ai.onnx:Squeeze|Input axes should be constant.|
|ai.onnx:Sub||
|ai.onnx:Tanh||
|ai.onnx:Transpose||
|ai.onnx:Unsqueeze|Input axes should be constant.|
|com.microsoft:QLinearAdd|All quantization scales and zero points should be constant.|
|com.microsoft:QLinearAveragePool|Only 2D Pool is supported.<br/>All quantization scales and zero points should be constant.|
|com.microsoft:QLinearSigmoid|All quantization scales and zero points should be constant.|


@@ -0,0 +1,739 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
from __future__ import annotations
import argparse
import logging
import os
import pathlib
import tempfile
from collections import deque
from enum import IntEnum
import onnx
from ..onnx_model_utils import ModelProtoWithShapeInfo, get_producer_consumer_maps, is_fixed_size_tensor, optimize_model
class _SupportedOpsChecker:
"""
Class to process the md file with list of supported ops and caveats for an execution provider.
e.g. /tools/ci_build/github/android/nnapi_supported_ops.md
/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md
/tools/ci_build/github/apple/coreml_supported_neuralnetwork_ops.md
"""
def __init__(self, filename):
self._filename = filename
self._ops = {} # op to caveats
self._ops_seen = set()
with open(filename) as f:
for line in f:
# we're looking for a markdown table with 2 columns. first is op name. second is caveats
# op name is domain:op
if line.startswith("|"):
pieces = line.strip().split("|")
if len(pieces) == 4: # pre-first '|'. op, caveat, post-last '|'
domain_op = pieces[1]
caveat = pieces[2]
caveat = caveat.replace("<br/>", " ") # remove some HTML tags
# skip lines that don't have the ':' which separates the domain and op
# e.g. the table header will fail this check
if ":" in domain_op:
self._ops[domain_op] = caveat
def is_op_supported(self, node):
domain = node.domain if node.domain else "ai.onnx"
domain_op = domain + ":" + node.op_type
is_supported = domain_op in self._ops
if is_supported:
self._ops_seen.add(domain_op)
return is_supported
def get_caveats(self):
caveats = []
for op in sorted(self._ops_seen):
caveat = self._ops[op]
if caveat:
caveats.append(f"{op}:{caveat}")
return caveats
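# Example usage (illustrative): parsing one of the markdown tables in this commit and
# checking a node against it.
#   checker = _SupportedOpsChecker("nnapi_supported_ops.md")
#   node = onnx.helper.make_node("Conv", ["X", "W"], ["Y"])  # empty domain == ai.onnx
#   checker.is_op_supported(node)   # True when 'ai.onnx:Conv' appears in the table
#   checker.get_caveats()           # caveats for the supported ops seen so far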
class PartitioningInfo:
class TryWithEP(IntEnum):
NO = 0
MAYBE = 1
YES = 2
def __init__(
self,
num_nodes: int,
num_supported_nodes: int,
num_partitions: int,
supported_ops_checker: _SupportedOpsChecker,
supported_groups: list[onnx.NodeProto],
unsupported_ops: set[str],
nodes_unsupported_due_to_op: int,
nodes_unsupported_due_to_dynamic_input: int,
num_unsupported_nodes_due_to_rank: int,
ops_with_unsupported_rank: set[str],
):
self.num_nodes = num_nodes
self.num_supported_nodes = num_supported_nodes
self.num_partitions = num_partitions
self.supported_ops_checker = supported_ops_checker
self.supported_groups = supported_groups
self.unsupported_ops = unsupported_ops
self.nodes_unsupported_due_to_op = nodes_unsupported_due_to_op
self.nodes_unsupported_due_to_dynamic_input = nodes_unsupported_due_to_dynamic_input
self.num_unsupported_nodes_due_to_rank = num_unsupported_nodes_due_to_rank
self.ops_with_unsupported_rank = ops_with_unsupported_rank
self.num_subgraphs = 0
self.num_nodes_in_subgraphs = 0
def merge(self, other: PartitioningInfo):
"""
Merge the information from another PartitioningInfo instance into this one.
"""
self.num_nodes += other.num_nodes
self.num_supported_nodes += other.num_supported_nodes
self.num_partitions += other.num_partitions
self.supported_groups.extend(other.supported_groups)
self.unsupported_ops.update(other.unsupported_ops)
self.nodes_unsupported_due_to_op += other.nodes_unsupported_due_to_op
self.nodes_unsupported_due_to_dynamic_input += other.nodes_unsupported_due_to_dynamic_input
self.num_unsupported_nodes_due_to_rank += other.num_unsupported_nodes_due_to_rank
self.ops_with_unsupported_rank.update(other.ops_with_unsupported_rank)
# hard assumption that we merge into the main graph partitioning info
self.num_subgraphs += 1
self.num_nodes_in_subgraphs += other.num_nodes
def suitability(self):
# semi-arbitrary choices that err on the side of MAYBE.
# having 1 partition is always preferred, but if that is small it may not be useful.
# having 2 partitions may be okay if they cover most nodes
# with more than 2 partitions, the device copy cost is almost guaranteed to outweigh the benefit of using the NPU
# NOTE: This assumes the EP is not CPU based and there is device copy overhead to consider
pct_supported = self.num_supported_nodes / self.num_nodes * 100
if self.num_partitions == 1:
if pct_supported > 75:
return PartitioningInfo.TryWithEP.YES
elif pct_supported > 50:
return PartitioningInfo.TryWithEP.MAYBE
else:
return PartitioningInfo.TryWithEP.NO
if self.num_partitions == 2:
if pct_supported > 75:
return PartitioningInfo.TryWithEP.MAYBE
else:
return PartitioningInfo.TryWithEP.NO
return PartitioningInfo.TryWithEP.NO
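# Worked examples of the thresholds above (illustrative numbers):
#   1 partition covering 80/100 nodes -> YES, 60/100 -> MAYBE, 40/100 -> NO
#   2 partitions covering 80/100 nodes -> MAYBE, otherwise -> NO
#   3 or more partitions -> NO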
def print_analysis(self, logger: logging.Logger, ep_name: str):
"""
Analyze the partitioning information and log the analysis
:param logger: Logger to use
:param ep_name: Execution provider name to use in the log messages
"""
logger.info(
f"{self.num_partitions} partitions with a total of {self.num_supported_nodes}/{self.num_nodes} "
f"nodes can be handled by the {ep_name} EP."
)
if self.supported_groups:
logger.info(
f'\tPartition sizes: [{", ".join([str(len(partition)) for partition in self.supported_groups])}]'
)
# dump full groups if debug output is enabled
for group in self.supported_groups:
logger.debug(f'Nodes in group: {",".join([f"{node.op_type}:{node.name}" for node in group])}')
logger.info(f"Unsupported nodes due to operator={self.nodes_unsupported_due_to_op}")
if self.unsupported_ops:
logger.info(f'\tUnsupported ops: {",".join(sorted(self.unsupported_ops))}')
caveats = self.supported_ops_checker.get_caveats()
if caveats:
indent = " " * 5
logger.info(
"\tCaveats that have not been checked and may result in a node not actually being supported: "
f'{"".join([os.linesep + indent + caveat for caveat in caveats])}'
)
if self.nodes_unsupported_due_to_dynamic_input:
logger.info(
"Unsupported nodes due to input having a dynamic shape=%d",
self.nodes_unsupported_due_to_dynamic_input,
)
if self.num_unsupported_nodes_due_to_rank:
logger.info(f"Unsupported nodes due to rank of input data={self.num_unsupported_nodes_due_to_rank}")
logger.info(f"\tOps with unsupported rank: {','.join(sorted(self.ops_with_unsupported_rank))}")
if self.num_subgraphs > 0:
# TODO: CoreML has a flag. NNAPI doesn't. Either should be able to support a subgraph when treated as a
# separate graph (only extra detail would be making sure implicit inputs are handled).
# Merging the subgraph into the parent graph would be more complex.
# e.g. for CoreML we could potentially convert Loop to while_loop and If to cond if the subgraphs in the
# control flow node are fully supported.
# NNAPI also has While and If.
# It most likely will be necessary to support merging in If nodes with fully supported subgraphs,
# as the subgraphs in those are often very simple, so the performance cost of going to the CPU EP and back
# is high.
logger.info(
f"{self.num_nodes_in_subgraphs} nodes are in {self.num_subgraphs} subgraphs. "
"Check EP as to whether subgraphs are supported."
)
pct_nodes_using_ep = self.num_supported_nodes / self.num_nodes * 100
if self.num_partitions == 0:
logger.info(f"{ep_name} cannot run any nodes in this model.")
elif self.num_partitions == 1:
if pct_nodes_using_ep > 75:
logger.info(
f"{ep_name} should work well for this model as there is one partition "
f"covering {pct_nodes_using_ep:.1f}% of the nodes in the model."
)
elif pct_nodes_using_ep > 50:
logger.info(
f"{ep_name} may work well for this model, however only {pct_nodes_using_ep:.1f}% of nodes "
"will use it. Performance testing is required to validate."
)
else:
logger.info(
f"{ep_name} will probably not work will for this model as only {pct_nodes_using_ep:.2f}% "
"of nodes will use it."
)
elif self.num_partitions == 2 and pct_nodes_using_ep > 75:
logger.info(
f"{ep_name} can be considered for this model as there are two partitions "
f"covering {pct_nodes_using_ep:.1f}% of the nodes. "
"Performance testing is required to validate."
)
else:
logger.info(
f"{ep_name} is not recommended with this model as there are {self.num_partitions} partitions "
f"covering {pct_nodes_using_ep:.1f}% of the nodes in the model. "
"This will most likely result in worse performance than just using the CPU EP."
)
def _check_partitioning_for_graph(
graph: onnx.GraphProto,
node_to_producers: dict[onnx.NodeProto, set[onnx.NodeProto]],
node_to_consumers: dict[onnx.NodeProto, set[onnx.NodeProto]],
supported_ops_checker: _SupportedOpsChecker,
outer_scope_initializers: set[str],
require_fixed_input_sizes: bool,
value_info: dict[str, onnx.ValueInfoProto],
max_rank: int = 999, # max rank if EP has a limitation
):
# initializers have fixed sizes.
initializers = [i.name for i in graph.initializer]
def _is_fixed_shape_value(value):
if value in value_info:
return is_fixed_size_tensor(value_info[value])
if value in initializers or value in outer_scope_initializers:
return True
# if something has an unknown shape (e.g. something downstream of a Reshape with dynamic input for the shape)
# it won't have an entry in value_info
return False
#
# Replicate logic from /onnxruntime/core/providers/partitioning_utils.cc:CreateSupportedPartitionNodeGroups
# to roughly estimate number of partitions for nodes that is_node_supported_fn returns true for.
#
# We keep the structure and variable names as close as possible to the C++ implementation to simplify keeping them
# in sync if future updates are needed.
#
# NOTE: CreateSupportedPartitionNodeGroups was recently updated to be QDQ aware so that partitions did not split
# QDQ node groups. This code does not need to be QDQ aware as splitting a QDQ node group does not affect the total
# number of partitions or supported nodes.
#
# we don't currently support a callback for additional group closure checks in the python implementation
on_group_closed_fn = None
supported_groups = []
# number of inputs from unprocessed nodes (in-degree) per node
in_degree = {}
# nodes that are ready to process
nodes_to_process = deque() # deque of Node instances
# nodes that will be processed when considering the next partition node group
nodes_to_process_with_next_group = deque()
# initialize in-degrees and find root nodes
for node in graph.node:
node_input_edge_count = len(node_to_producers[node]) if node in node_to_producers else 0
in_degree[node] = node_input_edge_count
if node_input_edge_count == 0:
# node is only dependent on graph input or initializers
nodes_to_process.append(node)
supported_group = []
# the partition node group's border is the aggregate of its nodes' output nodes
supported_group_border = set()
num_supported_nodes = 0
num_unsupported_nodes_due_to_op = 0
num_unsupported_nodes_due_to_dynamic_input = 0
num_unsupported_nodes_due_to_rank = 0
unsupported_ops = set()
ops_with_unsupported_rank = set()
def close_group():
if supported_group:
keep_partition = not on_group_closed_fn or on_group_closed_fn(supported_group)
if keep_partition:
supported_groups.append(supported_group.copy())
supported_group.clear()
supported_group_border.clear()
while nodes_to_process or nodes_to_process_with_next_group:
if not nodes_to_process:
close_group()
nodes_to_process = nodes_to_process_with_next_group
nodes_to_process_with_next_group = deque()
continue
node = nodes_to_process.popleft()
is_op_supported = supported_ops_checker.is_op_supported(node)
is_input_shape_supported = not require_fixed_input_sizes or all(_is_fixed_shape_value(i) for i in node.input)
is_rank_supported = True
if value_info:
for node_input in node.input:
if node_input and node_input in value_info and value_info[node_input].type.HasField("tensor_type"):
input_rank = len(value_info[node_input].type.tensor_type.shape.dim)
if input_rank > max_rank:
is_rank_supported = False
break
# special-case if we can infer the rank from the length of the 'perm' Transpose attribute
# e.g. this works with SegmentAnything where dynamic Reshape operators result in no shape info.
if node.op_type == "Transpose" and len(node.attribute[0].ints) > max_rank:
is_rank_supported = False
is_node_supported = is_op_supported and is_input_shape_supported and is_rank_supported
if not is_node_supported:
if node in supported_group_border:
# an unsupported node on the border will be processed after the current partition node group
# so skip any additional processing/counting here
nodes_to_process_with_next_group.append(node)
continue
if not is_op_supported:
unsupported_ops.add(f'{node.domain if node.domain else "ai.onnx"}:{node.op_type}')
num_unsupported_nodes_due_to_op += 1
if not is_input_shape_supported:
num_unsupported_nodes_due_to_dynamic_input += 1
if not is_rank_supported:
num_unsupported_nodes_due_to_rank += 1
ops_with_unsupported_rank.add(f'{node.domain if node.domain else "ai.onnx"}:{node.op_type}')
if is_node_supported:
num_supported_nodes += 1
# add node to the partition node group
supported_group.append(node)
# remove node from the border and add its outputs to the border
if node in supported_group_border:
supported_group_border.remove(node)
# for each consumer node add to supported_group_border
if node in node_to_consumers:
for consumer in node_to_consumers[node]:
supported_group_border.add(consumer)
# adjust in-degrees of the node outputs and add any new nodes to process
if node in node_to_consumers:
for consumer in node_to_consumers[node]:
consumer_node_in_degree = in_degree[consumer]
consumer_node_in_degree -= 1
if consumer_node_in_degree == 0:
nodes_to_process.append(consumer)
in_degree[consumer] = consumer_node_in_degree
close_group()
num_nodes = len(graph.node)
num_partitions = len(supported_groups)
info = PartitioningInfo(
num_nodes,
num_supported_nodes,
num_partitions,
supported_ops_checker,
supported_groups,
unsupported_ops,
num_unsupported_nodes_due_to_op,
num_unsupported_nodes_due_to_dynamic_input,
num_unsupported_nodes_due_to_rank,
ops_with_unsupported_rank,
)
return info
def check_partitioning(
main_graph: onnx.GraphProto,
supported_ops_checker: _SupportedOpsChecker,
require_fixed_input_sizes: bool,
max_rank: int = 999,
) -> PartitioningInfo:
"""
Estimate the partitions the graph will be split into for nodes that is_node_supported_fn returns true for.
The check on whether a node is supported is purely based on the operator type. Additional limitations
(e.g. NNAPI EP only supports 2D Conv) are not checked, so partitions may not be 100% accurate. The limitations
for operators in the partitions are printed so the user can manually check.
:param main_graph: Graph to process
:param supported_ops_checker: Checker with info on supported ops.
:param require_fixed_input_sizes: If True, require that the inputs to a potentially supported node are fixed size
tensors for it to be considered as supported. This requires
onnx.shape_inference.infer_shapes to have been run on the model to populate the
shape information.
If False, shapes are ignored during the check.
:param max_rank: Set if EP has a limitation on the rank of tensors it supports.
:return PartitioningInfo instance with details
"""
if require_fixed_input_sizes and len(main_graph.value_info) == 0 and len(main_graph.node) > 1:
raise ValueError("Run onnx.shape_inference.infer_shapes on the model to populate the shape information.")
# create lookup map from ValueInfo for efficiency
def _update_value_info(graph: onnx.GraphProto, value_to_shape: dict[str, onnx.ValueInfoProto]):
for v in graph.input:
value_to_shape[v.name] = v
for v in graph.output:
value_to_shape[v.name] = v
for v in graph.value_info:
value_to_shape[v.name] = v
# the producer/consumer maps are for the entire model
node_to_producers, node_to_consumers = get_producer_consumer_maps(main_graph)
def _check_graph(
graph: onnx.GraphProto,
outer_scope_value_info: dict[str, onnx.ValueInfoProto] | None,
outer_scope_initializers: set[str] | None = None,
partitioning_info: PartitioningInfo | None = None,
) -> PartitioningInfo:
if outer_scope_value_info is not None:
# extend value info if we're using it. we replace any value shadowed with a local one
value_info = outer_scope_value_info.copy()
_update_value_info(graph, value_info)
else:
value_info = {}
if outer_scope_initializers is None:
outer_scope_initializers = set()
info = _check_partitioning_for_graph(
graph,
node_to_producers,
node_to_consumers,
supported_ops_checker,
outer_scope_initializers,
require_fixed_input_sizes,
value_info,
max_rank,
)
if partitioning_info:
# merge in subgraph info
partitioning_info.merge(info)
else:
# main graph info
partitioning_info = info
# setup outer scope initializers. we copy the input set as a model may have multiple subgraphs
# on multiple levels, so we need to keep the set for each descent separate
subgraph_outer_scope_initializers = set(outer_scope_initializers)
for initializer in graph.initializer:
subgraph_outer_scope_initializers.add(initializer.name)
for node in graph.node:
# recurse into nodes with subgraphs
for attr in node.attribute:
if attr.HasField("g"):
subgraph = attr.g
partitioning_info = _check_graph(
subgraph, value_info, subgraph_outer_scope_initializers, partitioning_info
)
return partitioning_info
aggregated_partitioning_info = _check_graph(main_graph, {} if require_fixed_input_sizes else None)
return aggregated_partitioning_info
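# Example usage (illustrative): estimate NNAPI partitioning for a shape-inferred model.
#   model = onnx.shape_inference.infer_shapes(onnx.load("model.onnx"))
#   ops = _SupportedOpsChecker("nnapi_supported_ops.md")
#   info = check_partitioning(model.graph, ops, require_fixed_input_sizes=True)
#   info.print_analysis(logging.getLogger("usability_checker"), "NNAPI")
#   info.suitability()  # PartitioningInfo.TryWithEP YES / MAYBE / NO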
def _check_ep_partitioning(
model: onnx.ModelProto, supported_ops_config: pathlib.Path, require_fixed_input_sizes: bool, max_rank: int = 999
):
supported_ops = _SupportedOpsChecker(supported_ops_config)
partition_info = check_partitioning(model.graph, supported_ops, require_fixed_input_sizes, max_rank)
return partition_info
def check_nnapi_partitions(model, require_fixed_input_sizes: bool):
# if we're running in the ORT python package the file should be local. otherwise assume we're running from the
# ORT repo
script_dir = pathlib.Path(__file__).parent
local_config = script_dir / "nnapi_supported_ops.md"
if local_config.exists():
config_path = local_config
else:
ort_root = script_dir.parents[3]
config_path = ort_root / "tools" / "ci_build" / "github" / "android" / "nnapi_supported_ops.md"
return _check_ep_partitioning(model, config_path, require_fixed_input_sizes)
def check_coreml_partitions(model: onnx.ModelProto, require_fixed_input_sizes: bool, config_filename: str):
# if we're running in the ORT python package the file should be local. otherwise assume we're running from the
# ORT repo
script_dir = pathlib.Path(__file__).parent
local_config = script_dir / config_filename
if local_config.exists():
config_path = local_config
else:
ort_root = script_dir.parents[3]
config_path = ort_root / "tools" / "ci_build" / "github" / "apple" / config_filename
max_rank = 5
return _check_ep_partitioning(model, config_path, require_fixed_input_sizes, max_rank)
def check_shapes(graph: onnx.GraphProto, logger: logging.Logger | None = None):
"""
Check the shapes of graph inputs, values and graph outputs to determine if they have static or dynamic sizes.
NNAPI does not support dynamically sized values. CoreML does, but it will most likely cost performance.
:param graph: Graph to check. If shape inferencing has been run the checks on values will be meaningful.
:param logger: Optional logger for diagnostic information.
:return: Tuple of List of inputs with dynamic shapes, Number of dynamic values found
"""
# it's OK if the input is dynamically sized and we do a Resize early to a fixed size.
# it's not good if lots of ops have dynamic inputs
num_fixed_values = 0
num_dynamic_values = 0
dynamic_inputs = []
for i in graph.input:
if not is_fixed_size_tensor(i):
dynamic_inputs.append(i)
# split/join to remove repeated whitespace and newlines from str(i)
if logger:
logger.info(f"Input is not a fixed size tensor: {' '.join(str(i).split())}")
num_dynamic_values += 1
else:
num_fixed_values += 1
dynamic_outputs = []
for o in graph.output:
if not is_fixed_size_tensor(o):
dynamic_outputs.append(o)
if logger:
logger.info(f"Output is not a fixed size tensor: {' '.join(str(o).split())}")
num_dynamic_values += 1
else:
num_fixed_values += 1
# check we have value info.
# special case some test graphs with a single node which only have graph input and output values, and
# a model where all inputs are dynamic (results in no value_info)
if logger and not graph.value_info and not (len(graph.node) == 1 or len(dynamic_inputs) == len(graph.input)):
logger.warning(
"Unable to check shapes within model. "
"ONNX shape inferencing should be run on the model prior to checking."
)
for vi in graph.value_info:
if is_fixed_size_tensor(vi):
num_fixed_values += 1
else:
num_dynamic_values += 1
if logger:
logger.info(
f"Num values with fixed shape={num_fixed_values}. Num values with dynamic shape={num_dynamic_values}"
)
if logger and dynamic_inputs:
if dynamic_outputs:
logger.info(
"Model has dynamic inputs and outputs. Consider re-exporting model with fixed sizes "
"if NNAPI or CoreML can be used with this model."
)
else:
logger.info(
"""Model has dynamically sized inputs but fixed sized outputs.
If the sizes become fixed early in the model (e.g. pre-processing of a dynamic input size
results in a fixed input size for the majority of the model) performance with NNAPI and CoreML,
if applicable, should not be significantly impacted."""
)
return dynamic_inputs, num_dynamic_values
def checker(model_path: pathlib.Path, logger: logging.Logger):
model_with_shape_info_wrapper = ModelProtoWithShapeInfo(model_path)
model_with_shape_info = model_with_shape_info_wrapper.model_with_shape_info
dynamic_inputs, num_dynamic_values = check_shapes(model_with_shape_info.graph, logger)
def check_ep(ep_name, checker_func):
logger.info(f"Checking {ep_name}")
# check with shape info first so supported nodes takes into account values with dynamic shapes
require_fixed_input_sizes = True
partition_info = checker_func(model_with_shape_info, require_fixed_input_sizes)
if logger.getEffectiveLevel() <= logging.INFO:
partition_info.print_analysis(logger, ep_name)
suitability = partition_info.suitability()
logger.info(f"Model should perform well with {ep_name} as is: {suitability.name}")
if suitability != PartitioningInfo.TryWithEP.YES and dynamic_inputs:
logger.info("--------")
logger.info("Checking if model will perform better if the dynamic shapes are fixed...")
require_fixed_input_sizes = False
partition_info_with_fixed_shapes = checker_func(model_with_shape_info, require_fixed_input_sizes)
if logger.getEffectiveLevel() <= logging.INFO:
# analyze and log detailed info
logger.info("Partition information if the model was updated to make the shapes fixed:")
partition_info_with_fixed_shapes.print_analysis(logger, ep_name)
fixed_shape_suitability = partition_info_with_fixed_shapes.suitability()
logger.info(
f"Model should perform well with {ep_name} if modified to have fixed input shapes: "
f"{fixed_shape_suitability.name}"
)
if fixed_shape_suitability != PartitioningInfo.TryWithEP.NO:
logger.info("Shapes can be altered using python -m onnxruntime.tools.make_dynamic_shape_fixed")
if fixed_shape_suitability.value > suitability.value:
suitability = fixed_shape_suitability
logger.info("================")
logger.info("")
return suitability
nnapi_suitability = check_ep("NNAPI", check_nnapi_partitions)
# Check for NeuralNetwork CoreML model
def check_nn_coreml(model: onnx.ModelProto, require_fixed_input_sizes):
return check_coreml_partitions(model, require_fixed_input_sizes, "coreml_supported_neuralnetwork_ops.md")
# Check for MLProgram CoreML model
def check_mlprogram_coreml(model: onnx.ModelProto, require_fixed_input_sizes):
return check_coreml_partitions(model, require_fixed_input_sizes, "coreml_supported_mlprogram_ops.md")
coreml_nn_suitability = check_ep("CoreML NeuralNetwork", check_nn_coreml)
coreml_mlprogram_suitability = check_ep("CoreML MLProgram", check_mlprogram_coreml)
if (
nnapi_suitability != PartitioningInfo.TryWithEP.YES
or coreml_nn_suitability != PartitioningInfo.TryWithEP.YES
or coreml_mlprogram_suitability != PartitioningInfo.TryWithEP.YES
) and logger.getEffectiveLevel() > logging.INFO:
logger.info("Re-run with log level of INFO for more details on the NNAPI/CoreML issues.")
return (
nnapi_suitability != PartitioningInfo.TryWithEP.NO
or coreml_nn_suitability != PartitioningInfo.TryWithEP.NO
or coreml_mlprogram_suitability != PartitioningInfo.TryWithEP.NO
)
def analyze_model(model_path: pathlib.Path, skip_optimize: bool = False, logger: logging.Logger | None = None):
"""
Analyze the provided model to determine if it's likely to work well with the NNAPI or CoreML Execution Providers
:param model_path: Model to analyze.
:param skip_optimize: Skip optimizing to BASIC level before checking. When exporting to ORT format we will do this
optimization.
:param logger: Logger for output
:return: True if either the NNAPI or CoreML Execution Providers may work well with this model.
"""
if not logger:
logger = logging.getLogger("usability_checker")
logger.setLevel(logging.INFO)
logger.info(f"Checking {model_path} for usability with ORT Mobile.")
with tempfile.TemporaryDirectory() as tmp:
if not skip_optimize:
tmp_path = pathlib.Path(tmp) / model_path.name
optimize_model(model_path, tmp_path, use_external_initializers=True)
model_path = tmp_path
try_eps = checker(model_path.resolve(strict=True), logger)
return try_eps
def parse_args():
parser = argparse.ArgumentParser(
os.path.basename(__file__), description="""Analyze an ONNX model for usage with ORT Mobile."""
)
parser.add_argument("--log_level", choices=["debug", "info"], default="info", help="Logging level")
parser.add_argument(
"--skip_optimize",
action="store_true",
help="Don't optimize the model to BASIC level prior to analyzing. "
"Optimization will occur when exporting the model to ORT format, so in general "
"should not be skipped unless you have a specific reason to do so.",
)
parser.add_argument("model_path", type=pathlib.Path, help="Provide path to ONNX model")
return parser.parse_args()
def run_analyze_model():
args = parse_args()
logger = logging.getLogger("default")
if args.log_level == "debug":
logger.setLevel(logging.DEBUG)
elif args.log_level == "info":
logger.setLevel(logging.INFO)
elif args.log_level == "warning":
logger.setLevel(logging.WARNING)
else:
logger.setLevel(logging.ERROR)
model_path = args.model_path.resolve()
analyze_model(model_path, args.skip_optimize, logger)
if __name__ == "__main__":
run_analyze_model()
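Finally, a hedged sketch of calling the analyzer programmatically; `analyze_model` is defined in the file above, and the import path in an installed package is an assumption:

```python
import logging
import pathlib

# Assumes analyze_model from the usability checker above is importable; adjust the
# import to match where the module lives in your environment.
logging.basicConfig(format="%(message)s")
logger = logging.getLogger("usability_checker")
logger.setLevel(logging.INFO)

worth_trying = analyze_model(pathlib.Path("model.onnx"), skip_optimize=False, logger=logger)
print("NNAPI and/or CoreML may work well with this model:", worth_trying)
```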