From 49f1f7e4f3cb0e51cb2603ded04cedd8f66e2c8d Mon Sep 17 00:00:00 2001 From: hualxie Date: Wed, 22 Jan 2025 16:29:23 +0800 Subject: [PATCH 1/4] add op_types_to_quantize to get_qnn_qdq_config --- .../quantization/execution_providers/qnn/quant_config.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py index eac5b3b78690b..33c96b25981ce 100644 --- a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py +++ b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py @@ -53,6 +53,7 @@ def get_qnn_qdq_config( weight_symmetric: bool | None = None, keep_removable_activations: bool = False, stride: int | None = None, + op_types_to_quantize: list[str] | None = None, ) -> StaticQuantConfig: """ Returns a static quantization configuration suitable for running QDQ models on QNN EP. @@ -117,6 +118,7 @@ def get_qnn_qdq_config( are automatically removed if activations are asymmetrically quantized. Keeping these activations is necessary if optimizations or EP transformations will later remove QuantizeLinear/DequantizeLinear operators from the model. 
+ op_types_to_quantize: Op types to quantize. If None or empty, all except OP_TYPES_TO_EXCLUDE are quantized. Returns: A StaticQuantConfig object @@ -162,6 +164,9 @@ def get_qnn_qdq_config( ) for node in model.graph.node: + if op_types_to_quantize: + if node.op_type not in op_types_to_quantize: + continue op_types.add(node.op_type) qnn_compat.process_node(node) @@ -189,7 +194,7 @@ def get_qnn_qdq_config( calibrate_method=calibrate_method, activation_type=activation_type, weight_type=weight_type, - op_types_to_quantize=list(op_types.difference(OP_TYPES_TO_EXCLUDE)), + op_types_to_quantize=op_types_to_quantize if op_types_to_quantize else list(op_types.difference(OP_TYPES_TO_EXCLUDE)), per_channel=per_channel, use_external_data_format=(model_has_external_data or model.ByteSize() >= MODEL_SIZE_THRESHOLD), extra_options=extra_options, From 528aefe5bd3aebd46cdaca4a7c197ce21f5c8491 Mon Sep 17 00:00:00 2001 From: hualxie Date: Fri, 24 Jan 2025 17:38:34 +0800 Subject: [PATCH 2/4] use set --- .../quantization/execution_providers/qnn/quant_config.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py index 33c96b25981ce..ee4e667cca335 100644 --- a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py +++ b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py @@ -163,9 +163,11 @@ def get_qnn_qdq_config( name_to_initializer, ) + op_types_to_quantize_set = set(op_types_to_quantize) if op_types_to_quantize else None + for node in model.graph.node: - if op_types_to_quantize: - if node.op_type not in op_types_to_quantize: + if op_types_to_quantize_set: + if node.op_type not in op_types_to_quantize_set: continue op_types.add(node.op_type) qnn_compat.process_node(node) From f79f00a9bf142a353d0044b53671fbab2961e757 Mon Sep 17 00:00:00 2001 From: hualxie Date: 
Fri, 24 Jan 2025 17:40:44 +0800 Subject: [PATCH 3/4] nit --- .../quantization/execution_providers/qnn/quant_config.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py index ee4e667cca335..53fb018ac6d56 100644 --- a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py +++ b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py @@ -166,9 +166,8 @@ def get_qnn_qdq_config( op_types_to_quantize_set = set(op_types_to_quantize) if op_types_to_quantize else None for node in model.graph.node: - if op_types_to_quantize_set: - if node.op_type not in op_types_to_quantize_set: - continue + if op_types_to_quantize_set and node.op_type not in op_types_to_quantize_set: + continue op_types.add(node.op_type) qnn_compat.process_node(node) From 417dab3cc6406fc3eee522a2c9667097cd26fe2a Mon Sep 17 00:00:00 2001 From: hualxie Date: Sun, 26 Jan 2025 15:35:01 +0800 Subject: [PATCH 4/4] lintrunner -a --- .../quantization/execution_providers/qnn/quant_config.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py index 53fb018ac6d56..2c7f54b43e88a 100644 --- a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py +++ b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py @@ -195,7 +195,9 @@ def get_qnn_qdq_config( calibrate_method=calibrate_method, activation_type=activation_type, weight_type=weight_type, - op_types_to_quantize=op_types_to_quantize if op_types_to_quantize else list(op_types.difference(OP_TYPES_TO_EXCLUDE)), + op_types_to_quantize=op_types_to_quantize + if op_types_to_quantize + else list(op_types.difference(OP_TYPES_TO_EXCLUDE)), 
per_channel=per_channel, use_external_data_format=(model_has_external_data or model.ByteSize() >= MODEL_SIZE_THRESHOLD), extra_options=extra_options,