diff --git a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py
index eac5b3b78690b..2c7f54b43e88a 100644
--- a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py
+++ b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py
@@ -53,6 +53,7 @@ def get_qnn_qdq_config(
     weight_symmetric: bool | None = None,
     keep_removable_activations: bool = False,
     stride: int | None = None,
+    op_types_to_quantize: list[str] | None = None,
 ) -> StaticQuantConfig:
     """
     Returns a static quantization configuration suitable for running QDQ models on QNN EP.
@@ -117,6 +118,7 @@ def get_qnn_qdq_config(
             are automatically removed if activations are asymmetrically quantized. Keeping these activations
            is necessary if optimizations or EP transformations will later remove
            QuantizeLinear/DequantizeLinear operators from the model.
+        op_types_to_quantize: If set to None, all operator types are quantized except for those in OP_TYPES_TO_EXCLUDE.
 
     Returns:
         A StaticQuantConfig object
@@ -161,7 +163,11 @@ def get_qnn_qdq_config(
         name_to_initializer,
     )
 
+    op_types_to_quantize_set = set(op_types_to_quantize) if op_types_to_quantize else None
+
     for node in model.graph.node:
+        if op_types_to_quantize_set and node.op_type not in op_types_to_quantize_set:
+            continue
         op_types.add(node.op_type)
         qnn_compat.process_node(node)
 
@@ -189,7 +195,9 @@ def get_qnn_qdq_config(
         calibrate_method=calibrate_method,
         activation_type=activation_type,
         weight_type=weight_type,
-        op_types_to_quantize=list(op_types.difference(OP_TYPES_TO_EXCLUDE)),
+        op_types_to_quantize=op_types_to_quantize
+        if op_types_to_quantize
+        else list(op_types.difference(OP_TYPES_TO_EXCLUDE)),
         per_channel=per_channel,
         use_external_data_format=(model_has_external_data or model.ByteSize() >= MODEL_SIZE_THRESHOLD),
         extra_options=extra_options,
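
For reviewers, a minimal usage sketch of the new parameter. This is illustrative only: the model paths and `my_calibration_reader` (a `CalibrationDataReader` implementation you provide) are placeholders, not part of this diff.

```python
from onnxruntime.quantization import quantize
from onnxruntime.quantization.execution_providers.qnn import get_qnn_qdq_config

# `my_calibration_reader` is assumed to be a CalibrationDataReader implementation
# that yields representative inputs for the model being quantized.
qnn_config = get_qnn_qdq_config(
    "model.onnx",
    my_calibration_reader,
    op_types_to_quantize=["Conv", "MatMul"],  # only these op types receive QDQ nodes
)

# Produce the QDQ model using the restricted op-type set.
quantize("model.onnx", "model.qdq.onnx", qnn_config)
```

When `op_types_to_quantize` is omitted or None, behavior is unchanged: every op type found in the graph is quantized except those in OP_TYPES_TO_EXCLUDE.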