microsoft · xieofxie · Mar 4, 2025 · Mar 6, 2025 · Mar 7, 2025
diff --git a/model_lab_configs/huggingface/microsoft/resnet-50/0.1/imagenet.py b/model_lab_configs/huggingface/microsoft/resnet-50/0.1/imagenet.py
@@ -0,0 +1,74 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+from logging import getLogger
+from pathlib import Path
+
+import numpy as np
+import torchvision.transforms as transforms
+from torch import from_numpy
+from torch.utils.data import Dataset
+
+from olive.data.registry import Registry
+
+logger = getLogger(__name__)
+
+
+class ImagenetDataset(Dataset):
+    def __init__(self, data):
+        self.images = from_numpy(data["images"])
+        self.labels = from_numpy(data["labels"])
+
+    def __len__(self):
+        return min(len(self.images), len(self.labels))
+
+    def __getitem__(self, idx):
+        return {"input": self.images[idx]}, self.labels[idx]
+
+
+@Registry.register_post_process()
+def imagenet_post_fun(output):
+    return output.argmax(axis=1)
+
+
+preprocess = transforms.Compose(
+    [
+        transforms.Resize(256),
+        transforms.CenterCrop(224),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ]
+)
+
+
+@Registry.register_pre_process()
+def dataset_pre_process(output_data, **kwargs):
+    cache_key = kwargs.get("cache_key")
+    size = kwargs.get("size", 256)
+    cache_file = None
+    if cache_key:
+        cache_file = Path(f"./cache/data/{cache_key}_{size}.npz")
+        if cache_file.exists():
+            with np.load(Path(cache_file)) as data:
+                return ImagenetDataset(data)
+
+    labels = []
+    images = []
+    for i, sample in enumerate(output_data):
+        if i >= size:
+            break
+        image = sample["image"]
+        label = sample["label"]
+        image = image.convert("RGB")
+        image = preprocess(image)
+        images.append(image)
+        labels.append(label)
+
+    result_data = ImagenetDataset({"images": np.array(images), "labels": np.array(labels)})
+
+    if cache_file:
+        cache_file.parent.resolve().mkdir(parents=True, exist_ok=True)
+        np.savez(cache_file, images=np.array(images), labels=np.array(labels))
+
+    return result_data
diff --git a/model_lab_configs/huggingface/microsoft/resnet-50/0.1/modelspace.config b/model_lab_configs/huggingface/microsoft/resnet-50/0.1/modelspace.config
@@ -0,0 +1,8 @@
+{
+    "models":[
+        {
+            "name": "Convert to QNN",
+            "file": "resnet_ptq_qnn.json"
+        }
+    ]
+}
diff --git a/model_lab_configs/huggingface/microsoft/resnet-50/0.1/requirements.txt b/model_lab_configs/huggingface/microsoft/resnet-50/0.1/requirements.txt
@@ -0,0 +1 @@
+olive-ai
diff --git a/model_lab_configs/huggingface/microsoft/resnet-50/0.1/resnet_ptq_qnn.json b/model_lab_configs/huggingface/microsoft/resnet-50/0.1/resnet_ptq_qnn.json
@@ -0,0 +1,144 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "microsoft/resnet-50",
+        "task": "image-classification",
+        "io_config": {
+            "input_names": [ "input" ],
+            "input_shapes": [ [ 1, 3, 224, 224 ] ],
+            "output_names": [ "output" ]
+        }
+    },
+    "systems": {
+        "qnn_system": {
+            "type": "LocalSystem",
+            "accelerators": [ { "device": "npu", "execution_providers": [ "QNNExecutionProvider" ] } ]
+        }
+    },
+    "data_configs": [
+        {
+            "name": "quantize_data_config",
+            "type": "HuggingfaceContainer",
+            "user_script": "imagenet.py",
+            "load_dataset_config": {
+                "data_name": "imagenet-1k",
+                "split": "train",
+                "streaming": true,
+                "trust_remote_code": true
+            },
+            "pre_process_data_config": { "type": "dataset_pre_process", "size": 256 },
+            "post_process_data_config": { "type": "imagenet_post_fun" }
+        },
+        {
+            "name": "evaluation_data_config",
+            "type": "HuggingfaceContainer",
+            "user_script": "imagenet.py",
+            "load_dataset_config": {
+                "data_name": "imagenet-1k",
+                "split": "test",
+                "streaming": true,
+                "trust_remote_code": true
+            },
+            "pre_process_data_config": { "type": "dataset_pre_process", "size": 256 },
+            "post_process_data_config": { "type": "imagenet_post_fun" }
+        }
+    ],
+    "evaluators": {
+        "common_evaluator": {
+            "metrics": [
+                {
+                    "name": "accuracy_qnn",
+                    "type": "accuracy",
+                    "data_config": "evaluation_data_config",
+                    "sub_types": [
+                        {
+                            "name": "accuracy_score",
+                            "priority": 1,
+                            "metric_config": { "task": "multiclass", "num_classes": 1001 }
+                        }
+                    ],
+                    "user_config": {
+                        "inference_settings": {
+                            "onnx": {
+                                "session_options": {
+                                    "extra_session_config": { "session.disable_cpu_ep_fallback": "1" }
+                                },
+                                "execution_provider": "QNNExecutionProvider",
+                                "provider_options": [ { "backend_path": "QnnHtp.dll" } ]
+                            }
+                        }
+                    }
+                },
+                {
+                    "name": "accuracy_cpu",
+                    "type": "accuracy",
+                    "data_config": "evaluation_data_config",
+                    "sub_types": [
+                        {
+                            "name": "accuracy_score",
+                            "priority": 2,
+                            "metric_config": { "task": "multiclass", "num_classes": 1001 }
+                        }
+                    ],
+                    "user_config": {
+                        "inference_settings": { "onnx": { "execution_provider": "CPUExecutionProvider" } }
+                    }
+                },
+                {
+                    "name": "latency_qnn",
+                    "type": "latency",
+                    "data_config": "evaluation_data_config",
+                    "sub_types": [ { "name": "avg", "priority": 3 } ],
+                    "user_config": {
+                        "inference_settings": {
+                            "onnx": {
+                                "session_options": {
+                                    "extra_session_config": { "session.disable_cpu_ep_fallback": "1" }
+                                },
+                                "execution_provider": "QNNExecutionProvider",
+                                "provider_options": [ { "backend_path": "QnnHtp.dll" } ]
+                            }
+                        }
+                    }
+                },
+                {
+                    "name": "latency_cpu",
+                    "type": "latency",
+                    "data_config": "evaluation_data_config",
+                    "sub_types": [ { "name": "avg", "priority": 4 } ],
+                    "user_config": {
+                        "inference_settings": { "onnx": { "execution_provider": "CPUExecutionProvider" } }
+                    }
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": {
+            "device": "cpu",
+            "type": "OnnxConversion",
+            "target_opset": 17,
+            "save_as_external_data": true,
+            "all_tensors_to_one_file": true,
+            "dynamic": false,
+            "use_dynamo_exporter": false
+        },
+        "QNNPreprocess": { "type": "QNNPreprocess" },
+        "OnnxQuantization": {
+            "type": "OnnxQuantization",
+            "data_config": "quantize_data_config",
+            "activation_type": "QUInt16",
+            "weight_type": "QUInt8",
+            "calibrate_method": "MinMax",
+            "quant_preprocess": true,
+            "prepare_qnn_config": true
+        }
+    },
+    "host": "qnn_system",
+    "target": "qnn_system",
+    "evaluator": "common_evaluator",
+    "cache_dir": "cache",
+    "clean_cache": true,
+    "output_dir": "models/resnet_ptq_qnn",
+    "evaluate_input_model": false
+}
diff --git a/model_lab_configs/huggingface/microsoft/resnet-50/0.1/resnet_ptq_qnn.json.config b/model_lab_configs/huggingface/microsoft/resnet-50/0.1/resnet_ptq_qnn.json.config
@@ -0,0 +1,86 @@
+{
+    "sections": [
+        {
+            "name": "System Parameters",
+            "description": "Parameters for the system"
+        },
+        {
+            "name": "Model Parameters",
+            "description": "Parameters for the model"
+        },
+        {
+            "name": "Data Parameters",
+            "description": "Parameters for the data"
+        }
+    ],
+    "parameters": [
+        {
+            "path": "passes.OnnxQuantization.weight_type",
+            "template": "WeightType",
+            "section": "Model Parameters"
+        },
+        {
+            "path": "passes.OnnxQuantization.activation_type",
+            "template": "ActivationType",
+            "section": "Model Parameters"
+        },
+        {
+            "path": "data_configs[0].load_dataset_config.data_name",
+            "name": "Training Dataset",
+            "type": "enum",
+            "values": [
+                {
+                    "name": "ImageNet 1k",
+                    "value": "imagenet-1k",
+                    "description": "ImageNet dataset with 1000 classes"
+                }
+            ],
+            "section": "Data Parameters"
+        },
+        {
+            "path": "data_configs[0].load_dataset_config.split",
+            "name": "Training Dataset Split",
+            "type": "enum",
+            "values": [
+                "train",
+                "validation",
+                "test"
+            ],
+            "section": "Data Parameters"
+        },
+        {
+            "path": "data_configs[1].load_dataset_config.data_name",
+            "name": "Evaluation Dataset",
+            "type": "enum",
+            "values": [
+                {
+                    "name": "ImageNet 1k",
+                    "value": "imagenet-1k",
+                    "description": "ImageNet dataset with 1000 classes"
+                }
+            ],
+            "section": "Data Parameters"
+        },
+        {
+            "path": "data_configs[1].load_dataset_config.split",
+            "name": "Evaluation Dataset Split",
+            "type": "enum",
+            "values": [
+                "train",
+                "validation",
+                "test"
+            ],
+            "section": "Data Parameters"
+        },
+        {
+            "path": "systems.qnn_system.accelerators[0].execution_providers[0]",
+            "name": "Execution Provider",
+            "type": "enum",
+            "values": [
+                "QNNExecutionProvider",
+                "CPUExecutionProvider"
+            ],
+            "section": "System Parameters"
+        }
+    ]
+}
diff --git a/model_lab_configs/huggingface/microsoft/resnet-50/0.1/resnet_ptq_qnn.json.md b/model_lab_configs/huggingface/microsoft/resnet-50/0.1/resnet_ptq_qnn.json.md
@@ -0,0 +1,15 @@
+# ResNet optimization with QNN execution providers
+
+This example performs ResNet optimization with QNN execution providers in one workflow. It performs the optimization pipeline:
+
+- *PyTorch Model -> Onnx Model -> QNN Quantized Onnx Model*
+
+## Evaluation result
+
+The quantization uses 256 samples from the train split of the imagenet-1k dataset, and the evaluation uses 256 samples from the test split of the imagenet-1k dataset.
+
+| Activation Type | Weight Type | Size | Accuracy | Latency ms (avg) |
+|-----------------|-------------|------|----|---------|
+| TBD | TBD | TBD | TBD | TBD |
+| QUInt16         | QUInt8      |  TBD | 0.78515625      | 2.53724 |
+| QUInt16         | QUInt16      |  TBD | TBD     | TBD |
diff --git a/model_lab_configs/model_list.json b/model_lab_configs/model_list.json
@@ -0,0 +1,12 @@
+{
+    "models": [
+        {
+            "name": "Microsoft/ResNet-50",
+            "icon": "ms",
+            "model_link": "https://huggingface.co/microsoft/resnet-50",
+            "id": "huggingface/microsoft/resnet-50",
+            "runtime": ["QNN"],
+            "architecture": "CNN"
+        }
+    ]
+}
diff --git a/model_lab_configs/parameter_template.json b/model_lab_configs/parameter_template.json
@@ -0,0 +1,14 @@
+{
+    "ActivationType": {
+        "name": "Activation Type",
+        "description": "Quantization data type of activation",
+        "type": "enum",
+        "values": ["QUInt16", "QInt16"]
+    },
+    "WeightType": {
+        "name": "Weight Type",
+        "description": "Data type for quantizing weights",
+        "type": "enum",
+        "values": ["QInt8", "QUInt8"]
+    }
+}