From 4b155d08594ff16d4558e913ccac70a9b8ed6cde Mon Sep 17 00:00:00 2001 From: "REDMOND\\kacruz" Date: Thu, 30 Jan 2025 14:28:20 -0800 Subject: [PATCH 1/5] First draft adding deepseek standin for inference_models --- inference_models.json | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/inference_models.json b/inference_models.json index dce3490..019194f 100644 --- a/inference_models.json +++ b/inference_models.json @@ -264,5 +264,24 @@ "parameterSize": "3.8B", "fileSize": "2.30 GB", "runtime": "cuda" + }, + { + "name": "DeepSeek R1", + "providerType": "DeepSeek", + "uri": "https://amlwlrt4use01.blob.core.windows.net/azureml-e667c101-62bd-5ad2-8453-c83286dc5e9f/", + "modelType": "onnx", + "task": "Text Generation", + "fineTuningTemplateName": "DeepSeek/DeepSeek-R1", + "promptTemplate": { + "system":"<|system|>\n{Content}<|end|>", + "user":"<|user|>\n{Content}<|end|>", + "assistant":"<|assistant|>\n{Content}<|end|>", + "prompt":"<|user|>\n{Content}<|end|>\n<|assistant|>" + }, + "publisher": "DeepSeek", + "architecture": "DeepSeek", + "parameterSize": "3.8B", + "fileSize": "2.30 GB", + "runtime": "NPU" } ] From ee51de755a6d6a472fc7fbcf675c7d282b8b7928 Mon Sep 17 00:00:00 2001 From: "REDMOND\\kacruz" Date: Thu, 30 Jan 2025 15:21:04 -0800 Subject: [PATCH 2/5] Fix providerType for Deepseek --- inference_models.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/inference_models.json b/inference_models.json index 019194f..c45e7df 100644 --- a/inference_models.json +++ b/inference_models.json @@ -266,12 +266,12 @@ "runtime": "cuda" }, { - "name": "DeepSeek R1", - "providerType": "DeepSeek", + "name": "DeepSeek-R1-NPU-Optimized", + "providerType": "Blob", "uri": "https://amlwlrt4use01.blob.core.windows.net/azureml-e667c101-62bd-5ad2-8453-c83286dc5e9f/", "modelType": "onnx", "task": "Text Generation", - "fineTuningTemplateName": "DeepSeek/DeepSeek-R1", + "fineTuningTemplateName": "DeepSeek/DeepSeek-R1-NPU-Optimized", "promptTemplate": { "system":"<|system|>\n{Content}<|end|>", "user":"<|user|>\n{Content}<|end|>", From ed163927e1f876a09632cb2242e274b4dbcd7959 Mon Sep 17 00:00:00 2001 From: "REDMOND\\kacruz" Date: Thu, 30 Jan 2025 17:55:11 -0800 Subject: [PATCH 3/5] Update deepseek entry --- inference_models.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/inference_models.json b/inference_models.json index c45e7df..c5d9b8c 100644 --- a/inference_models.json +++ b/inference_models.json @@ -267,18 +267,18 @@ }, { "name": "DeepSeek-R1-NPU-Optimized", - "providerType": "Blob", + "providerType": "AzureBlob", "uri": "https://amlwlrt4use01.blob.core.windows.net/azureml-e667c101-62bd-5ad2-8453-c83286dc5e9f/", "modelType": "onnx", "task": "Text Generation", - "fineTuningTemplateName": "DeepSeek/DeepSeek-R1-NPU-Optimized", + "fineTuningTemplateName": "DeepSeek-AI/DeepSeek-R1-NPU-Optimized", "promptTemplate": { - "system":"<|system|>\n{Content}<|end|>", - "user":"<|user|>\n{Content}<|end|>", - "assistant":"<|assistant|>\n{Content}<|end|>", - "prompt":"<|user|>\n{Content}<|end|>\n<|assistant|>" + "system": "\u003C|system|\u003E\n{Content}\u003C|end|\u003E", + "user": "\u003C|user|\u003E\n{Content}\u003C|end|\u003E", + "assistant": "\u003C|assistant|\u003E\n{Content}\u003C|end|\u003E", + "prompt": "\u003C|user|\u003E\n{Content}\u003C|end|\u003E\n\u003C|assistant|\u003E" }, - "publisher": "DeepSeek", + "publisher": "DeepSeek-AI", "architecture": "DeepSeek", "parameterSize": "3.8B", "fileSize": "2.30 GB", From 72ecb691bfd67ec41c2b33a295927a92b10a6f48 Mon Sep 17 00:00:00 2001 From: "REDMOND\\kacruz" Date: Thu, 30 Jan 2025 18:39:24 -0800 Subject: [PATCH 4/5] HuggingFace entry for DeepSeek-R1 --- inference_models.json | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/inference_models.json b/inference_models.json index c5d9b8c..658dae6 100644 --- a/inference_models.json +++ b/inference_models.json @@ -267,21 +267,19 @@ }, { "name": "DeepSeek-R1-NPU-Optimized", - "providerType": "AzureBlob", - "uri": "https://amlwlrt4use01.blob.core.windows.net/azureml-e667c101-62bd-5ad2-8453-c83286dc5e9f/", + "providerType": "HuggingFace", + "uri": "cloudnbits/DeepSeek-R1-Distilled-NPU-Optimized", "modelType": "onnx", "task": "Text Generation", - "fineTuningTemplateName": "DeepSeek-AI/DeepSeek-R1-NPU-Optimized", + "fineTuningTemplateName": "cloudnbits/DeepSeek-R1-Distilled-NPU-Optimized", "promptTemplate": { - "system": "\u003C|system|\u003E\n{Content}\u003C|end|\u003E", - "user": "\u003C|user|\u003E\n{Content}\u003C|end|\u003E", - "assistant": "\u003C|assistant|\u003E\n{Content}\u003C|end|\u003E", - "prompt": "\u003C|user|\u003E\n{Content}\u003C|end|\u003E\n\u003C|assistant|\u003E" + "assistant": "{Content}", + "prompt": "\u003C|begin▁of▁sentence|\u003E|User|{Content}|Assistant|" }, - "publisher": "DeepSeek-AI", - "architecture": "DeepSeek", - "parameterSize": "3.8B", - "fileSize": "2.30 GB", + "publisher": "cloudnbits", + "architecture": "MoE", + "parameterSize": "1.5B", + "fileSize": "3.90 GB", "runtime": "NPU" } ] From 4e0facc1034fc1d8fbd4ed2927e21bf75408c6a7 Mon Sep 17 00:00:00 2001 From: "REDMOND\\kacruz" Date: Thu, 30 Jan 2025 18:44:45 -0800 Subject: [PATCH 5/5] Update model name --- inference_models.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference_models.json b/inference_models.json index 658dae6..bacea65 100644 --- a/inference_models.json +++ b/inference_models.json @@ -266,7 +266,7 @@ "runtime": "cuda" }, { - "name": "DeepSeek-R1-NPU-Optimized", + "name": "DeepSeek-R1-Distilled-NPU-Optimized", "providerType": "HuggingFace", "uri": "cloudnbits/DeepSeek-R1-Distilled-NPU-Optimized", "modelType": "onnx",