Adding support for AML based training to WAIS #98

Open · wants to merge 19 commits into base: main
25 changes: 24 additions & 1 deletion configs/llama-v2-7b/.vscode/launch.json
@@ -6,11 +6,34 @@
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": false
},
{
"name": "Python: invoke_finetune_azure",
"type": "debugpy",
"request": "launch",
"program": "finetuning/invoke_olive.py",
"console": "integratedTerminal",
"justMyCode": false,
"args": [
"--azure",
"--aml_config",
"finetuning/aml_config.json",
]
},
{
"name": "Python: invoke_finetune_local",
"type": "debugpy",
"request": "launch",
"program": "finetuning/invoke_olive.py",
"console": "integratedTerminal",
"justMyCode": false,
"args": [
]
}
]
}
40 changes: 40 additions & 0 deletions configs/llama-v2-7b/README.md
@@ -23,8 +23,48 @@ cd inference
python gradio_chat.py --baseonly
```

### Model fine-tuning and inferencing

Once the workspace is opened in a dev container, open a terminal (the default path is the project root), then run the command below to fine-tune an LLM on the selected dataset.

```bash
python finetuning/invoke_olive.py
```

Checkpoints and the final model will be saved in the `models` folder.

### Model fine-tuning on Azure

Please make sure that you have created an Azure ML workspace by following these directions:
https://learn.microsoft.com/en-us/azure/machine-learning/quickstart-create-resources?view=azureml-api-2

Create a compute cluster that uses GPU nodes (see the available GPU VM sizes at https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-gpu?view=azureml-api-2), following these instructions (a scripted alternative is sketched below):

https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-attach-compute-cluster?view=azureml-api-2&tabs=azure-studio
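
If you prefer to script this step, here is a minimal sketch using the `azure-ai-ml` SDK; the placeholder values mirror `aml_config.json`, and the VM size is only an example, not a requirement of this repo:

```python
from azure.ai.ml import MLClient
from azure.ai.ml.entities import AmlCompute
from azure.identity import DefaultAzureCredential

# Connect to the workspace created in the previous step.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id="<subscription_id>",
    resource_group_name="<resource_group>",
    workspace_name="<workspace_name>",
)

# Define a GPU cluster; pick a size from the GPU VM sizes page linked above.
gpu_cluster = AmlCompute(
    name="<aml_compute_name>",
    size="Standard_NC24ads_A100_v4",  # example SKU; adjust to your quota
    min_instances=0,
    max_instances=1,
    idle_time_before_scale_down=1800,
)
ml_client.compute.begin_create_or_update(gpu_cluster).result()
```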

Setup "hf-token" in the Keyvault associated with the Azure ML workspace.
Get HuggingFace access token from your HuggingFace account.
1. Get your Huggingface token string from Settings -> [Access Tokens](https://huggingface.co/settings/tokens). Please, make sure that your account has access to any gated model that you will access for finetuning.
2. Create or use an existing [Azure Key Vault](https://learn.microsoft.com/en-us/azure/key-vault/general/overview). Assume the key vault is named `keyvault_name`. Add a new secret named `hf-token`, and set the value as the token from the first step. It is important to note that `hf-token` secret name is reserved specifically for Huggingface login. Do not use this name in this keyvault for other purpose.
3. Give the compute cluster access to the keyvault `keyvault_name` using the following instructions:
a. Create a managed identity for the compute cluster.
b. Give the compute cluster access to the keyvault `keyvault_name` using the following instructions:
https://learn.microsoft.com/en-us/azure/key-vault/general/assign-access-policy?tabs=azure-portal
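
The secret can also be created from code; a minimal sketch with the `azure-keyvault-secrets` package (already pinned in `requirements.txt`), where the vault name and token value are placeholders:

```python
from azure.identity import DefaultAzureCredential
from azure.keyvault.secrets import SecretClient

keyvault_name = "<keyvault_name>"
client = SecretClient(
    vault_url=f"https://{keyvault_name}.vault.azure.net",
    credential=DefaultAzureCredential(),
)

# Store the HuggingFace token under the reserved secret name.
client.set_secret("hf-token", "<your_huggingface_token>")
```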

Update `finetuning/aml_config.json` with the correct values for your subscription, resource group, workspace, compute cluster, and key vault.

Once the workspace is opened in a dev container, open a terminal (the default path is project root).

```bash
az login
python finetuning/invoke_olive.py --azure --aml_config finetuning/aml_config.json
```

This will submit an AML job that can be monitored through the link printed by the program. After training finishes, the trained model can be downloaded from the job's outputs folder (a scripted download is sketched below).
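
The download can be scripted as well; a minimal sketch using the `azure-ai-ml` SDK, where the job name is a placeholder taken from the printed link:

```python
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id="<subscription_id>",
    resource_group_name="<resource_group>",
    workspace_name="<workspace_name>",
)

# Download everything the job wrote, including the trained model artifacts.
ml_client.jobs.download(name="<job_name>", download_path="models", all=True)
```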


### Inference

Next, run inference with the fine-tuned model through chats in a `console`, `web browser` or `prompt flow`.

7 changes: 7 additions & 0 deletions configs/llama-v2-7b/finetuning/aml_config.json
@@ -0,0 +1,7 @@
{
    "subscription_id": "<subscription_id>",
    "resource_group": "<resource_group>",
    "workspace_name": "<workspace_name>",
    "aml_compute_name": "<aml_compute_name>",
    "keyvault_name": "<keyvault_name>"
}
@@ -0,0 +1,31 @@
FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:20240109.v1

ENV DEBIAN_FRONTEND=noninteractive
ENV ACCEPT_EULA=Y

# Make Python 3.9 the default python
RUN apt-get -y update && \
    apt-get -y upgrade && \
    apt-get install -y --no-install-recommends \
        software-properties-common gnupg \
    && add-apt-repository -y ppa:deadsnakes \
    && apt-get install -y --no-install-recommends \
        python3.9-venv \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* && \
    python3.9 -m venv /venv

ENV PATH=/venv/bin:$PATH

# Install TensorRT
RUN v="8.4.1-1+cuda11.6" && \
    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub && \
    apt-get -y update && \
    apt-get -y upgrade && \
    apt-get install -y libnvinfer8=${v} libnvonnxparsers8=${v} libnvparsers8=${v} libnvinfer-plugin8=${v} \
        libnvinfer-dev=${v} libnvonnxparsers-dev=${v} libnvparsers-dev=${v} libnvinfer-plugin-dev=${v} \
        python3-libnvinfer=${v} libnvinfer-samples=${v}

RUN pip install --upgrade pip && pip install --upgrade setuptools
COPY requirements.txt .
RUN pip install -r requirements.txt --no-cache-dir
@@ -0,0 +1,34 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchvision
torchaudio
packaging
datasets==2.14.5
transformers==4.36.2
accelerate==0.23.0
bitsandbytes==0.41.1
peft==0.5.0
scikit-learn==1.3.1
sentencepiece==0.1.99
trl==0.7.2
protobuf
ipykernel==6.25.2
wandb==0.15.12
einops
promptflow==0.1.0b8
promptflow-tools==0.1.0b10
gradio
azureml-core==1.54.0
azure-ai-ml==1.12.1
azureml-fsspec
azureml-mlflow
azure-identity
azure-keyvault
azure-keyvault-secrets
mlflow
docker
onnxruntime-gpu
scipy
onnxruntime-training
torch-ort
git+https://github.com/microsoft/Olive.git@3c25717980ad743e802c16a8e4c831b6196f68bf
103 changes: 100 additions & 3 deletions configs/llama-v2-7b/finetuning/invoke_olive.py
@@ -1,6 +1,103 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import argparse
import json
import os
import re
import sys

from azure.ai.ml.entities import Environment, BuildContext
from olive.workflows import run as olive_run
from utils import get_aml_client, create_dataset

def parse_aml_config(aml_config):
    """Parse the AML config and make sure the required fields are present."""
    with open(aml_config, 'r') as file:
        aml_config = json.load(file)

    required_keys = [
        "subscription_id",
        "resource_group",
        "workspace_name",
        "aml_compute_name",
        "keyvault_name",
    ]
    for key in required_keys:
        if key not in aml_config:
            print(f"KeyError: '{key}' not found in aml_config.json")
            sys.exit(1)

    return aml_config


def main():
    """Main function of the script."""

    # Input and output arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--azure", required=False, action="store_true",
                        help="run the training on Azure when this option is enabled")
    parser.add_argument("--aml_config", type=str, required='--azure' in sys.argv,
                        help="AML config (update and use aml_config.json) with Azure subscription/workspace details")

    args = parser.parse_args()

    # Run Olive locally from the default config file
    if not args.azure:
        file_path = os.path.join(os.getcwd(), 'finetuning/olive-config.json')
        olive_run(file_path)
    else:
        dataset_local_path = "dataset/dataset-classification.json"
        dataset_name = "llama_2_7b_train_dataset"
        dataset_version = "1"
        docker_context_path = "finetuning/docker-contexts/wais_llama_2_7b_env"
        azure_olive_config_template_path = "finetuning/olive-config-azureml-template.json"
        azure_olive_config_path = "finetuning/olive-config-azureml.json"
        azure_environment_name = "wais_llama_2_7b_env"

        aml_config = parse_aml_config(args.aml_config)

        # Get the AML client
        ml_client = get_aml_client(aml_config["subscription_id"], aml_config["resource_group"], aml_config["workspace_name"])

        # Create the environment from the Docker build context
        print("Creating the environment...")
        env_docker_context = Environment(
            build=BuildContext(path=docker_context_path),  # Path to the Docker context
            name=azure_environment_name,
            description="Environment created from a Docker context for training the llama_2_7b model using Olive.",
        )
        aml_env = ml_client.environments.create_or_update(env_docker_context)
        print("The environment {} was created successfully.".format(aml_env.name))

        # Create the dataset
        print("Creating the dataset...")
        description = "Train dataset for tone classification model."
        dataset = create_dataset(ml_client, local_path=dataset_local_path, name=dataset_name, version=dataset_version, description=description)
        print("The dataset {} was created successfully.".format(dataset.name))
        dataset_relative_path = re.split("/datastores/.*/paths/", dataset.path)[-1]

        # Fill in olive-config-azureml.json from the template
        with open(azure_olive_config_template_path, 'r') as file:
            olive_config = json.load(file)
        try:
            olive_config["azureml_client"]["aml_config_path"] = args.aml_config
            olive_config["data_configs"]["dataset_default_train"]["params_config"]["data_files"]["config"]["azureml_client"]["aml_config_path"] = args.aml_config
            olive_config["data_configs"]["dataset_default_train"]["params_config"]["data_files"]["config"]["relative_path"] = dataset_relative_path
            olive_config["systems"]["aml_system"]["config"]["aml_compute"] = aml_config["aml_compute_name"]
            olive_config["systems"]["aml_system"]["config"]["aml_environment_config"]["name"] = aml_env.name
            olive_config["systems"]["aml_system"]["config"]["aml_environment_config"]["version"] = aml_env.version
            olive_config["azureml_client"]["keyvault_name"] = aml_config["keyvault_name"]
        except KeyError as e:
            print(f"KeyError: {e} not found in olive-config-azureml.json")
            sys.exit(1)

        with open(azure_olive_config_path, 'w') as file:
            json.dump(olive_config, file, indent=4)

        # Submit the Olive workflow to AML using the generated config
        file_path = os.path.join(os.getcwd(), azure_olive_config_path)
        olive_run(file_path)


if __name__ == "__main__":
    main()
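
Note: the `utils` helpers imported above (`get_aml_client` and `create_dataset`) are not part of this diff. A minimal, hypothetical sketch of what they plausibly look like, assuming the `azure-ai-ml` client and data-asset APIs; the real implementations in `finetuning/utils.py` may differ:

```python
# Hypothetical sketch of the helpers imported by invoke_olive.py.
from azure.ai.ml import MLClient
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities import Data
from azure.identity import DefaultAzureCredential


def get_aml_client(subscription_id, resource_group, workspace_name):
    """Build an MLClient for the given workspace using the ambient credential."""
    return MLClient(
        credential=DefaultAzureCredential(),
        subscription_id=subscription_id,
        resource_group_name=resource_group,
        workspace_name=workspace_name,
    )


def create_dataset(ml_client, local_path, name, version, description=""):
    """Upload a local file as a versioned data asset. The returned asset's
    .path is an azureml:// datastore URI, which invoke_olive.py splits on
    "/datastores/.*/paths/" to recover the blob-relative path."""
    data = Data(
        path=local_path,
        type=AssetTypes.URI_FILE,
        name=name,
        version=version,
        description=description,
    )
    return ml_client.data.create_or_update(data)
```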
119 changes: 119 additions & 0 deletions configs/llama-v2-7b/finetuning/olive-config-azureml-template.json
@@ -0,0 +1,119 @@
{
    "azureml_client": {
        "aml_config_path": "<aml_config_path>",
        "read_timeout": 4000,
        "max_operation_retries": 4,
        "operation_retry_interval": 5,
        "keyvault_name": "<keyvault_name>"
    },
    "systems": {
        "local_system": {
            "type": "LocalSystem"
        },
        "aml_system": {
            "type": "AzureML",
            "config": {
                "aml_compute": "<aml_compute_name>",
                "accelerators": [
                    "gpu"
                ],
                "hf_token": true,
                "aml_environment_config": {
                    "name": "<aml_environment_name>",
                    "version": "<aml_environment_version>"
                }
            }
        }
    },
    "input_model": {
        "type": "PyTorchModel",
        "config": {
            "hf_config": {
                "model_name": "meta-llama/Llama-2-7b-hf",
                "task": "text-generation",
                "from_pretrained_args": {
                    "trust_remote_code": true
                }
            }
        }
    },
    "data_configs": {
        "dataset_default_train": {
            "name": "dataset_default",
            "type": "HuggingfaceContainer",
            "params_config": {
                #data_configs_data_files_extension_start
                <!-- "data_name": "<data_configs_data_files_extension>", -->
                #data_configs_data_files_extension_end
                "data_files": {
                    "type": "azureml_datastore",
                    "config": {
                        "azureml_client": {
                            "aml_config_path": "<aml_config_path>"
                        },
                        "datastore_name": "workspaceblobstore",
                        "relative_path": "<data_configs_data_files>"
                    }
                },
                "split": "<data_configs_split>",
                "component_kwargs": {
                    "pre_process_data": {
                        "dataset_type": "<dataset_type>",
                        "text_cols": <text_cols>,
                        "text_template": "<text_template>",
                        "corpus_strategy": "<line_by_line>",
                        "source_max_len": <source_max_len>,
                        "pad_to_max_len": <pad_to_max_len>,
                        "use_attention_mask": false
                    }
                }
            }
        }
    },
    "passes": {
        "qlora": {
            "type": "QLoRA",
            "config": {
                "compute_dtype": "<compute_dtype>",
                "quant_type": "<quant_type>",
                "double_quant": <double_quant>,
                "lora_r": <lora_r>,
                "lora_alpha": <lora_alpha>,
                "lora_dropout": <lora_dropout>,
                "train_data_config": "dataset_default_train",
                "eval_dataset_size": <eval_dataset_size>,
                "training_args": {
                    "seed": <training_args_seed>,
                    "data_seed": <training_args_data_seed>,
                    "per_device_train_batch_size": <per_device_train_batch_size>,
                    "per_device_eval_batch_size": <per_device_eval_batch_size>,
                    "gradient_accumulation_steps": <gradient_accumulation_steps>,
                    "gradient_checkpointing": <gradient_checkpointing>,
                    "learning_rate": <learning_rate>,
                    "num_train_epochs": <num_train_epochs>,
                    "max_steps": <max_steps>,
                    "logging_steps": 10,
                    "evaluation_strategy": "steps",
                    "eval_steps": 187,
                    "group_by_length": true,
                    "adam_beta2": 0.999,
                    "max_grad_norm": 0.3,
                    "output_dir": "outputs/<output_dir>"
                }
            }
        }
    },
    "engine": {
        "log_severity_level": 0,
        "search_strategy": false,
        "evaluate_input_model": false,
        "execution_providers": [
            "CUDAExecutionProvider"
        ],
        "host": "aml_system",
        "cache_dir": "model_cache",
        "output_dir": "outputs/models/qlora",
        "target": "aml_system"
    }
}