mila-iqia · Delaunay · Aug 29, 2024 · Aug 30, 2024 · Aug 30, 2024 · Aug 30, 2024
diff --git a/.gitignore b/.gitignore
@@ -56,4 +56,5 @@ benchmarks/voir
 benchmarks/*/base/
 benchmarks/lightning/lightning_logs/
 
-benchmarks/*/src/
+benchmarks/*/src/
+benchmarks/llm/tune
diff --git a/benchmarks/diffusion/main.py b/benchmarks/diffusion/main.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 from dataclasses import dataclass
 
 from accelerate import Accelerator

diff --git a/benchmarks/llm/benchfile.py b/benchmarks/llm/benchfile.py
@@ -6,6 +6,10 @@
 from milabench.commands import SimpleCommand
 
 
+URL = "https://github.com/pytorch/torchtune.git"
+BRANCH = "a83eeff0079a73ee04a11e8fc2573ed8f671b231"
+
+
 class Torchtune(TorchrunAllGPU):
     @property
     def executable(self):
@@ -40,6 +44,14 @@ class Llm(Package):
     async def install(self):
         await super().install()  # super() call installs the requirements
 
+        # Clone the pinned torchtune revision (BRANCH is a commit hash) unless already checked out
+        tune = self.dirs.code / "tune"
+        if not tune.exists():
+            tune.clone_subtree(URL, BRANCH)
+
+        # Install the local torchtune checkout in editable mode
+        await self.pip_install("-e", str(tune))
+
     def build_run_plan(self):
         exec = SimpleCommand(self)
         return TorchtuneAllNodes(exec).use_stdout()

diff --git a/benchmarks/llm/configs/llama3_70B_full.yaml b/benchmarks/llm/configs/llama3_70B_full.yaml
@@ -20,6 +20,7 @@
 tokenizer:
   _component_: torchtune.models.llama3.llama3_tokenizer
   path: /tmp/Meta-Llama-3.1-70B-Instruct/original/tokenizer.model
+  max_seq_len: null
 
 # Dataset
 dataset:
@@ -32,7 +33,7 @@ model:
   _component_: torchtune.models.llama3_1.llama3_1_70b
 
 checkpointer:
-  _component_: torchtune.utils.FullModelHFCheckpointer
+  _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
   checkpoint_files: [
     model-00001-of-00030.safetensors,
@@ -81,10 +82,11 @@ optimizer:
   foreach: False
   # Note: highly recommended to use fused=True optimizer flag
   # with CPU offload for faster optimizer step.
+
   fused: True
 
 loss:
-  _component_: torch.nn.CrossEntropyLoss
+  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
 
@@ -94,7 +96,7 @@ device: cuda
 
 # Memory management
 enable_activation_checkpointing: True
-memory_efficient_fsdp_wrap: True
+custom_sharded_layers: ['tok_embeddings', 'output']
 fsdp_cpu_offload: True
 
 # Reduced precision
@@ -104,6 +106,6 @@ dtype: bf16
 metric_logger:
   _component_: torchtune.utils.metric_logging.DiskLogger
   log_dir: ${output_dir}
-output_dir: /tmp/alpaca-llama3-finetune
+output_dir: /tmp/full-llama3_1-finetune
 log_every_n_steps: 1
 log_peak_memory_stats: False
diff --git a/benchmarks/llm/configs/llama3_70B_lora.yaml b/benchmarks/llm/configs/llama3_70B_lora.yaml
@@ -24,7 +24,7 @@ tokenizer:
 
 safetensors: true
 checkpointer:
-  _component_: torchtune.utils.FullModelHFCheckpointer
+  _component_: torchtune.training.checkpointing.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
   checkpoint_files: [
     model-00001-of-00030.safetensors,

diff --git a/benchmarks/llm/configs/llama3_8B_lora.yaml b/benchmarks/llm/configs/llama3_8B_lora.yaml
@@ -32,7 +32,7 @@ model:
   lora_alpha: 16
 
 checkpointer:
-  _component_: torchtune.utils.FullModelMetaCheckpointer
+  _component_: torchtune.training.checkpointing.FullModelMetaCheckpointer
   checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
   checkpoint_files: [
     consolidated.00.pth

diff --git a/benchmarks/llm/configs/llama3_8B_lora_single_device.yaml b/benchmarks/llm/configs/llama3_8B_lora_single_device.yaml
@@ -31,7 +31,7 @@ tokenizer:
   path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
 
 checkpointer:
-  _component_: torchtune.utils.FullModelMetaCheckpointer
+  _component_: torchtune.training.checkpointing.FullModelMetaCheckpointer
   checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
   checkpoint_files: [
     consolidated.00.pth

diff --git a/benchmarks/llm/configs/llama3_8B_qat_full.yaml b/benchmarks/llm/configs/llama3_8B_qat_full.yaml
@@ -29,7 +29,7 @@ model:
   _component_: torchtune.models.llama3_1.llama3_1_8b
 
 checkpointer:
-  _component_: torchtune.utils.FullModelMetaCheckpointer
+  _component_: torchtune.training.checkpointing.FullModelMetaCheckpointer
   checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
   checkpoint_files: [
     consolidated.00.pth

diff --git a/benchmarks/llm/configs/llama3_8B_qlora_single_device.yaml b/benchmarks/llm/configs/llama3_8B_qlora_single_device.yaml
@@ -30,7 +30,7 @@ tokenizer:
   path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
 
 checkpointer:
-  _component_: torchtune.utils.FullModelMetaCheckpointer
+  _component_: torchtune.training.checkpointing.FullModelMetaCheckpointer
   checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
   checkpoint_files: [
     consolidated.00.pth

diff --git a/benchmarks/llm/configs/mistral_ppo.yaml b/benchmarks/llm/configs/mistral_ppo.yaml
@@ -0,0 +1,182 @@
+# Config for single device RLHF full finetuning using PPO in ppo_full_finetune_single_device.py
+# using a Mistral 7B model.
+#
+# This config has been tested on an A100 80GB.
+# This config uses hyperparameters based on a small set of experiments and information
+# available from existing implementations.
+#
+# This config assumes that you've run the following command before launching
+# this run:
+#   tune download weqweasdas/RM-Mistral-7B --output-dir /tmp/RM-Mistral-7B/ --ignore-patterns=""
+#   tune download mistralai/Mistral-7B-Instruct-v0.2 --output-dir /tmp/Mistral-7B-Instruct-v0.2/ --hf-token HF_TOKEN
+#
+# You'll also need to ensure that {output_dir} exists beforehand, as checkpoints for policy and value models are saved in sub-folders.
+# The default config uses an optimizer from bitsandbytes. If you do not have it installed,
+# you can install it with
+#   pip install bitsandbytes
+#
+# To launch on a single device, run the following command from root:
+#   tune run ppo_full_finetune_single_device --config mistral/7B_full_ppo_low_memory
+#
+# You can add specific overrides through the command line. For example
+# to override the checkpointer directory while launching training
+# you can run:
+#   tune run ppo_full_finetune_single_device --config mistral/7B_full_ppo_low_memory checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
+#
+
+# Tokenizer
+tokenizer:
+  _component_: torchtune.models.mistral.mistral_tokenizer
+  path: /tmp/Mistral-7B-Instruct-v0.2/tokenizer.model
+  max_seq_len: null
+
+# Dataset
+dataset:
+  _component_: torchtune.datasets.text_completion_dataset
+  source: trl-internal-testing/sentiment-trl-style
+  max_seq_len: null
+  split: train
+  column: prompt
+  add_eos: False
+
+policy_model:
+  _component_: torchtune.models.mistral.mistral_7b
+
+# we need to manually build the mistral classifier model
+# because our reward model checkpoint has a larger vocabulary size (due to an added padding token)
+reward_and_value_model:
+  _component_: torchtune.models.mistral._component_builders.mistral_classifier
+  attn_dropout: 0.0
+  embed_dim: 4096
+  intermediate_dim: 14336
+  max_seq_len: 32768
+  norm_eps: 1.0e-05
+  num_classes: 1
+  num_heads: 32
+  num_kv_heads: 8
+  num_layers: 32
+  vocab_size: 32001
+
+# checkpointer for the policy model - update this if resuming from checkpoint
+checkpointer:
+  _component_: torchtune.training.FullModelHFCheckpointer
+  checkpoint_dir: /tmp/Mistral-7B-Instruct-v0.2/
+  checkpoint_files:
+    [
+      "pytorch_model-00001-of-00003.bin",
+      "pytorch_model-00002-of-00003.bin",
+      "pytorch_model-00003-of-00003.bin",
+    ]
+  # this is the only place where you should update `recipe_checkpoint` if resuming training
+  recipe_checkpoint: null
+  output_dir: ${output_dir}/policy
+  model_type: MISTRAL
+
+# this should be set up identically to the policy model checkpointer at the start of training;
+# ensure `checkpoint_files` always points to the original policy weights, even if resuming training
+ref_policy_checkpointer:
+  _component_: torchtune.training.FullModelHFCheckpointer
+  checkpoint_dir: /tmp/Mistral-7B-Instruct-v0.2/
+  checkpoint_files:
+    [
+      "pytorch_model-00001-of-00003.bin",
+      "pytorch_model-00002-of-00003.bin",
+      "pytorch_model-00003-of-00003.bin",
+    ]
+  output_dir: ${output_dir}/policy
+  model_type: MISTRAL
+
+# checkpointer for the value model - update `checkpoint_files` if resuming from checkpoint
+# since this model will be identical to the reward model it's helpful to initialise this
+# from the trained reward model weights
+value_checkpointer:
+  _component_: torchtune.training.FullModelHFCheckpointer
+  checkpoint_dir: /tmp/RM-Mistral-7B/
+  checkpoint_files:
+    [
+      "model-00001-of-00003.safetensors",
+      "model-00002-of-00003.safetensors",
+      "model-00003-of-00003.safetensors",
+    ]
+  output_dir: ${output_dir}/value
+  model_type: REWARD
+
+# checkpointer for the reward model, ensure `checkpoint_files`
+# always points to the original reward model weights, even if resuming training
+reward_checkpointer:
+  _component_: torchtune.training.FullModelHFCheckpointer
+  checkpoint_dir: /tmp/RM-Mistral-7B/
+  checkpoint_files:
+    [
+      "model-00001-of-00003.safetensors",
+      "model-00002-of-00003.safetensors",
+      "model-00003-of-00003.safetensors",
+    ]
+  output_dir: ${output_dir}/value
+  model_type: REWARD
+
+resume_from_checkpoint: False
+output_dir: /tmp/mistral7b-ppo-finetune
+seed: null
+shuffle: True
+
+# Training env
+device: cuda
+
+# Training arguments
+batch_size: 64
+num_steps: 10000
+ppo_epochs: 2
+ppo_batch_size: 32
+gradient_accumulation_steps: 1
+
+# Memory management and performance
+compile: True
+optimizer:
+  _component_: bitsandbytes.optim.PagedAdamW
+  lr: 3e-6
+optimizer_in_bwd: True
+log_peak_memory_stats: False
+enable_activation_checkpointing: True
+
+# Reduced precision
+dtype: bf16
+
+# batch size for forward pass during generation
+forward_batch_size: 16
+max_generated_tokens: 58
+temperature: 0.7
+top_k: null
+
+# parameter for penalising generations shorter than `min_response_length`
+min_response_length: 18
+# parameter for penalising generations without a stop token
+penalise_no_eos: True
+# scalar penalty to apply when penalising
+reward_penalty: -3
+
+# tokens to consider as "end of sequence" tokens
+stop_token_ids: [
+    2, # eos_id
+    28723, # mistral "." token
+  ]
+whiten_rewards: False
+
+# GAE hyperparameters
+gamma: 1
+lmbda: 0.95
+
+# PPO hyperparameters
+loss:
+  _component_: torchtune.modules.rlhf.loss.PPOLoss
+  epsilon: 0.2
+  value_coeff: 0.1
+  value_clip_range: 0.2
+kl_coeff: 0.01
+
+# Logging
+metric_logger:
+  _component_: torchtune.utils.metric_logging.DiskLogger
+  log_dir: ${output_dir}
+
+log_every_n_steps: 1
diff --git a/benchmarks/llm/dev.yaml b/benchmarks/llm/dev.yaml
@@ -204,4 +204,36 @@ llm-qlora-single:
     repo_id="meta-llama/Meta-Llama-3.1-8B": true
     batch_size=8: true
     gradient_accumulation_steps=8: true
-
+
+
+#
+#
+#
+
+
+#
+# There are multiple known issues when loading the pretrained models:
+#   - the reward model checkpoint has missing weights
+#   - TODO: document the remaining issues
+llm-rlhf-single:
+  inherits: _llm
+  plan:
+    method: per_gpu
+  argv:
+    "{milabench_code}/recipes/ppo_full_finetune_single_device.py": true
+    --config: "{milabench_code}/configs/mistral_ppo.yaml"
+    epochs=1: true
+    output_dir={milabench_extra}/output: true
+    tokenizer.path={milabench_data}/mistral_7B/tokenizer.model: true
+    checkpointer.checkpoint_dir={milabench_data}/mistral_7B: true
+    ref_policy_checkpointer.checkpoint_dir={milabench_data}/mistral_7B: true
+    value_checkpointer.checkpoint_dir={milabench_data}/mistral_7B: true
+    reward_checkpointer.checkpoint_dir={milabench_data}/mistral_7B: true
+    checkpointer.output_dir={milabench_data}/mistral_7B: true
+    metric_logger.log_dir={milabench_extra}/metrics: true
+    repo_id="mistralai/Mistral-7B-Instruct-v0.2": true
+    safetensors=true: true
+    batch_size=64: true
+    num_steps=10000: true
+    ppo_epochs=2: true
+    ppo_batch_size=32: true
diff --git a/benchmarks/llm/prepare.py b/benchmarks/llm/prepare.py
@@ -5,10 +5,10 @@
 
 from omegaconf import OmegaConf
 from argklass import ArgumentParser
-from torchtune._cli.tune import TuneCLIParser
-
 from benchmate.ux import long_action
 
+from torchtune._cli.tune import TuneCLIParser
+
 
 @dataclass
 class Arguments:
@@ -83,6 +83,11 @@ def main():
     if "qlora" in config.get("model", {}).get("_component_", ""):
         load_model(args.recipe, config)
 
+    outdir = config.get("output_dir")
+    if outdir:
+        for path in ["policy", "value"]:
+            os.makedirs(os.path.join(outdir, path), exist_ok=True)
+
 
 if __name__ == "__main__":
     main()