add pad_to_buckets in evaluation for hpu performance (#2011)
* add pad_to_buckets in evaluation for hpu performance
---------

Signed-off-by: xin3he <[email protected]>
xin3he authored Sep 27, 2024
1 parent b6b7d7c commit 7bbc473
Showing 6 changed files with 102 additions and 788 deletions.
@@ -12,6 +12,7 @@
from torch.nn.functional import pad
from torch.utils.data import DataLoader
from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
from neural_compressor.torch.utils import is_hpex_available

parser = argparse.ArgumentParser()
parser.add_argument(
@@ -324,22 +325,26 @@ def run_fn_for_gptq(model, dataloader_for_calibration, *args):
user_model, _ = get_user_model()
tokenizer = AutoTokenizer.from_pretrained(args.model)
config = AutoConfig.from_pretrained(args.model)
user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)), user_model)
user_model = load(
os.path.abspath(os.path.expanduser(args.output_dir)),
user_model,
device="hpu" if is_hpex_available() else "cpu",
)
setattr(user_model, "config", config)
else:
user_model, tokenizer = get_user_model()


if args.accuracy:
user_model.eval()
from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
eval_args = LMEvalParser(
model="hf",
user_model=user_model,
tokenizer=tokenizer,
batch_size=args.batch_size,
tasks=args.tasks,
device="cpu",
device="hpu" if is_hpex_available() else "cpu",
)
results = evaluate(eval_args)
for task_name in args.tasks.split(","):
@@ -352,7 +357,7 @@ def run_fn_for_gptq(model, dataloader_for_calibration, *args):

if args.performance:
user_model.eval()
from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
import time

samples = args.iters * args.batch_size
@@ -363,7 +368,7 @@ def run_fn_for_gptq(model, dataloader_for_calibration, *args):
batch_size=args.batch_size,
tasks=args.tasks,
limit=samples,
device="cpu",
device="hpu" if is_hpex_available() else "cpu",
)
start = time.time()
results = evaluate(eval_args)
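For orientation, here is a minimal sketch of the evaluation call the example script makes after this change. It uses only names that appear in the diff above (`LMEvalParser`, `evaluate`, `is_hpex_available`); the checkpoint, task, and batch size are placeholder values chosen for illustration, not taken from the commit.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
from neural_compressor.torch.utils import is_hpex_available

# Placeholder model/tokenizer; the example script builds these from args.model.
model_name = "facebook/opt-125m"
user_model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

eval_args = LMEvalParser(
    model="hf",
    user_model=user_model,
    tokenizer=tokenizer,
    batch_size=8,
    tasks="lambada_openai",
    # Run on Gaudi (HPU) when the Habana PyTorch extension is available,
    # otherwise fall back to CPU; the same device selection as in the diff.
    device="hpu" if is_hpex_available() else "cpu",
)
results = evaluate(eval_args)
```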
69 changes: 62 additions & 7 deletions neural_compressor/evaluation/lm_eval/accuracy.py
@@ -36,18 +36,26 @@
from pathlib import Path
from typing import Union

import lm_eval
import numpy as np
from lm_eval import utils
from lm_eval import evaluator, utils
from lm_eval.loggers import WandbLogger
from lm_eval.tasks import TaskManager
from lm_eval.utils import make_table, simple_parse_args_string

from neural_compressor.evaluation.lm_eval import evaluator
from neural_compressor.evaluation.lm_eval.evaluator import request_caching_arg_to_dict

DEFAULT_RESULTS_FILE = "results.json"


def request_caching_arg_to_dict(cache_requests: str) -> dict:
request_caching_args = {
"cache_requests": cache_requests in {"true", "refresh"},
"rewrite_requests_cache": cache_requests == "refresh",
"delete_requests_cache": cache_requests == "delete",
}

return request_caching_args


def _handle_non_serializable(o):
if isinstance(o, np.int64) or isinstance(o, np.int32):
return int(o)
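The `request_caching_arg_to_dict` helper above (now defined locally rather than imported from the evaluator module) maps the CLI's `cache_requests` string onto the three flags that are later forwarded to lm-eval; the mappings below follow directly from its body.

```python
request_caching_arg_to_dict("true")
# -> {"cache_requests": True, "rewrite_requests_cache": False, "delete_requests_cache": False}

request_caching_arg_to_dict("refresh")
# -> {"cache_requests": True, "rewrite_requests_cache": True, "delete_requests_cache": False}

request_caching_arg_to_dict("delete")
# -> {"cache_requests": False, "rewrite_requests_cache": False, "delete_requests_cache": True}
```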
@@ -143,8 +151,57 @@ def cli_evaluate(args) -> None:

request_caching_args = request_caching_arg_to_dict(cache_requests=args.cache_requests)

### update model with user_model ###
if args.model_args is None:
args.model_args = ""
# replace lm-eval's HFLM with the local implementation.
from .models.huggingface import HFLM

lm_eval.api.registry.MODEL_REGISTRY["hf-auto"] = HFLM
lm_eval.api.registry.MODEL_REGISTRY["hf"] = HFLM
lm_eval.api.registry.MODEL_REGISTRY["huggingface"] = HFLM

if args.user_model is not None:
# use a tiny model to build the LM wrapper.
print(
"We use 'pretrained=Muennighoff/tiny-random-bert' "
+ "to build the `LM` instance; the model that actually runs is the user_model you passed."
)
lm = lm_eval.api.registry.get_model(args.model).create_from_arg_string(
"pretrained=Muennighoff/tiny-random-bert",
{
"batch_size": args.batch_size,
"max_batch_size": args.max_batch_size,
"device": args.device,
},
)
lm._model = args.user_model
if args.tokenizer is not None:
lm.tokenizer = args.tokenizer
else:
assert False, "Please provide tokenizer in evaluation function"
elif isinstance(args.model_args, dict):
lm = lm_eval.api.registry.get_model(args.model).create_from_arg_obj(
args.model_args,
{
"batch_size": args.batch_size,
"max_batch_size": args.max_batch_size,
"device": args.device,
},
)
else:
lm = lm_eval.api.registry.get_model(args.model).create_from_arg_string(
args.model_args,
{
"batch_size": args.batch_size,
"max_batch_size": args.max_batch_size,
"device": args.device,
},
)
lm.pad_to_buckets = args.pad_to_buckets

results = evaluator.simple_evaluate(
model=args.model,
model=lm,
model_args=args.model_args,
tasks=task_names,
num_fewshot=args.num_fewshot,
@@ -163,8 +220,6 @@ def cli_evaluate(args) -> None:
random_seed=args.seed[0],
numpy_random_seed=args.seed[1],
torch_random_seed=args.seed[2],
user_model=args.user_model, # to validate the model in memory,
tokenizer=args.tokenizer, # to use tokenizer in mem,
**request_caching_args,
)
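Putting the pieces of this hunk together, the sketch below shows roughly how the patched `cli_evaluate` now builds the LM object it hands to lm-eval's `evaluator.simple_evaluate`. The `build_lm` helper and its argument values are illustrative only, and the comment on `pad_to_buckets` reflects the commit's stated goal (HPU performance); how the flag is actually consumed lives in the patched HFLM, which is not shown in this excerpt.

```python
import lm_eval

# The HFLM subclass shipped with neural_compressor, registered in place of
# upstream lm-eval's HFLM by the code above.
from neural_compressor.evaluation.lm_eval.models.huggingface import HFLM

lm_eval.api.registry.MODEL_REGISTRY["hf"] = HFLM


def build_lm(user_model, tokenizer, batch_size, device, pad_to_buckets):
    """Illustrative helper mirroring the in-memory-model branch of cli_evaluate."""
    # Build the wrapper from a tiny checkpoint; the model that actually runs
    # is the in-memory user_model swapped in below.
    lm = lm_eval.api.registry.get_model("hf").create_from_arg_string(
        "pretrained=Muennighoff/tiny-random-bert",
        {"batch_size": batch_size, "max_batch_size": None, "device": device},
    )
    lm._model = user_model
    lm.tokenizer = tokenizer
    # Presumably pads batches up to fixed bucket lengths so HPU graphs can be
    # reused instead of recompiled (assumption; handled inside the patched HFLM).
    lm.pad_to_buckets = pad_to_buckets
    return lm
```

The resulting `lm` object, rather than a model-name string, is what gets passed as `model=lm` to `evaluator.simple_evaluate`, which is why the `user_model` and `tokenizer` keyword arguments could be dropped from that call.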

(Diffs for the remaining changed files are not rendered in this view.)
