Skip to content

Commit

Permalink
add buckets setting for lm_eval (#2044)
Browse files Browse the repository at this point in the history
* add buckets setting for lm_eval

Signed-off-by: xinhe3 <[email protected]>

* clear graph cache to avoid OOM

Signed-off-by: xinhe3 <[email protected]>

---------

Signed-off-by: xinhe3 <[email protected]>
Co-authored-by: xinhe3 <[email protected]>
  • Loading branch information
xin3he and xinhe3 authored Oct 29, 2024
1 parent f176798 commit c186708
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 2 deletions.
1 change: 1 addition & 0 deletions neural_compressor/evaluation/lm_eval/accuracy.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ def cli_evaluate(args) -> None:
},
)
lm.pad_to_buckets = args.pad_to_buckets
lm.buckets = args.buckets

results = evaluator.simple_evaluate(
model=lm,
Expand Down
18 changes: 16 additions & 2 deletions neural_compressor/evaluation/lm_eval/models/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,14 @@ def __init__(
peft: Optional[str] = None,
autogptq: Optional[Union[bool, str]] = False,
pad_to_buckets: Optional[Union[bool]] = False,
buckets: Optional[list] = [32, 64, 128, 256, 512, 1024, 2048, 4096],
model_format: Optional[str] = "torch",
**kwargs,
) -> None:
super().__init__()
self.pad_to_buckets = pad_to_buckets
self.buckets = buckets
self.last_bucket = -1
self.model_format = model_format
# optionally: take in an already-initialized transformers.PreTrainedModel
if not isinstance(pretrained, str):
Expand Down Expand Up @@ -874,6 +877,18 @@ def tok_decode(self, tokens, skip_special_tokens=True):
elif self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM:
return self.tokenizer.decode(tokens, skip_special_tokens=skip_special_tokens)

def find_bucket(self, length):
    """Return the smallest configured bucket size that can hold an input of ``length`` tokens.

    HPU graphs are compiled per input shape, so inputs are padded up to a fixed set of
    bucket lengths (``self.buckets``) to bound the number of compiled graphs. When the
    selected bucket changes from the previous call, the model's graph cache is cleared
    to avoid accumulating graphs for stale shapes (OOM protection).

    Args:
        length (int): sequence length of the current input batch.

    Returns:
        int: the chosen bucket length (>= ``length``).

    Exits the process with a non-zero status if ``length`` exceeds every configured
    bucket, after logging guidance for the user.
    """
    # First bucket that fits; None if the input is longer than every bucket.
    # self.buckets is assumed sorted ascending (default config is) — TODO confirm.
    bucket = next((b for b in self.buckets if b >= length), None)
    if bucket is None:
        eval_logger.error(f"The input_length={length} exceeds the maximum value in buckets={self.buckets}")
        eval_logger.error("Please add a higher value into the buckets list for this case.")
        # Bug fix: was exit(0), which signaled success to the shell/CI on a fatal
        # configuration error. Exit non-zero so callers can detect the failure.
        exit(1)
    if self.last_bucket != bucket:
        self.model.clear_cache()  # clear graph cache to avoid OOM
        self.last_bucket = bucket
    return self.last_bucket

def _model_call(self, inps, attn_mask=None, labels=None):
"""
:param inps: torch.Tensor
Expand Down Expand Up @@ -943,8 +958,7 @@ def _model_call(self, inps, attn_mask=None, labels=None):
if self.pad_to_buckets: # use buckets to pad inputs
bs, seq_length = inps.shape
padding_length = 0
buckets = [64, 128, 256, 512, 1024, 2048, 4096, 8192]
bucket_length = [b for b in buckets if b >= seq_length][0]
bucket_length = self.find_bucket(seq_length)
padding_length = bucket_length - seq_length
inps = F.pad(inps, (0, padding_length), value=self.model.config.pad_token_id)
output = self.model(inps)
Expand Down
2 changes: 2 additions & 0 deletions neural_compressor/evaluation/lm_eval/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def __init__(
seed=[0, 1234, 1234],
trust_remote_code=False,
pad_to_buckets=None, # used by HPU to align input length for performance.
buckets=[32, 64, 128, 256, 512, 1024, 2048, 4096], # used by HPU to limit input length range.
):
self.model = model
self.tasks = tasks
Expand Down Expand Up @@ -81,3 +82,4 @@ def __init__(
self.pad_to_buckets = False
else:
self.pad_to_buckets = pad_to_buckets
self.buckets = buckets

0 comments on commit c186708

Please sign in to comment.