Commit 27e39b5: Merge branch 'EleutherAI:main' into main

Authored by timturing on Jan 7, 2025
2 parents: 014a7c0 + 6d62a69
Showing 11 changed files with 54 additions and 27 deletions.
2 changes: 1 addition & 1 deletion lm_eval/api/task.py
@@ -398,7 +398,7 @@ def build_all_requests(
             )
             cache_key += f"-tokenizer{tokenizer_name}"
 
-        cached_instances = load_from_cache(file_name=cache_key)
+        cached_instances = load_from_cache(file_name=cache_key, cache=cache_requests)
 
         if cache_requests and cached_instances and not rewrite_requests_cache:
             cached_instances = cached_instances[:limit]
4 changes: 3 additions & 1 deletion lm_eval/caching/cache.py
@@ -21,7 +21,9 @@
 FILE_SUFFIX = f".{HASH_PREFIX}.pickle"
 
 
-def load_from_cache(file_name):
+def load_from_cache(file_name: str, cache: bool = False):
+    if not cache:
+        return
     try:
         path = f"{PATH}/{file_name}{FILE_SUFFIX}"
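Note: with this change, reading from the request cache becomes opt-in. `load_from_cache` now returns early (implicitly `None`) unless the caller passes `cache=True`, and the `build_all_requests` call site above forwards its `cache_requests` flag explicitly. A minimal runnable sketch of the new contract, using stand-in paths rather than the harness's real cache constants:

import pickle
from typing import Any, Optional

PATH = "/tmp/lm_eval_cache"    # stand-in for the real cache directory
FILE_SUFFIX = ".cache.pickle"  # stand-in for the real hashed suffix

def load_from_cache(file_name: str, cache: bool = False) -> Optional[Any]:
    if not cache:
        # Caller did not request caching: skip disk access entirely.
        return None
    try:
        with open(f"{PATH}/{file_name}{FILE_SUFFIX}", "rb") as f:
            return pickle.load(f)
    except FileNotFoundError:
        return None

# Mirrors the task.py call site: the flag is forwarded, not assumed.
cache_requests = False
assert load_from_cache(file_name="mytask-rank0", cache=cache_requests) is None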
29 changes: 21 additions & 8 deletions lm_eval/models/huggingface.py
@@ -90,6 +90,7 @@ def __init__(
         delta: Optional[str] = None,
         autogptq: Optional[Union[bool, str]] = False,
         gptqmodel: Optional[bool] = False,
+        gguf_file: Optional[str] = None,
         **kwargs,
     ) -> None:
         super().__init__()
@@ -164,6 +165,7 @@ def __init__(
             pretrained,
             revision=revision,
             trust_remote_code=trust_remote_code,
+            gguf_file=gguf_file,
         )
 
         # determine which of 'causal' and 'seq2seq' backends to use for HF models
@@ -178,6 +180,7 @@ def __init__(
             revision=revision,
             trust_remote_code=trust_remote_code,
             use_fast_tokenizer=use_fast_tokenizer,
+            gguf_file=gguf_file,
         )
 
         # if we passed `pretrained` as a string, initialize our model now
@@ -196,6 +199,7 @@ def __init__(
                 delta=delta,
                 autogptq=autogptq,
                 gptqmodel=gptqmodel,
+                gguf_file=gguf_file,
                 **kwargs,
             )
 
@@ -508,12 +512,14 @@ def _get_config(
         pretrained: str,
         revision: str = "main",
         trust_remote_code: bool = False,
+        gguf_file: Optional[str] = None,
     ) -> None:
         """Return the model config for HuggingFace models"""
         self._config = transformers.AutoConfig.from_pretrained(
             pretrained,
             revision=revision,
             trust_remote_code=trust_remote_code,
+            gguf_file=gguf_file,
         )
 
     def _create_model(
@@ -535,6 +541,7 @@ def _create_model(
         delta: Optional[str] = None,
         autogptq: Optional[Union[bool, str]] = False,
         gptqmodel: Optional[bool] = False,
+        gguf_file: Optional[str] = None,
         **kwargs,
     ) -> None:
         """
@@ -579,6 +586,7 @@ def _create_model(
                 revision=revision,
                 torch_dtype=get_dtype(dtype),
                 trust_remote_code=trust_remote_code,
+                gguf_file=gguf_file,
                 **model_kwargs,
             )
         else:
@@ -676,21 +684,29 @@ def _create_tokenizer(
         revision: Optional[str] = "main",
         trust_remote_code: Optional[bool] = False,
         use_fast_tokenizer: Optional[bool] = True,
+        gguf_file: Optional[str] = None,
     ) -> None:
         """
         Helper method during initialization.
         Create a tokenizer object corresponding to the correct
         tokenizer for value of `pretrained`, or use the pre-initialized tokenizer passed.
         """
+        kwargs = {
+            "revision": revision,
+            "trust_remote_code": trust_remote_code,
+        }
+
+        # gguf format embeds tokenizer and is not compatible with hf tokenizer `use_fast` param
+        if gguf_file is not None:
+            kwargs["gguf_file"] = gguf_file
+        else:
+            kwargs["use_fast"] = use_fast_tokenizer
+
         if tokenizer:
             if isinstance(tokenizer, str):
                 self.tokenizer = transformers.AutoTokenizer.from_pretrained(
-                    tokenizer,
-                    revision=revision,
-                    trust_remote_code=trust_remote_code,
-                    use_fast=use_fast_tokenizer,
+                    tokenizer, **kwargs
                 )
             else:
                 assert isinstance(
@@ -705,10 +721,7 @@ def _create_tokenizer(
             # get the HF hub name via accessor on model
             model_name = self.model.name_or_path
             self.tokenizer = transformers.AutoTokenizer.from_pretrained(
-                model_name,
-                revision=revision,
-                trust_remote_code=trust_remote_code,
-                use_fast=use_fast_tokenizer,
+                model_name, **kwargs
             )
         return None
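Taken together, these edits thread a single new `gguf_file` argument from `HFLM.__init__` through `AutoConfig`, `AutoTokenizer`, and the model constructor, and the tokenizer kwargs are rebuilt so that `use_fast` is never combined with a GGUF checkpoint (GGUF files embed their own tokenizer). A hedged usage sketch; the repo id and file name below are hypothetical, and it assumes a transformers version whose `from_pretrained` accepts `gguf_file`:

import transformers

repo = "some-org/some-model-GGUF"   # hypothetical repo id
fname = "some-model.Q4_K_M.gguf"    # hypothetical quantized file

config = transformers.AutoConfig.from_pretrained(repo, gguf_file=fname)
# Note: no use_fast= here -- the tokenizer is read out of the GGUF file itself.
tokenizer = transformers.AutoTokenizer.from_pretrained(repo, gguf_file=fname)
model = transformers.AutoModelForCausalLM.from_pretrained(repo, gguf_file=fname)

Through the harness CLI this would presumably look like `--model hf --model_args pretrained=some-org/some-model-GGUF,gguf_file=some-model.Q4_K_M.gguf`.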
2 changes: 1 addition & 1 deletion lm_eval/tasks/catalan_bench/phrases_va/phrases_ca-va.yaml
@@ -1,5 +1,5 @@
 # File generated by `create-yamls.py`
-include: _phrases_va_common.yaml
+include: _phrases_va_common
 task: phrases_ca-va
 doc_to_text: 'Oració en català: {{ca}}
2 changes: 1 addition & 1 deletion lm_eval/tasks/catalan_bench/phrases_va/phrases_va-ca.yaml
@@ -1,5 +1,5 @@
 # File generated by `create-yamls.py`
-include: _phrases_va_common.yaml
+include: _phrases_va_common
 task: phrases_va-ca
 doc_to_text: 'Oració en valencià: {{va}}
2 changes: 1 addition & 1 deletion lm_eval/tasks/spanish_bench/phrases_es/phrases_es-va.yaml
@@ -1,5 +1,5 @@
 # File generated by `create-yamls.py`
-include: _phrases_es_common.yaml
+include: _phrases_es_common
 task: phrases_es-va
 doc_to_text: 'Oració en espanyol: {{es}}
2 changes: 1 addition & 1 deletion lm_eval/tasks/spanish_bench/phrases_es/phrases_va-es.yaml
@@ -1,5 +1,5 @@
 # File generated by `create-yamls.py`
-include: _phrases_es_common.yaml
+include: _phrases_es_common
 task: phrases_va-es
 doc_to_text: 'Oració en valencià: {{va}}
38 changes: 25 additions & 13 deletions scripts/zeno_visualize.py
@@ -109,13 +109,14 @@ def main():
         if model_index == 0:  # Only need to assemble data for the first model
             metrics = []
             for metric in config["metric_list"]:
-                metrics.append(
-                    ZenoMetric(
-                        name=metric["metric"],
-                        type="mean",
-                        columns=[metric["metric"]],
-                    )
-                )
+                if metric.get("aggregation") == "mean":
+                    metrics.append(
+                        ZenoMetric(
+                            name=metric["metric"],
+                            type="mean",
+                            columns=[metric["metric"]],
+                        )
+                    )
             project = client.create_project(
                 name=args.project_name + (f"_{task}" if len(tasks) > 1 else ""),
                 view="text-classification",
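The effect: a Zeno mean metric is only created for entries that explicitly declare `aggregation: mean`. A toy check of the guard, with a made-up `metric_list`:

metric_list = [
    {"metric": "acc", "aggregation": "mean"},
    {"metric": "bleu"},  # no aggregation key -> no ZenoMetric created
]
kept = [m["metric"] for m in metric_list if m.get("aggregation") == "mean"]
print(kept)  # ['acc']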
@@ -168,7 +169,11 @@ def generate_dataset(
     Returns:
         pd.Dataframe: A dataframe that is ready to be uploaded to Zeno.
     """
-    ids = [x["doc_id"] for x in data]
+    ids = (
+        [x["doc_id"] for x in data]
+        if not config.get("filter_list")
+        else [f"{x['doc_id']}.{x['filter']}" for x in data]
+    )
     labels = [x["target"] for x in data]
     instance = [""] * len(ids)
 
@@ -190,6 +195,7 @@ def generate_dataset(
     return pd.DataFrame(
         {
             "id": ids,
+            "doc_id": [x["doc_id"] for x in data],
             "data": instance,
             "input_len": [len(x) for x in instance],
             "labels": labels,
@@ -208,8 +214,15 @@ def generate_system_df(data, config):
     Returns:
         pd.Dataframe: A dataframe that is ready to be uploaded to Zeno as a system.
     """
-    ids = [x["doc_id"] for x in data]
+    ids = (
+        [x["doc_id"] for x in data]
+        if not config.get("filter_list")
+        else [f"{x['doc_id']}.{x['filter']}" for x in data]
+    )
     system_dict = {"id": ids}
+    system_dict["doc_id"] = [x["doc_id"] for x in data]
+    if config.get("filter_list"):
+        system_dict["filter"] = [x["filter"] for x in data]
     system_dict["output"] = [""] * len(ids)
 
     if config["output_type"] == "loglikelihood":
@@ -228,11 +241,10 @@ def generate_system_df(data, config):
         system_dict["output"] = [str(x["filtered_resps"][0]) for x in data]
         system_dict["output_length"] = [len(str(x["filtered_resps"][0])) for x in data]
 
-    metrics = {}
-    for metric in config["metric_list"]:
-        if "aggregation" in metric and metric["aggregation"] == "mean":
-            metrics[metric["metric"]] = [x[metric["metric"]] for x in data]
-
+    metrics = {
+        metric["metric"]: [x[metric["metric"]] for x in data]
+        for metric in config["metric_list"]
+    }
     system_dict.update(metrics)
     system_df = pd.DataFrame(system_dict)
     return system_df
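Both `generate_dataset` and `generate_system_df` now build their `id` column identically, which keeps dataset and system rows aligned when a task defines a `filter_list`: a bare doc id would collide across filters, so it is suffixed with the filter name, while the raw `doc_id` (and, for systems, the `filter`) is preserved as its own column. A self-contained illustration with invented rows:

data = [
    {"doc_id": 0, "filter": "strict-match"},
    {"doc_id": 0, "filter": "flexible-extract"},
]
config = {"filter_list": [{"name": "strict-match"}]}  # truthy: filters in use

ids = (
    [x["doc_id"] for x in data]
    if not config.get("filter_list")
    else [f"{x['doc_id']}.{x['filter']}" for x in data]
)
print(ids)  # ['0.strict-match', '0.flexible-extract'] -- unique per (doc, filter)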
