chore(ci): remove black from CI, we already use ruff #773

Merged · 2 commits · Jan 29, 2025
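Note: the replacement is one-for-one, as the workflow and Makefile hunks below show. Summarizing the command mapping (an editor's summary of this diff, not part of the patch):

    black --check .   ->   ruff format . --diff   # report formatting differences, fail if any
    black .           ->   ruff format .          # rewrite files in place
    ruff check .      stays for linting; ruff check . --fix applies autofixes

The remaining hunks are the mechanical reformatting produced by running ruff format over the repository.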
5 changes: 1 addition & 4 deletions .github/workflows/check_code_quality.yml
@@ -45,11 +45,8 @@ jobs:
source venv/bin/activate
pip install --upgrade pip
pip install .[quality]
- name: Check style with black
run: |
source venv/bin/activate
black --check .
- name: Check style with ruff
run: |
source venv/bin/activate
ruff format . --diff
ruff check .
4 changes: 2 additions & 2 deletions Makefile
@@ -59,12 +59,12 @@ transformers_examples:

# Run code quality checks
style_check:
black --check .
ruff check .
ruff format . --diff

style:
black .
ruff check . --fix
ruff format .

# Utilities to release to PyPi
build_dist_install_tools:
2 changes: 1 addition & 1 deletion examples/language-modeling/run_clm.py
@@ -462,7 +462,7 @@ def main():
model = AutoModelForCausalLM.from_config(config, trust_remote_code=model_args.trust_remote_code)

n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values())
logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params")
logger.info(f"Training new model from scratch - Total size={n_params / 2**20:.2f}M params")

# We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch
# on a small vocab and want a smaller embedding size, remove this test.
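An aside on the only line this hunk changes (spacing around `/`): the dict keyed by `p.data_ptr()` counts parameters that share storage, such as tied embeddings, only once. A minimal sketch of the idea with a hypothetical toy model, not code from this PR:

    import torch.nn as nn

    # Two linear layers sharing one weight tensor (weight tying).
    a = nn.Linear(4, 4, bias=False)
    b = nn.Linear(4, 4, bias=False)
    b.weight = a.weight

    params = list(a.parameters()) + list(b.parameters())
    naive = sum(p.numel() for p in params)  # 32: the shared tensor is counted twice
    dedup = sum({p.data_ptr(): p.numel() for p in params}.values())  # 16: counted once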
1 change: 1 addition & 0 deletions examples/question-answering/trainer_qa.py
@@ -15,6 +15,7 @@
"""
A subclass of `Trainer` specific to Question-Answering tasks
"""

import math
import time

1 change: 1 addition & 0 deletions examples/question-answering/trainer_seq2seq_qa.py
@@ -15,6 +15,7 @@
"""
A subclass of `Trainer` specific to Question-Answering tasks
"""

import math
import time
from typing import Dict, List, Optional
1 change: 1 addition & 0 deletions examples/question-answering/utils_qa.py
@@ -15,6 +15,7 @@
"""
Post-processing utilities for question answering.
"""

import collections
import json
import logging
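The three question-answering hunks above make the same one-line change: ruff format enforces a blank line between a module docstring and the first statement. Illustration (not from the diff):

    """
    Post-processing utilities for question answering.
    """

    import collections  # ruff format inserts the blank line above this import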
6 changes: 3 additions & 3 deletions examples/summarization/run_summarization.py
@@ -525,9 +525,9 @@ def main():
return

if isinstance(tokenizer, tuple(MULTILINGUAL_TOKENIZERS)):
assert (
data_args.lang is not None
), f"{tokenizer.__class__.__name__} is a multilingual tokenizer which requires --lang argument"
assert data_args.lang is not None, (
f"{tokenizer.__class__.__name__} is a multilingual tokenizer which requires --lang argument"
)

tokenizer.src_lang = data_args.lang
tokenizer.tgt_lang = data_args.lang
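This is the first of many hunks showing the PR's main stylistic shift: for a long `assert condition, message`, the black style used here wrapped the condition in parentheses, while ruff format keeps the condition inline and parenthesizes the message. Side by side, repeating the hunk above:

    # black
    assert (
        data_args.lang is not None
    ), f"{tokenizer.__class__.__name__} is a multilingual tokenizer which requires --lang argument"

    # ruff format
    assert data_args.lang is not None, (
        f"{tokenizer.__class__.__name__} is a multilingual tokenizer which requires --lang argument"
    )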
14 changes: 7 additions & 7 deletions examples/text-classification/run_glue.py
@@ -13,7 +13,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Finetuning the library models for sequence classification on GLUE."""
"""Finetuning the library models for sequence classification on GLUE."""
# You can also adapt this script on your own text classification task. Pointers for this are left as comments.

import logging
@@ -158,9 +158,9 @@ def __post_init__(self):
train_extension = self.train_file.split(".")[-1]
assert train_extension in ["csv", "json"], "`train_file` should be a csv or a json file."
validation_extension = self.validation_file.split(".")[-1]
assert (
validation_extension == train_extension
), "`validation_file` should have the same extension (csv or json) as `train_file`."
assert validation_extension == train_extension, (
"`validation_file` should have the same extension (csv or json) as `train_file`."
)


@dataclass
@@ -329,9 +329,9 @@ def main():
if data_args.test_file is not None:
train_extension = data_args.train_file.split(".")[-1]
test_extension = data_args.test_file.split(".")[-1]
assert (
test_extension == train_extension
), "`test_file` should have the same extension (csv or json) as `train_file`."
assert test_extension == train_extension, (
"`test_file` should have the same extension (csv or json) as `train_file`."
)
data_files["test"] = data_args.test_file
else:
raise ValueError("Need either a GLUE task or a test file for `do_predict`.")
4 changes: 2 additions & 2 deletions examples/text-classification/run_xnli.py
@@ -14,8 +14,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Finetuning multi-lingual models on XNLI (e.g. Bert, DistilBERT, XLM).
Adapted from `examples/text-classification/run_glue.py`"""
"""Finetuning multi-lingual models on XNLI (e.g. Bert, DistilBERT, XLM).
Adapted from `examples/text-classification/run_glue.py`"""

import logging
import os
5 changes: 2 additions & 3 deletions optimum/exporters/neuron/__main__.py
@@ -662,7 +662,7 @@ def main_export(
)

logger.info(
f"The {NEURON_COMPILER} export succeeded and the exported model was saved at: " f"{output.as_posix()}"
f"The {NEURON_COMPILER} export succeeded and the exported model was saved at: {output.as_posix()}"
)
except ShapeError as e:
raise e
@@ -678,8 +678,7 @@ def main_export(
)
except Exception as e:
logger.error(
f"An error occured with the error message: {e}.\n The exported model was saved at: "
f"{output.as_posix()}"
f"An error occured with the error message: {e}.\n The exported model was saved at: {output.as_posix()}"
)


1 change: 1 addition & 0 deletions optimum/exporters/neuron/config.py
@@ -16,6 +16,7 @@
Common Neuron configuration classes that handle most of the features for building model specific
configurations.
"""

from typing import List

from ...utils import (
3 changes: 2 additions & 1 deletion optimum/exporters/neuron/convert.py
@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Neuron compiled model check and export functions."""

import copy
import time
from collections import OrderedDict
@@ -282,7 +283,7 @@ def validate_model_outputs(

if shape_failures:
msg = "\n".join(f"- {t[0]}: got {t[1]} (reference) and {t[2]} (neuron)" for t in shape_failures)
raise ShapeError("Output shapes do not match between reference model and the Neuron exported model:\n" "{msg}")
raise ShapeError("Output shapes do not match between reference model and the Neuron exported model:\n{msg}")

if value_failures:
msg = "\n".join(f"- {t[0]}: max diff = {t[1]}" for t in value_failures)
1 change: 0 additions & 1 deletion optimum/exporters/neuron/model_configs/decoder_configs.py
@@ -14,7 +14,6 @@
# limitations under the License.
"""Neuron export configurations for models using transformers_neuronx."""


from optimum.exporters.tasks import TasksManager

from ....neuron.models.granite.model import GraniteForSampling
12 changes: 6 additions & 6 deletions optimum/exporters/neuron/model_wrappers.py
@@ -247,13 +247,13 @@ def forward(self, input_ids, attention_mask):
batch_size = input_ids.shape[0]
sequence_length = input_ids.shape[1]
if self.sequence_length is not None:
assert (
self.sequence_length
), f"Different sequence length for the parallel partition({self.sequence_length}) and for dummy inputs({sequence_length}). Make sure that they have the same value."
assert self.sequence_length, (
f"Different sequence length for the parallel partition({self.sequence_length}) and for dummy inputs({sequence_length}). Make sure that they have the same value."
)
if self.batch_size is not None:
assert (
self.batch_size
), f"Different batch size for the parallel partition({self.batch_size}) and for dummy inputs({batch_size}). Make sure that they have the same value."
assert self.batch_size, (
f"Different batch size for the parallel partition({self.batch_size}) and for dummy inputs({batch_size}). Make sure that they have the same value."
)

encoder_output = self.model.encoder(
input_ids=input_ids, attention_mask=attention_mask, output_attentions=False, output_hidden_states=False
1 change: 0 additions & 1 deletion optimum/neuron/accelerate/utils/operations.py
@@ -14,7 +14,6 @@
# limitations under the License.
"""Custom operations related to accelerate for Neuron."""


import torch
from accelerate.utils.operations import recursively_apply

2 changes: 1 addition & 1 deletion optimum/neuron/distributed/encoder_decoder_models.py
@@ -268,7 +268,7 @@ def project(hidden_states, proj_layer, key_value_states, past_key_value):
if past_key_value is not None:
if len(past_key_value) != 2:
raise ValueError(
f"past_key_value should have 2 past states: keys and values. Got { len(past_key_value)} past states"
f"past_key_value should have 2 past states: keys and values. Got {len(past_key_value)} past states"
)
real_seq_length += past_key_value[0].shape[2] if query_length is None else query_length

2 changes: 1 addition & 1 deletion optimum/neuron/distributed/parallel_layers.py
@@ -108,7 +108,7 @@ def prepare_parallel_layer_specific_kwargs(cls, **parallel_layer_specific_kwargs
name for name in parallel_layer_specific_kwargs if name not in default_parallel_layer_specific_kwargs
]
logger.debug(
f'The following arguments are not allowed for {cls.__name__}: {", ".join(wrong_argument_names)}, they '
f"The following arguments are not allowed for {cls.__name__}: {', '.join(wrong_argument_names)}, they "
"will be ignored."
)

2 changes: 1 addition & 1 deletion optimum/neuron/distributed/parallelizers_manager.py
@@ -28,7 +28,7 @@


def parallelizer_classes_resolver(
model_type_to_parallelizer_class_name: Dict[str, str]
model_type_to_parallelizer_class_name: Dict[str, str],
) -> Dict[str, Type[Parallelizer]]:
modules = []
for module_name in _PARALLELIZER_CLASSES_MODULE_NAMES:
8 changes: 2 additions & 6 deletions optimum/neuron/modeling.py
@@ -179,9 +179,7 @@ def forward(
# last_hidden_state -> (batch_size, sequencen_len, hidden_size)
last_hidden_state = self.remove_padding(
[outputs[0]], dims=[0, 1], indices=[input_ids.shape[0], input_ids.shape[1]]
)[
0
] # Remove padding on batch_size(0), and sequence_length(1)
)[0] # Remove padding on batch_size(0), and sequence_length(1)
if len(outputs) > 1:
# pooler_output -> (batch_size, hidden_size)
pooler_output = self.remove_padding([outputs[1]], dims=[0], indices=[input_ids.shape[0]])[
@@ -264,9 +262,7 @@ def forward(
# token_embeddings -> (batch_size, sequencen_len, hidden_size)
token_embeddings = self.remove_padding(
[outputs[0]], dims=[0, 1], indices=[input_ids.shape[0], input_ids.shape[1]]
)[
0
] # Remove padding on batch_size(0), and sequence_length(1)
)[0] # Remove padding on batch_size(0), and sequence_length(1)
# sentence_embedding -> (batch_size, hidden_size)
sentence_embedding = self.remove_padding([outputs[1]], dims=[0], indices=[input_ids.shape[0]])[
0
4 changes: 3 additions & 1 deletion optimum/neuron/modeling_diffusion.py
@@ -1183,7 +1183,9 @@ def forward(
if output_hidden_states:
assert (
self.config.output_hidden_states or self.config.neuron.get("output_hidden_states")
) == output_hidden_states, "output_hidden_states is expected to be False since the model was compiled without hidden_states as output."
) == output_hidden_states, (
"output_hidden_states is expected to be False since the model was compiled without hidden_states as output."
)

input_ids = input_ids.to(torch.long) # dummy generator uses long int for tracing
inputs = (input_ids,)
8 changes: 3 additions & 5 deletions optimum/neuron/models/granite/hlo.py
@@ -35,7 +35,6 @@ def scale_mul(t, scale):


class GraniteForSamplingNoEmbeddingHlo:

def __init__(self, config: GraniteConfig, neuron_config: Optional[NeuronConfig] = None):
self.config = config
self.neuron_config = neuron_config
@@ -324,9 +323,9 @@ def layer(
tp_degree=self.config.tp_degree,
)
if self.neuron_config.fuse_mlp:
assert all(
(not (x) for x in [in0_weight, in1_weight, out_weight, in0_scales, in1_scales, out_scales])
), "in0, in1 and out weights have to be None"
assert all((not (x) for x in [in0_weight, in1_weight, out_weight, in0_scales, in1_scales, out_scales])), (
"in0, in1 and out weights have to be None"
)
in0_weight, in0_scales = mlp_in_weight, mlp_in_scales
out_weight, out_scales = mlp_out_weight, mlp_out_scales

@@ -688,7 +687,6 @@ def attention(

# Single Token Generation ("Prefetch"-style) ans speculative forward
if active_mask is not None:

n_active_tokens = key.sizes[1] if bsh_cache_layout else key.sizes[0]
if n_active_tokens > 1 and self.neuron_config and self.neuron_config.continuous_batching:
# For speculative forward + continuous batching, slice out samples in the batch size
6 changes: 3 additions & 3 deletions optimum/neuron/models/granite/model.py
@@ -159,9 +159,9 @@ def load_weights(self):

# Note: Automatic MLP padding is safe since zeros are *only* introduced to intermediary state
if self.neuron_config.fuse_mlp:
assert all(
getattr(mlp, attr, None) for attr in ["gate_proj", "up_proj"]
), "fuse_mlp need to have gate and up proj weights"
assert all(getattr(mlp, attr, None) for attr in ["gate_proj", "up_proj"]), (
"fuse_mlp need to have gate and up proj weights"
)
assert all(
getattr(mlp, attr, None).weight.shape[0] % self.config.tp_degree == 0
for attr in ["gate_proj", "up_proj"]
6 changes: 0 additions & 6 deletions optimum/neuron/models/granite/modules.py
@@ -18,7 +18,6 @@


class GraniteForCausalLM(module.PretrainedModel):

def __init__(self, config: GraniteConfig):
super().__init__()
dtype, _, _ = utils.parse_amp(config.amp)
@@ -34,7 +33,6 @@ def get_base_model(self):


class GraniteModel(module.LowMemoryModule):

def __init__(self, config: GraniteConfig):
super().__init__()
self.embed_tokens = module.LowMemoryEmbedding(config.vocab_size, config.hidden_size)
@@ -45,14 +43,12 @@ def __init__(self, config: GraniteConfig):


class GraniteRMSNorm(module.LowMemoryModule):

def __init__(self, config: GraniteConfig) -> None:
super().__init__()
self.weight = module.UninitializedParameter()


class GraniteDecoderLayer(module.LowMemoryModule):

def __init__(self, config: GraniteConfig):
super().__init__()
self.self_attn = GraniteAttention(config)
@@ -62,7 +58,6 @@ def __init__(self, config: GraniteConfig):


class GraniteAttention(module.LowMemoryModule):

def __init__(self, config: GraniteConfig):
super().__init__()
self.hidden_size = config.hidden_size
@@ -77,7 +72,6 @@ def __init__(self, config: GraniteConfig):


class GraniteMLP(module.LowMemoryModule):

def __init__(self, config: GraniteConfig):
super().__init__()
dtype, _, _ = utils.parse_amp(config.amp)
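Every hunk in modules.py deletes the same thing: the blank line black tolerated between a class statement and its first method, which ruff format strips. Sketch:

    # before
    class GraniteMLP(module.LowMemoryModule):

        def __init__(self, config): ...

    # after
    class GraniteMLP(module.LowMemoryModule):
        def __init__(self, config): ...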
6 changes: 3 additions & 3 deletions optimum/neuron/models/qwen2/model.py
@@ -156,9 +156,9 @@ def load_weights(self):

# Note: Automatic MLP padding is safe since zeros are *only* introduced to intermediary state
if self.neuron_config.fuse_mlp:
assert all(
getattr(mlp, attr, None) for attr in ["gate_proj", "up_proj"]
), "fuse_mlp need to have gate and up proj weights"
assert all(getattr(mlp, attr, None) for attr in ["gate_proj", "up_proj"]), (
"fuse_mlp need to have gate and up proj weights"
)
assert all(
getattr(mlp, attr, None).weight.shape[0] % self.config.tp_degree == 0
for attr in ["gate_proj", "up_proj"]
2 changes: 1 addition & 1 deletion optimum/neuron/utils/cache_utils.py
@@ -62,7 +62,7 @@


def load_custom_cache_repo_name_from_hf_home(
hf_home_cache_repo_file: Union[str, Path] = HF_HOME_CACHE_REPO_FILE
hf_home_cache_repo_file: Union[str, Path] = HF_HOME_CACHE_REPO_FILE,
) -> Optional[str]:
if Path(hf_home_cache_repo_file).exists():
with open(hf_home_cache_repo_file, "r") as fp:
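Here, as in parallelizers_manager.py above, ruff format adds a trailing comma to the last parameter of a signature that stays split across lines. A self-contained sketch (the constant's value is a placeholder, not taken from the repo):

    from pathlib import Path
    from typing import Optional, Union

    HF_HOME_CACHE_REPO_FILE = "~/.cache/hf_custom_cache_repo"  # placeholder value

    def load_custom_cache_repo_name_from_hf_home(
        hf_home_cache_repo_file: Union[str, Path] = HF_HOME_CACHE_REPO_FILE,  # trailing comma added
    ) -> Optional[str]:
        ...  # reads and returns the custom cache repo name, per the hunk above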
1 change: 1 addition & 0 deletions optimum/neuron/utils/peft_utils.py
@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities related to the PEFT library and support."""

import collections
import functools
import os
1 change: 1 addition & 0 deletions optimum/neuron/utils/version_utils.py
@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Version utilities."""

from typing import Optional

from packaging import version