Skip to content

Commit

Permalink
add comments
Browse files Browse the repository at this point in the history
  • Loading branch information
tastelikefeet committed Oct 31, 2024
1 parent eda5119 commit 2f43f86
Show file tree
Hide file tree
Showing 16 changed files with 361 additions and 10 deletions.
5 changes: 0 additions & 5 deletions swift/cli/app_ui.py

This file was deleted.

1 change: 0 additions & 1 deletion swift/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
'pt': 'swift.cli.pt',
'sft': 'swift.cli.sft',
'infer': 'swift.cli.infer',
'app-ui': 'swift.cli.app_ui',
'merge-lora': 'swift.cli.merge_lora',
'web-ui': 'swift.cli.web_ui',
'deploy': 'swift.cli.deploy',
Expand Down
20 changes: 20 additions & 0 deletions swift/hub/hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ class HubOperation:

@classmethod
def try_login(cls, token: Optional[str] = None) -> bool:
    """Attempt to log in to the hub.

    Args:
        token: The hub token to authenticate with. Defaults to None.

    Returns:
        bool: True if the login succeeded, False otherwise.

    Raises:
        NotImplementedError: Always raised by this implementation, which
            contains no login logic of its own (presumably concrete hub
            classes override it; confirm against the subclasses).
    """
    raise NotImplementedError()

@classmethod
Expand Down Expand Up @@ -103,6 +111,18 @@ def download_model(cls,


def create_repo(repo_id: str, *, token: Union[str, bool, None] = None, private: bool = False, **kwargs) -> RepoUrl:
    """Create a new model repository on the hub.

    The work is delegated to ``MSHub.create_model_repo``; its return value
    is wrapped in a ``RepoUrl``.

    Args:
        repo_id: The ID of the repository to create.
        token: The authentication token, forwarded unchanged to
            ``MSHub.create_model_repo``.
        private: Whether the repository should be private.
        **kwargs: Additional keyword arguments. They are accepted but not
            used by this function.

    Returns:
        RepoUrl: A ``RepoUrl`` whose ``url`` is the value returned by
            ``MSHub.create_model_repo`` (presumably the hub model ID;
            confirm against ``MSHub``).
    """
    created_repo = MSHub.create_model_repo(repo_id, token, private)
    return RepoUrl(url=created_repo)

Expand Down
17 changes: 17 additions & 0 deletions swift/llm/argument/base_args/base_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,23 @@

@dataclass
class BaseArguments(ModelArguments, TemplateArguments, QuantizeArguments, GenerationArguments, DataArguments):
"""
BaseArguments class is a dataclass that inherits from multiple argument classes:
ModelArguments, TemplateArguments, QuantizeArguments, GenerationArguments, and DataArguments.
Attributes:
seed (int): Random seed for reproducibility. Default is 42.
load_args (bool): Flag to determine if arguments should be loaded from sft_args.json. Default is True.
load_dataset_config (bool): Flag to determine if dataset configuration should be loaded. Default is False.
save_safetensors (bool): Flag to determine whether to save in safetensors format. Default is True.
hub_token (Optional[str]): SDK token for authentication. Default is None.
gpu_memory_fraction (Optional[float]): Fraction of GPU memory to be used. Default is None.
ignore_args_error (bool): Flag to ignore argument errors for notebook compatibility. Default is False.
Methods:
__post_init__: Initializes the class and loads/saves arguments.
_load_args: Loads specific attributes from sft_args.json.
"""
seed: int = 42
load_args: bool = True
load_dataset_config: bool = False
Expand Down
24 changes: 24 additions & 0 deletions swift/llm/argument/base_args/data_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,30 @@

@dataclass
class DataArguments:
"""
DataArguments class is a dataclass that holds various arguments related to dataset handling and processing.
Attributes:
dataset (List[str]): List of dataset identifiers or paths.
val_dataset (List[str]): List of validation dataset identifiers or paths.
split_dataset_ratio (float): Ratio to split the dataset for validation if val_dataset is empty. Default is 0.01.
dataset_seed (Optional[int]): Seed for dataset shuffling. Default is None.
num_proc (int): Number of processes to use for data loading and preprocessing. Default is 1.
load_from_cache_file (bool): Flag to load dataset from cache file. Default is False.
download_mode (Literal): Mode for downloading datasets. Default is 'reuse_dataset_if_exists'.
model_name (List[str]): List containing Chinese and English names of the model. Default is [None, None].
model_author (List[str]): List containing Chinese and English names of the model author. Default is [None, None].
streaming (bool): Flag to enable streaming of datasets. Default is False.
streaming_val_size (int): Size of the validation set when streaming. Default is 0.
streaming_buffer_size (int): Buffer size for streaming. Default is 16384.
custom_register_path (Optional[str]): Path to custom .py file for dataset registration. Default is None.
custom_dataset_info (Optional[str]): Path to custom dataset_info.json file. Default is None.
Methods:
_init_custom_register: Registers a custom .py file to datasets.
_init_custom_dataset_info: Registers a custom dataset_info.json file to datasets.
__post_init__: Initializes the class and sets up custom dataset registration and information.
"""
# dataset_id or dataset_name or dataset_path or ...
dataset: List[str] = field(
default_factory=list, metadata={'help': f'dataset choices: {list(DATASET_MAPPING.keys())}'})
Expand Down
17 changes: 17 additions & 0 deletions swift/llm/argument/base_args/generation_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,23 @@

@dataclass
class GenerationArguments:
"""
GenerationArguments class is a dataclass that holds various arguments related to text generation.
Attributes:
max_new_tokens (Optional[int]): Maximum number of new tokens to generate. Default is None (unlimited).
do_sample (Optional[bool]): Flag to enable sampling during generation. Default is None.
temperature (Optional[float]): Sampling temperature. Default is None.
top_k (Optional[int]): Top-k sampling parameter. Default is None.
top_p (Optional[float]): Top-p (nucleus) sampling parameter. Default is None.
repetition_penalty (Optional[float]): Penalty for repeated tokens. Default is None.
num_beams (int): Number of beams for beam search. Default is 1.
stop_words (List[str]): List of stop words to end generation. Default is an empty list.
Methods:
_handle_do_sample: Adjusts arguments based on the do_sample flag.
__post_init__: Initializes the class and handles the do_sample flag.
"""

# generation config
max_new_tokens: Optional[int] = None # Unlimited, constrained by max_model_len.
Expand Down
21 changes: 21 additions & 0 deletions swift/llm/argument/base_args/model_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,27 @@

@dataclass
class ModelArguments:
"""
ModelArguments class is a dataclass that holds various arguments related to model configuration and usage.
Attributes:
model (Optional[str]): Model identifier or path. Default is None.
model_type (Optional[str]): Type of the model group. Default is None.
model_revision (Optional[str]): Revision of the model. Default is None.
use_hf (bool): Flag to indicate if Hugging Face model should be used. Default is False, meaning ModelScope is used.
torch_dtype (Literal): Model data type. Default is None.
attn_impl (Literal): Attention implementation to use. Default is None.
model_kwargs (Optional[str]): Additional keyword arguments for the model. Default is None.
rope_scaling (Literal): Type of rope scaling to use. Default is None.
device_map_config (Optional[str]): Configuration for device mapping. Default is None.
device_max_memory (List[str]): List of maximum memory for each CUDA device. Default is an empty list.
local_repo_path (Optional[str]): Path to the local repository for model code. Default is None.
Methods:
parse_to_dict: Converts a JSON string or JSON file into a dictionary.
_init_model_kwargs: Prepares model keyword arguments and sets them to the environment.
_init_device_map_config: Prepares device map arguments.
"""
model: Optional[str] = None # model id or model path
model_type: Optional[str] = field(
default=None, metadata={'help': f'model_type choices: {list(MODEL_MAPPING.keys())}'})
Expand Down
15 changes: 15 additions & 0 deletions swift/llm/argument/base_args/quant_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,21 @@

@dataclass
class QuantizeArguments:
"""
QuantizeArguments is a dataclass that holds the configuration for model quantization.
Attributes:
quant_method (Literal['bnb', 'hqq', 'eetq', 'awq', 'gptq']): The quantization method to be used.
quant_bits (Literal[0, 1, 2, 3, 4, 8]): The number of bits to use for quantization.
hqq_axis (Optional[int]): The axis for hqq quantization.
bnb_4bit_compute_dtype (Literal['float16', 'bfloat16', 'float32', None]): The compute dtype for bnb 4-bit quantization.
bnb_4bit_quant_type (Literal['fp4', 'nf4']): The quantization type for bnb 4-bit quantization.
bnb_4bit_use_double_quant (bool): Whether to use double quantization for bnb 4-bit quantization.
Methods:
_init_quantization_config(): Initializes the quantization configuration based on the quant_method and quant_bits.
__post_init__(): Post-initialization method to set default values and initialize the quantization configuration.
"""
# awq, gptq, and aqlm need to be pre-quantized models,
# while bnb, hqq, and eetq can be quantized during SFT using the original models.
quant_method: Literal['bnb', 'hqq', 'eetq', 'awq', 'gptq'] = None
Expand Down
16 changes: 16 additions & 0 deletions swift/llm/argument/base_args/template_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,22 @@

@dataclass
class TemplateArguments:
"""
TemplateArguments class is a dataclass that holds various arguments related to template configuration and usage.
Attributes:
template (Optional[str]): Template identifier. Default is None, meaning use the template of the model_type.
system (Optional[str]): Override the default system in the template. Default is None.
max_length (Optional[int]): Maximum length for the template. Default is None.
truncation_strategy (Literal): Strategy for truncating the template. Default is 'truncation_left'.
tools_prompt (str): Override the default tools prompt in the template. Default is 'react_en'.
max_pixels (Optional[int]): Maximum number of pixels for the template. Default is None.
loss_scale (str): Loss scale for training. Default is 'default', meaning only calculate the loss of the response.
sequence_parallel_size (int): Size of sequence parallelism. Default is 1.
Methods:
__post_init__: Initializes the class and sets up the template based on model metadata.
"""
template: Optional[str] = field(
default=None, metadata={'help': f'template choices: {list(TEMPLATE_MAPPING.keys())}'})
system: Optional[str] = None # Override the default_system in the template.
Expand Down
15 changes: 15 additions & 0 deletions swift/llm/argument/deploy_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,21 @@

@dataclass
class DeployArguments(InferArguments):
"""
DeployArguments is a dataclass that extends InferArguments and is used to define
the arguments required for deploying a model.
Attributes:
host (str): The host address to bind the server to. Default is '0.0.0.0'.
port (int): The port number to bind the server to. Default is 8000.
api_key (Optional[str]): The API key for authentication. Default is None.
ssl_keyfile (Optional[str]): The path to the SSL key file. Default is None.
ssl_certfile (Optional[str]): The path to the SSL certificate file. Default is None.
owned_by (str): The owner of the deployment. Default is 'swift'.
served_model_name (Optional[str]): The name of the model being served. Default is None.
verbose (bool): Whether to log request information. Default is True.
log_interval (int): The interval for printing global statistics. Default is 10.
"""
host: str = '0.0.0.0'
port: int = 8000
api_key: Optional[str] = None
Expand Down
25 changes: 25 additions & 0 deletions swift/llm/argument/eval_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,31 @@

@dataclass
class EvalArguments(InferArguments):
"""
EvalArguments is a dataclass that extends InferArguments and is used to define
the arguments required for evaluating a model.
Attributes:
eval_dataset (List[str]): List of evaluation datasets. Default is an empty list.
eval_few_shot (Optional[int]): Number of few-shot examples for evaluation. Default is None.
eval_limit (Optional[str]): Limit number of each evaluation dataset. Default is None.
name (str): Name of the evaluation. Default is an empty string.
eval_url (Optional[str]): URL for evaluation, only useful when evaluating an OpenAI URL. Default is None.
eval_token (str): Token used when evaluating a URL. Default is 'EMPTY'.
eval_is_chat_model (Optional[bool]): Flag to indicate if the model is a chat model or a generate model. Default is None.
custom_eval_config (Optional[str]): Path to custom evaluation configuration. This is used when evaluating a custom dataset. Default is None.
eval_use_cache (bool): Flag to indicate if cache should be used. Default is False.
eval_output_dir (str): Directory to store evaluation outputs. Default is 'eval_outputs'.
eval_backend (Literal): Backend to use for evaluation. Default is 'OpenCompass'.
eval_batch_size (int): Batch size for evaluation. Default is 8.
deploy_timeout (int): Timeout for deployment. Default is 60.
do_sample (bool): Flag to indicate if sampling should be done. Default is False.
temperature (float): Temperature for sampling. Default is 0.
eval_nproc (int): Number of processes to use for evaluation. Default is 16. Reduce it if your evaluation times out.
Methods:
__post_init__: Initializes the class and sets up the evaluation dataset and model type.
"""

eval_dataset: List[str] = field(default_factory=list)
eval_few_shot: Optional[int] = None
Expand Down
21 changes: 21 additions & 0 deletions swift/llm/argument/export_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,27 @@

@dataclass
class ExportArguments(BaseArguments, MergeArguments):
"""
ExportArguments is a dataclass that inherits from BaseArguments and MergeArguments.
Attributes:
output_dir (Optional[str]): Directory where the output will be saved.
to_peft_format (bool): Flag to indicate if the output should be in PEFT format. This argument is useless for now.
quant_n_samples (int): Number of samples for quantization.
quant_seqlen (int): Sequence length for quantization.
quant_device_map (str): Device map for quantization, e.g., 'cpu', 'auto'.
quant_batch_size (int): Batch size for quantization.
to_ollama (bool): Flag to indicate whether to export the model to Ollama format.
gguf_file (Optional[str]): Path to the GGUF file when exporting to ollama format.
push_to_hub (bool): Flag to indicate if the output should be pushed to the model hub.
hub_model_id (Optional[str]): Model ID for the hub.
hub_private_repo (bool): Flag to indicate if the hub repository is private.
commit_message (str): Commit message for pushing to the hub.
to_megatron (bool): Flag to indicate whether to export the model to Megatron format.
to_hf (bool): Flag to indicate whether to export the model to Hugging Face format.
tp (int): Tensor parallelism degree.
pp (int): Pipeline parallelism degree.
"""
output_dir: Optional[str] = None

to_peft_format: bool = False
Expand Down
43 changes: 43 additions & 0 deletions swift/llm/argument/infer_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@

@dataclass
class LmdeployArguments:
"""
LmdeployArguments is a dataclass that holds the configuration for lmdeploy.
Attributes:
tp (int): Tensor parallelism degree. Default is 1.
cache_max_entry_count (float): Maximum entry count for cache. Default is 0.8.
quant_policy (int): Quantization policy, e.g., 4, 8. Default is 0.
vision_batch_size (int): Maximum batch size in VisionConfig. Default is 1.
"""

# lmdeploy
tp: int = 1
cache_max_entry_count: float = 0.8
Expand All @@ -27,6 +37,21 @@ class LmdeployArguments:

@dataclass
class VllmArguments:
"""
VllmArguments is a dataclass that holds the configuration for vllm.
Attributes:
gpu_memory_utilization (float): GPU memory utilization. Default is 0.9.
tensor_parallel_size (int): Tensor parallelism size. Default is 1.
max_num_seqs (int): Maximum number of sequences. Default is 256.
max_model_len (Optional[int]): Maximum model length. Default is None.
disable_custom_all_reduce (bool): Flag to disable custom all-reduce. Default is True.
enforce_eager (bool): Flag to enforce eager execution. Default is False.
limit_mm_per_prompt (Optional[str]): Limit on multimodal inputs per prompt. Default is None.
vllm_max_lora_rank (int): Maximum LoRA rank. Default is 16.
lora_modules (List[str]): List of LoRA modules. Default is an empty list.
max_logprobs (int): Maximum log probabilities. Default is 20.
"""
# vllm
gpu_memory_utilization: float = 0.9
tensor_parallel_size: int = 1
Expand All @@ -46,6 +71,24 @@ def __post_init__(self):

@dataclass
class InferArguments(BaseArguments, MergeArguments, VllmArguments, LmdeployArguments):
"""
InferArguments is a dataclass that extends BaseArguments, MergeArguments, VllmArguments, and LmdeployArguments.
It is used to define the arguments required for model inference.
Attributes:
infer_backend (Literal): Backend to use for inference. Default is 'pt'. Allowed values are 'vllm', 'pt', 'lmdeploy'.
ckpt_dir (Optional[str]): Directory to the checkpoint. Default is None.
max_batch_size (int): Maximum batch size for the pt engine. Default is 16.
val_dataset_sample (Optional[int]): Sample size for validation dataset. Default is None.
result_dir (Optional[str]): Directory to store inference results. Default is None.
save_result (bool): Flag to indicate if results should be saved. Default is True.
stream (Optional[bool]): Flag to indicate if streaming should be enabled. Default is None.
Methods:
_init_result_dir: Initializes the result directory.
_init_stream: Initializes the stream settings.
__post_init__: Post-initialization method to set default values and initialize configurations.
"""
infer_backend: Literal['vllm', 'pt', 'lmdeploy'] = 'pt'
ckpt_dir: Optional[str] = field(default=None, metadata={'help': '/path/to/your/vx-xxx/checkpoint-xxx'})
max_batch_size: int = 16 # for pt engine
Expand Down
10 changes: 10 additions & 0 deletions swift/llm/argument/merge_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@

@dataclass
class MergeArguments:
"""
MergeArguments is a dataclass that holds configuration for merging models.
Attributes:
merge_lora (bool): Flag to indicate if LoRA merging is enabled. Default is False.
merge_device_map (str): Device map configuration for merging. Default is 'auto'.
use_merge_kit (bool): Flag to indicate whether to merge using `mergekit`. Default is False.
instruct_model_id_or_path (Optional[str]): Path or ID of the instruct model. Use when `use_merge_kit` is True.
instruct_model_revision (Optional[str]): Revision of the instruct model. Use when `use_merge_kit` is True.
"""
merge_lora: bool = False
merge_device_map: str = 'auto'

Expand Down
Loading

0 comments on commit 2f43f86

Please sign in to comment.