# This default config file shows all arguments.
# NOTE: If your custom config file doesn't include all of the following arguments (excluding the advanced FL methods'),
# the defaults can be found in src/utils/constants.py.
# The method name must be identical to the file name in src/server/<method>.py.
method: fedavg
dataset:
# [mnist, cifar10, cifar100, emnist, fmnist, femnist, medmnist,
# medmnistA, medmnistC, covid19, celeba, synthetic, svhn,
# tiny_imagenet, cinic10, domain]
name: mnist
model:
name: lenet5
  # Whether to use torchvision's integrated pretrained model weights.
  # Has no effect if the model is lenet5, 2nn, or fedavgcnn.
use_torchvision_pretrained_weights: true
  # The relative path (from the FL-bench directory) to a model parameters `.pt` file.
# This feature is enabled only when `all_model_params_personalized=False`,
# which is defined and fixed by each FL method.
external_model_weights_path: null
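  # For instance (hypothetical path; assumes such a checkpoint actually exists), this could look like:
  # external_model_weights_path: ./weights/lenet5_pretrained.pt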
# The learning rate scheduler used for client local training.
# Can be null if no lr_scheduler is needed.
lr_scheduler:
name: null # [null, step, cosine, constant, plateau]
step_size: 10 # step
gamma: 0.1 # [step, plateau]
T_max: 10 # cosine
eta_min: 0 # cosine
factor: 0.3334 # [constant, plateau]
total_iters: 5 # constant
  mode: min # plateau
patience: 10 # plateau
threshold: 1.0e-4 # plateau
threshold_mode: rel # plateau
cooldown: 0 # plateau
min_lr: 0 # plateau
eps: 1.0e-8 # plateau
last_epoch: -1
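# A minimal sketch of enabling one of the listed schedulers (illustrative values, not tuned recommendations):
# lr_scheduler:
#   name: cosine
#   T_max: 100      # one cosine cycle spanning all communication rounds
#   eta_min: 1.0e-5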
# The optimizer used for client local training.
optimizer:
name: sgd # [sgd, adam, adamw, rmsprop, adagrad]
lr: 0.01
dampening: 0 # for SGD
weight_decay: 0
momentum: 0 # for [SGD, RMSprop]
alpha: 0.99 # for RMSprop
nesterov: false # for SGD
betas: [0.9, 0.999] # for [Adam, AdamW]
amsgrad: false # for [Adam, AdamW]
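# A minimal sketch of switching to Adam (illustrative values; keys annotated for other
# optimizers above, e.g. momentum, presumably have no effect in that case):
# optimizer:
#   name: adam
#   lr: 1.0e-3
#   betas: [0.9, 0.999]
#   weight_decay: 1.0e-4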
mode: serial # [serial, parallel]
# It's fine to keep these configs; if mode is 'serial', they will be ignored.
parallel:
# The IP address of the selected ray cluster.
  # Defaults to null, which means that if there is no existing ray cluster,
  # Ray will create a new one at the beginning of the experiment
  # and destroy it at the end.
# More details can be found in https://docs.ray.io/en/latest/ray-core/api/doc/ray.init.html.
ray_cluster_addr: null # [null, auto, local]
  # The amount of computational resources you allocate to your Ray cluster.
  # Defaults to null for all (auto-detected by Ray).
num_cpus: null
num_gpus: null
  # Should be set larger than 1, or the training mode falls back to `serial`.
  # Setting a larger `num_workers` can further boost efficiency,
  # but it also increases the computational overhead.
num_workers: 2
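# A minimal sketch of a parallel run on a single machine with 2 GPUs
# (illustrative values; assumes Ray is installed):
# mode: parallel
# parallel:
#   ray_cluster_addr: null
#   num_cpus: 8
#   num_gpus: 2
#   num_workers: 4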
common:
seed: 42 # Random seed of the run.
  join_ratio: 0.1 # Ratio of (clients selected each round) / (total number of clients).
  global_epoch: 100 # Number of global epochs, also called communication rounds.
  local_epoch: 5 # Number of epochs of client local training.
batch_size: 32 # Data batch size for client local training.
reset_optimizer_on_global_epoch: true # Whether to reset optimizer on each global epoch.
# The ratio of stragglers (set in [0, 1]).
# Stragglers would not perform full-epoch local training as normal clients.
# Their local epoch would be randomly selected from range [straggler_min_local_epoch, local_epoch).
straggler_ratio: 0
straggler_min_local_epoch: 0
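  # For example, with local_epoch: 5, straggler_ratio: 0.2, and straggler_min_local_epoch: 1,
  # 20% of the selected clients are stragglers, each training for a random number of epochs in [1, 5).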
# How to deal with parameter buffers (in model.buffers()) of each client model.
# global (default): buffers will be aggregated like other model parameters.
# local: clients' buffers are isolated.
  # drop: clients will drop their buffers after training is done.
buffers: global # [local, global, drop]
  # Whether to evaluate client local models (both before and after local training) on the client side.
  # You can deactivate this to accelerate training.
  # NOTE: deactivating this feature will affect features like logging and monitoring.
client_side_evaluation: true
# The evaluation settings for client side and server side.
test:
    # For example, set `client.test` to true to evaluate the client local models of the selected clients
    # on their local testsets.
    # The frequency is set by `client.interval`; a negative value disables it.
client:
interval: 100
      finetune_epoch: 0 # Number of epochs for clients to fine-tune their models before testing.
train: false
val: false
test: true
    # For example, set `server.test` to true to evaluate the updated global model on a centralized testset
    # (created by aggregating all clients' local testsets) at the end of a communication round.
    # The frequency is set by `server.interval`; a negative value disables it. See the sketch at the end of this block.
# NOTE: If clients have personalized model parameters like local buffers or classifiers, centralized evaluation for the global model is disabled.
server:
interval: -1
train: false
val: false
test: false
# Whether to evaluate the global model in train mode or eval mode.
    # Evaluating in train mode can yield better batchnorm statistics, but makes the result dependent on the order of the data.
model_in_train_mode: false
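    # A minimal sketch of enabling centralized server-side evaluation every 10 rounds
    # (illustrative values; only meaningful when clients share all model parameters, per the NOTE above):
    # server:
    #   interval: 10
    #   test: true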
  verbose_gap: 10 # Interval (in rounds) for displaying clients' training performance on the terminal.
monitor: null # [null, visdom, tensorboard]
use_cuda: true # Whether to use cuda for training.
save_log: true # Whether to save log files in out/<method>/<start_time>.
save_model: false # Whether to save model weights (*.pt) in out/<method>/<start_time>.
save_learning_curve_plot: true # Whether to save learning curve figure (*.png) in out/<method>/<start_time>.
save_metrics: true # Whether to save metrics (*.csv) in out/<method>/<start_time>.
  # Whether to delete output files after the user presses `Ctrl + C`,
  # which indicates that the run is removable.
delete_useless_run: true
# You can also set specific arguments for advanced FL methods here.
# FL-bench accesses FL method arguments via args.<method>.<arg>.
# The keys need to follow those set in `get_hyperparams()` of class <method>Server in src/server/<method>.py.
# FL-bench will ignore these arguments if they are not supported by the selected method;
# e.g., if you are running FedProx, the pFedSim arguments will be ignored.
fedprox:
mu: 0.01
pfedsim:
warmup_round: 0.5
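# For example (illustrative value, not a tuned recommendation), a custom config that strengthens
# FedProx's proximal term only needs to override the relevant keys:
# method: fedprox
# fedprox:
#   mu: 0.1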