# This default config file shows all arguments.
# NOTE: If your custom config file doesn't include all of the following arguments (excluding the advanced FL methods'),
# the defaults can be found in src/utils/constants.py.
# The method name must be identical to the file name in src/server/<method>.py.
method: fedavg
dataset:
# [mnist, cifar10, cifar100, emnist, fmnist, femnist, medmnist,
# medmnistA, medmnistC, covid19, celeba, synthetic, svhn,
# tiny_imagenet, cinic10, domain]
name: mnist
model:
name: lenet5
  # Whether to use torchvision's integrated pretrained model weights.
  # Has no effect if the model is lenet5, 2nn, or fedavgcnn.
use_torchvision_pretrained_weights: true
  # The relative path (from the FL-bench directory) to a model parameters `.pt` file.
# This feature is enabled only when `all_model_params_personalized=False`,
# which is defined and fixed by each FL method.
external_model_weights_path: null
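  # For instance (hypothetical path; assumes such a checkpoint actually exists), this could look like:
  # external_model_weights_path: ./weights/lenet5_pretrained.pt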
# The learning rate scheduler used for client local training.
# Can be null if no lr_scheduler is needed.
lr_scheduler:
name: null # [null, step, cosine, constant, plateau]
step_size: 10 # step
gamma: 0.1 # [step, plateau]
T_max: 10 # cosine
eta_min: 0 # cosine
factor: 0.3334 # [constant, plateau]
total_iters: 5 # constant
  mode: min # plateau
patience: 10 # plateau
threshold: 1.0e-4 # plateau
threshold_mode: rel # plateau
cooldown: 0 # plateau
min_lr: 0 # plateau
eps: 1.0e-8 # plateau
last_epoch: -1
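# A minimal sketch of enabling one of the listed schedulers (illustrative values, not tuned recommendations):
# lr_scheduler:
#   name: cosine
#   T_max: 100      # one cosine cycle spanning all communication rounds
#   eta_min: 1.0e-5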
# The optimizer used for client local training.
optimizer:
name: sgd # [sgd, adam, adamw, rmsprop, adagrad]
lr: 0.01
dampening: 0 # for SGD
weight_decay: 0
momentum: 0 # for [SGD, RMSprop]
alpha: 0.99 # for RMSprop
nesterov: false # for SGD
betas: [0.9, 0.999] # for [Adam, AdamW]
amsgrad: false # for [Adam, AdamW]
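# A minimal sketch of switching to Adam (illustrative values; keys annotated for other
# optimizers above, e.g. momentum, presumably have no effect in that case):
# optimizer:
#   name: adam
#   lr: 1.0e-3
#   betas: [0.9, 0.999]
#   weight_decay: 1.0e-4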
mode: serial # [serial, parallel]
# It's fine to keep these configs; if mode is 'serial', they will be ignored.
parallel:
# The IP address of the selected ray cluster.
  # Defaults to null, which means that if there is no existing ray cluster,
  # Ray will create a new one at the beginning of the experiment
  # and destroy it at the end.
# More details can be found in https://docs.ray.io/en/latest/ray-core/api/doc/ray.init.html.
ray_cluster_addr: null # [null, auto, local]
  # The amount of computational resources you allocate to your Ray cluster.
  # Defaults to null for all (auto-detected by Ray).
num_cpus: null
num_gpus: null
  # Should be set larger than 1, or the training mode falls back to `serial`.
  # Setting a larger `num_workers` can further boost efficiency,
  # but it also increases the computational overhead.
num_workers: 2
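# A minimal sketch of a parallel run on a single machine with 2 GPUs
# (illustrative values; assumes Ray is installed):
# mode: parallel
# parallel:
#   ray_cluster_addr: null
#   num_cpus: 8
#   num_gpus: 2
#   num_workers: 4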
common:
seed: 42 # Random seed of the run.
  join_ratio: 0.1 # Ratio of (clients selected each round) / (total number of clients).
  global_epoch: 100 # Number of global epochs, also called communication rounds.
  local_epoch: 5 # Number of epochs of client local training.
batch_size: 32 # Data batch size for client local training.
reset_optimizer_on_global_epoch: true # Whether to reset optimizer on each global epoch.
# The ratio of stragglers (set in [0, 1]).
# Stragglers would not perform full-epoch local training as normal clients.
# Their local epoch would be randomly selected from range [straggler_min_local_epoch, local_epoch).
straggler_ratio: 0
straggler_min_local_epoch: 0
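  # For example, with local_epoch: 5, straggler_ratio: 0.2, and straggler_min_local_epoch: 1,
  # 20% of the selected clients are stragglers, each training for a random number of epochs in [1, 5).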
# How to deal with parameter buffers (in model.buffers()) of each client model.
# global (default): buffers will be aggregated like other model parameters.
# local: clients' buffers are isolated.
  # drop: clients will drop their buffers after training is done.
buffers: global # [local, global, drop]
  # Whether to evaluate client local models (both before and after local training) on the client side.
  # You can deactivate this to accelerate training.
  # NOTE: deactivating this feature will affect features like logging and monitoring.
client_side_evaluation: true
# The evaluation settings for client side and server side.
test:
    # For example, set `client.test` to true to evaluate the client local models of the selected clients
    # on their local testsets.
    # The frequency is set by `client.interval`; a negative value disables it.
client:
interval: 100
      finetune_epoch: 0 # Number of epochs for clients to fine-tune their models before testing.
train: false
val: false
test: true
    # For example, set `server.test` to true to evaluate the updated global model on a centralized testset
    # (created by aggregating all clients' local testsets) at the end of a communication round.
    # The frequency is set by `server.interval`; a negative value disables it. See the sketch at the end of this block.
# NOTE: If clients have personalized model parameters like local buffers or classifiers, centralized evaluation for the global model is disabled.
server:
interval: -1
train: false
val: false
test: false
# Whether to evaluate the global model in train mode or eval mode.
    # Evaluating in train mode can yield better batchnorm statistics, but makes the result dependent on the order of the data.
model_in_train_mode: false
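    # A minimal sketch of enabling centralized server-side evaluation every 10 rounds
    # (illustrative values; only meaningful when clients share all model parameters, per the NOTE above):
    # server:
    #   interval: 10
    #   test: true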
  verbose_gap: 10 # Interval (in rounds) for displaying clients' training performance on the terminal.
monitor: null # [null, visdom, tensorboard]
use_cuda: true # Whether to use cuda for training.
save_log: true # Whether to save log files in out/<method>/<start_time>.
save_model: false # Whether to save model weights (*.pt) in out/<method>/<start_time>.
save_learning_curve_plot: true # Whether to save learning curve figure (*.png) in out/<method>/<start_time>.
save_metrics: true # Whether to save metrics (*.csv) in out/<method>/<start_time>.
  # Whether to delete output files after the user presses `Ctrl + C`,
  # which indicates that the run is removable.
delete_useless_run: true
# You can also set specific arguments for advanced FL methods here.
# FL-bench accesses FL method arguments via args.<method>.<arg>.
# The keys need to follow those set in `get_hyperparams()` of class <method>Server in src/server/<method>.py.
# FL-bench will ignore these arguments if they are not supported by the selected method;
# e.g., if you are running FedProx, the pFedSim arguments will be ignored.
fedprox:
mu: 0.01
pfedsim:
warmup_round: 0.5
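# For example (illustrative value, not a tuned recommendation), a custom config that strengthens
# FedProx's proximal term only needs to override the relevant keys:
# method: fedprox
# fedprox:
#   mu: 0.1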