# Launch configuration (keys follow the Hugging Face Accelerate config-file
# layout): one local machine, 4 processes, DeepSpeed as the distributed
# backend, fp16 mixed precision.
compute_environment: LOCAL_MACHINE
distributed_type: DEEPSPEED
num_processes: 4
machine_rank: 0
num_machines: 1
gpu_ids: all
mixed_precision: fp16

# DeepSpeed settings. These keys must be nested under `deepspeed_config`;
# left at the top level they are not read as DeepSpeed options.
deepspeed_config:
  gradient_accumulation_steps: 4
  gradient_clipping: 1.0
  offload_optimizer_device: cpu  # offload optimizer state to CPU
  offload_param_device: cpu  # offload parameters to CPU
  zero3_init_flag: true
  zero_stage: 3  # ZeRO-3