# Launch configuration for a single-machine, 4-process DeepSpeed run.
# NOTE(review): key names match the Hugging Face Accelerate config schema
# (as written by `accelerate config`) — confirm against the consuming tool.
compute_environment: LOCAL_MACHINE
distributed_type: DEEPSPEED
num_processes: 4
machine_rank: 0
num_machines: 1
gpu_ids: all
mixed_precision: fp16

# DeepSpeed-specific settings. These keys must be nested under
# `deepspeed_config`; each one sits on its own line so that the inline
# comments cannot swallow the keys that follow them.
deepspeed_config:
  gradient_accumulation_steps: 4
  gradient_clipping: 1.0
  offload_optimizer_device: cpu  # offload optimizer states to CPU
  offload_param_device: cpu  # offload parameters to CPU
  zero3_init_flag: true
  zero_stage: 3  # ZeRO-3