config.json 485 B

{
  "model": {
    "base_model": "Qwen/Qwen3-0.6B"
  },
  "data": {
    "max_samples": 100
  },
  "sft": {
    "output_dir": "./models/sft_model",
    "num_epochs": 2,
    "batch_size": 4
  },
  "grpo": {
    "output_dir": "./models/grpo_model",
    "num_epochs": 2,
    "batch_size": 2
  },
  "eval": {
    "max_samples": 20,
    "sft_accuracy_threshold": 0.4
  },
  "monitoring": {
    "use_wandb": false,
    "use_tensorboard": true,
    "wandb_project": "agentic-rl-pipeline"
  }
}