{
  "model": {
    "base_model": "Qwen/Qwen3-0.6B"
  },
  "data": {
    "max_samples": 100
  },
  "sft": {
    "output_dir": "./models/sft_model",
    "num_epochs": 2,
    "batch_size": 4
  },
  "grpo": {
    "output_dir": "./models/grpo_model",
    "num_epochs": 2,
    "batch_size": 2
  },
  "eval": {
    "max_samples": 20,
    "sft_accuracy_threshold": 0.4
  },
  "monitoring": {
    "use_wandb": false,
    "use_tensorboard": true,
    "wandb_project": "agentic-rl-pipeline"
  }
}