| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461 |
- """
- AIME数学题目生成器
- 使用HelloAgents框架生成AIME风格的数学题目
- """
- import json
- import os
- import time
- import random
- from typing import List, Dict, Any, Optional
- from datetime import datetime
- from tqdm import tqdm
- from hello_agents import SimpleAgent
- from hello_agents import HelloAgentsLLM
- from datasets import load_dataset
class AIMEGenerator:
    """Generate AIME-style math problems through a HelloAgents ``SimpleAgent``.

    The generator prompts the agent for one problem at a time, parses the
    JSON object found in the reply, and can produce batches with
    checkpointing, a JSON dump of the results and a Markdown statistics
    report.
    """

    # Prompt used when no reference example is available (or wanted).
    GENERATION_PROMPT = """You are a professional mathematics competition problem designer, skilled in creating AIME (American Invitational Mathematics Examination) style problems.
AIME Problem Characteristics:
1. Answer: An integer between 0 and 999
2. Topics: Algebra, Geometry, Number Theory, Combinatorics, Probability, etc.
3. Style: Requires multi-step reasoning, but no advanced theory
4. Difficulty: Medium to hard (similar to AIME problems 6-9)
Please generate an AIME-style mathematics problem, including:
1. Problem statement (clear and complete)
2. Answer (an integer between 0 and 999)
3. Detailed solution (including all reasoning steps)
4. Topic classification (Algebra/Geometry/Number Theory/Combinatorics/Probability)
Please output in the following JSON format, avoid using special escape characters in JSON:
```json
{
"problem": "Problem statement in English",
"answer": 123,
"solution": "Detailed solution steps in English",
"topic": "Algebra"
}
```
"""

    def __init__(
        self,
        llm: Optional["HelloAgentsLLM"] = None,
        delay_seconds: float = 1.0,
        use_reference_examples: bool = True,
        reference_dataset: str = "TianHongZXY/aime-1983-2025",
    ):
        """Set up the agent and, optionally, load reference problems.

        Args:
            llm: LLM instance to use. When ``None`` a default
                ``HelloAgentsLLM()`` is created.
            delay_seconds: Minimum spacing between generation calls and the
                pause before a retry, in seconds.
            use_reference_examples: Whether to show the model a randomly
                chosen problem from ``reference_dataset`` as a style example.
            reference_dataset: Dataset name passed to ``load_dataset``.
        """
        self.llm = HelloAgentsLLM() if llm is None else llm
        self.agent = SimpleAgent(
            name="AIME Generator",
            llm=self.llm,
            system_prompt="你是一位专业的数学竞赛题目设计专家。",
        )
        self.delay_seconds = delay_seconds
        self.use_reference_examples = use_reference_examples
        self.reference_examples = []

        if use_reference_examples:
            self._load_reference_examples(reference_dataset)

    def _load_reference_examples(self, reference_dataset: str) -> None:
        """Load the reference dataset; on any failure disable reference mode."""
        try:
            print(f"📚 加载AIME真题数据集: {reference_dataset}")
            # Try the "train" split first and fall back to "test".
            # ``except Exception`` (not a bare ``except``) so that Ctrl-C and
            # SystemExit are not swallowed by the fallback.
            try:
                dataset = load_dataset(reference_dataset, split="train")
            except Exception:
                dataset = load_dataset(reference_dataset, split="test")

            self.reference_examples = list(dataset)
            print(f" ✓ 已加载 {len(self.reference_examples)} 道参考题目")

            # Report the covered year range when items carry a 'year' field.
            years = [item.get('year') for item in self.reference_examples]
            years = [year for year in years if year]
            if years:
                year_range = f"{min(years)}-{max(years)}"
                print(f" ℹ️ 年份范围: {year_range}")
        except Exception as e:
            # Best effort: generation still works with the plain prompt.
            print(f" ⚠️ 加载参考样例失败: {e}")
            print(f" ℹ️ 将使用默认提示词生成")
            self.use_reference_examples = False

    def generate_single(self, max_retries: int = 3) -> Dict[str, Any]:
        """Generate one problem, retrying on any failure.

        Args:
            max_retries: Maximum number of attempts.

        Returns:
            The parsed problem dict, or the placeholder from
            ``_get_default_problem`` when every attempt failed (including
            the case ``max_retries <= 0``, where no attempt is made).
        """
        prompt = self._build_prompt()

        for attempt in range(1, max_retries + 1):
            try:
                response = self.agent.run(prompt)
                return self._parse_response(response)
            except Exception as e:
                # Broad on purpose: both the agent call and the parsing may
                # fail in many ways and every failure is handled by retrying.
                if attempt < max_retries:
                    tqdm.write(f"⚠️ 生成失败(尝试 {attempt}/{max_retries}),{self.delay_seconds}秒后重试...")
                    time.sleep(self.delay_seconds)
                else:
                    tqdm.write(f"❌ 生成失败,已达最大重试次数: {e}")

        # Reached after the final failure and also when the loop never ran,
        # so callers always receive a dict and never ``None``.
        return self._get_default_problem()

    def _build_prompt(self) -> str:
        """Build the generation prompt, embedding a random reference example
        when reference mode is enabled and examples are loaded."""
        if not self.use_reference_examples or not self.reference_examples:
            return self.GENERATION_PROMPT

        example = random.choice(self.reference_examples)
        example_problem = example.get('problem', 'Example problem')
        example_answer = example.get('answer', 0)

        prompt = f"""You are a professional mathematics competition problem designer, skilled in creating AIME (American Invitational Mathematics Examination) style problems.
【Reference Example】(For style reference only, please generate a completely different problem)
Problem: {example_problem}
Answer: {example_answer}
AIME Problem Characteristics:
1. Answer: An integer between 0 and 999
2. Topics: Algebra, Geometry, Number Theory, Combinatorics, Probability, etc.
3. Style: Requires multi-step reasoning, but no advanced theory
4. Difficulty: Medium to hard (similar to AIME problems 6-9)
Please generate a **completely different** AIME-style mathematics problem, including:
1. Problem statement (clear and complete, different from the reference)
2. Answer (an integer between 0 and 999, different from the reference)
3. Detailed solution (including all reasoning steps)
4. Topic classification (Algebra/Geometry/Number Theory/Combinatorics/Probability)
Please output in the following JSON format, avoid using special escape characters in JSON:
```json
{{
"problem": "Problem statement in English",
"answer": 123,
"solution": "Detailed solution steps in English",
"topic": "Algebra"
}}
```
Important Notes:
- **Must generate a completely different problem from the reference**
- You can reference the style, but do not copy the content
- Ensure the problem is creative and original
"""
        return prompt

    @staticmethod
    def _restore_latex_escapes(text: str) -> str:
        """Undo JSON decoding of ``\\f`` / ``\\b`` that were really LaTeX.

        ``\\frac`` or ``\\beta`` written with a single backslash is valid
        JSON, but decodes to a form-feed / backspace followed by the rest of
        the command name. Those control characters are put back as a
        backslash plus letter. ``\\t``, ``\\n`` and ``\\r`` are left alone
        because they cannot be told apart from genuine whitespace.
        """
        return text.replace("\x0c", "\\f").replace("\x08", "\\b")

    def _parse_response(self, response: str) -> Dict[str, Any]:
        """Extract and validate the problem dict from an LLM reply.

        Args:
            response: Raw reply text; the JSON may be wrapped in a
                ```` ```json ```` or plain ```` ``` ```` fence.

        Returns:
            Dict with at least ``problem``, ``answer`` (int clamped to
            0..999), ``solution`` and ``topic``.

        Raises:
            json.JSONDecodeError: The payload is not valid JSON even after
                the backslash repair.
            ValueError: The payload is not an object or lacks ``problem`` /
                ``answer`` (also raised by ``int()`` for a non-numeric
                answer).
        """
        import re  # kept local: the module header does not import ``re``

        # Isolate the JSON payload from an optional Markdown code fence.
        if "```json" in response:
            json_str = response.split("```json")[1].split("```")[0].strip()
        elif "```" in response:
            json_str = response.split("```")[1].split("```")[0].strip()
        else:
            json_str = response.strip()

        try:
            problem_data = json.loads(json_str)
        except json.JSONDecodeError:
            # Repair LaTeX backslashes that are not legal JSON escapes by
            # doubling them (``\alpha`` -> ``\\alpha``). A ``\u`` only counts
            # as a legal escape when four hex digits follow, so commands
            # like ``\underline`` are repaired as well.
            fixed_json_str = re.sub(
                r'(?<!\\)\\(?!["\\/bfnrt]|u[0-9a-fA-F]{4})',
                r'\\\\',
                json_str,
            )
            try:
                problem_data = json.loads(fixed_json_str)
            except json.JSONDecodeError:
                print(f"❌ JSON解析失败:")
                print(f"原始响应: {response[:500]}...")
                print(f"提取的JSON: {json_str[:500]}...")
                raise

        # Required fields; a non-object payload is reported the same way.
        if (
            not isinstance(problem_data, dict)
            or "problem" not in problem_data
            or "answer" not in problem_data
        ):
            raise ValueError("缺少必需字段: problem 或 answer")

        # Put back backslashes of LaTeX commands that JSON decoded as
        # form-feed / backspace control characters.
        for key, value in problem_data.items():
            if isinstance(value, str):
                problem_data[key] = self._restore_latex_escapes(value)

        # Force the answer into the 0..999 range.
        answer = int(problem_data["answer"])
        if not (0 <= answer <= 999):
            print(f"⚠️ 答案超出范围: {answer},调整为0-999范围内")
            answer = max(0, min(999, answer))
        problem_data["answer"] = answer

        problem_data.setdefault("solution", "No solution provided")
        problem_data.setdefault("topic", "Uncategorized")
        return problem_data

    def _get_default_problem(self) -> Dict[str, Any]:
        """Return the placeholder problem used when generation fails."""
        return {
            "problem": "生成失败,请重新生成",
            "answer": 0,
            "solution": "N/A",
            "topic": "未知"
        }

    def _load_checkpoint(self, checkpoint_path: Optional[str]) -> List[Dict[str, Any]]:
        """Return the problems stored in the checkpoint, or ``[]`` when there
        is no checkpoint or it cannot be used."""
        if not (checkpoint_path and os.path.exists(checkpoint_path)):
            return []

        print(f"\n📂 发现检查点文件,尝试恢复...")
        try:
            with open(checkpoint_path, 'r', encoding='utf-8') as f:
                problems = json.load(f)
            # Anything but a list cannot be appended to later on.
            if not isinstance(problems, list):
                raise ValueError("检查点内容不是列表")
        except Exception as e:
            print(f" ⚠️ 恢复失败: {e},从头开始")
            return []

        start_index = len(problems)
        print(f" ✓ 已恢复 {start_index} 个题目,从第 {start_index + 1} 个继续")
        return problems

    def _save_checkpoint(self, checkpoint_path: str, problems: List[Dict[str, Any]]) -> None:
        """Persist progress; failures are reported but never abort the batch."""
        tmp_path = f"{checkpoint_path}.tmp"
        try:
            # Write to a sibling file and swap it in, so an interruption
            # during the write cannot leave a truncated checkpoint behind.
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(problems, f, ensure_ascii=False, indent=2)
            os.replace(tmp_path, checkpoint_path)
        except Exception as e:
            tqdm.write(f"⚠️ 保存检查点失败: {e}")

    def generate_batch(
        self,
        num_problems: int = 30,
        checkpoint_path: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """Generate problems until ``num_problems`` are available.

        Args:
            num_problems: Target number of problems.
            checkpoint_path: Optional JSON file used to resume from and to
                save progress after every generated problem.

        Returns:
            List of problem dicts (restored ones first), each carrying an
            ``id`` and a ``generated_at`` timestamp.
        """
        print(f"\n🎯 开始生成AIME题目")
        print(f" 目标数量: {num_problems}")
        print(f" 生成模型: {self.llm.model}")
        print(f" 延迟设置: {self.delay_seconds}秒/题")

        problems = self._load_checkpoint(checkpoint_path)
        start_index = len(problems)

        with tqdm(total=num_problems, initial=start_index, desc="生成AIME题目", unit="题") as pbar:
            # Monotonic clock: immune to wall-clock adjustments. ``None``
            # means no call has been made yet in this run.
            last_call_end = None
            for i in range(start_index, num_problems):
                # Keep at least ``delay_seconds`` between consecutive calls.
                if last_call_end is not None:
                    wait_time = self.delay_seconds - (time.monotonic() - last_call_end)
                    if wait_time > 0:
                        tqdm.write(f"⏳ 等待 {wait_time:.1f} 秒以避免速率限制...")
                        time.sleep(wait_time)

                started = time.monotonic()
                problem = self.generate_single()
                problem["id"] = f"gen_aime_{i + 1}"
                problem["generated_at"] = datetime.now().isoformat()
                last_call_end = time.monotonic()
                generation_time = last_call_end - started

                problems.append(problem)

                pbar.set_postfix({
                    "主题": problem.get('topic', 'N/A'),
                    "答案": problem.get('answer', 'N/A'),
                    "耗时": f"{generation_time:.1f}s"
                })
                pbar.update(1)

                if checkpoint_path:
                    self._save_checkpoint(checkpoint_path, problems)

        print(f"\n✅ 生成完成!共 {len(problems)} 个题目")
        return problems

    def save_problems(
        self,
        problems: List[Dict[str, Any]],
        output_path: str
    ):
        """Write ``problems`` to ``output_path`` as pretty-printed UTF-8 JSON."""
        # A bare file name has an empty dirname, and ``os.makedirs("")``
        # raises, so only create the directory when there is one.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(problems, f, ensure_ascii=False, indent=2)

        print(f"\n💾 题目已保存: {output_path}")

    def generate_and_save(
        self,
        num_problems: int = 30,
        output_dir: str = "data_generation/generated_data"
    ) -> str:
        """Generate a batch, save it with a statistics report, and return the
        path of the JSON file that was written."""
        os.makedirs(output_dir, exist_ok=True)

        # Remove checkpoint files left in the output directory by earlier runs.
        for file in os.listdir(output_dir):
            if file.startswith("checkpoint_") and file.endswith(".json"):
                old_checkpoint = os.path.join(output_dir, file)
                try:
                    os.remove(old_checkpoint)
                    print(f"🗑️ 已删除旧检查点文件: {file}")
                except Exception as e:
                    print(f"⚠️ 删除旧检查点失败: {e}")

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        checkpoint_path = os.path.join(output_dir, f"checkpoint_{timestamp}.json")

        problems = self.generate_batch(num_problems, checkpoint_path=checkpoint_path)

        output_path = os.path.join(output_dir, f"aime_generated_{timestamp}.json")
        self.save_problems(problems, output_path)

        self._generate_statistics_report(problems, output_dir, timestamp)

        # The checkpoint is no longer needed once the final file exists.
        if os.path.exists(checkpoint_path):
            try:
                os.remove(checkpoint_path)
                print(f"\n🗑️ 已删除检查点文件")
            except Exception as e:
                print(f"\n⚠️ 删除检查点文件失败: {e}")

        return output_path

    def _generate_statistics_report(
        self,
        problems: List[Dict[str, Any]],
        output_dir: str,
        timestamp: str
    ):
        """Write a Markdown report (topic distribution, answer statistics and
        the first ten problems) to
        ``<output_dir>/generation_report_<timestamp>.md``."""
        # Count problems per topic and collect the answers that are present.
        topics = {}
        answers = []
        for problem in problems:
            topic = problem.get("topic", "未知")
            topics[topic] = topics.get(topic, 0) + 1
            if "answer" in problem:
                answers.append(problem["answer"])

        report = f"""# AIME题目生成统计报告
## 基本信息
- **生成时间**: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
- **题目数量**: {len(problems)}
## 主题分布
| 主题 | 数量 | 占比 |
|------|------|------|
"""

        # Most frequent topic first; the stable sort keeps ties in first-seen
        # order.
        for topic, count in sorted(topics.items(), key=lambda x: x[1], reverse=True):
            percentage = count / len(problems) * 100
            report += f"| {topic} | {count} | {percentage:.1f}% |\n"

        if answers:
            report += f"""
## 答案分析
- **平均答案**: {sum(answers) / len(answers):.2f}
- **最小答案**: {min(answers)}
- **最大答案**: {max(answers)}
- **答案范围**: {min(answers)}-{max(answers)}
"""

        report += f"""
## 题目列表
| ID | 主题 | 答案 |
|-----|------|------|
"""
        for problem in problems[:10]:  # only the first ten are listed
            report += f"| {problem.get('id', 'N/A')} | {problem.get('topic', 'N/A')} | {problem.get('answer', 'N/A')} |\n"

        if len(problems) > 10:
            report += f"\n*(仅显示前10个题目,完整列表请查看JSON文件)*\n"

        report += f"""
---
*报告生成时间: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}*
"""

        report_path = os.path.join(output_dir, f"generation_report_{timestamp}.md")
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write(report)

        print(f"📊 统计报告已保存: {report_path}")
if __name__ == "__main__":
    # Script entry point: build a generator with default settings, generate
    # 30 problems, and report where the resulting JSON file was written.
    output_path = AIMEGenerator().generate_and_save(num_problems=30)

    print(f"\n✅ 完成!生成的题目保存在: {output_path}")
|