# moki/hello-agents — mirror of https://github.com/datawhalechina/hello-agents.git


			
"""
人工验证界面

使用Gradio创建Web界面，用于人工验证生成的AIME题目
"""

import json
import os
from typing import List, Dict, Any, Tuple
from datetime import datetime
import gradio as gr


class HumanVerificationUI:
    """Gradio web UI for manually reviewing generated AIME problems.

    Problems are read once from a JSON file. Reviewer verdicts are held in
    ``self.verifications`` (a dict keyed by the problem id converted to
    ``str``) and written to a sibling ``*_verifications.json`` file after
    every submission.
    """

    def __init__(self, data_path: str):
        """Load the problems and any previously saved verifications.

        Args:
            data_path: Path to the JSON file containing the generated problems.

        Raises:
            FileNotFoundError: If ``data_path`` does not exist.
            ValueError: If either JSON file has an unexpected top-level type.
        """
        self.data_path = data_path
        self.problems = self._load_problems()
        self.current_index = 0
        self.verifications = self._load_verifications()

    def _verification_path(self) -> str:
        """Return the path of the verification file that pairs with the data file.

        ``foo.json`` maps to ``foo_verifications.json``. Only the final
        extension is touched, so a ``.json`` fragment elsewhere in the path is
        left alone. A data file without a ``.json`` extension gets the suffix
        appended, which guarantees the result never equals ``data_path`` and a
        save can never overwrite the problem file.
        """
        root, ext = os.path.splitext(self.data_path)
        if ext.lower() == ".json":
            return f"{root}_verifications.json"
        return f"{self.data_path}_verifications.json"

    def _load_problems(self) -> List[Dict[str, Any]]:
        """Read the problem list from ``self.data_path``.

        Raises:
            FileNotFoundError: If the data file does not exist.
            ValueError: If the file does not contain a JSON array.
        """
        if not os.path.exists(self.data_path):
            raise FileNotFoundError(f"数据文件不存在: {self.data_path}")

        with open(self.data_path, 'r', encoding='utf-8') as f:
            problems = json.load(f)

        # Navigation indexes the problems by position, so anything other than
        # a list would only fail later with a confusing error.
        if not isinstance(problems, list):
            raise ValueError(
                f"expected a JSON array of problems in {self.data_path}, "
                f"got {type(problems).__name__}"
            )
        return problems

    def _load_verifications(self) -> Dict[str, Any]:
        """Read previously saved verifications, or return an empty dict.

        Raises:
            ValueError: If the verification file exists but is not a JSON object.
        """
        verification_path = self._verification_path()
        if not os.path.exists(verification_path):
            return {}

        with open(verification_path, 'r', encoding='utf-8') as f:
            saved = json.load(f)

        # Refuse to continue rather than silently overwrite an unexpected file.
        if not isinstance(saved, dict):
            raise ValueError(
                f"expected a JSON object in {verification_path}, "
                f"got {type(saved).__name__}"
            )
        return saved

    def _save_verifications(self):
        """Write all verifications to the paired verification file."""
        with open(self._verification_path(), 'w', encoding='utf-8') as f:
            json.dump(self.verifications, f, ensure_ascii=False, indent=2)

    def get_current_problem(self) -> Tuple[str, str, str, str, str, str]:
        """Return the display fields for the problem at ``current_index``.

        Returns:
            A 6-tuple ``(title, problem, answer, solution, metadata, comments)``
            in the order the UI binds its output components. ``comments`` is
            the previously saved reviewer comment, or ``""`` if there is none.
        """
        if not self.problems:
            # The last slot feeds the comments textbox, so it must stay empty.
            return "无题目", "", "", "", "", ""

        problem = self.problems[self.current_index]
        problem_id = problem.get("id", "unknown")

        # JSON object keys are always strings, so look up by the string form;
        # otherwise integer ids would never match after a reload.
        verification = self.verifications.get(str(problem_id), {})

        return (
            f"题目 {self.current_index + 1}/{len(self.problems)}",
            problem.get("problem", ""),
            f"答案: {problem.get('answer', 'N/A')}",
            problem.get("solution", ""),
            f"主题: {problem.get('topic', 'N/A')}",
            verification.get("comments", "")
        )

    def verify_problem(
        self,
        correctness: int,
        clarity: int,
        difficulty_match: int,
        completeness: int,
        status: str,
        comments: str
    ) -> str:
        """Record a verdict for the current problem and persist it.

        Args:
            correctness: Correctness score (1-5).
            clarity: Clarity score (1-5).
            difficulty_match: Difficulty-match score (1-5).
            completeness: Completeness score (1-5).
            status: One of ``approved``, ``rejected`` or ``needs_revision``.
            comments: Free-form reviewer comment.

        Returns:
            A human-readable confirmation message, or an error message when
            there are no problems to verify.
        """
        if not self.problems:
            return "❌ 无题目可验证"

        problem = self.problems[self.current_index]
        problem_id = problem.get("id", "unknown")
        total_score = (correctness + clarity + difficulty_match + completeness) / 4

        # Key by str so the in-memory dict matches what JSON round-trips to;
        # a raw int key would coexist with its reloaded "int" string twin.
        self.verifications[str(problem_id)] = {
            "problem_id": problem_id,
            "scores": {
                "correctness": correctness,
                "clarity": clarity,
                "difficulty_match": difficulty_match,
                "completeness": completeness
            },
            "total_score": total_score,
            "status": status,
            "comments": comments,
            "verified_at": datetime.now().isoformat()
        }

        self._save_verifications()

        return f"✅ 题目 {problem_id} 验证完成！\n总分: {total_score:.2f}/5.0"

    def next_problem(self) -> Tuple[str, str, str, str, str, str]:
        """Advance to the next problem (staying on the last one) and return its fields."""
        if self.current_index < len(self.problems) - 1:
            self.current_index += 1
        return self.get_current_problem()

    def prev_problem(self) -> Tuple[str, str, str, str, str, str]:
        """Step back to the previous problem (staying on the first one) and return its fields."""
        if self.current_index > 0:
            self.current_index -= 1
        return self.get_current_problem()

    def get_statistics(self) -> str:
        """Return a text summary of verification progress and average score."""
        if not self.verifications:
            return "暂无验证数据"

        records = list(self.verifications.values())
        total = len(self.problems)
        verified = len(records)

        statuses = [record.get("status") for record in records]
        approved = statuses.count("approved")
        rejected = statuses.count("rejected")
        needs_revision = statuses.count("needs_revision")

        avg_score = sum(record.get("total_score", 0) for record in records) / verified
        # Saved verifications may outlive the problem list (e.g. an emptied
        # data file); avoid dividing by zero or reporting a negative remainder.
        verified_pct = verified / total * 100 if total else 0.0
        unverified = max(total - verified, 0)

        return (
            "\n"
            "📊 验证统计\n"
            "\n"
            f"总题目数: {total}\n"
            f"已验证: {verified} ({verified_pct:.1f}%)\n"
            f"未验证: {unverified}\n"
            "\n"
            "验证结果:\n"
            f"- ✅ 通过: {approved}\n"
            f"- ❌ 拒绝: {rejected}\n"
            f"- 🔄 需修改: {needs_revision}\n"
            "\n"
            f"平均评分: {avg_score:.2f}/5.0\n"
        )

    def launch(
        self,
        share: bool = False,
        server_name: str = "127.0.0.1",
        server_port: int = 7860,
    ):
        """Build the Gradio interface and start the web server.

        Args:
            share: Forwarded to ``demo.launch`` as its ``share`` argument.
            server_name: Host/interface to bind to (defaults to loopback).
            server_port: TCP port to listen on.
        """
        with gr.Blocks(title="AIME题目人工验证") as demo:
            gr.Markdown("# 🎯 AIME题目人工验证系统")
            gr.Markdown(f"数据文件: `{self.data_path}`")

            with gr.Row():
                with gr.Column(scale=2):
                    # Read-only display of the current problem.
                    title = gr.Textbox(label="当前题目", interactive=False)
                    problem_text = gr.Textbox(label="问题描述", lines=5, interactive=False)
                    answer_text = gr.Textbox(label="答案", interactive=False)
                    solution_text = gr.Textbox(label="解答过程", lines=10, interactive=False)
                    metadata_text = gr.Textbox(label="元数据", interactive=False)

                with gr.Column(scale=1):
                    # Scoring controls.
                    gr.Markdown("### 📝 评分 (1-5分)")
                    correctness_slider = gr.Slider(1, 5, value=3, step=1, label="正确性")
                    clarity_slider = gr.Slider(1, 5, value=3, step=1, label="清晰度")
                    difficulty_slider = gr.Slider(1, 5, value=3, step=1, label="难度匹配")
                    completeness_slider = gr.Slider(1, 5, value=3, step=1, label="完整性")

                    # Verdict selection.
                    gr.Markdown("### ✅ 验证状态")
                    status_radio = gr.Radio(
                        choices=["approved", "rejected", "needs_revision"],
                        value="approved",
                        label="状态"
                    )

                    # Reviewer comment.
                    comments_text = gr.Textbox(label="评论", lines=3, placeholder="请输入评论...")

                    # Submission.
                    verify_btn = gr.Button("✅ 提交验证", variant="primary")
                    verify_result = gr.Textbox(label="验证结果", interactive=False)

            # Navigation.
            with gr.Row():
                prev_btn = gr.Button("⬅️ 上一题")
                next_btn = gr.Button("下一题 ➡️")

            # Statistics.
            with gr.Row():
                stats_text = gr.Textbox(label="验证统计", lines=10, interactive=False)
                refresh_stats_btn = gr.Button("🔄 刷新统计")

            # Components that receive the 6-tuple from get_current_problem,
            # in the same order as the tuple.
            problem_outputs = [
                title,
                problem_text,
                answer_text,
                solution_text,
                metadata_text,
                comments_text,
            ]

            # Show the first problem as soon as the page loads.
            demo.load(fn=self.get_current_problem, outputs=problem_outputs)

            verify_btn.click(
                fn=self.verify_problem,
                inputs=[
                    correctness_slider,
                    clarity_slider,
                    difficulty_slider,
                    completeness_slider,
                    status_radio,
                    comments_text,
                ],
                outputs=verify_result
            )

            next_btn.click(fn=self.next_problem, outputs=problem_outputs)
            prev_btn.click(fn=self.prev_problem, outputs=problem_outputs)

            refresh_stats_btn.click(fn=self.get_statistics, outputs=stats_text)

        demo.launch(share=share, server_name=server_name, server_port=server_port)


if __name__ == "__main__":
    import sys

    # The script takes one positional argument: the path to the generated
    # problems JSON file. Without it, print usage and exit with status 1.
    cli_args = sys.argv[1:]
    if not cli_args:
        for usage_line in (
            "用法: python human_verification_ui.py <data_path>",
            "示例: python human_verification_ui.py generated_data/aime_generated_20250110_120000.json",
        ):
            print(usage_line)
        sys.exit(1)

    # Start the review UI without requesting a public share link.
    HumanVerificationUI(cli_args[0]).launch(share=False)