| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346 |
from __future__ import annotations

import json
import logging
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import List, Optional

from agents.react_agent import ReActAgent
from core.config import Config
from core.llm import HelloAgentsLLM
from core.message import Message
from context.builder import ContextBuilder, ContextConfig, ContextPacket
from tools.registry import ToolRegistry
from tools.builtin.note_tool import NoteTool
from tools.builtin.terminal_tool import TerminalTool
from tools.builtin.plan_tool import PlanTool
from tools.builtin.todo_tool import TodoTool
from tools.builtin.context_fetch_tool import ContextFetchTool


@dataclass
class CodeAgentPaths:
    """Directory layout used by CodeAgent, gathered in one place."""

    # Root of the code repository the agent operates on.
    repo_root: Path
    # Directory handed to the note tool as its workspace.
    notes_dir: Path
    # Directory reserved for memory data.
    memory_dir: Path
    # Directory where per-session JSON snapshots are written.
    sessions_dir: Path
    # Directory reserved for log files.
    logs_dir: Path

    @property
    def helloagents_dir(self) -> Path:
        """Return the ``.helloagents`` directory directly under ``repo_root``."""
        return self.repo_root.joinpath(".helloagents")

    @property
    def prompts_dir(self) -> Path:
        """Return the ``code_agent/prompts`` directory under ``repo_root``."""
        return self.repo_root.joinpath("code_agent", "prompts")
class CodeAgent:
    """CLI coding agent in the spirit of Claude Code / Codex.

    - The core loop is delegated to ``ReActAgent``.
    - ``ContextBuilder`` assembles the prompt context (system prompt, recent
      conversation, and recent tool summaries in the lazy-fetch mode used here).
    - Planning is exposed to the model as an optional ``plan`` tool that the
      model may call when it sees fit.
    """

    # Maximum number of messages kept in memory and written to the session file.
    MAX_HISTORY_MESSAGES = 50
    # Maximum number of per-turn tool-evidence packets retained between turns.
    MAX_TOOL_PACKETS = 8

    _logger = logging.getLogger(__name__)

    def __init__(self, repo_root: Path, llm: Optional[HelloAgentsLLM] = None, config: Optional[Config] = None):
        """Initialise the agent, its directories, tools and prompts.

        Args:
            repo_root: Root directory of the code repository.
            llm: LLM instance; a default ``HelloAgentsLLM`` is created when omitted.
            config: Configuration object; ``Config.from_env()`` is used when omitted.
        """
        repo_root = repo_root.resolve()
        self.config = config or Config.from_env()

        # Resolve the state directory: absolute paths are used as-is,
        # relative ones are anchored at the repository root.
        helloagents_dir = Path(self.config.helloagents_dir)
        state_root = helloagents_dir if helloagents_dir.is_absolute() else (repo_root / helloagents_dir)
        self.paths = CodeAgentPaths(
            repo_root=repo_root,
            notes_dir=state_root / "notes",
            memory_dir=state_root / "memory",
            sessions_dir=state_root / "sessions",
            logs_dir=state_root / "logs",
        )

        # Make sure the directories that are always needed exist.
        # memory / logs are created only on demand, so they are not pre-created.
        # NOTE(review): ``paths.helloagents_dir`` is always ``repo_root/.helloagents``
        # while notes/sessions live under ``state_root``; the two differ when
        # ``config.helloagents_dir`` is customised -- confirm this is intended.
        self.paths.helloagents_dir.mkdir(parents=True, exist_ok=True)
        self.paths.notes_dir.mkdir(parents=True, exist_ok=True)
        self.paths.sessions_dir.mkdir(parents=True, exist_ok=True)

        self.session_id = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        self.llm = llm or HelloAgentsLLM()

        # Tools (real implementations).
        self.note_tool = NoteTool(workspace=str(self.paths.notes_dir))
        # No memory tool is wired up here; the attribute exists so that
        # ``_memory_packets`` can check for it instead of raising AttributeError.
        self.memory_tool = None
        # Like Claude Code: shell syntax (pipes etc.) is allowed by default,
        # but dangerous operations require confirmation.
        self.terminal_tool = TerminalTool(
            workspace=str(self.paths.repo_root),
            timeout=60,
            confirm_dangerous=True,
            default_shell_mode=True,
        )
        self.todo_tool = TodoTool(workspace=str(self.paths.helloagents_dir / "todos"))

        # Tool registry for the ReAct agent.
        # Registered tools: terminal, note, plan, todo, plus context_fetch
        # (lets the model pull in additional evidence on demand).
        self.registry = ToolRegistry()
        self.registry.register_tool(self.terminal_tool)
        self.registry.register_tool(self.note_tool)
        self.registry.register_tool(PlanTool(self.llm, prompt_path=str(self.paths.prompts_dir / "plan.md")))
        self.registry.register_tool(self.todo_tool)

        self.context_fetch_tool = ContextFetchTool(
            workspace=str(self.paths.repo_root),
            note_tool=self.note_tool,
            memory_tool=self.memory_tool,
            max_tokens_per_source=800,
            context_lines=5,
        )
        self.registry.register_tool(self.context_fetch_tool)

        # Context builder (lazy_fetch=True: only the baseline context is built).
        self.context_builder = ContextBuilder(
            memory_tool=self.memory_tool,
            rag_tool=None,
            config=ContextConfig(
                max_tokens=8000,
                reserve_ratio=0.15,
                max_history_turns=10,
                enable_compression=True,
                include_output_format=False,
                lazy_fetch=True,  # on-demand exploration mode
            ),
            llm=self.llm,
        )

        # Load the custom prompts and set up the ReAct agent.
        react_prompt = (self.paths.prompts_dir / "react.md").read_text(encoding="utf-8")
        summarize_prompt = (self.paths.prompts_dir / "summarize_observation.md").read_text(encoding="utf-8")

        def _summarize_observation(tool_name: str, tool_input: str, observation: str) -> str:
            """Compress a tool's output with the LLM so huge raw output stays out of the prompt."""
            truncated = observation
            if len(truncated) > 8000:
                truncated = truncated[:8000] + "\n...truncated...\n"
            user_msg = (
                f"Tool: {tool_name}\n"
                f"Input: {tool_input}\n\n"
                f"Output:\n{truncated}"
            )
            return self.llm.invoke(
                [
                    {"role": "system", "content": summarize_prompt},
                    {"role": "user", "content": user_msg},
                ],
                max_tokens=400,
            ) or ""

        self.react = ReActAgent(
            name="code_agent",
            llm=self.llm,
            tool_registry=self.registry,
            max_steps=20,
            custom_prompt=react_prompt,
            observation_summarizer=_summarize_observation,
            summarize_threshold_chars=1800,
        )

        base_system = (self.paths.prompts_dir / "system.md").read_text(encoding="utf-8")
        self.tools_reference_path = self.paths.prompts_dir / "tools.md"
        self.system_prompt = base_system
        self.history: List[Message] = []
        self.recent_tool_packets: List[ContextPacket] = []
        self.last_direct_reply: bool = False

    def _is_chitchat(self, text: str) -> bool:
        """Return True when ``text`` is a bare greeting, so no tool calls are needed."""
        t = (text or "").strip().lower()
        return t in {"hi", "hello", "hey", "yo", "你好", "您好", "在吗", "嗨", "哈喽"}

    def _is_history_query(self, text: str) -> bool:
        """Return True when ``text`` is a meta request to recap what was said earlier."""
        t = (text or "").strip().lower()
        patterns = [
            "说了什么",
            "刚才说了什么",
            "之前说了什么",
            "what did i say",
            "what did we say",
            "recap",
            "summary of conversation",
        ]
        return any(p in t for p in patterns)

    def _reply_with_recent_history(self, limit: int = 6) -> str:
        """Build a short recap of the most recent conversation.

        Args:
            limit: Maximum number of exchanges to show; up to ``limit * 2``
                user/assistant messages are included. A non-positive value
                yields no messages.

        Returns:
            A bullet list of recent messages, or a notice that there is no
            history to recap.
        """
        # Only user/assistant messages are recapped (system etc. are skipped).
        dialogue = [m for m in self.history if m.role in {"user", "assistant"}]
        # Guard against ``[-0:]``, which would select the whole list.
        items = dialogue[-limit * 2:] if limit > 0 else []
        if not items:
            return "目前还没有可回顾的对话历史。"
        lines = []
        for m in items:
            role = "你" if m.role == "user" else "助手"
            lines.append(f"- {role}: {m.content}")
        return "下面是最近的对话回顾:\n" + "\n".join(lines)

    # The two methods below are no longer called proactively in lazy_fetch mode;
    # extended context is fetched on demand by the model via the context_fetch
    # tool. They are kept to support the traditional lazy_fetch=False mode.

    def _note_packets(self, query: str) -> List[ContextPacket]:
        """Look up relevant notes and wrap them as ``ContextPacket`` objects.

        Best-effort: any failure of the note tool is logged at debug level and
        the packets gathered so far are returned.
        """
        packets: List[ContextPacket] = []
        if self._is_chitchat(query):
            return packets
        try:
            # Most recent blockers.
            blockers = self.note_tool.run({"action": "list", "note_type": "blocker", "limit": 2})
            if blockers and isinstance(blockers, str) and "暂无" not in blockers:
                packets.append(ContextPacket(content=f"[Notes:blocker]\n{blockers}", metadata={"source": "note"}))
            # Notes matching the query.
            hits = self.note_tool.run({"action": "search", "query": query, "limit": 3})
            if hits and isinstance(hits, str) and "未找到" not in hits:
                packets.append(ContextPacket(content=f"[Notes:search]\n{hits}", metadata={"source": "note"}))
        except Exception:
            self._logger.debug("note lookup failed; continuing without notes", exc_info=True)
        return packets

    def _memory_packets(self, query: str) -> List[ContextPacket]:
        """Look up relevant memories and wrap them as ``ContextPacket`` objects.

        Returns an empty list when no memory tool is configured, when the
        query is chit-chat, or when the lookup fails (failure is logged at
        debug level).
        """
        packets: List[ContextPacket] = []
        memory_tool = getattr(self, "memory_tool", None)
        if memory_tool is None or self._is_chitchat(query):
            return packets
        try:
            hits = memory_tool.run(
                {
                    "action": "search",
                    "query": query,
                    "memory_types": memory_tool.memory_types,
                    "limit": 5,
                    "min_importance": 0.0,
                }
            )
            if hits and isinstance(hits, str) and "未找到" not in hits:
                packets.append(ContextPacket(content=f"[Memory]\n{hits}", metadata={"source": "memory"}))
        except Exception:
            self._logger.debug("memory lookup failed; continuing without memories", exc_info=True)
        return packets

    def _persist_session(self) -> None:
        """Write the current session (most recent messages) to its JSON file."""
        p = self.paths.sessions_dir / f"{self.session_id}.json"
        data = {
            "session_id": self.session_id,
            "updated_at": datetime.now().isoformat(),
            "history": [
                {"role": m.role, "content": m.content, "timestamp": m.timestamp.isoformat()}
                for m in self.history[-self.MAX_HISTORY_MESSAGES:]
            ],
        }
        p.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")

    def _record_turn(self, user_input: str, reply: str) -> None:
        """Append one user/assistant exchange to history, trim it, and persist the session."""
        self.history.append(Message(content=user_input, role="user", timestamp=datetime.now()))
        self.history.append(Message(content=reply, role="assistant", timestamp=datetime.now()))
        if len(self.history) > self.MAX_HISTORY_MESSAGES:
            self.history = self.history[-self.MAX_HISTORY_MESSAGES:]
        self._persist_session()

    def run_turn(self, user_input: str) -> str:
        """Run one conversation turn.

        1. Short-circuit empty input, greetings and "recap" requests.
        2. Build the baseline context (system prompt, history, recent tool
           summaries, optional multi-step hint).
        3. Run the ReAct loop on that context.
        4. Collect tool evidence, update history and persist the session.

        Args:
            user_input: The user's message for this turn.

        Returns:
            The assistant's reply; when the todo tool was used without a final
            listing, a snapshot of the todo board is appended.
        """
        # Empty (or missing) input: prompt the user instead of entering ReAct.
        if not (user_input or "").strip():
            return "请提供具体指令或问题。"

        # Greeting: reply directly, which avoids ReAct's strict format parsing
        # failing and avoids pointless tool calls.
        if self._is_chitchat(user_input):
            self.last_direct_reply = True
            reply = "你好!我是 Code Agent,可以帮你按需探索代码仓库、生成补丁并在确认后落盘。你想做什么?(例如:分析项目结构 / 搜索某个类 / 修复一个报错)"
            self._record_turn(user_input, reply)
            return reply

        self.last_direct_reply = False

        # Meta request: recap the recent conversation. The recap is built
        # before this turn is recorded, so it does not include the request itself.
        if self._is_history_query(user_input):
            self.last_direct_reply = True
            reply = self._reply_with_recent_history(limit=6)
            self._record_turn(user_input, reply)
            return reply

        # When obvious multi-step wording is present, add a light hint for the
        # model (not enforced, it only nudges).
        multistep_hint = ""
        multi_patterns = ["分步", "步骤", "三步", "计划", "改造", "完成后", "多步", "多步骤"]
        if any(p in user_input for p in multi_patterns):
            multistep_hint = "提示:本任务包含多个步骤,先用 todo 记录/更新,再执行;收尾用 todo list 汇总。"

        # Baseline context; extended context is fetched by the model on demand
        # through the context_fetch tool.
        previous_summaries = [packet.content for packet in self.recent_tool_packets[-3:]]
        context_text = self.context_builder.build_base(
            user_query=user_input,
            conversation_history=self.history,
            system_instructions=self.system_prompt + ("\n" + multistep_hint if multistep_hint else ""),
            tool_summaries=previous_summaries if previous_summaries else None,
        )

        # The assembled context is passed to ReAct as the "question".
        response = self.react.run(context_text, max_tokens=8000)

        # Collect this turn's tool evidence (already summarised inside ReActAgent).
        # The flags are initialised before the try so they are always bound below.
        todo_used = False
        todo_listed = False
        try:
            evidence: List[str] = []
            for item in (getattr(self.react, "last_trace", None) or [])[-6:]:
                summary = item.get("observation_summary")
                tname = item.get("tool_name")
                if tname == "todo":
                    todo_used = True
                    if "list" in str(item.get("tool_input", "")):
                        todo_listed = True
                if summary:
                    evidence.append(f"[{tname}] {item.get('tool_input')}\n{summary}")
            if evidence:
                self.recent_tool_packets.append(
                    ContextPacket(
                        content="[Tool Evidence]\n" + "\n\n".join(evidence),
                        metadata={"type": "tool_result", "source": "react"},
                    )
                )
            # Keep the buffer small.
            if len(self.recent_tool_packets) > self.MAX_TOOL_PACKETS:
                self.recent_tool_packets = self.recent_tool_packets[-self.MAX_TOOL_PACKETS:]
        except Exception:
            self._logger.debug("collecting tool evidence failed", exc_info=True)

        # Update history (bounded) and persist; the recorded reply does not
        # include the todo snapshot appended below.
        self._record_turn(user_input, response)

        if todo_used and not todo_listed:
            try:
                todo_snapshot = self.registry.execute_tool("todo", {"action": "list"})
                response = f"{response}\n\nTodo board:\n{todo_snapshot}"
            except Exception:
                self._logger.debug("fetching todo snapshot failed", exc_info=True)
        return response
|