# code_agent.py
from __future__ import annotations

import json
import logging
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import List, Optional

from agents.react_agent import ReActAgent
from context.builder import ContextBuilder, ContextConfig, ContextPacket
from core.config import Config
from core.llm import HelloAgentsLLM
from core.message import Message
from tools.builtin.context_fetch_tool import ContextFetchTool
from tools.builtin.note_tool import NoteTool
from tools.builtin.plan_tool import PlanTool
from tools.builtin.terminal_tool import TerminalTool
from tools.builtin.todo_tool import TodoTool
from tools.registry import ToolRegistry
  18. @dataclass
  19. class CodeAgentPaths:
  20. """CodeAgent 路径配置类,集中管理所有相关目录路径"""
  21. repo_root: Path
  22. notes_dir: Path
  23. memory_dir: Path
  24. sessions_dir: Path
  25. logs_dir: Path
  26. @property
  27. def helloagents_dir(self) -> Path:
  28. """返回 .helloagents 目录路径"""
  29. return self.repo_root / ".helloagents"
  30. @property
  31. def prompts_dir(self) -> Path:
  32. """返回 prompts 目录路径"""
  33. return self.repo_root / "code_agent" / "prompts"
  34. class CodeAgent:
  35. """
  36. 类似 Claude Code/Codex 的 CLI 智能体:
  37. - 核心循环使用 ReActAgent。
  38. - ContextBuilder 负责拼接:系统提示词 + 最近对话 + 相关笔记 + 情景记忆。
  39. - 规划能力作为可选工具 (`plan`) 暴露给模型,模型可按需调用。
  40. """
  41. def __init__(self, repo_root: Path, llm: Optional[HelloAgentsLLM] = None, config: Optional[Config] = None):
  42. """
  43. 初始化 CodeAgent
  44. Args:
  45. repo_root: 代码仓库根目录
  46. llm: LLM 实例
  47. config: 配置对象
  48. """
  49. repo_root = repo_root.resolve()
  50. self.config = config or Config.from_env()
  51. # 初始化目录结构
  52. helloagents_dir = Path(self.config.helloagents_dir)
  53. state_root = helloagents_dir if helloagents_dir.is_absolute() else (repo_root / helloagents_dir)
  54. self.paths = CodeAgentPaths(
  55. repo_root=repo_root,
  56. notes_dir=state_root / "notes",
  57. memory_dir=state_root / "memory",
  58. sessions_dir=state_root / "sessions",
  59. logs_dir=state_root / "logs",
  60. )
  61. # 确保所有必要目录存在
  62. self.paths.helloagents_dir.mkdir(parents=True, exist_ok=True)
  63. self.paths.notes_dir.mkdir(parents=True, exist_ok=True)
  64. self.paths.sessions_dir.mkdir(parents=True, exist_ok=True)
  65. # memory / logs 仅在需要时创建,这里不再预建
  66. self.session_id = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
  67. self.llm = llm or HelloAgentsLLM()
  68. # 初始化工具 (真实实现)
  69. self.note_tool = NoteTool(workspace=str(self.paths.notes_dir))
  70. # 类似 Claude Code:默认允许 Shell 语法 (管道等),但危险操作需确认
  71. self.terminal_tool = TerminalTool(
  72. workspace=str(self.paths.repo_root),
  73. timeout=60,
  74. confirm_dangerous=True,
  75. default_shell_mode=True,
  76. )
  77. self.todo_tool = TodoTool(workspace=str(self.paths.helloagents_dir / "todos"))
  78. # ReActAgent 的工具注册表
  79. # 核心工具:terminal, note, memory, plan
  80. # 扩展上下文工具:context_fetch(让模型按需获取更多证据)
  81. self.registry = ToolRegistry()
  82. self.registry.register_tool(self.terminal_tool)
  83. self.registry.register_tool(self.note_tool)
  84. self.registry.register_tool(PlanTool(self.llm, prompt_path=str(self.paths.prompts_dir / "plan.md")))
  85. self.registry.register_tool(self.todo_tool)
  86. # 注册上下文获取工具(让模型按需探索)
  87. self.context_fetch_tool = ContextFetchTool(
  88. workspace=str(self.paths.repo_root),
  89. note_tool=self.note_tool,
  90. memory_tool=None,
  91. max_tokens_per_source=800,
  92. context_lines=5,
  93. )
  94. self.registry.register_tool(self.context_fetch_tool)
  95. # 初始化上下文构建器(lazy_fetch=True:只构建保底上下文)
  96. self.context_builder = ContextBuilder(
  97. memory_tool=None,
  98. rag_tool=None,
  99. config=ContextConfig(
  100. max_tokens=8000,
  101. reserve_ratio=0.15,
  102. max_history_turns=10,
  103. enable_compression=True,
  104. include_output_format=False,
  105. lazy_fetch=True, # 按需探索模式
  106. ),
  107. llm=self.llm,
  108. )
  109. # 加载自定义 Prompt 并初始化 ReActAgent
  110. react_prompt = (self.paths.prompts_dir / "react.md").read_text(encoding="utf-8")
  111. summarize_prompt = (self.paths.prompts_dir / "summarize_observation.md").read_text(encoding="utf-8")
  112. def _summarize_observation(tool_name: str, tool_input: str, observation: str) -> str:
  113. """
  114. 使用 LLM 压缩工具输出 (避免将巨大的原始输出放入 Prompt)
  115. """
  116. truncated = observation
  117. if len(truncated) > 8000:
  118. truncated = truncated[:8000] + "\n...truncated...\n"
  119. user_msg = (
  120. f"Tool: {tool_name}\n"
  121. f"Input: {tool_input}\n\n"
  122. f"Output:\n{truncated}"
  123. )
  124. return self.llm.invoke(
  125. [
  126. {"role": "system", "content": summarize_prompt},
  127. {"role": "user", "content": user_msg},
  128. ],
  129. max_tokens=400,
  130. ) or ""
  131. self.react = ReActAgent(
  132. name="code_agent",
  133. llm=self.llm,
  134. tool_registry=self.registry,
  135. max_steps=20,
  136. custom_prompt=react_prompt,
  137. observation_summarizer=_summarize_observation,
  138. summarize_threshold_chars=1800,
  139. )
  140. base_system = (self.paths.prompts_dir / "system.md").read_text(encoding="utf-8")
  141. self.tools_reference_path = self.paths.prompts_dir / "tools.md"
  142. self.system_prompt = base_system
  143. self.history: List[Message] = []
  144. self.recent_tool_packets: List[ContextPacket] = []
  145. self.last_direct_reply: bool = False
  146. def _is_chitchat(self, text: str) -> bool:
  147. """判断是否为闲聊,避免不必要的工具调用"""
  148. t = (text or "").strip().lower()
  149. return t in {"hi", "hello", "hey", "yo", "你好", "您好", "在吗", "嗨", "哈喽"}
  150. def _is_history_query(self, text: str) -> bool:
  151. """判断是否为'回顾刚才说了什么'的元请求"""
  152. t = (text or "").strip().lower()
  153. patterns = [
  154. "说了什么",
  155. "刚才说了什么",
  156. "之前说了什么",
  157. "what did i say",
  158. "what did we say",
  159. "recap",
  160. "summary of conversation",
  161. ]
  162. return any(p in t for p in patterns)
  163. def _reply_with_recent_history(self, limit: int = 6) -> str:
  164. """生成最近对话的简要回顾"""
  165. # 只取用户/助手消息(跳过系统等)
  166. items = [m for m in self.history if m.role in {"user", "assistant"}][-limit * 2 :]
  167. if not items:
  168. return "目前还没有可回顾的对话历史。"
  169. lines = []
  170. for m in items:
  171. role = "你" if m.role == "user" else "助手"
  172. lines.append(f"- {role}: {m.content}")
  173. return "下面是最近的对话回顾:\n" + "\n".join(lines)
  174. # 以下两个方法在 lazy_fetch 模式下不再主动调用,
  175. # 扩展上下文改由模型通过 context_fetch 工具按需获取。
  176. # 保留这些方法以支持 lazy_fetch=False 的传统模式。
  177. def _note_packets(self, query: str) -> List[ContextPacket]:
  178. """检索相关笔记并封装为 ContextPacket"""
  179. packets: List[ContextPacket] = []
  180. if self._is_chitchat(query):
  181. return packets
  182. try:
  183. # 获取最近的阻碍 (Blocker)
  184. blockers = self.note_tool.run({"action": "list", "note_type": "blocker", "limit": 2})
  185. if blockers and isinstance(blockers, str) and "暂无" not in blockers:
  186. packets.append(ContextPacket(content=f"[Notes:blocker]\n{blockers}", metadata={"source": "note"}))
  187. # 搜索相关笔记
  188. hits = self.note_tool.run({"action": "search", "query": query, "limit": 3})
  189. if hits and isinstance(hits, str) and "未找到" not in hits:
  190. packets.append(ContextPacket(content=f"[Notes:search]\n{hits}", metadata={"source": "note"}))
  191. except Exception:
  192. pass
  193. return packets
  194. def _memory_packets(self, query: str) -> List[ContextPacket]:
  195. """检索相关记忆并封装为 ContextPacket"""
  196. packets: List[ContextPacket] = []
  197. if self._is_chitchat(query):
  198. return packets
  199. try:
  200. hits = self.memory_tool.run(
  201. {"action": "search", "query": query, "memory_types": self.memory_tool.memory_types, "limit": 5, "min_importance": 0.0}
  202. )
  203. if hits and isinstance(hits, str) and "未找到" not in hits:
  204. packets.append(ContextPacket(content=f"[Memory]\n{hits}", metadata={"source": "memory"}))
  205. except Exception:
  206. pass
  207. return packets
  208. def _persist_session(self) -> None:
  209. """持久化当前会话到 JSON 文件"""
  210. p = self.paths.sessions_dir / f"{self.session_id}.json"
  211. data = {
  212. "session_id": self.session_id,
  213. "updated_at": datetime.now().isoformat(),
  214. "history": [
  215. {"role": m.role, "content": m.content, "timestamp": m.timestamp.isoformat()} for m in self.history[-50:]
  216. ],
  217. }
  218. p.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
  219. def run_turn(self, user_input: str) -> str:
  220. """
  221. 执行一轮对话:
  222. 1. 收集上下文 (笔记、记忆、最近工具输出)
  223. 2. 构建完整 Prompt
  224. 3. 运行 ReAct 循环
  225. 4. 更新历史并持久化
  226. """
  227. # 空输入:提示而不进入 ReAct
  228. if not user_input.strip():
  229. return "请提供具体指令或问题。"
  230. # 闲聊/问候:直接回复,避免 ReAct 的严格格式解析失败,也避免无谓的工具调用。
  231. if self._is_chitchat(user_input):
  232. self.last_direct_reply = True
  233. reply = "你好!我是 Code Agent,可以帮你按需探索代码仓库、生成补丁并在确认后落盘。你想做什么?(例如:分析项目结构 / 搜索某个类 / 修复一个报错)"
  234. self.history.append(Message(content=user_input, role="user", timestamp=datetime.now()))
  235. self.history.append(Message(content=reply, role="assistant", timestamp=datetime.now()))
  236. if len(self.history) > 50:
  237. self.history = self.history[-50:]
  238. self._persist_session()
  239. return reply
  240. self.last_direct_reply = False
  241. # 元请求:回顾最近对话
  242. if self._is_history_query(user_input):
  243. self.last_direct_reply = True
  244. reply = self._reply_with_recent_history(limit=6)
  245. self.history.append(Message(content=user_input, role="user", timestamp=datetime.now()))
  246. self.history.append(Message(content=reply, role="assistant", timestamp=datetime.now()))
  247. if len(self.history) > 50:
  248. self.history = self.history[-50:]
  249. self._persist_session()
  250. return reply
  251. # 若检测到明显多步骤词汇,向模型追加轻量提示(不强制,只提高倾向)
  252. multistep_hint = ""
  253. multi_patterns = ["分步", "步骤", "三步", "计划", "改造", "完成后", "多步", "多步骤"]
  254. if any(p in user_input for p in multi_patterns):
  255. multistep_hint = "提示:本任务包含多个步骤,先用 todo 记录/更新,再执行;收尾用 todo list 汇总。"
  256. # 构建保底上下文(系统提示 + 对话历史 + 上次工具摘要 + 可选 hint)
  257. # 扩展上下文由模型通过 context_fetch 工具按需获取
  258. tool_summaries = []
  259. for packet in self.recent_tool_packets[-3:]:
  260. tool_summaries.append(packet.content)
  261. context_text = self.context_builder.build_base(
  262. user_query=user_input,
  263. conversation_history=self.history,
  264. system_instructions=self.system_prompt + ("\n" + multistep_hint if multistep_hint else ""),
  265. tool_summaries=tool_summaries if tool_summaries else None,
  266. )
  267. # 将拼接好的上下文作为"问题"输入给 ReAct
  268. response = self.react.run(context_text, max_tokens=8000)
  269. # 收集本轮的工具执行证据 (已在 ReActAgent 内部摘要)
  270. try:
  271. tool_summaries: List[str] = []
  272. todo_used = False
  273. todo_listed = False
  274. for item in getattr(self.react, "last_trace", [])[-6:]:
  275. summary = item.get("observation_summary")
  276. tname = item.get("tool_name")
  277. if tname == "todo":
  278. todo_used = True
  279. if "list" in str(item.get("tool_input", "")):
  280. todo_listed = True
  281. if summary:
  282. tool_summaries.append(
  283. f"[{item.get('tool_name')}] {item.get('tool_input')}\n{summary}"
  284. )
  285. if tool_summaries:
  286. self.recent_tool_packets.append(
  287. ContextPacket(
  288. content="[Tool Evidence]\n" + "\n\n".join(tool_summaries),
  289. metadata={"type": "tool_result", "source": "react"},
  290. )
  291. )
  292. # 保持缓冲区较小
  293. if len(self.recent_tool_packets) > 8:
  294. self.recent_tool_packets = self.recent_tool_packets[-8:]
  295. except Exception:
  296. pass
  297. # 更新历史记录 (保留最近 50 条)
  298. self.history.append(Message(content=user_input, role="user", timestamp=datetime.now()))
  299. self.history.append(Message(content=response, role="assistant", timestamp=datetime.now()))
  300. if len(self.history) > 50:
  301. self.history = self.history[-50:]
  302. self._persist_session()
  303. try:
  304. if todo_used and not todo_listed:
  305. todo_snapshot = self.registry.execute_tool("todo", {"action": "list"})
  306. response = f"{response}\n\nTodo board:\n{todo_snapshot}"
  307. except Exception:
  308. pass
  309. return response