codebase_maintainer.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
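"""
CodebaseMaintainer - codebase maintenance assistant.

A complete long-horizon agent implementation that integrates:
1. ContextBuilder - context management
2. NoteTool - structured notes
3. TerminalTool - on-demand file access
4. MemoryTool - conversation memory

Implements codebase-maintenance task management across sessions.
"""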
  10. from typing import Dict, Any, List, Optional
  11. from datetime import datetime
  12. import json
  13. from hello_agents import SimpleAgent, HelloAgentsLLM
  14. from hello_agents.context import ContextBuilder, ContextConfig, ContextPacket
  15. from hello_agents.tools import MemoryTool, NoteTool, TerminalTool
  16. from hello_agents.core.message import Message
  17. class CodebaseMaintainer:
  18. """代码库维护助手 - 长程智能体示例
  19. 整合 ContextBuilder + NoteTool + TerminalTool + MemoryTool
  20. 实现跨会话的代码库维护任务管理
  21. """
  22. def __init__(
  23. self,
  24. project_name: str,
  25. codebase_path: str,
  26. llm: Optional[HelloAgentsLLM] = None
  27. ):
  28. self.project_name = project_name
  29. self.codebase_path = codebase_path
  30. self.session_id = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
  31. # 初始化 LLM
  32. self.llm = llm or HelloAgentsLLM()
  33. # 初始化工具
  34. self.memory_tool = MemoryTool(user_id=project_name)
  35. self.note_tool = NoteTool(workspace=f"./{project_name}_notes")
  36. self.terminal_tool = TerminalTool(workspace=codebase_path, timeout=60)
  37. # 初始化上下文构建器
  38. self.context_builder = ContextBuilder(
  39. memory_tool=self.memory_tool,
  40. rag_tool=None, # 本案例不使用 RAG
  41. config=ContextConfig(
  42. max_tokens=4000,
  43. reserve_ratio=0.15,
  44. min_relevance=0.2,
  45. enable_compression=True
  46. )
  47. )
  48. # 对话历史
  49. self.conversation_history: List[Message] = []
  50. # 统计信息
  51. self.stats = {
  52. "session_start": datetime.now(),
  53. "commands_executed": 0,
  54. "notes_created": 0,
  55. "issues_found": 0
  56. }
  57. print(f"✅ 代码库维护助手已初始化: {project_name}")
  58. print(f"📁 工作目录: {codebase_path}")
  59. print(f"🆔 会话ID: {self.session_id}")
  60. def run(self, user_input: str, mode: str = "auto") -> str:
  61. """运行助手
  62. Args:
  63. user_input: 用户输入
  64. mode: 运行模式
  65. - "auto": 自动决策是否使用工具
  66. - "explore": 侧重代码探索
  67. - "analyze": 侧重问题分析
  68. - "plan": 侧重任务规划
  69. Returns:
  70. str: 助手的回答
  71. """
  72. print(f"\n{'='*80}")
  73. print(f"👤 用户: {user_input}")
  74. print(f"{'='*80}\n")
  75. # 第一步:根据模式执行预处理
  76. pre_context = self._preprocess_by_mode(user_input, mode)
  77. # 第二步:检索相关笔记
  78. relevant_notes = self._retrieve_relevant_notes(user_input)
  79. note_packets = self._notes_to_packets(relevant_notes)
  80. # 第三步:构建优化的上下文
  81. context = self.context_builder.build(
  82. user_query=user_input,
  83. conversation_history=self.conversation_history,
  84. system_instructions=self._build_system_instructions(mode),
  85. custom_packets=note_packets + pre_context
  86. )
  87. # 第四步:调用 LLM
  88. print("🤖 正在思考...")
  89. response = self.llm.invoke(context)
  90. # 第五步:后处理
  91. self._postprocess_response(user_input, response)
  92. # 第六步:更新对话历史
  93. self._update_history(user_input, response)
  94. print(f"\n🤖 助手: {response}\n")
  95. print(f"{'='*80}\n")
  96. return response
  97. def _preprocess_by_mode(
  98. self,
  99. user_input: str,
  100. mode: str
  101. ) -> List[ContextPacket]:
  102. """根据模式执行预处理,收集相关信息"""
  103. packets = []
  104. if mode == "explore" or mode == "auto":
  105. # 探索模式:自动查看项目结构
  106. print("🔍 探索代码库结构...")
  107. structure = self.terminal_tool.run({"command": "find . -type f -name '*.py' | head -n 20"})
  108. self.stats["commands_executed"] += 1
  109. packets.append(ContextPacket(
  110. content=f"[代码库结构]\n{structure}",
  111. timestamp=datetime.now(),
  112. token_count=len(structure) // 4,
  113. relevance_score=0.6,
  114. metadata={"type": "code_structure", "source": "terminal"}
  115. ))
  116. if mode == "analyze":
  117. # 分析模式:检查代码复杂度和问题
  118. print("📊 分析代码质量...")
  119. # 统计代码行数
  120. loc = self.terminal_tool.run({"command": "find . -name '*.py' -exec wc -l {} + | tail -n 1"})
  121. # 查找 TODO 和 FIXME
  122. todos = self.terminal_tool.run({"command": "grep -rn 'TODO\\|FIXME' --include='*.py' | head -n 10"})
  123. self.stats["commands_executed"] += 2
  124. packets.append(ContextPacket(
  125. content=f"[代码统计]\n{loc}\n\n[待办事项]\n{todos}",
  126. timestamp=datetime.now(),
  127. token_count=(len(loc) + len(todos)) // 4,
  128. relevance_score=0.7,
  129. metadata={"type": "code_analysis", "source": "terminal"}
  130. ))
  131. if mode == "plan":
  132. # 规划模式:加载最近的笔记
  133. print("📋 加载任务规划...")
  134. task_notes = self.note_tool.run({
  135. "action": "list",
  136. "note_type": "task_state",
  137. "limit": 3
  138. })
  139. if task_notes:
  140. content = "\n".join([f"- {note['title']}" for note in task_notes])
  141. packets.append(ContextPacket(
  142. content=f"[当前任务]\n{content}",
  143. timestamp=datetime.now(),
  144. token_count=len(content) // 4,
  145. relevance_score=0.8,
  146. metadata={"type": "task_plan", "source": "notes"}
  147. ))
  148. return packets
  149. def _retrieve_relevant_notes(self, query: str, limit: int = 3) -> List[Dict]:
  150. """检索相关笔记"""
  151. try:
  152. # 优先检索 blocker
  153. blockers = self.note_tool.run({
  154. "action": "list",
  155. "note_type": "blocker",
  156. "limit": 2
  157. })
  158. # 搜索相关笔记
  159. search_results = self.note_tool.run({
  160. "action": "search",
  161. "query": query,
  162. "limit": limit
  163. })
  164. # 合并去重
  165. all_notes = {note.get('note_id') or note.get('id'): note for note in (blockers or []) + (search_results or [])}
  166. return list(all_notes.values())[:limit]
  167. except Exception as e:
  168. print(f"[WARNING] 笔记检索失败: {e}")
  169. return []
  170. def _notes_to_packets(self, notes: List[Dict]) -> List[ContextPacket]:
  171. """将笔记转换为上下文包"""
  172. packets = []
  173. for note in notes:
  174. # 根据笔记类型设置不同的相关性分数
  175. relevance_map = {
  176. "blocker": 0.9,
  177. "action": 0.8,
  178. "task_state": 0.75,
  179. "conclusion": 0.7
  180. }
  181. note_type = note.get('type', 'general')
  182. relevance = relevance_map.get(note_type, 0.6)
  183. content = f"[笔记:{note.get('title', 'Untitled')}]\n类型: {note_type}\n\n{note.get('content', '')}"
  184. packets.append(ContextPacket(
  185. content=content,
  186. timestamp=datetime.fromisoformat(note.get('updated_at', datetime.now().isoformat())),
  187. token_count=len(content) // 4,
  188. relevance_score=relevance,
  189. metadata={
  190. "type": "note",
  191. "note_type": note_type,
  192. "note_id": note.get('note_id') or note.get('id')
  193. }
  194. ))
  195. return packets
  196. def _build_system_instructions(self, mode: str) -> str:
  197. """构建系统指令"""
  198. base_instructions = f"""你是 {self.project_name} 项目的代码库维护助手。
  199. 你的核心能力:
  200. 1. 使用 TerminalTool 探索代码库(ls, cat, grep, find等)
  201. 2. 使用 NoteTool 记录发现和任务
  202. 3. 基于历史笔记提供连贯的建议
  203. 当前会话ID: {self.session_id}
  204. """
  205. mode_specific = {
  206. "explore": """
  207. 当前模式: 探索代码库
  208. 你应该:
  209. - 主动使用 terminal 命令了解代码结构
  210. - 识别关键模块和文件
  211. - 记录项目架构到笔记
  212. """,
  213. "analyze": """
  214. 当前模式: 分析代码质量
  215. 你应该:
  216. - 查找代码问题(重复、复杂度、TODO等)
  217. - 评估代码质量
  218. - 将发现的问题记录为 blocker 或 action 笔记
  219. """,
  220. "plan": """
  221. 当前模式: 任务规划
  222. 你应该:
  223. - 回顾历史笔记和任务
  224. - 制定下一步行动计划
  225. - 更新任务状态笔记
  226. """,
  227. "auto": """
  228. 当前模式: 自动决策
  229. 你应该:
  230. - 根据用户需求灵活选择策略
  231. - 在需要时使用工具
  232. - 保持回答的专业性和实用性
  233. """
  234. }
  235. return base_instructions + mode_specific.get(mode, mode_specific["auto"])
  236. def _postprocess_response(self, user_input: str, response: str):
  237. """后处理:分析回答,自动记录重要信息"""
  238. # 如果发现问题,自动创建 blocker 笔记
  239. if any(keyword in response.lower() for keyword in ["问题", "bug", "错误", "阻塞"]):
  240. try:
  241. self.note_tool.run({
  242. "action": "create",
  243. "title": f"发现问题: {user_input[:30]}...",
  244. "content": f"## 用户输入\n{user_input}\n\n## 问题分析\n{response[:500]}...",
  245. "note_type": "blocker",
  246. "tags": [self.project_name, "auto_detected", self.session_id]
  247. })
  248. self.stats["notes_created"] += 1
  249. self.stats["issues_found"] += 1
  250. print("📝 已自动创建问题笔记")
  251. except Exception as e:
  252. print(f"[WARNING] 创建笔记失败: {e}")
  253. # 如果是任务规划,自动创建 action 笔记
  254. elif any(keyword in user_input.lower() for keyword in ["计划", "下一步", "任务", "todo"]):
  255. try:
  256. self.note_tool.run({
  257. "action": "create",
  258. "title": f"任务规划: {user_input[:30]}...",
  259. "content": f"## 讨论\n{user_input}\n\n## 行动计划\n{response[:500]}...",
  260. "note_type": "action",
  261. "tags": [self.project_name, "planning", self.session_id]
  262. })
  263. self.stats["notes_created"] += 1
  264. print("📝 已自动创建行动计划笔记")
  265. except Exception as e:
  266. print(f"[WARNING] 创建笔记失败: {e}")
  267. def _update_history(self, user_input: str, response: str):
  268. """更新对话历史"""
  269. self.conversation_history.append(
  270. Message(content=user_input, role="user", timestamp=datetime.now())
  271. )
  272. self.conversation_history.append(
  273. Message(content=response, role="assistant", timestamp=datetime.now())
  274. )
  275. # 限制历史长度(保留最近10轮对话)
  276. if len(self.conversation_history) > 20:
  277. self.conversation_history = self.conversation_history[-20:]
  278. # === 便捷方法 ===
  279. def explore(self, target: str = ".") -> str:
  280. """探索代码库"""
  281. return self.run(f"请探索 {target} 的代码结构", mode="explore")
  282. def analyze(self, focus: str = "") -> str:
  283. """分析代码质量"""
  284. query = f"请分析代码质量" + (f",重点关注{focus}" if focus else "")
  285. return self.run(query, mode="analyze")
  286. def plan_next_steps(self) -> str:
  287. """规划下一步任务"""
  288. return self.run("根据当前进度,规划下一步任务", mode="plan")
  289. def execute_command(self, command: str) -> str:
  290. """执行终端命令"""
  291. result = self.terminal_tool.run({"command": command})
  292. self.stats["commands_executed"] += 1
  293. return result
  294. def create_note(
  295. self,
  296. title: str,
  297. content: str,
  298. note_type: str = "general",
  299. tags: List[str] = None
  300. ) -> str:
  301. """创建笔记"""
  302. result = self.note_tool.run({
  303. "action": "create",
  304. "title": title,
  305. "content": content,
  306. "note_type": note_type,
  307. "tags": tags or [self.project_name]
  308. })
  309. self.stats["notes_created"] += 1
  310. return result
  311. def get_stats(self) -> Dict[str, Any]:
  312. """获取统计信息"""
  313. duration = (datetime.now() - self.stats["session_start"]).total_seconds()
  314. # 获取笔记摘要
  315. try:
  316. note_summary = self.note_tool.run({"action": "summary"})
  317. except:
  318. note_summary = {}
  319. return {
  320. "session_info": {
  321. "session_id": self.session_id,
  322. "project": self.project_name,
  323. "duration_seconds": duration
  324. },
  325. "activity": {
  326. "commands_executed": self.stats["commands_executed"],
  327. "notes_created": self.stats["notes_created"],
  328. "issues_found": self.stats["issues_found"]
  329. },
  330. "notes": note_summary
  331. }
  332. def generate_report(self, save_to_file: bool = True) -> Dict[str, Any]:
  333. """生成会话报告"""
  334. report = self.get_stats()
  335. if save_to_file:
  336. report_file = f"maintainer_report_{self.session_id}.json"
  337. with open(report_file, 'w', encoding='utf-8') as f:
  338. json.dump(report, f, ensure_ascii=False, indent=2, default=str)
  339. report["report_file"] = report_file
  340. print(f"📄 报告已保存: {report_file}")
  341. return report
  342. def main():
  343. """主函数 - 演示 CodebaseMaintainer 的使用"""
  344. print("=" * 80)
  345. print("CodebaseMaintainer 演示")
  346. print("=" * 80 + "\n")
  347. # 初始化助手
  348. maintainer = CodebaseMaintainer(
  349. project_name="my_flask_app",
  350. codebase_path="./my_flask_app",
  351. llm=HelloAgentsLLM()
  352. )
  353. # 探索代码库
  354. print("\n### 探索代码库 ###")
  355. response = maintainer.explore()
  356. # 分析代码质量
  357. print("\n### 分析代码质量 ###")
  358. response = maintainer.analyze()
  359. # 规划下一步
  360. print("\n### 规划下一步任务 ###")
  361. response = maintainer.plan_next_steps()
  362. # 生成报告
  363. print("\n### 生成会话报告 ###")
  364. report = maintainer.generate_report()
  365. print(json.dumps(report, indent=2, ensure_ascii=False))
  366. print("\n" + "=" * 80)
  367. print("演示完成!")
  368. print("=" * 80)
  369. if __name__ == "__main__":
  370. main()