1
0

agents.py 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181
  1. """核心 Agent"""
  2. import json
  3. import os
  4. import hashlib
  5. from pathlib import Path
  6. from typing import Dict, Any, Optional, List
  7. from hello_agents import (
  8. HelloAgentsLLM,
  9. ReActAgent,
  10. ReflectionAgent,
  11. PlanAndSolveAgent
  12. )
  13. from hello_agents.tools import MCPTool, ToolRegistry, SearchTool
  14. from models import ColumnPlan, ReviewResult, ContentNode, ContentLevel
  15. from prompts import get_structure_requirements, get_react_writer_prompt, get_reflection_writer_prompts, get_planner_prompts
  16. from config import get_settings, get_word_count
  17. from utils import JSONExtractor, parse_react_output, get_current_timestamp
# Module-level settings object, obtained once at import time from config.get_settings().
settings = get_settings()
  19. class LLMService:
  20. """LLM 服务单例"""
  21. _instance: Optional[HelloAgentsLLM] = None
  22. @classmethod
  23. def get_llm(cls) -> HelloAgentsLLM:
  24. """获取 LLM 实例(单例模式)"""
  25. if cls._instance is None:
  26. cls._instance = HelloAgentsLLM()
  27. print(f"▸ LLM服务初始化成功")
  28. print(f" 提供商: {cls._instance.provider}")
  29. print(f" 模型: {cls._instance.model}")
  30. return cls._instance
  31. class PlannerAgent:
  32. """
  33. 使用 PlanAndSolveAgent 模式
  34. PlanAndSolveAgent 将任务分解为子任务并逐步执行,非常适合专栏规划场景:
  35. 1. 分析主题(理解用户需求)
  36. 2. 规划子话题(分解任务)
  37. 3. 组织结构(逐步执行)
  38. 支持缓存机制,以主题为key缓存规划结果
  39. """
  40. def __init__(self, cache_dir: str = ".cache"):
  41. """
  42. 初始化规划 Agent
  43. Args:
  44. cache_dir: 缓存目录路径
  45. """
  46. self.llm = LLMService.get_llm()
  47. self.cache_dir = Path(cache_dir)
  48. self.cache_dir.mkdir(exist_ok=True)
  49. # 自定义 PlanAndSolve 提示词
  50. planner_prompts = {
  51. "planner": """
  52. 你是一位经验丰富的专栏策划专家。请将以下专栏主题分解为清晰的子话题规划步骤。
  53. 主题: {question}
  54. 请按以下格式输出规划步骤:
  55. ```python
  56. [
  57. "步骤1: 分析主题的核心概念和目标读者",
  58. "步骤2: 确定知识体系的整体框架",
  59. "步骤3: 规划2-4个子话题,确保逻辑递进",
  60. "步骤4: 为每个子话题设定学习目标和要点",
  61. "步骤5: 组装完整的专栏大纲"
  62. ]
  63. ```
  64. 不能超过10个步骤。
  65. """,
  66. "executor": """
  67. 你是专栏规划执行专家。请按照规划步骤执行专栏大纲的生成。
  68. # 原始主题: {question}
  69. # 规划步骤: {plan}
  70. # 已完成步骤: {history}
  71. # 当前步骤: {current_step}
  72. ▸️ **关键要求**:
  73. - 不能超过10个步骤。
  74. - 如果当前步骤是"步骤5: 组装完整的专栏大纲"或包含"组装"、"完整"、"大纲"等关键词,**必须**输出完整的 JSON 格式专栏大纲
  75. - 如果不是最后一步,请输出当前步骤的分析结果(文本格式)
  76. **最后一步的输出格式(必须是 JSON,不要添加任何其他文本)**:
  77. ```json
  78. {{
  79. "column_title": "专栏总标题",
  80. "column_description": "专栏简介(100-200字)",
  81. "target_audience": "目标读者群体",
  82. "topics": [
  83. {{
  84. "id": "topic_001",
  85. "title": "子话题标题",
  86. "description": "子话题简介(50-100字)",
  87. "estimated_words": 200,
  88. "key_points": ["要点1", "要点2", "要点3"],
  89. "prerequisites": ["前置知识1", "前置知识2"]
  90. }}
  91. ]
  92. }}
  93. ```
  94. **重要**:如果是最后一步,请直接输出 JSON,不要添加"当前步骤分析结果"等前缀文本。
  95. 请执行当前步骤:
  96. """
  97. }
  98. # 创建带缓存的 Executor 包装器
  99. from hello_agents.agents.plan_solve_agent import Executor
  100. class CachedExecutor(Executor):
  101. """带缓存的 Executor,缓存每个步骤的执行结果"""
  102. def __init__(self, llm_client, prompt_template, cache_dir, main_topic):
  103. super().__init__(llm_client, prompt_template)
  104. self.cache_dir = cache_dir
  105. self.main_topic = main_topic
  106. self.steps_cache_dir = cache_dir / "steps_cache"
  107. self.steps_cache_dir.mkdir(exist_ok=True)
  108. def _get_step_cache_key(self, step_index: int, step_content: str) -> Path:
  109. """生成步骤缓存文件路径"""
  110. # 使用主题 + 步骤索引 + 步骤内容的hash作为key
  111. step_hash = hashlib.md5(
  112. f"{self.main_topic}_{step_index}_{step_content}".encode('utf-8')
  113. ).hexdigest()
  114. return self.steps_cache_dir / f"step_{step_index}_{step_hash}.json"
  115. def _load_step_from_cache(self, step_index: int, step_content: str) -> Optional[str]:
  116. """从缓存加载步骤结果"""
  117. cache_file = self._get_step_cache_key(step_index, step_content)
  118. if not cache_file.exists():
  119. return None
  120. try:
  121. with open(cache_file, 'r', encoding='utf-8') as f:
  122. cache_data = json.load(f)
  123. # 验证缓存的主题和步骤是否匹配
  124. if (cache_data.get('topic') == self.main_topic and
  125. cache_data.get('step_index') == step_index and
  126. cache_data.get('step_content') == step_content):
  127. print(f" ▸ 从缓存加载步骤 {step_index} 的结果")
  128. return cache_data.get('result')
  129. except Exception as e:
  130. print(f" ▸️ 加载步骤缓存失败: {e}")
  131. return None
  132. def _save_step_to_cache(self, step_index: int, step_content: str, result: str):
  133. """保存步骤结果到缓存"""
  134. cache_file = self._get_step_cache_key(step_index, step_content)
  135. try:
  136. cache_data = {
  137. 'topic': self.main_topic,
  138. 'step_index': step_index,
  139. 'step_content': step_content,
  140. 'result': result
  141. }
  142. with open(cache_file, 'w', encoding='utf-8') as f:
  143. json.dump(cache_data, f, ensure_ascii=False, indent=2)
  144. except Exception as e:
  145. print(f" ▸️ 保存步骤缓存失败: {e}")
  146. def execute(self, question: str, plan: List[str], **kwargs) -> str:
  147. """按计划执行任务(带缓存)"""
  148. history = ""
  149. final_answer = ""
  150. print("\n--- 正在执行计划 ---")
  151. for i, step in enumerate(plan, 1):
  152. print(f"\n-> 正在执行步骤 {i}/{len(plan)}: {step}")
  153. # 尝试从缓存加载
  154. cached_result = self._load_step_from_cache(i, step)
  155. if cached_result:
  156. response_text = cached_result
  157. else:
  158. # 缓存未命中,执行步骤
  159. prompt = self.prompt_template.format(
  160. question=question,
  161. plan=plan,
  162. history=history if history else "无",
  163. current_step=step
  164. )
  165. messages = [{"role": "user", "content": prompt}]
  166. response_text = self.llm_client.invoke(messages, **kwargs) or ""
  167. # 保存到缓存
  168. self._save_step_to_cache(i, step, response_text)
  169. history += f"步骤 {i}: {step}\n结果: {response_text}\n\n"
  170. final_answer = response_text
  171. print(f"▸ 步骤 {i} 已完成,结果: {final_answer[:100] if len(final_answer) > 100 else final_answer}...")
  172. return final_answer
  173. # 创建 PlanAndSolveAgent,但替换 Executor
  174. self.agent = PlanAndSolveAgent(
  175. name="专栏规划专家",
  176. llm=self.llm,
  177. custom_prompts=planner_prompts
  178. )
  179. # 替换 Executor 为带缓存的版本
  180. cached_executor = CachedExecutor(
  181. llm_client=self.llm,
  182. prompt_template=planner_prompts["executor"],
  183. cache_dir=self.cache_dir,
  184. main_topic="" # 将在 plan_column 中设置
  185. )
  186. self.agent.executor = cached_executor
  187. def _get_cache_key(self, main_topic: str) -> str:
  188. """
  189. 生成缓存key(使用主题的hash值)
  190. Args:
  191. main_topic: 专栏主题
  192. Returns:
  193. 缓存文件名
  194. """
  195. # 使用主题的hash值作为文件名
  196. topic_hash = hashlib.md5(main_topic.encode('utf-8')).hexdigest()
  197. return f"plan_{topic_hash}.json"
  198. def _load_from_cache(self, main_topic: str) -> Optional[ColumnPlan]:
  199. """
  200. 从缓存加载规划结果
  201. Args:
  202. main_topic: 专栏主题
  203. Returns:
  204. ColumnPlan 实例,如果缓存不存在则返回 None
  205. """
  206. cache_file = self.cache_dir / self._get_cache_key(main_topic)
  207. if not cache_file.exists():
  208. return None
  209. try:
  210. with open(cache_file, 'r', encoding='utf-8') as f:
  211. cache_data = json.load(f)
  212. # 验证缓存的主题是否匹配
  213. if cache_data.get('topic') != main_topic:
  214. print(f"▸️ 缓存主题不匹配,忽略缓存")
  215. return None
  216. plan_data = cache_data.get('plan')
  217. if not plan_data:
  218. return None
  219. plan = ColumnPlan.from_dict(plan_data)
  220. print(f"▸ 从缓存加载规划结果")
  221. print(f" 缓存文件: {cache_file}")
  222. return plan
  223. except Exception as e:
  224. print(f"▸️ 加载缓存失败: {e}")
  225. return None
  226. def _save_to_cache(self, main_topic: str, plan: ColumnPlan):
  227. """
  228. 保存规划结果到缓存
  229. Args:
  230. main_topic: 专栏主题
  231. plan: ColumnPlan 实例
  232. """
  233. cache_file = self.cache_dir / self._get_cache_key(main_topic)
  234. try:
  235. cache_data = {
  236. 'topic': main_topic,
  237. 'plan': plan.to_dict(),
  238. 'cached_at': get_current_timestamp() # 正确的缓存时间戳
  239. }
  240. with open(cache_file, 'w', encoding='utf-8') as f:
  241. json.dump(cache_data, f, ensure_ascii=False, indent=2)
  242. print(f"▸ 规划结果已保存到缓存: {cache_file}")
  243. except Exception as e:
  244. print(f"▸️ 保存缓存失败: {e}")
  245. def plan_column(self, main_topic: str, use_cache: bool = True) -> ColumnPlan:
  246. """
  247. 规划专栏大纲
  248. Args:
  249. main_topic: 专栏主题
  250. use_cache: 是否使用缓存(默认True)
  251. Returns:
  252. ColumnPlan 实例
  253. """
  254. # 尝试从缓存加载
  255. if use_cache:
  256. cached_plan = self._load_from_cache(main_topic)
  257. if cached_plan:
  258. print(f" 专栏标题: {cached_plan.column_title}")
  259. print(f" 话题数量: {cached_plan.get_topic_count()}")
  260. return cached_plan
  261. # 缓存未命中,调用 LLM 进行规划
  262. print(f"\n▸ PlanAndSolve Agent 开始规划专栏...")
  263. print(f" 使用模式: 任务分解 → 逐步执行")
  264. print(f" 主题: {main_topic}")
  265. # 更新 Executor 的主题(用于缓存key)
  266. if hasattr(self.agent.executor, 'main_topic'):
  267. self.agent.executor.main_topic = main_topic
  268. response = self.agent.run(main_topic)
  269. # 解析 JSON 响应
  270. plan_data = self._extract_json(response)
  271. plan = ColumnPlan.from_dict(plan_data)
  272. print(f"▸ 规划完成")
  273. print(f" 专栏标题: {plan.column_title}")
  274. print(f" 话题数量: {plan.get_topic_count()}")
  275. # 保存到缓存
  276. if use_cache:
  277. self._save_to_cache(main_topic, plan)
  278. return plan
  279. def _extract_json(self, response: str) -> Dict[str, Any]:
  280. """从响应中提取 JSON(使用统一的 JSONExtractor)"""
  281. try:
  282. return JSONExtractor.extract(
  283. response,
  284. required_fields=['column_title', 'topics']
  285. )
  286. except Exception as e:
  287. print(f"▸️ JSON 提取失败: {e}")
  288. print(f" 响应内容(前500字符): {response[:500]}...")
  289. raise
  290. class ReActAgentWrapper:
  291. """
  292. ReActAgent 包装器,用于捕获历史信息和处理错误
  293. """
  294. def __init__(self, agent: ReActAgent):
  295. self.agent = agent
  296. self.last_history = [] # 保存最后一次运行的历史
  297. self.last_response = None # run() 方法的返回值(通常是 final_answer)
  298. self.last_raw_responses = [] # 保存所有原始 LLM 响应,用于调试
  299. def run(self, question: str):
  300. """
  301. 运行 Agent 并捕获历史信息
  302. Args:
  303. question: 问题
  304. """
  305. try:
  306. # 清空上次的原始响应
  307. self.last_raw_responses = []
  308. # 尝试访问 agent 的 history 属性(如果存在)
  309. if hasattr(self.agent, 'current_history'):
  310. original_history = self.agent.current_history.copy() if self.agent.current_history else []
  311. elif hasattr(self.agent, 'history'):
  312. original_history = self.agent.history.copy() if self.agent.history else []
  313. else:
  314. original_history = []
  315. # 如果 agent 有 _parse_output 方法,保存原始方法并替换为改进版本
  316. original_parse = None
  317. original_invoke = None
  318. if hasattr(self.agent, '_parse_output'):
  319. original_parse = self.agent._parse_output
  320. # 使用统一的解析函数(包装为方法)
  321. def parse_wrapper(text):
  322. return parse_react_output(text)
  323. self.agent._parse_output = parse_wrapper
  324. # 拦截 LLM 调用以捕获原始响应
  325. if hasattr(self.agent, 'llm') and hasattr(self.agent.llm, 'invoke'):
  326. original_invoke = self.agent.llm.invoke
  327. def wrapped_invoke(messages, **kwargs):
  328. """包装 LLM invoke 方法以捕获原始响应"""
  329. response = original_invoke(messages, **kwargs)
  330. if response:
  331. self.last_raw_responses.append(response)
  332. return response
  333. self.agent.llm.invoke = wrapped_invoke
  334. try:
  335. response = self.agent.run(question)
  336. self.last_response = response
  337. # 尝试获取最终的历史信息
  338. if hasattr(self.agent, 'current_history'):
  339. self.last_history = self.agent.current_history.copy() if self.agent.current_history else []
  340. elif hasattr(self.agent, 'history'):
  341. self.last_history = self.agent.history.copy() if self.agent.history else []
  342. else:
  343. self.last_history = original_history
  344. return response
  345. finally:
  346. # 恢复原始方法
  347. if original_parse:
  348. self.agent._parse_output = original_parse
  349. if original_invoke and hasattr(self.agent, 'llm'):
  350. self.agent.llm.invoke = original_invoke
  351. except Exception as e:
  352. # 即使出错也尝试保存历史
  353. if hasattr(self.agent, 'current_history'):
  354. self.last_history = self.agent.current_history.copy() if self.agent.current_history else []
  355. elif hasattr(self.agent, 'history'):
  356. self.last_history = self.agent.history.copy() if self.agent.history else []
  357. print(f"▸️ ReActAgentWrapper 捕获到异常: {e}")
  358. raise
  359. class WriterAgent:
  360. """
  361. 写作 Agent - 使用 ReActAgent 模式
  362. ReActAgent 结合推理(Reasoning)和行动(Acting),非常适合需要工具调用的写作场景:
  363. 1. 分析写作需求(推理)
  364. 2. 决定是否需要搜索(推理)
  365. 3. 调用搜索工具(行动)
  366. 4. 整合信息写作(行动)
  367. """
  368. def __init__(self, enable_search: bool = True):
  369. """
  370. 初始化写作 Agent
  371. Args:
  372. enable_search: 是否启用搜索功能
  373. """
  374. self.llm = LLMService.get_llm()
  375. self.enable_search = enable_search
  376. # 创建工具注册表
  377. self.tool_registry = ToolRegistry()
  378. # 添加搜索工具(如果启用)
  379. if enable_search:
  380. self._setup_search_tool()
  381. # 自定义 ReAct 提示词(参考示例代码的简洁格式)
  382. react_prompt = get_react_writer_prompt() # 从 prompts.py 获取
  383. # 创建 ReActAgent(将在包装器中替换解析方法)
  384. react_agent = ReActAgent(
  385. name="内容创作专家",
  386. llm=self.llm,
  387. tool_registry=self.tool_registry,
  388. custom_prompt=react_prompt,
  389. max_steps=10 # 增加到 10 步,给 Agent 更多机会完成任务
  390. )
  391. self.agent = ReActAgentWrapper(react_agent)
  392. def _setup_search_tool(self):
  393. """设置搜索工具(使用 SearchTool 和 MCPTool)"""
  394. settings = get_settings()
  395. # 保存 search_tool 实例供 wrappers 使用
  396. self.search_tool = None
  397. # 1. 初始化内置 SearchTool
  398. try:
  399. # 检查是否配置了搜索 API
  400. if settings.tavily_api_key or settings.serpapi_api_key:
  401. self.search_tool = SearchTool(
  402. tavily_key=settings.tavily_api_key,
  403. serpapi_key=settings.serpapi_api_key
  404. )
  405. print("▸ SearchTool (内置) 已初始化")
  406. else:
  407. print("▸️ 未配置搜索 API Key (Tavily/SerpApi),跳过 SearchTool 初始化")
  408. except Exception as e:
  409. print(f"▸️ 初始化 SearchTool 失败: {e}")
  410. # 2. 注册 wrapper 函数 (如果 search_tool 可用)
  411. if self.search_tool:
  412. self._register_search_wrappers()
  413. # 3. 注册 GitHub MCPTool
  414. try:
  415. # 检查是否有 GitHub Token (通常在环境变量 GITHUB_PERSONAL_ACCESS_TOKEN)
  416. if os.environ.get("GITHUB_PERSONAL_ACCESS_TOKEN"):
  417. github_tool = MCPTool(
  418. name="github",
  419. description="GitHub 操作工具,支持搜索仓库、查看代码等",
  420. server_command=["npx", "-y", "@modelcontextprotocol/server-github"],
  421. auto_expand=True
  422. )
  423. self.tool_registry.register_tool(github_tool)
  424. print("▸ GitHub MCPTool 已注册")
  425. else:
  426. print("▸️ 未配置 GITHUB_PERSONAL_ACCESS_TOKEN,跳过 GitHub MCPTool 注册")
  427. except Exception as e:
  428. print(f"▸️ 注册 GitHub MCPTool 失败: {e}")
  429. def _register_search_wrappers(self):
  430. """注册适配 Prompt 的搜索函数 wrappers"""
  431. def web_search(query: str) -> str:
  432. """通用网页搜索,获取最新资讯和资料"""
  433. # SearchTool.run 接受 dict 参数
  434. return str(self.search_tool.run({"query": query}))
  435. def search_recent_info(topic: str) -> str:
  436. """搜索最新信息和动态"""
  437. return str(self.search_tool.run({"query": f"{topic} latest info"}))
  438. def search_code_examples(technology: str, task: str) -> str:
  439. """搜索代码示例和教程"""
  440. return str(self.search_tool.run({"query": f"{technology} {task} code examples tutorial"}))
  441. def verify_facts(statement: str) -> str:
  442. """验证事实准确性"""
  443. return str(self.search_tool.run({"query": f"verify fact: {statement}"}))
  444. self.tool_registry.register_function("web_search", "通用网页搜索,获取最新资讯和资料", web_search)
  445. self.tool_registry.register_function("search_recent_info", "搜索最新信息和动态", search_recent_info)
  446. self.tool_registry.register_function("search_code_examples", "搜索代码示例和教程", search_code_examples)
  447. self.tool_registry.register_function("verify_facts", "验证事实准确性", verify_facts)
  448. print("▸ 搜索函数 wrappers 已注册")
  449. def generate_content(
  450. self,
  451. node: ContentNode,
  452. context: Dict[str, Any],
  453. level: int,
  454. additional_requirements: str = ""
  455. ) -> Dict[str, Any]:
  456. """
  457. 生成内容(使用 ReAct 模式)
  458. Args:
  459. node: 当前节点
  460. context: 写作上下文
  461. level: 当前层级
  462. additional_requirements: 额外要求
  463. Returns:
  464. 生成的内容数据
  465. """
  466. structure_requirements = get_structure_requirements(level)
  467. word_count = get_word_count(level)
  468. # 构建写作任务描述(简化格式,参考示例代码)
  469. task_description = f"""
  470. 请撰写一篇技术专栏文章。
  471. 层级: Level {level}/3
  472. 话题: {node.title}
  473. 描述: {node.description}
  474. 要求字数: {word_count} 字(允许误差±10%)
  475. 上下文信息:
  476. {json.dumps(context, ensure_ascii=False, indent=2)}
  477. 结构要求:
  478. {structure_requirements}
  479. 额外要求:
  480. {additional_requirements if additional_requirements else "无"}
  481. 重要提示:
  482. - 完成写作后,必须使用 `\n\nFinish[JSON内容]` 格式输出结果
  483. - JSON 中的 `level` 字段必须是 {level}
  484. - `content` 字段必须包含完整的文章正文(Markdown格式)
  485. - 文章必须包含:引言、主体内容(3-5个小节)、实践案例、总结
  486. """
  487. try:
  488. response = self.agent.run(task_description)
  489. # 调试:打印真正的原始 LLM 响应(最后一次的响应)
  490. print(f"\n{'='*70}")
  491. print("▸ ReActAgent 原始 LLM 响应:")
  492. print(f"{'='*70}")
  493. if self.agent.last_raw_responses:
  494. # 打印最后一次的原始响应(通常是包含 Finish[...] 的那次)
  495. last_raw = self.agent.last_raw_responses[-1]
  496. print(last_raw)
  497. # print(last_raw[:2000] if len(last_raw) > 2000 else last_raw)
  498. # if len(last_raw) > 2000:
  499. # print(f"\n... (响应过长,已截断,总长度: {len(last_raw)} 字符)")
  500. else:
  501. print("▸️ 未捕获到原始响应")
  502. print(f"{'='*70}\n")
  503. # 打印 run() 方法的返回值(通常是 final_answer)
  504. print(f"▸ ReActAgent.run() 返回值:")
  505. print(f" {response[:500] if response and len(response) > 500 else response}")
  506. print()
  507. # 检查响应是否有效
  508. # 注意:即使 response 为空或错误,也要检查是否有原始响应可以提取
  509. if not response or (isinstance(response, str) and not response.strip()):
  510. print("▸️ ReActAgent 返回了空响应或空白响应")
  511. print(f" 已收集的历史信息: {len(self.agent.last_history)} 条")
  512. # 尝试从最后一次原始响应中提取内容
  513. if self.agent.last_raw_responses:
  514. last_raw = self.agent.last_raw_responses[-1]
  515. print(f" 尝试从最后一次原始响应中提取内容(长度: {len(last_raw)} 字符)...")
  516. # 尝试直接提取 JSON
  517. try:
  518. content_data = self._extract_json(last_raw)
  519. # 验证提取的 JSON 是否包含必需的字段
  520. if not isinstance(content_data, dict):
  521. raise ValueError("提取的内容不是字典格式")
  522. if 'content' not in content_data:
  523. print(f" ▸️ 提取的 JSON 缺少 'content' 字段")
  524. print(f" 可用字段: {list(content_data.keys())}")
  525. raise ValueError("提取的 JSON 缺少 'content' 字段")
  526. print("▸ 成功从原始响应中提取到内容")
  527. return content_data
  528. except Exception as e:
  529. print(f" ▸️ 从原始响应提取失败: {e}")
  530. # 如果提取失败,使用 fallback
  531. return self._generate_content_with_history(
  532. node, context, level, structure_requirements, word_count,
  533. self.agent.last_history, task_description
  534. )
  535. # 检查是否是错误消息
  536. if "无法在限定步数内完成" in response or "抱歉" in response or "流程终止" in response:
  537. print("▸️ ReActAgent 达到最大步数限制或无法完成任务")
  538. print(f" 已收集的历史信息: {len(self.agent.last_history)} 条")
  539. # 即使返回错误消息,也尝试从最后一次原始响应中提取内容
  540. if self.agent.last_raw_responses:
  541. last_raw = self.agent.last_raw_responses[-1]
  542. print(f" 尝试从最后一次原始响应中提取内容(长度: {len(last_raw)} 字符)...")
  543. try:
  544. content_data = self._extract_json(last_raw)
  545. # 验证提取的 JSON 是否包含必需的字段
  546. if not isinstance(content_data, dict):
  547. raise ValueError("提取的内容不是字典格式")
  548. if 'content' not in content_data:
  549. print(f" ▸️ 提取的 JSON 缺少 'content' 字段")
  550. print(f" 可用字段: {list(content_data.keys())}")
  551. raise ValueError("提取的 JSON 缺少 'content' 字段")
  552. print("▸ 成功从原始响应中提取到内容(尽管 ReActAgent 返回了错误消息)")
  553. return content_data
  554. except Exception as e:
  555. print(f" ▸️ 从原始响应提取失败: {e}")
  556. # 如果提取失败,基于历史信息生成内容
  557. return self._generate_content_with_history(
  558. node, context, level, structure_requirements, word_count,
  559. self.agent.last_history, task_description
  560. )
  561. # 如果 response 是 "JSON内容" 这样的占位符,从原始响应中提取
  562. if response.strip() in ["JSON内容", "JSON", "内容"]:
  563. print(f"▸️ ReActAgent 返回了占位符 '{response}',尝试从原始响应中提取...")
  564. if self.agent.last_raw_responses:
  565. last_raw = self.agent.last_raw_responses[-1]
  566. print(f" 从最后一次原始响应中提取(长度: {len(last_raw)} 字符)...")
  567. try:
  568. content_data = self._extract_json(last_raw)
  569. if isinstance(content_data, dict) and 'content' in content_data:
  570. print("▸ 成功从原始响应中提取到内容")
  571. return content_data
  572. except Exception as e:
  573. print(f" ▸️ 从原始响应提取失败: {e}")
  574. content_data = self._extract_json(response)
  575. # 验证提取的 JSON 是否包含必需的字段
  576. if not isinstance(content_data, dict):
  577. raise ValueError(f"提取的内容不是字典格式: {type(content_data)}")
  578. if 'content' not in content_data:
  579. print(f"▸️ 提取的 JSON 缺少 'content' 字段")
  580. print(f" 可用字段: {list(content_data.keys())}")
  581. print(f" 响应内容(前500字符): {response[:500]}")
  582. # 如果从 response 提取失败,尝试从原始响应中提取
  583. if self.agent.last_raw_responses:
  584. last_raw = self.agent.last_raw_responses[-1]
  585. print(f" 尝试从最后一次原始响应中提取(长度: {len(last_raw)} 字符)...")
  586. try:
  587. content_data = self._extract_json(last_raw)
  588. if isinstance(content_data, dict) and 'content' in content_data:
  589. print("▸ 成功从原始响应中提取到内容")
  590. return content_data
  591. except Exception as e:
  592. print(f" ▸️ 从原始响应提取失败: {e}")
  593. raise ValueError("提取的 JSON 缺少 'content' 字段")
  594. return content_data
  595. except Exception as e:
  596. print(f"▸️ ReActAgent 执行失败: {e}")
  597. import traceback
  598. traceback.print_exc()
  599. print(f" 已收集的历史信息: {len(self.agent.last_history)} 条")
  600. print(" 尝试基于历史信息生成内容...")
  601. return self._generate_content_with_history(
  602. node, context, level, structure_requirements, word_count,
  603. self.agent.last_history, task_description
  604. )
  605. def _generate_content_with_history(
  606. self,
  607. node: ContentNode,
  608. context: Dict[str, Any],
  609. level: int,
  610. structure_requirements: str,
  611. word_count: int,
  612. history: List[str],
  613. original_task: str
  614. ) -> Dict[str, Any]:
  615. """
  616. 当 ReActAgent 失败时,基于历史信息使用 SimpleAgent 生成内容
  617. Args:
  618. history: ReActAgent 收集的历史信息(Thought、Action、Observation)
  619. """
  620. from hello_agents import SimpleAgent
  621. fallback_agent = SimpleAgent(
  622. name="内容创作专家(备用)",
  623. llm=self.llm,
  624. system_prompt="你是一位专业的内容创作者,擅长撰写技术专栏文章。"
  625. )
  626. # 构建包含历史信息的任务描述
  627. history_summary = ""
  628. if history:
  629. history_summary = "\n\n## 已撰写的部分历史:\n"
  630. for i, item in enumerate(history[-10:], 1): # 只取最后10条历史
  631. history_summary += f"{i}. {item}\n"
  632. history_summary += "\n请基于以上信息继续完成写作任务。\n"
  633. task = f"""
  634. 请撰写一篇技术专栏文章。
  635. 话题: {node.title}
  636. 描述: {node.description}
  637. 要求字数: {word_count} 字
  638. 结构要求:
  639. {structure_requirements}
  640. {history_summary}
  641. 请直接输出 JSON 格式的内容:
  642. {{
  643. "title": "{node.title}",
  644. "level": {level},
  645. "content": "完整的文章正文(markdown格式,包含引言、主体、案例、总结)",
  646. "word_count": 实际字数,
  647. "needs_expansion": false,
  648. "subsections": [],
  649. "metadata": {{}}
  650. }}
  651. """
  652. print(f"▸ 使用 SimpleAgent 基于历史信息生成内容...")
  653. response = fallback_agent.run(task)
  654. return self._extract_json(response)
  655. def revise_content(
  656. self,
  657. original_content: str,
  658. review_result: ReviewResult,
  659. level: int
  660. ) -> Dict[str, Any]:
  661. """
  662. 根据评审意见修改内容
  663. Args:
  664. original_content: 原始内容
  665. review_result: 评审结果
  666. level: 层级
  667. Returns:
  668. 修改后的内容数据
  669. """
  670. # 构建修改任务
  671. task_description = f"""
  672. ## 修改任务
  673. **原始内容**:
  674. {original_content[:500]}...
  675. **评审分数**: {review_result.score}/100
  676. **评审等级**: {review_result.grade}
  677. **主要问题**:
  678. {json.dumps(review_result.detailed_feedback.get('issues', [])[:3], ensure_ascii=False, indent=2)}
  679. **修改建议**:
  680. {json.dumps(review_result.revision_plan.get('priority_changes', []), ensure_ascii=False, indent=2)}
  681. 请使用 ReAct 模式完成修改:
  682. 1. 思考评审意见的核心要求
  683. 2. 决定是否需要搜索新信息
  684. 3. 修改内容
  685. 4. 使用 Finish[修改后的JSON内容] 输出结果
  686. """
  687. response = self.agent.run(task_description)
  688. revised_data = self._extract_json(response)
  689. return revised_data
  690. def _extract_json(self, response: str) -> Dict[str, Any]:
  691. """从响应中提取 JSON(使用统一的 JSONExtractor)"""
  692. try:
  693. return JSONExtractor.extract(
  694. response,
  695. required_fields=['content'],
  696. fallback_fields={
  697. 'subsections': [],
  698. 'metadata': {},
  699. 'needs_expansion': False
  700. }
  701. )
  702. except Exception as e:
  703. print(f"▸️ 提取 JSON 时发生错误: {e}")
  704. print(f" 响应内容(前1000字符): {response[:1000]}")
  705. raise
  706. class ReviewerAgent:
  707. """
  708. 评审 Agent - 使用 SimpleAgent 模式
  709. 负责对生成的内容进行质量评审,提供详细的评分和修改建议
  710. """
  711. def __init__(self):
  712. from hello_agents import SimpleAgent
  713. from prompts import get_reviewer_prompt
  714. self.llm = LLMService.get_llm()
  715. self.reviewer_prompt = get_reviewer_prompt()
  716. self.agent = SimpleAgent(
  717. name="内容评审专家",
  718. llm=self.llm,
  719. system_prompt="你是一位严格而专业的内容评审专家,擅长评估文章质量并提供建设性的修改意见。"
  720. )
  721. def review_content(
  722. self,
  723. content: str,
  724. level: int,
  725. target_word_count: int,
  726. key_points: List[str]
  727. ) -> 'ReviewResult':
  728. """
  729. 评审内容
  730. Args:
  731. content: 待评审的内容
  732. level: 内容层级
  733. target_word_count: 目标字数
  734. key_points: 关键要点
  735. Returns:
  736. ReviewResult 实例
  737. """
  738. print(f"\n▸ ReviewerAgent 开始评审内容...")
  739. print(f" 内容长度: {len(content)} 字符")
  740. print(f" 目标字数: {target_word_count}")
  741. # 构建评审任务
  742. task = self.reviewer_prompt.format(
  743. level=level,
  744. target_word_count=target_word_count,
  745. key_points=json.dumps(key_points, ensure_ascii=False),
  746. content=content
  747. )
  748. response = self.agent.run(task)
  749. review_data = self._extract_json(response)
  750. # 创建 ReviewResult 实例
  751. result = ReviewResult.from_dict(review_data)
  752. print(f"▸ 评审完成")
  753. print(f" 评分: {result.score}/100 ({result.grade})")
  754. print(f" 需要修改: {'是' if result.needs_revision else '否'}")
  755. return result
  756. def _extract_json(self, response: str) -> Dict[str, Any]:
  757. """从响应中提取 JSON"""
  758. try:
  759. return JSONExtractor.extract(
  760. response,
  761. required_fields=['score', 'grade'],
  762. fallback_fields={
  763. 'dimension_scores': {},
  764. 'detailed_feedback': {'strengths': [], 'issues': []},
  765. 'revision_plan': {'priority_changes': [], 'minor_improvements': []},
  766. 'needs_revision': True,
  767. 'estimated_revision_effort': '',
  768. 'reviewer_notes': ''
  769. }
  770. )
  771. except Exception as e:
  772. print(f"▸️ 评审结果解析失败: {e}")
  773. # 返回默认的评审结果(需要修改)
  774. return {
  775. 'score': 60,
  776. 'grade': '需改进',
  777. 'dimension_scores': {},
  778. 'detailed_feedback': {'strengths': [], 'issues': [{'problem': '评审结果解析失败'}]},
  779. 'revision_plan': {'priority_changes': [], 'minor_improvements': []},
  780. 'needs_revision': True,
  781. 'estimated_revision_effort': '未知',
  782. 'reviewer_notes': f'评审结果解析失败: {str(e)}'
  783. }
  class RevisionAgent:
      """
      Revision agent built on the SimpleAgent pattern.

      Rewrites a piece of content according to a reviewer's feedback.
      """

      def __init__(self):
          # Function-scope imports, kept where the original placed them.
          from hello_agents import SimpleAgent
          from prompts import get_revision_prompt

          self.llm = LLMService.get_llm()
          # Template that revise_content() fills in with str.format().
          self.revision_prompt = get_revision_prompt()
          self.agent = SimpleAgent(
              name="内容修改专家",
              llm=self.llm,
              system_prompt="你是一位专业的内容创作者,擅长根据评审意见修改和优化文章。"
          )

      @staticmethod
      def _entries(container: Any, key: str) -> list:
          """Return ``container[key]`` as a list, tolerating missing or odd data."""
          if container is None:
              return []
          value = container.get(key)
          if value is None:
              return []
          if isinstance(value, (str, dict)):
              # A lone string/object where a list was expected: treat it as
              # one entry instead of iterating its characters or keys.
              return [value]
          return list(value)

      @staticmethod
      def _bullets(entries: list, render: Any) -> str:
          """Join entries into bullet lines; entries without ``.get`` are shown as-is."""
          return "\n".join(
              render(entry) if hasattr(entry, 'get') else f"- {entry}"
              for entry in entries
          )

      @staticmethod
      def _word_count_advice(current: int, lower: int, upper: int) -> str:
          """Describe how far ``current`` is outside the ``lower``-``upper`` range."""
          if current < lower:
              return f"需要增加约 {lower - current} 字"
          if current > upper:
              return f"需要删减约 {current - upper} 字"
          return "字数在合理范围内"

      def revise_content(
          self,
          original_content: str,
          review_result: 'ReviewResult',
          target_word_count: int
      ) -> Dict[str, Any]:
          """
          Revise content according to the review feedback.

          Reads ``score``, ``grade``, ``detailed_feedback``, ``revision_plan``
          and ``reviewer_notes`` from ``review_result``, formats them into the
          revision prompt, runs the agent and parses its reply.

          Feedback containers that are ``None`` and list entries that are not
          mappings (for example plain strings) are formatted gracefully
          instead of raising ``AttributeError``.

          Args:
              original_content: The content to revise.
              review_result: The review outcome driving the revision.
              target_word_count: Target length; the accepted range is +/-10%.

          Returns:
              The parsed revision data returned by ``_extract_json``.

          Raises:
              Exception: whatever ``_extract_json`` re-raises when the agent's
                  reply cannot be parsed.
          """
          print("\n▸ RevisionAgent 开始修改内容...")
          print(f" 原始评分: {review_result.score}/100")

          # Length is measured in characters (len of the string).
          current_word_count = len(original_content)
          word_count_min = int(target_word_count * 0.9)
          word_count_max = int(target_word_count * 1.1)
          word_count_adjustment = self._word_count_advice(
              current_word_count, word_count_min, word_count_max
          )

          feedback = review_result.detailed_feedback
          plan = review_result.revision_plan

          # Flatten the review data into bullet lists for the prompt.
          strengths = "\n".join(
              f"- {s}" for s in self._entries(feedback, 'strengths')
          )
          issues = self._bullets(
              self._entries(feedback, 'issues'),
              lambda issue: f"- [{issue.get('category', '未知')}] {issue.get('problem', '')}: {issue.get('suggestion', '')}"
          )
          priority_changes = self._bullets(
              self._entries(plan, 'priority_changes'),
              lambda change: f"- **{change.get('section', '')}**: {change.get('action', '')} - {change.get('detail', '')}"
          )
          minor_improvements = self._bullets(
              self._entries(plan, 'minor_improvements'),
              lambda imp: f"- {imp.get('section', '')}: {imp.get('detail', '')}"
          )

          # Build the revision task; empty sections are shown as "无".
          task = self.revision_prompt.format(
              original_content=original_content,
              score=review_result.score,
              grade=review_result.grade,
              strengths=strengths or "无",
              issues=issues or "无",
              reviewer_notes=review_result.reviewer_notes or "无",
              priority_changes=priority_changes or "无",
              minor_improvements=minor_improvements or "无",
              word_count_range=f"{word_count_min}-{word_count_max}",
              current_word_count=current_word_count,
              word_count_adjustment=word_count_adjustment
          )

          response = self.agent.run(task)
          revised_data = self._extract_json(response)

          print("▸ 修改完成")
          # `or ''` guards against an explicit null for revised_content.
          revised_text = revised_data.get('revised_content') or ''
          print(f" 修改后字数: {revised_data.get('word_count', len(revised_text))}")
          return revised_data

      def _extract_json(self, response: str) -> Dict[str, Any]:
          """Extract the revision JSON from the agent's reply.

          ``revised_content`` is required; the summary and word-count fields
          get defaults. A missing or zero ``word_count`` is replaced by the
          character count of ``revised_content``. Any failure is printed and
          re-raised.
          """
          try:
              data = JSONExtractor.extract(
                  response,
                  required_fields=['revised_content'],
                  fallback_fields={
                      'revision_summary': {'major_changes': [], 'minor_changes': [], 'preserved_strengths': []},
                      'word_count': 0,
                      'word_count_change': ''
                  }
              )
              # No usable word_count supplied: compute it from the content.
              if not data.get('word_count'):
                  data['word_count'] = len(data.get('revised_content') or '')
              return data
          except Exception as e:
              print(f"▸️ 修改结果解析失败: {e}")
              raise
  class ReflectionWriterAgent:
      """
      Reflective writing agent built on the ReflectionAgent pattern.

      ReflectionAgent improves its output through self-reflection and
      iterative refinement, folding review and revision into one agent:
      1. Produce a first draft
      2. Review it (reflect)
      3. Revise according to the reflection (refine)
      4. Reach the quality bar
      """

      def __init__(self):
          # Custom prompts for the three reflection stages. Each prompt is
          # assembled from adjacent literals, one literal per line of text.
          stage_prompts = {
              "initial": (
                  "\n"
                  "你是一位专业的内容创作者。请撰写以下内容的初稿:\n"
                  "{task}\n"
                  "请输出完整的 JSON 格式内容。\n"
              ),
              "reflect": (
                  "\n"
                  "你是一位严格的内容评审专家。请评审以下内容:\n"
                  "# 写作任务: {task}\n"
                  "# 内容初稿: {content}\n"
                  "请从以下维度评审:\n"
                  "1. **内容质量** (40分): 准确性、完整性、深度、原创性\n"
                  "2. **结构逻辑** (30分): 层次清晰、逻辑连贯、过渡自然\n"
                  "3. **语言表达** (20分): 易读性、专业性、准确性\n"
                  "4. **格式规范** (10分): 字数达标、格式正确、排版美观\n"
                  '如果内容质量很好(85分以上),请回答"无需改进"。\n'
                  "否则,请详细指出问题并提供具体的修改建议。\n"
              ),
              "refine": (
                  "\n"
                  "请根据评审意见优化你的内容:\n"
                  "# 原始任务: {task}\n"
                  "# 当前内容: {last_attempt}\n"
                  "# 评审意见: {feedback}\n"
                  "请输出优化后的完整 JSON 格式内容。\n"
              ),
          }
          self.llm = LLMService.get_llm()
          self.agent = ReflectionAgent(
              name="反思写作专家",
              llm=self.llm,
              custom_prompts=stage_prompts,
              max_iterations=2  # at most 2 reflection rounds
          )

      def generate_and_refine_content(
          self,
          node: ContentNode,
          context: Dict[str, Any],
          level: int
      ) -> Dict[str, Any]:
          """
          Generate content and refine it through reflection.

          Builds a writing task from the node's title and description, the
          level's structure requirements and word count, and the JSON-dumped
          context; runs the reflection agent on it and parses the reply.

          Args:
              node: The node being written.
              context: Writing context, embedded in the task as JSON.
              level: Current level.

          Returns:
              The content data parsed from the agent's reply.
          """
          print(f"\n▸ ReflectionAgent 开始写作并自我反思...")
          print(f" 使用模式: 初稿 → 自我评审 → 优化")

          structure_spec = get_structure_requirements(level)
          target_words = get_word_count(level)

          # One list item per line of the task text; the empty first and
          # last items yield the leading and trailing newline.
          task_lines = [
              "",
              "## 写作任务",
              f"**层级**: Level {level}/3",
              f"**话题**: {node.title}",
              f"**描述**: {node.description}",
              f"**要求字数**: {target_words} 字(允许误差±10%)",
              "**结构要求**:",
              f"{structure_spec}",
              "**上下文**:",
              f"{json.dumps(context, ensure_ascii=False, indent=2)}",
              "请输出完整的 JSON 格式内容:",
              "```json",
              "{",
              '"title": "章节标题",',
              f'"level": {level},',
              '"content": "正文内容(markdown格式)",',
              '"word_count": 实际字数,',
              '"needs_expansion": true/false,',
              '"subsections": [...],',
              '"metadata": {...}',
              "}",
              "```",
              "",
          ]
          raw_reply = self.agent.run("\n".join(task_lines))
          parsed = self._extract_json(raw_reply)

          print(f"▸ ReflectionAgent 完成反思优化")
          return parsed

      def _extract_json(self, response: str) -> Dict[str, Any]:
          """Extract JSON from the reply via the shared JSONExtractor.

          ``content`` is required; ``subsections``, ``metadata`` and
          ``needs_expansion`` get defaults. Any failure is printed and
          re-raised.
          """
          optional_defaults = {
              'subsections': [],
              'metadata': {},
              'needs_expansion': False
          }
          try:
              parsed = JSONExtractor.extract(
                  response,
                  required_fields=['content'],
                  fallback_fields=optional_defaults
              )
          except Exception as e:
              print(f"▸️ JSON 解析失败: {e}")
              raise
          else:
              return parsed