summary_manager.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. # core/summary_manager.py
  2. """摘要更新管理器 - 混合策略:<5个文件完全重写,≥5个增量更新"""
  3. from pathlib import Path
  4. from typing import List
  5. from hello_agents import HelloAgentsLLM
  6. from config import Config
  7. class SummaryManager:
  8. """
  9. 管理知识摘要和会话摘要的更新
  10. 使用混合策略:
  11. - 文件数 < 5:完全重写摘要
  12. - 文件数 ≥ 5:增量更新摘要
  13. Attributes:
  14. fm: FileManager 实例
  15. llm: HelloAgentsLLM 实例
  16. """
  17. def __init__(self, file_manager):
  18. """
  19. 初始化摘要管理器
  20. Args:
  21. file_manager: FileManager 实例
  22. """
  23. self.fm = file_manager
  24. self.llm = HelloAgentsLLM()
  25. def update_knowledge_summary(self, domain: str, new_file: str) -> None:
  26. """
  27. 更新 knowledge_summary.md
  28. Args:
  29. domain: 领域名称
  30. new_file: 新添加的文件名
  31. """
  32. domain_path = self.fm.BASE_DIR / domain
  33. knowledge_dir = domain_path / "knowledge"
  34. summary_path = knowledge_dir / "knowledge_summary.md"
  35. # 统计文件数(排除 summary.md)
  36. existing_files: List[Path] = list(knowledge_dir.glob("*.md"))
  37. file_count = len(
  38. [f for f in existing_files if f.name != "knowledge_summary.md"]
  39. )
  40. if file_count < Config.SUMMARY_FULL_REWRITE_THRESHOLD:
  41. self._full_rewrite_knowledge_summary(domain, knowledge_dir, summary_path)
  42. else:
  43. self._incremental_update_knowledge_summary(domain, new_file, summary_path)
  44. def _full_rewrite_knowledge_summary(
  45. self, domain: str, knowledge_dir: Path, summary_path: Path
  46. ) -> None:
  47. """
  48. 完全重写知识摘要
  49. Args:
  50. domain: 领域名称
  51. knowledge_dir: 知识目录
  52. summary_path: 摘要文件路径
  53. """
  54. # 读取所有知识文件
  55. all_files: List[Path] = [
  56. f for f in knowledge_dir.glob("*.md") if f.name != "knowledge_summary.md"
  57. ]
  58. all_content = []
  59. for file in all_files:
  60. content = file.read_text(encoding="utf-8")
  61. all_content.append(f"## {file.stem}\n{content}\n")
  62. # 让 LLM 生成压缩摘要
  63. user_prompt = f"""以下是 {domain} 领域的所有知识笔记,请生成一个结构化的总结摘要:
  64. {''.join(all_content)}
  65. 要求:
  66. 1. 按主题分类组织
  67. 2. 提取核心概念和关键知识点
  68. 3. 保持结构化(markdown格式)
  69. 4. 控制在原来内容的20%长度
  70. """
  71. messages = [
  72. {
  73. "role": "system",
  74. "content": "你是一个知识总结助手,擅长提取核心概念并生成结构化摘要。",
  75. },
  76. {"role": "user", "content": user_prompt},
  77. ]
  78. try:
  79. summary = self.llm.invoke(messages)
  80. summary_path.write_text(summary, encoding="utf-8")
  81. except Exception:
  82. # 如果 LLM 调用失败,使用简单的合并
  83. fallback_summary = f"# {domain} 知识总结\n\n" + "\n".join(all_content)
  84. summary_path.write_text(fallback_summary, encoding="utf-8")
  85. def _incremental_update_knowledge_summary(
  86. self, domain: str, new_file: str, summary_path: Path
  87. ) -> None:
  88. """
  89. 增量更新知识摘要
  90. Args:
  91. domain: 领域名称
  92. new_file: 新文件名
  93. summary_path: 摘要文件路径
  94. """
  95. # 读取当前摘要和新文件
  96. current_summary = summary_path.read_text(encoding="utf-8")
  97. new_content = (self.fm.BASE_DIR / domain / "knowledge" / new_file).read_text(
  98. encoding="utf-8"
  99. )
  100. # 让 LLM 合并
  101. user_prompt = f"""当前摘要:
  102. {current_summary}
  103. 新增内容:
  104. {new_content}
  105. 请将新增内容整合到摘要中,保持结构化和简洁性。
  106. """
  107. messages = [
  108. {
  109. "role": "system",
  110. "content": "你是一个知识总结助手,擅长整合新内容到现有摘要中。",
  111. },
  112. {"role": "user", "content": user_prompt},
  113. ]
  114. try:
  115. updated_summary = self.llm.invoke(messages)
  116. summary_path.write_text(updated_summary, encoding="utf-8")
  117. except Exception:
  118. # 如果 LLM 调用失败,使用简单追加
  119. updated_summary = (
  120. current_summary + f"\n\n## {Path(new_file).stem}\n{new_content}"
  121. )
  122. summary_path.write_text(updated_summary, encoding="utf-8")
  123. def update_session_summary(self, domain: str, new_session_content: str) -> None:
  124. """
  125. 更新 session_summary.md
  126. Args:
  127. domain: 领域名称
  128. new_session_content: 新会话内容
  129. """
  130. domain_path = self.fm.BASE_DIR / domain
  131. sessions_dir = domain_path / "sessions"
  132. summary_path = sessions_dir / "session_summary.md"
  133. # 统计文件数
  134. existing_files: List[Path] = list(sessions_dir.glob("session_*.md"))
  135. file_count = len(
  136. [f for f in existing_files if not f.name.startswith("session_summary")]
  137. )
  138. if file_count < Config.SUMMARY_FULL_REWRITE_THRESHOLD:
  139. self._full_rewrite_session_summary(domain, sessions_dir, summary_path)
  140. else:
  141. self._incremental_update_session_summary(new_session_content, summary_path)
  142. def _full_rewrite_session_summary(
  143. self, domain: str, sessions_dir: Path, summary_path: Path
  144. ) -> None:
  145. """
  146. 完全重写会话摘要
  147. """
  148. all_sessions: List[Path] = [
  149. f
  150. for f in sessions_dir.glob("session_*.md")
  151. if not f.name.startswith("session_summary")
  152. ]
  153. all_content = []
  154. for file in all_sessions:
  155. content = file.read_text(encoding="utf-8")
  156. all_content.append(f"## {file.stem}\n{content}\n")
  157. user_prompt = f"""以下是 {domain} 领域的所有学习会话记录,请生成一个压缩的总结:
  158. {''.join(all_content)}
  159. 要求:
  160. 1. 提取关键学习点
  161. 2. 记录进步轨迹
  162. 3. 识别需要复习的内容
  163. 4. 控制在原来内容的30%长度
  164. """
  165. messages = [
  166. {
  167. "role": "system",
  168. "content": "你是一个学习历程总结助手,擅长提取关键学习点和进步轨迹。",
  169. },
  170. {"role": "user", "content": user_prompt},
  171. ]
  172. try:
  173. summary = self.llm.invoke(messages)
  174. summary_path.write_text(summary, encoding="utf-8")
  175. except Exception:
  176. fallback_summary = f"# {domain} 学习历程\n\n" + "\n".join(all_content)
  177. summary_path.write_text(fallback_summary, encoding="utf-8")
  178. def _incremental_update_session_summary(
  179. self, new_session_content: str, summary_path: Path
  180. ) -> None:
  181. """
  182. 增量更新会话摘要
  183. """
  184. current_summary = summary_path.read_text(encoding="utf-8")
  185. user_prompt = f"""当前总结:
  186. {current_summary}
  187. 新会话记录:
  188. {new_session_content}
  189. 请将新会话整合到总结中。
  190. """
  191. messages = [
  192. {
  193. "role": "system",
  194. "content": "你是一个学习历程总结助手,擅长整合新的学习会话到总结中。",
  195. },
  196. {"role": "user", "content": user_prompt},
  197. ]
  198. try:
  199. updated_summary = self.llm.invoke(messages)
  200. summary_path.write_text(updated_summary, encoding="utf-8")
  201. except Exception:
  202. updated_summary = current_summary + f"\n\n{new_session_content}"
  203. summary_path.write_text(updated_summary, encoding="utf-8")