# debate_orchestrator.py (9.1 KB)
  1. """多角色历史辩论编排:观点碰撞 → 终局综合(最可能事实 / 可疑点 / 阴谋论辨析)。"""
  2. from __future__ import annotations
  3. import json
  4. from collections.abc import Iterator
  5. from dataclasses import dataclass
  6. from typing import Any
  7. from hello_agents import HelloAgentsLLM
  8. from .config import create_llm
  9. from .debate_prompts import (
  10. EVIDENCE_PREAMBLE,
  11. SUMMARIZER_FOR_ROUND2,
  12. SYSTEM_FOREIGN,
  13. SYSTEM_OFFICIAL,
  14. SYSTEM_POLITICAL,
  15. SYSTEM_SUSPICION,
  16. SYSTEM_SYNTHESIZER,
  17. SYSTEM_UNOFFICIAL,
  18. USER_ROUND1_TEMPLATE,
  19. USER_ROUND2_TEMPLATE,
  20. USER_SYNTHESIZER_TEMPLATE,
  21. )
  22. from .evidence_bundle import build_evidence_bundle
@dataclass(frozen=True)
class RoleSpec:
    """Immutable description of one debate participant."""

    # Identifier used as the dictionary key for this role's per-round output.
    key: str
    # Label placed in event payloads and Markdown section headings.
    display_name: str
    # System prompt sent to the model whenever this role speaks.
    system_prompt: str
  28. ROLES: tuple[RoleSpec, ...] = (
  29. RoleSpec("official", "官修史书与王朝叙事", SYSTEM_OFFICIAL),
  30. RoleSpec("unofficial", "野史与边缘叙事", SYSTEM_UNOFFICIAL),
  31. RoleSpec("political", "政治语境与权力结构", SYSTEM_POLITICAL),
  32. RoleSpec("foreign", "域外与他者视角", SYSTEM_FOREIGN),
  33. RoleSpec("suspicion", "蹊跷与阴谋论辨析", SYSTEM_SUSPICION),
  34. )
  35. # 进度:议题 + 附录 + 五角色第一轮 + 秘书 + 五角色第二轮 + 终局(step 0..14 → 共 15 段)
  36. TOTAL_STEPS = 15
  37. def _excerpt(text: str, limit: int = 380) -> str:
  38. text = (text or "").strip()
  39. if len(text) <= limit:
  40. return text
  41. return text[:limit] + "…"
  42. def _invoke(llm: HelloAgentsLLM, system: str, user: str, *, temperature: float) -> str:
  43. messages = [
  44. {"role": "system", "content": system},
  45. {"role": "user", "content": user},
  46. ]
  47. return (llm.invoke(messages, temperature=temperature) or "").strip()
  48. def _summarize_round1_for_context(llm: HelloAgentsLLM, round1: dict[str, str]) -> str:
  49. body = "\n\n".join(f"### {r.display_name}\n{round1[r.key]}" for r in ROLES)
  50. return _invoke(
  51. llm,
  52. SUMMARIZER_FOR_ROUND2,
  53. body,
  54. temperature=0.15,
  55. )
  56. def _yield_progress(step: int, message: str, **extra: Any) -> dict[str, Any]:
  57. return {
  58. "event": "progress",
  59. "step": step,
  60. "total": TOTAL_STEPS,
  61. "message": message,
  62. **extra,
  63. }
  64. def iter_debate_events(
  65. topic: str,
  66. *,
  67. llm: HelloAgentsLLM | None = None,
  68. use_evidence_bundle: bool = True,
  69. debate_temperature: float = 0.72,
  70. synthesizer_temperature: float = 0.22,
  71. llm_api_key: str | None = None,
  72. llm_base_url: str | None = None,
  73. llm_model: str | None = None,
  74. llm_max_tokens: int | None = 4096,
  75. llm_timeout: int | None = None,
  76. ) -> Iterator[dict[str, Any]]:
  77. """
  78. 逐步产出辩论过程事件,供 SSE / 日志展示。
  79. 事件类型
  80. --------
  81. - progress: step, total, message
  82. - round1_start / round1_end: role, content(end)
  83. - digest_start / digest_end: content(end)
  84. - round2_start / round2_end: role, content(end)
  85. - synthesis_start / synthesis_end: content(end)
  86. - complete: markdown(全文)
  87. """
  88. topic = (topic or "").strip()
  89. if not topic:
  90. raise ValueError("议题不能为空")
  91. if llm is None:
  92. llm = create_llm(
  93. api_key=llm_api_key,
  94. base_url=llm_base_url,
  95. model=llm_model,
  96. max_tokens=llm_max_tokens,
  97. timeout=llm_timeout,
  98. temperature=0.4,
  99. )
  100. step = 0
  101. yield _yield_progress(step, f"议题已接收:{topic[:80]}{'…' if len(topic) > 80 else ''}")
  102. step += 1
  103. evidence_block = ""
  104. if use_evidence_bundle:
  105. yield _yield_progress(step, "正在抓取维基与 DuckDuckGo 考据附录(可能需几十秒)…")
  106. evidence_block = EVIDENCE_PREAMBLE + "\n\n" + build_evidence_bundle(topic)
  107. yield {
  108. "event": "evidence_done",
  109. "step": step,
  110. "total": TOTAL_STEPS,
  111. "chars": len(evidence_block),
  112. "preview": evidence_block[:600] + ("…" if len(evidence_block) > 600 else ""),
  113. }
  114. else:
  115. yield _yield_progress(step, "已跳过网络附录,将仅依赖模型知识。")
  116. evidence_block = "(未启用网络附录;请完全依赖你的训练知识与逻辑。)"
  117. step += 1
  118. lines: list[str] = [
  119. "# 多角色历史辩论记录\n",
  120. f"## 议题\n{topic}\n",
  121. ]
  122. round1: dict[str, str] = {}
  123. for role in ROLES:
  124. yield {
  125. "event": "round1_start",
  126. "step": step,
  127. "total": TOTAL_STEPS,
  128. "role": role.display_name,
  129. "message": f"第一轮 · {role.display_name}:正在调用模型…",
  130. }
  131. user_msg = USER_ROUND1_TEMPLATE.format(topic=topic, evidence_block=evidence_block)
  132. out = _invoke(llm, role.system_prompt, user_msg, temperature=debate_temperature)
  133. round1[role.key] = out
  134. md_chunk = f"### 第一轮 · {role.display_name}\n\n{out}\n"
  135. lines.append(md_chunk)
  136. yield {
  137. "event": "round1_end",
  138. "step": step,
  139. "total": TOTAL_STEPS,
  140. "role": role.display_name,
  141. "content": out,
  142. "markdown_section": md_chunk,
  143. }
  144. step += 1
  145. yield {
  146. "event": "digest_start",
  147. "step": step,
  148. "total": TOTAL_STEPS,
  149. "message": "秘书:正在压缩第一轮五角色发言…",
  150. }
  151. digest = _summarize_round1_for_context(llm, round1)
  152. digest_md = f"### 秘书摘要(供第二轮引用)\n\n{digest}\n"
  153. lines.append(digest_md)
  154. yield {
  155. "event": "digest_end",
  156. "step": step,
  157. "total": TOTAL_STEPS,
  158. "content": digest,
  159. "markdown_section": digest_md,
  160. }
  161. step += 1
  162. round2: dict[str, str] = {}
  163. for role in ROLES:
  164. yield {
  165. "event": "round2_start",
  166. "step": step,
  167. "total": TOTAL_STEPS,
  168. "role": role.display_name,
  169. "message": f"第二轮观点碰撞 · {role.display_name}:正在调用模型…",
  170. }
  171. peer_bits = "\n".join(
  172. f"- **{r.display_name}**(摘录):{_excerpt(round1[r.key], 420)}"
  173. for r in ROLES
  174. if r.key != role.key
  175. )
  176. user_msg = USER_ROUND2_TEMPLATE.format(
  177. topic=topic,
  178. other_summaries=digest + "\n\n**他角色第一轮摘录(供点名反驳)**:\n" + peer_bits,
  179. self_previous=_excerpt(round1[role.key], 520),
  180. )
  181. out = _invoke(llm, role.system_prompt, user_msg, temperature=debate_temperature)
  182. round2[role.key] = out
  183. md_chunk = f"### 第二轮 · 观点碰撞 · {role.display_name}\n\n{out}\n"
  184. lines.append(md_chunk)
  185. yield {
  186. "event": "round2_end",
  187. "step": step,
  188. "total": TOTAL_STEPS,
  189. "role": role.display_name,
  190. "content": out,
  191. "markdown_section": md_chunk,
  192. }
  193. step += 1
  194. yield {
  195. "event": "synthesis_start",
  196. "step": step,
  197. "total": TOTAL_STEPS,
  198. "message": "终局综合:正在生成「最可能事实 / 可疑点 / 阴谋论辨析」…",
  199. }
  200. full_transcript = "\n".join(lines)
  201. final_user = USER_SYNTHESIZER_TEMPLATE.format(topic=topic, full_transcript=full_transcript)
  202. verdict = _invoke(llm, SYSTEM_SYNTHESIZER, final_user, temperature=synthesizer_temperature)
  203. tail = "---\n\n# 终局综合\n\n" + verdict
  204. lines.append("---\n")
  205. lines.append("# 终局综合\n")
  206. lines.append(verdict)
  207. full_md = "\n".join(lines)
  208. yield {
  209. "event": "synthesis_end",
  210. "step": step,
  211. "total": TOTAL_STEPS,
  212. "content": verdict,
  213. "markdown_section": tail,
  214. }
  215. step += 1
  216. yield {
  217. "event": "complete",
  218. "step": step,
  219. "total": TOTAL_STEPS,
  220. "markdown": full_md,
  221. "message": "全部完成",
  222. }
  223. def run_historical_debate(
  224. topic: str,
  225. *,
  226. llm: HelloAgentsLLM | None = None,
  227. use_evidence_bundle: bool = True,
  228. debate_temperature: float = 0.72,
  229. synthesizer_temperature: float = 0.22,
  230. llm_api_key: str | None = None,
  231. llm_base_url: str | None = None,
  232. llm_model: str | None = None,
  233. llm_max_tokens: int | None = 4096,
  234. llm_timeout: int | None = None,
  235. ) -> str:
  236. """执行两轮角色辩论 + 终局综合报告(无流式,供 CLI 等)。"""
  237. last: dict[str, Any] | None = None
  238. for ev in iter_debate_events(
  239. topic,
  240. llm=llm,
  241. use_evidence_bundle=use_evidence_bundle,
  242. debate_temperature=debate_temperature,
  243. synthesizer_temperature=synthesizer_temperature,
  244. llm_api_key=llm_api_key,
  245. llm_base_url=llm_base_url,
  246. llm_model=llm_model,
  247. llm_max_tokens=llm_max_tokens,
  248. llm_timeout=llm_timeout,
  249. ):
  250. last = ev
  251. if not last or last.get("event") != "complete":
  252. raise RuntimeError("辩论未正常结束")
  253. md = last.get("markdown")
  254. if not isinstance(md, str):
  255. raise RuntimeError("缺少完整 Markdown")
  256. return md
  257. def debate_event_json(ev: dict[str, Any]) -> str:
  258. """序列化单条事件(SSE data 行)。"""
  259. return json.dumps(ev, ensure_ascii=False)