hace 8 meses · 10a45a165b
--- a/code/chapter14/helloagents-deepresearch/backend/src/deep_researcher/app/agents/deep_research_agent.py
+++ b/code/chapter14/helloagents-deepresearch/backend/src/deep_researcher/app/agents/deep_research_agent.py
@@ -7,7 +7,7 @@ import re
 
				 from pathlib import Path
			
 
				 from queue import Empty, Queue
			
 
				 from threading import Lock, Thread
			
 
				-from typing import Any, Callable, Iterator, Optional
			
 
				+from typing import Any, Callable, Iterator
			
 
				 
			
 
				 from hello_agents import HelloAgentsLLM
			
 
				 from hello_agents.tools import ToolRegistry
			
@@ -34,6 +34,7 @@ class DeepResearchAgent:
 
				     """Coordinator orchestrating TODO-based research workflow using HelloAgents."""
			
 
				 
			
 
				     def __init__(self, config: Configuration | None = None) -> None:
			
 
				+        """Initialise the coordinator with configuration and shared tools."""
			
 
				         self.config = config or Configuration.from_env()
			
 
				         self.llm = self._init_llm()
			
 
				 
			
@@ -78,7 +79,6 @@ class DeepResearchAgent:
 
				     # ------------------------------------------------------------------
			
 
				     def _init_llm(self) -> HelloAgentsLLM:
			
 
				         """Instantiate HelloAgentsLLM following configuration preferences."""
			
 
				-
			
 
				         llm_kwargs: dict[str, Any] = {"temperature": 0.0}
			
 
				 
			
 
				         model_id = self.config.llm_model_id or self.config.local_llm
			
@@ -109,7 +109,6 @@ class DeepResearchAgent:
 
				 
			
 
				     def _create_tool_aware_agent(self, *, name: str, system_prompt: str) -> ToolAwareSimpleAgent:
			
 
				         """Instantiate a ToolAwareSimpleAgent sharing tool registry and tracker."""
			
 
				-
			
 
				         return ToolAwareSimpleAgent(
			
 
				             name=name,
			
 
				             llm=self.llm,
			
@@ -119,15 +118,13 @@ class DeepResearchAgent:
 
				             tool_call_listener=self._tool_tracker.record,
			
 
				         )
			
 
				 
			
 
				-    def _set_tool_event_sink(self, sink: Optional[Callable[[dict[str, Any]], None]]) -> None:
			
 
				+    def _set_tool_event_sink(self, sink: Callable[[dict[str, Any]], None] | None) -> None:
			
 
				         """Enable or disable immediate tool event callbacks."""
			
 
				-
			
 
				         self._tool_event_sink_enabled = sink is not None
			
 
				         self._tool_tracker.set_event_sink(sink)
			
 
				 
			
 
				     def run(self, topic: str) -> SummaryStateOutput:
			
 
				         """Execute the research workflow and return the final report."""
			
 
				-
			
 
				         state = SummaryState(research_topic=topic)
			
 
				         state.todo_items = self.planner.plan_todo_list(state)
			
 
				         self._drain_tool_events(state)
			
@@ -153,7 +150,6 @@ class DeepResearchAgent:
 
				 
			
 
				     def run_stream(self, topic: str) -> Iterator[dict[str, Any]]:
			
 
				         """Execute the workflow yielding incremental progress events."""
			
 
				-
			
 
				         state = SummaryState(research_topic=topic)
			
 
				         logger.debug("Starting streaming research: topic=%s", topic)
			
 
				         yield {"type": "status", "message": "初始化研究流程"}
			
@@ -181,8 +177,8 @@ class DeepResearchAgent:
 
				         def enqueue(
			
 
				             event: dict[str, Any],
			
 
				             *,
			
 
				-            task: Optional[TodoItem] = None,
			
 
				-            step_override: Optional[int] = None,
			
 
				+            task: TodoItem | None = None,
			
 
				+            step_override: int | None = None,
			
 
				         ) -> None:
			
 
				             payload = dict(event)
			
 
				             target_task_id = payload.get("task_id")
			
@@ -300,7 +296,6 @@ class DeepResearchAgent:
 
				         step: int | None = None,
			
 
				     ) -> Iterator[dict[str, Any]]:
			
 
				         """Run search + summarization for a single task."""
			
 
				-
			
 
				         task.status = "in_progress"
			
 
				 
			
 
				         search_result, notices, answer_text, backend = dispatch_search(
			
@@ -362,7 +357,7 @@ class DeepResearchAgent:
 
				             state.sources_gathered.append(sources_summary)
			
 
				             state.research_loop_count += 1
			
 
				 
			
 
				-        summary_text: Optional[str] = None
			
 
				+        summary_text: str | None = None
			
 
				 
			
 
				         if emit_stream:
			
 
				             for event in self._drain_tool_events(state, step=step):
			
@@ -422,10 +417,9 @@ class DeepResearchAgent:
 
				         self,
			
 
				         state: SummaryState,
			
 
				         *,
			
 
				-        step: Optional[int] = None,
			
 
				+        step: int | None = None,
			
 
				     ) -> list[dict[str, Any]]:
			
 
				         """Proxy to the shared tool call tracker."""
			
 
				-
			
 
				         events = self._tool_tracker.drain(state, step=step)
			
 
				         if self._tool_event_sink_enabled:
			
 
				             return []
			
@@ -434,12 +428,10 @@ class DeepResearchAgent:
 
				     @property
			
 
				     def _tool_call_events(self) -> list[dict[str, Any]]:
			
 
				         """Expose recorded tool events for legacy integrations."""
			
 
				-
			
 
				         return self._tool_tracker.as_dicts()
			
 
				 
			
 
				     def _serialize_task(self, task: TodoItem) -> dict[str, Any]:
			
 
				         """Convert task dataclass to serializable dict for frontend."""
			
 
				-
			
 
				         return {
			
 
				             "id": task.id,
			
 
				             "title": task.title,
			
@@ -453,7 +445,7 @@ class DeepResearchAgent:
 
				             "stream_token": task.stream_token,
			
 
				         }
			
 
				 
			
 
				-    def _persist_final_report(self, state: SummaryState, report: str) -> Optional[dict[str, Any]]:
			
 
				+    def _persist_final_report(self, state: SummaryState, report: str) -> dict[str, Any] | None:
			
 
				         if not self.note_tool or not report or not report.strip():
			
 
				             return None
			
 
				 
			
@@ -511,7 +503,7 @@ class DeepResearchAgent:
 
				 
			
 
				         return payload
			
 
				 
			
 
				-    def _find_existing_report_note_id(self, state: SummaryState) -> Optional[str]:
			
 
				+    def _find_existing_report_note_id(self, state: SummaryState) -> str | None:
			
 
				         if state.report_note_id:
			
 
				             return state.report_note_id
			
 
				 
			
@@ -543,7 +535,7 @@ class DeepResearchAgent:
 
				         return None
			
 
				 
			
 
				     @staticmethod
			
 
				-    def _extract_note_id_from_text(response: str) -> Optional[str]:
			
 
				+    def _extract_note_id_from_text(response: str) -> str | None:
			
 
				         if not response:
			
 
				             return None
			
 
				 
			
@@ -554,8 +546,7 @@ class DeepResearchAgent:
 
				         return match.group(1).strip()
			
 
				 
			
 
				 
			
 
				-def run_deep_research(topic: str, config: Optional[Configuration] = None) -> SummaryStateOutput:
			
 
				+def run_deep_research(topic: str, config: Configuration | None = None) -> SummaryStateOutput:
			
 
				     """Convenience function mirroring the class-based API."""
			
 
				-
			
 
				     agent = DeepResearchAgent(config=config)
			
 
				     return agent.run(topic)
			
--- a/code/chapter14/helloagents-deepresearch/backend/src/deep_researcher/app/services/search_service.py
+++ b/code/chapter14/helloagents-deepresearch/backend/src/deep_researcher/app/services/search_service.py
@@ -1,25 +1,23 @@
 
				-"""Search dispatching helpers."""
			
 
				+"""Search dispatch helpers leveraging HelloAgents SearchTool."""
			
 
				 
			
 
				 from __future__ import annotations
			
 
				 
			
 
				 import logging
			
 
				 from typing import Any, Optional, Tuple
			
 
				 
			
 
				+from hello_agents.tools import SearchTool
			
 
				+
			
 
				 from ...configuration import Configuration
			
 
				 from ...utils import (
			
 
				-    advanced_search,
			
 
				     deduplicate_and_format_sources,
			
 
				-    duckduckgo_search,
			
 
				     format_sources,
			
 
				     get_config_value,
			
 
				-    perplexity_search,
			
 
				-    searxng_search,
			
 
				-    tavily_search,
			
 
				 )
			
 
				 
			
 
				 logger = logging.getLogger(__name__)
			
 
				 
			
 
				 MAX_TOKENS_PER_SOURCE = 2000
			
 
				+_GLOBAL_SEARCH_TOOL = SearchTool(backend="hybrid")
			
 
				 
			
 
				 
			
 
				 def dispatch_search(
			
@@ -27,70 +25,56 @@ def dispatch_search(
 
				     config: Configuration,
			
 
				     loop_count: int,
			
 
				 ) -> Tuple[dict[str, Any] | None, list[str], Optional[str], str]:
			
 
				-    """Call the configured search backend and normalize the response."""
			
 
				+    """Execute configured search backend and normalise response payload."""
			
 
				 
			
 
				     search_api = get_config_value(config.search_api)
			
 
				-    notices: list[str] = []
			
 
				-    answer_text: Optional[str] = None
			
 
				-    backend_label = search_api
			
 
				-
			
 
				-    if search_api == "tavily":
			
 
				-        result = tavily_search(
			
 
				-            query,
			
 
				-            fetch_full_page=config.fetch_full_page,
			
 
				-            max_results=5,
			
 
				-        )
			
 
				-    elif search_api == "perplexity":
			
 
				-        result = perplexity_search(
			
 
				-            query,
			
 
				-            perplexity_search_loop_count=loop_count,
			
 
				-        )
			
 
				-    elif search_api == "duckduckgo":
			
 
				-        result = duckduckgo_search(
			
 
				-            query,
			
 
				-            max_results=5,
			
 
				-            fetch_full_page=config.fetch_full_page,
			
 
				-        )
			
 
				-    elif search_api == "searxng":
			
 
				-        result = searxng_search(
			
 
				-            query,
			
 
				-            max_results=5,
			
 
				-            fetch_full_page=config.fetch_full_page,
			
 
				-        )
			
 
				-    elif search_api == "advanced":
			
 
				-        result = advanced_search(
			
 
				-            query,
			
 
				-            fetch_full_page=config.fetch_full_page,
			
 
				+
			
 
				+    try:
			
 
				+        raw_response = _GLOBAL_SEARCH_TOOL.run(
			
 
				+            {
			
 
				+                "input": query,
			
 
				+                "backend": search_api,
			
 
				+                "mode": "structured",
			
 
				+                "fetch_full_page": config.fetch_full_page,
			
 
				+                "max_results": 5,
			
 
				+                "max_tokens_per_source": MAX_TOKENS_PER_SOURCE,
			
 
				+                "loop_count": loop_count,
			
 
				+            }
			
 
				         )
			
 
				-        if isinstance(result, dict):
			
 
				-            notices = list(result.get("notices") or [])
			
 
				-            answer_text = result.get("answer")
			
 
				-            backend_label = str(result.get("backend") or "advanced")
			
 
				+    except Exception as exc:  # pragma: no cover - defensive logging
			
 
				+        logger.exception("Search backend %s failed: %s", search_api, exc)
			
 
				+        raise
			
 
				+
			
 
				+    if isinstance(raw_response, str):
			
 
				+        notices = [raw_response]
			
 
				+        logger.warning("Search backend %s returned text notice: %s", search_api, raw_response)
			
 
				+        payload: dict[str, Any] = {
			
 
				+            "results": [],
			
 
				+            "backend": search_api,
			
 
				+            "answer": None,
			
 
				+            "notices": notices,
			
 
				+        }
			
 
				     else:
			
 
				-        raise ValueError(f"Unsupported search API: {config.search_api}")
			
 
				-
			
 
				-    if answer_text is None and isinstance(result, dict):
			
 
				-        answer_text = result.get("answer")
			
 
				+        payload = raw_response
			
 
				+        notices = list(payload.get("notices") or [])
			
 
				 
			
 
				-    if isinstance(result, dict):
			
 
				-        results_len = len(result.get("results", []))
			
 
				-    elif isinstance(result, list):
			
 
				-        results_len = len(result)
			
 
				-    else:
			
 
				-        results_len = "?"
			
 
				+    backend_label = str(payload.get("backend") or search_api)
			
 
				+    answer_text = payload.get("answer")
			
 
				+    results = payload.get("results", [])
			
 
				 
			
 
				     if notices:
			
 
				         for notice in notices:
			
 
				             logger.info("Search notice (%s): %s", backend_label, notice)
			
 
				+
			
 
				     logger.info(
			
 
				         "Search backend=%s resolved_backend=%s answer=%s results=%s",
			
 
				         search_api,
			
 
				         backend_label,
			
 
				         bool(answer_text),
			
 
				-        results_len,
			
 
				+        len(results),
			
 
				     )
			
 
				 
			
 
				-    return result, notices, answer_text, backend_label
			
 
				+    return payload, notices, answer_text, backend_label
			
 
				 
			
 
				 
			
 
				 def prepare_research_context(
			
@@ -98,11 +82,11 @@ def prepare_research_context(
 
				     answer_text: Optional[str],
			
 
				     config: Configuration,
			
 
				 ) -> tuple[str, str]:
			
 
				-    """Format sources and research context for downstream summarization."""
			
 
				+    """Build structured context and source summary for downstream agents."""
			
 
				 
			
 
				     sources_summary = format_sources(search_result)
			
 
				     context = deduplicate_and_format_sources(
			
 
				-        search_result,
			
 
				+        search_result or {"results": []},
			
 
				         max_tokens_per_source=MAX_TOKENS_PER_SOURCE,
			
 
				         fetch_full_page=config.fetch_full_page,
			
 
				     )
			
@@ -111,4 +95,3 @@ def prepare_research_context(
 
				         context = f"AI直接答案：\n{answer_text}\n\n{context}"
			
 
				 
			
 
				     return sources_summary, context
			
 
				-
			
--- a/code/chapter14/helloagents-deepresearch/backend/src/deep_researcher/utils.py
+++ b/code/chapter14/helloagents-deepresearch/backend/src/deep_researcher/utils.py
@@ -1,53 +1,24 @@
 
				-import os
			
 
				-import logging
			
 
				-import httpx
			
 
				-import requests
			
 
				-from typing import Dict, Any, List, Union, Optional
			
 
				-
			
 
				-from markdownify import markdownify
			
 
				-from langsmith import traceable
			
 
				-from tavily import TavilyClient
			
 
				-from ddgs import DDGS
			
 
				-from ddgs.exceptions import DDGSException
			
 
				+"""Utility helpers shared across deep researcher services."""
			
 
				 
			
 
				+from __future__ import annotations
			
 
				 
			
 
				-logger = logging.getLogger(__name__)
			
 
				+import logging
			
 
				+from typing import Any, Dict, List, Union
			
 
				 
			
 
				-# Constants
			
 
				 CHARS_PER_TOKEN = 4
			
 
				 
			
 
				+logger = logging.getLogger(__name__)
			
 
				 
			
 
				-def get_config_value(value: Any) -> str:
			
 
				-    """
			
 
				-    Convert configuration values to string format, handling both string and enum types.
			
 
				-
			
 
				-    Args:
			
 
				-        value (Any): The configuration value to process. Can be a string or an Enum.
			
 
				 
			
 
				-    Returns:
			
 
				-        str: The string representation of the value.
			
 
				+def get_config_value(value: Any) -> str:
			
 
				+    """Return configuration value as plain string."""
			
 
				 
			
 
				-    Examples:
			
 
				-        >>> get_config_value("tavily")
			
 
				-        'tavily'
			
 
				-        >>> get_config_value(SearchAPI.TAVILY)
			
 
				-        'tavily'
			
 
				-    """
			
 
				     return value if isinstance(value, str) else value.value
			
 
				 
			
 
				 
			
 
				 def strip_thinking_tokens(text: str) -> str:
			
 
				-    """
			
 
				-    Remove <think> and </think> tags and their content from the text.
			
 
				-
			
 
				-    Iteratively removes all occurrences of content enclosed in thinking tokens.
			
 
				+    """Remove ``<think>`` sections from model responses."""
			
 
				 
			
 
				-    Args:
			
 
				-        text (str): The text to process
			
 
				-
			
 
				-    Returns:
			
 
				-        str: The text with thinking tokens and their content removed
			
 
				-    """
			
 
				     while "<think>" in text and "</think>" in text:
			
 
				         start = text.find("<think>")
			
 
				         end = text.find("</think>") + len("</think>")
			
@@ -56,482 +27,58 @@ def strip_thinking_tokens(text: str) -> str:
 
				 
			
 
				 
			
 
				 def deduplicate_and_format_sources(
			
 
				-    search_response: Union[Dict[str, Any], List[Dict[str, Any]]],
			
 
				+    search_response: Dict[str, Any] | List[Dict[str, Any]],
			
 
				     max_tokens_per_source: int,
			
 
				+    *,
			
 
				     fetch_full_page: bool = False,
			
 
				 ) -> str:
			
 
				-    """
			
 
				-    Format and deduplicate search responses from various search APIs.
			
 
				-
			
 
				-    Takes either a single search response or list of responses from search APIs,
			
 
				-    deduplicates them by URL, and formats them into a structured string.
			
 
				+    """Format and deduplicate search results for downstream prompting."""
			
 
				 
			
 
				-    Args:
			
 
				-        search_response (Union[Dict[str, Any], List[Dict[str, Any]]]): Either:
			
 
				-            - A dict with a 'results' key containing a list of search results
			
 
				-            - A list of dicts, each containing search results
			
 
				-        max_tokens_per_source (int): Maximum number of tokens to include for each source's content
			
 
				-        fetch_full_page (bool, optional): Whether to include the full page content. Defaults to False.
			
 
				-
			
 
				-    Returns:
			
 
				-        str: Formatted string with deduplicated sources
			
 
				-
			
 
				-    Raises:
			
 
				-        ValueError: If input is neither a dict with 'results' key nor a list of search results
			
 
				-    """
			
 
				-    # Convert input to list of results
			
 
				     if isinstance(search_response, dict):
			
 
				-        sources_list = search_response["results"]
			
 
				-    elif isinstance(search_response, list):
			
 
				-        sources_list = []
			
 
				-        for response in search_response:
			
 
				-            if isinstance(response, dict) and "results" in response:
			
 
				-                sources_list.extend(response["results"])
			
 
				-            else:
			
 
				-                sources_list.extend(response)
			
 
				+        sources_list = search_response.get("results", [])
			
 
				     else:
			
 
				-        raise ValueError(
			
 
				-            "Input must be either a dict with 'results' or a list of search results"
			
 
				-        )
			
 
				+        sources_list = search_response
			
 
				 
			
 
				-    # Deduplicate by URL
			
 
				-    unique_sources = {}
			
 
				+    unique_sources: dict[str, Dict[str, Any]] = {}
			
 
				     for source in sources_list:
			
 
				-        if source["url"] not in unique_sources:
			
 
				-            unique_sources[source["url"]] = source
			
 
				+        url = source.get("url")
			
 
				+        if not url:
			
 
				+            continue
			
 
				+        if url not in unique_sources:
			
 
				+            unique_sources[url] = source
			
 
				+
			
 
				+    formatted_parts: List[str] = []
			
 
				+    for source in unique_sources.values():
			
 
				+        title = source.get("title") or source.get("url", "")
			
 
				+        content = source.get("content", "")
			
 
				+        formatted_parts.append(f"信息来源: {title}\n\n")
			
 
				+        formatted_parts.append(f"URL: {source.get('url', '')}\n\n")
			
 
				+        formatted_parts.append(f"信息内容: {content}\n\n")
			
 
				 
			
 
				-    # Format output text
			
 
				-    formatted_text = ""
			
 
				-    for i, source in enumerate(unique_sources.values(), 1):
			
 
				-        formatted_text += f"信息来源: {source['title']}\n\n"
			
 
				-        formatted_text += f"URL: {source['url']}\n\n"
			
 
				-        formatted_text += (
			
 
				-            f"信息内容: {source['content']}\n\n"
			
 
				-        )
			
 
				         if fetch_full_page:
			
 
				-            # Using rough estimate of characters per token
			
 
				-            char_limit = max_tokens_per_source * CHARS_PER_TOKEN
			
 
				-            # Handle None raw_content
			
 
				-            raw_content = source.get("raw_content", "")
			
 
				+            raw_content = source.get("raw_content")
			
 
				             if raw_content is None:
			
 
				+                logger.debug("raw_content missing for %s", source.get("url", ""))
			
 
				                 raw_content = ""
			
 
				-                print(f"Warning: No raw_content found for source {source['url']}")
			
 
				+            char_limit = max_tokens_per_source * CHARS_PER_TOKEN
			
 
				             if len(raw_content) > char_limit:
			
 
				-                raw_content = raw_content[:char_limit] + "... [truncated]"
			
 
				-            formatted_text += f"详细信息内容限制为 {max_tokens_per_source} 个 token: {raw_content}\n\n"
			
 
				-
			
 
				-    return formatted_text.strip()
			
 
				-
			
 
				-
			
 
				-def format_sources(search_results: Dict[str, Any]) -> str:
			
 
				-    """Format search results into a bullet-point list of sources with URLs.
			
 
				-
			
 
				-    Creates a simple bulleted list of search results with title and URL for each source.
			
 
				-
			
 
				-    Args:
			
 
				-        search_results (Dict[str, Any]): Search response containing a 'results' key with
			
 
				-                                        a list of search result objects
			
 
				-
			
 
				-    Returns:
			
 
				-        str: Formatted string with sources as bullet points in the format "* title : url"
			
 
				-    """
			
 
				-    return "\n".join(
			
 
				-        f"* {source['title']} : {source['url']}" for source in search_results["results"]
			
 
				-    )
			
 
				-
			
 
				-
			
 
				-def fetch_raw_content(url: str) -> Optional[str]:
			
 
				-    """
			
 
				-    Fetch HTML content from a URL and convert it to markdown format.
			
 
				-
			
 
				-    Uses a 10-second timeout to avoid hanging on slow sites or large pages.
			
 
				-
			
 
				-    Args:
			
 
				-        url (str): The URL to fetch content from
			
 
				-
			
 
				-    Returns:
			
 
				-        Optional[str]: The fetched content converted to markdown if successful,
			
 
				-                      None if any error occurs during fetching or conversion
			
 
				-    """
			
 
				-    try:
			
 
				-        # Create a client with reasonable timeout
			
 
				-        with httpx.Client(timeout=10.0) as client:
			
 
				-            response = client.get(url)
			
 
				-            response.raise_for_status()
			
 
				-            return markdownify(response.text)
			
 
				-    except Exception as e:
			
 
				-        print(f"Warning: Failed to fetch full page content for {url}: {str(e)}")
			
 
				-        return None
			
 
				-
			
 
				-
			
 
				-@traceable
			
 
				-def duckduckgo_search(
			
 
				-    query: str, max_results: int = 3, fetch_full_page: bool = False
			
 
				-) -> Dict[str, List[Dict[str, Any]]]:
			
 
				-    """Search the web using DuckDuckGo and return formatted results.
			
 
				-
			
 
				-    Uses the DDGS library to perform web searches through DuckDuckGo.
			
 
				-
			
 
				-    Args:
			
 
				-        query (str): The search query to execute
			
 
				-        max_results (int, optional): Maximum number of results to return. Defaults to 3.
			
 
				-        fetch_full_page (bool, optional): Whether to fetch full page content from result URLs.
			
 
				-                                         Defaults to False.
			
 
				-
			
 
				-    Returns:
			
 
				-        Dict[str, List[Dict[str, Any]]]: Search response containing:
			
 
				-            - results (list): List of search result dictionaries, each containing:
			
 
				-                - title (str): Title of the search result
			
 
				-                - url (str): URL of the search result
			
 
				-                - content (str): Snippet/summary of the content
			
 
				-                - raw_content (str or None): Full page content if fetch_full_page is True,
			
 
				-                                            otherwise same as content
			
 
				-    """
			
 
				-    try:
			
 
				-        with DDGS(timeout=10) as client:
			
 
				-            search_results = client.text(
			
 
				-                query,
			
 
				-                max_results=max_results,
			
 
				-                backend="duckduckgo",
			
 
				+                raw_content = f"{raw_content[:char_limit]}... [truncated]"
			
 
				+            formatted_parts.append(
			
 
				+                f"详细信息内容限制为 {max_tokens_per_source} 个 token: {raw_content}\n\n"
			
 
				             )
			
 
				 
			
 
				-        results: list[dict[str, Any]] = []
			
 
				-        for entry in search_results:
			
 
				-            url = entry.get("href") or entry.get("url")
			
 
				-            title = entry.get("title") or url
			
 
				-            content = entry.get("body") or entry.get("content")
			
 
				-
			
 
				-            if not all([url, title, content]):
			
 
				-                print(f"Warning: Incomplete result from DuckDuckGo: {entry}")
			
 
				-                continue
			
 
				-
			
 
				-            raw_content = content
			
 
				-            if fetch_full_page:
			
 
				-                fetched = fetch_raw_content(url)
			
 
				-                raw_content = fetched if fetched is not None else content
			
 
				-
			
 
				-            results.append(
			
 
				-                {
			
 
				-                    "title": title,
			
 
				-                    "url": url,
			
 
				-                    "content": content,
			
 
				-                    "raw_content": raw_content,
			
 
				-                }
			
 
				-            )
			
 
				-
			
 
				-        return {"results": results}
			
 
				-    except DDGSException as exc:
			
 
				-        print(f"Error in DuckDuckGo search: {str(exc)}")
			
 
				-        print("Full error details: DDGSException")
			
 
				-        return {"results": []}
			
 
				-    except Exception as exc:  # pragma: no cover - defensive
			
 
				-        print(f"Unexpected error in DuckDuckGo search: {str(exc)}")
			
 
				-        print(f"Full error details: {type(exc).__name__}")
			
 
				-        return {"results": []}
			
 
				-
			
 
				-
			
 
				-@traceable
			
 
				-def searxng_search(
			
 
				-    query: str, max_results: int = 3, fetch_full_page: bool = False
			
 
				-) -> Dict[str, List[Dict[str, Any]]]:
			
 
				-    """
			
 
				-    Search the web using SearXNG and return formatted results.
			
 
				-
			
 
				-    Uses the SearXNG JSON API (`/search?format=json`) to执行检索。
			
 
				-    The SearXNG host URL is read from the SEARXNG_URL environment variable
			
 
				-    or defaults to http://localhost:8888.
			
 
				-
			
 
				-    Args:
			
 
				-        query (str): The search query to execute
			
 
				-        max_results (int, optional): Maximum number of results to return. Defaults to 3.
			
 
				-        fetch_full_page (bool, optional): Whether to fetch full page content from result URLs.
			
 
				-                                         Defaults to False.
			
 
				+    return "".join(formatted_parts).strip()
			
 
				 
			
 
				-    Returns:
			
 
				-        Dict[str, List[Dict[str, Any]]]: Search response containing:
			
 
				-            - results (list): List of search result dictionaries, each containing:
			
 
				-                - title (str): Title of the search result
			
 
				-                - url (str): URL of the search result
			
 
				-                - content (str): Snippet/summary of the content
			
 
				-                - raw_content (str or None): Full page content if fetch_full_page is True,
			
 
				-                                           otherwise same as content
			
 
				-    """
			
 
				-    host = os.environ.get("SEARXNG_URL", "http://localhost:8888")
			
 
				-    endpoint = f"{host.rstrip('/')}/search"
			
 
				 
			
 
				-    try:
			
 
				-        response = requests.get(
			
 
				-            endpoint,
			
 
				-            params={
			
 
				-                "q": query,
			
 
				-                "format": "json",
			
 
				-                "language": "zh-CN",
			
 
				-                "safesearch": 1,
			
 
				-                "categories": "general",
			
 
				-            },
			
 
				-            timeout=10,
			
 
				-        )
			
 
				-        response.raise_for_status()
			
 
				-        payload = response.json()
			
 
				-    except Exception as exc:  # pragma: no cover - 远程接口失败兜底
			
 
				-        logger.warning("SearXNG request failed: %s", exc)
			
 
				-        return {"results": []}
			
 
				+def format_sources(search_results: Dict[str, Any] | None) -> str:
			
 
				+    """Return bullet list summarising search sources."""
			
 
				 
			
 
				-    results = []
			
 
				-    for entry in payload.get("results", [])[:max_results]:
			
 
				-        url = entry.get("url") or entry.get("link")
			
 
				-        title = entry.get("title") or url
			
 
				-        content = entry.get("content") or entry.get("snippet") or ""
			
 
				+    if not search_results:
			
 
				+        return ""
			
 
				 
			
 
				-        if not all([url, title]) or not content:
			
 
				-            logger.debug("Skipping incomplete SearXNG result: %s", entry)
			
 
				-            continue
			
 
				-
			
 
				-        raw_content = content
			
 
				-        if fetch_full_page:
			
 
				-            fetched = fetch_raw_content(url)
			
 
				-            raw_content = fetched if fetched is not None else content
			
 
				-
			
 
				-        results.append(
			
 
				-            {
			
 
				-                "title": title,
			
 
				-                "url": url,
			
 
				-                "content": content,
			
 
				-                "raw_content": raw_content,
			
 
				-            }
			
 
				-        )
			
 
				-
			
 
				-    return {"results": results}
			
 
				-
			
 
				-
			
 
				-@traceable
			
 
				-def tavily_search(
			
 
				-    query: str, fetch_full_page: bool = True, max_results: int = 3
			
 
				-) -> Dict[str, List[Dict[str, Any]]]:
			
 
				-    """
			
 
				-    Search the web using the Tavily API and return formatted results.
			
 
				-
			
 
				-    Uses the TavilyClient to perform searches. Tavily API key must be configured
			
 
				-    in the environment.
			
 
				-
			
 
				-    Args:
			
 
				-        query (str): The search query to execute
			
 
				-        fetch_full_page (bool, optional): Whether to include raw content from sources.
			
 
				-                                         Defaults to True.
			
 
				-        max_results (int, optional): Maximum number of results to return. Defaults to 3.
			
 
				-
			
 
				-    Returns:
			
 
				-        Dict[str, List[Dict[str, Any]]]: Search response containing:
			
 
				-            - results (list): List of search result dictionaries, each containing:
			
 
				-                - title (str): Title of the search result
			
 
				-                - url (str): URL of the search result
			
 
				-                - content (str): Snippet/summary of the content
			
 
				-                - raw_content (str or None): Full content of the page if available and
			
 
				-                                            fetch_full_page is True
			
 
				-    """
			
 
				-
			
 
				-    tavily_client = TavilyClient()
			
 
				-    return tavily_client.search(
			
 
				-        query, max_results=max_results, include_raw_content=fetch_full_page
			
 
				-    )
			
 
				-
			
 
				-
			
 
				-@traceable
			
 
				-def advanced_search(query: str, fetch_full_page: bool = False) -> Dict[str, Any]:
			
 
				-    """利用多源策略执行搜索，优先 Tavily，其次 SerpApi，最后 DuckDuckGo。"""
			
 
				-
			
 
				-    notices: list[str] = []
			
 
				-    results: list[dict[str, Any]] = []
			
 
				-    answer: Optional[str] = None
			
 
				-    backend = "advanced"
			
 
				-
			
 
				-    # 优先尝试 Tavily
			
 
				-    tavily_key = os.getenv("TAVILY_API_KEY")
			
 
				-    if tavily_key:
			
 
				-        try:
			
 
				-            tavily_result = tavily_search(
			
 
				-                query,
			
 
				-                fetch_full_page=fetch_full_page,
			
 
				-                max_results=5,
			
 
				-            )
			
 
				-            if tavily_result.get("results"):
			
 
				-                backend = "tavily"
			
 
				-                answer = tavily_result.get("answer")
			
 
				-                results.extend(tavily_result["results"])
			
 
				-                logger.info("advanced_search: using Tavily results for query='%s'", query)
			
 
				-                return {
			
 
				-                    "results": results,
			
 
				-                    "notices": notices,
			
 
				-                    "answer": answer,
			
 
				-                    "backend": backend,
			
 
				-                }
			
 
				-            notices.append("⚠️ Tavily 未返回有效结果，尝试其他搜索源")
			
 
				-            logger.info("advanced_search: Tavily returned no results for query='%s'", query)
			
 
				-        except Exception as exc:  # pragma: no cover - 第三方库防御
			
 
				-            notices.append(f"⚠️ Tavily 搜索失败：{exc}")
			
 
				-            logger.warning("advanced_search: Tavily failed for query='%s': %s", query, exc)
			
 
				-    else:
			
 
				-        notices.append("⚠️ 未检测到 TAVILY_API_KEY，跳过 Tavily 搜索")
			
 
				-        logger.info("advanced_search: Tavily disabled for query='%s'", query)
			
 
				-
			
 
				-    # 其次尝试 SerpApi
			
 
				-    serpapi_key = os.getenv("SERPAPI_API_KEY")
			
 
				-    if serpapi_key:
			
 
				-        try:
			
 
				-            from serpapi import GoogleSearch  # type: ignore
			
 
				-
			
 
				-            params = {
			
 
				-                "engine": "google",
			
 
				-                "q": query,
			
 
				-                "api_key": serpapi_key,
			
 
				-                "gl": "cn",
			
 
				-                "hl": "zh-cn",
			
 
				-                "num": 5,
			
 
				-            }
			
 
				-
			
 
				-            client = GoogleSearch(params)
			
 
				-            response = client.get_dict()
			
 
				-
			
 
				-            answer_box = response.get("answer_box") or {}
			
 
				-            direct_answer = answer_box.get("answer") or answer_box.get("snippet")
			
 
				-            if direct_answer:
			
 
				-                answer = direct_answer
			
 
				-
			
 
				-            organic_results = response.get("organic_results", [])
			
 
				-            for item in organic_results[:5]:
			
 
				-                results.append(
			
 
				-                    {
			
 
				-                        "title": item.get("title") or item.get("link") or query,
			
 
				-                        "url": item.get("link", ""),
			
 
				-                        "content": item.get("snippet") or item.get("title") or "",
			
 
				-                        "raw_content": item.get("snippet") or "",
			
 
				-                    }
			
 
				-                )
			
 
				-
			
 
				-            if results:
			
 
				-                backend = "serpapi"
			
 
				-                logger.info("advanced_search: using SerpApi results for query='%s'", query)
			
 
				-                return {
			
 
				-                    "results": results,
			
 
				-                    "notices": notices,
			
 
				-                    "answer": answer,
			
 
				-                    "backend": backend,
			
 
				-                }
			
 
				-
			
 
				-            notices.append("⚠️ SerpApi 未返回有效结果，回退到通用搜索")
			
 
				-            logger.info("advanced_search: SerpApi returned no results for query='%s'", query)
			
 
				-        except ImportError:
			
 
				-            notices.append("⚠️ SerpApi 库未安装，跳过 SerpApi 搜索 (pip install google-search-results)")
			
 
				-            logger.warning("advanced_search: serpapi package missing, skip query='%s'", query)
			
 
				-        except Exception as exc:  # pragma: no cover - 第三方库防御
			
 
				-            notices.append(f"⚠️ SerpApi 搜索失败：{exc}")
			
 
				-            logger.warning("advanced_search: SerpApi failed for query='%s': %s", query, exc)
			
 
				-    else:
			
 
				-        notices.append("⚠️ 未检测到 SERPAPI_API_KEY，跳过 SerpApi 搜索")
			
 
				-        logger.info("advanced_search: SerpApi disabled for query='%s'", query)
			
 
				-
			
 
				-    # 最后回退到 DuckDuckGo（无需额外配置）
			
 
				-    try:
			
 
				-        ddg_result = duckduckgo_search(
			
 
				-            query,
			
 
				-            max_results=5,
			
 
				-            fetch_full_page=fetch_full_page,
			
 
				-        )
			
 
				-        if ddg_result.get("results"):
			
 
				-            backend = "duckduckgo"
			
 
				-            results.extend(ddg_result["results"])
			
 
				-            logger.info("advanced_search: using DuckDuckGo results for query='%s'", query)
			
 
				-        else:
			
 
				-            notices.append("⚠️ DuckDuckGo 未返回有效结果")
			
 
				-            logger.info("advanced_search: DuckDuckGo returned no results for query='%s'", query)
			
 
				-    except Exception as exc:  # pragma: no cover - 第三方库防御
			
 
				-        notices.append(f"⚠️ DuckDuckGo 搜索失败：{exc}")
			
 
				-        logger.warning("advanced_search: DuckDuckGo failed for query='%s': %s", query, exc)
			
 
				-
			
 
				-    return {
			
 
				-        "results": results,
			
 
				-        "notices": notices,
			
 
				-        "answer": answer,
			
 
				-        "backend": backend,
			
 
				-    }
			
 
				-
			
 
				-
			
 
				-@traceable
			
 
				-def perplexity_search(
			
 
				-    query: str, perplexity_search_loop_count: int = 0
			
 
				-) -> Dict[str, Any]:
			
 
				-    """
			
 
				-    Search the web using the Perplexity API and return formatted results.
			
 
				-
			
 
				-    Uses the Perplexity API to perform searches with the 'sonar-pro' model.
			
 
				-    Requires a PERPLEXITY_API_KEY environment variable to be set.
			
 
				-
			
 
				-    Args:
			
 
				-        query (str): The search query to execute
			
 
				-        perplexity_search_loop_count (int, optional): The loop step for perplexity search
			
 
				-                                                     (used for source labeling). Defaults to 0.
			
 
				-
			
 
				-    Returns:
			
 
				-        Dict[str, Any]: Search response containing:
			
 
				-            - results (list): List of search result dictionaries, each containing:
			
 
				-                - title (str): Title of the search result (includes search counter)
			
 
				-                - url (str): URL of the citation source
			
 
				-                - content (str): Content of the response or reference to main content
			
 
				-                - raw_content (str or None): Full content for the first source, None for additional
			
 
				-                                            citation sources
			
 
				-
			
 
				-    Raises:
			
 
				-        requests.exceptions.HTTPError: If the API request fails
			
 
				-    """
			
 
				-
			
 
				-    headers = {
			
 
				-        "accept": "application/json",
			
 
				-        "content-type": "application/json",
			
 
				-        "Authorization": f"Bearer {os.getenv('PERPLEXITY_API_KEY')}",
			
 
				-    }
			
 
				-
			
 
				-    payload = {
			
 
				-        "model": "sonar-pro",
			
 
				-        "messages": [
			
 
				-            {
			
 
				-                "role": "system",
			
 
				-                "content": "Search the web and provide factual information with sources.",
			
 
				-            },
			
 
				-            {"role": "user", "content": query},
			
 
				-        ],
			
 
				-    }
			
 
				-
			
 
				-    response = requests.post(
			
 
				-        "https://api.perplexity.ai/chat/completions", headers=headers, json=payload
			
 
				+    results = search_results.get("results", [])
			
 
				+    return "\n".join(
			
 
				+        f"* {item.get('title', item.get('url', ''))} : {item.get('url', '')}"
			
 
				+        for item in results
			
 
				+        if item.get("url")
			
 
				     )
			
 
				-    response.raise_for_status()  # Raise exception for bad status codes
			
 
				-
			
 
				-    # Parse the response
			
 
				-    data = response.json()
			
 
				-    content = data["choices"][0]["message"]["content"]
			
 
				-
			
 
				-    # Perplexity returns a list of citations for a single search result
			
 
				-    citations = data.get("citations", ["https://perplexity.ai"])
			
 
				-
			
 
				-    # Return first citation with full content, others just as references
			
 
				-    results = [
			
 
				-        {
			
 
				-            "title": f"Perplexity Search {perplexity_search_loop_count + 1}, Source 1",
			
 
				-            "url": citations[0],
			
 
				-            "content": content,
			
 
				-            "raw_content": content,
			
 
				-        }
			
 
				-    ]
			
 
				-
			
 
				-    # Add additional citations without duplicating content
			
 
				-    for i, citation in enumerate(citations[1:], start=2):
			
 
				-        results.append(
			
 
				-            {
			
 
				-                "title": f"Perplexity Search {perplexity_search_loop_count + 1}, Source {i}",
			
 
				-                "url": citation,
			
 
				-                "content": "See above for full content",
			
 
				-                "raw_content": None,
			
 
				-            }
			
 
				-        )
			
 
				-
			
 
				-    return {"results": results}
			
--- a/docs/chapter14/第十四章自动化深度研究智能体.md
+++ b/docs/chapter14/第十四章自动化深度研究智能体.md
@@ -32,10 +32,10 @@
 
				 
			
 
				 ### 14.1.2 整体能力与用户价值
			
 
				 
			
 
				-`helloagents-deepresearch` 项目将后端HelloAgents智能体、可配置搜索适配器与前端可视化界面结合，形成「输入主题→实时观察→获取总结」的闭环体验。整体亮点如下：
			
 
				+`helloagents-deepresearch` 项目将后端 HelloAgents 智能体、HelloAgents 内置工具体系与前端可视化界面结合，形成「输入主题→实时观察→获取总结」的闭环体验。整体亮点如下：
			
 
				 
			
 
				-- <strong>多提供者模型接入</strong>：支持 Ollama、LMStudio 或自定义 OpenAI 兼容服务，自主选择推理能力与成本。
			
 
				-- <strong>多搜索源融合</strong>：内置 DuckDuckGo、Tavily、Perplexity、SearXNG 适配器，灵活切换，甚至可以按轮次混合使用。
			
 
				+- <strong>多提供者模型接入</strong>：支持 Ollama、LMStudio，或通过 `LLM_PROVIDER=custom` 自定义任意 OpenAI 兼容服务，自主选择推理能力与成本。
			
 
				+- <strong>多搜索源融合</strong>：直接复用 HelloAgents 的 `SearchTool`，内置 Tavily、SerpApi、DuckDuckGo、SearXNG、Perplexity 以及高级混合策略，可按需切换。
			
 
				 - <strong>流式反馈</strong>：后端通过 Server-Sent Events 推送各阶段结果，前端即时展示时间线、最新来源和逐字更新的总结。
			
 
				 - <strong>配置优先级清晰</strong>：环境变量、代码默认值分层管理，方便调试与部署。
			
 
 系统采用经典的<strong>前后端分离架构</strong>，分为四个层次，如图14.1所示。
			
@@ -52,7 +52,7 @@ graph LR
 
				     Config[Configuration.from_env]
			
 
				     subgraph Workflow[DeepResearchAgent 工作流]
			
 
				       Planner{{PlanningService\n任务规划}}
			
 
				-      Search{{dispatch_search\nprepare_research_context}}
			
 
				+      Search{{dispatch_search\n(SearchTool)}}
			
 
				       Summarizer{{SummarizationService\n任务总结}}
			
 
				       Reporter{{ReportingService\n报告整合}}
			
 
				       Tracker[[ToolCallTracker\n工具事件]]
			
@@ -63,7 +63,7 @@ graph LR
 
				     NoteTool[(NoteTool\nToolRegistry)]
			
 
				   end
			
 
				   subgraph External[外部依赖]
			
 
				-    SearchAPI[(Tavily / Perplexity /\nDuckDuckGo / SearxNG / Advanced)]
			
 
				+    SearchAPI[(HelloAgents SearchTool\nTavily · SerpApi · DuckDuckGo ·\nSearXNG · Perplexity · Advanced)]
			
 
				     Notes[(本地笔记\nnotes_workspace)]
			
 
				   end
			
 
				 
			
@@ -210,7 +210,7 @@ graph TD
 
				 - `agents/deep_research_agent.py`：顶层协调者，负责任务规划、并行执行与报告沉淀。
			
 
				 - `services/planner_service.py`、`summarization_service.py`、`reporting_service.py`：分别封装计划、总结、报告逻辑，内部都复用了 `ToolAwareSimpleAgent`。
			
 
				 - `services/tool_events.py`：跟踪 `note` 工具调用，把事件转换成 SSE，可见第九章 `NoteTool` 的集成成果。
			
 
				-- `services/search_service.py`：统一封装多源搜索结果，与第七章的自定义搜索工具同样遵循“结果字典”约定。
			
 
				+- `services/search_service.py`：作为 HelloAgents `SearchTool` 的薄包装，将结构化搜索结果与上下文整理给下游 Agent。
			
 
				 - `configuration.py`、`api.py`：负责配置加载、HelloAgentsLLM 初始化与 HTTP 层的流式推送。
			
 
				 
			
 
				 数据流转顺序为：
			
@@ -275,7 +275,7 @@ class Configuration(BaseModel):
 
				         return cls(**raw_values)
			
 
				 ```
			
 
				 
			
 
				-配置解析逻辑（`backend/src/deep_researcher/configuration.py:18`）先读取所有大写环境变量，再应用显式别名，最后才合并 API 请求的覆盖值。实际运行时意味着：
			
 
				+配置解析逻辑（`backend/src/deep_researcher/configuration.py:18`）先读取所有大写环境变量，再应用显式别名，最后才合并 API 请求的覆盖值。需要注意的是，从本章起 HelloAgents 原生 `HelloAgentsLLM` 已支持 `LLM_PROVIDER=custom`：只要同时提供 `LLM_BASE_URL` 与 `LLM_API_KEY` 即可连接任意 OpenAI 兼容服务，而 `LOCAL_LLM`/`OLLAMA_BASE_URL` 等字段继续服务于本地模型场景。就配置优先级而言，实际运行时意味着：
			
 
				 
			
 
				 - `.env` 或系统环境变量拥有最高优先级，便于本地调试和生产部署。
			
 
				 - LangGraph/UI 提交的临时参数通过 `overrides` 注入，不会修改全局环境。
			
@@ -283,14 +283,14 @@ class Configuration(BaseModel):
 
				 
			
 
				 ### 14.2.3 自定义多源搜索工具
			
 
				 
			
 
				-为了适应不同团队的检索与合规要求，`helloagents-deepresearch` 的搜索层设计成可插拔结构。除了内置的 DuckDuckGo、Tavily、Perplexity、SearXNG，我们还可以像第七章那样编写自己的多源工具，再通过配置切换到 `advanced` 模式使用。建议按如下步骤操作：
			
 
				+为了适应不同团队的检索与合规要求，本项目直接复用 HelloAgents 框架的 `SearchTool`。这一工具现已支持 Tavily、SerpApi、DuckDuckGo、SearXNG、Perplexity 以及「advanced」混合策略，并且同一份结构化返回可以被多个 Agent 共享。因此在深度研究项目里，我们只需要按需配置即可：
			
 
				 
			
 
				-1. **复用示例代码**：参考 `code/chapter7/my_advanced_search.py` 中的 `MyAdvancedSearchTool` 类和 `create_advanced_search_registry()` 工厂函数。该示例演示了如何根据环境变量自动检测 Tavily、SerpApi 等后端，并在执行时做降级提示。
			
 
				-2. **本地验证**：运行 `code/chapter7/test_advanced_search.py` 可以快速检查自定义工具是否按预期返回结果；若未配置任何 API key，它会输出友好的诊断信息，便于调试。
			
 
				-3. **接入深度研究项目**：在 `backend/src/deep_researcher/utils.py` 中新增一个适配函数（示例实现见函数 `advanced_search`），内部直接调用你自定义的工具，再将返回的文本整理为统一的 `{"results": [...], "notices": [...]}` 结构，方便后续的去重与摘要逻辑复用。
			
 
				-4. **启用新工具**：将后端配置中的 `SEARCH_API` 设置为 `advanced`，或在前端表单里选择该选项；深度研究 Agent 会自动透传工具的降级提示与直接答案，并在前端时间线中显示，确保用户知晓检索来源和失败原因。
			
 
				+1. **选择后端**：将 `SEARCH_API` 设为 `tavily`、`serpapi`、`duckduckgo`、`searxng`、`perplexity` 或 `advanced`。此外，`hybrid` 是 `advanced` 的等价写法，两者都会优先尝试 Tavily/SerpApi，再降级到 DuckDuckGo。
			
 
				+2. **配置密钥**：根据选定后端设置 `TAVILY_API_KEY`、`SERPAPI_API_KEY`、`PERPLEXITY_API_KEY` 等环境变量；若只想使用无密钥的 DuckDuckGo/SearXNG，可不设置。
			
 
				+3. **结构化输出**：`SearchTool` 默认返回友好的文本描述，当我们在 `services/search_service.py` 中传入 `mode=structured` 时，会得到统一的 `{"results": [...], "answer": ..., "notices": [...]}` 结构，方便后续做去重、裁剪、引用。
			
 
				+4. **深度定制（可选）**：如果还需要扩展新的搜索后端，可以在 HelloAgents 仓库内直接继承 `SearchTool` 并覆盖 `_search_xxx` 方法，或者提交 PR 将新后端合入框架。由于深度研究项目只是框架的“薄封装”，一旦上游合并，所有下游应用都会自动获得能力升级。
			
 
				 
			
 
				-通过这种方式，你可以把任意内部/垂直领域的检索能力接入到深度研究工作流里，同时保持与核心流程的高度解耦。
			
 
				+这种设计让学习者无需在项目中维护额外的搜索适配器，即可通过配置或框架升级获得最新能力；同时也保留了在企业环境内扩展私有搜索源的灵活度。
			
 
				 
			
 
				 > 提示：配置项 `ENABLE_NOTES` 默认为 `true`。当启用时，后端会为每个任务自动同步一份 Markdown 笔记（目录由 `NOTES_WORKSPACE` 指定），并把 `note` 工具挂载到所有 Agent，方便它们在需要时对笔记做增删改查。流式事件会附带 `note_id`，前端可据此展示或跳转对应笔记。