9 місяців тому · be93e7fc54
--- a/code/chapter2/ELIZA.py
+++ b/code/chapter2/ELIZA.py
@@ -0,0 +1,85 @@
 
															+import re
														
 
															+import random
														
 
															+
														
 
															+# 定义规则库：模式(正则表达式) -> 响应模板列表
														
 
															+rules = {
														
 
															+    r'I need (.*)': [
														
 
															+        "Why do you need {0}?",
														
 
															+        "Would it really help you to get {0}?",
														
 
															+        "Are you sure you need {0}?"
														
 
															+    ],
														
 
															+    r'Why don\'t you (.*)\?': [
														
 
															+        "Do you really think I don't {0}?",
														
 
															+        "Perhaps eventually I will {0}.",
														
 
															+        "Do you really want me to {0}?"
														
 
															+    ],
														
 
															+    r'Why can\'t I (.*)\?': [
														
 
															+        "Do you think you should be able to {0}?",
														
 
															+        "If you could {0}, what would you do?",
														
 
															+        "I don't know -- why can't you {0}?"
														
 
															+    ],
														
 
															+    r'I am (.*)': [
														
 
															+        "Did you come to me because you are {0}?",
														
 
															+        "How long have you been {0}?",
														
 
															+        "How do you feel about being {0}?"
														
 
															+    ],
														
 
															+    r'.* mother .*': [
														
 
															+        "Tell me more about your mother.",
														
 
															+        "What was your relationship with your mother like?",
														
 
															+        "How do you feel about your mother?"
														
 
															+    ],
														
 
															+    r'.* father .*': [
														
 
															+        "Tell me more about your father.",
														
 
															+        "How did your father make you feel?",
														
 
															+        "What has your father taught you?"
														
 
															+    ],
														
 
															+    r'.*': [
														
 
															+        "Please tell me more.",
														
 
															+        "Let's change focus a bit... Tell me about your family.",
														
 
															+        "Can you elaborate on that?"
														
 
															+    ]
														
 
															+}
														
 
															+
														
 
															+# 定义代词转换规则
														
 
															+pronoun_swap = {
														
 
															+    "i": "you", "you": "i", "me": "you", "my": "your",
														
 
															+    "am": "are", "are": "am", "was": "were", "i'd": "you would",
														
 
															+    "i've": "you have", "i'll": "you will", "yours": "mine",
														
 
															+    "mine": "yours"
														
 
															+}
														
 
															+
														
 
															+def swap_pronouns(phrase):
														
 
															+    """
														
 
															+    对输入短语中的代词进行第一/第二人称转换
														
 
															+    """
														
 
															+    words = phrase.lower().split()
														
 
															+    swapped_words = [pronoun_swap.get(word, word) for word in words]
														
 
															+    return " ".join(swapped_words)
														
 
															+
														
 
															+def respond(user_input):
														
 
															+    """
														
 
															+    根据规则库生成响应
														
 
															+    """
														
 
															+    for pattern, responses in rules.items():
														
 
															+        match = re.search(pattern, user_input, re.IGNORECASE)
														
 
															+        if match:
														
 
															+            # 捕获匹配到的部分
														
 
															+            captured_group = match.group(1) if match.groups() else ''
														
 
															+            # 进行代词转换
														
 
															+            swapped_group = swap_pronouns(captured_group)
														
 
															+            # 从模板中随机选择一个并格式化
														
 
															+            response = random.choice(responses).format(swapped_group)
														
 
															+            return response
														
 
															+    # 如果没有匹配任何特定规则，使用最后的通配符规则
														
 
															+    return random.choice(rules[r'.*'])
														
 
															+
														
 
															+# 主聊天循环
														
 
															+if __name__ == '__main__':
														
 
															+    print("Therapist: Hello! How can I help you today?")
														
 
															+    while True:
														
 
															+        user_input = input("You: ")
														
 
															+        if user_input.lower() in ["quit", "exit", "bye"]:
														
 
															+            print("Therapist: Goodbye. It was nice talking to you.")
														
 
															+            break
														
 
															+        response = respond(user_input)
														
 
															+        print(f"Therapist: {response}")
														
--- a/code/chapter3/BPE.py
+++ b/code/chapter3/BPE.py
@@ -0,0 +1,34 @@
 
															+import re, collections
														
 
															+
														
 
															+def get_stats(vocab):
														
 
															+    """统计词元对频率"""
														
 
															+    pairs = collections.defaultdict(int)
														
 
															+    for word, freq in vocab.items():
														
 
															+        symbols = word.split()
														
 
															+        for i in range(len(symbols)-1):
														
 
															+            pairs[symbols[i],symbols[i+1]] += freq
														
 
															+    return pairs
														
 
															+
														
 
															+def merge_vocab(pair, v_in):
														
 
															+    """合并词元对"""
														
 
															+    v_out = {}
														
 
															+    bigram = re.escape(' '.join(pair))
														
 
															+    p = re.compile(r'(?<!\S)' + bigram + r'(?!\S)')
														
 
															+    for word in v_in:
														
 
															+        w_out = p.sub(''.join(pair), word)
														
 
															+        v_out[w_out] = v_in[word]
														
 
															+    return v_out
														
 
															+
														
 
															+# 准备语料库，每个词末尾加上</w>表示结束，并切分好字符
														
 
															+vocab = {'h u g </w>': 1, 'p u g </w>': 1, 'p u n </w>': 1, 'b u n </w>': 1}
														
 
															+num_merges = 4 # 设置合并次数
														
 
															+
														
 
															+for i in range(num_merges):
														
 
															+    pairs = get_stats(vocab)
														
 
															+    if not pairs:
														
 
															+        break
														
 
															+    best = max(pairs, key=pairs.get)
														
 
															+    vocab = merge_vocab(best, vocab)
														
 
															+    print(f"第{i+1}次合并: {best} -> {''.join(best)}")
														
 
															+    print(f"新词表（部分）: {list(vocab.keys())}")
														
 
															+    print("-" * 20)
														
--- a/code/chapter3/N_gram.py
+++ b/code/chapter3/N_gram.py
@@ -0,0 +1,30 @@
 
															+import collections
														
 
															+
														
 
															+# 示例语料库，与上方案例讲解中的语料库保持一致
														
 
															+corpus = "datawhale agent learns datawhale agent works"
														
 
															+tokens = corpus.split()
														
 
															+total_tokens = len(tokens)
														
 
															+
														
 
															+# --- 第一步：计算 P(datawhale) ---
														
 
															+count_datawhale = tokens.count('datawhale')
														
 
															+p_datawhale = count_datawhale / total_tokens
														
 
															+print(f"第一步: P(datawhale) = {count_datawhale}/{total_tokens} = {p_datawhale:.3f}")
														
 
															+
														
 
															+# --- 第二步：计算 P(agent|datawhale) ---
														
 
															+# 先计算 bigrams 用于后续步骤
														
 
															+bigrams = zip(tokens, tokens[1:])
														
 
															+bigram_counts = collections.Counter(bigrams)
														
 
															+count_datawhale_agent = bigram_counts[('datawhale', 'agent')]
														
 
															+# count_datawhale 已在第一步计算
														
 
															+p_agent_given_datawhale = count_datawhale_agent / count_datawhale
														
 
															+print(f"第二步: P(agent|datawhale) = {count_datawhale_agent}/{count_datawhale} = {p_agent_given_datawhale:.3f}")
														
 
															+
														
 
															+# --- 第三步：计算 P(learns|agent) ---
														
 
															+count_agent_learns = bigram_counts[('agent', 'learns')]
														
 
															+count_agent = tokens.count('agent')
														
 
															+p_learns_given_agent = count_agent_learns / count_agent
														
 
															+print(f"第三步: P(learns|agent) = {count_agent_learns}/{count_agent} = {p_learns_given_agent:.3f}")
														
 
															+
														
 
															+# --- 最后：将概率连乘 ---
														
 
															+p_sentence = p_datawhale * p_agent_given_datawhale * p_learns_given_agent
														
 
															+print(f"最后: P('datawhale agent learns') ≈ {p_datawhale:.3f} * {p_agent_given_datawhale:.3f} * {p_learns_given_agent:.3f} = {p_sentence:.3f}")
														
--- a/code/chapter3/Qwen.py
+++ b/code/chapter3/Qwen.py
@@ -0,0 +1,55 @@
 
															+import torch
														
 
															+from transformers import AutoModelForCausalLM, AutoTokenizer
														
 
															+
														
 
															+# 指定模型ID
														
 
															+model_id = "Qwen/Qwen1.5-0.5B-Chat"
														
 
															+
														
 
															+# 设置设备，优先使用GPU
														
 
															+device = "cuda" if torch.cuda.is_available() else "cpu"
														
 
															+print(f"Using device: {device}")
														
 
															+
														
 
															+# 加载分词器
														
 
															+tokenizer = AutoTokenizer.from_pretrained(model_id)
														
 
															+
														
 
															+# 加载模型，并将其移动到指定设备
														
 
															+model = AutoModelForCausalLM.from_pretrained(model_id).to(device)
														
 
															+
														
 
															+print("模型和分词器加载完成！")
														
 
															+
														
 
															+# 准备对话输入
														
 
															+messages = [
														
 
															+    {"role": "system", "content": "You are a helpful assistant."},
														
 
															+    {"role": "user", "content": "你好，请介绍你自己。"}
														
 
															+]
														
 
															+
														
 
															+# 使用分词器的模板格式化输入
														
 
															+text = tokenizer.apply_chat_template(
														
 
															+    messages,
														
 
															+    tokenize=False,
														
 
															+    add_generation_prompt=True
														
 
															+)
														
 
															+
														
 
															+# 编码输入文本
														
 
															+model_inputs = tokenizer([text], return_tensors="pt").to(device)
														
 
															+
														
 
															+print("编码后的输入文本:")
														
 
															+print(model_inputs)
														
 
															+
														
 
															+# 使用模型生成回答
														
 
															+# max_new_tokens 控制了模型最多能生成多少个新的Token
														
 
															+generated_ids = model.generate(
														
 
															+    model_inputs.input_ids,
														
 
															+    max_new_tokens=512
														
 
															+)
														
 
															+
														
 
															+# 将生成的 Token ID 截取掉输入部分
														
 
															+# 这样我们只解码模型新生成的部分
														
 
															+generated_ids = [
														
 
															+    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
														
 
															+]
														
 
															+
														
 
															+# 解码生成的 Token ID
														
 
															+response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
														
 
															+
														
 
															+print("\n模型的回答:")
														
 
															+print(response)
														
--- a/code/chapter3/Transformer.py
+++ b/code/chapter3/Transformer.py
@@ -0,0 +1,249 @@
 
															+import torch
														
 
															+import torch.nn as nn
														
 
															+import math
														
 
															+import copy
														
 
															+
														
 
															+class MultiHeadAttention(nn.Module):
														
 
															+    """
														
 
															+    多头注意力机制模块
														
 
															+    """
														
 
															+    def __init__(self, d_model, num_heads):
														
 
															+        super(MultiHeadAttention, self).__init__()
														
 
															+        assert d_model % num_heads == 0, "d_model 必须能被 num_heads 整除"
														
 
															+        
														
 
															+        self.d_model = d_model
														
 
															+        self.num_heads = num_heads
														
 
															+        self.d_k = d_model // num_heads
														
 
															+        
														
 
															+        # 定义 Q, K, V 和输出的线性变换层
														
 
															+        self.W_q = nn.Linear(d_model, d_model)
														
 
															+        self.W_k = nn.Linear(d_model, d_model)
														
 
															+        self.W_v = nn.Linear(d_model, d_model)
														
 
															+        self.W_o = nn.Linear(d_model, d_model)
														
 
															+        
														
 
															+    def scaled_dot_product_attention(self, Q, K, V, mask=None):
														
 
															+        # 1. 计算注意力得分 (QK^T)
														
 
															+        attn_scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.d_k)
														
 
															+        
														
 
															+        # 2. 应用掩码 (如果提供)
														
 
															+        if mask is not None:
														
 
															+            # 将掩码中为 0 的位置设置为一个非常小的负数，这样 softmax 后会接近 0
														
 
															+            attn_scores = attn_scores.masked_fill(mask == 0, -1e9)
														
 
															+        
														
 
															+        # 3. 计算注意力权重 (Softmax)
														
 
															+        attn_probs = torch.softmax(attn_scores, dim=-1)
														
 
															+        
														
 
															+        # 4. 加权求和 (权重 * V)
														
 
															+        output = torch.matmul(attn_probs, V)
														
 
															+        return output
														
 
															+        
														
 
															+    def split_heads(self, x):
														
 
															+        # 将输入 x 的形状从 (batch_size, seq_length, d_model)
														
 
															+        # 变换为 (batch_size, num_heads, seq_length, d_k)
														
 
															+        batch_size, seq_length, d_model = x.size()
														
 
															+        return x.view(batch_size, seq_length, self.num_heads, self.d_k).transpose(1, 2)
														
 
															+        
														
 
															+    def combine_heads(self, x):
														
 
															+        # 将输入 x 的形状从 (batch_size, num_heads, seq_length, d_k)
														
 
															+        # 变回 (batch_size, seq_length, d_model)
														
 
															+        batch_size, num_heads, seq_length, d_k = x.size()
														
 
															+        return x.transpose(1, 2).contiguous().view(batch_size, seq_length, self.d_model)
														
 
															+        
														
 
															+    def forward(self, Q, K, V, mask=None):
														
 
															+        # 1. 对 Q, K, V 进行线性变换
														
 
															+        Q = self.split_heads(self.W_q(Q))
														
 
															+        K = self.split_heads(self.W_k(K))
														
 
															+        V = self.split_heads(self.W_v(V))
														
 
															+        
														
 
															+        # 2. 计算缩放点积注意力
														
 
															+        attn_output = self.scaled_dot_product_attention(Q, K, V, mask)
														
 
															+        
														
 
															+        # 3. 合并多头输出并进行最终的线性变换
														
 
															+        output = self.W_o(self.combine_heads(attn_output))
														
 
															+        return output
														
 
															+
														
 
															+class PositionWiseFeedForward(nn.Module):
														
 
															+    """
														
 
															+    位置前馈网络模块
														
 
															+    """
														
 
															+    def __init__(self, d_model, d_ff, dropout=0.1):
														
 
															+        super(PositionWiseFeedForward, self).__init__()
														
 
															+        self.linear1 = nn.Linear(d_model, d_ff)
														
 
															+        self.dropout = nn.Dropout(dropout)
														
 
															+        self.linear2 = nn.Linear(d_ff, d_model)
														
 
															+        self.relu = nn.ReLU()
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        # x 形状: (batch_size, seq_len, d_model)
														
 
															+        x = self.linear1(x)
														
 
															+        x = self.relu(x)
														
 
															+        x = self.dropout(x)
														
 
															+        x = self.linear2(x)
														
 
															+        # 最终输出形状: (batch_size, seq_len, d_model)
														
 
															+        return x
														
 
															+
														
 
															+class PositionalEncoding(nn.Module):
														
 
															+    """
														
 
															+    为输入序列的词嵌入向量添加位置编码。
														
 
															+    """
														
 
															+    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
														
 
															+        super().__init__()
														
 
															+        self.dropout = nn.Dropout(p=dropout)
														
 
															+
														
 
															+        # 创建一个足够长的位置编码矩阵
														
 
															+        position = torch.arange(max_len).unsqueeze(1)
														
 
															+        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
														
 
															+        
														
 
															+        # pe (positional encoding) 的大小为 (max_len, d_model)
														
 
															+        pe = torch.zeros(max_len, d_model)
														
 
															+        
														
 
															+        # 偶数维度使用 sin, 奇数维度使用 cos
														
 
															+        pe[:, 0::2] = torch.sin(position * div_term)
														
 
															+        pe[:, 1::2] = torch.cos(position * div_term)
														
 
															+        
														
 
															+        # 将 pe 注册为 buffer，这样它就不会被视为模型参数，但会随模型移动（例如 to(device)）
														
 
															+        self.register_buffer('pe', pe.unsqueeze(0))
														
 
															+
														
 
															+    def forward(self, x: torch.Tensor) -> torch.Tensor:
														
 
															+        # x.size(1) 是当前输入的序列长度
														
 
															+        # 将位置编码加到输入向量上
														
 
															+        x = x + self.pe[:, :x.size(1)]
														
 
															+        return self.dropout(x)
														
 
															+
														
 
															+class EncoderLayer(nn.Module):
														
 
															+    """
														
 
															+    编码器核心层
														
 
															+    """
														
 
															+    def __init__(self, d_model, num_heads, d_ff, dropout):
														
 
															+        super(EncoderLayer, self).__init__()
														
 
															+        self.self_attn = MultiHeadAttention(d_model, num_heads)
														
 
															+        self.feed_forward = PositionWiseFeedForward(d_model, d_ff, dropout)
														
 
															+        self.norm1 = nn.LayerNorm(d_model)
														
 
															+        self.norm2 = nn.LayerNorm(d_model)
														
 
															+        self.dropout = nn.Dropout(dropout)
														
 
															+    
														
 
															+    def forward(self, x, mask):
														
 
															+        # 1. 多头自注意力
														
 
															+        attn_output = self.self_attn(x, x, x, mask)
														
 
															+        x = self.norm1(x + self.dropout(attn_output))
														
 
															+        
														
 
															+        # 2. 前馈网络
														
 
															+        ff_output = self.feed_forward(x)
														
 
															+        x = self.norm2(x + self.dropout(ff_output))
														
 
															+        
														
 
															+        return x
														
 
															+
														
 
															+class DecoderLayer(nn.Module):
														
 
															+    """
														
 
															+    解码器核心层
														
 
															+    """
														
 
															+    def __init__(self, d_model, num_heads, d_ff, dropout):
														
 
															+        super(DecoderLayer, self).__init__()
														
 
															+        self.self_attn = MultiHeadAttention(d_model, num_heads)
														
 
															+        self.cross_attn = MultiHeadAttention(d_model, num_heads)
														
 
															+        self.feed_forward = PositionWiseFeedForward(d_model, d_ff, dropout)
														
 
															+        self.norm1 = nn.LayerNorm(d_model)
														
 
															+        self.norm2 = nn.LayerNorm(d_model)
														
 
															+        self.norm3 = nn.LayerNorm(d_model)
														
 
															+        self.dropout = nn.Dropout(dropout)
														
 
															+        
														
 
															+    def forward(self, x, encoder_output, src_mask, tgt_mask):
														
 
															+        # 1. 掩码多头自注意力 (对自己)
														
 
															+        attn_output = self.self_attn(x, x, x, tgt_mask)
														
 
															+        x = self.norm1(x + self.dropout(attn_output))
														
 
															+        
														
 
															+        # 2. 交叉注意力 (对编码器输出)
														
 
															+        cross_attn_output = self.cross_attn(x, encoder_output, encoder_output, src_mask)
														
 
															+        x = self.norm2(x + self.dropout(cross_attn_output))
														
 
															+        
														
 
															+        # 3. 前馈网络
														
 
															+        ff_output = self.feed_forward(x)
														
 
															+        x = self.norm3(x + self.dropout(ff_output))
														
 
															+        
														
 
															+        return x
														
 
															+
														
 
															+class Encoder(nn.Module):
														
 
															+    def __init__(self, vocab_size, d_model, num_layers, num_heads, d_ff, dropout, max_len):
														
 
															+        super(Encoder, self).__init__()
														
 
															+        self.embedding = nn.Embedding(vocab_size, d_model)
														
 
															+        self.pos_encoder = PositionalEncoding(d_model, dropout, max_len)
														
 
															+        self.layers = nn.ModuleList([EncoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
														
 
															+        self.norm = nn.LayerNorm(d_model)
														
 
															+
														
 
															+    def forward(self, x, mask):
														
 
															+        x = self.embedding(x)
														
 
															+        x = self.pos_encoder(x)
														
 
															+        for layer in self.layers:
														
 
															+            x = layer(x, mask)
														
 
															+        return self.norm(x)
														
 
															+
														
 
															+class Decoder(nn.Module):
														
 
															+    def __init__(self, vocab_size, d_model, num_layers, num_heads, d_ff, dropout, max_len):
														
 
															+        super(Decoder, self).__init__()
														
 
															+        self.embedding = nn.Embedding(vocab_size, d_model)
														
 
															+        self.pos_encoder = PositionalEncoding(d_model, dropout, max_len)
														
 
															+        self.layers = nn.ModuleList([DecoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
														
 
															+        self.norm = nn.LayerNorm(d_model)
														
 
															+
														
 
															+    def forward(self, x, encoder_output, src_mask, tgt_mask):
														
 
															+        x = self.embedding(x)
														
 
															+        x = self.pos_encoder(x)
														
 
															+        for layer in self.layers:
														
 
															+            x = layer(x, encoder_output, src_mask, tgt_mask)
														
 
															+        return self.norm(x)
														
 
															+
														
 
															+class Transformer(nn.Module):
														
 
															+    def __init__(self, src_vocab_size, tgt_vocab_size, d_model, num_layers, num_heads, d_ff, dropout, max_len=5000):
														
 
															+        super(Transformer, self).__init__()
														
 
															+        self.encoder = Encoder(src_vocab_size, d_model, num_layers, num_heads, d_ff, dropout, max_len)
														
 
															+        self.decoder = Decoder(tgt_vocab_size, d_model, num_layers, num_heads, d_ff, dropout, max_len)
														
 
															+        self.final_linear = nn.Linear(d_model, tgt_vocab_size)
														
 
															+    
														
 
															+    def generate_mask(self, src, tgt):
														
 
															+        # src_mask: (batch_size, 1, 1, src_len)
														
 
															+        src_mask = (src != 0).unsqueeze(1).unsqueeze(2) 
														
 
															+        
														
 
															+        # tgt_mask: (batch_size, 1, tgt_len, tgt_len)
														
 
															+        tgt_pad_mask = (tgt != 0).unsqueeze(1).unsqueeze(2) # (batch_size, 1, 1, tgt_len)
														
 
															+        tgt_len = tgt.size(1)
														
 
															+        # 下三角矩阵，用于防止看到未来的 token
														
 
															+        tgt_sub_mask = torch.tril(torch.ones((tgt_len, tgt_len), device=src.device)).bool() # (tgt_len, tgt_len)
														
 
															+        tgt_mask = tgt_pad_mask & tgt_sub_mask
														
 
															+        
														
 
															+        return src_mask, tgt_mask
														
 
															+    
														
 
															+    def forward(self, src, tgt):
														
 
															+        src_mask, tgt_mask = self.generate_mask(src, tgt)
														
 
															+        
														
 
															+        encoder_output = self.encoder(src, src_mask)
														
 
															+        decoder_output = self.decoder(tgt, encoder_output, src_mask, tgt_mask)
														
 
															+        
														
 
															+        output = self.final_linear(decoder_output)
														
 
															+        return output
														
 
															+
														
 
															+# --- 演示如何使用模型 ---
														
 
															+if __name__ == "__main__":
														
 
															+    # 1. 定义超参数
														
 
															+    src_vocab_size = 5000
														
 
															+    tgt_vocab_size = 5000
														
 
															+    d_model = 512
														
 
															+    num_layers = 6
														
 
															+    num_heads = 8
														
 
															+    d_ff = 2048
														
 
															+    dropout = 0.1
														
 
															+    max_len = 100
														
 
															+    
														
 
															+    # 2. 实例化模型
														
 
															+    model = Transformer(src_vocab_size, tgt_vocab_size, d_model, num_layers, num_heads, d_ff, dropout, max_len)
														
 
															+    
														
 
															+    # 3. 创建模拟输入数据
														
 
															+    # 假设 batch_size=2, src_seq_len=10, tgt_seq_len=12
														
 
															+    src = torch.randint(1, src_vocab_size, (2, 10))  # (batch_size, seq_length)
														
 
															+    tgt = torch.randint(1, tgt_vocab_size, (2, 12))  # (batch_size, seq_length)
														
 
															+
														
 
															+    # 4. 模型前向传播
														
 
															+    output = model(src, tgt)
														
 
															+    
														
 
															+    # 5. 打印输出形状
														
 
															+    print("模型输出的形状:", output.shape)
														
 
															+    # 预期输出: torch.Size([2, 12, 5000]) -> (batch_size, tgt_seq_len, tgt_vocab_size)
														
--- a/code/chapter3/Word_Embedding.py
+++ b/code/chapter3/Word_Embedding.py
@@ -0,0 +1,23 @@
 
															+import numpy as np
														
 
															+
														
 
															+# 假设我们已经学习到了简化的二维词向量
														
 
															+embeddings = {
														
 
															+    "king": np.array([0.9, 0.8]),
														
 
															+    "queen": np.array([0.9, 0.2]),
														
 
															+    "man": np.array([0.7, 0.9]),
														
 
															+    "woman": np.array([0.7, 0.3])
														
 
															+}
														
 
															+
														
 
															+def cosine_similarity(vec1, vec2):
														
 
															+    dot_product = np.dot(vec1, vec2)
														
 
															+    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
														
 
															+    return dot_product / norm_product
														
 
															+
														
 
															+# king - man + woman
														
 
															+result_vec = embeddings["king"] - embeddings["man"] + embeddings["woman"]
														
 
															+
														
 
															+# 计算结果向量与 "queen" 的相似度
														
 
															+sim = cosine_similarity(result_vec, embeddings["queen"])
														
 
															+
														
 
															+print(f"king - man + woman 的结果向量: {result_vec}")
														
 
															+print(f"该结果与 'queen' 的相似度: {sim:.4f}")
														
--- a/code/chapter4/Plan_and_solve.py
+++ b/code/chapter4/Plan_and_solve.py
@@ -1,11 +1,16 @@
 
															 import os
														
 
															+import ast
														
 
															 from llm_client import HelloAgentsLLM
														
 
															 from dotenv import load_dotenv
														
 
															 from typing import List, Dict
														
 
															-# 加载 .env 文件中的环境变量
														
 
															-# 请确保你的项目根目录下有 .env 文件，并已配置好 LLM_MODEL_ID, LLM_API_KEY, LLM_BASE_URL
														
 
															-load_dotenv()
														
 
															+# 加载 .env 文件中的环境变量。注意：找不到 .env 文件时 load_dotenv() 通常只会返回 False，而不会抛出 FileNotFoundError；下面的 except 仅作为兜底处理
														
 
															+try:
														
 
															+    load_dotenv()
														
 
															+except FileNotFoundError:
														
 
															+    print("警告：未找到 .env 文件，将使用系统环境变量。")
														
 
															+except Exception as e:
														
 
															+    print(f"警告：加载 .env 文件时出错: {e}")
														
 
															 # --- 1. LLM客户端定义 ---
														
 
															 # 假设你已经有llm_client.py文件，里面定义了HelloAgentsLLM类
														
@@ -38,10 +43,14 @@ class Planner:
 
															         try:
														
 
															             plan_str = response_text.split("```python")[1].split("```")[0].strip()
														
 
															-            plan = eval(plan_str)
														
 
															+            plan = ast.literal_eval(plan_str)
														
 
															             return plan if isinstance(plan, list) else []
														
 
															-        except Exception as e:
														
 
															+        except (ValueError, SyntaxError, IndexError) as e:
														
 
															             print(f"❌ 解析计划时出错: {e}")
														
 
															+            print(f"原始响应: {response_text}")
														
 
															+            return []
														
 
															+        except Exception as e:
														
 
															+            print(f"❌ 解析计划时发生未知错误: {e}")
														
 
															             return []
														
 
															 # --- 3. 执行器 (Executor) 定义 ---
														
--- a/code/chapter4/tools.py
+++ b/code/chapter4/tools.py
@@ -61,6 +61,9 @@ class ToolExecutor:
 
															         """
														
 
															         向工具箱中注册一个新工具。
														
 
															         """
														
 
															+        if name in self.tools:
														
 
															+            print(f"警告：工具 '{name}' 已存在，将被覆盖。")
														
 
															+        
														
 
															         self.tools[name] = {"description": description, "func": func}
														
 
															         print(f"工具 '{name}' 已注册。")
														
--- a/code/chapter6/AutoGenDemo/autogen_software_team.py
+++ b/code/chapter6/AutoGenDemo/autogen_software_team.py
@@ -10,11 +10,6 @@ from dotenv import load_dotenv
 
															 # 加载环境变量
														
 
															 load_dotenv()
														
 
															-# # 导入 HelloAgentsLLM
														
 
															-# import sys
														
 
															-# sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
														
 
															-# from llm_client import HelloAgentsLLM
														
 
															-
														
 
															 # 先测试一个版本，使用 OpenAI 客户端
														
 
															 from autogen_ext.models.openai import OpenAIChatCompletionClient
														
 
															 from autogen_agentchat.agents import AssistantAgent, UserProxyAgent
														
--- a/code/chapter6/AutoGenDemo/llm_client.py
+++ b/code/chapter6/AutoGenDemo/llm_client.py
@@ -1,72 +0,0 @@
 
															-import os
														
 
															-from openai import OpenAI
														
 
															-from dotenv import load_dotenv
														
 
															-from typing import List, Dict
														
 
															-
														
 
															-# 加载 .env 文件中的环境变量
														
 
															-load_dotenv()
														
 
															-
														
 
															-class HelloAgentsLLM:
														
 
															-    """
														
 
															-    为本书 "Hello Agents" 定制的LLM客户端。
														
 
															-    它用于调用任何兼容OpenAI接口的服务，并默认使用流式响应。
														
 
															-    """
														
 
															-    def __init__(self, model: str = None, apiKey: str = None, baseUrl: str = None, timeout: int = None):
														
 
															-        """
														
 
															-        初始化客户端。优先使用传入参数，如果未提供，则从环境变量加载。
														
 
															-        """
														
 
															-        self.model = model or os.getenv("LLM_MODEL_ID")
														
 
															-        apiKey = apiKey or os.getenv("LLM_API_KEY")
														
 
															-        baseUrl = baseUrl or os.getenv("LLM_BASE_URL")
														
 
															-        timeout = timeout or int(os.getenv("LLM_TIMEOUT", 60))
														
 
															-        
														
 
															-        if not all([self.model, apiKey, baseUrl]):
														
 
															-            raise ValueError("模型ID、API密钥和服务地址必须被提供或在.env文件中定义。")
														
 
															-
														
 
															-        self.client = OpenAI(api_key=apiKey, base_url=baseUrl, timeout=timeout)
														
 
															-
														
 
															-    def think(self, messages: List[Dict[str, str]], temperature: float = 0) -> str:
														
 
															-        """
														
 
															-        调用大语言模型进行思考，并返回其响应。
														
 
															-        """
														
 
															-        print(f"🧠 正在调用 {self.model} 模型...")
														
 
															-        try:
														
 
															-            response = self.client.chat.completions.create(
														
 
															-                model=self.model,
														
 
															-                messages=messages,
														
 
															-                temperature=temperature,
														
 
															-                stream=True,
														
 
															-            )
														
 
															-            
														
 
															-            # 处理流式响应
														
 
															-            print("✅ 大语言模型响应成功:")
														
 
															-            collected_content = []
														
 
															-            for chunk in response:
														
 
															-                content = chunk.choices[0].delta.content or ""
														
 
															-                print(content, end="", flush=True)
														
 
															-                collected_content.append(content)
														
 
															-            print()  # 在流式输出结束后换行
														
 
															-            return "".join(collected_content)
														
 
															-
														
 
															-        except Exception as e:
														
 
															-            print(f"❌ 调用LLM API时发生错误: {e}")
														
 
															-            return None
														
 
															-
														
 
															-# --- 客户端使用示例 ---
														
 
															-if __name__ == '__main__':
														
 
															-    try:
														
 
															-        llmClient = HelloAgentsLLM()
														
 
															-        
														
 
															-        exampleMessages = [
														
 
															-            {"role": "system", "content": "You are a helpful assistant that writes Python code."},
														
 
															-            {"role": "user", "content": "写一个快速排序算法"}
														
 
															-        ]
														
 
															-        
														
 
															-        print("--- 调用LLM ---")
														
 
															-        responseText = llmClient.think(exampleMessages)
														
 
															-        if responseText:
														
 
															-            print("\n\n--- 完整模型响应 ---")
														
 
															-            print(responseText)
														
 
															-
														
 
															-    except ValueError as e:
														
 
															-        print(e)
														
--- a/docs/README.md
+++ b/docs/README.md
@@ -7,6 +7,7 @@
 
															   <img src="https://img.shields.io/github/forks/datawhalechina/Hello-Agents?style=flat&logo=github" alt="GitHub forks"/>
														
 
															   <img src="https://img.shields.io/badge/language-Chinese-brightgreen?style=flat" alt="Language"/>
														
 
															   <a href="https://github.com/datawhalechina/Hello-Agents"><img src="https://img.shields.io/badge/GitHub-Project-blue?style=flat&logo=github" alt="GitHub Project"></a>
														
 
															+  <a href="https://datawhalechina.github.io/hello-agents/"><img src="https://img.shields.io/badge/在线阅读-Online%20Reading-green?style=flat&logo=gitbook" alt="Online Reading"></a>
														
 
															 </div>
														
 
															 ---
														
@@ -17,6 +18,16 @@
 
															 &emsp;&emsp;Hello-Agents 是一个<strong>系统性的多智能体学习教程</strong>，旨在"授人以渔"。教程将带领你穿透框架表象，从智能体的核心原理出发，深入其核心架构，理解其经典范式，并最终亲手构建起属于自己的、强大的多智能体应用。我们相信，最好的学习方式就是动手实践。希望这本书能成为你探索智能体世界的起点，让你能够从一名 LLM 的"使用者"，蜕变为一名智能系统的"构建者"。
														
 
															+## 🌐 在线阅读
														
 
															+
														
 
															+**[📖 点击这里开始在线阅读](https://datawhalechina.github.io/hello-agents/)**
														
 
															+
														
 
															+> 推荐使用在线阅读方式，享受更好的阅读体验，包括：
														
 
															+> - 📱 响应式设计，支持手机、平板、电脑
														
 
															+> - 🔍 全文搜索功能
														
 
															+> - 📑 自动生成目录导航
														
 
															+> - 🎨 优雅的阅读界面
														
 
															+
														
 
															 ### ✨ 你将收获什么？
														
 
															 - 📖 <strong>Datawhale 开源免费</strong> 完全免费学习本项目所有内容，与社区共同成长
														
--- a/docs/chapter4/第四章智能体经典范式构建.md
+++ b/docs/chapter4/第四章智能体经典范式构建.md
@@ -251,6 +251,8 @@ class ToolExecutor:
 
															         """
														
 
															         向工具箱中注册一个新工具。
														
 
															         """
														
 
															+        if name in self.tools:
														
 
															+            print(f"警告：工具 '{name}' 已存在，将被覆盖。")
														
 
															         self.tools[name] = {"description": description, "func": func}
														
 
															         print(f"工具 '{name}' 已注册。")
														
@@ -662,15 +664,15 @@ class Planner:
 
															         try:
														
 
															             # 找到```python和```之间的内容
														
 
															             plan_str = response_text.split("```python")[1].split("```")[0].strip()
														
 
															-            # 使用eval来安全地执行字符串，将其转换为Python列表
														
 
															-            plan = eval(plan_str)
														
 
															-            if isinstance(plan, list):
														
 
															-                return plan
														
 
															-            else:
														
 
															-                print("❌ 解析失败：输出不是一个列表。")
														
 
															-                return []
														
 
															-        except Exception as e:
														
 
															+            # 使用ast.literal_eval安全地解析字符串（仅接受Python字面量，不会执行任意代码），将其转换为Python列表
														
 
															+            plan = ast.literal_eval(plan_str)
														
 
															+            return plan if isinstance(plan, list) else []
														
 
															+        except (ValueError, SyntaxError, IndexError) as e:
														
 
															             print(f"❌ 解析计划时出错: {e}")
														
 
															+            print(f"原始响应: {response_text}")
														
 
															+            return []
														
 
															+        except Exception as e:
														
 
															+            print(f"❌ 解析计划时发生未知错误: {e}")
														
 
															             return []
														
 
															 ```