9 місяців тому · be93e7fc54
--- a/code/chapter2/ELIZA.py
+++ b/code/chapter2/ELIZA.py
@@ -0,0 +1,85 @@
 
				+import re
			
 
				+import random
			
 
				+
			
 
				+# 定义规则库：模式(正则表达式) -> 响应模板列表
			
 
				+rules = {
			
 
				+    r'I need (.*)': [
			
 
				+        "Why do you need {0}?",
			
 
				+        "Would it really help you to get {0}?",
			
 
				+        "Are you sure you need {0}?"
			
 
				+    ],
			
 
				+    r'Why don\'t you (.*)\?': [
			
 
				+        "Do you really think I don't {0}?",
			
 
				+        "Perhaps eventually I will {0}.",
			
 
				+        "Do you really want me to {0}?"
			
 
				+    ],
			
 
				+    r'Why can\'t I (.*)\?': [
			
 
				+        "Do you think you should be able to {0}?",
			
 
				+        "If you could {0}, what would you do?",
			
 
				+        "I don't know -- why can't you {0}?"
			
 
				+    ],
			
 
				+    r'I am (.*)': [
			
 
				+        "Did you come to me because you are {0}?",
			
 
				+        "How long have you been {0}?",
			
 
				+        "How do you feel about being {0}?"
			
 
				+    ],
			
 
				+    r'.* mother .*': [
			
 
				+        "Tell me more about your mother.",
			
 
				+        "What was your relationship with your mother like?",
			
 
				+        "How do you feel about your mother?"
			
 
				+    ],
			
 
				+    r'.* father .*': [
			
 
				+        "Tell me more about your father.",
			
 
				+        "How did your father make you feel?",
			
 
				+        "What has your father taught you?"
			
 
				+    ],
			
 
				+    r'.*': [
			
 
				+        "Please tell me more.",
			
 
				+        "Let's change focus a bit... Tell me about your family.",
			
 
				+        "Can you elaborate on that?"
			
 
				+    ]
			
 
				+}
			
 
				+
			
 
				+# 定义代词转换规则
			
 
				+pronoun_swap = {
			
 
				+    "i": "you", "you": "i", "me": "you", "my": "your",
			
 
				+    "am": "are", "are": "am", "was": "were", "i'd": "you would",
			
 
				+    "i've": "you have", "i'll": "you will", "yours": "mine",
			
 
				+    "mine": "yours"
			
 
				+}
			
 
				+
			
 
				+def swap_pronouns(phrase):
			
 
				+    """
			
 
				+    对输入短语中的代词进行第一/第二人称转换
			
 
				+    """
			
 
				+    words = phrase.lower().split()
			
 
				+    swapped_words = [pronoun_swap.get(word, word) for word in words]
			
 
				+    return " ".join(swapped_words)
			
 
				+
			
 
				+def respond(user_input):
			
 
				+    """
			
 
				+    根据规则库生成响应
			
 
				+    """
			
 
				+    for pattern, responses in rules.items():
			
 
				+        match = re.search(pattern, user_input, re.IGNORECASE)
			
 
				+        if match:
			
 
				+            # 捕获匹配到的部分
			
 
				+            captured_group = match.group(1) if match.groups() else ''
			
 
				+            # 进行代词转换
			
 
				+            swapped_group = swap_pronouns(captured_group)
			
 
				+            # 从模板中随机选择一个并格式化
			
 
				+            response = random.choice(responses).format(swapped_group)
			
 
				+            return response
			
 
				+    # 如果没有匹配任何特定规则，使用最后的通配符规则
			
 
				+    return random.choice(rules[r'.*'])
			
 
				+
			
 
				+# 主聊天循环
			
 
				+if __name__ == '__main__':
			
 
				+    print("Therapist: Hello! How can I help you today?")
			
 
				+    while True:
			
 
				+        user_input = input("You: ")
			
 
				+        if user_input.lower() in ["quit", "exit", "bye"]:
			
 
				+            print("Therapist: Goodbye. It was nice talking to you.")
			
 
				+            break
			
 
				+        response = respond(user_input)
			
 
				+        print(f"Therapist: {response}")
			
--- a/code/chapter3/BPE.py
+++ b/code/chapter3/BPE.py
@@ -0,0 +1,34 @@
 
				+import re, collections
			
 
				+
			
 
				+def get_stats(vocab):
			
 
				+    """统计词元对频率"""
			
 
				+    pairs = collections.defaultdict(int)
			
 
				+    for word, freq in vocab.items():
			
 
				+        symbols = word.split()
			
 
				+        for i in range(len(symbols)-1):
			
 
				+            pairs[symbols[i],symbols[i+1]] += freq
			
 
				+    return pairs
			
 
				+
			
 
				+def merge_vocab(pair, v_in):
			
 
				+    """合并词元对"""
			
 
				+    v_out = {}
			
 
				+    bigram = re.escape(' '.join(pair))
			
 
				+    p = re.compile(r'(?<!\S)' + bigram + r'(?!\S)')
			
 
				+    for word in v_in:
			
 
				+        w_out = p.sub(''.join(pair), word)
			
 
				+        v_out[w_out] = v_in[word]
			
 
				+    return v_out
			
 
				+
			
 
				+# 准备语料库，每个词末尾加上</w>表示结束，并切分好字符
			
 
				+vocab = {'h u g </w>': 1, 'p u g </w>': 1, 'p u n </w>': 1, 'b u n </w>': 1}
			
 
				+num_merges = 4 # 设置合并次数
			
 
				+
			
 
				+for i in range(num_merges):
			
 
				+    pairs = get_stats(vocab)
			
 
				+    if not pairs:
			
 
				+        break
			
 
				+    best = max(pairs, key=pairs.get)
			
 
				+    vocab = merge_vocab(best, vocab)
			
 
				+    print(f"第{i+1}次合并: {best} -> {''.join(best)}")
			
 
				+    print(f"新词表（部分）: {list(vocab.keys())}")
			
 
				+    print("-" * 20)
			
--- a/code/chapter3/N_gram.py
+++ b/code/chapter3/N_gram.py
@@ -0,0 +1,30 @@
 
				+import collections
			
 
				+
			
 
				+# 示例语料库，与上方案例讲解中的语料库保持一致
			
 
				+corpus = "datawhale agent learns datawhale agent works"
			
 
				+tokens = corpus.split()
			
 
				+total_tokens = len(tokens)
			
 
				+
			
 
				+# --- 第一步：计算 P(datawhale) ---
			
 
				+count_datawhale = tokens.count('datawhale')
			
 
				+p_datawhale = count_datawhale / total_tokens
			
 
				+print(f"第一步: P(datawhale) = {count_datawhale}/{total_tokens} = {p_datawhale:.3f}")
			
 
				+
			
 
				+# --- 第二步：计算 P(agent|datawhale) ---
			
 
				+# 先计算 bigrams 用于后续步骤
			
 
				+bigrams = zip(tokens, tokens[1:])
			
 
				+bigram_counts = collections.Counter(bigrams)
			
 
				+count_datawhale_agent = bigram_counts[('datawhale', 'agent')]
			
 
				+# count_datawhale 已在第一步计算
			
 
				+p_agent_given_datawhale = count_datawhale_agent / count_datawhale
			
 
				+print(f"第二步: P(agent|datawhale) = {count_datawhale_agent}/{count_datawhale} = {p_agent_given_datawhale:.3f}")
			
 
				+
			
 
				+# --- 第三步：计算 P(learns|agent) ---
			
 
				+count_agent_learns = bigram_counts[('agent', 'learns')]
			
 
				+count_agent = tokens.count('agent')
			
 
				+p_learns_given_agent = count_agent_learns / count_agent
			
 
				+print(f"第三步: P(learns|agent) = {count_agent_learns}/{count_agent} = {p_learns_given_agent:.3f}")
			
 
				+
			
 
				+# --- 最后：将概率连乘 ---
			
 
				+p_sentence = p_datawhale * p_agent_given_datawhale * p_learns_given_agent
			
 
				+print(f"最后: P('datawhale agent learns') ≈ {p_datawhale:.3f} * {p_agent_given_datawhale:.3f} * {p_learns_given_agent:.3f} = {p_sentence:.3f}")
			
--- a/code/chapter3/Qwen.py
+++ b/code/chapter3/Qwen.py
@@ -0,0 +1,55 @@
 
				+import torch
			
 
				+from transformers import AutoModelForCausalLM, AutoTokenizer
			
 
				+
			
 
				+# 指定模型ID
			
 
				+model_id = "Qwen/Qwen1.5-0.5B-Chat"
			
 
				+
			
 
				+# 设置设备，优先使用GPU
			
 
				+device = "cuda" if torch.cuda.is_available() else "cpu"
			
 
				+print(f"Using device: {device}")
			
 
				+
			
 
				+# 加载分词器
			
 
				+tokenizer = AutoTokenizer.from_pretrained(model_id)
			
 
				+
			
 
				+# 加载模型，并将其移动到指定设备
			
 
				+model = AutoModelForCausalLM.from_pretrained(model_id).to(device)
			
 
				+
			
 
				+print("模型和分词器加载完成！")
			
 
				+
			
 
				+# 准备对话输入
			
 
				+messages = [
			
 
				+    {"role": "system", "content": "You are a helpful assistant."},
			
 
				+    {"role": "user", "content": "你好，请介绍你自己。"}
			
 
				+]
			
 
				+
			
 
				+# 使用分词器的模板格式化输入
			
 
				+text = tokenizer.apply_chat_template(
			
 
				+    messages,
			
 
				+    tokenize=False,
			
 
				+    add_generation_prompt=True
			
 
				+)
			
 
				+
			
 
				+# 编码输入文本
			
 
				+model_inputs = tokenizer([text], return_tensors="pt").to(device)
			
 
				+
			
 
				+print("编码后的输入文本:")
			
 
				+print(model_inputs)
			
 
				+
			
 
				+# 使用模型生成回答
			
 
				+# max_new_tokens 控制了模型最多能生成多少个新的Token
			
 
				+generated_ids = model.generate(
			
 
				+    model_inputs.input_ids,
			
 
				+    max_new_tokens=512
			
 
				+)
			
 
				+
			
 
				+# 将生成的 Token ID 截取掉输入部分
			
 
				+# 这样我们只解码模型新生成的部分
			
 
				+generated_ids = [
			
 
				+    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
			
 
				+]
			
 
				+
			
 
				+# 解码生成的 Token ID
			
 
				+response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
			
 
				+
			
 
				+print("\n模型的回答:")
			
 
				+print(response)
			
--- a/code/chapter3/Transformer.py
+++ b/code/chapter3/Transformer.py
@@ -0,0 +1,249 @@
 
				+import torch
			
 
				+import torch.nn as nn
			
 
				+import math
			
 
				+import copy
			
 
				+
			
 
				+class MultiHeadAttention(nn.Module):
			
 
				+    """
			
 
				+    多头注意力机制模块
			
 
				+    """
			
 
				+    def __init__(self, d_model, num_heads):
			
 
				+        super(MultiHeadAttention, self).__init__()
			
 
				+        assert d_model % num_heads == 0, "d_model 必须能被 num_heads 整除"
			
 
				+        
			
 
				+        self.d_model = d_model
			
 
				+        self.num_heads = num_heads
			
 
				+        self.d_k = d_model // num_heads
			
 
				+        
			
 
				+        # 定义 Q, K, V 和输出的线性变换层
			
 
				+        self.W_q = nn.Linear(d_model, d_model)
			
 
				+        self.W_k = nn.Linear(d_model, d_model)
			
 
				+        self.W_v = nn.Linear(d_model, d_model)
			
 
				+        self.W_o = nn.Linear(d_model, d_model)
			
 
				+        
			
 
				+    def scaled_dot_product_attention(self, Q, K, V, mask=None):
			
 
				+        # 1. 计算注意力得分 (QK^T)
			
 
				+        attn_scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.d_k)
			
 
				+        
			
 
				+        # 2. 应用掩码 (如果提供)
			
 
				+        if mask is not None:
			
 
				+            # 将掩码中为 0 的位置设置为一个非常小的负数，这样 softmax 后会接近 0
			
 
				+            attn_scores = attn_scores.masked_fill(mask == 0, -1e9)
			
 
				+        
			
 
				+        # 3. 计算注意力权重 (Softmax)
			
 
				+        attn_probs = torch.softmax(attn_scores, dim=-1)
			
 
				+        
			
 
				+        # 4. 加权求和 (权重 * V)
			
 
				+        output = torch.matmul(attn_probs, V)
			
 
				+        return output
			
 
				+        
			
 
				+    def split_heads(self, x):
			
 
				+        # 将输入 x 的形状从 (batch_size, seq_length, d_model)
			
 
				+        # 变换为 (batch_size, num_heads, seq_length, d_k)
			
 
				+        batch_size, seq_length, d_model = x.size()
			
 
				+        return x.view(batch_size, seq_length, self.num_heads, self.d_k).transpose(1, 2)
			
 
				+        
			
 
				+    def combine_heads(self, x):
			
 
				+        # 将输入 x 的形状从 (batch_size, num_heads, seq_length, d_k)
			
 
				+        # 变回 (batch_size, seq_length, d_model)
			
 
				+        batch_size, num_heads, seq_length, d_k = x.size()
			
 
				+        return x.transpose(1, 2).contiguous().view(batch_size, seq_length, self.d_model)
			
 
				+        
			
 
				+    def forward(self, Q, K, V, mask=None):
			
 
				+        # 1. 对 Q, K, V 进行线性变换
			
 
				+        Q = self.split_heads(self.W_q(Q))
			
 
				+        K = self.split_heads(self.W_k(K))
			
 
				+        V = self.split_heads(self.W_v(V))
			
 
				+        
			
 
				+        # 2. 计算缩放点积注意力
			
 
				+        attn_output = self.scaled_dot_product_attention(Q, K, V, mask)
			
 
				+        
			
 
				+        # 3. 合并多头输出并进行最终的线性变换
			
 
				+        output = self.W_o(self.combine_heads(attn_output))
			
 
				+        return output
			
 
				+
			
 
				+class PositionWiseFeedForward(nn.Module):
			
 
				+    """
			
 
				+    位置前馈网络模块
			
 
				+    """
			
 
				+    def __init__(self, d_model, d_ff, dropout=0.1):
			
 
				+        super(PositionWiseFeedForward, self).__init__()
			
 
				+        self.linear1 = nn.Linear(d_model, d_ff)
			
 
				+        self.dropout = nn.Dropout(dropout)
			
 
				+        self.linear2 = nn.Linear(d_ff, d_model)
			
 
				+        self.relu = nn.ReLU()
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        # x 形状: (batch_size, seq_len, d_model)
			
 
				+        x = self.linear1(x)
			
 
				+        x = self.relu(x)
			
 
				+        x = self.dropout(x)
			
 
				+        x = self.linear2(x)
			
 
				+        # 最终输出形状: (batch_size, seq_len, d_model)
			
 
				+        return x
			
 
				+
			
 
				+class PositionalEncoding(nn.Module):
			
 
				+    """
			
 
				+    为输入序列的词嵌入向量添加位置编码。
			
 
				+    """
			
 
				+    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
			
 
				+        super().__init__()
			
 
				+        self.dropout = nn.Dropout(p=dropout)
			
 
				+
			
 
				+        # 创建一个足够长的位置编码矩阵
			
 
				+        position = torch.arange(max_len).unsqueeze(1)
			
 
				+        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
			
 
				+        
			
 
				+        # pe (positional encoding) 的大小为 (max_len, d_model)
			
 
				+        pe = torch.zeros(max_len, d_model)
			
 
				+        
			
 
				+        # 偶数维度使用 sin, 奇数维度使用 cos
			
 
				+        pe[:, 0::2] = torch.sin(position * div_term)
			
 
				+        pe[:, 1::2] = torch.cos(position * div_term)
			
 
				+        
			
 
				+        # 将 pe 注册为 buffer，这样它就不会被视为模型参数，但会随模型移动（例如 to(device)）
			
 
				+        self.register_buffer('pe', pe.unsqueeze(0))
			
 
				+
			
 
				+    def forward(self, x: torch.Tensor) -> torch.Tensor:
			
 
				+        # x.size(1) 是当前输入的序列长度
			
 
				+        # 将位置编码加到输入向量上
			
 
				+        x = x + self.pe[:, :x.size(1)]
			
 
				+        return self.dropout(x)
			
 
				+
			
 
				+class EncoderLayer(nn.Module):
			
 
				+    """
			
 
				+    编码器核心层
			
 
				+    """
			
 
				+    def __init__(self, d_model, num_heads, d_ff, dropout):
			
 
				+        super(EncoderLayer, self).__init__()
			
 
				+        self.self_attn = MultiHeadAttention(d_model, num_heads)
			
 
				+        self.feed_forward = PositionWiseFeedForward(d_model, d_ff, dropout)
			
 
				+        self.norm1 = nn.LayerNorm(d_model)
			
 
				+        self.norm2 = nn.LayerNorm(d_model)
			
 
				+        self.dropout = nn.Dropout(dropout)
			
 
				+    
			
 
				+    def forward(self, x, mask):
			
 
				+        # 1. 多头自注意力
			
 
				+        attn_output = self.self_attn(x, x, x, mask)
			
 
				+        x = self.norm1(x + self.dropout(attn_output))
			
 
				+        
			
 
				+        # 2. 前馈网络
			
 
				+        ff_output = self.feed_forward(x)
			
 
				+        x = self.norm2(x + self.dropout(ff_output))
			
 
				+        
			
 
				+        return x
			
 
				+
			
 
				+class DecoderLayer(nn.Module):
			
 
				+    """
			
 
				+    解码器核心层
			
 
				+    """
			
 
				+    def __init__(self, d_model, num_heads, d_ff, dropout):
			
 
				+        super(DecoderLayer, self).__init__()
			
 
				+        self.self_attn = MultiHeadAttention(d_model, num_heads)
			
 
				+        self.cross_attn = MultiHeadAttention(d_model, num_heads)
			
 
				+        self.feed_forward = PositionWiseFeedForward(d_model, d_ff, dropout)
			
 
				+        self.norm1 = nn.LayerNorm(d_model)
			
 
				+        self.norm2 = nn.LayerNorm(d_model)
			
 
				+        self.norm3 = nn.LayerNorm(d_model)
			
 
				+        self.dropout = nn.Dropout(dropout)
			
 
				+        
			
 
				+    def forward(self, x, encoder_output, src_mask, tgt_mask):
			
 
				+        # 1. 掩码多头自注意力 (对自己)
			
 
				+        attn_output = self.self_attn(x, x, x, tgt_mask)
			
 
				+        x = self.norm1(x + self.dropout(attn_output))
			
 
				+        
			
 
				+        # 2. 交叉注意力 (对编码器输出)
			
 
				+        cross_attn_output = self.cross_attn(x, encoder_output, encoder_output, src_mask)
			
 
				+        x = self.norm2(x + self.dropout(cross_attn_output))
			
 
				+        
			
 
				+        # 3. 前馈网络
			
 
				+        ff_output = self.feed_forward(x)
			
 
				+        x = self.norm3(x + self.dropout(ff_output))
			
 
				+        
			
 
				+        return x
			
 
				+
			
 
				+class Encoder(nn.Module):
			
 
				+    def __init__(self, vocab_size, d_model, num_layers, num_heads, d_ff, dropout, max_len):
			
 
				+        super(Encoder, self).__init__()
			
 
				+        self.embedding = nn.Embedding(vocab_size, d_model)
			
 
				+        self.pos_encoder = PositionalEncoding(d_model, dropout, max_len)
			
 
				+        self.layers = nn.ModuleList([EncoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
			
 
				+        self.norm = nn.LayerNorm(d_model)
			
 
				+
			
 
				+    def forward(self, x, mask):
			
 
				+        x = self.embedding(x)
			
 
				+        x = self.pos_encoder(x)
			
 
				+        for layer in self.layers:
			
 
				+            x = layer(x, mask)
			
 
				+        return self.norm(x)
			
 
				+
			
 
				+class Decoder(nn.Module):
			
 
				+    def __init__(self, vocab_size, d_model, num_layers, num_heads, d_ff, dropout, max_len):
			
 
				+        super(Decoder, self).__init__()
			
 
				+        self.embedding = nn.Embedding(vocab_size, d_model)
			
 
				+        self.pos_encoder = PositionalEncoding(d_model, dropout, max_len)
			
 
				+        self.layers = nn.ModuleList([DecoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
			
 
				+        self.norm = nn.LayerNorm(d_model)
			
 
				+
			
 
				+    def forward(self, x, encoder_output, src_mask, tgt_mask):
			
 
				+        x = self.embedding(x)
			
 
				+        x = self.pos_encoder(x)
			
 
				+        for layer in self.layers:
			
 
				+            x = layer(x, encoder_output, src_mask, tgt_mask)
			
 
				+        return self.norm(x)
			
 
				+
			
 
				+class Transformer(nn.Module):
			
 
				+    def __init__(self, src_vocab_size, tgt_vocab_size, d_model, num_layers, num_heads, d_ff, dropout, max_len=5000):
			
 
				+        super(Transformer, self).__init__()
			
 
				+        self.encoder = Encoder(src_vocab_size, d_model, num_layers, num_heads, d_ff, dropout, max_len)
			
 
				+        self.decoder = Decoder(tgt_vocab_size, d_model, num_layers, num_heads, d_ff, dropout, max_len)
			
 
				+        self.final_linear = nn.Linear(d_model, tgt_vocab_size)
			
 
				+    
			
 
				+    def generate_mask(self, src, tgt):
			
 
				+        # src_mask: (batch_size, 1, 1, src_len)
			
 
				+        src_mask = (src != 0).unsqueeze(1).unsqueeze(2) 
			
 
				+        
			
 
				+        # tgt_mask: (batch_size, 1, tgt_len, tgt_len)
			
 
				+        tgt_pad_mask = (tgt != 0).unsqueeze(1).unsqueeze(2) # (batch_size, 1, 1, tgt_len)
			
 
				+        tgt_len = tgt.size(1)
			
 
				+        # 下三角矩阵，用于防止看到未来的 token
			
 
				+        tgt_sub_mask = torch.tril(torch.ones((tgt_len, tgt_len), device=src.device)).bool() # (tgt_len, tgt_len)
			
 
				+        tgt_mask = tgt_pad_mask & tgt_sub_mask
			
 
				+        
			
 
				+        return src_mask, tgt_mask
			
 
				+    
			
 
				+    def forward(self, src, tgt):
			
 
				+        src_mask, tgt_mask = self.generate_mask(src, tgt)
			
 
				+        
			
 
				+        encoder_output = self.encoder(src, src_mask)
			
 
				+        decoder_output = self.decoder(tgt, encoder_output, src_mask, tgt_mask)
			
 
				+        
			
 
				+        output = self.final_linear(decoder_output)
			
 
				+        return output
			
 
				+
			
 
				+# --- 演示如何使用模型 ---
			
 
				+if __name__ == "__main__":
			
 
				+    # 1. 定义超参数
			
 
				+    src_vocab_size = 5000
			
 
				+    tgt_vocab_size = 5000
			
 
				+    d_model = 512
			
 
				+    num_layers = 6
			
 
				+    num_heads = 8
			
 
				+    d_ff = 2048
			
 
				+    dropout = 0.1
			
 
				+    max_len = 100
			
 
				+    
			
 
				+    # 2. 实例化模型
			
 
				+    model = Transformer(src_vocab_size, tgt_vocab_size, d_model, num_layers, num_heads, d_ff, dropout, max_len)
			
 
				+    
			
 
				+    # 3. 创建模拟输入数据
			
 
				+    # 假设 batch_size=2, src_seq_len=10, tgt_seq_len=12
			
 
				+    src = torch.randint(1, src_vocab_size, (2, 10))  # (batch_size, seq_length)
			
 
				+    tgt = torch.randint(1, tgt_vocab_size, (2, 12))  # (batch_size, seq_length)
			
 
				+
			
 
				+    # 4. 模型前向传播
			
 
				+    output = model(src, tgt)
			
 
				+    
			
 
				+    # 5. 打印输出形状
			
 
				+    print("模型输出的形状:", output.shape)
			
 
				+    # 预期输出: torch.Size([2, 12, 5000]) -> (batch_size, tgt_seq_len, tgt_vocab_size)
			
--- a/code/chapter3/Word_Embedding.py
+++ b/code/chapter3/Word_Embedding.py
@@ -0,0 +1,23 @@
 
				+import numpy as np
			
 
				+
			
 
				+# 假设我们已经学习到了简化的二维词向量
			
 
				+embeddings = {
			
 
				+    "king": np.array([0.9, 0.8]),
			
 
				+    "queen": np.array([0.9, 0.2]),
			
 
				+    "man": np.array([0.7, 0.9]),
			
 
				+    "woman": np.array([0.7, 0.3])
			
 
				+}
			
 
				+
			
 
				+def cosine_similarity(vec1, vec2):
			
 
				+    dot_product = np.dot(vec1, vec2)
			
 
				+    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
			
 
				+    return dot_product / norm_product
			
 
				+
			
 
				+# king - man + woman
			
 
				+result_vec = embeddings["king"] - embeddings["man"] + embeddings["woman"]
			
 
				+
			
 
				+# 计算结果向量与 "queen" 的相似度
			
 
				+sim = cosine_similarity(result_vec, embeddings["queen"])
			
 
				+
			
 
				+print(f"king - man + woman 的结果向量: {result_vec}")
			
 
				+print(f"该结果与 'queen' 的相似度: {sim:.4f}")
			
--- a/code/chapter4/Plan_and_solve.py
+++ b/code/chapter4/Plan_and_solve.py
@@ -1,11 +1,16 @@
 
				 import os
			
 
				+import ast
			
 
				 from llm_client import HelloAgentsLLM
			
 
				 from dotenv import load_dotenv
			
 
				 from typing import List, Dict
			
 
				 
			
 
				-# 加载 .env 文件中的环境变量
			
 
				-# 请确保你的项目根目录下有 .env 文件，并已配置好 LLM_MODEL_ID, LLM_API_KEY, LLM_BASE_URL
			
 
				-load_dotenv()
			
 
				+# 加载 .env 文件中的环境变量，处理文件不存在异常
			
 
				+try:
			
 
				+    load_dotenv()
			
 
				+except FileNotFoundError:
			
 
				+    print("警告：未找到 .env 文件，将使用系统环境变量。")
			
 
				+except Exception as e:
			
 
				+    print(f"警告：加载 .env 文件时出错: {e}")
			
 
				 
			
 
				 # --- 1. LLM客户端定义 ---
			
 
				 # 假设你已经有llm_client.py文件，里面定义了HelloAgentsLLM类
			
@@ -38,10 +43,14 @@ class Planner:
 
				         
			
 
				         try:
			
 
				             plan_str = response_text.split("```python")[1].split("```")[0].strip()
			
 
				-            plan = eval(plan_str)
			
 
				+            plan = ast.literal_eval(plan_str)
			
 
				             return plan if isinstance(plan, list) else []
			
 
				-        except Exception as e:
			
 
				+        except (ValueError, SyntaxError, IndexError) as e:
			
 
				             print(f"❌ 解析计划时出错: {e}")
			
 
				+            print(f"原始响应: {response_text}")
			
 
				+            return []
			
 
				+        except Exception as e:
			
 
				+            print(f"❌ 解析计划时发生未知错误: {e}")
			
 
				             return []
			
 
				 
			
 
				 # --- 3. 执行器 (Executor) 定义 ---
			
--- a/code/chapter4/tools.py
+++ b/code/chapter4/tools.py
@@ -61,6 +61,9 @@ class ToolExecutor:
 
				         """
			
 
				         向工具箱中注册一个新工具。
			
 
				         """
			
 
				+        if name in self.tools:
			
 
				+            print(f"警告：工具 '{name}' 已存在，将被覆盖。")
			
 
				+        
			
 
				         self.tools[name] = {"description": description, "func": func}
			
 
				         print(f"工具 '{name}' 已注册。")
			
 
				 
			
--- a/code/chapter6/AutoGenDemo/autogen_software_team.py
+++ b/code/chapter6/AutoGenDemo/autogen_software_team.py
@@ -10,11 +10,6 @@ from dotenv import load_dotenv
 
				 # 加载环境变量
			
 
				 load_dotenv()
			
 
				 
			
 
				-# # 导入 HelloAgentsLLM
			
 
				-# import sys
			
 
				-# sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
			
 
				-# from llm_client import HelloAgentsLLM
			
 
				-
			
 
				 # 先测试一个版本，使用 OpenAI 客户端
			
 
				 from autogen_ext.models.openai import OpenAIChatCompletionClient
			
 
				 from autogen_agentchat.agents import AssistantAgent, UserProxyAgent
			
--- a/code/chapter6/AutoGenDemo/llm_client.py
+++ b/code/chapter6/AutoGenDemo/llm_client.py
@@ -1,72 +0,0 @@
 
				-import os
			
 
				-from openai import OpenAI
			
 
				-from dotenv import load_dotenv
			
 
				-from typing import List, Dict
			
 
				-
			
 
				-# 加载 .env 文件中的环境变量
			
 
				-load_dotenv()
			
 
				-
			
 
				-class HelloAgentsLLM:
			
 
				-    """
			
 
				-    为本书 "Hello Agents" 定制的LLM客户端。
			
 
				-    它用于调用任何兼容OpenAI接口的服务，并默认使用流式响应。
			
 
				-    """
			
 
				-    def __init__(self, model: str = None, apiKey: str = None, baseUrl: str = None, timeout: int = None):
			
 
				-        """
			
 
				-        初始化客户端。优先使用传入参数，如果未提供，则从环境变量加载。
			
 
				-        """
			
 
				-        self.model = model or os.getenv("LLM_MODEL_ID")
			
 
				-        apiKey = apiKey or os.getenv("LLM_API_KEY")
			
 
				-        baseUrl = baseUrl or os.getenv("LLM_BASE_URL")
			
 
				-        timeout = timeout or int(os.getenv("LLM_TIMEOUT", 60))
			
 
				-        
			
 
				-        if not all([self.model, apiKey, baseUrl]):
			
 
				-            raise ValueError("模型ID、API密钥和服务地址必须被提供或在.env文件中定义。")
			
 
				-
			
 
				-        self.client = OpenAI(api_key=apiKey, base_url=baseUrl, timeout=timeout)
			
 
				-
			
 
				-    def think(self, messages: List[Dict[str, str]], temperature: float = 0) -> str:
			
 
				-        """
			
 
				-        调用大语言模型进行思考，并返回其响应。
			
 
				-        """
			
 
				-        print(f"🧠 正在调用 {self.model} 模型...")
			
 
				-        try:
			
 
				-            response = self.client.chat.completions.create(
			
 
				-                model=self.model,
			
 
				-                messages=messages,
			
 
				-                temperature=temperature,
			
 
				-                stream=True,
			
 
				-            )
			
 
				-            
			
 
				-            # 处理流式响应
			
 
				-            print("✅ 大语言模型响应成功:")
			
 
				-            collected_content = []
			
 
				-            for chunk in response:
			
 
				-                content = chunk.choices[0].delta.content or ""
			
 
				-                print(content, end="", flush=True)
			
 
				-                collected_content.append(content)
			
 
				-            print()  # 在流式输出结束后换行
			
 
				-            return "".join(collected_content)
			
 
				-
			
 
				-        except Exception as e:
			
 
				-            print(f"❌ 调用LLM API时发生错误: {e}")
			
 
				-            return None
			
 
				-
			
 
				-# --- 客户端使用示例 ---
			
 
				-if __name__ == '__main__':
			
 
				-    try:
			
 
				-        llmClient = HelloAgentsLLM()
			
 
				-        
			
 
				-        exampleMessages = [
			
 
				-            {"role": "system", "content": "You are a helpful assistant that writes Python code."},
			
 
				-            {"role": "user", "content": "写一个快速排序算法"}
			
 
				-        ]
			
 
				-        
			
 
				-        print("--- 调用LLM ---")
			
 
				-        responseText = llmClient.think(exampleMessages)
			
 
				-        if responseText:
			
 
				-            print("\n\n--- 完整模型响应 ---")
			
 
				-            print(responseText)
			
 
				-
			
 
				-    except ValueError as e:
			
 
				-        print(e)
			
--- a/docs/README.md
+++ b/docs/README.md
@@ -7,6 +7,7 @@
 
				   <img src="https://img.shields.io/github/forks/datawhalechina/Hello-Agents?style=flat&logo=github" alt="GitHub forks"/>
			
 
				   <img src="https://img.shields.io/badge/language-Chinese-brightgreen?style=flat" alt="Language"/>
			
 
				   <a href="https://github.com/datawhalechina/Hello-Agents"><img src="https://img.shields.io/badge/GitHub-Project-blue?style=flat&logo=github" alt="GitHub Project"></a>
			
 
				+  <a href="https://datawhalechina.github.io/hello-agents/"><img src="https://img.shields.io/badge/在线阅读-Online%20Reading-green?style=flat&logo=gitbook" alt="Online Reading"></a>
			
 
				 </div>
			
 
				 
			
 
				 ---
			
@@ -17,6 +18,16 @@
 
				 
			
 
				 &emsp;&emsp;Hello-Agents 是一个<strong>系统性的多智能体学习教程</strong>，旨在"授人以渔"。教程将带领你穿透框架表象，从智能体的核心原理出发，深入其核心架构，理解其经典范式，并最终亲手构建起属于自己的、强大的多智能体应用。我们相信，最好的学习方式就是动手实践。希望这本书能成为你探索智能体世界的起点，能够从一名 LLM 的"使用者"，蜕变为一名智能系统的"构建者"。
			
 
				 
			
 
				+## 🌐 在线阅读
			
 
				+
			
 
				+**[📖 点击这里开始在线阅读](https://datawhalechina.github.io/hello-agents/)**
			
 
				+
			
 
				+> 推荐使用在线阅读方式，享受更好的阅读体验，包括：
			
 
				+> - 📱 响应式设计，支持手机、平板、电脑
			
 
				+> - 🔍 全文搜索功能
			
 
				+> - 📑 自动生成目录导航
			
 
				+> - 🎨 优雅的阅读界面
			
 
				+
			
 
				 ### ✨ 你将收获什么？
			
 
				 
			
 
				 - 📖 <strong>Datawhale 开源免费</strong> 完全免费学习本项目所有内容，与社区共同成长
			
--- a/docs/chapter4/第四章智能体经典范式构建.md
+++ b/docs/chapter4/第四章智能体经典范式构建.md
@@ -251,6 +251,8 @@ class ToolExecutor:
 
				         """
			
 
				         向工具箱中注册一个新工具。
			
 
				         """
			
 
				+        if name in self.tools:
			
 
				+            print(f"警告：工具 '{name}' 已存在，将被覆盖。")
			
 
				         self.tools[name] = {"description": description, "func": func}
			
 
				         print(f"工具 '{name}' 已注册。")
			
 
				 
			
@@ -662,15 +664,15 @@ class Planner:
 
				         try:
			
 
				             # 找到```python和```之间的内容
			
 
				             plan_str = response_text.split("```python")[1].split("```")[0].strip()
			
 
				-            # 使用eval来安全地执行字符串，将其转换为Python列表
			
 
				-            plan = eval(plan_str)
			
 
				-            if isinstance(plan, list):
			
 
				-                return plan
			
 
				-            else:
			
 
				-                print("❌ 解析失败：输出不是一个列表。")
			
 
				-                return []
			
 
				-        except Exception as e:
			
 
				+            # 使用ast.literal_eval安全地解析字符串（仅接受Python字面量，不会执行任意代码），将其转换为Python列表
			
 
				+            plan = ast.literal_eval(plan_str)
			
 
				+            return plan if isinstance(plan, list) else []
			
 
				+        except (ValueError, SyntaxError, IndexError) as e:
			
 
				             print(f"❌ 解析计划时出错: {e}")
			
 
				+            print(f"原始响应: {response_text}")
			
 
				+            return []
			
 
				+        except Exception as e:
			
 
				+            print(f"❌ 解析计划时发生未知错误: {e}")
			
 
				             return []
			
 
				 ```