# Qwen.py
  1. # 增加HF_ENDPOINT,避免Connection aborted.
  2. import os
  3. os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
  4. import torch
  5. from transformers import AutoModelForCausalLM, AutoTokenizer
  6. # 指定模型ID
  7. model_id = "Qwen/Qwen1.5-0.5B-Chat"
  8. # 设置设备,优先使用GPU
  9. device = "cuda" if torch.cuda.is_available() else "cpu"
  10. print(f"Using device: {device}")
  11. # 加载分词器
  12. tokenizer = AutoTokenizer.from_pretrained(model_id)
  13. # 加载模型,并将其移动到指定设备
  14. model = AutoModelForCausalLM.from_pretrained(model_id).to(device)
  15. print("模型和分词器加载完成!")
  16. # 准备对话输入
  17. messages = [
  18. {"role": "system", "content": "You are a helpful assistant."},
  19. {"role": "user", "content": "你好,请介绍你自己。"}
  20. ]
  21. # 使用分词器的模板格式化输入
  22. text = tokenizer.apply_chat_template(
  23. messages,
  24. tokenize=False,
  25. add_generation_prompt=True
  26. )
  27. # 编码输入文本
  28. model_inputs = tokenizer([text], return_tensors="pt").to(device)
  29. print("编码后的输入文本:")
  30. print(model_inputs)
  31. # 使用模型生成回答
  32. # max_new_tokens 控制了模型最多能生成多少个新的Token
  33. generated_ids = model.generate(
  34. model_inputs.input_ids,
  35. max_new_tokens=512
  36. )
  37. # 将生成的 Token ID 截取掉输入部分
  38. # 这样我们只解码模型新生成的部分
  39. generated_ids = [
  40. output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
  41. ]
  42. # 解码生成的 Token ID
  43. response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
  44. print("\n模型的回答:")
  45. print(response)