# Qwen.py 1.5 KB

  1. import torch
  2. from transformers import AutoModelForCausalLM, AutoTokenizer
  3. # 指定模型ID
  4. model_id = "Qwen/Qwen1.5-0.5B-Chat"
  5. # 设置设备,优先使用GPU
  6. device = "cuda" if torch.cuda.is_available() else "cpu"
  7. print(f"Using device: {device}")
  8. # 加载分词器
  9. tokenizer = AutoTokenizer.from_pretrained(model_id)
  10. # 加载模型,并将其移动到指定设备
  11. model = AutoModelForCausalLM.from_pretrained(model_id).to(device)
  12. print("模型和分词器加载完成!")
  13. # 准备对话输入
  14. messages = [
  15. {"role": "system", "content": "You are a helpful assistant."},
  16. {"role": "user", "content": "你好,请介绍你自己。"}
  17. ]
  18. # 使用分词器的模板格式化输入
  19. text = tokenizer.apply_chat_template(
  20. messages,
  21. tokenize=False,
  22. add_generation_prompt=True
  23. )
  24. # 编码输入文本
  25. model_inputs = tokenizer([text], return_tensors="pt").to(device)
  26. print("编码后的输入文本:")
  27. print(model_inputs)
  28. # 使用模型生成回答
  29. # max_new_tokens 控制了模型最多能生成多少个新的Token
  30. generated_ids = model.generate(
  31. model_inputs.input_ids,
  32. max_new_tokens=512
  33. )
  34. # 将生成的 Token ID 截取掉输入部分
  35. # 这样我们只解码模型新生成的部分
  36. generated_ids = [
  37. output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
  38. ]
  39. # 解码生成的 Token ID
  40. response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
  41. print("\n模型的回答:")
  42. print(response)