"""Minimal ChatGLM3-6B multi-turn chat demo.

Loads a local ChatGLM3 checkpoint, seeds the conversation with a short
history (in which the assistant states its name is 小明), then asks two
questions — the second one checks that the model retains conversational
state from the seeded history.
"""
from transformers import AutoTokenizer, AutoModel

# Local checkpoint directory; adjust to your environment.
MODEL_PATH = "/home/cmcc/server/model/chatglm3-6b"


def main() -> None:
    """Load the model and run a short two-turn conversation on GPU."""
    # trust_remote_code is required: ChatGLM3 ships its own modeling code
    # inside the checkpoint repo (chat() is not part of stock transformers).
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
    # .half() loads fp16 weights to halve memory; .cuda() requires a GPU
    # and will raise if CUDA is unavailable.
    model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True).half().cuda()
    model.eval()  # inference mode: disables dropout etc.

    # Seed the history so the model "believes" it already introduced itself.
    history = [
        {"role": "user", "content": "你好"},
        {"role": "assistant", "content": "我是人工智能助手,我叫小明"},
    ]

    response, history = model.chat(tokenizer, "你好", history=history)
    print(response)
    # Should answer with the name 小明, proving the history was used.
    response, history = model.chat(tokenizer, "你叫什么名字", history=history)
    print(response)


if __name__ == "__main__":
    main()