1. Download the model files
You will need a GPU with at least 6 GB of VRAM. Download the model files from Hugging Face (the THUDM/chatglm2-6b-int4 repository).
After downloading, place all the files in a single folder named chatglm2-6b-int4, next to the script from step 3.
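One way to script the download, as a minimal sketch assuming the huggingface_hub package is installed (pip install huggingface_hub):

from huggingface_hub import snapshot_download

# Download every file of the int4 model repo into ./chatglm2-6b-int4
snapshot_download(repo_id='THUDM/chatglm2-6b-int4',
                  local_dir='chatglm2-6b-int4')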
2. Set up the environment
Follow the official ChatGLM2-6B documentation to install the dependencies; at a minimum, the script in step 3 needs torch with CUDA support and transformers (the repository's requirements.txt lists the full set).
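Before loading the model, a quick sanity check (a generic sketch, nothing ChatGLM-specific) can confirm that PyTorch sees a CUDA GPU and that it has the required 6 GB of VRAM:

import torch

# Fail fast if no CUDA device is visible
assert torch.cuda.is_available(), 'No CUDA GPU visible to PyTorch'

# Report the GPU name and its total memory in GiB
props = torch.cuda.get_device_properties(0)
print(f'{torch.cuda.get_device_name(0)}: {props.total_memory / 1024**3:.1f} GiB VRAM')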
3. Test and run
Save the following script next to the chatglm2-6b-int4 folder and run it; it starts a streaming chat loop in the terminal (type :q to quit).
import os
import warnings
import torch
from transformers import AutoTokenizer, AutoModel

warnings.filterwarnings('ignore')

# Resolve the model folder that sits next to this script
current_path = os.path.abspath(__file__)
parent_path = os.path.dirname(current_path)
path = os.path.join(parent_path, 'chatglm2-6b-int4')

# Load the tokenizer and the int4-quantized model onto the GPU
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
model = AutoModel.from_pretrained(path, trust_remote_code=True).half().cuda()
model.eval()

past_key_values, history = None, []
with torch.no_grad():
    while True:
        query = input('>>')
        if query == ':q':
            break
        current_length = 0
        # Stream the reply incrementally, printing only the newly
        # generated characters on each iteration
        for response, history, past_key_values in model.stream_chat(
                tokenizer, query, history,
                top_p=0.5,
                temperature=0.5,
                past_key_values=past_key_values,
                return_past_key_values=True
        ):
            print(response[current_length:], end="", flush=True)
            current_length = len(response)
        print('\n')
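Passing past_key_values back into stream_chat with return_past_key_values=True reuses the cached attention states of earlier turns, so the conversation history is not re-encoded on every request. As an aside, if no 6 GB GPU is available, the official ChatGLM2-6B README notes that the int4 model can also run on CPU (much slower, and a compiler with OpenMP support is needed to build the quantization kernels); only the loading line changes:

# CPU fallback: load in float32 instead of half precision on the GPU
model = AutoModel.from_pretrained(path, trust_remote_code=True).float()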
Test run ↓