# fastllm: the correct way to convert ChatGLM3-6B into an .flm file
from modelscope import AutoTokenizer, AutoModel, snapshot_download

import torch2flm

# Download (or reuse the locally cached) ChatGLM3-6B weights, pinned to a
# fixed revision for reproducibility.
model_dir = snapshot_download("ZhipuAI/chatglm3-6b", revision="v1.0.0")
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)

# Load the model in fp16. NOTE(review): the original chained `.half().cuda()`
# and then immediately moved the model back with `.to("cpu")` for export —
# the GPU round-trip does no useful work for conversion and makes the script
# crash on machines without a CUDA device, so it is dropped here. The exported
# weights are identical (fp16 on CPU) either way.
model = AutoModel.from_pretrained(model_dir, trust_remote_code=True).half()
model = model.eval()

# torch2flm quantizes to int4 during export; it expects the model on CPU.
torch2flm.tofile("/mnt/workspace/chatglm3-6b-int4.flm", model.to("cpu"), tokenizer, dtype="int4")