tensorrt-llm如何打印logits和probs
# Step 1: build the engine with --gather_all_token_logits so the runtime
# gathers logits for every token and can dump them to .npy files later.
trtllm-build --checkpoint_dir ./tmp \
--output_dir $2/ \
--gather_all_token_logits
# Step 2: run tensorrt_llm/examples/run.py — this example uses 2 GPUs via
# mpirun; put the lines below in a shell script and execute it.
# NOTE(review): "type" shadows the shell builtin of the same name — works, but
# a name like "dtype" would be safer.
type=fp8
output_path=output_$type
# The three --output_*_npy flags dump raw logits, per-token log-probs and
# cumulative log-probs into $output_path as .npy files.
mpirun -n 2 --allow-run-as-root python3 run.py --input_text="test.txt" --max_output_len 10 \
--engine_dir /engine/$type --max_input_length 4000 --no_prompt_template \
--temperature 0.1 --tokenizer_dir /engine/$type/tokenizer_path \
--output_logits_npy ./$output_path/logits --output_log_probs_npy ./$output_path/log_probs --output_cum_log_probs_npy ./$output_path/cum_log_probs
# Step 3: analyse the generated logits_generation.npy file.
import numpy as np
import torch


def top_k_token_ids(logits, k):
    """Return (values, indices) of the k highest-scoring entries.

    logits: 1-D array-like of per-token scores for one generation step.
    k: number of top entries to return.
    """
    return torch.topk(torch.from_numpy(np.asarray(logits)), k)


def main():
    # transformers is only needed when decoding token ids, so import lazily.
    from transformers import AutoTokenizer

    # NOTE(review): 'output_int8' does not match the output_fp8 directory
    # produced by the shell step above — confirm which dump to analyse.
    prefix = 'output_int8'
    array = np.load(f'{prefix}/logits_generation.npy')
    # Indexed below as array[0][0][step][token_id]; presumably
    # (batch, beam, step, vocab) — TODO confirm against run.py's dump format.
    print(array.shape)

    # Hoisted out of the loop: the tokenizer is loop-invariant.
    path = '/engine/tokenizer_path'
    tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)

    # 'step' replaces the original name 'round', which shadowed a builtin.
    for step in range(1):
        k = 10  # inspect the 10 highest-scoring token ids for this step
        values, indices = top_k_token_ids(array[0][0][step], k)
        print("Top-k values:", values)
        if len(indices) == 1:
            tokens = tokenizer.convert_ids_to_tokens([indices])
            print(tokens)
        else:
            # 'token_id' replaces 'id', which shadowed a builtin.
            for token_id in indices:
                tokens = tokenizer.decode([token_id])
                print(tokens, end=' ')
            print()

    # Compare the raw scores of a few specific token ids at step 0.
    print(tokenizer.encode("None"), tokenizer.encode("80"), tokenizer.encode("8"))
    print(array[0][0][0][tokenizer.encode("None")[0]])
    print(array[0][0][0][tokenizer.encode("80")[1]])
    print(array[0][0][0][tokenizer.encode("8")[1]])


if __name__ == "__main__":
    main()