PyTorch之计算模型推理时间

一、参考资料

如何测试模型的推理速度
Pytorch 测试模型的推理速度

二、计算PyTorch模型推理时间

1. 计算CPU推理时间

import torch
import torchvision
import time
import tqdm
from torchsummary import summary


def calcCPUTime():
    """Measure ResNet-18 CPU inference latency with host-side wall-clock timers.

    Prints three numbers:
      * FPS                      -- images per second over the whole timed loop
      * elapsed_time_ms          -- average wall-clock milliseconds per image
      * Avg Forward Time per Image -- mean of the per-forward timings, in ms
    """
    model = torchvision.models.resnet18()
    model.eval()  # inference mode: freeze batch-norm stats, disable dropout
    # summary(model, input_size=(3, 224, 224), device="cpu")
    dummy_input = torch.randn(1, 3, 224, 224)
    batch_size = dummy_input.shape[0]

    num_iterations = 1000  # number of timed iterations
    num_warmup = 100
    # Warm-up: the first forward passes pay one-time costs (memory
    # allocation, lazy initialization), so exclude them from the timing.
    print('warm up ...\n')
    with torch.no_grad():
        for _ in range(num_warmup):
            _ = model(dummy_input)

    print('testing ...\n')
    total_forward_time_ms = 0.0  # accumulated per-forward time, in milliseconds
    start_time = time.time()  # seconds; brackets the whole timed loop
    with torch.no_grad():
        for _ in tqdm.tqdm(range(num_iterations)):
            t0 = time.time()
            _ = model(dummy_input)
            # time.time() is in seconds; convert each forward pass to ms
            total_forward_time_ms += (time.time() - t0) * 1000.0

    elapsed_time = time.time() - start_time  # total loop time, in seconds

    num_images = num_iterations * batch_size
    fps = num_images / elapsed_time
    # Average wall-clock milliseconds per image (the variable now really
    # holds milliseconds, instead of being converted only at print time).
    elapsed_time_ms = elapsed_time * 1000.0 / num_images
    avg_forward_time = total_forward_time_ms / num_images

    print(f"FPS: {fps}")
    print("elapsed_time_ms:", elapsed_time_ms)
    print(f"Avg Forward Time per Image: {avg_forward_time} ms")


if __name__ == "__main__":
    # Run the CPU inference benchmark when executed as a script.
    calcCPUTime()

输出结果

warm up ...

testing ...

100%|██████████| 1000/1000 [00:09<00:00, 102.13it/s]
FPS: 102.11109490533485
elapsed_time_ms: 9.793255090713501
Avg Forward Time per Image: 9.777164697647095 ms

CPU资源占用情况

(原文此处为 CPU 资源占用情况的截图,抓取时图片未能保留)

2. 计算GPU推理时间

方法一

import torch
import torchvision
import time
import tqdm
from torchsummary import summary


def calcGPUTime():
    """Measure ResNet-18 GPU inference latency with host-side wall-clock timers.

    CUDA kernels launch asynchronously: without torch.cuda.synchronize(),
    time.time() around model(...) only measures kernel-launch overhead, and
    the loop's end time can be taken while the GPU is still busy. Every timed
    region is therefore bracketed by an explicit synchronize.

    Prints FPS, average wall-clock milliseconds per image, and the mean of
    the per-forward timings in ms.
    """
    model = torchvision.models.resnet18()
    model.cuda()
    model.eval()  # inference mode: freeze batch-norm stats, disable dropout
    # summary(model, input_size=(3, 224, 224), device="cuda")
    dummy_input = torch.randn(1, 3, 224, 224).cuda()
    batch_size = dummy_input.shape[0]

    num_iterations = 1000  # number of timed iterations
    # Warm-up: the GPU may be idling in a low-power state, and the first
    # passes pay one-time costs (context setup, cuDNN autotuning).
    print('warm up ...\n')
    with torch.no_grad():
        for _ in range(100):
            _ = model(dummy_input)
    torch.cuda.synchronize()  # ensure all warm-up work finished before timing

    print('testing ...\n')
    total_forward_time_ms = 0.0  # accumulated per-forward time, in milliseconds
    start_time = time.time()  # seconds; brackets the whole timed loop
    with torch.no_grad():
        for _ in tqdm.tqdm(range(num_iterations)):
            t0 = time.time()
            _ = model(dummy_input)
            torch.cuda.synchronize()  # wait for this forward to really finish
            total_forward_time_ms += (time.time() - t0) * 1000.0

    elapsed_time = time.time() - start_time  # total loop time, in seconds

    num_images = num_iterations * batch_size
    fps = num_images / elapsed_time
    # Average wall-clock milliseconds per image.
    elapsed_time_ms = elapsed_time * 1000.0 / num_images
    avg_forward_time = total_forward_time_ms / num_images

    print(f"FPS: {fps}")
    print("elapsed_time_ms:", elapsed_time_ms)
    print(f"Avg Forward Time per Image: {avg_forward_time} ms")


if __name__ == "__main__":
    # Run the GPU inference benchmark when executed as a script.
    calcGPUTime()

输出结果

warm up ...

testing ...

100%|██████████| 1000/1000 [00:01<00:00, 727.79it/s]
FPS: 727.1527832145586
elapsed_time_ms: 1.375226806640625
Avg Forward Time per Image: 1.3709843158721924 ms

GPU资源占用情况

(原文此处为 GPU 资源占用情况的截图,抓取时图片未能保留)

方法二

import torch
import torchvision
import numpy as np
import tqdm


# TODO - 计算模型的推理时间
def calcGPUTime():

    device = 'cuda:0'
    model = torchvision.models.resnet18()
    model.to(device)
    model.eval()

    repetitions = 1000

    dummy_input = torch.rand(1, 3, 224, 224).to(device)

    # 预热, GPU 平时可能为了节能而处于休眠状态, 因此需要预热
    print('warm up ...\n')
    with torch.no_grad():
        for _ in range(100):
            _ = model(dummy_input)

    # synchronize 等待所有 GPU 任务处理完才返回 CPU 主线程
    torch.cuda.synchronize()

    # 设置用于测量时间的 cuda Event, 这是PyTorch 官方推荐的接口,理论上应该最靠谱
    starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True)
    # 初始化一个时间容器
    timings = np.zeros((repetitions, 1))

    print('testing ...\n')
    with torch.no_grad():
        for rep in tqdm.tqdm(range(repetitions)):
            starter.record()
            _ = model(dummy_input)
            ender.record()
            torch.cuda.synchronize()  # 等待GPU任务完成
            curr_time = starter.elapsed_time(ender)  # 从 starter 到 ender 之间用时,单位为毫秒
            timings[rep] = curr_time

    avg = timings.sum() / repetitions
    print('\navg={}\n'.format(avg))


if __name__ == '__main__':
    # Run the CUDA-event GPU benchmark when executed as a script.
    calcGPUTime()

输出结果

warm up ...

testing ...

100%|██████████| 1000/1000 [00:01<00:00, 627.50it/s]

avg=1.4300348817110062

GPU资源占用情况

(原文此处为 GPU 资源占用情况的截图,抓取时图片未能保留)

相关推荐

最近更新

  1. TCP协议是安全的吗?

    2024-04-07 06:52:03       16 阅读
  2. 阿里云服务器执行yum,一直下载docker-ce-stable失败

    2024-04-07 06:52:03       16 阅读
  3. 【Python教程】压缩PDF文件大小

    2024-04-07 06:52:03       15 阅读
  4. 通过文章id递归查询所有评论(xml)

    2024-04-07 06:52:03       18 阅读

热门阅读

  1. MQTT面试题

    2024-04-07 06:52:03       11 阅读
  2. leetcode热题HOT 23. 合并 K 个升序链表

    2024-04-07 06:52:03       12 阅读
  3. [高考] 数理化

    2024-04-07 06:52:03       12 阅读
  4. centos 安装 stable-diffusion 详细流程

    2024-04-07 06:52:03       11 阅读
  5. QT智能指针

    2024-04-07 06:52:03       18 阅读
  6. 【工具或平台】Gem5编译

    2024-04-07 06:52:03       13 阅读
  7. vue指令v-model

    2024-04-07 06:52:03       14 阅读
  8. Transformer架构的自注意力机制

    2024-04-07 06:52:03       13 阅读
  9. Django -- 报错

    2024-04-07 06:52:03       11 阅读