4. Python 推理脚本(OnnxRuntime和TensorRT)

24 阅读1分钟

OnnxRuntime部署脚本:

import cv2
import onnxruntime as rt
import numpy as np
import os

# Batch segmentation inference with ONNX Runtime: every .bmp image found in
# `input_path` is preprocessed, run through the model at `onnx_path`, and the
# prediction is written next to the source image as "<name>_out.bmp".
input_path = r""  # directory containing the .bmp images to process
onnx_path = r""  # path of the exported ONNX model

# Per-channel normalization constants. They are applied after the BGR->RGB
# conversion below, so the three entries line up with the R, G, B channels.
mean = [123.675, 116.28, 103.53]
std = [58.395, 57.12, 57.375]

# Build the session once: it does not depend on the image, so re-creating it
# for every file only repeats the model loading cost.
sess = rt.InferenceSession(onnx_path, None)
input_name = sess.get_inputs()[0].name

for img_name in os.listdir(input_path):
    # Only bitmaps are processed. Files ending in "_out.bmp" are results of a
    # previous run written into the same directory, so they are skipped
    # instead of being fed back into the model.
    if not img_name.endswith('.bmp') or img_name.endswith('_out.bmp'):
        continue

    img_path = os.path.join(input_path, img_name)
    # Swap only the trailing extension; str.replace would also rewrite any
    # ".bmp" that happens to appear earlier in the path.
    output_path = img_path[:-len('.bmp')] + "_out.bmp"

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        print("skip unreadable image:", img_path)
        continue

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (240, 192))  # cv2.resize takes (width, height)
    img = (img - mean) / std

    # HWC -> CHW, as float32.
    input_data = np.array(img, dtype=np.float32).transpose((2, 0, 1))

    # input_data[None] adds the leading batch axis; [0][0][0] selects the
    # first output, then its first two leading indices
    # (presumably batch and channel -- TODO confirm against the model).
    pred_onx = sess.run(None, {input_name: input_data[None]})[0][0][0]
    print(pred_onx)

    out_img = np.array(pred_onx)
    print(out_img.shape)
    print(np.unique(out_img))
    # Scale the predicted values by 50 so they are distinguishable when the
    # saved image is viewed.
    cv2.imwrite(output_path, out_img * 50)

TensorRT部署脚本(同时运行 OnnxRuntime 推理,便于对比两者的输出):

import cv2
import onnxruntime as rt
import numpy as np


def infer_engine(img, model_path=r"end2end.engine"):
    """Run one inference pass of a serialized TensorRT engine on ``img``.

    The engine is deserialized from ``model_path``. Binding 0 is used as the
    input and binding 1 as the output; host and device buffers are sized from
    the shapes and dtypes the engine reports for those two bindings.

    Args:
        img: NumPy array with the preprocessed input. It is flattened and
            copied into the input buffer, so its element count must equal the
            volume of binding 0.
        model_path: Path of the serialized engine file. Defaults to
            ``end2end.engine`` so existing single-argument calls keep working.

    Returns:
        The flat page-locked host array holding the contents of binding 1
        after inference. The caller is responsible for reshaping it.

    Raises:
        RuntimeError: If the engine cannot be deserialized.
        ValueError: If ``img`` does not have as many elements as binding 0.
    """
    import pycuda.driver as cuda
    import pycuda.autoinit  # noqa: F401 -- imported for its side effect (CUDA context setup)
    import tensorrt as trt
    import time

    # Create the runtime; only errors are logged.
    runtime = trt.Runtime(trt.Logger(trt.Logger.ERROR))

    # Deserialize the engine from disk.
    with open(model_path, "rb") as f:
        serialized_engine = f.read()
    engine = runtime.deserialize_cuda_engine(serialized_engine)
    if engine is None:
        # Fail here with a clear message instead of an AttributeError on the
        # first attribute access below.
        raise RuntimeError(
            "failed to deserialize TensorRT engine: {}".format(model_path)
        )

    input_shape = engine.get_binding_shape(0)
    input_dtype = engine.get_binding_dtype(0)
    output_shape = engine.get_binding_shape(1)
    output_dtype = engine.get_binding_dtype(1)
    print("输入", input_shape, input_dtype)
    print("输出", output_shape, output_dtype)

    # Page-locked host buffers for the input and output bindings.
    h_input = cuda.pagelocked_empty(
        trt.volume(input_shape), dtype=trt.nptype(input_dtype)
    )
    h_output = cuda.pagelocked_empty(
        trt.volume(output_shape), dtype=trt.nptype(output_dtype)
    )
    # Matching device buffers.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Stream on which the copies and the inference are enqueued.
    stream = cuda.Stream()
    # Execution context for this engine.
    context = engine.create_execution_context()

    flat_input = img.ravel()
    if flat_input.size != h_input.size:
        # np.copyto would silently broadcast a size-1 input; reject any
        # mismatch explicitly.
        raise ValueError(
            "input has {} elements, engine expects {}".format(
                flat_input.size, h_input.size
            )
        )
    np.copyto(h_input, flat_input)

    # perf_counter is a monotonic clock, suited to measuring short intervals.
    t1 = time.perf_counter()
    # Host -> device copy of the input.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Enqueue inference.
    context.execute_async_v2(
        bindings=[d_input, d_output], stream_handle=stream.handle
    )
    # Device -> host copy of the result.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Block until everything enqueued on the stream has finished.
    stream.synchronize()
    # The measured interval covers both copies and the inference itself.
    print("推理时间", time.perf_counter() - t1)
    return h_output

# Run the same preprocessed image through ONNX Runtime and through the
# TensorRT engine, and save both predictions so they can be compared.
img = cv2.imread(r"1596_Origin.bmp")
if img is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError("cannot read image: 1596_Origin.bmp")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

img = cv2.resize(img, (240, 192))  # cv2.resize takes (width, height)

# Per-channel normalization constants, applied to the RGB image.
# NOTE: `std` must be a plain 3-element list. A trailing comma after the
# closing bracket would make it a one-element tuple wrapping the list.
std = [
    58.395,
    57.12,
    57.375,
]

mean = [
    123.675,
    116.28,
    103.53,
]

img = (img - mean) / std

# HWC -> CHW, as float32.
input_data = np.array(img, dtype=np.float32).transpose((2, 0, 1))
onnx_path = r"end2end.onnx"

# --- ONNX Runtime reference result ---
sess = rt.InferenceSession(onnx_path, None)
input_name = sess.get_inputs()[0].name
# input_data[None] adds the leading batch axis; [0][0][0] selects the first
# output, then its first two leading indices
# (presumably batch and channel -- TODO confirm against the model).
pred_onx = sess.run(None, {input_name: input_data[None]})[0][0][0]
print(pred_onx)

out_img = np.array(pred_onx)
print(out_img.shape)
print(np.unique(out_img))
# Scale the predicted values by 50 so they are distinguishable when viewed.
cv2.imwrite(r"out.png", out_img * 50)

# --- TensorRT result ---
h_output = infer_engine(input_data[None])
print("trt_out:", h_output.shape)
# infer_engine returns a flat buffer; reshape it to (height, width), matching
# the 240x192 resize above.
out_img2 = np.array(h_output).reshape((192, 240))
print(out_img2.shape)
print(np.unique(out_img2))
cv2.imwrite(r"out_trt.png", out_img2 * 50)