# OnnxRuntime部署脚本 (ONNX Runtime deployment script):
import cv2
import onnxruntime as rt
import numpy as np
import os
# Directory scanned for ``.bmp`` inputs, and path of the ONNX model to run.
# Both are placeholders that must be filled in before running the script.
input_path = r""
onnx_path = r""

# Per-channel normalisation constants; they are applied after the image has
# been converted from BGR to RGB, so they are in RGB order.
MEAN = [123.675, 116.28, 103.53]
STD = [58.395, 57.12, 57.375]
# Size passed to cv2.resize, which takes (width, height).
INPUT_SIZE = (240, 192)
# Suffix given to result images written next to their inputs.
OUTPUT_SUFFIX = "_out.bmp"


def is_input_image(img_name):
    """Return True for ``.bmp`` files that are not results of an earlier run.

    Results are written into the input directory with ``OUTPUT_SUFFIX``, so
    without this filter a second run would feed them back into the model.
    """
    return img_name.endswith('.bmp') and not img_name.endswith(OUTPUT_SUFFIX)


def output_path_for(img_path):
    """Return the result path for *img_path*: its extension becomes ``OUTPUT_SUFFIX``.

    Only the final extension is replaced, so a ``.bmp`` occurring elsewhere
    in the path (for example in a directory name) is left alone.
    """
    return os.path.splitext(img_path)[0] + OUTPUT_SUFFIX


def normalize_chw(rgb):
    """Normalise an H x W x 3 image and return it as a float32 3 x H x W array.

    Args:
        rgb: Array-like of shape (H, W, 3) with channels in RGB order.

    Returns:
        ``(rgb - MEAN) / STD`` cast to float32 with the channel axis first.
    """
    scaled = (np.asarray(rgb) - MEAN) / STD
    return np.array(scaled, dtype=np.float32).transpose((2, 0, 1))


def main():
    """Run the ONNX model on every input image in ``input_path``.

    For each image the raw prediction, its shape and its unique values are
    printed, and the prediction multiplied by 50 is saved next to the input.
    """
    # The session does not depend on the image, so build it once up front
    # instead of once per file.
    sess = rt.InferenceSession(onnx_path, None)
    input_name = sess.get_inputs()[0].name

    for img_name in os.listdir(input_path):
        if not is_input_image(img_name):
            continue
        img_path = os.path.join(input_path, img_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            print(f"skipping unreadable image: {img_path}")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, INPUT_SIZE)
        input_data = normalize_chw(img)
        # [None] adds the leading batch axis; [0][0][0] takes the first
        # element along the output list and the next two axes (presumably
        # batch and channel - TODO confirm against the model's output shape).
        pred_onx = sess.run(None, {input_name: input_data[None]})[0][0][0]
        print(pred_onx)
        out_img = np.array(pred_onx)
        print(out_img.shape)
        print(np.unique(out_img))
        # Scale the values by 50 so they are easier to tell apart when viewed.
        cv2.imwrite(output_path_for(img_path), out_img * 50)


if __name__ == "__main__":
    main()
# TensorRT部署脚本 (TensorRT deployment script):
import cv2
import onnxruntime as rt
import numpy as np
def infer_engine(img, model_path=r"end2end.engine"):
    """Run one inference pass through a serialized TensorRT engine.

    The engine is treated as having exactly two bindings: index 0 is used as
    the input and index 1 as the output.

    Args:
        img: NumPy array with the input data. It is flattened before being
            copied, so only its element count has to match binding 0.
        model_path: Path of the serialized engine file.

    Returns:
        A flat (1-D) page-locked host array holding the contents of
        binding 1; the caller is responsible for reshaping it.

    Raises:
        RuntimeError: If the engine cannot be deserialized or no execution
            context can be created.
        ValueError: If ``img`` does not have as many elements as binding 0.
    """
    import time

    import pycuda.autoinit  # noqa: F401 - imported for its side effect of initialising CUDA
    import pycuda.driver as cuda
    import tensorrt as trt

    # Create the runtime; only errors are logged.
    runtime = trt.Runtime(trt.Logger(trt.Logger.ERROR))
    # Deserialize the engine from disk.
    with open(model_path, "rb") as f:
        serialized_engine = f.read()
    engine = runtime.deserialize_cuda_engine(serialized_engine)
    if engine is None:
        raise RuntimeError(f"failed to deserialize TensorRT engine: {model_path}")

    # NOTE(review): the index-based binding calls below (get_binding_shape,
    # get_binding_dtype, execute_async_v2) belong to the older TensorRT
    # Python API - confirm they exist in the installed TensorRT version.
    # Shapes are assumed to be static (no -1 dimensions) - TODO confirm.
    input_shape = engine.get_binding_shape(0)
    input_dtype = engine.get_binding_dtype(0)
    output_shape = engine.get_binding_shape(1)
    output_dtype = engine.get_binding_dtype(1)
    print("输入", input_shape, input_dtype)
    print("输出", output_shape, output_dtype)

    # Page-locked host buffers, one flat array per binding.
    h_input = cuda.pagelocked_empty(trt.volume(input_shape), dtype=trt.nptype(input_dtype))
    h_output = cuda.pagelocked_empty(trt.volume(output_shape), dtype=trt.nptype(output_dtype))
    if img.size != h_input.size:
        raise ValueError(
            f"input has {img.size} elements but the engine expects {h_input.size}"
        )
    # Matching device buffers.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Stream on which the copies and the inference are queued.
    stream = cuda.Stream()
    # Execution context for this engine.
    context = engine.create_execution_context()
    if context is None:
        raise RuntimeError("failed to create a TensorRT execution context")

    np.copyto(h_input, img.ravel())
    t1 = time.time()
    # Upload the input to device memory.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run the model.
    context.execute_async_v2(bindings=[d_input, d_output], stream_handle=stream.handle)
    # Download the result to host memory.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Block until all work queued on the stream has finished.
    stream.synchronize()
    # The measured interval covers both copies as well as the inference.
    print("推理时间", time.time() - t1)
    return h_output
# Size passed to cv2.resize, which takes (width, height).
input_size = (240, 192)
image_path = r"1596_Origin.bmp"

img = cv2.imread(image_path)
if img is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise FileNotFoundError(f"cannot read image: {image_path}")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, input_size)

# Per-channel normalisation constants; applied after the BGR -> RGB
# conversion above, so they are in RGB order. Both are plain lists (a stray
# trailing comma previously turned ``std`` into a tuple wrapping the list).
std = [
    58.395,
    57.12,
    57.375,
]
mean = [
    123.675,
    116.28,
    103.53,
]
img = (img - mean) / std
# Cast to float32 and move the channel axis first: (H, W, 3) -> (3, H, W).
input_data = np.array(img, dtype=np.float32).transpose((2, 0, 1))

# Prediction from ONNX Runtime.
onnx_path = r"end2end.onnx"
sess = rt.InferenceSession(onnx_path, None)
input_name = sess.get_inputs()[0].name
# [None] adds the leading batch axis; [0][0][0] takes the first element along
# the output list and the next two axes (presumably batch and channel -
# TODO confirm against the model's output shape).
pred_onx = sess.run(None, {input_name: input_data[None]})[0][0][0]
print(pred_onx)
out_img = np.array(pred_onx)
print(out_img.shape)
print(np.unique(out_img))
# Scale the values by 50 so they are easier to tell apart when viewed.
cv2.imwrite(r"out.png", out_img * 50)

# Prediction from the TensorRT engine on the same input.
h_output = infer_engine(input_data[None])
print("trt_out:", h_output.shape)
# infer_engine returns a flat buffer; restore the (height, width) layout.
out_img2 = np.array(h_output).reshape((input_size[1], input_size[0]))
print(out_img2.shape)
print(np.unique(out_img2))
cv2.imwrite(r"out_trt.png", out_img2 * 50)