Demonstration of a Thermal-Imaging Power-Line Inspection Project Based on AidLux


This project uses a detection network based on R-RetinaNet.

RetinaNet consists of a backbone network and two subnetworks. An FPN (Feature Pyramid Network) serves as the backbone; it is an off-the-shelf network that computes convolutional feature maps from the input image. The first subnetwork performs object classification on the backbone's output, and the second subnetwork handles bounding-box regression.
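To make the two heads concrete, here is a minimal PyTorch-style sketch of the classification and box-regression subnetworks. The channel widths, layer counts, anchor count, and the 3-class / 5-parameter rotated-box output are illustrative assumptions, not the project's actual network definition:

import torch.nn as nn

class ClassificationSubnet(nn.Module):
    ''' Predicts a class score for every anchor at every feature-map cell. '''
    def __init__(self, in_channels=256, num_anchors=9, num_classes=3):
        super().__init__()
        self.tower = nn.Sequential(
            nn.Conv2d(in_channels, 256, 3, padding=1), nn.ReLU(),
            nn.Conv2d(256, 256, 3, padding=1), nn.ReLU())
        self.head = nn.Conv2d(256, num_anchors * num_classes, 3, padding=1)

    def forward(self, x):                 # x: one FPN feature map, (N, C, H, W)
        return self.head(self.tower(x))   # (N, num_anchors * num_classes, H, W)

class RegressionSubnet(nn.Module):
    ''' Regresses 5 offsets per anchor (x, y, w, h, angle) for a rotated box. '''
    def __init__(self, in_channels=256, num_anchors=9):
        super().__init__()
        self.tower = nn.Sequential(
            nn.Conv2d(in_channels, 256, 3, padding=1), nn.ReLU(),
            nn.Conv2d(256, 256, 3, padding=1), nn.ReLU())
        self.head = nn.Conv2d(256, num_anchors * 5, 3, padding=1)

    def forward(self, x):
        return self.head(self.tower(x))   # (N, num_anchors * 5, H, W)

In RetinaNet both subnetworks are applied to every level of the FPN output, and the per-level predictions are flattened and concatenated before decoding.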

To deploy the project on the AidLux platform, the model first has to be converted; the pipeline adopted here is pt → onnx → tflite.
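The conversion scripts themselves are not shown in this post. Purely as an illustration of the pt → onnx → tflite route (the tool choice, file names and input size below are assumptions, not necessarily what this project used), one possible pipeline looks like this:

import torch
import onnx
import tensorflow as tf
from onnx_tf.backend import prepare  # onnx-tf package

''' 1. PyTorch checkpoint -> ONNX (assumes the whole model object was saved) '''
model = torch.load("r-retinanet.pt", map_location="cpu")
model.eval()
dummy = torch.randn(1, 3, 640, 800)  # NCHW dummy input matching the 640x800 inference size
torch.onnx.export(model, dummy, "r-retinanet.onnx", opset_version=11,
                  input_names=["input"], output_names=["output"])

''' 2. ONNX -> TensorFlow SavedModel '''
tf_rep = prepare(onnx.load("r-retinanet.onnx"))
tf_rep.export_graph("r-retinanet_savedmodel")

''' 3. SavedModel -> TFLite '''
converter = tf.lite.TFLiteConverter.from_saved_model("r-retinanet_savedmodel")
with open("r-retinanet.tflite", "wb") as f:
    f.write(converter.convert())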

Part of the real-time detection code is shown below:

def decoder(ims, anchors, cls_score, bbox_pred, thresh=0.6, nms_thresh=0.2, test_conf=None):
    if test_conf is not None:
        thresh = test_conf
    bboxes = BoxCoder().decode(anchors, bbox_pred, mode='xywht')
    bboxes = clip_boxes(bboxes, ims)
    scores = cls_score.max(2, keepdims=True)
    keep = (scores >= thresh)[0, :, 0]
    if keep.sum() == 0:
        return [np.zeros(1), np.zeros(1), np.zeros((1, 5))]
    scores = scores[:, keep, :]
    anchors = anchors[:, keep, :]
    cls_score = cls_score[:, keep, :]
    bboxes = bboxes[:, keep, :]
    # NMS
    anchors_nms_idx = nms(np.concatenate([bboxes, scores], axis=2)[0, :, :], nms_thresh)
    nms_scores = cls_score[0, anchors_nms_idx, :].max(axis=1)
    nms_class = cls_score[0, anchors_nms_idx, :].argmax(axis=1)
    output_boxes = np.concatenate([
        bboxes[0, anchors_nms_idx, :],
        anchors[0, anchors_nms_idx, :]], axis=1)
    return [nms_scores, nms_class, output_boxes]

def process_img(img, target_size=640, max_size=2000, multiple=32, keep_ratio=True, NCHW=True, ToTensor=True):
    ''' Image preprocessing '''
    im_shape = img.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    # resize with keep_ratio
    if keep_ratio:
        im_scale = float(target_size) / float(im_size_min)
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        im_scale_x = np.floor(img.shape[1] * im_scale / multiple) * multiple / img.shape[1]
        im_scale_y = np.floor(img.shape[0] * im_scale / multiple) * multiple / img.shape[0]
        image_resized = cv2.resize(img, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=cv2.INTER_LINEAR)
        im_scales = np.array([im_scale_x, im_scale_y, im_scale_x, im_scale_y])
        im = image_resized / 255.0  # np.float64
        im = im.astype(np.float32)
        PIXEL_MEANS = (0.485, 0.456, 0.406)  # RGB mean and std
        PIXEL_STDS = (0.229, 0.224, 0.225)
        im -= np.array(PIXEL_MEANS)
        im /= np.array(PIXEL_STDS)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)  # BGR2RGB
        if NCHW:
            im = np.transpose(im, (2, 0, 1)).astype(np.float32)  # [SAI-KEY] TensorFlow uses input with NHWC.
        im = im[np.newaxis, ...]
        if ToTensor:
            im = torch.from_numpy(im)
        return im, im_scales
    else:
        return None

if __name__ == "__main__":
    ''' Read a local image '''
    image_path = "/home/R-RetinaNet/samples/000001.jpg"
    cap = cvs.VideoCapture(image_path)
    img = cap.read()
    im, im_scales = process_img(img, NCHW=False, ToTensor=False)  # im: NHWC

''' Define the input/output shapes '''
in_shape = [1 * 640 * 800 * 3 * 4]  # NHWC input (1, 640, 800, 3), float32 (4 bytes per value)
out_shape = [1 * 53325 * 8 * 4]  # 53325 anchors, 8 = 5 box parameters (x, y, w, h, angle) + 3 class scores, float32
# out_shape = [1 * 55425 * 8 * 4]

''' Initialize AidLite '''
aidlite = aidlite_gpu.aidlite()
''' Load the R-RetinaNet model '''
tflite_model = '/home/R-RetinaNet/models/r-retinanet.tflite'
res = aidlite.ANNModel(tflite_model, in_shape, out_shape, 4, -1) # Infer on -1: cpu, 0: gpu, 1: mixed, 2: dsp

''' Set the input '''
aidlite.setInput_Float32(im, 800, 640)

''' Run inference '''
aidlite.invoke()

''' Fetch the output '''
preds = aidlite.getOutput_Float32(0)
# preds = preds.reshape(1, 8, 53325)
preds = preds.reshape(1, 8, preds.shape[0] // 8)
output = np.transpose(preds, (0, 2, 1))

''' Generate anchors '''
im_anchor = np.transpose(im, (0, 3, 1, 2)).astype(np.float32)
anchors_list = []
anchor_generator = Anchors(ratios = np.array([0.2, 0.5, 1, 2, 5]))
original_anchors = anchor_generator(im_anchor)   # (bs, num_all_anchors, 5)
anchors_list.append(original_anchors)

''' Decode the output '''
decode_output = decoder(im_anchor, anchors_list[-1], output[..., 5:8], output[..., 0:5], thresh=0.5, nms_thresh=0.2, test_conf=None)
for i in range(len(decode_output)):
    print("dim({}), shape: {}".format(i, decode_output[i].shape))

''' Reassemble the output '''
scores = decode_output[0].reshape(-1, 1)
classes = decode_output[1].reshape(-1, 1)
boxes = decode_output[2]
boxes[:, :4] = boxes[:, :4] / im_scales
if boxes.shape[1] > 5:   
    boxes[:, 5:9] = boxes[:, 5:9] / im_scales
dets = np.concatenate([classes, scores, boxes], axis=1)

''' Filter by class '''
keep = np.where(classes > 0)[0]
dets =  dets[keep, :]

''' Convert coordinates ('xyxya' -> 'xyxyxyxy') '''
res = sort_corners(rbox_2_quad(dets[:, 2:]))

''' Draw results for inspection '''
for k in range(dets.shape[0]):
    cv2.line(img, (int(res[k, 0]), int(res[k, 1])), (int(res[k, 2]), int(res[k, 3])), (0, 255, 0), 3)
    cv2.line(img, (int(res[k, 2]), int(res[k, 3])), (int(res[k, 4]), int(res[k, 5])), (0, 255, 0), 3)
    cv2.line(img, (int(res[k, 4]), int(res[k, 5])), (int(res[k, 6]), int(res[k, 7])), (0, 255, 0), 3)
    cv2.line(img, (int(res[k, 6]), int(res[k, 7])), (int(res[k, 0]), int(res[k, 1])), (0, 255, 0), 3)
cv2.imwrite("/home/R-RetinaNet/samples/00_detected_image.jpg", img)

''' Rotate the insulator to horizontal '''
t_center = ((dets[0, 4]+dets[0, 2])/2, (dets[0,5]+dets[0,3])/2)
t_angle = dets[0, 6]
t_height, t_width = img.shape[:2]
rotate_matrix = cv2.getRotationMatrix2D(center=t_center, angle=t_angle, scale=1)
rotated_image = cv2.warpAffine(src=img, M=rotate_matrix, dsize=(t_width, t_height))

''' Transform the coordinates after rotation '''
new_coord = np.zeros((dets.shape[0], 4, 2), dtype=float)

''' When multiple insulators are present, take one of them as an example for post-processing '''
k = 0
new_coord[k, 0] = np.squeeze(np.dot(rotate_matrix, np.array([[res[k, 0]], [res[k, 1]], [1]])))
new_coord[k, 1] = np.squeeze(np.dot(rotate_matrix, np.array([[res[k, 2]], [res[k, 3]], [1]])))
new_coord[k, 2] = np.squeeze(np.dot(rotate_matrix, np.array([[res[k, 4]], [res[k, 5]], [1]])))
new_coord[k, 3] = np.squeeze(np.dot(rotate_matrix, np.array([[res[k, 6]], [res[k, 7]], [1]])))

''' Get the axis-aligned bounding rectangle '''
(x, y, w, h) = get_std_rect(new_coord[k])

''' Extract the ROI image '''
roi_image = rotated_image[y:(y+h), x:(x+w)]
''' Convert to grayscale '''
gray_image = cv2.cvtColor(roi_image, cv2.COLOR_BGR2GRAY)
''' Binarize '''
retval, binary_image = cv2.threshold(gray_image, 150, 255, cv2.THRESH_BINARY)

''' Create a 5x5 kernel of ones '''
kernel = np.ones((5, 5), np.uint8)
''' Erosion, 1 iteration '''
erode_image = cv2.erode(binary_image, kernel, iterations=1)

''' Save results locally for inspection '''
cv2.imwrite("/home/R-RetinaNet/samples/01_rotated_image.jpg", rotated_image)
cv2.imwrite("/home/R-RetinaNet/samples/02_roi_image.jpg", roi_image)
cv2.imwrite("/home/R-RetinaNet/samples/03_binary_image.jpg", binary_image)
cv2.imwrite("/home/R-RetinaNet/samples/04_erode_image.jpg", erode_image)

Results:

[Figure: picture1.png, detection results]

Note: the detection boxes in the photos and videos are not entirely accurate because of a minor issue in how the input width and height are handled.

Copyright notice: this is an original article by CSDN blogger "听见风里", released under the CC 4.0 BY-SA license; please include the original source link and this notice when reposting. Original link: blog.csdn.net/qq_53981430…