The test data comes from an Intel RealSense D435i. In this experiment, a single camera simulates two sensors observing the position of a car; the distance from the car to each sensor is obtained for use in a subsequent filtering experiment.
Experiment setup
The car is driven along the same trajectory in two runs, with the camera placed at a different point for each run. For every time step of the car's motion from the start point to the end point, the camera measures the car's distance; the true distance can be derived from the coordinate system, as shown in the figure below.
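For reference, a minimal sketch of how the ground-truth distances can be computed from the coordinate system. The trajectory endpoints, camera position, and frame count below are placeholder values, not the actual experimental coordinates, and the straight-line, constant-speed motion is an assumption:

import numpy as np

# Placeholder geometry: the car moves in a straight line from `start` to
# `end` at constant speed; `cam` is one of the two camera positions.
# Replace these with the coordinates measured in the actual setup.
start = np.array([0.0, 0.0])   # car start point (m)
end = np.array([2.0, 0.0])     # car end point (m)
cam = np.array([1.0, 1.5])     # camera position (m)

n_frames = 407                 # e.g. end_frame - start_frame + 1
t = np.linspace(0.0, 1.0, n_frames)
car_pos = start + t[:, None] * (end - start)   # car position at each frame

# true distance from the car to the camera at every frame
true_dist = np.linalg.norm(car_pos - cam, axis=1)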
Data acquisition
Frames are captured at 60 fps. Since detection on the computer cannot keep up with 60 fps, the raw data is recorded first, and detection and distance extraction are done offline afterwards. The code is as follows:
import pyrealsense2 as rs
import numpy as np
import cv2
import os
import time
import pickle
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--save", default=False, action='store_true', help="if save the video")
    opt = parser.parse_args()

    # init the camera: 640x480 depth and color streams at 60 fps
    pipeline = rs.pipeline()
    cfg = rs.config()
    cfg.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 60)
    cfg.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 60)
    profile = pipeline.start(cfg)
    depth_sensor = profile.get_device().first_depth_sensor()
    depth_scale = depth_sensor.get_depth_scale()
    print("Depth scale:", depth_scale)
    align = rs.align(rs.stream.color)

    # init the output directory, named by the current timestamp
    time_str = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime(time.time()))
    out_dir = os.path.join("data/exp", time_str)
    if not os.path.exists(out_dir) and opt.save:
        os.makedirs(out_dir)

    cnt = 0
    try:
        while True:
            frames = pipeline.wait_for_frames()
            # align the depth frame to the color frame
            aligned_frames = align.process(frames)
            aligned_depth_frame = aligned_frames.get_depth_frame()
            color_frame = aligned_frames.get_color_frame()
            if not aligned_depth_frame or not color_frame:
                continue
            # Convert images to numpy arrays
            depth_image = np.asanyarray(aligned_depth_frame.get_data())
            color_image = np.asanyarray(color_frame.get_data())
            # convert the raw depth units to millimeters
            depth_image = depth_image * depth_scale * 1000
            depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)
            cv2.imshow("RGB", color_image)
            cv2.imshow("depth", depth_colormap)
            if opt.save:
                # save the color frame as jpg and the depth array as pickle
                cv2.imwrite(os.path.join(out_dir, "{:0>6d}.jpg".format(cnt)), color_image)
                with open(os.path.join(out_dir, "{:0>6d}.pickle".format(cnt)), 'wb') as fout:
                    pickle.dump(depth_image, fout)
            key = cv2.waitKey(1)
            # Press esc or "q" to close the image window
            if key & 0xFF == ord("q") or key == 27:
                cv2.destroyAllWindows()
                break
            cnt += 1
    finally:
        pipeline.stop()
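Before running detection, a saved frame pair can be loaded back to sanity-check the recording. A minimal sketch, assuming the capture directory and frame index below match an actual recording:

import os
import pickle
import cv2

data_dir = "data/exp/2020-09-14-14-54-09"    # directory written by the capture script
frame_id = "000070"                          # any saved frame index

color_image = cv2.imread(os.path.join(data_dir, frame_id + ".jpg"))
with open(os.path.join(data_dir, frame_id + ".pickle"), "rb") as fin:
    depth_image = pickle.load(fin)           # depth in millimeters, aligned to the color frame

print(color_image.shape, depth_image.shape)  # expect (480, 640, 3) and (480, 640)
print("center depth: {:.1f} mm".format(depth_image[240, 320]))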
Data processing
Offline, the frame indices corresponding to the start and end points are identified, and the depth at the center of the bounding box of the cup on the car is taken as the measurement; the code is below. PyTorch-YOLOv3 is used for detection, and since conf_thres is lowered, the bbox with the highest class confidence is kept as the cup's bbox.
from __future__ import division

from models import *
from utils.utils import *
from utils.datasets import *

import os
import argparse
import cv2
import numpy as np
import pickle
import time
import torch
import torchvision.transforms as transforms
from torch.autograd import Variable

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file")
    parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file")
    parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file")
    parser.add_argument("--conf_thres", type=float, default=0.1, help="object confidence threshold")
    parser.add_argument("--nms_thres", type=float, default=0.1, help="iou threshold for non-maximum suppression")
    parser.add_argument("--batch_size", type=int, default=1, help="size of the batches")
    parser.add_argument("--n_cpu", type=int, default=3, help="number of cpu threads to use during batch generation")
    parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension")
    parser.add_argument("--checkpoint_model", type=str, help="path to checkpoint model")
    parser.add_argument("--start_frame", type=int, default=70, help="frame to start")
    parser.add_argument("--end_frame", type=int, default=476, help="frame to end")
    parser.add_argument("--data_dir", type=str, default="data/exp/2020-09-14-14-54-09", help="data dir"
                        " containing the images and pickles")
    opt = parser.parse_args()

    # init the model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Darknet(opt.model_def, img_size=opt.img_size).to(device)
    if opt.weights_path.endswith(".weights"):
        model.load_darknet_weights(opt.weights_path)
    else:
        model.load_state_dict(torch.load(opt.weights_path))
    model.eval()
    classes = load_classes(opt.class_path)
    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
    colors = np.random.randint(0, 255, size=(len(classes), 3), dtype="uint8")

    # prepare the output file for the measurements
    time_str = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime(time.time()))
    fout = open(os.path.join(opt.data_dir, time_str + ".txt"), "w")

    # start detection
    img_list = os.listdir(opt.data_dir)
    img_list = [i for i in img_list if ".jpg" in i]
    img_list = sorted(img_list)
    for img_file in img_list:
        # only process frames between the start point and the end point
        if int(img_file.split(".")[0]) < opt.start_frame or int(img_file.split(".")[0]) > opt.end_frame:
            continue
        color_image = cv2.imread(os.path.join(opt.data_dir, img_file))
        with open(os.path.join(opt.data_dir, img_file.replace('.jpg', '.pickle')), 'rb') as fin:
            depth_image = pickle.load(fin)
        depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)
        RGBimg = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
        imgTensor = transforms.ToTensor()(RGBimg)
        imgTensor, _ = pad_to_square(imgTensor, 0)
        imgTensor = Variable(imgTensor.type(Tensor))
        imgTensor = resize(imgTensor, 416)
        imgTensor = imgTensor.unsqueeze(0)
        with torch.no_grad():
            detections = model(imgTensor)
            detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres)[0]
        if detections is None:
            continue
        detections = rescale_boxes(detections, opt.img_size, RGBimg.shape[:2])
        img = color_image.copy()
        # keep only detections with COCO class id 39 (the cup on the car)
        detections = [i for i in detections if i[6] == 39]
        if len(detections) != 0:
            # track the detection with the highest class confidence
            x1_s, y1_s, x2_s, y2_s, conf_s, cls_conf_s, cls_pred_s = 0, 0, 0, 0, 0, 0, 0
            for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
                if cls_conf_s < cls_conf:
                    x1_s, y1_s, x2_s, y2_s, conf_s, cls_conf_s, cls_pred_s = x1, y1, x2, y2, conf, cls_conf, cls_pred
                # cast to int so OpenCV accepts the coordinates
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
                box_w = x2 - x1
                box_h = y2 - y1
                color = [int(c) for c in colors[int(cls_pred)]]
                img = cv2.rectangle(img, (x1, y1 + box_h), (x2, y1), color, 2)
                cv2.putText(img, classes[int(cls_pred)], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                cv2.putText(img, str("%.2f" % float(conf)), (x2, y2 - box_h), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                            color, 2)
                print("dist: {}".format(depth_image[int(y1 + box_h // 2), int(x1 + box_w // 2)]))
            # write the depth at the center of the selected bbox as the measurement
            fout.write("{:.2f}\n".format(depth_image[int(y1_s + y2_s) // 2, int(x1_s + x2_s) // 2]))
        else:
            # the detector lost the cup in this frame
            print("lost")
            fout.write("{}\n".format(-1))
        cv2.imshow("RGB", img)
        cv2.imshow("depth", depth_colormap)
        key = cv2.waitKey(1)
        # Press esc or "q" to close the image window
        if key & 0xFF == ord("q") or key == 27:
            cv2.destroyAllWindows()
            break
    fout.close()
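The resulting text file contains one range measurement in millimeters per frame, with -1 marking frames where the detector lost the cup. A minimal sketch of loading it for the subsequent filtering experiment (the file name below is a hypothetical timestamp; substitute the one actually written by the detection script):

import numpy as np

# hypothetical measurement file produced by the detection script above
meas = np.loadtxt("data/exp/2020-09-14-14-54-09/2020-09-14-15-00-00.txt")
valid = meas > 0   # -1 entries mark lost frames
print("{} of {} frames have a valid measurement".format(int(valid.sum()), len(meas)))
# In the filter, skip the measurement update on lost frames and only run the prediction step.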