在多传感器滤波数据采集系列(一)的基础上,使用interl realsense t265测试车的实际位置。t265绑定在车上,读取车位置,d435i作为传感器进行测量距离。
实验设置
使用小车沿统一轨迹运动,分别在两个点放置摄像头,测试小车从起点运动到终点每一时刻的距离,通过t265获得小车从起点运动偏移的x,y坐标,从而求得小车的绝对x,y坐标,求得小车和相机的角度,如下图所示
数据采集
使用t265获取小车运动的偏移x,y坐标,同时保存时间戳。
import pyrealsense2 as rs
import numpy as np
import argparse
import os
import time
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--save", default=True, action='store_true', help="if save the pos")
parser.add_argument("--out_dir", default="data/t265_pos", help="dir to save the data")
opt = parser.parse_args()
time_str = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime(time.time()))
if not os.path.exists(opt.out_dir) and opt.save:
os.makedirs(opt.out_dir)
if opt.save:
fout = open(os.path.join(opt.out_dir, time_str + ".txt"), "w")
# init the pipeline
pipeline = rs.pipeline()
conf = rs.config()
conf.enable_stream(rs.stream.pose)
profile = pipeline.start(conf)
try:
while True:
frames = pipeline.wait_for_frames()
pose = frames.get_pose_frame()
if pose:
data = pose.get_pose_data()
if opt.save:
save_time = time.time()
pos = data.translation
fout.write("{:.4f}\tx\t{:.4f}\ty\t{:.4f}\tz\t{:.4f}\n".format(save_time, pos.x, pos.y, pos.z))
if pose.frame_number % 100 == 0:
print("Frame #{}".format(pose.frame_number))
print("Position: {}".format(data.translation))
else:
print("lost")
finally:
pipeline.stop()
使用d435i获得摄像头距离车的距离,同时保存时间戳。
import pyrealsense2 as rs
import numpy as np
import cv2
import os
import time
import pickle
import argparse
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--save", default=True, action='store_true', help="if save the video")
opt = parser.parse_args()
# init the camerae
pipeline = rs.pipeline()
cfg = rs.config()
cfg.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 60)
cfg.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 60)
profile = pipeline.start(cfg)
depth_sensor = profile.get_device().first_depth_sensor()
depth_scale = depth_sensor.get_depth_scale()
print("深度比例系数为:", depth_scale)
align = rs.align(rs.stream.color)
# init the video streaming
time_str = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime(time.time()))
out_dir = os.path.join("data/exp", time_str)
if not os.path.exists(out_dir) and opt.save:
os.makedirs(out_dir)
cnt = 0
try:
while True:
frames = pipeline.wait_for_frames()
aligned_frames = align.process(frames)
aligned_depth_frame = aligned_frames.get_depth_frame()
color_frame = aligned_frames.get_color_frame()
if not aligned_depth_frame or not color_frame:
continue
# Convert images to numpy arrays
depth_image = np.asanyarray(aligned_depth_frame.get_data())
color_image = np.asanyarray(color_frame.get_data())
depth_image = depth_image * depth_scale * 1000
depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)
cv2.imshow("RGB", color_image)
cv2.imshow("depth", depth_colormap)
if opt.save:
save_time = time.time()
cv2.imwrite(os.path.join(out_dir, "{:0>6d}-{:.4f}.jpg".format(cnt, save_time)), color_image)
with open(os.path.join(out_dir, "{:0>6d}-{:.4f}.pickle".format(cnt, save_time)), 'wb') as fout:
pickle.dump(depth_image, fout)
key = cv2.waitKey(1)
# Press esc or "q" to close the image window
if key & 0xFF == ord("q") or key == 27:
cv2.destroyAllWindows()
break
cnt += 1
finally:
pipeline.stop()
数据处理
使用时间戳对t265和d435i进行对齐,最终输出的是传感器距离小车距离,传感器和小车的角度,小车的真实x,y坐标。
from __future__ import division
from models import *
from utils.utils import *
from utils.datasets import *
import os
import argparse
import cv2
import numpy as np
import pickle
import time
import torch
from torch.autograd import Variable
def get_distance(model, color_image, depth_image, colors):
RGBimg = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
imgTensor = transforms.ToTensor()(RGBimg)
imgTensor, _ = pad_to_square(imgTensor, 0)
imgTensor = Variable(imgTensor.type(Tensor))
imgTensor = resize(imgTensor, 416)
imgTensor = imgTensor.unsqueeze(0)
with torch.no_grad():
detections = model(imgTensor)
detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres)[0]
if detections is None:
return -1
detections = rescale_boxes(detections, opt.img_size, RGBimg.shape[:2])
img = color_image.copy()
detections = [i for i in detections if i[6] == 39]
if len(detections) != 0:
x1_s, y1_s, x2_s, y2_s, conf_s, cls_conf_s, cls_pred_s = 0, 0, 0, 0, 0, 0, 0
for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
if cls_conf_s < cls_conf:
x1_s, y1_s, x2_s, y2_s, conf_s, cls_conf_s, cls_pred_s = x1, y1, x2, y2, conf, cls_conf, cls_pred
box_w = x2 - x1
box_h = y2 - y1
color = [int(c) for c in colors[int(cls_pred)]]
# print(cls_conf)
img = cv2.rectangle(img, (x1, y1 + box_h), (x2, y1), color, 2)
cv2.putText(img, classes[int(cls_pred)], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
cv2.putText(img, str("%.2f" % float(conf)), (x2, y2 - box_h), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
color, 2)
return img, depth_image[int(y1 + box_h // 2), int(x1 + box_w // 2)]
else:
return img, -1
def get_pos(pos_timestamp_list, pos_list, frame_timestamp, start_pos, end_pos):
"""
use frame timesramp to get the pos
"""
low = 0
heigh = len(pos_timestamp_list)-1
middle = int(low + (heigh - low) / 2)
while low < heigh:
if pos_timestamp_list[middle] < frame_timestamp:
low = middle + 1
else:
heigh = middle
middle = int(low + (heigh - low) / 2)
# pos_list[middle] is the relative position
t265_z = pos_list[middle][2] * 1000
t265_x = pos_list[middle][0] * 1000
theta = np.arctan2(start_pos[0]-end_pos[0], start_pos[1] - end_pos[1])
pt_x1 = start_pos[0] - t265_z * np.sin(theta)
pt_y1 = start_pos[1] - t265_z * np.cos(theta)
pt_x2 = pt_x1 - t265_x * np.cos(theta)
pt_y2 = pt_y1 + t265_x * np.sin(theta)
return pt_x2, pt_y2
def get_angle(absolute_pos, camera_pos):
delta_x = absolute_pos[0] - camera_pos[0]
delta_y = absolute_pos[1] - camera_pos[1]
return np.arctan2(-delta_x, delta_y)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file")
parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file")
parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file")
parser.add_argument("--conf_thres", type=float, default=0.1, help="object confidence threshold")
parser.add_argument("--nms_thres", type=float, default=0.1, help="iou thresshold for non-maximum suppression")
parser.add_argument("--batch_size", type=int, default=1, help="size of the batches")
parser.add_argument("--n_cpu", type=int, default=3, help="number of cpu threads to use during batch generation")
parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension")
parser.add_argument("--checkpoint_model", type=str, help="path to checkpoint model")
parser.add_argument("--start_frame", type=int, default=36, help="frame to start")
parser.add_argument("--end_frame", type=int, default=430, help="frame to start")
parser.add_argument("--data_dir", type=str, default="data/exp/2020-09-19-15-13-57",
help="data dir contain the image and pickle")
# add param to get the true pos
parser.add_argument("--pos_data_file", type=str,
default="/home/shenatao/myspace/thesis/code/RealsenseExp/realsense/data/t265_pos/"
"2020-09-19-15-13-53.txt",
help="data dir contain the t265 pos")
parser.add_argument('--camera_pos', nargs='+', type=float, default=[-820, 1800],
help='(x, y) coordinate for camera, unit mm')
parser.add_argument('--start_pos', nargs='+', type=float, default=[2260, 3400],
help='(x, y) coordinate for start point, unit mm')
parser.add_argument('--end_pos', nargs='+', type=float, default=[870, 1380],
help='(x, y) coordinate for end point, unit mm')
opt = parser.parse_args()
# init the pos
pos_timestamp_list = []
pos_list = []
with open(opt.pos_data_file, 'r') as fin:
for line in fin.readlines():
line = line.strip().split('\t')
pos_timestamp_list.append(float(line[0]))
pos_list.append([float(line[2]), float(line[4]), float(line[6])])
# init the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Darknet(opt.model_def, img_size=opt.img_size).to(device)
if opt.weights_path.endswith(".weights"):
model.load_darknet_weights(opt.weights_path)
else:
model.load_state_dict(torch.load(opt.weights_path))
model.eval()
classes = load_classes(opt.class_path)
Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
colors = np.random.randint(0, 255, size=(len(classes), 3), dtype="uint8")
# prepare for out
time_str = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime(time.time()))
fout = open(os.path.join(opt.data_dir, time_str + ".txt"), "w")
# start detect
img_list = os.listdir(opt.data_dir)
img_list = [i for i in img_list if ".jpg" in i]
img_list = sorted(img_list)
for img_file in img_list:
frame_id = int(img_file.split("-")[0])
frame_timestamp = float(img_file.split("-")[1][:-4])
if frame_id < opt.start_frame or frame_id > opt.end_frame:
continue
color_image = cv2.imread(os.path.join(opt.data_dir, img_file))
with open(os.path.join(opt.data_dir, img_file.replace('.jpg', '.pickle')), 'rb') as fin:
depth_image = pickle.load(fin)
depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)
# gte distance
img, distance = get_distance(model, color_image, depth_image, colors)
# get pos
pos_x, pos_y = get_pos(pos_timestamp_list, pos_list, frame_timestamp, opt.start_pos, opt.end_pos)
angle = get_angle([pos_x, pos_y], opt.camera_pos)
fout.write("{:.4f}\t{:.6f}\t{:.4f}\t{:.4f}\n".format(distance, angle, pos_x, pos_y))
cv2.putText(img, "dist: {:.2f}".format(distance), (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
cv2.putText(img, "angle: {:.2f}".format(angle), (20, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
cv2.imshow("RGB", img)
cv2.imshow("depth", depth_colormap)
key = cv2.waitKey(1)
# Press esc or "q" to close the image window
if key & 0xFF == ord("q") or key == 27:
cv2.destroyAllWindows()
break