RealSense Usage Series (2): Estimating Object Position


Using the T265 and D435i to estimate an object's position in a 3D coordinate frame.

ROS Coordinate Frame

The T265's native coordinate frame has x pointing to the right of the device, y pointing up, and z pointing backward (out of the lenses toward the user).

In the pose read from ROS, the x axis points forward, the y axis points to the left, and the z axis points up. In terms of the T265's native frame, ROS x is the reverse of the native z axis, ROS y is the reverse of the native x axis, and ROS z is the native y axis. Everything below uses the ROS convention rather than the T265's native one; when the pose is read as an Odometry message (msg_Odometry), this conversion has already been applied.
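As a quick illustration, the remapping just described can be written as a single rotation matrix. This is a minimal numpy sketch; the matrix and the point values are illustrative, not something produced by the driver:

import numpy as np

# Map a point from the T265's native frame (x right, y up, z backward)
# into the ROS convention (x forward, y left, z up).
t265_to_ros = np.array([[ 0, 0, -1],
                        [-1, 0,  0],
                        [ 0, 1,  0]])

p_t265 = np.array([0.1, 0.2, -1.5])  # a point 1.5 m in front of the device
p_ros = t265_to_ros.dot(p_t265)      # -> [1.5, -0.1, 0.2]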

Conversion Between Image and World Coordinates

A camera involves four coordinate frames: world, camera, image, and pixel.

  • world is the world frame; its $x_w$ and $y_w$ axes can be chosen arbitrarily, and the observed point P lives in this frame.
  • camera is the camera frame; its origin sits at the pinhole, its $z_c$ axis coincides with the optical axis, and its $x_c$ and $y_c$ axes are parallel to the projection plane.
  • image is the image frame; its origin is the intersection of the optical axis and the projection plane, with its x and y axes parallel to the projection plane.
  • pixel is the pixel frame; looking from the pinhole toward the projection plane, the origin is the top-left corner of the plane, and the u and v axes run along its two edges. It lies in the same plane as the image frame but with a different origin.

The coordinate transformation from the world frame to the pixel frame is

$$ Z_c \begin{bmatrix} u \\ v \\ 1 \end{bmatrix} = M \begin{bmatrix} R & t \end{bmatrix} \begin{bmatrix} X_w \\ Y_w \\ Z_w \\ 1 \end{bmatrix} $$

In non-homogeneous form:

$$ \begin{bmatrix} X_c \\ Y_c \\ Z_c \end{bmatrix} = R \begin{bmatrix} X_w \\ Y_w \\ Z_w \end{bmatrix} + t, \qquad Z_c \begin{bmatrix} u \\ v \\ 1 \end{bmatrix} = M \begin{bmatrix} X_c \\ Y_c \\ Z_c \end{bmatrix} $$
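For reference, with focal lengths $f_x, f_y$ and principal point $(c_x, c_y)$, the intrinsic matrix $M$ takes the standard pinhole form:

$$ M = \begin{bmatrix} f_x & 0 & c_x \\ 0 & f_y & c_y \\ 0 & 0 & 1 \end{bmatrix} $$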

Since our world frame coincides with the camera frame at startup, there is no translation, i.e. $t = [0, 0, 0]^T$ above, and the rotation matrix $R$ is

$$ R = \begin{bmatrix} 0 & -1 & 0 \\ 0 & 0 & -1 \\ 1 & 0 & 0 \end{bmatrix} \tag{1} $$

The matrix $M$ is obtained from the camera intrinsics. (Note that the code below applies the transpose of $R$ in (1), because it goes the other way, from camera coordinates back into the ROS frame.)
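To make the back-projection concrete, here is a minimal numpy sketch; the fx, fy, cx, cy values are placeholders rather than real D435i intrinsics:

import numpy as np

# Intrinsic matrix M; fx, fy, cx, cy are placeholder values.
fx, fy, cx, cy = 615.0, 615.0, 320.0, 240.0
M = np.array([[fx, 0.0, cx],
              [0.0, fy, cy],
              [0.0, 0.0, 1.0]])

u, v, depth = 100, 100, 1500.0  # pixel (u, v) and its depth in mm
uv1 = np.array([[u], [v], [1.0]])
# Invert Z_c * [u, v, 1]^T = M * [X_c, Y_c, Z_c]^T with Z_c = depth:
p_cam = np.linalg.inv(M).dot(depth * uv1)  # camera-frame point, in mm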

Homogeneous Coordinate Transformation

Because the camera itself moves, what we obtained above is only the object's $[x, y, z]$ relative to the camera; we still have to account for the camera's own translation and rotation. The relation is

$$ P_A = R_{AB} P_B + t_{AB} $$

where the camera's reported rotation $R_{AB}$ and translation $t_{AB}$ take frame A (the initial pose) to frame B (the current pose): we compute the object's $[x, y, z]$ relative to the camera in frame B, then transform it back into frame A.
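Packed into a single 4x4 homogeneous matrix, the same transform looks like this. A minimal numpy sketch, where R_ab and t_ab stand in for the pose reported by the T265:

import numpy as np

# Homogeneous transform from frame B (current camera pose) to frame A (odometry origin).
R_ab = np.eye(3)                  # placeholder rotation
t_ab = np.array([0.5, 0.0, 0.0])  # placeholder translation, in metres

T_ab = np.eye(4)
T_ab[:3, :3] = R_ab
T_ab[:3, 3] = t_ab

p_b = np.array([1.0, 0.2, 0.0, 1.0])  # object in frame B, homogeneous coords
p_a = T_ab.dot(p_b)                   # the same object expressed in frame A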

For the detailed derivation, see the reference on position, orientation, translation, and rotation.

Code Implementation

import sys
import time
import os
import rospy
from sensor_msgs.msg import Image as msg_Image
from sensor_msgs.msg import CameraInfo as msg_CameraInfo
from nav_msgs.msg import Odometry as msg_Odometry
import numpy as np
from cv_bridge import CvBridge, CvBridgeError
import argparse
import message_filters
import cv2
import matplotlib.pyplot as plt
from autolab_core import RigidTransform
from scipy.spatial.transform import Rotation as R


class CWaitForMessage:
    def __init__(self, opt):
        self.height = opt.img_height
        self.width = opt.img_width
        self.node_name = "rs2_listener"
        self.bridge = CvBridge()
        self.listener = None
        self.prev_msg_time = 0
        self.is_init = False
        # x->pitch, y->yaw, z->roll
        self.theta = {}
        self.theta["pitch"], self.theta["yaw"], self.theta["roll"] = None, None, None
        self.external_mat = None
        self.rotation_mat = None
        self.t_mat = None
        self.internal_mat = None

    def rotation_estimator(self, pos_data):
        # Helper (unused by default): converts the odometry quaternion to Euler angles in degrees.
        w = pos_data.pose.pose.orientation.w
        x = pos_data.pose.pose.orientation.x
        y = pos_data.pose.pose.orientation.y
        z = pos_data.pose.pose.orientation.z
        self.theta["pitch"] = -np.arcsin(2.0 * (x*z - w*y)) * 180.0 / np.pi
        self.theta["roll"] = np.arctan2(2.0 * (w*x + y*z), w*w - x*x - y*y + z*z) * 180.0 / np.pi
        self.theta["yaw"] = np.arctan2(2.0 * (w*z + x*y), w*w + x*x - y*y - z*z) * 180.0 / np.pi

    def get_camera_external_param(self, pos_data):
        # T265 odometry reports the camera pose as a (w, x, y, z) quaternion plus a translation.
        rotation_quaternion = np.asarray([pos_data.pose.pose.orientation.w, pos_data.pose.pose.orientation.x,
                                          pos_data.pose.pose.orientation.y, pos_data.pose.pose.orientation.z])
        translation = np.asarray([pos_data.pose.pose.position.x, pos_data.pose.pose.position.y,
                                  pos_data.pose.pose.position.z])
        # RigidTransform accepts a wxyz quaternion and converts it to a rotation matrix.
        T_qua2rota = RigidTransform(rotation_quaternion, translation)
        self.rotation_mat = T_qua2rota.rotation
        self.t_mat = T_qua2rota.translation

    def get_camera_internal_param(self, camera_info):
        # CameraInfo.K holds the row-major 3x3 intrinsic matrix [fx 0 cx; 0 fy cy; 0 0 1].
        self.internal_mat = np.array(camera_info.K).reshape(3, 3)

    def check(self, img_data, depth_data):
        # Sanity-check that both streams match the expected resolution.
        cv_color_img = self.bridge.imgmsg_to_cv2(img_data, img_data.encoding)
        cv_depth_img = self.bridge.imgmsg_to_cv2(depth_data, depth_data.encoding)
        assert cv_color_img.shape == (self.height, self.width, 3)
        assert cv_depth_img.shape == (self.height, self.width)

    def estimate_absolute_pos(self, obj_pos, obj_depth):
        # Back-project the pixel into the camera frame: depth * M^-1 * [u, v, 1]^T.
        uv_pos = np.array([[obj_pos[0]], [obj_pos[1]], [1]])
        abs_pos = np.dot(np.linalg.inv(self.internal_mat), obj_depth * uv_pos)
        # Remap the optical axes (x right, y down, z forward) to the ROS convention
        # (x forward, y left, z up); this is the transpose of R in equation (1).
        axes_trans = np.array([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
        abs_pos = np.dot(axes_trans, abs_pos)
        # Rotate into the odometry frame, then add the camera translation.
        # t_mat is in metres while the depth image is in millimetres, hence the * 1000.
        abs_pos = np.dot(self.rotation_mat, abs_pos)
        abs_pos[:, 0] += self.t_mat * 1000
        return abs_pos.flatten()

    def callback(self, img_data, depth_data, pos_data, camera_info):
        if not self.is_init:
            # Validate the stream sizes and cache the intrinsics once.
            self.check(img_data, depth_data)
            self.get_camera_internal_param(camera_info)
            self.is_init = True
        cv_color_img = self.bridge.imgmsg_to_cv2(img_data, img_data.encoding)
        cv_depth_img = self.bridge.imgmsg_to_cv2(depth_data, depth_data.encoding)
        cv_color_img = cv2.cvtColor(cv_color_img, cv2.COLOR_RGB2BGR)
        # Fixed test pixel (u, v); in practice this would come from a detector.
        obj_pos = [100, 100]
        self.get_camera_external_param(pos_data)
        # Depth images are indexed [row, col], i.e. [v, u].
        obj_absolute_pos = self.estimate_absolute_pos(obj_pos, cv_depth_img[obj_pos[1], obj_pos[0]])
        cv2.rectangle(cv_color_img, (obj_pos[0] - 5, obj_pos[1] - 5), (obj_pos[0] + 5, obj_pos[1] + 5), (0, 255, 0), 2)
        pos_str = "x: {:.2f}, y: {:.2f}, z: {:.2f}".format(obj_absolute_pos[0], obj_absolute_pos[1], obj_absolute_pos[2])
        cv2.putText(cv_color_img, pos_str, (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
        cv2.imshow("RGB", cv_color_img)
        cv2.imshow("depth", cv_depth_img)

        # Roughly every ten seconds, dump the current estimate for debugging.
        if int(time.time()) % 10 == 0:
            print("pos")
            print(obj_absolute_pos)
            print(self.t_mat)
            print(self.rotation_mat)

        key = cv2.waitKey(1)
        # Press esc or "q" to close the image window
        if key & 0xFF == ord("q") or key == 27:
            cv2.destroyAllWindows()
            os._exit(0)

    def wait_for_messages(self):
        print("connect to ROS with name: %s" % self.node_name)
        rospy.init_node(self.node_name, anonymous=True)
        img_sub = message_filters.Subscriber("/d400/color/image_raw", msg_Image)
        depth_sub = message_filters.Subscriber("/d400/aligned_depth_to_color/image_raw", msg_Image)
        pos_sub = message_filters.Subscriber("/t265/odom/sample", msg_Odometry)
        camera_sub = message_filters.Subscriber("/d400/color/camera_info", msg_CameraInfo)
        # Approximately synchronize the four topics by timestamp.
        ts = message_filters.ApproximateTimeSynchronizer([img_sub, depth_sub, pos_sub, camera_sub],
                                                         queue_size=10, slop=0.5, allow_headerless=True)
        ts.registerCallback(self.callback)
        rospy.spin()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--img_height", type=int, default=480, help="color image height")
    parser.add_argument("--img_width", type=int, default=640, help="color image width")
    opt = parser.parse_args()

    msg_retriever = CWaitForMessage(opt)
    msg_retriever.wait_for_messages()
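To try this out, both cameras must already be publishing under the /d400 and /t265 namespaces. With the realsense-ros package this is typically done with its combined launch file; the script filename below (rs2_listener.py) is just an assumed name for the code above:

roslaunch realsense2_camera rs_d400_and_t265.launch
python rs2_listener.py --img_height 480 --img_width 640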

Finally, verify that while the camera moves, the estimated 3D coordinates of a fixed observed point remain (roughly) unchanged, as shown below.

(Figure: the same point observed in Scene 1 and Scene 2 while the camera moves.)

The estimates are in mm, and the error above is within reason.
