C++ RTTI（运行时类型识别）：简单介绍 C++ RTTI（运行时类型识别）的两种使用方式，并分析其内部实现原理，以及 C++……

Code

#include <iostream>
#include <typeinfo>

using namespace std;

// Polymorphic base class of the RTTI demo. Having a virtual function is what
// gives objects of this class a vtable pointer and run-time type information.
class Base {
public:
    // Zero-initialises b so a default-constructed Base never exposes an
    // indeterminate value.
    Base() : b(0) {}

    // Writes "Base" followed by a newline to std::cout; Derive overrides it.
    virtual void Print() {
        std::cout << "Base" << std::endl;
    }

    // Virtual so that deleting a derived object through a Base* is well
    // defined. Declared after Print() on purpose: with g++ (Itanium C++ ABI)
    // vtable slots follow declaration order, so Print() stays in slot 0, which
    // is what the vtable walkthrough later in this article relies on.
    virtual ~Base() {}

    int b;  // Demo payload; only present to give the object a data member.
};

// Derived class of the RTTI demo: publicly inherits Base and overrides Print().
class Derive : public Base {
public:
    // Overrides Base::Print(); writes "Derive" followed by a newline to cout.
    virtual void Print() {
        cout << "Derive" << endl;
    }
    int d;  // Extra data member of the derived class; no constructor here sets it explicitly.
};

// Demonstrates both RTTI facilities on a Base* that points to a Derive:
// typeid applied to the pointer and to the pointee, followed by one
// dynamic_cast that succeeds and one that fails. argc and argv are unused.
int main(int argc, char** argv) {
    Base* bb = new Derive();
    // The operand is the pointer itself, so this reports the static type Base*.
    cout << "Output0: " << typeid(bb).name() << endl;
    // The operand is an lvalue of a polymorphic class, so the type of the
    // object actually pointed to is looked up at run time.
    cout << "Output1: " << typeid(*bb).name() << endl;
    // bb really points to a Derive, so the cast yields a non-null Derive*.
    cout << "Output2: " << dynamic_cast<Derive*>(bb) << endl;
    // The freshly created object is a plain Base, so the cast yields a null pointer.
    cout << "Output3: " << dynamic_cast<Derive*>(new Base()) << endl;
    // NOTE(review): neither object created with new is ever deleted; this is
    // tolerable here only because the process exits immediately afterwards.
    return 0;
}

Output

xiaoju@XiaojuVM ~/self_src/demo/cpp $ g++ -g  hello.cpp
xiaoju@XiaojuVM ~/self_src/demo/cpp $ ./a.out
Output0: P4Base
Output1: 6Derive
Output2: 0x13e8040
Output3: 0

RTTI 的两种方式

typeid

typeid 是 C++ 进行类型识别的运算符，返回一个 type_info 对象的引用，用于描述对应参数的类型信息。type_info 的核心定义如下：

/**
 *  Simplified excerpt of the standard library's type_info class: the object
 *  that a typeid expression refers to. Only the core members are shown.
 */
class type_info
{
public:
    /** Virtual destructor; type_info is itself a polymorphic class. */
    virtual ~type_info();

    /** Returns an @e implementation-defined byte string; this is not
     *  portable between compilers!  */
    const char* name() const;

    /** Returns true if @c *this precedes @c __arg in the implementation's
     *  collation order.  Const-qualified, like every other query here, so it
     *  can be called on the const object that typeid yields.  */
    bool before(const type_info& __arg) const;

    /** Returns true if @c *this and @c __arg describe the same type.  */
    bool operator==(const type_info& __arg) const;

    /** Returns true if @c *this and @c __arg describe different types.  */
    bool operator!=(const type_info& __arg) const;

protected:
    /** Pointer to the implementation-defined type name string.  */
    const char *__name;
};

编译期类型识别：类似于 Output0，编译器在编译期就知道 bb 的类型是指向 Base 的指针，因此 typeid(bb) 的结果在编译期即可确定。
运行期类型识别：类似于 Output1，由于 Base 含有虚函数、Derive 继承 Base 实现了多态，编译期无法确定 *bb 的实际类型是 Derive 还是 Base，只有等到运行时才能确定 typeid(*bb) 的结果。

dynamic_cast

dynamic_cast 是 C++ 进行类型转换的运算符，可以安全地进行父类指针的向下类型转换（要求父类是含有虚函数的多态类型）。
若父类指针无法转型为对应子类，则返回一个 NULL 指针，类似于 Output3；否则，返回对应子类类型的指针，类似于 Output2。

底层实现

内存布局

内存布局分析

首先通过gdb看一下vtbl位置

(gdb) l
21	    int d;
22	};
23
24	int main(int argc, char** argv) {
25	    Base* bb = new Derive();
26	    cout << "Output0: " << typeid(bb).name() << endl;
27	    cout << "Output1: " << typeid(*bb).name() << endl;
28	    cout << "Output2: " << dynamic_cast<Derive*>(bb) << endl;
29	    cout << "Output3: " << dynamic_cast<Derive*>(new Base()) << endl;
30	    return 0;
(gdb) p *bb
$1 = {_vptr.Base = 0x400e60 <vtable for Derive+16>, b = 0}
(gdb) p bb
$2 = (Base *) 0x603040
(gdb)

可以看到，bb 的值是 0x603040，vptr 是 0x400e60（即 vtable for Derive 起始地址之后 16 字节处）。由于 vtbl 位于 .rodata 只读数据段，下面通过 objdump 看一下 .rodata 只读数据段的内容。

可以看到，vptr[0] 位置（0x400e60 处）存放的是第一个虚函数的地址。x86-64 采用小端字节序，dump 中的字节需要按逆序读取，因此实际的函数地址是 0x400d1e。通过 objdump 反汇编可以看到该地址处的具体二进制代码：

xiaoju@XiaojuVM ~/self_src/demo/cpp $ objdump -S -j .text a.out

0000000000400d1e <_ZN6Derive5PrintEv>:

class Derive : public Base {
public:
    virtual void Print() {
  400d1e:	55                   	push   %rbp
  400d1f:	48 89 e5             	mov    %rsp,%rbp
  400d22:	48 83 ec 10          	sub    $0x10,%rsp
  400d26:	48 89 7d f8          	mov    %rdi,-0x8(%rbp)
        cout << "Derive" << endl;
  400d2a:	be 15 0e 40 00       	mov    $0x400e15,%esi
  400d2f:	bf e0 20 60 00       	mov    $0x6020e0,%edi
  400d34:	e8 47 fc ff ff       	callq  400980 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@plt>
  400d39:	be d0 09 40 00       	mov    $0x4009d0,%esi
  400d3e:	48 89 c7             	mov    %rax,%rdi
  400d41:	e8 7a fc ff ff       	callq  4009c0 <_ZNSolsEPFRSoS_E@plt>
    }
  400d46:	c9                   	leaveq
  400d47:	c3                   	retq

同时可以看到，vptr[-1] 位置（0x400e58 处）存放的是 bb 实际指向的数据类型的 type_info 的地址，为 0x400ed0。因此可以证明，类的 type_info 信息与 vtbl 一样，保存在 .rodata 只读数据段。

在0x400ed0处，第一个word是0x602210，代表type_info的vptr，第二个word 0x400ec0，代表 __name 成员变量（char*类型）的值，可见，__name所指向的字符串为6Derive，为Derive类型的类型名。

实现原理

typeid 运行时类型识别：编译器将

typeid(*bb)

转化为

*((type_info*)bb->vptr[-1])

若bb实际指向类型为Base，则bb->vptr指向Base的vtbl，bb->vptr[-1]指向Base的type_info
若bb实际指向类型为Derive，则bb->vptr指向Derive的vtbl，bb->vptr[-1]指向Derive的type_info
依此实现运行时进行类型识别

dynamic_cast：编译器将

dynamic_cast<Derive*>(bb)

转化为

// Conceptual sketch (pseudo-code, not compilable as written) of what
// dynamic_cast<Derive*>(bb) boils down to. "vptr" stands for the hidden
// vtable pointer stored in the object.
Derive* dynamicCast(Base* bb) {
    // dynamic_cast applied to a null pointer yields a null pointer, and the
    // vtable pointer must not be read through NULL.
    if (bb == NULL) {
        return NULL;
    }
    const type_info& bb_type = *((type_info*)bb->vptr[-1]); // type_info of the object bb actually points to
    if (bb_type == typeid(Derive)) {
        return (Derive*)bb;
    }
    // Simplification: only an exact type match is recognised here. A real
    // implementation also succeeds when the object is of a class derived from
    // Derive, which requires walking the base-class information.
    return NULL;
}

核心实现逻辑大致如此，实际实现的内部细节必然有所不同（例如还要处理多级继承、多重继承等情况）。