OC底层之类结构探索

1,765 阅读9分钟

引言

自定义一个类JPerson继承自NSObject

@interface JPerson : NSObject
{
    NSString *nickName;
}

@end

我们在main方法中写以下代码并且查看其底层实现

int main(int argc, char * argv[]) {
    @autoreleasepool {
        JPerson *p = [[JPerson alloc] init];
        NSLog(@"%@",p);
    }
    return 0;
}

终端进入main.m文件目录通过clang查看其底层实现

xcrun -sdk iphoneos clang -arch arm64 -rewrite-objc main.m

打开生成的main.cpp文件

...
typedef struct objc_object NSObject;
struct NSObject_IMPL {
    Class isa;
};
...
typedef struct objc_object JPerson;
struct JPerson_IMPL {
    struct NSObject_IMPL NSObject_IVARS;
    NSString *nickName;
};
...

可以看到NSObject的底层是一个struct objc_object结构体类型,结构体内部包含isa

JPerson的底层也是一个struct objc_object结构体类型,结构体内部包含一个struct NSObject_IMPL类型的NSObject_IVARS,也就是isa和自定义的成员变量nickName

objc_object

struct objc_object {
private:
    isa_t isa;
public:

    // ISA() assumes this is NOT a tagged pointer object
    Class ISA();

    // rawISA() assumes this is NOT a tagged pointer object or a non pointer ISA
    Class rawISA();

    // getIsa() allows this to be a tagged pointer object
    Class getIsa();

    uintptr_t isaBits() const;
    
    ......
};

isa_t我们在《OC底层之isa探索》中介绍过,objc_object结构体中除了包含一个isa,还有一些方法。

我们知道实例对象的isa中存储了指向类对象的指针,并且上文我们也验证过了,我们看一下其具体实现细节

inline Class 
objc_object::ISA() 
{
    ASSERT(!isTaggedPointer()); 
#if SUPPORT_INDEXED_ISA
    if (isa.nonpointer) {
        uintptr_t slot = isa.indexcls;
        return classForIndex((unsigned)slot);
    }
    return (Class)isa.bits;
#else
    return (Class)(isa.bits & ISA_MASK);
#endif
}

// ISA_MASK定义在isa.h中,以x86_64为例:
# elif __x86_64__ 
# define ISA_MASK 0x00007ffffffffff8ULL

SUPPORT_INDEXED_ISA=1表示当前是Apple Watch(watchOS)的ABI,具体参考《iOS底层探索 - 实例对象的创建》

实例对象就是通过isa.bits & ISA_MASK获取到类对象指针的

Class

struct objc_class;
struct objc_object;

typedef struct objc_class *Class;
typedef struct objc_object *id;

Class底层是一个struct objc_class类型的结构体指针

objc_class

struct objc_class : objc_object {
    ......
    // Class ISA;
    
    //8字节
    Class superclass; 
    
    // 16字节
    cache_t cache;             // formerly cache pointer and vtable
    
    // 8字节
    class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags

    class_rw_t *data() const {
        return bits.data();
    }

    ......
};

objc_class结构体从objc_object继承来了isa,自己定义了superclass、cache、bits

cache_t

typedef uint32_t mask_t; // x86_64 & arm64 asm are less efficient with 16-bits\

struct cache_t {
private:
    explicit_atomic<uintptr_t> _bucketsAndMaybeMask; //8字节
    
    //联合体占用8字节
    union {
        struct {
            explicit_atomic<mask_t>    _maybeMask; //4字节
#if __LP64__
            uint16_t                   _flags; //2字节
#endif
            uint16_t                   _occupied; //2字节
        };
        explicit_atomic<preopt_cache_t *>  _originalPreoptCache; //8字节
    };
    
};

cache可以缓存方法,提高方法调用效率,我们在后续的研究方法缓存时还会详细讨论,这里先做简单介绍,知道其占用16字节就可以了

class_data_bits_t

struct class_data_bits_t {
    friend objc_class;

    // Values are the FAST_ flags above.
    // 下面方法可以通过 bits & 掩码 获取信息,可见bits是把多种信息按位打包存储在一个uintptr_t中(并不是union)
    uintptr_t bits; 

private:
    bool getBit(uintptr_t bit) const
    {
        return bits & bit;
    }
    ...
public:
    class_rw_t* data() const {
        return (class_rw_t *)(bits & FAST_DATA_MASK);
    }

    void setData(class_rw_t *newData)
    {
        ASSERT(!data()  ||  (newData->flags & (RW_REALIZING | RW_FUTURE)));
        // Set during realization or construction only. No locking needed.
        // Use a store-release fence because there may be concurrent
        // readers of data and data's contents.
        uintptr_t newBits = (bits & ~FAST_DATA_MASK) | (uintptr_t)newData;
        atomic_thread_fence(memory_order_release);
        bits = newBits;
    }

    // Get the class's ro data, even in the presence of concurrent realization.
    // fixme this isn't really safe without a compiler barrier at least
    // and probably a memory barrier when realizeClass changes the data field
    const class_ro_t *safe_ro() {
        class_rw_t *maybe_rw = data();
        if (maybe_rw->flags & RW_REALIZED) {
            // maybe_rw is rw
            return maybe_rw->ro();
        } else {
            // maybe_rw is actually ro
            return (class_ro_t *)maybe_rw;
        }
    }
};

class_data_bits_t中存储了一个bits,使用bits进行位运算可以得到class_rw_t,通过class_rw_t可以获取到class_ro_t

class_rw_t

struct class_rw_t {
    // Be warned that Symbolication knows the layout of this structure.
    uint32_t flags;
    uint16_t witness;
    
#if SUPPORT_INDEXED_ISA    //iWatch的abi
    uint16_t index;
#endif

    explicit_atomic<uintptr_t> ro_or_rw_ext;

    Class firstSubclass;
    Class nextSiblingClass;
    
private:
    using ro_or_rw_ext_t = objc::PointerUnion<const class_ro_t *, class_rw_ext_t *>;
    
    const ro_or_rw_ext_t get_ro_or_rwe() const {
        return ro_or_rw_ext_t{ro_or_rw_ext};
    }
    ......
    // 分情况访问class_rw_ext_t或者class_ro_t
    const method_array_t methods() const {
        auto v = get_ro_or_rwe();
        if (v.is<class_rw_ext_t *>()) {
            return v.get<class_rw_ext_t *>()->methods;
        } else {
            return method_array_t{v.get<const class_ro_t *>()->baseMethods()};
        }
    }

    const property_array_t properties() const {
        auto v = get_ro_or_rwe();
        if (v.is<class_rw_ext_t *>()) {
            return v.get<class_rw_ext_t *>()->properties;
        } else {
            return property_array_t{v.get<const class_ro_t *>()->baseProperties};
        }
    }

    const protocol_array_t protocols() const {
        auto v = get_ro_or_rwe();
        if (v.is<class_rw_ext_t *>()) {
            return v.get<class_rw_ext_t *>()->protocols;
        } else {
            return protocol_array_t{v.get<const class_ro_t *>()->baseProtocols};
        }
    }
};

ro_or_rw_ext支持class_ro_t、class_rw_ext_t两种模版,可以通过methods()、properties()、protocols()中分情况取值来验证

class_ro_t

struct class_ro_t {
    uint32_t flags;
    uint32_t instanceStart;
    uint32_t instanceSize;

#ifdef __LP64__
    uint32_t reserved;
#endif

    const uint8_t * ivarLayout;

    const char * name;
    method_list_t * baseMethodList;
    protocol_list_t * baseProtocols;
    const ivar_list_t * ivars;

    const uint8_t * weakIvarLayout;
    property_list_t *baseProperties;
    
    ......
};

class_ro_t包含了在编译期就确定的方法、属性、成员变量等信息。instanceStart、instanceSize两个属性在dyld加载时会用到,具体可以参考文章《Non Fragile ivars讲解》

class_rw_ext_t


struct class_rw_ext_t {
    const class_ro_t *ro;
    method_array_t methods;
    property_array_t properties;
    protocol_array_t protocols;
    char *demangledName;
    uint32_t version;
};

class_rw_ext_t存储指向class_ro_t指针还有动态添加的信息。

具体可以参考 # WWDC2020

简单总结

OC对象都是继承自NSObject,NSObject底层实现是一个只包含isa的objc_object结构体,子类从NSObject那里继承来了isa

objc2版本的isa是一个联合体,通过bits位域配合存储相关信息。

  • 实例对象的isa中包含指向类对象的指针
  • 类对象isa中包含指向元类对象的指针。
  • 元类对象isa中包含指向根元类对象的指针。
  • 根元类对象isa中包含指向自己的指针。

类对象Class的底层实现是一个objc_class结构体,除了从objc_object继承来的isa还有包含:

  • superclass指向父类的指针
  • cache缓存方法等信息(后面探索方法流程的时候会用到)
  • bits包含class_rw_t等信息

class_rw_t包含了一个重要的变量ro_or_rw_ext,ro_or_rw_ext中支持class_rw_ext_t、class_ro_t两种模版,即根据不同的情况可以存储class_rw_ext_t也可以存储class_ro_t(功能同联合体union)

class_ro_t存储编译期就确定的信息,例如方法列表、属性列表、协议列表等。因为OC是一门动态语言可以在运行时动态添加信息,如果Class对象没有动态添加信息那么ro_or_rw_ext中存储的就是class_ro_t,如果有动态添加的信息ro_or_rw_ext中存储的就是class_rw_ext_t

class_rw_ext_t中有指向class_ro_t的指针和动态添加的其他信息。

注意:class_rw_ext_t中包含从class_ro_t拷贝过来的信息和用户动态添加的信息

NSObject协议

自定义的类继承根类NSObject

@interface JPerson : NSObject
    ...
@end

自定义协议遵循NSObject

@protocol JProtocol <NSObject>
    ...
@end

这两个NSObject是不同的,一个是作为根类,OC中所有的对象类型都要继承根类NSObject

...
@interface NSObject <NSObject> {
    ...
    Class isa  OBJC_ISA_AVAILABILITY;
    ...
}

一个是作为协议,根类NSObject已经实现了NSObject协议,所以我们可以直接使用OC对象的以下方法

@protocol NSObject

- (BOOL)isEqual:(id)object;

@property (readonly) NSUInteger hash;
@property (readonly) Class superclass;

- (Class)class OBJC_SWIFT_UNAVAILABLE("use 'type(of: anObject)' instead");
- (instancetype)self;
- (id)performSelector:(SEL)aSelector;
- (id)performSelector:(SEL)aSelector withObject:(id)object;
- (id)performSelector:(SEL)aSelector withObject:(id)object1 withObject:(id)object2;
- (BOOL)isProxy;
- (BOOL)isKindOfClass:(Class)aClass;
- (BOOL)isMemberOfClass:(Class)aClass;
- (BOOL)conformsToProtocol:(Protocol *)aProtocol;
- (BOOL)respondsToSelector:(SEL)aSelector;
- (instancetype)retain OBJC_ARC_UNAVAILABLE;
- (oneway void)release OBJC_ARC_UNAVAILABLE;
- (instancetype)autorelease OBJC_ARC_UNAVAILABLE;
- (NSUInteger)retainCount OBJC_ARC_UNAVAILABLE;
- (struct _NSZone *)zone OBJC_ARC_UNAVAILABLE;

@property (readonly, copy) NSString *description;

@optional
@property (readonly, copy) NSString *debugDescription;

@end

代码验证

NS_ASSUME_NONNULL_BEGIN

@protocol JProtocol <NSObject>

@property(nonatomic,strong) NSString *protocolProperty;
- (void)method_1; //在JPerson.m中实现了该方法
- (void)method_2; //在JPerson.m中没有实现该方法

@end

@interface JPerson : NSObject<JProtocol>
{
    NSString *name;
    NSInteger age;
}

@property (nonatomic,strong)NSString *nickName;

- (void)instanceMethod; //.m实现之
+ (void)classMethod; //.m实现之

@end

NS_ASSUME_NONNULL_END

我们创建一个JPerson类,实现JProtocol协议,但是只实现了协议中的部分方法,再给JPerson创建一个分类JPerson_category,同样也只实现了部分方法

#import "JPerson.h"

NS_ASSUME_NONNULL_BEGIN

@interface JPerson (JPerson_category)
@property (nonatomic,strong) NSString *categoryProperty;

-(void)method_cat1; //在.m实现了该方法
-(void)method_cat2; //在.m没有实现该方法
@end

NS_ASSUME_NONNULL_END

main函数中写如下代码

      JPerson *p1 = [[JPerson alloc] init];
      Class pClass = object_getClass(p1);
      NSLog(@"%@-%@",p1,pClass); //打断点

p1为实例对象的指针,pClass为类对象JPerson的指针,也就是我们本文研究的重点内容

(lldb) x/5gx pClass
0x100008900: 0x00000001000088d8 0x000000010036a140
0x100008910: 0x0000000101404960 0x0001802800000003
0x100008920: 0x00000001014044b4

通过前面分析我们知道

  • 0x00000001000088d8为isa
  • 0x000000010036a140为指向父类的指针superclass
  • 0x0000000101404960 0x0001802800000003为cache
  • 0x00000001014044b4为bits,0x100008920为bits的地址(即指向bits的指针),bits的类型为class_data_bits_t,其中包含方法data()可以获取到class_rw_t
(lldb) p (class_data_bits_t *)0x100008920
(class_data_bits_t *) $1 = 0x0000000100008920
(lldb) p *$1.data()
(class_rw_t) $2 = {
    flags = 2148007936
    witness = 1
    ro_or_rw_ext = {
        std::__1::atomic<unsigned long> = {
            Value = 4295001368
        }
    }
    firstSubclass = nil
    nextSiblingClass = NSUUID
}
Fix-it applied, fixed expression was:
*$1->data()
(lldb) p/x 4295001368
(long) $3 = 0x0000000100008518

class_rw_t中包含ro_or_rw_ext,ro_or_rw_ext支持class_ro_t、class_rw_ext_t两种模版,我们这里都是编译期可以确定的信息,所以推断ro_or_rw_ext中存储的是class_ro_t,也就是$3是指向class_ro_t的指针

(lldb) p (class_ro_t *)$3
(class_ro_t *) $4 = 0x0000000100008518
(lldb) p *$4
(class_ro_t) $5 = {
    flags = 0
    instanceStart = 8
    instanceSize = 32
    reserved = 0
    = {
        ivarLayout = 0x0000000000000000
        nonMetaclass = nil
    }
    name = {
        std::__1::atomic<const char *> = "JPerson" {
            Value = 0x0000000100003c29 "JPerson"
        }
    }
    baseMethodList = 0x0000000100008048
    baseProtocols = 0x0000000100008500
    ivars = 0x0000000100008560
    weakIvarLayout = 0x0000000000000000
    baseProperties = 0x00000001000080b0
    _swiftMetadataInitializer_NEVER_USE = {}
}

class_rw_t中也有methods()、properties()、protocols()方法

(lldb) p $2.methods()
(const method_array_t) $6 = {
    list_array_tt<method_t, method_list_t, method_list_t_authed_ptr> = {
    = {
        list = {
            ptr = 0x0000000100008048
        }
        arrayAndFlag = 4295000136
      }
   }
}
(lldb) p $2.properties()
(const property_array_t) $7 = {
list_array_tt<property_t, property_list_t, RawPtr> = {
    = {
        list = {
            ptr = 0x00000001000080b0
        }
        arrayAndFlag = 4295000240
    }
  }
}
(lldb) p $2.protocols()
(const protocol_array_t) $8 = {
list_array_tt<unsigned long, protocol_list_t, RawPtr> = {
    = {
    list = {
        ptr = 0x0000000100008500
    }
    arrayAndFlag = 4295001344
    }
  }
}

我们看到class_rw_t中methods()返回的指针和class_ro_t中的baseMethodList指针相同,同样属性列表和协议列表也相同,我们以方法列表为例查看其内容

(lldb) p $6.list.ptr
(method_list_t *const) $9 = 0x0000000100008048
(lldb) p *$9
(method_list_t) $10 = {
    entsize_list_tt<method_t, method_list_t, 4294901763, method_t::pointer_modifier> = (entsizeAndFlags = 27, count = 4)
}
(lldb) p $10.get(0).big()
(method_t::big) $11 = {
    name = "method_cat1"
    types = 0x0000000100003ea8 "v16@0:8"
    imp = 0x00000001000038f0 (KCObjcBuild`-[JPerson(JPerson_category) method_cat1])
}
(lldb) p $10.get(1).big()
(method_t::big) $12 = {
    name = "method_1"
    types = 0x0000000100003ea8 "v16@0:8"
    imp = 0x0000000100003900 (KCObjcBuild`-[JPerson method_1])
}
(lldb) p $10.get(2).big()
(method_t::big) $13 = {
    name = "nickName"
    types = 0x0000000100003ec3 "@16@0:8"
    imp = 0x0000000100003910 (KCObjcBuild`-[JPerson nickName])
}
(lldb) p $10.get(3).big()
(method_t::big) $14 = {
    name = "setNickName:"
    types = 0x0000000100003f72 "v24@0:8@16"
    imp = 0x0000000100003930 (KCObjcBuild`-[JPerson setNickName:])
}
(lldb) p $10.get(4).big()
  //越界了报错

我们看到了协议JProtocol中的method_1但是没有看到method_2。 我们看到了分类JPerson_category中的method_cat1但是没有看到method_cat2。 这说明未实现的方法不存在方法列表中。

我们看到了实例方法instanceMethod没有看到类方法classMethod,因为类方法存储在元类中

为什么需要.big()是因为method_t中对信息做了一层封装不能直接访问

struct method_t {
    ......
    // The representation of a "big" method. This is the traditional
    // representation of three pointers storing the selector, types
    // and implementation.
    struct big {
        SEL name;
        const char *types;
        MethodListIMP imp;
    };
    ......
public:
    big &big() const {
        ASSERT(!isSmall());
        return *(struct big *)this;
    }
    ......
};

这里我们只是简单验证了方法的存储位置,还留下了很多问题需要验证:

  • 协议提供的属性是怎么存储的?有没有生成成员变量??
  • 类别提供的属性是怎么存储的?有没有生成成员变量??
  • class_rw_t中的ro_or_rw_ext变量什么情况下会由class_ro_t变为class_rw_ext_t?

我们在后面的文章中会进一步进行验证,敬请期待!!

参考文章

# 类的结构分析

# iOS底层探索 - 实例对象的创建

# WWDC2020

# Non Fragile ivars讲解