cache_t 内容和存储原理

226 阅读6分钟

导语

// Excerpt of the runtime's class structure: besides the inherited isa it
// holds the superclass pointer, the method cache, and the class data bits.
struct objc_class : objc_object {
    // Class ISA; // inherited from the parent struct (objc_object)
    Class superclass;
    cache_t cache;             // formerly cache pointer and vtable
    class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags
}

前面的文章中,我们分析了 isa 和 bits;本篇文章,我们分析 cache。

cache_t 数据结构

cache_t 源码

// Choose how cache_t stores its mask, based on the target architecture.
#if defined(__arm64__) && __LP64__
// 64-bit (LP64) arm64 devices
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_HIGH_16
#elif defined(__arm64__) && !__LP64__
// arm64 devices that are not LP64
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_LOW_4
#else
// Everything else (including the x86_64 simulator)
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_OUTLINED
#endif


// Per-class method cache (abridged excerpt). The field layout depends on
// which CACHE_MASK_STORAGE variant was selected.
struct cache_t {

#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED
// Outlined storage (e.g. the x86_64 simulator): the buckets pointer and the
// mask are kept in two separate fields.
    explicit_atomic<struct bucket_t *> _buckets;
    explicit_atomic<mask_t> _mask;

#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
// 64-bit devices: mask and buckets pointer are packed into one word.
    explicit_atomic<uintptr_t> _maskAndBuckets;
    mask_t _mask_unused;

    static constexpr uintptr_t maskShift = 48;
    static constexpr uintptr_t maskZeroBits = 4;
    /**
    Bit layout of _maskAndBuckets (bit 0 is the least significant bit):
      bits 48..63  mask             (64 - maskShift = 16 bits)
      bits 44..47  maskZeroBits     (4 bits; per the original note these are kept at 0)
      bits  0..43  buckets pointer  (maskShift - maskZeroBits = 44 bits)
    */
    // Largest mask value that fits in the 16-bit mask field.
    static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;
    // Selects the low 44 bits that hold the buckets pointer.
    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;
    static_assert(bucketsMask >= MACH_VM_MAX_ADDRESS, "Bucket field doesn't have enough bits for arbitrary pointers.");

#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
// Remaining non-LP64 devices: the low 4 bits (maskBits) are the mask field and
// the other bits are covered by bucketsMask. Not examined further here.
    explicit_atomic<uintptr_t> _maskAndBuckets;
    mask_t _mask_unused;

    static constexpr uintptr_t maskBits = 4;
    static constexpr uintptr_t maskMask = (1 << maskBits) - 1;
    static constexpr uintptr_t bucketsMask = ~maskMask;
#else
#error Unknown cache mask storage type.
#endif
    
#if __LP64__
    uint16_t _flags;
#endif
    // Count of occupied cache entries (bumped by incrementOccupied()).
    uint16_t _occupied;


public
// Accessor that reads the sel.
// NOTE(review): the lldb session calls sel()/imp() on a bucket_t value, so
// these two accessors presumably belong to bucket_t — confirm against the
// runtime source.
inline SEL sel() const { ... }
// Accessor that reads the imp.
inline IMP imp(Class cls) const { ... }

};

bucket_t 源码

// bucket_t stores one (imp, sel) pair.

struct bucket_t {
private:
    // IMP-first is better for arm64e ptrauth and no worse for arm64.
    // SEL-first is better for armv7* and i386 and x86_64.
    // Both branches declare the same two fields; only their order differs.
#if __arm64__
    explicit_atomic<uintptr_t> _imp;
    explicit_atomic<SEL> _sel;
#else
    explicit_atomic<SEL> _sel;
    explicit_atomic<uintptr_t> _imp;
#endif

至此,我们可以得到一个大致的印象。
cache_t 存储了一个列表,列表中的每一项(bucket_t)存储了一对 sel 和 imp。
查找方法的时候,可以先到cache_t 中查询,直接查到 imp 可以提高访问速度。

cache_t 缓存方法

实验代码

#import <Foundation/Foundation.h>
#import <objc/runtime.h>

/// Experiment class used to observe the method cache.
/// It declares six instance methods; each one only logs its own name
/// (via __func__), so every call sends a distinct selector to the class.
@interface BClass : NSObject

- (void)method1;
- (void)method2;
- (void)method3;
- (void)method4;
- (void)method5;
- (void)method6;

@end

@implementation BClass

// Each method logs its own name and does nothing else.

- (void)method1 {
    NSLog(@"%s", __func__);
}

- (void)method2 {
    NSLog(@"%s", __func__);
}

- (void)method3 {
    NSLog(@"%s", __func__);
}

- (void)method4 {
    NSLog(@"%s", __func__);
}

- (void)method5 {
    NSLog(@"%s", __func__);
}

- (void)method6 {
    NSLog(@"%s", __func__);
}

@end


// Experiment driver: sends method1..method6 to a single BClass instance so
// that the class's cache_t can be inspected from lldb between the calls.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        // insert code here...
        // NOTE(review): alloc without init — presumably deliberate, so that no
        // additional selector is sent to the instance before the experiment
        // starts; confirm before "fixing" it.
        BClass *b = [BClass alloc];
        // Not used by the code itself; it is read from lldb (p/x bClass).
        Class bClass = [BClass class];
        
        [b method1]; // breakpoint 1
        [b method2]; // breakpoint 2
        [b method3]; // breakpoint 3 (as used in the lldb transcript)
        [b method4]; // breakpoint 4 (as used in the lldb transcript)
        [b method5];
        [b method6];
        // Empty log line; gives the debugger a final statement to stop on.
        NSLog(@"");
    }
    return 0;
}

lldb

// 断点1
(lldb) p/x bClass
(Class) $0 = 0x0000000100002338 BClass
(lldb) p/x (cache_t *)0x0000000100002348 // 在类的存储结构里,第三个8字节是存储的cache_t
(cache_t *) $1 = 0x0000000100002348
(lldb) p *$1
(cache_t) $2 = {
  _buckets = {
    std::__1::atomic<bucket_t *> = 0x000000010032e430 {
      _sel = {
        std::__1::atomic<objc_selector *> = (null)
      }
      _imp = {
        std::__1::atomic<unsigned long> = 0
      }
    }
  }
  _mask = {
    std::__1::atomic<unsigned int> = 0
  }
  _flags = 32784
  _occupied = 0 // 占用个数为0
}
2020-09-21 23:19:34.030091+0800 KCObjc[2991:32284] -[BClass method1]

// 进入断点2
(lldb) p *$1
(cache_t) $3 = {
  _buckets = {
    std::__1::atomic<bucket_t *> = 0x00000001007059f0 {
      _sel = {
        std::__1::atomic<objc_selector *> = ""
      }
      _imp = {
        std::__1::atomic<unsigned long> = 10696
      }
    }
  }
  _mask = {
    std::__1::atomic<unsigned int> = 3
  }
  _flags = 32784
  _occupied = 1 // 此时占用个数变为了1
}

(lldb) p *($3.buckets()) // 输出 cache_t.buckets 的信息
(bucket_t) $5 = {
  _sel = {
    std::__1::atomic<objc_selector *> = ""
  }
  _imp = {
    std::__1::atomic<unsigned long> = 10696
  }
}
(lldb) p $5.sel() // 读取方法
(SEL) $6 = "method1"
(lldb) p $5.imp(bClass) // 读取imp
(IMP) $7 = 0x0000000100000af0 (KCObjc`-[BClass method1] at main.m:25)
2020-09-21 23:22:16.671684+0800 KCObjc[2991:32284] -[BClass method2]

// 进入断点3
(lldb) p *$1
(cache_t) $2 = {
  _buckets = {
    std::__1::atomic<bucket_t *> = 0x0000000101972d20 {
      _sel = {
        std::__1::atomic<objc_selector *> = ""
      }
      _imp = {
        std::__1::atomic<unsigned long> = 10696
      }
    }
  }
  _mask = {
    std::__1::atomic<unsigned int> = 3
  }
  _flags = 32784
  _occupied = 2 // 占用个数变为了2
}

// 进入断点4
(lldb) p *$1
(cache_t) $3 = {
  _buckets = {
    std::__1::atomic<bucket_t *> = 0x000000010064db70 {
      _sel = {
        std::__1::atomic<objc_selector *> = (null)
      }
      _imp = {
        std::__1::atomic<unsigned long> = 0
      }
    }
  }
  _mask = {
    std::__1::atomic<unsigned int> = 7
  }
  _flags = 32784
  _occupied = 1 // 占用个数又变为了1
}

我们可以看到,_occupied 从零变到2,又变回了1,这中间到底发生了什么?

下面我们来探讨方法的缓存原理。

缓存流程实验环境搭建

// 下面的结构体都是参照源码中 x86_64(CACHE_MASK_STORAGE_OUTLINED)版本的结构体构造的,成员大小相同、顺序一致。因此,把系统的 Class 指针强制转换为这些结构体的指针之后,各成员的偏移量是正确的,可以直接读取内容。

// Mirror of the runtime's bucket_t using the SEL-first field order
// (the non-arm64 branch of the runtime struct).
struct jy_bucket_t {
    SEL _sel;        // selector stored in this slot
    uintptr_t _imp;  // stored imp value, kept as a plain integer
};

// Mirror of cache_t for the outlined-mask layout: buckets pointer, mask,
// then the two 16-bit fields _flags and _occupied, in that order.
struct jy_cache_t {
    struct jy_bucket_t * _buckets; // pointer to the bucket array
    uint32_t _mask;                // mask (lldb prints the runtime field as unsigned int)
    uint16_t _flags;
    uint16_t _occupied;            // count of occupied entries
};

// Stand-in for the runtime's class_data_bits_t.
// NOTE(review): assumes class_data_bits_t is exactly one uintptr_t wide —
// confirm against the runtime source.
struct jy_class_data_bits_t {
    uintptr_t bits;
};

// Mirror of objc_class: isa, superclass, cache, bits — the same member order
// as the runtime struct, so a Class pointer can be read through it.
struct jy_objc_class {
    Class ISA;
    Class superclass;
    struct jy_cache_t cache;
    struct jy_class_data_bits_t bits;
};


// Experiment driver: calls method1..method6 on one BClass instance, then
// reinterprets the Class pointer as struct jy_objc_class and logs the cache's
// occupied count, its mask, and the contents of its bucket slots.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        // insert code here...
        BClass *b = [BClass alloc];
        Class bClass = [BClass class];

        [b method1];
        [b method2];
        [b method3];
        [b method4];
        [b method5];
        [b method6];

        // View the class's memory through our mirror struct. The member
        // offsets line up with the runtime layout, so the fields read correctly.
        struct jy_objc_class *jy_class = (__bridge struct jy_objc_class *)(bClass);

        // jy_class is a pointer to a struct, so members are read with ->.
        NSLog(@"_occupied: %hu, _mask:%u",jy_class->cache._occupied, jy_class->cache._mask);

        // Walk bucket slots 0 .. _mask-1. The slot at index _mask is not read.
        // NOTE(review): presumably intentional — confirm whether that last
        // slot holds a regular entry in this layout before widening the loop.
        for (uint32_t i = 0; i < jy_class->cache._mask; i ++) {
            // Copy out the bucket in slot i.
            struct jy_bucket_t bucket = jy_class->cache._buckets[i];
            // Log sel and imp. An empty slot has a NULL sel, which logs as (null).
            // _imp is a uintptr_t, so it is cast to void * to match %p.
            // The logged value is the raw stored word, not a decoded IMP.
            NSLog(@"第%u个:  sel:%@  imp:%p ", i, NSStringFromSelector(bucket._sel), (void *)bucket._imp);
        }
    }
    return 0;
}

输出信息

2020-09-21 23:47:57.482728+0800 KCObjc[4201:45645] -[BClass method1]
2020-09-21 23:47:57.483278+0800 KCObjc[4201:45645] -[BClass method2]
2020-09-21 23:47:57.483337+0800 KCObjc[4201:45645] -[BClass method3]
2020-09-21 23:47:57.483380+0800 KCObjc[4201:45645] -[BClass method4]
2020-09-21 23:47:57.483419+0800 KCObjc[4201:45645] -[BClass method5]
2020-09-21 23:47:57.483457+0800 KCObjc[4201:45645] -[BClass method6]
2020-09-21 23:47:57.483492+0800 KCObjc[4201:45645] _occupied: 4, _mask:7
2020-09-21 23:47:57.483686+0800 KCObjc[4201:45645] 第0个:  sel:method6  imp:0x2998 
2020-09-21 23:47:57.483802+0800 KCObjc[4201:45645] 第1个:  sel:(null)  imp:0x0 
2020-09-21 23:47:57.483858+0800 KCObjc[4201:45645] 第2个:  sel:(null)  imp:0x0 
2020-09-21 23:47:57.483909+0800 KCObjc[4201:45645] 第3个:  sel:(null)  imp:0x0 
2020-09-21 23:47:57.483989+0800 KCObjc[4201:45645] 第4个:  sel:method3  imp:0x2908 
2020-09-21 23:47:57.484044+0800 KCObjc[4201:45645] 第5个:  sel:method4  imp:0x2938 
2020-09-21 23:47:57.484094+0800 KCObjc[4201:45645] 第6个:  sel:method5  imp:0x29e8 

从输出中可以看出,_buckets 中的条目并不是按调用顺序存放的,而且只剩下 method3 ~ method6,method1、method2 已经不在缓存中了。

缓存流程

在 cache_t 中我们找到下面两个方法, 是跟 buckets 和 occupied 有关

    void incrementOccupied();
    void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);
// Adds one to the count of occupied cache entries; it has no other effect.
void cache_t::incrementOccupied()
{
    _occupied += 1;
}

调用 setBucketsAndMask 的方法链如下(原文此处为图片):cache_t::insert → cache_t::reallocate → cache_t::setBucketsAndMask

cache_t::insert

// Inserts the (sel, imp) pair into this cache, first replacing or growing the
// bucket array when needed.
//   cls      - passed through to bucket_t::set and to bad_cache
//   sel      - selector used as the lookup key
//   imp      - implementation to store for sel
//   receiver - only used for the bad_cache report at the end
void cache_t::insert(Class cls, SEL sel, IMP imp, id receiver)
{
    // ... (earlier part of the function is omitted in this excerpt)

    // Use the cache as-is if it is less than 3/4 full
    mask_t newOccupied = occupied() + 1;
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    if (slowpath(isConstantEmptyCache())) {
        // Cache is read-only. Replace it.
        if (!capacity) capacity = INIT_CACHE_SIZE;
        // freeOld is false: the old (constant empty) buckets are not freed.
        reallocate(oldCapacity, capacity, /* freeOld */false);
    }
    else if (fastpath(newOccupied + CACHE_END_MARKER <= capacity / 4 * 3)) {
        // Cache is less than 3/4 full. Use it as-is.
    }
    else {
        // Grow: double the capacity (or start at INIT_CACHE_SIZE), capped at
        // MAX_CACHE_SIZE. reallocate() installs fresh buckets and does not
        // copy the old entries over.
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        reallocate(oldCapacity, capacity, true);
    }

    bucket_t *b = buckets();
    mask_t m = capacity - 1;
    // Hash: (mask_t)(uintptr_t)sel & mask;
    // The start slot is derived from the selector's bits, so entries are not
    // stored in the order in which the methods were called.
    mask_t begin = cache_hash(sel, m);
    mask_t i = begin;

    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot because the
    // minimum size is 4 and we resized at 3/4 full.
    do {
        if (fastpath(b[i].sel() == 0)) {
            // Free slot: count it as occupied, then store the pair.
            incrementOccupied();
            b[i].set<Atomic, Encoded>(sel, imp, cls);
            return;
        }
        if (b[i].sel() == sel) {
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    } while (fastpath((i = cache_next(i, m)) != begin));

    // Reached only if the scan wrapped back to its start without finding a
    // free slot or an existing entry for sel.
    cache_t::bad_cache(receiver, (SEL)sel, cls);
}

reallocate

// Replaces the bucket array with a newly allocated one of newCapacity slots.
//   oldCapacity - capacity of the current buckets; only passed to cache_collect_free
//   newCapacity - capacity of the new buckets; must be > 0
//   freeOld     - when true, the old buckets are handed to cache_collect_free
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
    bucket_t *oldBuckets = buckets();
    bucket_t *newBuckets = allocateBuckets(newCapacity);

    // Cache's old contents are not propagated. 
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this

    ASSERT(newCapacity > 0);
    // newCapacity - 1 (the new mask) must survive a round trip through mask_t.
    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

    // Install the new buckets together with mask = newCapacity - 1.
    setBucketsAndMask(newBuckets, newCapacity - 1);
    
    // The old buckets are released only after the new ones are installed.
    if (freeOld) {
        cache_collect_free(oldBuckets, oldCapacity);
    }
}

模拟器 setBucketsAndMask

// Outlined-storage variant: stores the new buckets pointer and the new mask
// into their separate fields, buckets first, and resets _occupied to 0.
void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
{
    // objc_msgSend uses mask and buckets with no locks.
    // It is safe for objc_msgSend to see new buckets but old mask.
    // (It will get a cache miss but not overrun the buckets' bounds).
    // It is unsafe for objc_msgSend to see old buckets and new mask.
    // Therefore we write new buckets, wait a lot, then write new mask.
    // objc_msgSend reads mask first, then buckets.

#ifdef __arm__
    // arm: relaxed stores, ordered by explicit barriers.
    // ensure other threads see buckets contents before buckets pointer
    mega_barrier();

    _buckets.store(newBuckets, memory_order::memory_order_relaxed);
    
    // ensure other threads see new buckets before new mask
    mega_barrier();
    
    _mask.store(newMask, memory_order::memory_order_relaxed);
    _occupied = 0;
#elif __x86_64__ || i386
    // x86: release stores are used here instead of explicit barriers.
    // ensure other threads see buckets contents before buckets pointer
    _buckets.store(newBuckets, memory_order::memory_order_release);
    
    // ensure other threads see new buckets before new mask
    _mask.store(newMask, memory_order::memory_order_release);
    _occupied = 0;
#else
#error Don't know how to do setBucketsAndMask on this architecture.
#endif
}

64位真机 setBucketsAndMask

void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
{
    uintptr_t buckets = (uintptr_t)newBuckets;
    uintptr_t mask = (uintptr_t)newMask;
    
    ASSERT(buckets <= bucketsMask);
    ASSERT(mask <= maxMask);
    
    _maskAndBuckets.store(((uintptr_t)newMask << maskShift) | (uintptr_t)newBuckets, std::memory_order_relaxed);
    _occupied = 0;
}