手撕iOS底层12 -- cache_t深入分析

最近在看工作，求内推啊微信号`bgwx7788`

// Excerpt of the class layout; the trailing numbers are the author's
// per-field sizes in bytes. The remaining members are elided ("....").
struct objc_class : objc_object {
    // Class ISA;       // 8
    Class superclass;   // 8
    cache_t cache;      // 16        // formerly cache pointer and vtable
    class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags (properties, methods, protocols) // 8
	....
}

前边的系列文章分析了objc_class的isa， superClass与Class同理， bits也分析了，本文来分析cache_t。

cache_t的地址偏移是8+8=16字节，转为16进制是0x10，也就是在类的基地址上加0x10。

0x01 - `cache_t`里存储了什么？

先来看下cache_t这个结构体类型，了解下它里边存了一些什么东西?

truct cache_t {
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED
  // explicit_atomic 显示原子性，目的是为了能够保证 增删查改时， 保证线程安全
  // 等价于 struct bucket_t * _buckets;
  // bucket_t 里放的是sel和imp， 根据架构不同，存放的顺序也不一样
  // cache_t 通过 buckets() 读取_buckets
    explicit_atomic<struct bucket_t *> _buckets; //8
    explicit_atomic<mask_t> _mask; // 4
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
    explicit_atomic<uintptr_t> _maskAndBuckets; // 说白了， 为了节省内存，读取方便mask和buckets存在一起
    mask_t _mask_unused;
    
    // How much the mask is shifted by.
    static constexpr uintptr_t maskShift = 48;
    
    // Additional bits after the mask which must be zero. msgSend
    // takes advantage of these additional bits to construct the value
    // `mask << 4` from `_maskAndBuckets` in a single instruction.
    static constexpr uintptr_t maskZeroBits = 4;
    
    // The largest mask value we can store.
    static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;
    
    // The mask applied to `_maskAndBuckets` to retrieve the buckets pointer.
    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;
    
    // Ensure we have enough bits for the buckets pointer.
    static_assert(bucketsMask >= MACH_VM_MAX_ADDRESS, "Bucket field doesn't have enough bits for arbitrary pointers.");
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
    // _maskAndBuckets stores the mask shift in the low 4 bits, and
    // the buckets pointer in the remainder of the value. The mask
    // shift is the value where (0xffff >> shift) produces the correct
    // mask. This is equal to 16 - log2(cache_size).
    explicit_atomic<uintptr_t> _maskAndBuckets;
    mask_t _mask_unused;

    static constexpr uintptr_t maskBits = 4;
    static constexpr uintptr_t maskMask = (1 << maskBits) - 1;
    static constexpr uintptr_t bucketsMask = ~maskMask;
#else
#error Unknown cache mask storage type.
#endif
  
#if __LP64__
    uint16_t _flags; // 2
#endif
    uint16_t _occupied; //2
public:
    static bucket_t *emptyBuckets();
    
    struct bucket_t *buckets();
    mask_t mask();
    mask_t occupied();
    //
    void incrementOccupied();
    void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);
    void initializeToEmpty();

    unsigned capacity();
    bool isConstantEmptyCache();
    bool canBeFreed();
  .......
}

先来看下这几个宏：

CACHE_MASK_STORAGE_OUTLINED 模拟器或者macOS
CACHE_MASK_STORAGE_HIGH_16 真机64位
CACHE_MASK_STORAGE_LOW_4 非64位真机

这几个宏是不同平台对应的宏

// The three possible layouts for the cache's mask/buckets storage.
#define CACHE_MASK_STORAGE_OUTLINED 1
#define CACHE_MASK_STORAGE_HIGH_16 2
#define CACHE_MASK_STORAGE_LOW_4 3
// The layout is chosen at preprocessing time from the target architecture;
// struct cache_t then tests CACHE_MASK_STORAGE to pick its members.
#if defined(__arm64__) && __LP64__ // arm64 device, 64-bit
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_HIGH_16
#elif defined(__arm64__) && !__LP64__ // arm64 device, not 64-bit
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_LOW_4
#else
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_OUTLINED // everything else (simulator / macOS)
#endif

`bucket_t`类型结构

// One cache entry: a selector plus the stored implementation value
// (excerpt; only the two accessors are shown).
struct bucket_t {
private:
    // IMP-first is better for arm64e ptrauth and no worse for arm64.
    // SEL-first is better for armv7* and i386 and x86_64.
#if __arm64__ // arm64 (device)
  // the fields are wrapped in explicit_atomic here as well
    explicit_atomic<uintptr_t> _imp;
    explicit_atomic<SEL> _sel;
#else // not arm64
    explicit_atomic<SEL> _sel;
    explicit_atomic<uintptr_t> _imp;
#endif
  
  public:
    inline SEL sel() const { return _sel.load(memory_order::memory_order_relaxed); } // returns the stored sel
		// Returns the stored IMP, decoded according to CACHE_IMP_ENCODING; nil when the slot holds no imp.
    inline IMP imp(Class cls) const {
        uintptr_t imp = _imp.load(memory_order::memory_order_relaxed);
        if (!imp) return nil;
#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_PTRAUTH
        SEL sel = _sel.load(memory_order::memory_order_relaxed);
        return (IMP)
            ptrauth_auth_and_resign((const void *)imp,
                                    ptrauth_key_process_dependent_code,
                                    modifierForSEL(sel, cls),
                                    ptrauth_key_function_pointer, 0);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
        return (IMP)(imp ^ (uintptr_t)cls);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
        return (IMP)imp;
#else
#error Unknown method cache IMP encoding.
#endif
    }
}

上边俩个简单的看了cache_t的结构以及它的成员bucket_t结构，说白了，就是在cache_t里缓存的就是sel和imp

0x02 - 通过`LLDB`验证`cache_t`存储数据

现在我们通过源码环境 来验证上边的分析

// Minimal test class: every method only logs its own name, so calling one
// has no effect other than going through message dispatch.
@interface Person : NSObject
- (void)sayHello;
- (void)sayCode;
- (void)say1;
- (void)say2;
- (void)say3;
- (void)say4;
@end

@implementation Person
- (void)sayHello
{
    NSLog(@"%s", __func__);
}
- (void)sayCode
{
    NSLog(@"%s", __func__);
}
- (void)say1
{
    NSLog(@"%s", __func__);
}
- (void)say2
{
    NSLog(@"%s", __func__);
}
// say3 / say4 are the methods the standalone cache test invokes.
- (void)say3
{
    NSLog(@"%s", __func__);
}
- (void)say4
{
    NSLog(@"%s", __func__);
}
@end

通过以上这个简单的Person类来调用这四个方法，来验证cache_t

先把breakpoint停到第一个要调用的方法，这时候这个person对象任何方法还没有调用，那么它的方法缓存是空的，然后在debug区域打印出类的首地址，在这个首地址上加0x10，就会得到cache_t的内存地址

平移得到的cache_t地址，打印其内容，因为没有调用过方法，所以打印出的内容是空的，如_occupied = 0

再通过struct bucket_t *buckets();这个方法来获取_buckets属性，拿到了_buckets属性，就可以通过bucket_t的sel()和imp(Class)方法来获取对应的sel和imp。

由上分析可知，每调用一次方法，就执行一次方法缓存。

那现在我们过一个断点，执行第一个方法

⚠️ 这里通过buckets获取的是一个数组，所以可以通过[]取值，也可以通过指针+1的形式来取值，通过这两种方式来获取多个bucket_t的值

看到缓存里有了第一个缓存的方法，然后我们再通过MachOView来看下这个方法是我们调用的sayHello方法吗？

在MachOView里看到的方法地址和我们打印输出的地址是一模一样的，所以确认是我们调用的sayHello方法。

0x03 - 脱离`objc-781`源码环境测试`cache_t`存储数据

首先打开一个普通工程，将源码部分类型拷贝过来，如下：

// Hand-copied mirrors of the runtime types, used to read a class's cache
// from an ordinary project. The fields follow the outlined (_buckets + _mask)
// layout and the SEL-first bucket order, i.e. the non-arm64 configuration.
typedef uint32_t mask_t;  // x86_64 & arm64 asm are less efficient with 16-bits
struct test_bucket_t {
    SEL _sel;
    IMP _imp;
};

struct test_cache_t {
    struct test_bucket_t * _buckets; // 8
    mask_t _mask; // 4
    uint16_t _flags;
    uint16_t _occupied;
};

struct test_objc_class {
    Class ISA;       // NOTE: in the runtime source this member is inherited from the parent struct; there is no parent here, so it must be written out explicitly
    Class superclass;   // 8
    struct test_cache_t cache;      // 16        // formerly cache pointer and vtable
};

// Calls four Person methods, then reinterprets the Person class through the
// hand-copied test_objc_class layout and logs the contents of its method cache.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        Person *person = [Person alloc];
        Class personClass = [Person class];

        // Each message send below goes through the class's method cache.
        [person say1];
        [person say2];
        [person say3];
        [person say4];

        struct test_objc_class *rawClass = (__bridge struct test_objc_class *)(personClass);
        struct test_cache_t *cache = &rawClass->cache;
        NSLog(@"%hu - %u",cache->_occupied,cache->_mask);

        // Visit bucket indices 0 .. _mask - 1; _mask is re-read on every pass.
        mask_t index = 0;
        while (index < cache->_mask) {
            struct test_bucket_t *slot = &cache->_buckets[index];
            NSLog(@"%@ - %p",NSStringFromSelector(slot->_sel),slot->_imp);
            index++;
        }
    }
    return 0;
}

❤️有兴趣的烙铁可以自己玩一下。这样比在LLDB玩的更直观一些～

调用一次方法的打印。

调用4次方法的打印

通过打印可以看到一些问题？

输出的2 - 7代表什么？即occupied和mask是什么？
为什么调用1次方法和4次方法打印输出的内容不一样？
bucket为什么会有“丢失”的情况？如4次方法打印输出只有 say3和say4

0x04 -`cache_t`真正出击🧍‍♀️

带着上边的一些问题，来看cache_t的源码，看看能否得到答案？在上面的例子中，发现只有occupied增长到一定的数量时，才会引起mask也跟着变化，在cache_t中发现只有函数incrementOccupied()会让occupied增加一。

// Increases the occupied-bucket counter by one.
void cache_t::incrementOccupied() 
{
    _occupied++;
}

再顺着这条线，去看一下哪里使用了这个函数，找到今天的核心函数void cache_t::insert

/* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */
enum {
    INIT_CACHE_SIZE_LOG2 = 2,
    INIT_CACHE_SIZE      = (1 << INIT_CACHE_SIZE_LOG2),  // 4 buckets
    MAX_CACHE_SIZE_LOG2  = 16,
    MAX_CACHE_SIZE       = (1 << MAX_CACHE_SIZE_LOG2),   // 65536 buckets
};
// Adds (sel, imp) to this cache. Allocates buckets on first use, replaces
// them with a larger array once the 3/4 threshold would be exceeded, then
// probes from the hashed index for a free slot.
ALWAYS_INLINE
void cache_t::insert(Class cls, SEL sel, IMP imp, id receiver)
{
#if CONFIG_USE_CACHE_LOCK
    cacheUpdateLock.assertLocked();
#else
    runtimeLock.assertLocked();
#endif

    ASSERT(sel != 0 && cls->isInitialized());

    // Use the cache as-is if it is less than 3/4 full
    mask_t newOccupied = occupied() + 1;// before anything is cached occupied() is 0, so newOccupied is 1
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    if (slowpath(isConstantEmptyCache())) { // hinted as the unlikely branch; presumably taken when nothing has been cached yet — TODO confirm against isConstantEmptyCache()
        // Cache is read-only. Replace it.
        if (!capacity) capacity = INIT_CACHE_SIZE; // initial capacity is 4 (1 << 2)
        reallocate(oldCapacity, capacity, /* freeOld */false); // first allocation; the previous buckets are not queued for freeing
    }
    else if (fastpath(newOccupied + CACHE_END_MARKER <= capacity / 4 * 3)) {
        // Cache is less than 3/4 full. Use it as-is.
        // Within the 3/4 threshold: no resize,
        // nothing to do in this branch.
    }
    else {
      // Past the 3/4 threshold: grow the cache.
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE; // double the capacity, e.g. 4 -> 8
        if (capacity > MAX_CACHE_SIZE) { // clamp to the maximum
            capacity = MAX_CACHE_SIZE;
        }
      // Existing buckets are past the threshold: allocate a new array and queue the old one for freeing.
        reallocate(oldCapacity, capacity, true);
    }

    bucket_t *b = buckets(); // current bucket array
    mask_t m = capacity - 1; // mask
    mask_t begin = cache_hash(sel, m); // starting index: sel hashed against the mask
    mask_t i = begin;

    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot because the
    // minimum size is 4 and we resized at 3/4 full.
    // Probe until an empty slot or a matching sel is found.
    do {
        if (fastpath(b[i].sel() == 0)) {// empty slot at this index: store the entry here
            incrementOccupied();
            b[i].set<Atomic, Encoded>(sel, imp, cls);
            return;
        }
        if (b[i].sel() == sel) { // slot taken: check whether it already holds this sel
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    } while (fastpath((i = cache_next(i, m)) != begin)); // neither case matched: move to the next probe index via cache_next

    cache_t::bad_cache(receiver, (SEL)sel, cls);
}

这个void cache_t::insert函数从名字上看是插入的意思，在缓存中插入方法，

上面的代码片段主要做了这么几步

首先获取_occupied 现在目前为止占用的大小， 缓存占用大小
根据这个occupied大小执行指定的操作
在bucket内部存储imp和sel

关于occupied 缓存占用大小 的几点补充：

只要对象生成，调用init方法也会影响occupied的值，
属性的get和set方法也会影响occupied的值

根据`occupied`的值来决定执行什么操作

第一次执行的时候，缓存还没有，默认创建开辟4个

if (slowpath(isConstantEmptyCache())) { // 小概率事件， occupied()是0的时候，也就是创建缓存是小概率事件
        // Cache is read-only. Replace it.
        if (!capacity) capacity = INIT_CACHE_SIZE; //  初始化是4 （capacity = 1<<2）
        reallocate(oldCapacity, capacity, /* freeOld */false); // 初始化创建 开辟内存空间
    }

如果占用的缓存小于四分之三的时候，不作任何处理

if (fastpath(newOccupied + CACHE_END_MARKER <= capacity / 4 * 3)) {
        // Cache is less than 3/4 full. Use it as-is.
      	// 在这个小于四分之三范围内不用扩容
        // 在范围内什么也不用动
    }

如果超过缓存容量的四分之三，需要扩容并重新开辟空间

else {
      // 超出四分之三，就开始扩容处理
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE; // 扩容俩倍 2*4=8
        if (capacity > MAX_CACHE_SIZE) { //
            capacity = MAX_CACHE_SIZE;
        }
      // 如果执行到这里， 说明之前存储过，有旧的缓存，但是已经满了， 开始重新分配内存
        reallocate(oldCapacity, capacity, true);
    }

// Replaces the bucket array with a freshly allocated one of newCapacity
// buckets. Existing entries are not copied over. When freeOld is true the
// previous array is handed to cache_collect_free.
ALWAYS_INLINE
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
    bucket_t *oldBuckets = buckets(); // remember the current buckets before they are replaced
    bucket_t *newBuckets = allocateBuckets(newCapacity); // allocate a new array sized for newCapacity

    // Cache's old contents are not propagated. 
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this

    ASSERT(newCapacity > 0);
    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

    setBucketsAndMask(newBuckets, newCapacity - 1); // install the new buckets; the mask is newCapacity - 1
    
    if (freeOld) {
        cache_collect_free(oldBuckets, oldCapacity);
    }
}

// Returns a zero-filled bucket array sized by cache_t::bytesForCapacity(newCapacity),
// with the end-marker bucket initialised (sel = 1, imp pointing at or just before
// the first bucket depending on the architecture).
bucket_t *allocateBuckets(mask_t newCapacity)
{
    // Allocate one extra bucket to mark the end of the list.
    // This can't overflow mask_t because newCapacity is a power of 2.
    bucket_t *newBuckets = (bucket_t *)
        calloc(cache_t::bytesForCapacity(newCapacity), 1);

    bucket_t *end = cache_t::endMarker(newBuckets, newCapacity);

#if __arm__
    // End marker's sel is 1 and imp points BEFORE the first bucket.
    // This saves an instruction in objc_msgSend.
    end->set<NotAtomic, Raw>((SEL)(uintptr_t)1, (IMP)(newBuckets - 1), nil);
#else
    // End marker's sel is 1 and imp points to the first bucket.
    end->set<NotAtomic, Raw>((SEL)(uintptr_t)1, (IMP)newBuckets, nil);
#endif
    
    if (PrintCaches) recordNewCache(newCapacity);

    return newBuckets;
}

allocateBuckets 向系统申请开辟空间，也就是申请newCapacity个内存空间的buckets

setBucketsAndMask 把申请的buckets存入缓存中，这里也是根据平台架构执行不同的操作

如果是真机根据buckets和mask的位置存储，并将occupied设置为0

#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16

// HIGH_16 storage: packs the mask (shifted left by maskShift) and the buckets
// pointer into _maskAndBuckets with one relaxed store, then resets _occupied.
void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
{
    uintptr_t buckets = (uintptr_t)newBuckets;
    uintptr_t mask = (uintptr_t)newMask;
    
    // Both values must fit in their share of the packed word.
    ASSERT(buckets <= bucketsMask);
    ASSERT(mask <= maxMask);
    
    _maskAndBuckets.store(((uintptr_t)newMask << maskShift) | (uintptr_t)newBuckets, std::memory_order_relaxed);
    _occupied = 0;
}

如果是模拟器或者macOS，正常存储bucktes和mask，并将occupied设置为0

// Outlined storage: writes the new buckets pointer first and the new mask
// second (ordering explained below), then resets _occupied to 0.
void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
{
    // objc_msgSend uses mask and buckets with no locks.
    // It is safe for objc_msgSend to see new buckets but old mask.
    // (It will get a cache miss but not overrun the buckets' bounds).
    // It is unsafe for objc_msgSend to see old buckets and new mask.
    // Therefore we write new buckets, wait a lot, then write new mask.
    // objc_msgSend reads mask first, then buckets.

#ifdef __arm__
    // ensure other threads see buckets contents before buckets pointer
    mega_barrier();

    _buckets.store(newBuckets, memory_order::memory_order_relaxed);
    
    // ensure other threads see new buckets before new mask
    mega_barrier();
    
    _mask.store(newMask, memory_order::memory_order_relaxed);
    _occupied = 0;
#elif __x86_64__ || i386
    // ensure other threads see buckets contents before buckets pointer
    _buckets.store(newBuckets, memory_order::memory_order_release);
    
    // ensure other threads see new buckets before new mask
    _mask.store(newMask, memory_order::memory_order_release);
    _occupied = 0; //  the new buckets start out with no entries counted
#else
#error Don't know how to do setBucketsAndMask on this architecture.
#endif
}

 if (freeOld) {
        cache_collect_free(oldBuckets, oldCapacity);
    }
/***********************************************************************
* cache_collect_free.  Add the specified malloc'd memory to the list
* of them to free at some later point.
* size is used for the collection threshold. It does not have to be 
* precisely the block's size.
* Cache locks: cacheUpdateLock must be held by the caller.
**********************************************************************/
// Queues an old bucket array on the garbage list and then invokes
// cache_collect(false).
static void cache_collect_free(bucket_t *data, mask_t capacity)
{
#if CONFIG_USE_CACHE_LOCK
    cacheUpdateLock.assertLocked();
#else
    runtimeLock.assertLocked();
#endif

    if (PrintCaches) recordDeadCache(capacity);

    _garbage_make_room (); // make sure the garbage table exists and has a free slot
    garbage_byte_size += cache_t::bytesForCapacity(capacity); 
    garbage_refs[garbage_count++] = data; // append the old bucket array pointer to the table
    cache_collect(false);// presumably frees queued garbage when it is safe to do so — cache_collect is not shown here, TODO confirm
}
// Ensures garbage_refs can take one more entry: allocates the table on the
// first call and doubles it whenever it is full.
static void _garbage_make_room(void)
{
    static int first = 1;

    // Create the collection table the first time it is needed
    if (first)
    {
        first = 0;
        garbage_refs = (bucket_t**)
            malloc(INIT_GARBAGE_COUNT * sizeof(void *));
        garbage_max = INIT_GARBAGE_COUNT;
    }

    // Double the table if it is full
    // NOTE(review): the realloc result is assigned straight back without a failure check.
    else if (garbage_count == garbage_max)
    {
        garbage_refs = (bucket_t**)
            realloc(garbage_refs, garbage_max * 2 * sizeof(void *));
        garbage_max *= 2; // record the doubled table capacity
    }
}

如果有旧的buckets，会调用cache_collect_free来清空oldCapacity个大小的内存，
如果第一次，需要创建垃圾回收空间
非第一次，则将内存扩容加大，原有基础*2
cache_collect 方法垃圾回收，清理旧的bucket

开始存储`sel`和`imp`

// Class points to cache. SEL is key. Cache buckets store SEL+IMP.
// Caches are never built in the dyld shared cache.

// Maps a selector to a bucket index: the selector's address bits ANDed with mask.
static inline mask_t cache_hash(SEL sel, mask_t mask) 
{
    return (mask_t)(uintptr_t)sel & mask;
}

这里主要是根据cache_hash(sel, m);这个方法来生成存储下标，分三种情况

如果当前下标的b[i].sel() == 0 说明是空的，可以存储sel-imp，并将occupied自增1

 if (fastpath(b[i].sel() == 0)) {
            incrementOccupied(); // 
            b[i].set<Atomic, Encoded>(sel, imp, cls);
            return;
        }

如果当前下标的位置不为空，判断当前sel是否是同一个，是同一个则直接return

if (b[i].sel() == sel) {
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }

如果以上俩个条件都不满足，则需要重新经过cache_next(i, m) ，即哈希冲突算法，重新计算存储下标，再去比对存储

// cache_next returns the index to probe after i when slot i is taken.
// The direction of the scan depends on the architecture.
#if __arm__  ||  __x86_64__  ||  __i386__
// objc_msgSend has few registers available.
// Cache scan increments and wraps at special end-marking bucket.
#define CACHE_END_MARKER 1
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return (i+1) & mask; // step forward by one; the AND with mask wraps the index back to 0
}

#elif __arm64__
// objc_msgSend has lots of registers available.
// Cache scan decrements. No end marker needed.
#define CACHE_END_MARKER 0
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return i ? i-1 : mask; // step back by one; from index 0 wrap around to mask (capacity - 1)
}

#else
#error unknown architecture
#endif

到这里，我想cache_t就要告一段落了，分析到这里，我想上面提出问题的答案也都有了，

_mask等于capacity-1，也是掩码数据，用于在哈希算法和哈希冲突算法中计算哈希下标
_occupied是存储sel-imp占用大小，也就是分配的buckets中占用的个数。
init初始化方法也会导致occupied变化。
属性方法和方法调用都会影响occupied的变化。
为什么随着方法调用的增加，occupied和mask会变化？

因为在调用过程中，buckets会随着数量的增加进行扩容，以满足当前缓存的需要，具体怎么扩容，可以参考上边，
为什么buckets有丢失的情况？

因为在扩容的时候，会对之前的内存释放，重新申请内存，所以之前缓存的内容也不存在了。

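关于缓存容量的上限：按上面insert前的枚举，扩容时capacity最大为MAX_CACHE_SIZE = 1 << 16（MAX_CACHE_SIZE_LOG2 = 16）；这段枚举没有按架构区分，所以从这里给出的代码看，真机和模拟器的上限相同。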

手撕iOS底层12 -- cache_t深入分析

最近在看工作，求内推啊 微信号bgwx7788

0x01 - cache_t里存储了什么？

bucket_t类型结构

0x02 - 通过LLDB验证cache_t存储数据

0x03 - 脱离objc-781源码环境测试cache_t存储数据

0x04 -cache_t真正出击🧍‍♀️

上面的代码片段主要做了这么几步

根据occupied的值来决定执行什么操作

开始存储sel和imp

最近在看工作，求内推啊 微信号bgwx7788

欢迎大佬留言指正😄，码字不易，觉得好给个赞👍 有任何表达或者理解失误请留言交流；共同进步；

最近在看工作，求内推啊微信号`bgwx7788`

0x01 - `cache_t`里存储了什么？

`bucket_t`类型结构

0x02 - 通过`LLDB`验证`cache_t`存储数据

0x03 - 脱离`objc-781`源码环境测试`cache_t`存储数据

0x04 -`cache_t`真正出击🧍‍♀️

根据`occupied`的值来决定执行什么操作

开始存储`sel`和`imp`

最近在看工作，求内推啊微信号`bgwx7788`