iOS底层学习 - 类的前世今生(二)

cache_t的底层是一个哈希表，用于缓存调用过的方法，提高查找速度，不用每次都从class_data_bits_t中遍历查找。

通过上一个章节，我们已经知道了类在底层是以什么样的方式存在的，并且对类的属性、成员变量和方法的存储也有了一定的了解。但是类的方法是怎么读取的，每次都要从存储的列表中读出来么，是不是有一定的缓存机制呢？我们开始研究

`cache_t`结构

通过查看类的结构，我们知道isa是用来指向类信息的，superclass是父类相关，class_data_bits_t是用来存储属性，方法等数据的，那么如果有缓存机制的话，一定是存储在cache_t中了

// Abridged excerpt of the runtime's class structure: it derives from
// objc_object and, in the part shown here, adds the superclass pointer, the
// method cache and the class data bits.
// The trailing numbers are the article's per-field size annotations in bytes
// (presumably for a 64-bit target -- TODO confirm).
struct objc_class : objc_object {
    // Class ISA;           // 8 -- commented out in this excerpt; presumably inherited from objc_object
    Class superclass;       // 8 -- the parent class
    cache_t cache;          // 16 -- method cache; formerly cache pointer and vtable
    class_data_bits_t bits;  
...省略方法等信息...
};

通过上一章节，我们对cache_t有个初步的了解，结构如图

`cache_t`功能

cache_t的底层是一个哈希表存在，用于缓存调用过的方法，提高查找速度，不用每次从class_data_bits_t进行遍历查找。用哈希表存储时，存储的位置是不确定的，空间也有一定的浪费，但是时间复杂度比较低，是典型的空间换时间

`cache_t`定义

`struct bucket_t *_buckets`

struct bucket_t *_buckets是一个结构体指针

cache_key_t为方法的SEL，也就是方法名,
MethodCacheIMP为对应的函数的内存地址

从struct bucket_t * find(cache_key_t key, id receiver);方法可以得出，cache_t底层的存储是一个以cache_key_t为key，bucket_t为value的一个哈希表

// One slot of the method cache: a (key, imp) pair.
struct bucket_t {
private:
    // The field order depends on the architecture:
    //   - IMP-first is better for arm64e ptrauth and no worse for arm64.
    //   - SEL-first is better for armv7*, i386 and x86_64.
#if __arm64__
    MethodCacheIMP _imp;
    cache_key_t _key;
#else
    cache_key_t _key;
    MethodCacheIMP _imp;
#endif

public:
    // Returns the stored key.
    cache_key_t key() const
    {
        return _key;
    }

    // Returns the stored implementation, converted to IMP.
    IMP imp() const
    {
        return (IMP)_imp;
    }

    // Overwrites only the key.
    void setKey(cache_key_t newKey)
    {
        _key = newKey;
    }

    // Overwrites only the implementation.
    void setImp(IMP newImp)
    {
        _imp = newImp;
    }

    // Takes both a key and an implementation; declared here, defined
    // elsewhere (the definition is not part of this excerpt).
    void set(cache_key_t newKey, IMP newImp);
};

`mask_t _mask`

mask_t _mask只是一个32位的无符号整数（uint32_t），值等于(哈希表长度 - 1)

// Integer type used for the cache mask and the occupied count: 32-bit unsigned.
using mask_t = uint32_t;

`mask_t _occupied`

同理_occupied也是一个值，记录了缓存的方法的数量

`cache_t`流程

通过对objc_cache.mm源码的注释的阅读，我们可以得到一个缓存读写的大致过程。相关读取的过程，即在方法转发过程中，获取到已缓存的IMP函数指针，从而获得方法，重点在存取的过程，可以从方法cache_fill开始

`cache_fill`

通过注释我们得知，存取的过程是需要加锁来保证线程安全的，_collecting_in_critical类似轮询线程，保证调用，所以，主要的实现逻辑在cache_fill_nolock(cls, sel, imp, receiver);中进行

// Entry point for caching `imp` under `sel` in the method cache of `cls`.
// The actual work is delegated to cache_fill_nolock().
void cache_fill(Class cls, SEL sel, IMP imp, id receiver)
{
#if DEBUG_TASK_THREADS
    // In this configuration nothing is cached; only
    // _collecting_in_critical() is invoked.
    _collecting_in_critical();
#else
    // cache_fill_nolock() expects cacheUpdateLock to be held; the guard
    // object takes it here (presumably releasing it on scope exit -- see
    // the locker class).
    mutex_locker_t guard(cacheUpdateLock);
    cache_fill_nolock(cls, sel, imp, receiver);
#endif
}

在locker构造时加锁，析构时解锁，正好保护了方法作用域内的方法调用。这和 EasyReact 中大量使用的__attribute__((cleanup(AnyFUNC), unused))如出一辙，都是为了实现自动解锁的效果。

class locker : nocopy_t {
        mutex_tt& lock;
    public:
        locker(mutex_tt& newLock) 
            : lock(newLock) { lock.lock(); }
        ~locker() { lock.unlock(); }
    };

`cache_fill_nolock`

// Inserts (sel -> imp) into the method cache of `cls`.
// The caller must already hold cacheUpdateLock (asserted on entry).
// Steps: bail out if the class is not initialized or the selector is already
// cached; make sure the table is writable and no more than 3/4 full
// (allocating or expanding it otherwise); then store the entry in the slot
// returned by find(). `receiver` is only forwarded to find().
static void cache_fill_nolock(Class cls, SEL sel, IMP imp, id receiver)
{
    cacheUpdateLock.assertLocked();

    // Never cache before +initialize is done
    if (!cls->isInitialized()) return;

    // Make sure the entry was not added to the cache by some other thread 
    // before we grabbed the cacheUpdateLock.
    
    if (cache_getImp(cls, sel)) return;

    cache_t *cache = getCache(cls);
    cache_key_t key = getKey(sel);

    // Use the cache as-is if it is less than 3/4 full
    
    // newOccupied is the entry count the cache would have after this insert.
    mask_t newOccupied = cache->occupied() + 1;
    mask_t capacity = cache->capacity();
    if (cache->isConstantEmptyCache()) {
    // Cache is read-only. Replace it.
    // `capacity ?: INIT_CACHE_SIZE` (GNU extension) keeps the current capacity
    // when it is non-zero and falls back to INIT_CACHE_SIZE otherwise.
    cache->reallocate(capacity, capacity ?: INIT_CACHE_SIZE);
    }
    else if (newOccupied <= capacity / 4 * 3) {
    // Cache is less than 3/4 full. Use it as-is.
    }
    else {
    // Cache is too full. Expand it.
    cache->expand();
    }

    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot because the 
    // minimum size is 4 and we resized at 3/4 full.
    bucket_t *bucket = cache->find(key, receiver);
    // Only count the entry when the slot was previously empty (key == 0);
    // a slot that already holds this key is simply overwritten.
    if (bucket->key() == 0) cache->incrementOccupied();
    bucket->set(key, imp);
}

通过上面的源码，我们可以得出以下主要步骤

if (!cls->isInitialized()) return;如果类没有进行初始化操作，则不能进行缓存的操作，这个比较好理解
if (cache_getImp(cls, sel)) return;因为有可能其他线程先进行了存储，所以需要再查找一遍，如果可以找到缓存，则直接返回，不需要进行缓存的存储
cache_t *cache = getCache(cls);和cache_key_t key = getKey(sel);分别为获取到类的cache_t对象和根据方法名获取到cache_key_t对象
mask_t newOccupied = cache->occupied() + 1;和mask_t capacity = cache->capacity();分别得到本次写入之后的占用数（当前的occupied在原基础上+1）和缓存当前的容量（即哈希表长度）
if (cache->isConstantEmptyCache())表示cache是只读的，此时，需要执行cache->reallocate(capacity, capacity ?: INIT_CACHE_SIZE);方法进行重新申请内存
else if (newOccupied <= capacity / 4 * 3)没有超出哈希表3/4容量时，跳过直接进行下面缓存的操作
如果超出哈希表3/4容量时，需要执行cache->expand();进行哈希表扩容
bucket_t *bucket = cache->find(key, receiver);根据key进行方法存储
cache->incrementOccupied()Occupied++
bucket->set(key, imp);写入哈希表

通过上面的分析，我们对cache的存储流程有了大体的了解，其中重点的流程在于缓存如何申请空间cache->reallocate(capacity, capacity ?: INIT_CACHE_SIZE)，如何扩容cache->expand();，如何写入缓存bucket_t *bucket = cache->find(key, receiver);

cache->reallocate(capacity, capacity ?: INIT_CACHE_SIZE)

首先是isConstantEmptyCache()方法,表示buckets是一个只读数组。主要逻辑如下

occupied是否为空，即_occupied值是否为0；
根据传入的capacity计算大小，如果小于EMPTY_BYTES,则直接返回(bucket_t *)&_objc_empty_cache,二进制运算后为空
所以这方法基本就表示此时空间还没有初始化，所以需要初始化

bool cache_t::isConstantEmptyCache()
{
    return 
    occupied() == 0  &&  
    buckets() == emptyBucketsForCapacity(capacity(), false);
}

// Returns the shared empty bucket array for `capacity`.
// Small capacities use the static _objc_empty_cache; larger ones use
// zero-filled heap arrays that are created on demand and kept in a
// function-local list indexed by log2u(capacity).
// When `allocate` is false and no array exists yet for that index, nil is
// returned instead of creating one.
// cacheUpdateLock must be held (asserted on entry).
bucket_t *emptyBucketsForCapacity(mask_t capacity, bool allocate = true)
{
    cacheUpdateLock.assertLocked();

    const size_t byteCount = cache_t::bytesForCapacity(capacity);

    // Use _objc_empty_cache if the buckets is small enough.
    if (byteCount <= EMPTY_BYTES) {
        return (bucket_t *)&_objc_empty_cache;
    }

    // Use shared empty buckets allocated on the heap.
    static bucket_t **sharedList = nil;
    static mask_t sharedListCount = 0;

    const mask_t slot = log2u(capacity);

    // Fast path: an array for this slot has already been published.
    if (slot < sharedListCount) {
        return sharedList[slot];
    }

    // Nothing published yet and the caller does not want an allocation.
    if (!allocate) {
        return nil;
    }

    const mask_t grownCount = slot + 1;
    bucket_t *zeroedBuckets = (bucket_t *)calloc(byteCount, 1);
    sharedList = (bucket_t **)realloc(sharedList,
                                      grownCount * sizeof(bucket_t *));

    // Share zeroedBuckets for every un-allocated size smaller than slot.
    // The array is therefore always fully populated.
    for (mask_t fill = sharedListCount; fill < grownCount; fill++) {
        sharedList[fill] = zeroedBuckets;
    }
    sharedListCount = grownCount;

    if (PrintCaches) {
        _objc_inform("CACHES: new empty buckets at %p (capacity %zu)",
                     zeroedBuckets, (size_t)capacity);
    }

    return sharedList[slot];
}

其次，是cache_t::reallocate方法，这个方法主要是用来申请缓存空间，主要逻辑如下

canBeFreed()表示缓存空间不为空，如果为空则不需要后续的清空操作
bucket_t *oldBuckets = buckets();获取旧的缓存空间，bucket_t *newBuckets = allocateBuckets(newCapacity);是指根据传入的容量，生成新的缓存空间，初始容量为INIT_CACHE_SIZE，即4个bucket（注意单位是bucket个数，而不是4字节）
setBucketsAndMask(newBuckets, newCapacity - 1);设置cache_t中的属性
cache_collect_free(oldBuckets, oldCapacity);释放旧的缓存空间，在新的缓存空间进行缓存

// Replaces the bucket array with a freshly allocated one of `newCapacity`
// entries and updates the mask to newCapacity - 1. Existing entries are not
// copied over. `oldCapacity` is only used when handing the old array to
// cache_collect_free().
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity)
{
    // Decide up front whether the current buckets may be freed afterwards.
    bool freeOld = canBeFreed();

    bucket_t *oldBuckets = buckets();
    bucket_t *newBuckets = allocateBuckets(newCapacity);

    // The cache's old contents are not propagated. 
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this

    assert(newCapacity > 0);
    // newCapacity - 1 must survive a round trip through mask_t.
    assert((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

    setBucketsAndMask(newBuckets, newCapacity - 1);
    
    if (freeOld) {
        // Hand the old array over for release, then run a collection pass.
        cache_collect_free(oldBuckets, oldCapacity);
        cache_collect(false);
    }
}

cache->expand()

这个方法就是判断如果此时存储大于了缓存空间的3/4时，对缓存空间进行扩容，算法也比较简单粗暴，就是之前缓存空间的2倍大小，完成后调用reallocate生成空间

void cache_t::expand()
{
    cacheUpdateLock.assertLocked();
    
    uint32_t oldCapacity = capacity();
    uint32_t newCapacity = oldCapacity ? oldCapacity*2 : INIT_CACHE_SIZE;

    if ((uint32_t)(mask_t)newCapacity != newCapacity) {
        // mask overflow - can not grow further
        // fixme this wastes one bit of mask
        newCapacity = oldCapacity;
    }

    reallocate(oldCapacity, newCapacity);
}

bucket_t *bucket = cache->find(key, receiver);

这个方法就是根据key找到底层哈希表存储的对应的bucket_t，主要流程如下

cache_hash 通过cache_hash函数,即key&mask计算出key值对应的index值 begin，用来记录查询起始索引
do while循环表示用这个i从散列表取值，如果取出来的bucket_t的 key = k，则查询成功，返回该bucket_t，如果key = 0，说明在索引i的位置上还没有缓存过方法，同样需要返回该bucket_t，用于中止缓存查询。
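while条件中的cache_next(i, m)负责把i移动到下一个要检查的位置。按照下面贴出的实现，cache_next为(i+1) & mask，相当于 i = i+1，回到上面do循环里面，查找散列表下一个单元格里面的元素，再次进行key值k的比较；当i指向散列表最后一个元素（i = mask）时，(i+1) & mask的结果为0，i重新指向散列表最首个元素，继续向后遍历。（在arm64架构下，cache_next的实现是 i ? i-1 : mask，即反向递减，当i=0时重新将mask赋值给i，使其指向散列表最后一个元素，原理相同，只是方向相反。）其实就相当于绕圈，把散列表头尾连起来，不就是一个圈嘛，从begin值开始，当走过一圈之后，必然会重新回到begin值，如果此时还没有找到key对应的bucket_t，或者是空的bucket_t，则循环结束，说明查找失败，调用bad_cache方法。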

// Looks up the bucket for key `k`.
// Starts at the slot given by cache_hash(k, mask) and probes with
// cache_next() until it finds either a bucket already holding `k` or an
// empty bucket (key == 0); that bucket is returned.
// If the probe wraps around to the starting slot without a match, the cache
// is reported through bad_cache(); `receiver` is only used for that report.
bucket_t * cache_t::find(cache_key_t k, id receiver)
{
    // 0 is the "empty slot" marker, so it is never a valid key.
    assert(k != 0);

    bucket_t *b = buckets();
    mask_t m = mask();
    mask_t begin = cache_hash(k, m);
    // Start probing at `begin`; `i` is the moving index.
    mask_t i = begin;
    do {
        if (b[i].key() == 0  ||  b[i].key() == k) {
            return &b[i];
        }
    } while ((i = cache_next(i, m)) != begin);
   
    // Recover the owning class from this cache_t's offset inside objc_class.
    Class cls = (Class)((uintptr_t)this - offsetof(objc_class, cache));
    // No return follows; bad_cache presumably does not return -- TODO confirm.
    cache_t::bad_cache(receiver, (SEL)k, cls);
}

// Maps a cache key to its starting slot by keeping only the bits selected
// by `mask`.
static inline mask_t cache_hash(cache_key_t key, mask_t mask)
{
    return static_cast<mask_t>(key & mask);
}

// Returns the slot probed after `i`: one step forward, with the result
// ANDed with `mask`.
static inline mask_t cache_next(mask_t i, mask_t mask)
{
    const mask_t following = i + 1;
    return following & mask;
}

至此，一个cache的基本流程就完成了

总结

当方法调用需要被缓存时，以cache_key_t _key和MethodCacheIMP _imp的方式缓存在类的_buckets中，初始是一个容量为4（即4个bucket，而不是4字节）的哈希表，mask值为哈希表长度-1。存储时，使用SEL转换为的cache_key_t类型的key与mask做&运算（key & mask）的结果来当做下标存入哈希表
当存储数量超过哈希表容量的3/4时，会进行扩容，扩容会清空之前所有缓存，并生成之前缓存空间2倍的新空间进行重新缓存

参考

iOS 底层拾遗：objc_msgSend 与方法缓存

Runtime笔记（三）—— OC Class的方法缓存cache_t

iOS底层学习 - 类的前世今生(二)

cache_t结构

cache_t功能

cache_t定义

struct bucket_t *_buckets

mask_t _mask

mask_t _occupied

cache_t流程

cache_fill

cache_fill_nolock