cache_t in Depth


While exploring objc_class, one piece was left untouched: the cache_t cache member. Let's dig into it now.

cache_t source

As usual, let's start with the cache_t source (some code omitted).

struct cache_t {
private:
    explicit_atomic<uintptr_t> _bucketsAndMaybeMask;
    union {
        struct {
            explicit_atomic<mask_t>    _maybeMask;
#if __LP64__
            uint16_t                   _flags;
#endif
            uint16_t                   _occupied;
        };
        explicit_atomic<preopt_cache_t *> _originalPreoptCache;
    };

    bool isConstantEmptyCache() const;
    bool canBeFreed() const;
    mask_t mask() const;

#if CONFIG_USE_PREOPT_CACHES
    void initializeToPreoptCacheInDisguise(const preopt_cache_t *cache);
    const preopt_cache_t *disguised_preopt_cache() const;
#endif

    void incrementOccupied();
    void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);

    void reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld);
    void collect_free(bucket_t *oldBuckets, mask_t oldCapacity);

    static bucket_t *emptyBuckets();
    static bucket_t *allocateBuckets(mask_t newCapacity);
    static bucket_t *emptyBucketsForCapacity(mask_t capacity, bool allocate = true);
    static struct bucket_t * endMarker(struct bucket_t *b, uint32_t cap);
    void bad_cache(id receiver, SEL sel) __attribute__((noreturn, cold));

public:
    // The following four fields are public for objcdt's use only.
    // objcdt reaches into fields while the process is suspended
    // hence doesn't care for locks and pesky little details like this
    // and can safely use these.
    unsigned capacity() const;
    struct bucket_t *buckets() const;
    Class cls() const;

    mask_t occupied() const;
    void initializeToEmpty();

    void insert(SEL sel, IMP imp, id receiver);
};

Member variables

// uintptr_t is just unsigned long; explicit_atomic makes access to
// _bucketsAndMaybeMask thread-safe. 8 bytes.
explicit_atomic<uintptr_t> _bucketsAndMaybeMask;
// The union as a whole occupies 8 bytes
union {
    struct {
        // mask_t is uint32_t, 4 bytes
        explicit_atomic<mask_t>    _maybeMask;
// 64-bit only
#if __LP64__
        // uint16_t, 2 bytes
        uint16_t                   _flags;
#endif
        uint16_t                   _occupied;
    };
    // a struct pointer, 8 bytes
    explicit_atomic<preopt_cache_t *> _originalPreoptCache;
};

Adding these up, cache_t is 16 bytes in total.
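To sanity-check that figure, here is a minimal stand-alone C++ sketch (simplified types, not the real runtime declarations) that mirrors the layout: an 8-byte word plus an 8-byte union gives 16 bytes on a 64-bit platform.

#include <cstdint>

// A simplified mirror of cache_t's layout (illustration only, not the runtime types)
struct cache_layout_sketch {
    uintptr_t bucketsAndMaybeMask;          // 8 bytes
    union {
        struct {
            uint32_t maybeMask;             // 4 bytes
            uint16_t flags;                 // 2 bytes (only under __LP64__)
            uint16_t occupied;              // 2 bytes
        } fields;                           // 8 bytes in total
        void *originalPreoptCache;          // 8 bytes
    } u;                                    // the union contributes 8 bytes
};

static_assert(sizeof(cache_layout_sketch) == 16,
              "8-byte word + 8-byte union = 16 bytes on LP64");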

insert

Looking through the source, we find a void insert(SEL sel, IMP imp, id receiver); function. Since cache_t is a cache, is this insert function how entries get added to it? Let's look at its implementation.

void cache_t::insert(SEL sel, IMP imp, id receiver)
{
    
    runtimeLock.assertLocked();

    // Never cache before +initialize is done
    if (slowpath(!cls()->isInitialized())) {
        return;
    }

    if (isConstantOptimizedCache()) {
        _objc_fatal("cache_t::insert() called with a preoptimized cache for %s",
                    cls()->nameForLogging());
    }

#if DEBUG_TASK_THREADS
    return _collecting_in_critical();
#else
#if CONFIG_USE_CACHE_LOCK
    mutex_locker_t lock(cacheUpdateLock);
#endif

    ASSERT(sel != 0 && cls()->isInitialized());

    // Use the cache as-is if until we exceed our expected fill ratio.
    mask_t newOccupied = occupied() + 1;
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    if (slowpath(isConstantEmptyCache())) {
        // Cache is read-only. Replace it.
        if (!capacity) capacity = INIT_CACHE_SIZE;
        reallocate(oldCapacity, capacity, /* freeOld */false);
    }
    else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
        // Cache is less than 3/4 or 7/8 full. Use it as-is.
    }
#if CACHE_ALLOW_FULL_UTILIZATION
    else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
        // Allow 100% cache utilization for small buckets. Use it as-is.
    }
#endif
    else {
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        reallocate(oldCapacity, capacity, true);
    }

    bucket_t *b = buckets();
    mask_t m = capacity - 1;
    mask_t begin = cache_hash(sel, m);
    mask_t i = begin;

    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot.
    do {
        if (fastpath(b[i].sel() == 0)) {
            incrementOccupied();
            b[i].set<Atomic, Encoded>(b, sel, imp, cls());
            return;
        }
        if (b[i].sel() == sel) {
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    } while (fastpath((i = cache_next(i, m)) != begin));

    bad_cache(receiver, (SEL)sel);
#endif // !DEBUG_TASK_THREADS
}

Inside the insert function we come across this snippet:

bucket_t *b = buckets();
mask_t m = capacity - 1;
mask_t begin = cache_hash(sel, m);
mask_t i = begin;

bucket_t

Next, let's look at the bucket_t source.

// some code omitted
struct bucket_t {
private:
    // IMP-first is better for arm64e ptrauth and no worse for arm64.
    // SEL-first is better for armv7* and i386 and x86_64.
#if __arm64__
    explicit_atomic<uintptr_t> _imp;
    explicit_atomic<SEL> _sel;
#else
    explicit_atomic<SEL> _sel;
    explicit_atomic<uintptr_t> _imp;
#endif

    // Sign newImp, with &_imp, newSel, and cls as modifiers.
    uintptr_t encodeImp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, IMP newImp, UNUSED_WITHOUT_PTRAUTH SEL newSel, Class cls); // body omitted

public:
    inline SEL sel() const { return _sel.load(memory_order_relaxed); }

    template <Atomicity, IMPEncoding>
    void set(bucket_t *base, SEL newSel, IMP newImp, Class cls);
};

// some code omitted
// the implementation of bucket_t's set function
template<Atomicity atomicity, IMPEncoding impEncoding>
void bucket_t::set(bucket_t *base, SEL newSel, IMP newImp, Class cls)
{   
    uintptr_t newIMP = (impEncoding == Encoded
                        ? encodeImp(base, newImp, newSel, cls)
                        : (uintptr_t)newImp);
    if (atomicity == Atomic) {
        _imp.store(newIMP, memory_order_relaxed);
        if (_sel.load(memory_order_relaxed) != newSel) {
#ifdef __arm__
            mega_barrier();
            _sel.store(newSel, memory_order_relaxed);
#elif __x86_64__ || __i386__
            _sel.store(newSel, memory_order_release);
#else
#error Don't know how to do bucket_t::set on this architecture.
#endif
        }
    } else {
        _imp.store(newIMP, memory_order_relaxed);
        _sel.store(newSel, memory_order_relaxed);
    }
}

We see that the bucket_t struct stores an imp and a sel, and that its set function is a template, template<Atomicity atomicity, IMPEncoding impEncoding>. The template takes two parameters: atomicity says whether the stores must be atomic, and impEncoding says whether the imp needs to be encoded. What the function actually does comes down to _imp.store(newIMP, memory_order_relaxed); and _sel.store(newSel, memory_order_relaxed); — it writes the imp and the sel into the bucket's memory.

Knowing what bucket_t is for, go back to that snippet in insert: bucket_t *b = buckets(); is the pointer to the array of buckets that store the methods' sels and imps.

capacity

The value of m is capacity - 1. From the insert source, unsigned oldCapacity = capacity(), capacity = oldCapacity;, capacity ultimately comes from the capacity() function, so let's look at that next.

unsigned cache_t::capacity() const
{
    return mask() ? mask()+1 : 0; 
}

mask_t cache_t::mask() const
{
    return _maybeMask.load(memory_order_relaxed);
}

capacity() calls mask(), and mask() simply loads and returns _maybeMask, which is the length of the bucket array minus 1. If mask() returns a non-zero value, capacity() returns _maybeMask + 1, i.e. the length of the bucket array (for example, a _maybeMask of 3 means a capacity of 4); otherwise it returns 0.

cache_hash

Next, the cache_hash source:

static inline mask_t cache_hash(SEL sel, mask_t mask) 
{
    uintptr_t value = (uintptr_t)sel;
#if CONFIG_USE_PREOPT_CACHES
    value ^= value >> 7;
#endif
    return (mask_t)(value & mask);
}

It first casts the incoming sel to a uintptr_t (i.e. unsigned long) number and then ANDs it with mask, so the result can never exceed mask. As we saw above, the mask passed in (m) is the length of the bucket array minus 1, and the value returned here is where sel goes in the cache — the index of the slot in the bucket array (this is standard hash-table indexing).
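As a quick illustration, here is a toy version of the same idea in C++ (the selector value and names below are made up, not runtime internals): with a mask of capacity - 1, the final & mask always produces an index inside the bucket array.

#include <cstdint>
#include <cstdio>

// Toy re-creation of cache_hash: mix the bits, then clamp with & mask.
static inline uint32_t toy_cache_hash(uintptr_t sel, uint32_t mask) {
    uintptr_t value = sel;
    value ^= value >> 7;                 // the CONFIG_USE_PREOPT_CACHES bit-mixing step
    return (uint32_t)(value & mask);     // result is always in [0, mask]
}

int main() {
    uint32_t mask = 8 - 1;               // capacity 8 -> mask 7 (0b111)
    uintptr_t fakeSel = 0x7fff12345678;  // stand-in for a SEL address
    printf("index = %u\n", toy_cache_hash(fakeSel, mask));  // always < 8
    return 0;
}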

Now that the variables are clear, let's look at the loop logic.

The do-while in insert

do {
    // Check whether slot i (the index hashed from sel) is empty in the bucket array.
    // If it is, there was no hash collision and we can insert by calling set directly.
    if (fastpath(b[i].sel() == 0)) {
        incrementOccupied();
        b[i].set<Atomic, Encoded>(b, sel, imp, cls());
        return;
    }
    // If slot i already holds this very sel, there is nothing to do
    if (b[i].sel() == sel) {
        // The entry was added to the cache by some other thread
        // before we grabbed the cacheUpdateLock.
        return;
    }
// On a hash collision, call cache_next: it computes (i + 1) & mask, i.e. moves the index
// one slot forward while guaranteeing it never runs off the end of the array (on arm64 it
// walks backwards instead). Keep probing until a slot is found for sel or we are back at begin.
} while (fastpath((i = cache_next(i, m)) != begin));

#if CACHE_END_MARKER
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return (i+1) & mask;
}
#elif __arm64__
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return i ? i-1 : mask;
}
#endif
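Putting the hash and the probing together, here is a self-contained toy sketch (toy types and values, not the runtime's) of the same open-addressing strategy: hash to a starting slot, and on a collision advance with (i + 1) & mask until we hit an empty slot, a matching sel, or the starting slot again.

#include <cstdint>
#include <cstdio>

struct ToyBucket { uintptr_t sel; uintptr_t imp; };   // sel == 0 means "empty"

static uint32_t toy_next(uint32_t i, uint32_t mask) { return (i + 1) & mask; }

static bool toy_insert(ToyBucket *b, uint32_t mask, uintptr_t sel, uintptr_t imp) {
    uint32_t begin = (uint32_t)(sel & mask);          // starting slot from the hash
    uint32_t i = begin;
    do {
        if (b[i].sel == 0)   { b[i].sel = sel; b[i].imp = imp; return true; }  // empty: insert
        if (b[i].sel == sel) { return true; }          // already cached
    } while ((i = toy_next(i, mask)) != begin);        // probe forward, wrapping with & mask
    return false;   // table full -- the real runtime would call bad_cache()
}

int main() {
    ToyBucket buckets[4] = {};                         // capacity 4, mask 3
    toy_insert(buckets, 3, 0x1001, 0xA);               // lands in slot 1
    toy_insert(buckets, 3, 0x2001, 0xB);               // also hashes to 1 -> probes to slot 2
    for (int i = 0; i < 4; i++)
        printf("slot %d: sel = %#lx\n", i, (unsigned long)buckets[i].sel);
    return 0;
}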

Console verification

Having read the source, let's print things in the console (lldb) and see what cache_t actually contains.


From this printout, cache_t's member variables don't seem to carry any obviously useful information by themselves. Back in the cache_t source, though, we saw the line struct bucket_t *buckets() const; — a buckets() function that returns a pointer to an array of bucket_t. Let's try calling that function.


According to the printout, the bucket_t's sel is "" and its imp is an inscrutable number. In bucket_t we also find the accessor for sel, inline SEL sel() const { return _sel.load(memory_order_relaxed); }, and the one for imp, inline IMP imp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, Class cls) const. Let's call these two functions and see what we get.

At last we get something: this bucket_t's sel is class. Let's keep going and use memory offsets to walk to the other elements of the array, and see whether there is anything else to find.


After stepping through several offsets, all we find in the array is one more entry, respondsToSelector:; none of the methods we defined in the class show up. On reflection this makes sense: a cache only holds methods that have actually been called, and we never called our own methods, so it is perfectly reasonable that they are absent. But when were class and respondsToSelector: called? Let's set that question aside for now and first verify that calling one of our own methods does add it to the cache.

First we call the method ourselves, then inspect the console again using the same approach as before.

int main(int argc, const char * argv[]) {
    MyObject *mo = [MyObject alloc];
    [mo method1];
    return 0;
}

Checking the console output again, not only is the method1 we just called nowhere to be found, the respondsToSelector: that was there before has disappeared as well. Why? To answer that, we first have to understand how the cache expands.

Cache expansion

Let's go back to the relevant code in the insert function.

// occupied() returns the _occupied member. On the first call to insert it is 0,
// so newOccupied is 1.
mask_t newOccupied = occupied() + 1;
// capacity() reads _maybeMask: non-zero means _maybeMask + 1, otherwise 0.
// On the first call it returns 0.
unsigned oldCapacity = capacity(), capacity = oldCapacity;
// Is the cache still the constant empty cache? On the first call it is.
if (slowpath(isConstantEmptyCache())) {
    // Cache is read-only. Replace it.
    // Initialize capacity, i.e. the length of the hash table. From INIT_CACHE_SIZE
    // (source below): 2 on 64-bit arm64, 4 on other architectures.
    if (!capacity) capacity = INIT_CACHE_SIZE;
    // Rebuild the hash table with this capacity (reallocate source below)
    reallocate(oldCapacity, capacity, /* freeOld */false);
}
/**
 *    On the first call newOccupied is 1. If the cache is still under 7/8 full
 *    (64-bit arm64) or 3/4 full (other architectures), do nothing and keep
 *    using the table as-is.
 */
else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
    // Cache is less than 3/4 or 7/8 full. Use it as-is.
}
// Only compiled in on 64-bit arm64, so only there is this else-if evaluated
#if CACHE_ALLOW_FULL_UTILIZATION
    /**
     *    FULL_UTILIZATION_CACHE_SIZE (defined next to INIT_CACHE_SIZE) is 8.
     *    On arm64, if the table length is <= 8 and the new entry still fits,
     *    do nothing: small tables on arm64 only grow once they are completely full.
     */
    else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
        // Allow 100% cache utilization for small buckets. Use it as-is.
    }
#endif
    // None of the above held, so the cache needs to grow
    else {
        // Double the capacity
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
        // Clamp it to MAX_CACHE_SIZE (defined next to INIT_CACHE_SIZE), which is 1 << 16
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        // With the doubled capacity, allocate a fresh bucket array (the old contents
        // are NOT copied over) and free the old one
        reallocate(oldCapacity, capacity, true);
    }

INIT_CACHE_SIZE source

enum {
/**
 *    CACHE_END_MARKER is 1 on non-arm64 and 0 on arm64, so on non-arm64 (or on
 *    32-bit arm64) INIT_CACHE_SIZE_LOG2 is 2; on 64-bit arm64 it is 1.
 */
#if CACHE_END_MARKER || (__arm64__ && !__LP64__)
    // When we have a cache end marker it fills a bucket slot, so having a
    // initial cache size of 2 buckets would not be efficient when one of the
    // slots is always filled with the end marker. So start with a cache size
    // 4 buckets.
    INIT_CACHE_SIZE_LOG2 = 2,
#else
    // Allow an initial bucket size of 2 buckets, since a large number of
    // classes, especially metaclasses, have very few imps, and we support
    // the ability to fill 100% of the cache before resizing.
    INIT_CACHE_SIZE_LOG2 = 1,
#endif
    /**
     *    On 64-bit arm64: 1 << 1 = 2; on other architectures: 1 << 2 = 4
     */
    INIT_CACHE_SIZE      = (1 << INIT_CACHE_SIZE_LOG2),
    MAX_CACHE_SIZE_LOG2  = 16,
    MAX_CACHE_SIZE       = (1 << MAX_CACHE_SIZE_LOG2),
    FULL_UTILIZATION_CACHE_SIZE_LOG2 = 3,
    // 1 << 3 = 8
    FULL_UTILIZATION_CACHE_SIZE = (1 << FULL_UTILIZATION_CACHE_SIZE_LOG2),
};

CACHE_END_MARKER and cache_fill_ratio source

/**
 *    On arm, x86_64 and i386: CACHE_END_MARKER is 1 and cache_fill_ratio is 3/4 of capacity.
 *    On 32-bit arm64: CACHE_END_MARKER is 0 and cache_fill_ratio is 3/4 of capacity.
 *    On 64-bit arm64: CACHE_END_MARKER is 0, cache_fill_ratio is 7/8 of capacity,
 *    and CACHE_ALLOW_FULL_UTILIZATION is 1 (it is not defined elsewhere).
 */
#if __arm__  ||  __x86_64__  ||  __i386__

// objc_msgSend has few registers available.
// Cache scan increments and wraps at special end-marking bucket.
#define CACHE_END_MARKER 1

// Historical fill ratio of 75% (since the new objc runtime was introduced).
static inline mask_t cache_fill_ratio(mask_t capacity) {
    return capacity * 3 / 4;
}

#elif __arm64__ && !__LP64__

// objc_msgSend has lots of registers available.
// Cache scan decrements. No end marker needed.
#define CACHE_END_MARKER 0

// Historical fill ratio of 75% (since the new objc runtime was introduced).
static inline mask_t cache_fill_ratio(mask_t capacity) {
    return capacity * 3 / 4;
}

#elif __arm64__ && __LP64__

// objc_msgSend has lots of registers available.
// Cache scan decrements. No end marker needed.
#define CACHE_END_MARKER 0

// Allow 87.5% fill ratio in the fast path for all cache sizes.
// Increasing the cache fill ratio reduces the fragmentation and wasted space
// in imp-caches at the cost of potentially increasing the average lookup of
// a selector in imp-caches by increasing collision chains. Another potential
// change is that cache table resizes / resets happen at different moments.
static inline mask_t cache_fill_ratio(mask_t capacity) {
    return capacity * 7 / 8;
}
// Allow 100% cache utilization for smaller cache sizes. This has the same
// advantages and disadvantages as the fill ratio. A very large percentage
// of caches end up with very few entries and the worst case of collision
// chains in small tables is relatively small.
// NOTE: objc_msgSend properly handles a cache lookup with a full cache.
#define CACHE_ALLOW_FULL_UTILIZATION 1

#else
#error unknown architecture
#endif

reallocate source

ALWAYS_INLINE
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
    // Fetch the old buckets via buckets()
    bucket_t *oldBuckets = buckets();
    // Allocate a new bucket array of size newCapacity
    bucket_t *newBuckets = allocateBuckets(newCapacity);

    // Cache's old contents are not propagated. 
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this

    ASSERT(newCapacity > 0);
    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);
    // Store the new buckets and mask into cache_t's members (on arm64 both are packed
    // into _bucketsAndMaybeMask; on x86_64 the mask goes into _maybeMask)
    setBucketsAndMask(newBuckets, newCapacity - 1);
    // Free the old buckets if requested; on the first call freeOld is false, so nothing is freed
    if (freeOld) {
        collect_free(oldBuckets, oldCapacity);
    }
}
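On a related note, on arm64 setBucketsAndMask packs the buckets pointer and the mask together into the single _bucketsAndMaybeMask word rather than using _maybeMask. The sketch below only shows the general idea of such packing; the 48-bit shift and the values are assumptions for illustration, not the exact runtime code.

#include <cstdint>
#include <cassert>

static const int kMaskShift = 48;   // assumption: mask kept in the top 16 bits

static uint64_t pack(uint64_t bucketsPtr, uint16_t mask) {
    return bucketsPtr | ((uint64_t)mask << kMaskShift);          // pointer low, mask high
}
static uint64_t unpackBuckets(uint64_t word) {
    return word & (((uint64_t)1 << kMaskShift) - 1);             // keep the low 48 bits
}
static uint16_t unpackMask(uint64_t word) {
    return (uint16_t)(word >> kMaskShift);                       // keep the high 16 bits
}

int main() {
    uint64_t fakeBuckets = 0x00006000001a2b40;                   // stand-in heap pointer
    uint64_t word = pack(fakeBuckets, 7);                        // capacity 8 -> mask 7
    assert(unpackBuckets(word) == fakeBuckets);
    assert(unpackMask(word) == 7);
    return 0;
}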

The vanishing method cache

We also left a question open earlier: when exactly are class and respondsToSelector: called? Let's add a log inside insert so that every selector is printed as it is inserted:

printf("%s", sel_getName(sel));

When method1 is called, method1 is printed exactly as expected; and when we run the x/4gx mo.class command, respondsToSelector: and class are printed, which tells us those two methods are inserted into the cache at the moment that command executes.

So why, after calling method1, can we find neither method1 nor respondsToSelector: in the cache?

From the exploration above we know that under the simulator's x86_64 architecture, buckets starts with a length of 4 and expands once it reaches 3/4 full. In the check else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))), CACHE_END_MARKER is 1 on x86_64, so the condition is: (number of methods already cached + 1) + 1 <= 3/4 of the buckets length.

So when the cache already holds method1 and respondsToSelector: and class is then called: per mask_t newOccupied = occupied() + 1;, occupied() returns the number of methods currently cached (2), so newOccupied is 3; adding CACHE_END_MARKER (1) gives 4, which exceeds 3/4 of the buckets length (3), so the cache has to expand. Expansion allocates a brand-new buckets array and class is inserted into it, which is why only class showed up earlier while method1 and respondsToSelector: were gone.
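The arithmetic can be replayed with a small toy simulation in C++ (the selector names and the simplistic grow-and-drop behaviour are just an illustration of the paragraph above, not the runtime implementation):

#include <cstdio>

int main() {
    unsigned capacity = 4, occupied = 0;          // x86_64: initial buckets length 4
    const unsigned CACHE_END_MARKER = 1;          // one slot reserved for the end marker
    const char *calls[] = {"method1", "respondsToSelector:", "class"};

    for (const char *sel : calls) {
        unsigned newOccupied = occupied + 1;
        if (newOccupied + CACHE_END_MARKER <= capacity * 3 / 4) {
            occupied = newOccupied;               // still under 3/4: insert as-is
            printf("insert %-20s occupied = %u / %u\n", sel, occupied, capacity);
        } else {
            capacity *= 2;                        // grow the table...
            occupied = 1;                         // ...and drop the old contents
            printf("insert %-20s -> grow to %u, old entries discarded\n", sel, capacity);
        }
    }
    return 0;
}

Running it shows exactly the sequence described above: method1 and respondsToSelector: fit, and the third insertion (class) trips the 3/4 check, doubles the table and discards the old entries, leaving only class in the new buckets.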

Summary

cache_t stores the method cache. When a method is called, cache_t's insert function is invoked and the method is added to the buckets cache. The default size of buckets is 2 on arm64 and 4 on x86_64.

When buckets can no longer hold the methods being cached, it is expanded.

On arm64, as long as the table length is at most 8, it only expands once it is completely full; once the length exceeds 8, it expands at 7/8. In other words, a table of length 8 can hold 8 methods, while a table of length 16 has to expand when the 15th method is about to be stored.

On x86_64 the threshold is 3/4. Expanding at 3/4 means: with a table of length 4, expansion happens when the 3rd entry needs to be stored; with a table of length 8, when the 6th entry needs to be stored. That is, a table can only hold (3/4 of its length) - 1 methods.

And after expansion the previously allocated buckets are freed, which means the methods cached in them are gone as well.