iOS-cache源码 objc_class结构体 cache_t结构体整体结构流程分析部分重要变量 _bucke

源码

objc_class结构体

struct objc_class : objc_object {
    // Class ISA;
    Class superclass;
    cache_t cache;             // formerly cache pointer and vtable
    class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags
    class_rw_t *data() const {
        return bits.data();
    }
 ......
}

cache_t结构体

struct cache_t {
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED
    explicit_atomic<struct bucket_t *> _buckets;
    explicit_atomic<mask_t> _mask;
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
    explicit_atomic<uintptr_t> _maskAndBuckets;
    mask_t _mask_unused;

    // How much the mask is shifted by.
    static constexpr uintptr_t maskShift = 48;

    // Additional bits after the mask which must be zero. msgSend
    // takes advantage of these additional bits to construct the value
    // `mask << 4` from `_maskAndBuckets` in a single instruction.
    static constexpr uintptr_t maskZeroBits = 4;

    // The largest mask value we can store.
    static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;

    // The mask applied to `_maskAndBuckets` to retrieve the buckets pointer.
    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;

    // Ensure we have enough bits for the buckets pointer.
    static_assert(bucketsMask >= MACH_VM_MAX_ADDRESS, "Bucket field doesn't have enough bits for arbitrary pointers.");
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
    // _maskAndBuckets stores the mask shift in the low 4 bits, and
    // the buckets pointer in the remainder of the value. The mask
    // shift is the value where (0xffff >> shift) produces the correct
    // mask. This is equal to 16 - log2(cache_size).
    explicit_atomic<uintptr_t> _maskAndBuckets;
    mask_t _mask_unused;

    static constexpr uintptr_t maskBits = 4;
    static constexpr uintptr_t maskMask = (1 << maskBits) - 1;
    static constexpr uintptr_t bucketsMask = ~maskMask;
#else
#error Unknown cache mask storage type.
#endif
#if __LP64__
    uint16_t _flags;
#endif
    uint16_t _occupied;

public:
    static bucket_t *emptyBuckets();

    struct bucket_t *buckets();
    mask_t mask();
    mask_t occupied();
    void incrementOccupied();
    void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);
    void initializeToEmpty();

    unsigned capacity();
    bool isConstantEmptyCache();
    bool canBeFreed();
......
};

整体结构

流程分析

部分重要变量

_buckets
_buckets是struct bucket_t类型的散列表，方法的缓存（以散列表的形式存储bucket_t）

散列表（哈希表）是以空间换时间，例如：刚开始为cache_t分配一定的内存如10, 当内存不够用时内存扩大2倍, 依次类推。左边是索引, 右边是bucket_t结构体

_mask
_mask是指掩码数据，用于在哈希算法或者哈希冲突算法中计算哈希下标，其中mask等于capacity- 1（_buckets的数组长度-1（容量的临界值)）

为什么按位&_mask?
按位与可保证得到的值<=_mask,这样就不会超出分配的空间

_occupied
表示散列表中已占用的容量

散列表存储原理

初始时, 为对象的cach_t分配一个空间, 值为NULL
调用方法时, 为对象发送一个 SEL消息, 如 @selector(test), 将这个方法缓存
系统用SEL与_mask作按位与计算:@selector(test) & _mask,假设其值==2,
检查索引2对应的空间是否为NULL,如果为NULL就将这个bucket_t缓存在索引2对应空间
如果不为空,索引减1,再检查是否为NULL, 依次类推.如果索引<0,则使索引 =_mask-1,直至找到索引对应空间为NULL,再缓存

部分重要函数

incrementOccupied()

void cache_t::incrementOccupied() 
{
    _occupied++;
}

仅且会在void cache_t::insert(Class cls, SEL sel, IMP imp, id receiver)函数中调用\

void cache_t::insert(Class cls, SEL sel, IMP imp, id receiver)

理解为cache_t的插入，而cache中存储的就是sel-imp，所以cache的原理从insert方法开始

ALWAYS_INLINE
void cache_t::insert(Class cls, SEL sel, IMP imp, id receiver)
{
#if CONFIG_USE_CACHE_LOCK
    cacheUpdateLock.assertLocked();
#else
    runtimeLock.assertLocked();
#endif
    ASSERT(sel != 0 && cls->isInitialized());
    // Use the cache as-is if it is less than 3/4 full
    mask_t newOccupied = occupied() + 1;
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    if (slowpath(isConstantEmptyCache())) {
        // Cache is read-only. Replace it.
        if (!capacity) capacity = INIT_CACHE_SIZE;
        reallocate(oldCapacity, capacity, /* freeOld */false);
    }
    else if (fastpath(newOccupied <= capacity / 4 * 3)) {
        // Cache is less than 3/4 full. Use it as-is.
    }
    else {
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        reallocate(oldCapacity, capacity, true);
    }
    bucket_t *b = buckets();
    mask_t m = capacity - 1;
    mask_t begin = cache_hash(sel, m);
    mask_t i = begin;
    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot because the
    // minimum size is 4 and we resized at 3/4 full.
    do {
        if (fastpath(b[i].sel() == 0)) {
            incrementOccupied();
            b[i].set<Atomic, Encoded>(sel, imp, cls);
            return;
        }
        if (b[i].sel() == sel) {
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    } while (fastpath((i = cache_next(i, m)) != begin));

    cache_t::bad_cache(receiver, (SEL)sel, cls);
}

主要分为以下几部分
1、计算出当前的缓存占用量
2、根据缓存占用量判断执行的操作
3、针对需要存储的bucket进行内部imp和sel赋值

计算出当前的缓存占用量

根据occupied的值计算出当前的缓存占用量，当属性未赋值及无方法调用时，此时的occupied()为0，而newOccupied为1，如下所示

mask_t newOccupied = occupied() + 1;

alloc申请空间时，此时的对象已经创建，如果再调用init方法，occupied也会+1
当有属性赋值时，会隐式调用set方法，occupied也会增加，即有几个属性赋值，occupied就会在原有的基础上加几个
当有方法调用时，occupied也会增加，即有几次调用，occupied就会在原有的基础上加几个

根据缓存占用量判断执行的操作

如果是第一次创建，则默认开辟4个

if (slowpath(isConstantEmptyCache())) {
    // Cache is read-only. Replace it.
    // INIT_CACHE_SIZE = (1<<2) = 4
    if (!capacity) capacity = INIT_CACHE_SIZE;
    reallocate(oldCapacity, capacity, /* freeOld */false);
}

如果缓存占用量小于等于3/4，则不作任何处理

else if (fastpath(newOccupied <= capacity / 4 * 3)) {
    // Cache is less than 3/4 full. Use it as-is.
}

如果缓存占用量超过3/4，则需要进行两倍扩容以及重新开辟空间

else {
    capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
    if (capacity > MAX_CACHE_SIZE) {
        capacity = MAX_CACHE_SIZE;
    }
    reallocate(oldCapacity, capacity, true);
}

reallocate方法：开辟空间

ALWAYS_INLINE
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
    bucket_t *oldBuckets = buckets();
    bucket_t *newBuckets = allocateBuckets(newCapacity);

    // Cache's old contents are not propagated. 
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this

    ASSERT(newCapacity > 0);
    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

    setBucketsAndMask(newBuckets, newCapacity - 1);

    if (freeOld) {
        cache_collect_free(oldBuckets, oldCapacity);
    }
}

allocateBuckets方法：向系统申请开辟内存，即开辟bucket，此时的bucket只是一个临时变量
setBucketsAndMask方法：将临时的bucket存入缓存中
cache_collect_free方法：清理之前的缓存

static void cache_collect_free(bucket_t *data, mask_t capacity)
{
#if CONFIG_USE_CACHE_LOCK
    cacheUpdateLock.assertLocked();
#else
    runtimeLock.assertLocked();
#endif

    if (PrintCaches) recordDeadCache(capacity);

    _garbage_make_room ();
    garbage_byte_size += cache_t::bytesForCapacity(capacity);
    garbage_refs[garbage_count++] = data;
    cache_collect(false);
}

如果是第一次，需要分配回收空间
如果不是第一次，则将内存段加大，即原有内存*2
记录存储这次的bucket
cache_collect方法：垃圾回收，清理旧的bucket

针对需要存储的bucket进行内部imp和sel赋值

如果哈希下标的位置未存储sel，即该下标位置获取sel等于0，此时将sel-imp存储进去，并将occupied占用大小加1
如果当前哈希下标存储的sel等于即将插入的sel，则直接返回
如果当前哈希下标存储的sel不等于即将插入的sel，则重新经过cache_next方法即哈希冲突算法，重新进行哈希计算，得到新的下标，再去对比进行存储

cache_hash哈希算法

static inline mask_t cache_hash(SEL sel, mask_t mask) 
{
    return (mask_t)(uintptr_t)sel & mask;
}

cache_next哈希冲突算法
为什么有-1的算法，也是因为按位与，因为不同的值&_mask，可能结果相同。如果已经被占了就-1

static inline mask_t cache_next(mask_t i, mask_t mask) {
    return i ? i-1 : mask;
}

总结

1、_mask是指掩码数据，用于在哈希算法或者哈希冲突算法中计算哈希下标，其中mask等于capacity - 1
2、_occupied表示哈希表中sel-imp的占用大小(即可以理解为分配的内存中已经存储了sel-imp的的个数)

init会导致occupied变化
属性赋值，也会隐式调用set方法，导致occupied变化
方法调用，会导致occupied变化 3、cache初始化时，分配的空间是4个，随着方法调用的增多，当存储的sel-imp个数，即newOccupied+CACHE_END_MARKER（等于1）的和超过总容量的3/4,例如有4个时，当occupied等于2时，就需要对cache的内存进行两倍扩容
4、在哈希这种数据结构里面，有一个概念用来表示空位的多少叫做装载因子——装载因子越大，说明空闲位置越少，冲突越多，散列表的性能会下降。负载因子是3/4的时候，空间利用率比较高，而且避免了相当多的Hash冲突，提升了空间效率
5、sel-imp的存储是通过哈希算法计算下标的，其计算的下标有可能已经存储了sel，所以又需要通过哈希冲突算法重新计算哈希下标，所以导致下标是随机的，并不是固定的。