cache_t in Depth


While exploring objc_class, one piece was left untouched: the cache_t cache member. Let's dig into it now.

cache_t source

As usual, let's start with the cache_t source (some code omitted).

struct cache_t {
private:
    explicit_atomic<uintptr_t> _bucketsAndMaybeMask;
    union {
        struct {
            explicit_atomic<mask_t>    _maybeMask;
#if __LP64__
            uint16_t                   _flags;
#endif
            uint16_t                   _occupied;
        };
        explicit_atomic<preopt_cache_t *> _originalPreoptCache;
    };

    bool isConstantEmptyCache() const;
    bool canBeFreed() const;
    mask_t mask() const;

#if CONFIG_USE_PREOPT_CACHES
    void initializeToPreoptCacheInDisguise(const preopt_cache_t *cache);
    const preopt_cache_t *disguised_preopt_cache() const;
#endif

    void incrementOccupied();
    void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);

    void reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld);
    void collect_free(bucket_t *oldBuckets, mask_t oldCapacity);

    static bucket_t *emptyBuckets();
    static bucket_t *allocateBuckets(mask_t newCapacity);
    static bucket_t *emptyBucketsForCapacity(mask_t capacity, bool allocate = true);
    static struct bucket_t * endMarker(struct bucket_t *b, uint32_t cap);
    void bad_cache(id receiver, SEL sel) __attribute__((noreturn, cold));

public:
    // The following four fields are public for objcdt's use only.
    // objcdt reaches into fields while the process is suspended
    // hence doesn't care for locks and pesky little details like this
    // and can safely use these.
    unsigned capacity() const;
    struct bucket_t *buckets() const;
    Class cls() const;

    mask_t occupied() const;
    void initializeToEmpty();

    void insert(SEL sel, IMP imp, id receiver);
};

Member variables

// uintptr_t is just unsigned long; explicit_atomic makes access to
// _bucketsAndMaybeMask thread-safe. 8 bytes.
explicit_atomic<uintptr_t> _bucketsAndMaybeMask;
// The union as a whole occupies 8 bytes
union {
    struct {
        // mask_t is uint32_t, 4 bytes
        explicit_atomic<mask_t>    _maybeMask;
// 64-bit only
#if __LP64__
        // uint16_t, 2 bytes
        uint16_t                   _flags;
#endif
        uint16_t                   _occupied;
    };
    // a struct pointer, 8 bytes
    explicit_atomic<preopt_cache_t *> _originalPreoptCache;
};

Adding these up, cache_t is 16 bytes in total.
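To sanity-check that figure, here is a minimal stand-alone C++ sketch (simplified types, not the real runtime declarations) that mirrors the layout: an 8-byte word plus an 8-byte union gives 16 bytes on a 64-bit platform.

#include <cstdint>

// A simplified mirror of cache_t's layout (illustration only, not the runtime types)
struct cache_layout_sketch {
    uintptr_t bucketsAndMaybeMask;          // 8 bytes
    union {
        struct {
            uint32_t maybeMask;             // 4 bytes
            uint16_t flags;                 // 2 bytes (only under __LP64__)
            uint16_t occupied;              // 2 bytes
        } fields;                           // 8 bytes in total
        void *originalPreoptCache;          // 8 bytes
    } u;                                    // the union contributes 8 bytes
};

static_assert(sizeof(cache_layout_sketch) == 16,
              "8-byte word + 8-byte union = 16 bytes on LP64");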

insert

Looking through the source, we find a void insert(SEL sel, IMP imp, id receiver); function. Since cache_t is a cache, is this insert function how entries get added to it? Let's look at its implementation.

void cache_t::insert(SEL sel, IMP imp, id receiver)
{
    
    runtimeLock.assertLocked();

    // Never cache before +initialize is done
    if (slowpath(!cls()->isInitialized())) {
        return;
    }

    if (isConstantOptimizedCache()) {
        _objc_fatal("cache_t::insert() called with a preoptimized cache for %s",
                    cls()->nameForLogging());
    }

#if DEBUG_TASK_THREADS
    return _collecting_in_critical();
#else
#if CONFIG_USE_CACHE_LOCK
    mutex_locker_t lock(cacheUpdateLock);
#endif

    ASSERT(sel != 0 && cls()->isInitialized());

    // Use the cache as-is if until we exceed our expected fill ratio.
    mask_t newOccupied = occupied() + 1;
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    if (slowpath(isConstantEmptyCache())) {
        // Cache is read-only. Replace it.
        if (!capacity) capacity = INIT_CACHE_SIZE;
        reallocate(oldCapacity, capacity, /* freeOld */false);
    }
    else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
        // Cache is less than 3/4 or 7/8 full. Use it as-is.
    }
#if CACHE_ALLOW_FULL_UTILIZATION
    else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
        // Allow 100% cache utilization for small buckets. Use it as-is.
    }
#endif
    else {
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        reallocate(oldCapacity, capacity, true);
    }

    bucket_t *b = buckets();
    mask_t m = capacity - 1;
    mask_t begin = cache_hash(sel, m);
    mask_t i = begin;

    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot.
    do {
        if (fastpath(b[i].sel() == 0)) {
            incrementOccupied();
            b[i].set<Atomic, Encoded>(b, sel, imp, cls());
            return;
        }
        if (b[i].sel() == sel) {
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    } while (fastpath((i = cache_next(i, m)) != begin));

    bad_cache(receiver, (SEL)sel);
#endif // !DEBUG_TASK_THREADS
}

Inside the insert function we come across this snippet:

bucket_t *b = buckets();
mask_t m = capacity - 1;
mask_t begin = cache_hash(sel, m);
mask_t i = begin;

bucket_t

Next, let's look at the bucket_t source.

// some code omitted
struct bucket_t {
private:
    // IMP-first is better for arm64e ptrauth and no worse for arm64.
    // SEL-first is better for armv7* and i386 and x86_64.
#if __arm64__
    explicit_atomic<uintptr_t> _imp;
    explicit_atomic<SEL> _sel;
#else
    explicit_atomic<SEL> _sel;
    explicit_atomic<uintptr_t> _imp;
#endif

    // Sign newImp, with &_imp, newSel, and cls as modifiers.
    uintptr_t encodeImp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, IMP newImp, UNUSED_WITHOUT_PTRAUTH SEL newSel, Class cls); // body omitted

public:
    inline SEL sel() const { return _sel.load(memory_order_relaxed); }

    template <Atomicity, IMPEncoding>
    void set(bucket_t *base, SEL newSel, IMP newImp, Class cls);
};

// some code omitted
// the implementation of bucket_t's set function
template<Atomicity atomicity, IMPEncoding impEncoding>
void bucket_t::set(bucket_t *base, SEL newSel, IMP newImp, Class cls)
{   
    uintptr_t newIMP = (impEncoding == Encoded
                        ? encodeImp(base, newImp, newSel, cls)
                        : (uintptr_t)newImp);
    if (atomicity == Atomic) {
        _imp.store(newIMP, memory_order_relaxed);
        if (_sel.load(memory_order_relaxed) != newSel) {
#ifdef __arm__
            mega_barrier();
            _sel.store(newSel, memory_order_relaxed);
#elif __x86_64__ || __i386__
            _sel.store(newSel, memory_order_release);
#else
#error Don't know how to do bucket_t::set on this architecture.
#endif
        }
    } else {
        _imp.store(newIMP, memory_order_relaxed);
        _sel.store(newSel, memory_order_relaxed);
    }
}

We see that the bucket_t struct stores an imp and a sel, and that its set function is a template, template<Atomicity atomicity, IMPEncoding impEncoding>. The template takes two parameters: atomicity says whether the stores must be atomic, and impEncoding says whether the imp needs to be encoded. What the function actually does comes down to _imp.store(newIMP, memory_order_relaxed); and _sel.store(newSel, memory_order_relaxed); — it writes the imp and the sel into the bucket's memory.

Knowing what bucket_t is for, go back to that snippet in insert: bucket_t *b = buckets(); is the pointer to the array of buckets that store the methods' sels and imps.

capacity

The value of m is capacity - 1. From the insert source, unsigned oldCapacity = capacity(), capacity = oldCapacity;, capacity ultimately comes from the capacity() function, so let's look at that next.

unsigned cache_t::capacity() const
{
    return mask() ? mask()+1 : 0; 
}

mask_t cache_t::mask() const
{
    return _maybeMask.load(memory_order_relaxed);
}

capacity() calls mask(), and mask() simply loads and returns _maybeMask, which is the length of the bucket array minus 1. If mask() returns a non-zero value, capacity() returns _maybeMask + 1, i.e. the length of the bucket array (for example, a _maybeMask of 3 means a capacity of 4); otherwise it returns 0.

cache_hash

Next, the cache_hash source:

static inline mask_t cache_hash(SEL sel, mask_t mask) 
{
    uintptr_t value = (uintptr_t)sel;
#if CONFIG_USE_PREOPT_CACHES
    value ^= value >> 7;
#endif
    return (mask_t)(value & mask);
}

It first casts the incoming sel to a uintptr_t (i.e. unsigned long) number and then ANDs it with mask, so the result can never exceed mask. As we saw above, the mask passed in (m) is the length of the bucket array minus 1, and the value returned here is where sel goes in the cache — the index of the slot in the bucket array (this is standard hash-table indexing).
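As a quick illustration, here is a toy version of the same idea in C++ (the selector value and names below are made up, not runtime internals): with a mask of capacity - 1, the final & mask always produces an index inside the bucket array.

#include <cstdint>
#include <cstdio>

// Toy re-creation of cache_hash: mix the bits, then clamp with & mask.
static inline uint32_t toy_cache_hash(uintptr_t sel, uint32_t mask) {
    uintptr_t value = sel;
    value ^= value >> 7;                 // the CONFIG_USE_PREOPT_CACHES bit-mixing step
    return (uint32_t)(value & mask);     // result is always in [0, mask]
}

int main() {
    uint32_t mask = 8 - 1;               // capacity 8 -> mask 7 (0b111)
    uintptr_t fakeSel = 0x7fff12345678;  // stand-in for a SEL address
    printf("index = %u\n", toy_cache_hash(fakeSel, mask));  // always < 8
    return 0;
}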

Now that the variables are clear, let's look at the loop logic.

The do-while in insert

do {
    // Check whether slot i (the index hashed from sel) is empty in the bucket array.
    // If it is, there was no hash collision and we can insert by calling set directly.
    if (fastpath(b[i].sel() == 0)) {
        incrementOccupied();
        b[i].set<Atomic, Encoded>(b, sel, imp, cls());
        return;
    }
    // If slot i already holds this very sel, there is nothing to do
    if (b[i].sel() == sel) {
        // The entry was added to the cache by some other thread
        // before we grabbed the cacheUpdateLock.
        return;
    }
// On a hash collision, call cache_next: it computes (i + 1) & mask, i.e. moves the index
// one slot forward while guaranteeing it never runs off the end of the array (on arm64 it
// walks backwards instead). Keep probing until a slot is found for sel or we are back at begin.
} while (fastpath((i = cache_next(i, m)) != begin));

#if CACHE_END_MARKER
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return (i+1) & mask;
}
#elif __arm64__
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return i ? i-1 : mask;
}
#endif
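Putting the hash and the probing together, here is a self-contained toy sketch (toy types and values, not the runtime's) of the same open-addressing strategy: hash to a starting slot, and on a collision advance with (i + 1) & mask until we hit an empty slot, a matching sel, or the starting slot again.

#include <cstdint>
#include <cstdio>

struct ToyBucket { uintptr_t sel; uintptr_t imp; };   // sel == 0 means "empty"

static uint32_t toy_next(uint32_t i, uint32_t mask) { return (i + 1) & mask; }

static bool toy_insert(ToyBucket *b, uint32_t mask, uintptr_t sel, uintptr_t imp) {
    uint32_t begin = (uint32_t)(sel & mask);          // starting slot from the hash
    uint32_t i = begin;
    do {
        if (b[i].sel == 0)   { b[i].sel = sel; b[i].imp = imp; return true; }  // empty: insert
        if (b[i].sel == sel) { return true; }          // already cached
    } while ((i = toy_next(i, mask)) != begin);        // probe forward, wrapping with & mask
    return false;   // table full -- the real runtime would call bad_cache()
}

int main() {
    ToyBucket buckets[4] = {};                         // capacity 4, mask 3
    toy_insert(buckets, 3, 0x1001, 0xA);               // lands in slot 1
    toy_insert(buckets, 3, 0x2001, 0xB);               // also hashes to 1 -> probes to slot 2
    for (int i = 0; i < 4; i++)
        printf("slot %d: sel = %#lx\n", i, (unsigned long)buckets[i].sel);
    return 0;
}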

Console verification

Having read the source, let's print things in the console (lldb) and see what cache_t actually contains.


From this printout, cache_t's member variables don't seem to carry any obviously useful information by themselves. Back in the cache_t source, though, we saw the line struct bucket_t *buckets() const; — a buckets() function that returns a pointer to an array of bucket_t. Let's try calling that function.


According to the printout, the bucket_t's sel is "" and its imp is an inscrutable number. In bucket_t we also find the accessor for sel, inline SEL sel() const { return _sel.load(memory_order_relaxed); }, and the one for imp, inline IMP imp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, Class cls) const. Let's call these two functions and see what we get.

At last we get something: this bucket_t's sel is class. Let's keep going and use memory offsets to walk to the other elements of the array, and see whether there is anything else to find.


After stepping through several offsets, all we find in the array is one more entry, respondsToSelector:; none of the methods we defined in the class show up. On reflection this makes sense: a cache only holds methods that have actually been called, and we never called our own methods, so it is perfectly reasonable that they are absent. But when were class and respondsToSelector: called? Let's set that question aside for now and first verify that calling one of our own methods does add it to the cache.

First we call the method ourselves, then inspect the console again using the same approach as before.

int main(int argc, const char * argv[]) {
    MyObject *mo = [MyObject alloc];
    [mo method1];
    return 0;
}

Checking the console output again, not only is the method1 we just called nowhere to be found, the respondsToSelector: that was there before has disappeared as well. Why? To answer that, we first have to understand how the cache expands.

Cache expansion

Let's go back to the relevant code in the insert function.

// occupied() returns the _occupied member. On the first call to insert it is 0,
// so newOccupied is 1.
mask_t newOccupied = occupied() + 1;
// capacity() reads _maybeMask: non-zero means _maybeMask + 1, otherwise 0.
// On the first call it returns 0.
unsigned oldCapacity = capacity(), capacity = oldCapacity;
// Is the cache still the constant empty cache? On the first call it is.
if (slowpath(isConstantEmptyCache())) {
    // Cache is read-only. Replace it.
    // Initialize capacity, i.e. the length of the hash table. From INIT_CACHE_SIZE
    // (source below): 2 on 64-bit arm64, 4 on other architectures.
    if (!capacity) capacity = INIT_CACHE_SIZE;
    // Rebuild the hash table with this capacity (reallocate source below)
    reallocate(oldCapacity, capacity, /* freeOld */false);
}
/**
 *    On the first call newOccupied is 1. If the cache is still under 7/8 full
 *    (64-bit arm64) or 3/4 full (other architectures), do nothing and keep
 *    using the table as-is.
 */
else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
    // Cache is less than 3/4 or 7/8 full. Use it as-is.
}
// Only compiled in on 64-bit arm64, so only there is this else-if evaluated
#if CACHE_ALLOW_FULL_UTILIZATION
    /**
     *    FULL_UTILIZATION_CACHE_SIZE (defined next to INIT_CACHE_SIZE) is 8.
     *    On arm64, if the table length is <= 8 and the new entry still fits,
     *    do nothing: small tables on arm64 only grow once they are completely full.
     */
    else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
        // Allow 100% cache utilization for small buckets. Use it as-is.
    }
#endif
    // None of the above held, so the cache needs to grow
    else {
        // Double the capacity
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
        // Clamp it to MAX_CACHE_SIZE (defined next to INIT_CACHE_SIZE), which is 1 << 16
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        // With the doubled capacity, allocate a fresh bucket array (the old contents
        // are NOT copied over) and free the old one
        reallocate(oldCapacity, capacity, true);
    }

INIT_CACHE_SIZE source

enum {
/**
 *    CACHE_END_MARKER is 1 on non-arm64 and 0 on arm64, so on non-arm64 (or on
 *    32-bit arm64) INIT_CACHE_SIZE_LOG2 is 2; on 64-bit arm64 it is 1.
 */
#if CACHE_END_MARKER || (__arm64__ && !__LP64__)
    // When we have a cache end marker it fills a bucket slot, so having a
    // initial cache size of 2 buckets would not be efficient when one of the
    // slots is always filled with the end marker. So start with a cache size
    // 4 buckets.
    INIT_CACHE_SIZE_LOG2 = 2,
#else
    // Allow an initial bucket size of 2 buckets, since a large number of
    // classes, especially metaclasses, have very few imps, and we support
    // the ability to fill 100% of the cache before resizing.
    INIT_CACHE_SIZE_LOG2 = 1,
#endif
    /**
     *    On 64-bit arm64: 1 << 1 = 2; on other architectures: 1 << 2 = 4
     */
    INIT_CACHE_SIZE      = (1 << INIT_CACHE_SIZE_LOG2),
    MAX_CACHE_SIZE_LOG2  = 16,
    MAX_CACHE_SIZE       = (1 << MAX_CACHE_SIZE_LOG2),
    FULL_UTILIZATION_CACHE_SIZE_LOG2 = 3,
    // 1 << 3 = 8
    FULL_UTILIZATION_CACHE_SIZE = (1 << FULL_UTILIZATION_CACHE_SIZE_LOG2),
};

CACHE_END_MARKER and cache_fill_ratio source

/**
 *    On arm, x86_64 and i386: CACHE_END_MARKER is 1 and cache_fill_ratio is 3/4 of capacity.
 *    On 32-bit arm64: CACHE_END_MARKER is 0 and cache_fill_ratio is 3/4 of capacity.
 *    On 64-bit arm64: CACHE_END_MARKER is 0, cache_fill_ratio is 7/8 of capacity,
 *    and CACHE_ALLOW_FULL_UTILIZATION is 1 (it is not defined elsewhere).
 */
#if __arm__  ||  __x86_64__  ||  __i386__

// objc_msgSend has few registers available.
// Cache scan increments and wraps at special end-marking bucket.
#define CACHE_END_MARKER 1

// Historical fill ratio of 75% (since the new objc runtime was introduced).
static inline mask_t cache_fill_ratio(mask_t capacity) {
    return capacity * 3 / 4;
}

#elif __arm64__ && !__LP64__

// objc_msgSend has lots of registers available.
// Cache scan decrements. No end marker needed.
#define CACHE_END_MARKER 0

// Historical fill ratio of 75% (since the new objc runtime was introduced).
static inline mask_t cache_fill_ratio(mask_t capacity) {
    return capacity * 3 / 4;
}

#elif __arm64__ && __LP64__

// objc_msgSend has lots of registers available.
// Cache scan decrements. No end marker needed.
#define CACHE_END_MARKER 0

// Allow 87.5% fill ratio in the fast path for all cache sizes.
// Increasing the cache fill ratio reduces the fragmentation and wasted space
// in imp-caches at the cost of potentially increasing the average lookup of
// a selector in imp-caches by increasing collision chains. Another potential
// change is that cache table resizes / resets happen at different moments.
static inline mask_t cache_fill_ratio(mask_t capacity) {
    return capacity * 7 / 8;
}
// Allow 100% cache utilization for smaller cache sizes. This has the same
// advantages and disadvantages as the fill ratio. A very large percentage
// of caches end up with very few entries and the worst case of collision
// chains in small tables is relatively small.
// NOTE: objc_msgSend properly handles a cache lookup with a full cache.
#define CACHE_ALLOW_FULL_UTILIZATION 1

#else
#error unknown architecture
#endif

reallocate source

ALWAYS_INLINE
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
    // Fetch the old buckets via buckets()
    bucket_t *oldBuckets = buckets();
    // Allocate a new bucket array of size newCapacity
    bucket_t *newBuckets = allocateBuckets(newCapacity);

    // Cache's old contents are not propagated. 
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this

    ASSERT(newCapacity > 0);
    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);
    // Store the new buckets and mask into cache_t's members (on arm64 both are packed
    // into _bucketsAndMaybeMask; on x86_64 the mask goes into _maybeMask)
    setBucketsAndMask(newBuckets, newCapacity - 1);
    // Free the old buckets if requested; on the first call freeOld is false, so nothing is freed
    if (freeOld) {
        collect_free(oldBuckets, oldCapacity);
    }
}
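On a related note, on arm64 setBucketsAndMask packs the buckets pointer and the mask together into the single _bucketsAndMaybeMask word rather than using _maybeMask. The sketch below only shows the general idea of such packing; the 48-bit shift and the values are assumptions for illustration, not the exact runtime code.

#include <cstdint>
#include <cassert>

static const int kMaskShift = 48;   // assumption: mask kept in the top 16 bits

static uint64_t pack(uint64_t bucketsPtr, uint16_t mask) {
    return bucketsPtr | ((uint64_t)mask << kMaskShift);          // pointer low, mask high
}
static uint64_t unpackBuckets(uint64_t word) {
    return word & (((uint64_t)1 << kMaskShift) - 1);             // keep the low 48 bits
}
static uint16_t unpackMask(uint64_t word) {
    return (uint16_t)(word >> kMaskShift);                       // keep the high 16 bits
}

int main() {
    uint64_t fakeBuckets = 0x00006000001a2b40;                   // stand-in heap pointer
    uint64_t word = pack(fakeBuckets, 7);                        // capacity 8 -> mask 7
    assert(unpackBuckets(word) == fakeBuckets);
    assert(unpackMask(word) == 7);
    return 0;
}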

The vanishing method cache

We also left a question open earlier: when exactly are class and respondsToSelector: called? Let's add a log inside insert so that every selector is printed as it is inserted:

printf("%s", sel_getName(sel));

When method1 is called, method1 is printed exactly as expected; and when we run the x/4gx mo.class command, respondsToSelector: and class are printed, which tells us those two methods are inserted into the cache at the moment that command executes.

So why, after calling method1, can we find neither method1 nor respondsToSelector: in the cache?

From the exploration above we know that under the simulator's x86_64 architecture, buckets starts with a length of 4 and expands once it reaches 3/4 full. In the check else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))), CACHE_END_MARKER is 1 on x86_64, so the condition is: (number of methods already cached + 1) + 1 <= 3/4 of the buckets length.

So when the cache already holds method1 and respondsToSelector: and class is then called: per mask_t newOccupied = occupied() + 1;, occupied() returns the number of methods currently cached (2), so newOccupied is 3; adding CACHE_END_MARKER (1) gives 4, which exceeds 3/4 of the buckets length (3), so the cache has to expand. Expansion allocates a brand-new buckets array and class is inserted into it, which is why only class showed up earlier while method1 and respondsToSelector: were gone.
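The arithmetic can be replayed with a small toy simulation in C++ (the selector names and the simplistic grow-and-drop behaviour are just an illustration of the paragraph above, not the runtime implementation):

#include <cstdio>

int main() {
    unsigned capacity = 4, occupied = 0;          // x86_64: initial buckets length 4
    const unsigned CACHE_END_MARKER = 1;          // one slot reserved for the end marker
    const char *calls[] = {"method1", "respondsToSelector:", "class"};

    for (const char *sel : calls) {
        unsigned newOccupied = occupied + 1;
        if (newOccupied + CACHE_END_MARKER <= capacity * 3 / 4) {
            occupied = newOccupied;               // still under 3/4: insert as-is
            printf("insert %-20s occupied = %u / %u\n", sel, occupied, capacity);
        } else {
            capacity *= 2;                        // grow the table...
            occupied = 1;                         // ...and drop the old contents
            printf("insert %-20s -> grow to %u, old entries discarded\n", sel, capacity);
        }
    }
    return 0;
}

Running it shows exactly the sequence described above: method1 and respondsToSelector: fit, and the third insertion (class) trips the 3/4 check, doubles the table and discards the old entries, leaving only class in the new buckets.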

Summary

cache_t stores the method cache. When a method is called, cache_t's insert function is invoked and the method is added to the buckets cache. The default size of buckets is 2 on arm64 and 4 on x86_64.

When buckets can no longer hold the methods being cached, it is expanded.

On arm64, as long as the table length is at most 8, it only expands once it is completely full; once the length exceeds 8, it expands at 7/8. In other words, a table of length 8 can hold 8 methods, while a table of length 16 has to expand when the 15th method is about to be stored.

On x86_64 the threshold is 3/4. Expanding at 3/4 means: with a table of length 4, expansion happens when the 3rd entry needs to be stored; with a table of length 8, when the 6th entry needs to be stored. That is, a table can only hold (3/4 of its length) - 1 methods.

And after expansion the previously allocated buckets are freed, which means the methods cached in them are gone as well.