Exploring and Interpreting cache_t

Analyzing what cache_t caches, by reading the runtime source code

typedef struct objc_class *Class;
struct objc_class : objc_object {
...
    // Class ISA;          // 8 bytes
    Class superclass;      // 8 bytes
    cache_t cache;             // formerly cache pointer and vtable
    class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags
  ...
}

Now let's explore the data cache_t stores. isa takes 8 bytes and superclass takes 8 bytes, so cache_t sits at an offset of 16 bytes from the start of the class object.
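
As a quick illustration of that 16-byte offset, here is a minimal sketch (my own throwaway code, not runtime API; it assumes an LP64 platform where both isa and superclass are 8-byte pointers):

#import <objc/runtime.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    Class cls = objc_getClass("NSObject");
    // isa occupies bytes 0..7 and superclass bytes 8..15,
    // so the cache_t member starts 16 bytes into the class object.
    uintptr_t cacheAddress = (uintptr_t)cls + 16;
    printf("class at 0x%lx, cache_t at 0x%lx\n",
           (unsigned long)(uintptr_t)cls, (unsigned long)cacheAddress);
    return 0;
}

This mirrors the pointer arithmetic used when inspecting cache_t in lldb below.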

Examining the cache_t structure with lldb

[Image: cache_t structure printed in lldb]

Examining the cache_t structure in the source code

struct cache_t {
private:
    explicit_atomic<uintptr_t> _bucketsAndMaybeMask;
    union {
        struct {
            explicit_atomic<mask_t>    _maybeMask;
#if __LP64__
            uint16_t                   _flags;
#endif
            uint16_t                   _occupied;
        };
        explicit_atomic<preopt_cache_t *> _originalPreoptCache;
    };
...
}

From these two views we can see cache_t's data layout:

explicit_atomic<uintptr_t> _bucketsAndMaybeMask; // unsigned long, 8 bytes
 
union {                                          // the union as a whole takes 8 bytes
        struct {
            explicit_atomic<mask_t>    _maybeMask; // unsigned int, 4 bytes
#if __LP64__
            uint16_t                   _flags;     // unsigned short, 2 bytes
#endif
            uint16_t                   _occupied;  // unsigned short, 2 bytes
        };
        explicit_atomic<preopt_cache_t *> _originalPreoptCache; // struct pointer, 8 bytes
    };

So cache_t is 16 bytes.
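
We can sanity-check that arithmetic with a stand-in struct. This is only a sketch: fake_cache_layout below is my own mirror of the field sizes, not the runtime type, and it assumes an LP64 platform.

#include <cstdint>

typedef uint32_t mask_t;                  // mask_t is a 32-bit integer on 64-bit platforms

struct fake_cache_layout {
    uintptr_t bucketsAndMaybeMask;        // 8 bytes
    union {
        struct {
            mask_t   maybeMask;           // 4 bytes
            uint16_t flags;               // 2 bytes
            uint16_t occupied;            // 2 bytes
        } fields;                         // 4 + 2 + 2 = 8 bytes
        void *originalPreoptCache;        // 8 bytes
    } u;                                  // the union is 8 bytes either way
};

static_assert(sizeof(fake_cache_layout) == 16,
              "an 8-byte pointer plus an 8-byte union: 16 bytes in total");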

But neither the lldb output nor the source code tells us yet what cache_t actually caches, so next let's look at the methods cache_t provides.

struct cache_t {

private:
    explicit_atomic<uintptr_t> _bucketsAndMaybeMask;
    union {
        struct {
            explicit_atomic<mask_t>    _maybeMask;
#if __LP64__
            uint16_t                   _flags;
#endif
            uint16_t                   _occupied;
        };
        explicit_atomic<preopt_cache_t *> _originalPreoptCache;
    };
    
...

    void insert(SEL sel, IMP imp, id receiver);
    void copyCacheNolock(objc_imp_cache_entry *buffer, int len);
    void destroy();
    void eraseNolock(const char *func);

    static void init();
    static void collectNolock(bool collectALot);
    static size_t bytesForCapacity(uint32_t cap);
    
...
}

cache_t is a cache, and a cache is something we insert data into, so let's start with the insert() method.

insert(SEL sel, IMP imp, id receiver) clearly takes three parameters, and a sel together with an IMP can be regarded as a method. Let's step into insert():

void cache_t::insert(SEL sel, IMP imp, id receiver)
{
    
    runtimeLock.assertLocked();

    // Never cache before +initialize is done
    if (slowpath(!cls()->isInitialized())) {
        return;
    }

    if (isConstantOptimizedCache()) {
        _objc_fatal("cache_t::insert() called with a preoptimized cache for %s",
                    cls()->nameForLogging());
    }

#if DEBUG_TASK_THREADS
    return _collecting_in_critical();
#else
#if CONFIG_USE_CACHE_LOCK
    mutex_locker_t lock(cacheUpdateLock);
#endif

    ASSERT(sel != 0 && cls()->isInitialized());

    // Use the cache as-is if until we exceed our expected fill ratio.
    mask_t newOccupied = occupied() + 1;
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    if (slowpath(isConstantEmptyCache())) {
        // Cache is read-only. Replace it.
        if (!capacity) capacity = INIT_CACHE_SIZE;
        reallocate(oldCapacity, capacity, /* freeOld */false);
    }
    else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
        // Cache is less than 3/4 or 7/8 full. Use it as-is.
    }
#if CACHE_ALLOW_FULL_UTILIZATION
    else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
        // Allow 100% cache utilization for small buckets. Use it as-is.
    }
#endif
    else {
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        reallocate(oldCapacity, capacity, true);
    }

    bucket_t *b = buckets();
    mask_t m = capacity - 1;
    mask_t begin = cache_hash(sel, m);
    mask_t i = begin;

    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot.
    do {
        if (fastpath(b[i].sel() == 0)) {
            incrementOccupied();
            b[i].set<Atomic, Encoded>(b, sel, imp, cls());
            return;
        }
        if (b[i].sel() == sel) {
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    } while (fastpath((i = cache_next(i, m)) != begin));

    bad_cache(receiver, (SEL)sel);
#endif // !DEBUG_TASK_THREADS
}

We can see that a do-while loop operates on the bucket_t array and adds data to a bucket through the set() method. Let's see what set() does:

template<Atomicity atomicity, IMPEncoding impEncoding>
void bucket_t::set(bucket_t *base, SEL newSel, IMP newImp, Class cls)
{
    ASSERT(_sel.load(memory_order_relaxed) == 0 ||
           _sel.load(memory_order_relaxed) == newSel);

    uintptr_t newIMP = (impEncoding == Encoded
                        ? encodeImp(base, newImp, newSel, cls)
                        : (uintptr_t)newImp);

    if (atomicity == Atomic) {
        _imp.store(newIMP, memory_order_relaxed);
        
        if (_sel.load(memory_order_relaxed) != newSel) {
#ifdef __arm__
            mega_barrier();
            _sel.store(newSel, memory_order_relaxed);
#elif __x86_64__ || __i386__
            _sel.store(newSel, memory_order_release);
#else
#error Don't know how to do bucket_t::set on this architecture.
#endif
        }
    } else {
        _imp.store(newIMP, memory_order_relaxed);
        _sel.store(newSel, memory_order_relaxed);
    }
}

store writes a value to memory.
load reads a value from memory.

Summary: set() does exactly what it says: it writes newIMP and newSel into memory. Put simply, it saves our method into the cache.
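
The ordering inside set() matters. explicit_atomic is a thin wrapper around std::atomic, and the key idea, sketched below with plain std::atomic and placeholder types (this is not the runtime's own code, just the x86_64-style pattern), is that the imp is written first and the sel is published last with release semantics, so any reader that observes a non-zero sel is guaranteed to also see the matching imp:

#include <atomic>
#include <cstdint>

struct fake_bucket {
    std::atomic<uintptr_t> _sel{0};
    std::atomic<uintptr_t> _imp{0};

    void set(uintptr_t newSel, uintptr_t newImp) {
        // Write the imp first, with no ordering requirement...
        _imp.store(newImp, std::memory_order_relaxed);
        // ...then publish the sel with release semantics, so a concurrent
        // reader that loads this sel also sees the imp written above.
        _sel.store(newSel, std::memory_order_release);
    }
};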

Inside the do-while loop, set() writes the entry into the bucket at index i. Let's keep analyzing:

mask_t begin = cache_hash(sel, m);
mask_t i = begin;

We know sel is the selector being cached, but what is m?

m = capacity - 1;

Let's keep following capacity:

...
mask_t newOccupied = occupied() + 1;
unsigned oldCapacity = capacity(), capacity = oldCapacity;
if (slowpath(isConstantEmptyCache())) {
    // Cache is read-only. Replace it.
    if (!capacity) capacity = INIT_CACHE_SIZE;
    reallocate(oldCapacity, capacity, /* freeOld */false);
}
else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
    // Cache is less than 3/4 or 7/8 full. Use it as-is.
}
...

We see capacity = oldCapacity, where

unsigned oldCapacity = capacity()

Let's look at the capacity() function:

unsigned cache_t::capacity() const
{
    return mask() ? mask()+1 : 0; 
}

Then the mask() function:

mask_t cache_t::mask() const
{
    return _maybeMask.load(memory_order_relaxed); // read _maybeMask, which is the length of the bucket array minus 1
}

_maybeMask == the length of the bucket array - 1
capacity == the length of the bucket array

So m = the length of the bucket array - 1. For example, with 8 buckets, mask() returns 7 and capacity() returns 7 + 1 = 8.

Next, let's look at the implementation of cache_hash(sel, m):

static inline mask_t cache_hash(SEL sel, mask_t mask) 
{
    uintptr_t value = (uintptr_t)sel; // reinterpret the SEL pointer as a (fairly large) integer
#if CONFIG_USE_PREOPT_CACHES
    value ^= value >> 7;
#endif
    return (mask_t)(value & mask);
}

uintptr_t value = (uintptr_t)sel; // reinterpret the SEL pointer as a (fairly large) integer

[Image: example of a SEL cast to an integer]

return (mask_t)(value & mask); // ANDing a large value with a smaller mask can never produce anything greater than the mask, so the result is always a bucket index in [0, mask]
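
A quick worked example may help (the SEL address below is made up). Because capacity is always a power of two, mask = capacity - 1 has all of its low bits set, so value & mask behaves like value % capacity and can never exceed mask:

#include <cstdint>
#include <cstdio>

typedef uint32_t mask_t;

// Same shape as cache_hash above, reproduced here so the example is self-contained.
static mask_t fake_cache_hash(uintptr_t sel, mask_t mask) {
    uintptr_t value = sel;
    value ^= value >> 7;            // mix higher bits into the low bits
    return (mask_t)(value & mask);  // keep only the low bits -> an index in [0, mask]
}

int main(void) {
    uintptr_t sel = 0x7fff76d3a2b1;   // a hypothetical SEL address
    mask_t mask = 7;                  // capacity 8 -> mask 7
    printf("bucket index = %u\n", fake_cache_hash(sel, mask));   // always 0..7
    return 0;
}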

do {
    if (fastpath(b[i].sel() == 0)) { // is the sel at index i empty? if so, store the new entry here via set()
        incrementOccupied();
        b[i].set<Atomic, Encoded>(b, sel, imp, cls());
        return;
    }
    if (b[i].sel() == sel) { // is the sel at this slot the same sel we are trying to store?
        // The entry was added to the cache by some other thread
        // before we grabbed the cacheUpdateLock.
        return;
    }
} while (fastpath((i = cache_next(i, m)) != begin));

Besides fastpath(b[i].sel() == 0) and b[i].sel() == sel there is a third case: b[i].sel() != 0 and b[i].sel() != sel, i.e. a hash collision.
That case is handled by i = cache_next(i, m). Let's look at cache_next(i, m):

static inline mask_t cache_next(mask_t i, mask_t mask) {
    return (i+1) & mask;
}

In other words, on a collision the entry is stored in the next slot, wrapping around at the end of the array.
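
A tiny sketch of the probe sequence (the mask and starting index are chosen arbitrarily): cache_next just moves to the next slot and wraps back to 0 past the end, so starting from begin every slot is visited exactly once before the loop gives up and insert() falls through to bad_cache():

#include <cstdio>

typedef unsigned int mask_t;

static mask_t cache_next(mask_t i, mask_t mask) { return (i + 1) & mask; }

int main(void) {
    mask_t mask = 7, begin = 5, i = begin;
    do {
        printf("%u ", i);    // prints: 5 6 7 0 1 2 3 4
    } while ((i = cache_next(i, mask)) != begin);
    printf("\n");
    return 0;
}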

Summary: reading through insert() shows that the cache caches methods, i.e. sel and imp.

How cache expansion comes up

We've established that the cache stores methods as sel/imp pairs. Next, let's print cache_t in lldb and see where those sel and imp values actually live.

[Image: lldb printout of cache_t's members]

Printing cache_t's members does not reveal any sel or imp directly, so let's look at what methods cache_t exposes:

...
struct cache_t {

public:
   
    unsigned capacity() const;
    struct bucket_t *buckets() const;
    Class cls() const;

}
...

Earlier we added data to bucket_t through the insert() method, and cache_t provides a buckets() method that returns a bucket_t struct pointer, so let's start from buckets() and first look at the bucket_t data structure:

struct bucket_t {
...
inline SEL sel() const { return _sel.load(memory_order_relaxed); }

inline IMP imp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, Class cls) const {
        uintptr_t imp = _imp.load(memory_order_relaxed);
        if (!imp) return nil;
#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_PTRAUTH
        SEL sel = _sel.load(memory_order_relaxed);
        return (IMP)
            ptrauth_auth_and_resign((const void *)imp,
                                    ptrauth_key_process_dependent_code,
                                    modifierForSEL(base, sel, cls),
                                    ptrauth_key_function_pointer, 0);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
        return (IMP)(imp ^ (uintptr_t)cls);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
        return (IMP)imp;
#else
#error Unknown method cache IMP encoding.
#endif
    }

...
};

These two methods return the sel and the imp. Let's check with lldb:

[Image: lldb printout of the cached buckets]
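
For reference, the lldb walk above can also be sketched in code. This is a hedged, throwaway sketch: it relies on private layout details that only hold for x86_64 in current objc4 sources (the buckets pointer stored directly in _bucketsAndMaybeMask, sel stored before imp in each bucket, the imp stored in encoded form), and dumpCache and fake_bucket_t are my own names:

#import <objc/runtime.h>
#include <stdint.h>
#include <stdio.h>

struct fake_bucket_t { SEL sel; uintptr_t imp; };        // x86_64 field order

static void dumpCache(Class cls) {
    uintptr_t cacheBase = (uintptr_t)cls + 16;           // skip isa + superclass
    struct fake_bucket_t *buckets = *(struct fake_bucket_t **)cacheBase;
    uint32_t mask = *(uint32_t *)(cacheBase + 8);        // _maybeMask
    for (uint32_t i = 0; i <= mask; i++) {
        if (buckets[i].sel != NULL) {
            printf("bucket[%u]: sel = %s, imp (encoded) = 0x%lx\n",
                   i, sel_getName(buckets[i].sel), (unsigned long)buckets[i].imp);
        }
    }
}

Calling dumpCache on a class at a breakpoint is the programmatic equivalent of reading buckets() and capacity() in lldb.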

The printout shows that cache_t has cached two selectors, class and respondsToSelector, even though we never called those methods ourselves. Why is that? We'll come back to it later.
Next, let's write a method of our own, call it, and then print again with lldb:

LGPerson *p = [LGPerson alloc];
[p method1];

[Image: lldb printout of the cached buckets after calling method1]

This time the printout shows only one cached selector, class. The method1 we called ourselves is not there, and the respondsToSelector entry from before is gone too. Why? This is where cache expansion comes in. So how does the cache expand?

How the cache expansion rules work

Next we'll walk through the expansion in detail. Go back into the insert() method we saw earlier:

void cache_t::insert(SEL sel, IMP imp, id receiver)
{
    ...

    mask_t newOccupied = occupied() + 1;
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    if (slowpath(isConstantEmptyCache())) {
        // Cache is read-only. Replace it.
        if (!capacity) capacity = INIT_CACHE_SIZE;
        reallocate(oldCapacity, capacity, /* freeOld */false);
    }
    else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
        // Cache is less than 3/4 or 7/8 full. Use it as-is.
    }
#if CACHE_ALLOW_FULL_UTILIZATION
    else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
        // Allow 100% cache utilization for small buckets. Use it as-is.
    }
#endif
    else {
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        reallocate(oldCapacity, capacity, true);
    }

    bucket_t *b = buckets();
    mask_t m = capacity - 1;
    mask_t begin = cache_hash(sel, m);
    mask_t i = begin;

    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot.
    do {
        if (fastpath(b[i].sel() == 0)) {
            incrementOccupied();
            b[i].set<Atomic, Encoded>(b, sel, imp, cls());
            return;
        }
        if (b[i].sel() == sel) {
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    } while (fastpath((i = cache_next(i, m)) != begin));

    bad_cache(receiver, (SEL)sel);
#endif // !DEBUG_TASK_THREADS
}
Let's start with the first branch:

mask_t newOccupied = occupied() + 1;
unsigned oldCapacity = capacity(), capacity = oldCapacity;
if (slowpath(isConstantEmptyCache())) {
    // Cache is read-only. Replace it.
    if (!capacity) capacity = INIT_CACHE_SIZE;
    reallocate(oldCapacity, capacity, /* freeOld */false);
}

Explanation: occupied() is 0 on the very first insert, so newOccupied == 1.
As established earlier, capacity == the length of the bucket array.
if (slowpath(isConstantEmptyCache())) checks whether the cache is still empty; on the first insert it certainly is, and capacity == 0, so we take this branch.
if (!capacity) capacity = INIT_CACHE_SIZE then gives capacity its initial value, INIT_CACHE_SIZE. Let's look at INIT_CACHE_SIZE:

enum {
#if CACHE_END_MARKER || (__arm64__ && !__LP64__)
    // When we have a cache end marker it fills a bucket slot, so having a
    // initial cache size of 2 buckets would not be efficient when one of the
    // slots is always filled with the end marker. So start with a cache size
    // 4 buckets.
    INIT_CACHE_SIZE_LOG2 = 2,
#else
    // Allow an initial bucket size of 2 buckets, since a large number of
    // classes, especially metaclasses, have very few imps, and we support
    // the ability to fill 100% of the cache before resizing.
    INIT_CACHE_SIZE_LOG2 = 1,
#endif
    INIT_CACHE_SIZE      = (1 << INIT_CACHE_SIZE_LOG2),
    MAX_CACHE_SIZE_LOG2  = 16,
    MAX_CACHE_SIZE       = (1 << MAX_CACHE_SIZE_LOG2),
    FULL_UTILIZATION_CACHE_SIZE_LOG2 = 3,
    FULL_UTILIZATION_CACHE_SIZE = (1 << FULL_UTILIZATION_CACHE_SIZE_LOG2),
};

So INIT_CACHE_SIZE = (1 << INIT_CACHE_SIZE_LOG2). Next come the architecture-specific definitions:

#if __arm__  ||  __x86_64__  ||  __i386__

// objc_msgSend has few registers available.
// Cache scan increments and wraps at special end-marking bucket.
#define CACHE_END_MARKER 1

// Historical fill ratio of 75% (since the new objc runtime was introduced).
static inline mask_t cache_fill_ratio(mask_t capacity) {
    return capacity * 3 / 4;
}

#elif __arm64__ && !__LP64__

// objc_msgSend has lots of registers available.
// Cache scan decrements. No end marker needed.
#define CACHE_END_MARKER 0

// Historical fill ratio of 75% (since the new objc runtime was introduced).
static inline mask_t cache_fill_ratio(mask_t capacity) {
    return capacity * 3 / 4;
}

#elif __arm64__ && __LP64__

// objc_msgSend has lots of registers available.
// Cache scan decrements. No end marker needed.
#define CACHE_END_MARKER 0

// Allow 87.5% fill ratio in the fast path for all cache sizes.
// Increasing the cache fill ratio reduces the fragmentation and wasted space
// in imp-caches at the cost of potentially increasing the average lookup of
// a selector in imp-caches by increasing collision chains. Another potential
// change is that cache table resizes / resets happen at different moments.
static inline mask_t cache_fill_ratio(mask_t capacity) {
    return capacity * 7 / 8;
}

CACHE_END_MARKER is 1 on x86_64 and 0 on arm64.
INIT_CACHE_SIZE is 1 << 2 = 4 on x86_64 and 1 << 1 = 2 on arm64.

So when capacity is first initialized it is 4 under x86_64 and 2 under arm64.
Next, reallocate(oldCapacity, capacity, /* freeOld */false) is called:

ALWAYS_INLINE
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
    bucket_t *oldBuckets = buckets(); 
    bucket_t *newBuckets = allocateBuckets(newCapacity);

    // Cache's old contents are not propagated. 
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this

    ASSERT(newCapacity > 0);
    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

    setBucketsAndMask(newBuckets, newCapacity - 1);
    
    if (freeOld) {
        collect_free(oldBuckets, oldCapacity);
    }
}

Summary:

The first branch:

if (slowpath(isConstantEmptyCache())) {
    // Cache is read-only. Replace it.
    if (!capacity) capacity = INIT_CACHE_SIZE;
    reallocate(oldCapacity, capacity, /* freeOld */false);
}

Effect: allocate a bucket array of length 2 under arm64, or of length 4 under x86_64.

Now the second branch:

else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
    // Cache is less than 3/4 or 7/8 full. Use it as-is.
}

Effect: if, even after this insert, the cache stays within the fill ratio (3/4 of the bucket array length on x86_64, 7/8 on arm64, with the end marker counting as one slot on x86_64), do nothing and keep using the cache as-is.
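
Some worked numbers for this check, using the two fill-ratio formulas quoted earlier (the thresholds printed below follow directly from them):

#include <cstdio>

typedef unsigned int mask_t;

// The architecture-specific fill-ratio formulas from the code above.
static mask_t fill_ratio_x86_64(mask_t capacity) { return capacity * 3 / 4; }
static mask_t fill_ratio_arm64(mask_t capacity)  { return capacity * 7 / 8; }

int main(void) {
    // x86_64: capacity 4, CACHE_END_MARKER == 1, threshold 4 * 3 / 4 = 3.
    // newOccupied + 1 <= 3 only holds while newOccupied <= 2, so the cache
    // grows while the 3rd method is being inserted.
    printf("x86_64, capacity 4: threshold %u\n", fill_ratio_x86_64(4));

    // arm64: capacity 8, CACHE_END_MARKER == 0, threshold 8 * 7 / 8 = 7,
    // so 7 entries pass this check (and the small-cache branch below can
    // still allow an 8th).
    printf("arm64,  capacity 8: threshold %u\n", fill_ratio_arm64(8));
    return 0;
}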

The third branch:

else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
     // Allow 100% cache utilization for small buckets. Use it as-is.
}

FULL_UTILIZATION_CACHE_SIZE_LOG2 = 3,
FULL_UTILIZATION_CACHE_SIZE = (1 << FULL_UTILIZATION_CACHE_SIZE_LOG2),

Effect: this branch only exists when CACHE_ALLOW_FULL_UTILIZATION is defined (arm64). When the bucket array length is at most 8 (FULL_UTILIZATION_CACHE_SIZE) and the new entry still fits, the cache is allowed to fill up to 100% and nothing else happens.

And finally:

else {
    capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
    if (capacity > MAX_CACHE_SIZE) {
        capacity = MAX_CACHE_SIZE;
    }
    reallocate(oldCapacity, capacity, true);
}

MAX_CACHE_SIZE_LOG2  = 16,
MAX_CACHE_SIZE       = (1 << MAX_CACHE_SIZE_LOG2),

capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE
If capacity == 0, use the initial size; otherwise double the capacity.

if (capacity > MAX_CACHE_SIZE)
If the doubled capacity exceeds the limit MAX_CACHE_SIZE (1 << 16), clamp it to MAX_CACHE_SIZE.

reallocate(oldCapacity, capacity, true);
Here freeOld is passed as true, so the old bucket array is released.

The cache expansion rules:
On x86_64: when a new insert would push the cache past 3/4 of the bucket array length, the capacity doubles.
(Note the small detail that CACHE_END_MARKER is 1 on x86_64, so the end marker occupies one of the slots counted against that 3/4.)

On arm64: when a new insert would push the cache past 7/8 of the bucket array length, the capacity doubles; as long as the bucket array length is at most 8, though, the cache may fill to 100% before it expands.
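
To see how those rules play out over repeated inserts, here is a toy simulation of the x86_64 path. It only mimics the branching logic quoted above (it is not the runtime itself): capacity starts at 4, doubles whenever an insert would cross the 3/4 threshold, and occupied falls back to 1 because reallocate(oldCapacity, capacity, true) throws the old buckets away.

#include <cstdio>

int main(void) {
    unsigned capacity = 0, occupied = 0;
    const unsigned INIT = 4;            // INIT_CACHE_SIZE on x86_64
    const unsigned END_MARKER = 1;      // CACHE_END_MARKER on x86_64
    for (int insert = 1; insert <= 10; insert++) {
        unsigned newOccupied = occupied + 1;
        if (capacity == 0) {
            capacity = INIT;                                  // first insert: allocate 4 buckets
        } else if (newOccupied + END_MARKER > capacity * 3 / 4) {
            capacity *= 2;                                    // double the bucket array...
            occupied = 0;                                     // ...and drop the old entries
            newOccupied = 1;
        }
        occupied = newOccupied;
        printf("insert %2d: capacity %2u, occupied %u\n", insert, capacity, occupied);
    }
    return 0;
}

Each time the capacity jumps (4 -> 8 -> 16 here), the occupied count drops back to 1, which matches the behaviour observed in the lldb experiment above.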

Remember that when we called method1 earlier, the lldb printout showed neither method1 nor the earlier respondsToSelector, only class? That is because the expansion released the old bucket array, and the methods cached before it were discarded along with it.