手撕iOS底层12 -- cache_t深入分析

686 阅读8分钟

最近在看工作,求内推啊 微信号bgwx7788

// Excerpt of the runtime's class structure; trailing numbers are the author's byte sizes.
struct objc_class : objc_object {
    // Class ISA;       // 8
    Class superclass;   // 8
    cache_t cache;      // 16        // formerly cache pointer and vtable
    class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags (properties, methods, protocols) // 8
	....
}

前边的系列文章分析了 objc_class 的 isa(superclass 同为 Class 类型,同理),bits 也分析了,本文来分析 cache_t

cache_t的地址偏移是8+8=16,转为16进制是0x10,即在类的首地址上+0x10

0x01 - cache_t里存储了什么?

先来看下cache_t这个结构体类型, 了解下它里边存了一些什么东西?

truct cache_t {
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED
  // explicit_atomic 显示原子性,目的是为了能够保证 增删查改时, 保证线程安全
  // 等价于 struct bucket_t * _buckets;
  // bucket_t 里放的是sel和imp, 根据架构不同,存放的顺序也不一样
  // cache_t 通过 buckets() 读取_buckets
    explicit_atomic<struct bucket_t *> _buckets; //8
    explicit_atomic<mask_t> _mask; // 4
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
    explicit_atomic<uintptr_t> _maskAndBuckets; // 说白了, 为了节省内存,读取方便mask和buckets存在一起
    mask_t _mask_unused;
    
    // How much the mask is shifted by.
    static constexpr uintptr_t maskShift = 48;
    
    // Additional bits after the mask which must be zero. msgSend
    // takes advantage of these additional bits to construct the value
    // `mask << 4` from `_maskAndBuckets` in a single instruction.
    static constexpr uintptr_t maskZeroBits = 4;
    
    // The largest mask value we can store.
    static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;
    
    // The mask applied to `_maskAndBuckets` to retrieve the buckets pointer.
    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;
    
    // Ensure we have enough bits for the buckets pointer.
    static_assert(bucketsMask >= MACH_VM_MAX_ADDRESS, "Bucket field doesn't have enough bits for arbitrary pointers.");
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
    // _maskAndBuckets stores the mask shift in the low 4 bits, and
    // the buckets pointer in the remainder of the value. The mask
    // shift is the value where (0xffff >> shift) produces the correct
    // mask. This is equal to 16 - log2(cache_size).
    explicit_atomic<uintptr_t> _maskAndBuckets;
    mask_t _mask_unused;

    static constexpr uintptr_t maskBits = 4;
    static constexpr uintptr_t maskMask = (1 << maskBits) - 1;
    static constexpr uintptr_t bucketsMask = ~maskMask;
#else
#error Unknown cache mask storage type.
#endif
  
#if __LP64__
    uint16_t _flags; // 2
#endif
    uint16_t _occupied; //2
public:
    static bucket_t *emptyBuckets();
    
    struct bucket_t *buckets();
    mask_t mask();
    mask_t occupied();
    //
    void incrementOccupied();
    void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);
    void initializeToEmpty();

    unsigned capacity();
    bool isConstantEmptyCache();
    bool canBeFreed();
  .......
}

先来看下这几个宏:

  • CACHE_MASK_STORAGE_OUTLINED 模拟器或者macOS
  • CACHE_MASK_STORAGE_HIGH_16 真机64位
  • CACHE_MASK_STORAGE_LOW_4 非64位真机

这几个宏是不同平台对应的宏

// The three possible layouts for the cache's mask/buckets storage.
#define CACHE_MASK_STORAGE_OUTLINED 1
#define CACHE_MASK_STORAGE_HIGH_16 2
#define CACHE_MASK_STORAGE_LOW_4 3
// The layout is chosen at preprocessing time from the target architecture.
#if defined(__arm64__) && __LP64__ // arm64 with 64-bit pointers (LP64)
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_HIGH_16
#elif defined(__arm64__) && !__LP64__ // arm64 without LP64
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_LOW_4
#else
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_OUTLINED // everything else (the article: simulator / macOS)
#endif

bucket_t类型结构

// One cache entry: a selector and the (possibly encoded) implementation pointer for it.
struct bucket_t {
private:
    // IMP-first is better for arm64e ptrauth and no worse for arm64.
    // SEL-first is better for armv7* and i386 and x86_64.
#if __arm64__ // arm64 (the article: real devices)
  // Both fields are wrapped in explicit_atomic here as well.
    explicit_atomic<uintptr_t> _imp;
    explicit_atomic<SEL> _sel;
#else // everything else
    explicit_atomic<SEL> _sel;
    explicit_atomic<uintptr_t> _imp;
#endif
  
  public:
    inline SEL sel() const { return _sel.load(memory_order::memory_order_relaxed); } // read the stored selector
		// Read the stored IMP for class cls; returns nil when the slot holds 0.
    inline IMP imp(Class cls) const {
        uintptr_t imp = _imp.load(memory_order::memory_order_relaxed);
        if (!imp) return nil;
        // Decode the stored value according to the configured IMP encoding.
#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_PTRAUTH
        SEL sel = _sel.load(memory_order::memory_order_relaxed);
        return (IMP)
            ptrauth_auth_and_resign((const void *)imp,
                                    ptrauth_key_process_dependent_code,
                                    modifierForSEL(sel, cls),
                                    ptrauth_key_function_pointer, 0);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
        // Stored value is XORed with the class pointer.
        return (IMP)(imp ^ (uintptr_t)cls);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
        // Stored value is the raw IMP.
        return (IMP)imp;
#else
#error Unknown method cache IMP encoding.
#endif
    }
}

上边简单地看了cache_t的结构以及它的成员bucket_t的结构,说白了,cache_t里缓存的就是 sel 和 imp

0x02 - 通过LLDB验证cache_t存储数据

现在我们通过源码环境 来验证上边的分析

// Minimal class used to exercise the method cache: each method only logs its own name.
@interface Person : NSObject
- (void)sayHello;
- (void)sayCode;
- (void)say1;
- (void)say2;
@end

// The class name is required after @implementation.
@implementation Person
- (void)sayHello
{
    NSLog(@"%s", __func__);
}
- (void)sayCode
{
    NSLog(@"%s", __func__);
}
- (void)say1
{
    NSLog(@"%s", __func__);
}
- (void)say2
{
    NSLog(@"%s", __func__);
}
@end

通过以上这个简单的Person类,调用这四个方法来验证cache_t

先把breakpoint停到第一个要调用的方法,这时候这个person对象任何方法还没有调用,那么它的方法缓存是空的,然后在debug区域打印出类的首地址,在这个首地址上加0x10,就会得到cache_t的内存地址

平移得到的cache_t地址,打印其内容, 因为没有调用过方法,所以打印出的内容是空的,如_occupied = 0

再通过 struct bucket_t *buckets(); 这个方法来获取_buckets属性, 拿到了_buckets属性,就可以通过 bucket_t 的 sel() 和 imp(Class) 方法来获取对应的sel和imp。

由上分析可知,每调用一次方法,就执行一次方法缓存。

那现在我们过一个断点,执行第一个方法

⚠️ 这里通过buckets获取的是一个数组,所以可以通过[]取值,也可以通过指针+1的形式来取值,通过这两种方式都可以获取多个bucket_t的值

看到缓存里有了第一个缓存的方法,然后我们再通过MachOView来看下这个方法是我们调用的sayHello方法吗?

MachOView里看到的方法地址和我们打印输出的地址是一模一样的,所以确认是我们调用的sayHello方法。

0x03 - 脱离objc-781源码环境测试cache_t存储数据

首先打开一个普通工程,将源码部分类型拷贝过来,如下:

typedef uint32_t mask_t;  // x86_64 & arm64 asm are less efficient with 16-bits
// Hand-written mirror of bucket_t, using the SEL-first field order.
struct test_bucket_t {
    SEL _sel;
    IMP _imp;
};

// Hand-written mirror of cache_t in the layout with separate _buckets and _mask fields.
struct test_cache_t {
    struct test_bucket_t * _buckets; // 8
    mask_t _mask; // 4
    uint16_t _flags;
    uint16_t _occupied;
};

// Hand-written mirror of the leading fields of objc_class, so a Class pointer can be read through it.
struct test_objc_class {
    Class ISA;       // NOTE: must be written out explicitly here; in the runtime source this member is inherited from the parent struct, and this copy has no parent
    Class superclass;   // 8
    struct test_cache_t cache;      // 16        // formerly cache pointer and vtable
};

// Calls four methods on a Person, then reinterprets the class pointer as
// test_objc_class and dumps the cache's occupied count, mask and buckets.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        Person *person = [Person alloc];
        Class personClass = [Person class];

        // Each message send below may add an entry to the class's method cache.
        [person say1];
        [person say2];
        [person say3];
        [person say4];

        struct test_objc_class *mirror = (__bridge struct test_objc_class *)(personClass);
        struct test_cache_t *cache = &mirror->cache;
        NSLog(@"%hu - %u", cache->_occupied, cache->_mask);

        // Visits indices 0 .. _mask-1; the slot at index _mask itself is not printed.
        mask_t index = 0;
        while (index < cache->_mask) {
            struct test_bucket_t *slot = &cache->_buckets[index];
            NSLog(@"%@ - %p", NSStringFromSelector(slot->_sel), slot->_imp);
            index += 1;
        }
    }
    return 0;
}

❤️有兴趣的烙铁可以自己玩一下。 这样比在LLDB玩的更直观一些~

调用一次方法的打印。

调用4次方法的打印


通过打印可以看到一些问题?

  • 输出的2 - 7代表什么?即 occupied 和 mask 是什么?
  • 为什么调用1次方法和调用3次方法打印输出的内容不一样?
  • bucket为什么会有“丢失”的情况? 如调用4次方法后打印输出只有 say3 和 say4

0x04 -cache_t真正出击🧍‍♀️

带着上边的一些问题, 来看cache_t的源码,看看能否得到答案?在上面的例子中,发现只有occupied增长到一定数量时,才会引起mask的变化;在cache_t中发现只有函数incrementOccupied()会让occupied增加一。

// Bumps the count of occupied cache slots by one.
void cache_t::incrementOccupied() 
{
    _occupied++;
}

再顺着这条线,去看一下哪里使用了这个函数,找到今天的核心函数void cache_t::insert

/* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */
enum {
    INIT_CACHE_SIZE_LOG2 = 2,
    INIT_CACHE_SIZE      = (1 << INIT_CACHE_SIZE_LOG2),   // 1 << 2 = 4 buckets
    MAX_CACHE_SIZE_LOG2  = 16,
    MAX_CACHE_SIZE       = (1 << MAX_CACHE_SIZE_LOG2),    // 1 << 16 = 65536 buckets
};
// Inserts the (sel, imp) pair for cls into this cache, first allocating or
// growing the bucket array when needed, then probing for a free slot.
ALWAYS_INLINE
void cache_t::insert(Class cls, SEL sel, IMP imp, id receiver)
{
#if CONFIG_USE_CACHE_LOCK
    cacheUpdateLock.assertLocked();
#else
    runtimeLock.assertLocked();
#endif

    ASSERT(sel != 0 && cls->isInitialized());

    // Use the cache as-is if it is less than 3/4 full
    mask_t newOccupied = occupied() + 1;// before any method has been cached occupied() is 0, so newOccupied is 1
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    if (slowpath(isConstantEmptyCache())) { // marked as the unlikely path: the cache has not been created yet
        // Cache is read-only. Replace it.
        if (!capacity) capacity = INIT_CACHE_SIZE; //  initial capacity is 4 (1 << 2)
        reallocate(oldCapacity, capacity, /* freeOld */false); // first-time allocation of the bucket array
    }
    else if (fastpath(newOccupied + CACHE_END_MARKER <= capacity / 4 * 3)) {
        // Cache is less than 3/4 full. Use it as-is.
      	// Within the 3/4 threshold no growth is needed,
        // so nothing has to change here.
    }
    else {
      // Past the 3/4 threshold: grow the cache.
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE; // double the capacity, e.g. 4 -> 8
        if (capacity > MAX_CACHE_SIZE) { // clamp to the maximum size
            capacity = MAX_CACHE_SIZE;
        }
      // Reaching here means a cache already existed but crossed the threshold;
      // allocate a new bucket array and pass true so the old one is freed.
        reallocate(oldCapacity, capacity, true);
    }

    bucket_t *b = buckets(); // current bucket array
    mask_t m = capacity - 1; // mask = capacity - 1
    mask_t begin = cache_hash(sel, m); // hash the selector into a starting index
    mask_t i = begin;

    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot because the
    // minimum size is 4 and we resized at 3/4 full.
  	// 
    do {
        if (fastpath(b[i].sel() == 0)) {// slot at this index is empty: store the entry here
            incrementOccupied();
            b[i].set<Atomic, Encoded>(sel, imp, cls);
            return;
        }
        if (b[i].sel() == sel) { // slot is taken: check whether it already holds this sel
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    } while (fastpath((i = cache_next(i, m)) != begin)); // neither matched: move to the next probe index until we wrap back to begin

    // Every slot was probed without finding room or a match.
    cache_t::bad_cache(receiver, (SEL)sel, cls);
}

这个void cache_t::insert函数从名字上看是插入的意思, 在缓存中插入方法,

上面的代码片段主要做了这么几步
  1. 首先获取_occupied 现在目前为止占用的大小, 缓存占用大小
  2. 根据这个occupied大小执行指定的操作
  3. bucket内部存储 imp 和 sel

关于occupied 缓存占用大小 的几点补充:

  • 只要对象生成,调用init方法也会影响occupied的值,
  • 属性的 get、set 方法也会影响occupied的值

根据occupied的值来决定执行什么操作

  • 第一次执行的时候, 缓存还没有, 默认创建开辟4
if (slowpath(isConstantEmptyCache())) { // 小概率事件, occupied()是0的时候,也就是创建缓存是小概率事件
        // Cache is read-only. Replace it.
        if (!capacity) capacity = INIT_CACHE_SIZE; //  初始化是4 (capacity = 1<<2)
        reallocate(oldCapacity, capacity, /* freeOld */false); // 初始化创建 开辟内存空间
    }
  • 如果占用的缓存小于四分之三的时候,不作任何处理
if (fastpath(newOccupied + CACHE_END_MARKER <= capacity / 4 * 3)) {
        // Cache is less than 3/4 full. Use it as-is.
      	// 在这个小于四分之三范围内不用扩容
        // 在范围内什么也不用动
    }
  • 如果超过缓存容量的四分之三, 需要扩容和重新开辟空间
else {
      // 超出四分之三,就开始扩容处理
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE; // 扩容俩倍 2*4=8
        if (capacity > MAX_CACHE_SIZE) { //
            capacity = MAX_CACHE_SIZE;
        }
      // 如果执行到这里, 说明之前存储过,有旧的缓存,但是已经满了, 开始重新分配内存
        reallocate(oldCapacity, capacity, true);
    }
// Replaces the bucket array with a freshly allocated one of newCapacity
// buckets; existing entries are not copied over. When freeOld is set, the
// previous array is handed to cache_collect_free.
ALWAYS_INLINE
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
    bucket_t *oldBuckets = buckets(); // remember the current buckets before they are replaced
    bucket_t *newBuckets = allocateBuckets(newCapacity); // allocate a bucket array sized for newCapacity

    // Cache's old contents are not propagated. 
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this

    ASSERT(newCapacity > 0);
    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

    setBucketsAndMask(newBuckets, newCapacity - 1); // install the new buckets with mask = newCapacity - 1
    
    if (freeOld) {
        cache_collect_free(oldBuckets, oldCapacity);
    }
}

// Allocates a zero-filled bucket array for newCapacity and writes the end
// marker bucket; returns the new array.
bucket_t *allocateBuckets(mask_t newCapacity)
{
    // Allocate one extra bucket to mark the end of the list.
    // This can't overflow mask_t because newCapacity is a power of 2.
    bucket_t *newBuckets = (bucket_t *)
        calloc(cache_t::bytesForCapacity(newCapacity), 1);

    bucket_t *end = cache_t::endMarker(newBuckets, newCapacity);

#if __arm__
    // End marker's sel is 1 and imp points BEFORE the first bucket.
    // This saves an instruction in objc_msgSend.
    end->set<NotAtomic, Raw>((SEL)(uintptr_t)1, (IMP)(newBuckets - 1), nil);
#else
    // End marker's sel is 1 and imp points to the first bucket.
    end->set<NotAtomic, Raw>((SEL)(uintptr_t)1, (IMP)newBuckets, nil);
#endif
    
    // Optional bookkeeping when cache printing is enabled.
    if (PrintCaches) recordNewCache(newCapacity);

    return newBuckets;
}

  • allocateBuckets 向系统申请开辟空间,也就是申请newCapacity个内存空间的buckets

  • setBucketsAndMask 把申请的buckets存入缓存中,这里也是根据平台架构执行不同的操作

    • 如果是真机,根据 buckets 和 mask 各自的位置存储,并将occupied设置为0
    #elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
    
    // HIGH_16 variant: packs the mask (shifted left by maskShift) and the
    // buckets pointer into _maskAndBuckets, then resets _occupied to 0.
    void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
    {
        uintptr_t buckets = (uintptr_t)newBuckets;
        uintptr_t mask = (uintptr_t)newMask;
        
        // Both values must fit in their respective bit ranges.
        ASSERT(buckets <= bucketsMask);
        ASSERT(mask <= maxMask);
        
        _maskAndBuckets.store(((uintptr_t)newMask << maskShift) | (uintptr_t)newBuckets, std::memory_order_relaxed);
        _occupied = 0;
    }	
    
    • 如果是模拟器或者macOS,正常存储 buckets 和 mask,并将occupied设置为0
    // Outlined variant: stores the buckets pointer first and the mask second,
    // then resets _occupied to 0.
    void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
    {
        // objc_msgSend uses mask and buckets with no locks.
        // It is safe for objc_msgSend to see new buckets but old mask.
        // (It will get a cache miss but not overrun the buckets' bounds).
        // It is unsafe for objc_msgSend to see old buckets and new mask.
        // Therefore we write new buckets, wait a lot, then write new mask.
        // objc_msgSend reads mask first, then buckets.
    
    #ifdef __arm__
        // ensure other threads see buckets contents before buckets pointer
        mega_barrier();
    
        _buckets.store(newBuckets, memory_order::memory_order_relaxed);
        
        // ensure other threads see new buckets before new mask
        mega_barrier();
        
        _mask.store(newMask, memory_order::memory_order_relaxed);
        _occupied = 0;
    #elif __x86_64__ || i386
        // ensure other threads see buckets contents before buckets pointer
        _buckets.store(newBuckets, memory_order::memory_order_release);
        
        // ensure other threads see new buckets before new mask
        _mask.store(newMask, memory_order::memory_order_release);
        _occupied = 0; //  reset the occupied count to 0
    #else
    #error Don't know how to do setBucketsAndMask on this architecture.
    #endif
    }
    
 if (freeOld) {
        cache_collect_free(oldBuckets, oldCapacity);
    }
/***********************************************************************
* cache_collect_free.  Add the specified malloc'd memory to the list
* of them to free at some later point.
* size is used for the collection threshold. It does not have to be 
* precisely the block's size.
* Cache locks: cacheUpdateLock must be held by the caller.
**********************************************************************/
// Queues an old bucket array on the garbage list and then triggers a collection attempt.
static void cache_collect_free(bucket_t *data, mask_t capacity)
{
#if CONFIG_USE_CACHE_LOCK
    cacheUpdateLock.assertLocked();
#else
    runtimeLock.assertLocked();
#endif

    if (PrintCaches) recordDeadCache(capacity);

    _garbage_make_room (); // make sure the garbage table exists and has a free slot
    garbage_byte_size += cache_t::bytesForCapacity(capacity); 
    garbage_refs[garbage_count++] = data; // append the old bucket array pointer to the garbage table
    cache_collect(false);// run garbage collection to clean up old buckets
}
// Ensures garbage_refs can take one more entry: allocates the table on first
// use, and doubles it on later calls when it is full.
static void _garbage_make_room(void)
{
    static int first = 1;

    // Create the collection table the first time it is needed
    if (first)
    {
        first = 0;
        garbage_refs = (bucket_t**)
            malloc(INIT_GARBAGE_COUNT * sizeof(void *));
        garbage_max = INIT_GARBAGE_COUNT;
    }

    // Double the table if it is full
    else if (garbage_count == garbage_max)
    {
        garbage_refs = (bucket_t**)
            realloc(garbage_refs, garbage_max * 2 * sizeof(void *));
        garbage_max *= 2; // record the doubled table capacity
    }
}
  • 如果有旧的buckets,会调用cache_collect_free来清空oldCapacity个大小的内存,

  • 如果第一次,需要创建垃圾回收空间

  • 非第一次,且回收表已满(garbage_count == garbage_max)时,则将回收表扩容,原有基础*2

  • cache_collect 方法 垃圾回收,清理旧的bucket

开始存储 sel 和 imp

// Class points to cache. SEL is key. Cache buckets store SEL+IMP.
// Caches are never built in the dyld shared cache.

// Maps a selector to a bucket index: the selector's address bits, truncated
// to mask_t, ANDed with the cache mask.
static inline mask_t cache_hash(SEL sel, mask_t mask) 
{
    uintptr_t selBits = (uintptr_t)sel;
    mask_t truncated = (mask_t)selBits;
    return truncated & mask;
}

这里主要是根据cache_hash(sel, m);这个方法来生成存储下标,分三种情况

  • 如果当前下标的b[i].sel() == 0 说明是空的,可以存储sel-imp,并将occupied自增1

     if (fastpath(b[i].sel() == 0)) {
                incrementOccupied(); // 
                b[i].set<Atomic, Encoded>(sel, imp, cls);
                return;
            }
    
  • 如果当前下标的位置不为空,判断当前sel是否同一个, 同一个则直接return

    if (b[i].sel() == sel) {
                // The entry was added to the cache by some other thread
                // before we grabbed the cacheUpdateLock.
                return;
            }
    
  • 如果以上俩个条件都不满足,则需要重新经过cache_next(i, m) , 即哈希冲突算法,重新计算存储下标,再去比对存储

// cache_next returns the next index to probe after a collision; the scan
// direction and the need for an end marker depend on the architecture.
#if __arm__  ||  __x86_64__  ||  __i386__
// objc_msgSend has few registers available.
// Cache scan increments and wraps at special end-marking bucket.
#define CACHE_END_MARKER 1
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return (i+1) & mask; // step forward by one; the & mask wraps the index back to 0 past the end
}

#elif __arm64__
// objc_msgSend has lots of registers available.
// Cache scan decrements. No end marker needed.
#define CACHE_END_MARKER 0
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return i ? i-1 : mask; // step backward by one; when i is 0, wrap around to mask
}

#else
#error unknown architecture
#endif


到这里,我想cache_t就要告一段落了, 分析到这里,我想上面提出问题的答案也都有了,

  1. _mask等于capacity-1,也是掩码数据, 用于在哈希算法和哈希冲突算法中计算哈希下标

  2. _occupied是存储sel-imp占用大小,也就是分配的buckets中占用的个数。

  3. init初始化方法也会导致occupied变化。

  4. 属性方法和普通方法的调用都会影响occupied的变化。

  5. 为什么随着方法调用的增加,occupied 和 mask 会变化?

    因为在调用过程中,buckets会随着数量的增加进行扩容,以满足当前缓存的需要,具体怎么扩容,可以参考上边,

  6. 为什么buckets有丢失的情况?

    因为在扩容的时候,会对之前的内存释放,重新申请内存, 所以之前缓存的内容也不存在了。


按上面的枚举定义,缓存容量上限 MAX_CACHE_SIZE 为 1 左移 16 位(MAX_CACHE_SIZE_LOG2 = 16)

上面贴出的枚举没有按平台区分,模拟器下的上限以对应版本的源码为准


最近在看工作,求内推啊 微信号bgwx7788

欢迎大佬留言指正😄,码字不易,觉得好给个赞👍 有任何表达或者理解失误请留言交流;共同进步;