Runtime--Cache

212 阅读8分钟

类的结构体中,存在cache

// Abridged layout of a class object as quoted in this article; only the
// leading fields are shown.
struct objc_class : objc_object {
    isa_t isa;
    Class superclass;
    cache_t cache;  // method cache (cache_t); exists to speed up method calls
    class_data_bits_t bits;   // class_data_bits_t is effectively a class_rw_t pointer plus the rr/alloc flags
}

目的是为了加速方法的调用。

扒一下 cache。

基本

类型:cache_t

// Abridged view of the per-class method cache fields.
cache_t {
    struct bucket_t *_buckets; // hash table storage
    mask_t _mask; // index mask, not the length: reallocate() stores newCapacity - 1 here
    mask_t _occupied; // presumably the number of filled buckets (incrementOccupied() is called when an empty bucket is filled) - confirm
}

底层使用增量扩展的哈希表结构进行存储。

// One cache entry: pairs a selector (_sel) with an implementation (_imp).
// Only the field order differs between architectures.
struct bucket_t {
    private:
        // IMP-first is better for arm64e ptrauth and no worse for arm64.
        // SEL-first is better for armv7* and i386 and x86_64.
    #if __arm64__
        uintptr_t _imp;
        SEL _sel;
    #else
        SEL _sel;
        uintptr_t _imp;
    #endif
}

由此看出,bucket_t中存放的是SEL和imp的键值对

方法的查找

// Looks up key `k` in this cache's bucket array.
// Returns the bucket whose key equals `k`, or the first empty bucket
// (key == 0) met while probing. If the probe wraps all the way around without
// finding either, control falls through to cache_t::bad_cache.
bucket_t * cache_t::find(cache_key_t k, id receiver)
{
    assert(k != 0);
    bucket_t *b = buckets();
    mask_t m = mask();
    // cache_hash computes the starting index for k (begin = k & m).
    mask_t begin = cache_hash(k, m);   
    // i is the moving probe index; begin is kept to detect a full wrap-around.
    mask_t i = begin; 
    do {
        // Key equals k: the lookup succeeded, return this bucket.
        // Key is 0: nothing has been cached at index i yet; return this bucket
        // too, which ends the probe.
        if (b[i].key() == 0  ||  b[i].key() == k) { 
            return &b[i];
        }
        // cache_next (the arm64 variant shown in this file) steps i down by one
        // and wraps from 0 back to mask, so the probe walks the table backwards
        // as a ring and stops once it arrives back at begin.
    } while ((i = cache_next(i, m)) != begin);
    // A full circle found neither k nor an empty bucket: the lookup failed,
    // so report it through bad_cache.
    Class cls = (Class)((uintptr_t)this - offsetof(objc_class, cache));
    cache_t::bad_cache(receiver, (SEL)k, cls);
}

// Maps a cache key to its starting bucket index by masking the key with the
// table mask.
static inline mask_t cache_hash(cache_key_t key, mask_t mask) 
{
    const auto maskedKey = key & mask;
    return (mask_t)maskedKey;
}

// Returns the probe index that follows `i`: one step down, wrapping from 0
// back to `mask` (arm64 variant).
static inline mask_t cache_next(mask_t i, mask_t mask) {
    // Non-arm64 variant, kept for reference: return (i-1) & mask;
    if (i == 0) {
        return mask;
    }
    return i-1;
}

举例

index value
0 bucket_t(key_b, imp_b)
1 bucket_t(key_a, imp_a)
2
3 bucket_t(key_c, imp_c)
  • 查找 key_a。

    key_a & mask = 1 ==> index = 1 ==> 匹配,返回。

  • 查找 key_b。

    key_b & mask = 1 ==> index = 1 ==> key不对,index - 1 = 0 ==> 匹配,返回。

  • 查找 key_c。

    key_c & mask = 1 ==> index = 1 ==> key不对,index - 1 = 0 ==> key不对,index 已为 0,回绕到 index = mask = 3 ==> 匹配,返回。

  • 查找 key_d

    key_d & mask = 2 ==> 未缓存方法,返回,结束。

方法的缓存

cache_t::find函数还被源码里面的另一个函数调用过——cache_fill_nolock,缓存填充(插入)操作,源码如下:

// Inserts (sel -> imp) into cls's method cache. The caller must already hold
// cacheUpdateLock. The cache is replaced or expanded first when needed.
static void cache_fill_nolock(Class cls, SEL sel, IMP imp, id receiver)
{
    cacheUpdateLock.assertLocked();
    // Never cache before +initialize is done
    if (!cls->isInitialized()) return; 
    // Make sure the entry wasn't added to the cache by some other thread 
    // before we grabbed the cacheUpdateLock.
    if (cache_getImp(cls, sel)) return;
    
    cache_t *cache = getCache(cls);
    cache_key_t key = getKey(sel);
    
    // Use the cache as-is if it is less than 3/4 full
    mask_t newOccupied = cache->occupied() + 1;
    mask_t capacity = cache->capacity();
    
    if (cache->isConstantEmptyCache()) {
        // Cache is read-only. Replace it with freshly allocated buckets
        // (INIT_CACHE_SIZE when the current capacity is 0).
        cache->reallocate(capacity, capacity ?: INIT_CACHE_SIZE);
    }
    else if (newOccupied <= capacity / 4 * 3) {
        // Cache is less than 3/4 full. Use it as-is.
        // (occupied + 1 does not exceed 3/4 of the capacity.)
    }
    else {
        // Cache is too full. Expand it.
        cache->expand();
    }
    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot because the 
    // minimum size is 4 and we resized at 3/4 full.
    // find() returns either the bucket already holding this key or an empty
    // one; occupied is only incremented when an empty bucket is being filled.
    bucket_t *bucket = cache->find(key, receiver);
    if (bucket->key() == 0) cache->incrementOccupied();
    // Store the method in the bucket.
    bucket->set(key, imp);
}

如何扩容?

void cache_t::expand()
{
    cacheUpdateLock.assertLocked();
    
    uint32_t oldCapacity = capacity();
    uint32_t newCapacity = oldCapacity ? oldCapacity*2 : INIT_CACHE_SIZE;
    if ((uint32_t)(mask_t)newCapacity != newCapacity) {
        // mask overflow - can't grow further
        // fixme this wastes one bit of mask
        newCapacity = oldCapacity;
    }
    reallocate(oldCapacity, newCapacity);
}

扩容是按照 *2 递增,默认初始是多少呢?

// Initial cache capacity, expressed as a power of two: 1 << 2 == 4 buckets.
enum {
    INIT_CACHE_SIZE_LOG2 = 2,
    INIT_CACHE_SIZE      = (1 << INIT_CACHE_SIZE_LOG2)
};

初始容量为4

扩容之后,原先缓存的内容,如何处理,看reallocate

// Replaces the bucket array with a newly allocated one of newCapacity
// buckets. Existing entries are not copied over. The old array is handed to
// the cache collector only when canBeFreed() reported true.
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity)
{
    // Both values are read before setBucketsAndMask() swaps in the new array,
    // so they describe the old one.
    bool freeOld = canBeFreed();
    bucket_t *oldBuckets = buckets();
    bucket_t *newBuckets = allocateBuckets(newCapacity);
    // Cache's old contents are not propagated. 
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this
    assert(newCapacity > 0);
    assert((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);
    // The stored mask is capacity - 1.
    setBucketsAndMask(newBuckets, newCapacity - 1);
    
    if (freeOld) {
        cache_collect_free(oldBuckets, oldCapacity);
        cache_collect(false);
    }
}

要不要释放原空间,取决于freeOld,查看canBeFreed

// The bucket array may be freed unless this cache is the constant empty cache.
bool cache_t::canBeFreed()
{
    if (isConstantEmptyCache()) {
        return false;
    }
    return true;
}
// True when nothing is occupied and the bucket pointer is the one returned by
// emptyBucketsForCapacity() for the current capacity.
bool cache_t::isConstantEmptyCache()
{
    // Checked first, as in the && form: the bucket comparison is skipped
    // whenever anything is occupied.
    if (occupied() != 0) {
        return false;
    }
    return buckets() == emptyBucketsForCapacity(capacity(), false);
}

如果当前缓存是共享的常量空缓存(occupied 为 0,且 buckets 等于 emptyBucketsForCapacity 返回的空桶),则不释放旧空间;否则直接释放旧桶,且不保留原先内容。因此,扩容之前缓存过的方法,需要在下次调用时重新缓存。

涉及到父类呢?

对一个对象发送消息,先通过isa找到class对象,然后按照 cache_t --> 方法列表 的顺序查找,如果找到了,缓存并调用。

如果没有,则会进入到父类的class对象。继续按照cache_t --> 方法列表的顺序查找。

如果找到了,这个时候,方法是缓存在 谁的 cache_t中呢?

采用的方法很简单,看看有哪些地方调用了cache_fill_nolock,这个方法的参数有Class cls,找一下这个cls是谁,就知道存到哪里了。

cache_fill_nolock --> cache_fill -->

log_and_fill_cache ---> lookUpImpOrForward
lookUpImpOrForward
lookupMethodInClassAndLoadCache

看下lookupMethodInClassAndLoadCache这个方法,在方法的注释中,明确指出不会查找父类

/***********************************************************************
* lookupMethodInClassAndLoadCache.
* Like _class_lookupMethodAndLoadCache, but does not search superclasses.
* (This is the point the article highlights: only cls itself is searched.)
* Caches and returns objc_msgForward if the method is not found in the class.
**********************************************************************/
IMP lookupMethodInClassAndLoadCache(Class cls, SEL sel)
{
    Method meth;
    IMP imp;
    // fixme this is incomplete - no resolver, +initialize - 
    // but it's only used for .cxx_construct/destruct so we don't care
    assert(sel == SEL_cxx_construct  ||  sel == SEL_cxx_destruct);
    // Search cache first.
    imp = cache_getImp(cls, sel);
    if (imp) return imp;
    // Cache miss. Search method list.
    mutex_locker_t lock(runtimeLock);
    meth = getMethodNoSuper_nolock(cls, sel);
    if (meth) {
        // Hit in method list. Cache it in cls.
        cache_fill(cls, sel, meth->imp, nil);
        return meth->imp;
    } else {
        // Miss in method list. Cache objc_msgForward.
        cache_fill(cls, sel, _objc_msgForward_impcache, nil);
        return _objc_msgForward_impcache;
    }
}

重点放在lookUpImpOrForward,⚠️⚠️⚠️⚠️重点!标准的IMP查找流程

/***********************************************************************
* lookUpImpOrForward.
* The standard IMP lookup.
* initialize==NO tries to avoid +initialize (but sometimes fails)
* cache==NO skips optimistic unlocked lookup (but uses cache elsewhere)
* Most callers should use initialize==YES and cache==YES.
* inst is an instance of cls or a subclass thereof, or nil if none is known. 
*   If cls is an un-initialized metaclass then a non-nil inst is faster.
* May return _objc_msgForward_impcache. IMPs destined for external use 
*   must be converted to _objc_msgForward or _objc_msgForward_stret.
*   If you don't want forwarding at all, use lookUpImpOrNil() instead.
*
* Search order: cls's cache, cls's method lists, then each superclass's
* cache and method lists. Whatever is found is cached in cls, the class
* the lookup started from, not in the superclass that supplied it.
**********************************************************************/
IMP lookUpImpOrForward(Class cls, SEL sel, id inst, 
                       bool initialize, bool cache, bool resolver)
{
    IMP imp = nil;
    bool triedResolver = NO;
    runtimeLock.assertUnlocked();
    // Optimistic cache lookup
    if (cache) { // check the cache first, before taking runtimeLock
        imp = cache_getImp(cls, sel);
        if (imp) return imp;
    }
    // ... (comments omitted in this excerpt)
    runtimeLock.lock();
    checkIsKnownClass(cls);

    if (!cls->isRealized()) { // realize the class first if it is not realized yet
        cls = realizeClassMaybeSwiftAndLeaveLocked(cls, runtimeLock);
        // runtimeLock may have been dropped but is now locked again
    }

    if (initialize && !cls->isInitialized()) { // initialize the class first if requested and not done yet
        cls = initializeAndLeaveLocked(cls, inst, runtimeLock);
        // ... (comments omitted in this excerpt)
    }
 retry:    
    runtimeLock.assertLocked();
    // Try this class's cache.
    imp = cache_getImp(cls, sel);
    if (imp) goto done;
    // Try this class's method lists.
    {
        Method meth = getMethodNoSuper_nolock(cls, sel);
        if (meth) {
            log_and_fill_cache(cls, meth->imp, sel, inst, cls);
            imp = meth->imp;
            goto done;
        }
    }
    // Try superclass caches and method lists.
    {
        unsigned attempts = unreasonableClassCount();
        for (Class curClass = cls->superclass; // walk up the superclass chain
             curClass != nil;
             curClass = curClass->superclass)
        {
            // Halt if there is a cycle in the superclass chain.
            if (--attempts == 0) {
                _objc_fatal("Memory corruption in class list.");
            }
            
            // Superclass cache.
            imp = cache_getImp(curClass, sel); 
            if (imp) {
                if (imp != (IMP)_objc_msgForward_impcache) {
                    // Found the method in a superclass. Cache it in this class.
                    // Note the first argument: the entry goes into cls,
                    // not into curClass.
                    log_and_fill_cache(cls, imp, sel, inst, curClass);
                    goto done;
                }
                else {
                    // Found a forward:: entry in a superclass.
                    // Stop searching, but don't cache yet; call method 
                    // resolver for this class first.
                    break;
                }
            }
            
            // Superclass method list.
            Method meth = getMethodNoSuper_nolock(curClass, sel);
            if (meth) {
                // Found in the superclass's method list; again cached in cls.
                log_and_fill_cache(cls, meth->imp, sel, inst, curClass);
                imp = meth->imp;
                goto done;
            }
        }
    }

    // No implementation found. Try method resolver once.
    if (resolver  &&  !triedResolver) {
        runtimeLock.unlock();
        resolveMethod(cls, sel, inst);
        runtimeLock.lock();
        // Don't cache the result; we don't hold the lock so it may have 
        // changed already. Re-do the search from scratch instead.
        triedResolver = YES;
        goto retry;
    }

    // No implementation found, and method resolver didn't help. 
    // Use forwarding.
    imp = (IMP)_objc_msgForward_impcache;
    cache_fill(cls, sel, imp, inst);

 done:
    runtimeLock.unlock();
    return imp;
}

通过上面代码可以看出,当发送给子类的消息,在最后调用的是父类方法时,该方法将被缓存到子类的cache中。

看下如何在方法列表中查找?

在方法列表中查找方法,用的是getMethodNoSuper_nolock这个方法,跟进看下

// Searches cls's own method lists (never a superclass's) for sel and returns
// the first match, or nil. runtimeLock must be held and cls must be realized.
static method_t *getMethodNoSuper_nolock(Class cls, SEL sel)
{
    runtimeLock.assertLocked();
    assert(cls->isRealized());
    // fixme nil cls? 
    // fixme nil sel?

    auto cursor = cls->data()->methods.beginLists();
    auto stop = cls->data()->methods.endLists();
    while (cursor != stop) {
        // search_method_list does the per-list lookup.
        if (method_t *hit = search_method_list(*cursor, sel)) {
            return hit;
        }
        ++cursor;
    }
    return nil;
}
// Finds sel in a single method list and returns the matching method_t, or nil.
static method_t *search_method_list(const method_list_t *mlist, SEL sel)
{
    int methodListIsFixedUp = mlist->isFixedUp();
    int methodListHasExpectedSize = mlist->entsize() == sizeof(method_t);
    // __builtin_expect(..., 1) marks the fixed-up, expected-size case as the
    // likely one.
    if (__builtin_expect(methodListIsFixedUp && methodListHasExpectedSize, 1)) {
        // Fixed-up list with the expected entry size: treated as sorted, so
        // use the binary search in findMethodInSortedMethodList.
        return findMethodInSortedMethodList(sel, mlist);
    } else {
        // Linear search of unsorted method list
        for (auto& meth : *mlist) {
            if (meth.name == sel) return &meth;
        }
    }
#if DEBUG
    // sanity-check negative results
    if (mlist->isFixedUp()) {
        for (auto& meth : *mlist) {
            if (meth.name == sel) {
                _objc_fatal("linear search worked when binary search did not");
            }
        }
    }
#endif
    return nil;
}
// Binary search for `key` in `list`, comparing selectors by their address
// value. On a match it returns the first of any run of equal entries;
// returns nil when there is no match.
static method_t *findMethodInSortedMethodList(SEL key, const method_list_t *list)
{
    assert(list);
    const method_t * const first = &list->first;
    const method_t *base = first;
    const method_t *probe;
    uintptr_t keyValue = (uintptr_t)key;
    uint32_t count;
    // count >>= 1 halves the remaining range on every pass and probe is the
    // middle element of that range, i.e. this is a binary search.
    for (count = list->count; count != 0; count >>= 1) {
        probe = base + (count >> 1);
        
        uintptr_t probeValue = (uintptr_t)probe->name;
        
        if (keyValue == probeValue) {
            // `probe` is a match.
            // Rewind looking for the *first* occurrence of this value.
            // This is required for correct category overrides.
            while (probe > first && keyValue == (uintptr_t)probe[-1].name) {
                probe--;
            }
            return (method_t *)probe;
        }
        
        if (keyValue > probeValue) {
            // Continue in the upper half: start just past probe, and drop
            // probe itself from the count before the loop halves it.
            base = probe + 1;
            count--;
        }
    }
    return nil;
}

总结

方法与cache的具体流程

  1. 对象接收消息
  2. 根据isa进入到类对象cls中
    • 从cache_t中查找,如果找到,直接调用,结束。
    • 从cls的方法列表中查找,采用二分/线性遍历,如果找到,缓存,调用,结束。
  3. cls中找不到,根据superClass指针,找父类f_cls
    • 从f_cls的cache_t中查找,找到,缓存到cls中的cache_t,调用,结束。
    • 从f_cls的方法列表中查找,二分/线性遍历,找到,缓存到cls中的cache_t,调用,结束。
  4. f_cls找不到,继续重复3,直到基类NSObject,仍然没有找到,则进入到消息机制的动态方法解析。

⚠️:缓存方法时,先检查容量:如果加入新方法后的占用数超过容量的 3/4,则按 2 倍扩容;扩容时丢弃旧缓存,再把新方法写入新的缓存。