OC底层-揭开cache_t的神秘面纱

435 阅读4分钟

cache_t 有什么

i386(macOS)和 x86_64(模拟器)下的cache_t

cache_t部分:
    explicit_atomic<struct bucket_t *> _buckets;
    explicit_atomic<mask_t> _mask;
    
bucket_t部分:
    explicit_atomic<uintptr_t> _imp;
    explicit_atomic<SEL> _sel;

arm64(真机)下的cache_t

cache_t部分:
    explicit_atomic<uintptr_t> _maskAndBuckets;
    mask_t _mask_unused;
    
    // How much the mask is shifted by.
    static constexpr uintptr_t maskShift = 48;
    
    // Additional bits after the mask which must be zero. msgSend
    // takes advantage of these additional bits to construct the value
    // `mask << 4` from `_maskAndBuckets` in a single instruction.
    static constexpr uintptr_t maskZeroBits = 4;
    
    // The largest mask value we can store.
    static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;
    
    // The mask applied to `_maskAndBuckets` to retrieve the buckets pointer.
    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;
    
    // Ensure we have enough bits for the buckets pointer.
    static_assert(bucketsMask >= MACH_VM_MAX_ADDRESS, "Bucket field doesn't have enough bits for arbitrary pointers.");
bucket_t部分:
    explicit_atomic<SEL> _sel;
    explicit_atomic<uintptr_t> _imp;
  • 这里要注意的是_maskAndBuckets把 mask 和 buckets 打包存储在同一个 uintptr_t 中(按位划分),节省内存并且读取方便。
  • maskShift = 48 表示取 mask 时需要右移的位数,即 mask 存储在高 16 位,低 48 位留给 buckets;其中紧挨 mask 的 4 位(maskZeroBits)必须为 0,buckets 指针实际占用低 44 位
  • 提供了算取最大mask的算法maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1,即 1 左移 16 位后再减 1(0xFFFF)
  • 提供了获取buckets的算法掩码bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;

公共部分

    uint16_t _flags;
    uint16_t _occupied;

public:
    static bucket_t *emptyBuckets();
    
    struct bucket_t *buckets();
    mask_t mask();
    mask_t occupied();
    void incrementOccupied();
    void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);
    void initializeToEmpty();

    unsigned capacity();
    bool isConstantEmptyCache();
    bool canBeFreed();

这里测试下方法在cache的存储:

2020-09-19 18:35:07.709067+0800 MuObjcTest[3990:224312] hello
2020-09-19 18:35:07.710220+0800 MuObjcTest[3990:224312] iOS
(lldb) p/x pClass
(Class) $0 = 0x0000000100008188 MuPerson
(lldb) p/x (cache_t *)0x0000000100008198
(cache_t *) $1 = 0x0000000100008198
(lldb) p/x *$1.buckets()
(bucket_t) $2 = {
  _sel = {
    std::__1::atomic<objc_selector *> = "" {
      Value = 0x0000000100003f34 ""
    }
  }
  _imp = {
    std::__1::atomic<unsigned long> = {
      Value = 0x000000000000bfa8
    }
  }
}
  Fix-it applied, fixed expression was: 
    *$1->buckets()
(lldb) p/x *$1.buckets()[0]
error: <user expression 4>:1:4: member reference type 'cache_t *' is a pointer; did you mean to use '->'?
*$1.buckets()[0]
 ~~^
   ->
error: <user expression 4>:1:1: indirection requires pointer operand ('bucket_t' invalid)
*$1.buckets()[0]
^~~~~~~~~~~~~~~~
(lldb) p/x $1.buckets()[0]
(bucket_t) $3 = {
  _sel = {
    std::__1::atomic<objc_selector *> = "" {
      Value = 0x0000000100003f34 ""
    }
  }
  _imp = {
    std::__1::atomic<unsigned long> = {
      Value = 0x000000000000bfa8
    }
  }
}
  Fix-it applied, fixed expression was: 
    $1->buckets()[0]
(lldb) p/x $1.buckets()[1]
(bucket_t) $4 = {
  _sel = {
    std::__1::atomic<objc_selector *> = (null) {
      Value = 0x0000000000000000 (null)
    }
  }
  _imp = {
    std::__1::atomic<unsigned long> = {
      Value = 0x0000000000000000
    }
  }
}
  Fix-it applied, fixed expression was: 
    $1->buckets()[1]
(lldb) p/x $3.sel()
(SEL) $5 = 0x0000000100003f34 "funcIOS"
(lldb) p/x $3.imp(pClass)
(IMP) $6 = 0x0000000100003e20 (MuObjcTest`-[MuPerson funcIOS] at MuPerson.m:16)
(lldb) p/x $4.sel()
(SEL) $7 = 0x0000000000000000 <no value available>
(lldb) p/x $4.imp(pClass)
(IMP) $8 = 0x0000000000000000
(lldb) 
(lldb) p/x $1.buckets()[2]
(bucket_t) $9 = {
  _sel = {
    std::__1::atomic<objc_selector *> = "" {
      Value = 0x0000000100003f2a ""
    }
  }
  _imp = {
    std::__1::atomic<unsigned long> = {
      Value = 0x000000000000bc78
    }
  }
}
  Fix-it applied, fixed expression was: 
    $1->buckets()[2]
(lldb) p/x $9.sel()
(SEL) $16 = 0x0000000100003f2a "funcHello"
(lldb) p/x $9.imp(pClass)
(IMP) $17 = 0x0000000100003df0 (MuObjcTest`-[MuPerson funcHello] at MuPerson.m:12)
(lldb) 
  • 这里留几个疑问:
    • funcHello 和 funcIOS 是怎么存储的,为什么没有顺序
    • funcHello 和 funcIOS 中间还隔着空的 bucket,内存是怎么分配的
    • _occupied 和 mask 是怎么变化的

cache_t 是怎么工作的

 * Cache writers (hold cacheUpdateLock while reading or writing; not PC-checked)
 * cache_fill         (acquires lock)
    * cache_expand       (only called from cache_fill)
    * cache_create       (only called from cache_expand)
    * bcopy               (only called from instrumented cache_expand)
 * flush_caches        (acquires lock)
    * cache_flush        (only called from cache_fill and flush_caches)
    * cache_collect_free (only called from cache_expand and cache_flush)

cache_fill

cache_expand && cache_create

  • 这个阶段为扩容阶段和初始化阶段
  // Use the cache as-is if it is less than 3/4 full
    mask_t newOccupied = occupied() + 1;
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    if (slowpath(isConstantEmptyCache())) {
        // Cache is read-only. Replace it.
        if (!capacity) capacity = INIT_CACHE_SIZE;
        reallocate(oldCapacity, capacity, /* freeOld */false);
    }
    else if (fastpath(newOccupied + CACHE_END_MARKER <= capacity / 4 * 3)) {
        // Cache is less than 3/4 full. Use it as-is.
    }
    else {
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        reallocate(oldCapacity, capacity, true);
    }
    
    
  • 计算出当前缓存占有数量 mask_t newOccupied = occupied() + 1
// Swaps this cache's bucket array for a newly allocated one.
//
// oldCapacity: capacity of the array currently installed; only used when the
//              old array is handed to cache_collect_free().
// newCapacity: number of buckets to allocate; the installed mask is
//              newCapacity - 1.
// freeOld:     when true, the previous array is passed to cache_collect_free().
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
    // Capture the current array before it is replaced so it can be retired below.
    bucket_t *oldBuckets = buckets();
    bucket_t *newBuckets = allocateBuckets(newCapacity);

    // Cache's old contents are not propagated. 
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this

    // The mask (newCapacity - 1) must be representable in mask_t without loss.
    ASSERT(newCapacity > 0);
    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

    // Install the new array and its mask.
    setBucketsAndMask(newBuckets, newCapacity - 1);
    
    // Retire the old array only after the new one has been installed.
    if (freeOld) {
        cache_collect_free(oldBuckets, oldCapacity);
    }
}
  • 第一次进来(缓存为空),capacity 取初始值 INIT_CACHE_SIZE(4),为buckets申请内存,并且存储 buckets 和 mask,_occupied = 0
  • 假如进来,capacity已经申请过了,并且 newOccupied + CACHE_END_MARKER <= capacity / 4 * 3时候,不做申请空间处理。这里要注意的是CACHE_END_MARKER在不同平台下的值,笔者的运行环境是macOS环境,故CACHE_END_MARKER = 1,当类发送第三个方法的时候已经不满足此条件。
// Per-architecture probing step used when scanning the cache, plus the number
// of slots reserved as an end marker (CACHE_END_MARKER).
#if __arm__  ||  __x86_64__  ||  __i386__
// objc_msgSend has few registers available.
// Cache scan increments and wraps at special end-marking bucket.
#define CACHE_END_MARKER 1
// Returns the index probed after `i`: i + 1, reduced with `& mask`.
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return (i+1) & mask;
}

#elif __arm64__
// objc_msgSend has lots of registers available.
// Cache scan decrements. No end marker needed.
#define CACHE_END_MARKER 0
// Returns the index probed after `i`: i - 1, or `mask` when `i` is already 0.
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return i ? i-1 : mask;
}

#else
// Any other target is rejected at compile time.
#error unknown architecture
#endif
  • 当 newOccupied + CACHE_END_MARKER > capacity / 4 * 3(即超过容量的 3/4)时做扩容处理,重新申请空间。申请的空间为之前空间的二倍。如第三个方法调试进来,这里即申请8个位置。最大空间为1 << 16,即2的16次方。重新为buckets申请内存,并且存储 buckets 和 mask,_occupied = 0。由于此时 freeOld 为 true,这里要经过 cache_collect_free(oldBuckets, oldCapacity)的操作,后面分析。

bcopy

  • 这个阶段为赋值阶段
    bucket_t *b = buckets();
    mask_t m = capacity - 1;
    mask_t begin = cache_hash(sel, m);
    mask_t i = begin;
// Computes the starting probe index for `sel`: the selector's pointer value,
// narrowed to mask_t, ANDed with `mask`.
static inline mask_t cache_hash(SEL sel, mask_t mask)
{
    const uintptr_t selectorBits = reinterpret_cast<uintptr_t>(sel);
    const mask_t narrowed = static_cast<mask_t>(selectorBits);
    return narrowed & mask;
}
  • 空间开辟完毕,取出 buckets,并令 mask = capacity - 1。通过hash算法cache_hash算出 sel 在 buckets 中的下标位置。这里使用hash算法即散列表排布,注定了sel在分配内存中的无序性,并且在一定程度上避免了方法cache冲突,下面还有特殊冲突的处理办法。
 // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot because the
    // minimum size is 4 and we resized at 3/4 full.
    do {
        if (fastpath(b[i].sel() == 0)) {
            incrementOccupied();
            b[i].set<Atomic, Encoded>(sel, imp, cls);
            return;
        }
        if (b[i].sel() == sel) {
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    } while (fastpath((i = cache_next(i, m)) != begin));
  • 如果buckets的第i位置即通过下标算出来的这个位置的bucket.sel()没有值,表示我们可以在这个位置存储我们的 sel 和 IMP。如果这个位置恰好已经存有当前sel(其他线程在我们拿到锁之前已经存入),就直接返回。如果这个位置有值并且和我们要存储的sel不相同,继续用i = cache_next(i, m) 重新计算存储的下标并赋值给i,直到i的位置没有值,就在这个位置存储 sel 和 IMP。这里依然要注意对应平台的算法不同:在 __arm__、__x86_64__、__i386__ 下采用从当前位置 +1 往后找空余位置(通过 & mask 回绕);在 __arm64__ 下采用从当前位置 -1 向前找空余位置,到 0 之后回到 mask(最大下标)继续查找。

flush_caches

cache_flush && cache_collect_free

  • 这个阶段为使旧缓存失效并释放的阶段
// Hands a retired bucket array to the garbage table and then triggers a
// collection attempt.
//
// data:     first bucket of the retired array.
// capacity: capacity of that array; used for the dead-cache statistics and
//           for the byte-size accounting.
static void cache_collect_free(bucket_t *data, mask_t capacity)
{
    // Check that the lock guarding cache updates is held; which lock that is
    // depends on CONFIG_USE_CACHE_LOCK.
#if CONFIG_USE_CACHE_LOCK
    cacheUpdateLock.assertLocked();
#else
    runtimeLock.assertLocked();
#endif

    if (PrintCaches) {
        recordDeadCache(capacity);
    }

    // Ensure the garbage table can take one more entry.
    _garbage_make_room();

    // Account for the retired array, then append it to the table.
    garbage_byte_size += cache_t::bytesForCapacity(capacity);
    garbage_refs[garbage_count] = data;
    garbage_count++;

    cache_collect(false);
}

// Makes sure the garbage table (garbage_refs) has room for one more entry.
// On the first call the table is allocated with INIT_GARBAGE_COUNT slots;
// afterwards it is doubled whenever garbage_count has reached garbage_max.
static void _garbage_make_room(void)
{
    // Set until the table has been created once.
    static int first = 1;

    // Create the collection table the first time it is needed
    if (first)
    {
        first = 0;
        garbage_refs = (bucket_t**)
            malloc(INIT_GARBAGE_COUNT * sizeof(void *));
        garbage_max = INIT_GARBAGE_COUNT;
    }

    // Double the table if it is full
    else if (garbage_count == garbage_max)
    {
        garbage_refs = (bucket_t**)
            realloc(garbage_refs, garbage_max * 2 * sizeof(void *));
        garbage_max *= 2;
    }
}
  • 创建垃圾袋列表,初始容量为 INIT_GARBAGE_COUNT(128);如果列表已满(garbage_count == garbage_max),则把列表扩容为原来的 2 倍。
  • garbage_refs[garbage_count++] = data 把旧 buckets 的首地址 data 追加到垃圾袋列表的末尾。此时 cache 已经指向新的 buckets,旧的 buckets 不会再被新的查找访问到,即之前的 buckets 已经无效,等待释放。
  • cache_collect释放这段垃圾空间。
  // Dispose all refs now in the garbage
    // Erase each entry so debugging tools don't see stale pointers.
    while (garbage_count--) {
        auto dead = garbage_refs[garbage_count];
        garbage_refs[garbage_count] = nil;
        free(dead);
    }
    
    // Clear the garbage count and total size indicator
    garbage_count = 0;
    garbage_byte_size = 0;