环境:xcode 11.5
源码:objc4-781
上一篇文章中探索了类的结构,如下所示:
struct objc_class : objc_object {
// Class ISA;
Class superclass;
cache_t cache; // formerly cache pointer and vtable
class_data_bits_t bits; // class_rw_t * plus custom rr/alloc flags
...
}
其中ISA是一个指向objc_class的指针,superclass指向的是父类,bits中存储的是当前类的一些基本信息。今天来继续探索objc_class结构体中尚未探索的成员变量cache(类型为cache_t)。
Cache_t简介
首先我们看一下Cache_t的结构:
struct cache_t {
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED
explicit_atomic<struct bucket_t *> _buckets; //可以理解为结构体bucket_t数据
explicit_atomic<mask_t> _mask;
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
explicit_atomic<uintptr_t> _maskAndBuckets;
mask_t _mask_unused;
// 一些常量:
static constexpr uintptr_t maskShift = 48;
static constexpr uintptr_t maskZeroBits = 4;
static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;
static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;
...
#endif
#if __LP64__
uint16_t _flags;
#endif
uint16_t _occupied; //当前缓存中的方法个数
}
在arm架构和x86架构下有些不同。具体区别在于arm架构下mask和buckets指针存储在一个uintptr_t数据中,x86架构下分别存储。同时arm架构下还定义了一些掩码等数据来进行数据的读取,即上述代码中的一些常量。
其中bucket_t的结构为:
struct bucket_t {
private:
// IMP-first is better for arm64e ptrauth and no worse for arm64.
// SEL-first is better for armv7* and i386 and x86_64.
#if __arm64__
explicit_atomic<uintptr_t> _imp;
explicit_atomic<SEL> _sel;
#else
explicit_atomic<SEL> _sel;
explicit_atomic<uintptr_t> _imp;
#endif
bucket_t有两个属性:_sel为方法标识(SEL),_imp为方法实现。
Cache_t探索
接下来我们直接使用lldb来进行探索。
/// Demo class for the cache_t exploration: declares three instance methods
/// that main() calls one after another while the class's cache is inspected in lldb.
@interface ZHYObject : NSObject
- (void)method1;
- (void)method2;
- (void)method3;
@end
// Demo entry point: creates a ZHYObject, sends method1, method2 and method3
// to it in that order, then logs the object. The article sets a breakpoint
// before the instance-method calls and steps one statement at a time,
// printing the class's cache_t after each step.
int main(int argc, const char * argv[]) {
@autoreleasepool {
ZHYObject *object = [[ZHYObject alloc] init];
[object method1];
[object method2];
[object method3];
NSLog(@"%@",object);
}
return 0;
}
- 实例方法调用之前进行断点并打印
(lldb) x/4gx pClass
0x1000023d0: 0x00000001000023a8 0x00000001003ef140
0x1000023e0: 0x00000001003e9450 0x0000801000000000
(lldb) p (cache_t*)0x1000023e0
(cache_t *) $1 = 0x00000001000023e0
(lldb) p *$1
(cache_t) $2 = {
_buckets = {
std::__1::atomic<bucket_t *> = 0x00000001003e9450 {
_sel = {
std::__1::atomic<objc_selector *> = 0x0000000000000000
}
_imp = {
std::__1::atomic<unsigned long> = 0
}
}
}
_mask = {
std::__1::atomic<unsigned int> = 0
}
_flags = 32784
_occupied = 0
}
当前cache_t中,没有方法缓存,occupied为0。
接下来断点走一步。重新打印cache_t
(lldb) p *$1
(cache_t) $3 = {
_buckets = {
std::__1::atomic<bucket_t *> = 0x00000001006263e0 {
_sel = {
std::__1::atomic<objc_selector *> = 0x00007fff7482ec60
}
_imp = {
std::__1::atomic<unsigned long> = 4046048
}
}
}
_mask = {
std::__1::atomic<unsigned int> = 3
}
_flags = 32784
_occupied = 1
}
(lldb) p $3.buckets()
(bucket_t *) $4 = 0x00000001006263e0
(lldb) p *($4)
(bucket_t) $5 = {
_sel = {
std::__1::atomic<objc_selector *> = 0x00007fff7482ec60
}
_imp = {
std::__1::atomic<unsigned long> = 4046048
}
}
(lldb) p $5.sel()
(SEL) $6 = "init"
(lldb) p *($4+1)
(bucket_t) $7 = {
_sel = {
std::__1::atomic<objc_selector *> = 0x0000000000000000
}
_imp = {
std::__1::atomic<unsigned long> = 0
}
}
对比可以发现:
- _buckets内存地址发生了改变,由0x00000001003e9450变成了0x00000001006263e0
- _buckets数组里面只有一个bucket_t,对应的是init方法
- _mask由0变成了3
- _occupied由0变成了1
接下来断点再走一步,并进行打印
(lldb) p *$1
(cache_t) $8 = {
_buckets = {
std::__1::atomic<bucket_t *> = 0x00000001006263e0 {
_sel = {
std::__1::atomic<objc_selector *> = 0x00007fff7482ec60
}
_imp = {
std::__1::atomic<unsigned long> = 4046048
}
}
}
_mask = {
std::__1::atomic<unsigned int> = 3
}
_flags = 32784
_occupied = 2
}
(lldb) p *($4)
(bucket_t) $9 = {
_sel = {
std::__1::atomic<objc_selector *> = 0x00007fff7482ec60
}
_imp = {
std::__1::atomic<unsigned long> = 4046048
}
}
(lldb) p $9.sel()
(SEL) $10 = "init"
(lldb) p *($4+1)
(bucket_t) $11 = {
_sel = {
std::__1::atomic<objc_selector *> = 0x0000000100000e15
}
_imp = {
std::__1::atomic<unsigned long> = 11904
}
}
(lldb) p $11.sel()
(SEL) $12 = "method1"
对比可以发现:
- _buckets和第一次相比,地址并没有发生变化
- _mask也没有发生变化
- _occupied变成了2,和打印结果一致,_buckets数组里面有两个方法,和_occupied的值一致
断点继续下一步,并打印
(lldb) p *$1
(cache_t) $14 = {
_buckets = {
std::__1::atomic<bucket_t *> = 0x0000000101929660 {
_sel = {
std::__1::atomic<objc_selector *> = 0x0000000000000000
}
_imp = {
std::__1::atomic<unsigned long> = 0
}
}
}
_mask = {
std::__1::atomic<unsigned int> = 7
}
_flags = 32784
_occupied = 1
}
(lldb) p $14.buckets()
(bucket_t *) $20 = 0x0000000101929660
(lldb) p *$20
(bucket_t) $21 = {
_sel = {
std::__1::atomic<objc_selector *> = 0x0000000000000000
}
_imp = {
std::__1::atomic<unsigned long> = 0
}
}
(lldb) p *($20+1)
(bucket_t) $22 = {
_sel = {
std::__1::atomic<objc_selector *> = 0x0000000000000000
}
_imp = {
std::__1::atomic<unsigned long> = 0
}
}
(lldb) p *($20+2)
(bucket_t) $23 = {
_sel = {
std::__1::atomic<objc_selector *> = 0x0000000000000000
}
_imp = {
std::__1::atomic<unsigned long> = 0
}
}
(lldb) p *($20+3)
(bucket_t) $24 = {
_sel = {
std::__1::atomic<objc_selector *> = 0x0000000000000000
}
_imp = {
std::__1::atomic<unsigned long> = 0
}
}
(lldb) p *($20+4)
(bucket_t) $25 = {
_sel = {
std::__1::atomic<objc_selector *> = 0x0000000000000000
}
_imp = {
std::__1::atomic<unsigned long> = 0
}
}
(lldb) p *($20+5)
(bucket_t) $26 = {
_sel = {
std::__1::atomic<objc_selector *> = 0x0000000100000e1d
}
_imp = {
std::__1::atomic<unsigned long> = 11952
}
}
(lldb) p $26.sel()
(SEL) $27 = "method2"
对比可以发现:
- _buckets内存地址再次发生了改变
- _mask由3变成了7
- _occupied由2变成了1
- method2方法并不是存储在第一个空位,我们找了好几次才找到
根据以上的探索,产生了几个疑问:
- _buckets的内存地址什么时候会发生改变?
- _mask的含义是什么?
- _occupied为什么变成了1,反而变小了?
- _buckets中存储方法的位置有什么规则?
Cache_t底层实现
根据上面的lldb探索,可以发现这样的规律,每次_buckets发生变化的时候,_mask也会发生变化,而且_mask的值为2的n次方-1,同时_occupied的值会变为1。那么接下来我们通过相关源码来验证一下我们的发现。
在objc-cache.mm的注释中发现了以下几行注释:
* Cache writers (hold cacheUpdateLock while reading or writing; not PC-checked)
* cache_fill (acquires lock)
* cache_expand (only called from cache_fill)
* cache_create (only called from cache_expand)
* bcopy (only called from instrumented cache_expand)
* flush_caches (acquires lock)
* cache_flush (only called from cache_fill and flush_caches)
* cache_collect_free (only called from cache_expand and cache_flush)
我们从cache_fill中开始追踪源码
/**
 * Entry point for adding a (sel, imp) pair to a class's method cache.
 *
 * Asserts that runtimeLock is held. When DEBUG_TASK_THREADS is enabled the
 * function only calls _collecting_in_critical(); otherwise it forwards to
 * cache_t::insert, but only once the class reports isInitialized().
 */
void cache_fill(Class cls, SEL sel, IMP imp, id receiver)
{
    runtimeLock.assertLocked();

#if DEBUG_TASK_THREADS
    _collecting_in_critical();
#else
    // Nothing is cached until +initialize has finished for this class.
    if (!cls->isInitialized()) {
        return;
    }

    cache_t *classCache = getCache(cls);
#if CONFIG_USE_CACHE_LOCK
    // Held until the function returns, i.e. for the duration of the insert.
    mutex_locker_t guard(cacheUpdateLock);
#endif
    classCache->insert(cls, sel, imp, receiver);
#endif
}
首先判断类是否已经初始化,如果初始化就从objc_class结构体中读取对应的cache变量,然后执行insert方法。
insert
// Stores the (sel, imp) pair in this cache, first replacing or growing the
// bucket array when required. On entry the function asserts that the relevant
// lock is held (cacheUpdateLock or runtimeLock, depending on
// CONFIG_USE_CACHE_LOCK) and that sel is non-zero and cls is initialized.
// If no free or matching slot is found after probing every index, it calls
// cache_t::bad_cache.
ALWAYS_INLINE
void cache_t::insert(Class cls, SEL sel, IMP imp, id receiver)
{
#if CONFIG_USE_CACHE_LOCK
cacheUpdateLock.assertLocked();
#else
runtimeLock.assertLocked();
#endif
ASSERT(sel != 0 && cls->isInitialized());
// Use the cache as-is if it is less than 3/4 full
mask_t newOccupied = occupied() + 1;
unsigned oldCapacity = capacity(), capacity = oldCapacity;
if (slowpath(isConstantEmptyCache())) {
// No usable cache exists yet: fall back to the initial capacity
// (INIT_CACHE_SIZE) when the current capacity is zero, and allocate
// fresh bucket memory. The old buckets are not freed (freeOld is false).
// Cache is read-only. Replace it.
if (!capacity) capacity = INIT_CACHE_SIZE;
reallocate(oldCapacity, capacity, /* freeOld */false);
}
else if (fastpath(newOccupied + CACHE_END_MARKER <= capacity / 4 * 3)) { // e.g. capacity 4 gives a threshold of 3; CACHE_END_MARKER is added to the entry count
// Cache is less than 3/4 full. Use it as-is.
}
else {
capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE; // double the capacity, or start at INIT_CACHE_SIZE when it is zero
if (capacity > MAX_CACHE_SIZE) {
capacity = MAX_CACHE_SIZE;
}
reallocate(oldCapacity, capacity, true); // switch to the larger bucket array and free the old one
}
bucket_t *b = buckets();
mask_t m = capacity - 1;
// Starting index is derived from the selector and the mask.
mask_t begin = cache_hash(sel, m);
mask_t i = begin;
// Scan for the first unused slot and insert there.
// There is guaranteed to be an empty slot because the
// minimum size is 4 and we resized at 3/4 full.
do {
if (fastpath(b[i].sel() == 0)) {
// Empty slot: bump the occupied count, then store the entry.
incrementOccupied();
b[i].set<Atomic, Encoded>(sel, imp, cls);
return;
}
if (b[i].sel() == sel) {
// The entry was added to the cache by some other thread
// before we grabbed the cacheUpdateLock.
return;
}
// Slot is taken by a different selector: move to the next probe index
// and stop once the probe sequence returns to its starting index.
} while (fastpath((i = cache_next(i, m)) != begin));
cache_t::bad_cache(receiver, (SEL)sel, cls);
}
insert是cache_t进行缓存的核心方法。主要操作如下:
1. 判断是否需要扩容
当`cache`为空或者插入后`bucket`个数大于原有容量的`3/4`时,会调用`reallocate`方法。`reallocate`主要负责`_buckets`的重新开辟内存以及`旧_buckets`的释放。
/**
 * Replaces this cache's bucket array with a newly allocated one sized for
 * newCapacity buckets and installs newCapacity - 1 as the mask.
 *
 * Entries from the previous array are not copied into the new one.
 * When freeOld is true, the previous array (with oldCapacity) is handed to
 * cache_collect_free.
 */
ALWAYS_INLINE
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
    // Capture the current array before it is replaced below.
    bucket_t *previousBuckets = buckets();
    bucket_t *replacementBuckets = allocateBuckets(newCapacity);

    // The old contents are intentionally dropped rather than propagated;
    // this is thought to trade extra cache fills for lower cache memory use.
    // fixme re-measure this

    ASSERT(newCapacity > 0);
    // The mask value must survive a round trip through mask_t unchanged.
    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

    setBucketsAndMask(replacementBuckets, newCapacity - 1);

    if (!freeOld) {
        return;
    }
    cache_collect_free(previousBuckets, oldCapacity);
}
在reallocate中做了以下几件事情:
- 根据传入的newCapacity创建新的bucket
- 将新的bucket赋给当前的cache,并且更新mask的值为newCapacity - 1
- 如果需要的话,释放旧的bucket
- 需要注意的是,因为初始的容量为4,而且每次扩容都会变为旧容量的2倍,因此新的容量必然是2^n,mask的值是2^n-1
2. 将sel和imp放到指定的bucket中
之前有说到,_buckets中的方法存储不是按顺序排列的,它是一个什么规则呢?答案也在insert函数中。寻找对应bucket的思路大致如下图:
- 根据cache_hash方法获取sel对应的哈希值key。
// Maps a selector to its starting bucket index: the selector's pointer value
// is narrowed to mask_t and then ANDed with mask.
static inline mask_t cache_hash(SEL sel, mask_t mask)
{
    uintptr_t selectorBits = (uintptr_t)sel;
    return (mask_t)selectorBits & mask;
}
如果位置2对应的bucket->sel为0,就是说该位置没有存入某个方法,查找结束,赋值即可,同时_occupied自增
如果位置2对应的bucket->sel和当前sel相等,有可能是其他的线程缓存了这个方法,直接返回。
否则的话就是说出现了hash冲突,那么就要寻找新的hash值。
// __arm64__ variant. Note that cache_next is implemented differently on
// other architectures.
// Returns the next probe index after i: one slot lower, wrapping from
// index 0 around to mask.
static inline mask_t cache_next(mask_t i, mask_t mask) {
    if (i == 0) {
        return mask;
    }
    return i - 1;
}
在arm64架构下解决冲突的方式是往前一位寻找,如果找到bucket数组的第一个还是冲突,则从数组的最后一位开始。
当寻找了一遍之后又回到最初的起点时,说明当前出现异常,执行bad_cache逻辑。
自此,方法进行缓存的流程结束,核心方法insert流程图如下: