通过前文的探索我们知道类的结构包括四个成员变量
// Excerpt of the runtime's class structure; members not discussed here are elided ("......").
struct objc_class : objc_object {
......
// Class ISA;
Class superclass;
cache_t cache; // formerly cache pointer and vtable
class_data_bits_t bits; // class_rw_t * plus custom rr/alloc flags
......
};
isa
、superclass
、bits
我们都探索过了(虽然很浅)
isa
包含指向类/元类的指针superclass
是指向父类的指针bits
包含方法列表、属性列表、成员变量列表、协议列表等信息。还剩下一个cache
我们来探索一下。
数据结构
cache_t
// Excerpt of cache_t; members not discussed here are elided ("......").
struct cache_t {
private:
// buckets() masks this word with bucketsMask to recover the bucket_t array pointer.
explicit_atomic<uintptr_t> _bucketsAndMaybeMask;
union {
struct {
// Loaded by mask(); capacity() reports mask + 1 (or 0 when the mask is 0).
explicit_atomic<mask_t> _maybeMask;
#if __LP64__
uint16_t _flags;
#endif
// Returned by occupied(); presumably bumped by incrementOccupied() when insert() fills an empty slot.
uint16_t _occupied;
};
// Union member: shares storage with the anonymous struct above.
explicit_atomic<preopt_cache_t *> _originalPreoptCache;
};
......
public:
void insert(SEL sel, IMP imp, id receiver);
......
};
__LP64__
表示编译目标采用LP64数据模型(long和指针均为64位),64位的Unix和类Unix系统(Linux、macOS)都采用这一模型,64位的iOS
系统同样如此,因此_flags字段是存在的
从cache_t
的数据结构我们看不出来他到底缓存的什么,从下面的增、删、改、查方法入手来分析
cache_t::insert
// Excerpt of cache_t::insert showing only the probe loop; everything else is elided ("......").
void cache_t::insert(SEL sel, IMP imp, id receiver)
{
......
bucket_t *b = buckets();
mask_t m = capacity - 1;
// Starting index for this selector.
mask_t begin = cache_hash(sel, m);
mask_t i = begin;
// Scan for the first unused slot and insert there.
// There is guaranteed to be an empty slot.
do {
// A sel of 0 marks an unused slot: claim it.
if (fastpath(b[i].sel() == 0)) {
incrementOccupied();
b[i].set<Atomic, Encoded>(b, sel, imp, cls());
return;
}
if (b[i].sel() == sel) {
// The entry was added to the cache by some other thread
// before we grabbed the cacheUpdateLock.
return;
}
// Otherwise move to the next candidate index until the probe returns to `begin`.
} while (fastpath((i = cache_next(i, m)) != begin));
......
}
通过cache_t
中的buckets()
方法拿到一个数据结构b
进行操作,可以通过下标索引b[i]
的方式访问b
中的成员,说明b
不是一个链表,而是一块连续的内存(数组);再结合下标由cache_hash计算得出,可知它是一个哈希表
cache_t::buckets()
// Returns the bucket array pointer held in _bucketsAndMaybeMask.
struct bucket_t *cache_t::buckets() const
{
uintptr_t addr = _bucketsAndMaybeMask.load(memory_order_relaxed);
// Keep only the bits selected by bucketsMask; the other bits are not part of the pointer.
return (bucket_t *)(addr & bucketsMask);
}
使用_bucketsAndMaybeMask
进行一些操作获取到一个bucket_t *
指针
bucket_t
// One cache entry: a selector (_sel) paired with an implementation value (_imp).
struct bucket_t {
private:
// The two fields are declared in opposite order on arm64 and on other architectures.
#if __arm64__
explicit_atomic<uintptr_t> _imp;
explicit_atomic<SEL> _sel;
#else
explicit_atomic<SEL> _sel;
explicit_atomic<uintptr_t> _imp;
#endif
public:
// Relaxed load of the stored selector; insert() treats 0 as "slot unused".
inline SEL sel() const {
return _sel.load(memory_order_relaxed);
}
// Returns the stored implementation for `cls`; body elided in this excerpt.
inline IMP imp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, Class cls) const {
...
}
};
很明显bucket_t
是用来存储方法的sel
和imp
。
到这里我们对cache
有了一个基本的认识,cache_t
通过_bucketsAndMaybeMask
进行一些地址操作获取到哈希表的头指针bucket_t *
,bucket_t
结构体中存储了方法的sel
和imp
LLDB验证
@interface JPerson : NSObject
- (void)instanceMethod; // implemented in the .m file
@end
我们自定义一个类JPerson
,定义一个方法instanceMethod
并实现之,在main
方法中写如下代码
JPerson *p1 = [JPerson alloc];
// Send the message once before inspecting the class's cache.
[p1 instanceMethod];
Class pClass = object_getClass(p1);
NSLog(@"%@-%@",p1,pClass); // set a breakpoint here
(lldb) x/5gx pClass
0x100008430: 0x0000000100008408 0x000000010036a140
0x100008440: 0x0000000101252430 0x0001801000000003
0x100008450: 0x0000000101252414
0x100008440
为cache_t
的指针
(lldb) p (cache_t *)0x100008440
(cache_t *) $1 = 0x0000000100008440
(lldb) p *$1
(cache_t) $2 = {
_bucketsAndMaybeMask = {
std::__1::atomic<unsigned long> = {
Value = 4314178608
}
}
= {
= {
_maybeMask = {
std::__1::atomic<unsigned int> = {
Value = 3
}
}
_flags = 32784
_occupied = 1
}
_originalPreoptCache = {
std::__1::atomic<preopt_cache_t *> = {
Value = 0x0001801000000003
}
}
}
}
我们可以通过buckets()
获取到哈希表,遍历此哈希表
(lldb) p $2.buckets()[0].sel()
(SEL) $3 = "instanceMethod"
巧了巧了,第一个就获取到了instanceMethod
方法,但是我们要知道下标是经过哈希算法计算得出来的,可能每一次尝试得到的结果都不一样。
(lldb) p $2.buckets()[0].imp(nil,pClass)
(IMP) $4 = 0x0000000100003c00 (KCObjcBuild`-[JPerson instanceMethod])
底层函数
上面我们简单探索了一下cache_t
的内容,知道了他确实是用来缓存方法的,在这里我们对其实现细节进行探索。
由于我们暂时还不知道cache_t
何时被创建的,我们还是从insert
方法来分析
insert
忽略一下无关主流程的代码
// Annotated excerpt of cache_t::insert; code unrelated to the main flow is elided ("...").
void cache_t::insert(SEL sel, IMP imp, id receiver)
{
...
// Use the cache as-is if until we exceed our expected fill ratio.
// Occupancy after this insert, and the current capacity (kept in oldCapacity for reallocate).
mask_t newOccupied = occupied() + 1;
unsigned oldCapacity = capacity(), capacity = oldCapacity;
// First insert: the cache has not been allocated yet.
if (slowpath(isConstantEmptyCache())) {
// Cache is read-only. Replace it.
// INIT_CACHE_SIZE is the initial capacity (4 in the build inspected in this article).
if (!capacity) capacity = INIT_CACHE_SIZE;
reallocate(oldCapacity, capacity, /* freeOld */false);
}
...
else {
// Growth path: double the capacity (INIT_CACHE_SIZE if it was 0), capped at MAX_CACHE_SIZE.
capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
if (capacity > MAX_CACHE_SIZE) {
capacity = MAX_CACHE_SIZE;
}
// Unlike the first-insert path, freeOld is true here, so the old buckets are released.
reallocate(oldCapacity, capacity, true);
}
// Fetch the (possibly new) bucket array.
bucket_t *b = buckets();
// Mask handed to cache_hash/cache_next. Presumably capacity is a power of two, so
// capacity - 1 works as an index bit mask; it is not derived from the end-marker bucket.
mask_t m = capacity - 1;
// Hash the selector to get the starting index.
mask_t begin = cache_hash(sel, m);
mask_t i = begin;
// Scan for the first unused slot and insert there.
// There is guaranteed to be an empty slot.
do {
// Unused slot (sel == 0): store the entry here.
if (fastpath(b[i].sel() == 0)) {
// Presumably _occupied++, i.e. the count of entries stored in the table.
incrementOccupied();
b[i].set<Atomic, Encoded>(b, sel, imp, cls());
return;
}
// The slot already holds this selector: nothing to do.
if (b[i].sel() == sel) {
// The entry was added to the cache by some other thread
// before we grabbed the cacheUpdateLock.
return;
}
} while (fastpath((i = cache_next(i, m)) != begin));
// The probe wrapped around to `begin` without finding a free or matching slot: error path.
bad_cache(receiver, (SEL)sel);
}
如果是第一次执行那么创建一个长度为4
的buckets
,使用sel
经过哈希算法cache_hash
计算得到下标。
如果当前位置为空那么直接存储并且_occupied++
,如果已经存储了相同的sel
什么都不用做,否则的话执行哈希碰撞的逻辑cache_next
。
从这里我们也知道了_occupied
是用来存储buckets
中已经存储的值的数量。
occupied
// Returns the stored _occupied count.
mask_t cache_t::occupied() const
{
return _occupied;
}
当前buckets
已经存储的数量
capacity
// Number of buckets: mask() + 1, or 0 when the mask is 0.
unsigned cache_t::capacity() const
{
return mask() ? mask()+1 : 0;
}
buckets
的容量
mask
// Relaxed load of _maybeMask (setBucketsAndMask stores newCapacity - 1 there).
mask_t cache_t::mask() const
{
return _maybeMask.load(memory_order_relaxed);
}
reallocate
// Installs a freshly allocated bucket array of newCapacity buckets and, when freeOld
// is true, hands the previous array to collect_free.
ALWAYS_INLINE
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
// Remember the current buckets so they can be released below when freeOld is true.
// insert() passes freeOld=false for the first allocation and true when growing.
bucket_t *oldBuckets = buckets();
// Allocate a new array sized for newCapacity.
bucket_t *newBuckets = allocateBuckets(newCapacity);
// Cache's old contents are not propagated.
// This is thought to save cache memory at the cost of extra cache fills.
// fixme re-measure this
ASSERT(newCapacity > 0);
ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);
setBucketsAndMask(newBuckets, newCapacity - 1);
// Release the old buckets only when the caller asked for it.
if (freeOld) {
collect_free(oldBuckets, oldCapacity);
}
}
创建buckets
并且给_bucketsAndMaybeMask
和_maybeMask
赋初始值。
如果是扩容的逻辑还需要释放旧的buckets
allocateBuckets
// Allocates a zero-filled bucket array for newCapacity and writes the end marker into it.
bucket_t *cache_t::allocateBuckets(mask_t newCapacity)
{
// Allocate one extra bucket to mark the end of the list.
// This can't overflow mask_t because newCapacity is a power of 2.
// calloc zero-fills bytesForCapacity(newCapacity) bytes, so every sel starts out as 0 (unused).
// NOTE(review): bytesForCapacity is not shown here; the article reads it as
// sizeof(bucket_t) * newCapacity - confirm against the runtime source.
bucket_t *newBuckets = (bucket_t *)calloc(bytesForCapacity(newCapacity), 1);
// Locate the end-marker bucket (the article takes it to be the last bucket).
bucket_t *end = endMarker(newBuckets, newCapacity);
#if __arm__
// End marker's sel is 1 and imp points BEFORE the first bucket.
// This saves an instruction in objc_msgSend.
end->set<NotAtomic, Raw>(newBuckets, (SEL)(uintptr_t)1, (IMP)(newBuckets - 1), nil);
#else
// End marker's sel is 1 and imp points to the first bucket.
// The sel value 1 is the sentinel that identifies the end marker.
end->set<NotAtomic, Raw>(newBuckets, (SEL)(uintptr_t)1, (IMP)newBuckets, nil);
#endif
if (PrintCaches) recordNewCache(newCapacity);
return newBuckets;
}
这个方法是用来开辟buckets
空间。
开辟了一块newCapacity
个bucket_t
大小的空间并返回首地址,找到最后一个bucket_t
并给其sel
赋值1
作为标记
set
// Writes newSel/newImp into this bucket. The bucket must be empty or already hold newSel.
template<Atomicity atomicity, IMPEncoding impEncoding>
void bucket_t::set(bucket_t *base, SEL newSel, IMP newImp, Class cls)
{
// == 0      : this bucket is empty.
// == newSel : this bucket already holds the selector being written.
ASSERT(_sel.load(memory_order_relaxed) == 0 ||
_sel.load(memory_order_relaxed) == newSel);
// objc_msgSend uses sel and imp with no locks.
// It is safe for objc_msgSend to see new imp but NULL sel
// (It will get a cache miss but not dispatch to the wrong place.)
// It is unsafe for objc_msgSend to see old imp and new sel.
// Therefore we write new imp, wait a lot, then write new sel.
// The imp is encoded only when impEncoding == Encoded. Among the callers quoted in this
// article, insert() uses set<Atomic, Encoded> and allocateBuckets() uses set<NotAtomic, Raw>.
uintptr_t newIMP = (impEncoding == Encoded
? encodeImp(base, newImp, newSel, cls)
: (uintptr_t)newImp);
// Atomic path.
if (atomicity == Atomic) {
// Store the imp first (see the ordering note above).
_imp.store(newIMP, memory_order_relaxed);
// Given the assertion above, != newSel means the stored sel is 0 (empty bucket),
// so the selector still has to be written.
if (_sel.load(memory_order_relaxed) != newSel) {
#ifdef __arm__
mega_barrier();
_sel.store(newSel, memory_order_relaxed);
#elif __x86_64__ || __i386__
_sel.store(newSel, memory_order_release);
#else
#error Don't know how to do bucket_t::set on this architecture.
#endif
}
} else {
// Non-atomic path: plain relaxed stores of both fields.
_imp.store(newIMP, memory_order_relaxed);
_sel.store(newSel, memory_order_relaxed);
}
}
这个方法是用来向buckets
中存储内容。
两种情况下可以使用当前bucket_t
- 当前
bucket_t
为空,可以存储sel
和imp
- 当前
bucket_t
已经存储了相同的sel
,只需要重新存储imp
就可以了
store
为存储方法,load
为读取方法
setBucketsAndMask
// Excerpt of cache_t::setBucketsAndMask; only the x86_64/i386 branch is shown, the rest is elided ("...").
void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
{
...
#elif __x86_64__ || i386
// On this branch _bucketsAndMaybeMask holds the address of the first bucket.
_bucketsAndMaybeMask.store((uintptr_t)newBuckets, memory_order_release);
// _maybeMask holds newMask; reallocate() passes newCapacity - 1,
// where newCapacity is the number of buckets.
_maybeMask.store(newMask, memory_order_release);
// The occupied count is reset for the newly installed buckets.
_occupied = 0;
#else
...
}
从这里我们知道了cache_t
中的
_bucketsAndMaybeMask
用来存储buckets
的首地址_maybeMask
用来存储buckets
容量-1
的值,作为计算下标时使用的掩码(容量是2的幂);另外最后一个bucket_t
存储了结束标记
这里简单了解一下方法缓存的结构,后面我们研究方法查找流程的时候还会涉及到cache_t。