前言
我们知道对象
的本质是objc_object
结构体,内存结构是成员变量
。而类
的本质是objc_class
结构体,有成员变量isa(结构体指针8字节)、superclass(结构体指针8字节)、cache、bits(8字节长度结构体)。在《iOS 对象的本质与isa》中我们探索了isa以及类的关联关系,superclass指向类的父类;在《iOS 类的结构分析》中我们探索了bits以及bits里的方法、属性以及协议。那么cache里面存储的是什么呢
?字面意思是缓存,那么缓存的是什么呢,为什么要缓存呢?抱着这样的疑问我们探索下。探索需要的objc源码objc4-818源码地址
cache_t
cache是一个cache_t结构体,如下:
// Per-class method cache header. Excerpt: the remaining members and the
// closing brace of the struct are elided in this listing.
struct cache_t {
private:
explicit_atomic<uintptr_t> _bucketsAndMaybeMask;// uintptr_t is unsigned long: 8 bytes on LP64
union {
struct {
explicit_atomic<mask_t> _maybeMask;// mask_t is uint32_t: 4 bytes
#if __LP64__
uint16_t _flags;// 2 bytes
#endif
uint16_t _occupied;// 2 bytes
};
explicit_atomic<preopt_cache_t *> _originalPreoptCache; // struct pointer: 8 bytes; shares storage with the anonymous struct above
};
// ..... method bodies that would distract from the analysis are omitted
public:
unsigned capacity() const;
struct bucket_t *buckets() const;// guess: the cached methods are reached through buckets()
Class cls() const;
#if CONFIG_USE_PREOPT_CACHES
const preopt_cache_t *preopt_cache() const;
#endif
mask_t occupied() const;
void initializeToEmpty();
结构分析:
8字节
无符号长整形变量bucketsAndMaybeMask
8字节
的联合体
(联合体是互斥的,共享内存,大小取决于最大元素),_maybeMask
、flags、occupied
cache_t
是一个结构体
,总大小16个字节
。
从成员变量看都是一些整型变量,看不出缓存的内容,那么缓存的数据是否像类的结构分析中的bits一样,需要通过某个方法才能取到呢?发现buckets()这个返回bucket_t结构体指针的成员函数在cache_t中出现的频率很高,猜测bucket_t
是缓存的关键,查看bucket_t源码。
// Returns the address of the first bucket: the relaxed-loaded value of
// _bucketsAndMaybeMask with everything outside bucketsMask cleared.
struct bucket_t *cache_t::buckets() const
{ // load the packed word, then keep only the bucket-pointer bits
uintptr_t addr = _bucketsAndMaybeMask.load(memory_order_relaxed);
return (bucket_t *)(addr & bucketsMask);
}
// One cache entry: a selector paired with its stored (possibly encoded) IMP.
// Excerpt: the #endif closing the field-order conditional, the tail of the
// encoding conditional in imp(), and the end of the struct are elided here.
struct bucket_t {
private:
// IMP-first is better for arm64e ptrauth and no worse for arm64.
// SEL-first is better for armv7* and i386 and x86_64.
#if __arm64__
explicit_atomic<uintptr_t> _imp;
explicit_atomic<SEL> _sel;
#else
explicit_atomic<SEL> _sel;
explicit_atomic<uintptr_t> _imp;
/*
* Some code that is not analysed here is omitted.
*/
// Returns the stored selector (relaxed atomic load).
inline SEL sel() const { return _sel.load(memory_order_relaxed); }
// Returns the decoded IMP, or nil when the stored value is 0.
// Decoding depends on CACHE_IMP_ENCODING: ptrauth re-signing, XOR with the
// class pointer, or the raw stored value.
inline IMP imp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, Class cls) const{ uintptr_t imp = _imp.load(memory_order_relaxed);
if (!imp) return nil;
#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_PTRAUTH
SEL sel = _sel.load(memory_order_relaxed);
return (IMP)
ptrauth_auth_and_resign((const void *)imp,
ptrauth_key_process_dependent_code,
modifierForSEL(base, sel, cls),
ptrauth_key_function_pointer, 0);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
// XOR with the class pointer undoes the XOR applied when the entry was stored
return (IMP)(imp ^ (uintptr_t)cls);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
return (IMP)imp;}
分析:
- 通过
bucketsAndMaybeMask
加载内存,获取bucket_t
结构体指针。 bucket_t存储着SEL和IMP
。并且arm64下IMP在前
,SEL在后
,否则SEL在前,IMP在后。由于下面的源码分析是macos环境,所以是第二种情况。- bucket_t可以通过
sel()
方法以及imp()
方法获取sel以及imp IMP
的获取不是简单的获取,(IMP)(imp ^ (uintptr_t)cls)会通过这样一个异或运算
返回IMP,下面会详细探讨。
lldb探索cache_t
像探讨bits一样,先用lldb测试一下bucket是否存储着IMP和SEL。首先定义一个类GyPerson,添加一个对象方法testfunction1。在main()入口函数中用 GyPerson *p = [GyPerson alloc] 创建对象p,然后lldb调试。
(lldb) p/x GyPerson.class
(Class) $0 = 0x00000001000084a8 GyPerson //类对象地址(实例的isa指向它)
(lldb) p/x 0x00000001000084a8 + 0x10 //类地址向下偏移16个字节(跳过isa和superclass各8字节)
(long) $1 = 0x00000001000084b8
(lldb) p/x (cache_t*)0x00000001000084b8
(cache_t *) $2 = 0x00000001000084b8
(lldb) p *$2 //cache_t取值
(cache_t) $3 = {
_bucketsAndMaybeMask = {
std::__1::atomic<unsigned long> = {
Value = 4298515312
}
}
= {
= {
_maybeMask = {
std::__1::atomic<unsigned int> = {
Value = 0
}
}
_flags = 32792
_occupied = 0
}
_originalPreoptCache = {
std::__1::atomic<preopt_cache_t *> = {
Value = 0x0000801800000000
}
}
}
}
(lldb) p $3.buckets() //获取buckets()
(bucket_t *) $4 = 0x0000000100362370
(lldb) p *$4
(bucket_t) $5 = {
_sel = {
std::__1::atomic<objc_selector *> = (null) {
Value = (null)
}
}
_imp = {
std::__1::atomic<unsigned long> = {
Value = 0
}
}
}
(lldb) p [p testfunction1] //lldb动态调用方法
(lldb) p *$2
(cache_t) $6 = {
_bucketsAndMaybeMask = {
std::__1::atomic<unsigned long> = {
Value = 4302336176
}
}
= {
= {
_maybeMask = {
std::__1::atomic<unsigned int> = {
Value = 7
}
}
_flags = 32792
_occupied = 1
}
_originalPreoptCache = {
std::__1::atomic<preopt_cache_t *> = {
Value = 0x0001801800000007
}
}
}
}
(lldb) p $6.buckets() //获取buckets()
(bucket_t *) $7 = 0x00000001007070b0
(lldb) p $6.buckets()[1] //向后平移一个bucket,获取第二个bucket
(bucket_t) $8 = {
_sel = {
std::__1::atomic<objc_selector *> = "" {
Value = ""
}
}
_imp = {
std::__1::atomic<unsigned long> = {
Value = 48648
}
}
}
(lldb) p $8.sel()
(SEL) $9 = "testfunction1"
(lldb) p $8.imp(nil,$0) //获取imp
(IMP) $10 = 0x0000000100003aa0 (KCObjcBuild`-[GyPerson testfunction1] at main.m:29)
(lldb) p/x 48648 //imp的value值16进制显示
(int) $11 = 0x0000be08
(lldb) p/x 0x0000be08 ^ 0x00000001000084a8 //异或运算,value^isa
(long) $12 = 0x0000000100003aa0 //$12==$10,验证异或运算
(lldb) p/x 4302336176 //_bucketsAndMaybeMask值16进制
(long) $13 = 0x00000001007070b0 //$13==$7,说明_bucketsAndMaybeMask存储的就是buckets()首地址
分析:
- 类地址向下偏移16个字节获取cache_t
- buckets()中存储着SEL和IMP,并且存在多个bucket
- bucket中存储的IMP是编码后的值,是真实IMP与类地址(isa指向的cls)异或运算的结果,value^isa可以还原出真实的IMP
既然bucket中存储着SEL和IMP,那成员变量_bucketsAndMaybeMask、_maybeMask、_occupied有什么作用呢?它和bucket有什么关系呢?LLDB调试不符合开发习惯
,而且每次添加方法都要重新获取bucket()很麻烦,试着用代码还原一下lldb测试过程。
代码还原探索cache_t
// Demo class declaring five instance methods; sending these messages is what
// populates the class's method cache in the experiment below.
@interface GyPerson:NSObject
-(void)testfunction1;
-(void)testfunction2;
-(void)testfunction3;
-(void)testfunction4;
-(void)testfunction5;
@end
// Every method body is intentionally empty: only the message send matters here.
@implementation GyPerson
-(void)testfunction1{}
-(void)testfunction2{}
-(void)testfunction3{}
-(void)testfunction4{}
-(void)testfunction5{}
@end
// Hand-written mirrors of the runtime structs, so that a Class pointer can be
// cast to gy_objc_class and its cache fields read from ordinary code.
typedef uint32_t mask_t;
// Mirror of bucket_t with the selector stored before the IMP.
struct gy_bucket_t{
SEL _sel;
IMP _imp;
};
// Mirror of cache_t; the first word is declared directly as a bucket pointer
// instead of the packed _bucketsAndMaybeMask value.
struct gy_cache_t{
struct gy_bucket_t * _buckets;
mask_t _maybeMask;
uint16_t _flags;
uint16_t _occupied;
};
// Mirror of class_data_bits_t: a single pointer-sized word.
struct gy_class_data_bits_t{
uintptr_t bits;// 8 bytes
};
// Mirror of objc_class: isa, superclass, cache, bits, in that order.
struct gy_objc_class{
Class isa;
Class superclass;
struct gy_cache_t cache; // formerly cache pointer and vtable
struct gy_class_data_bits_t bits;
};
// Sends messages to a GyPerson instance, then reinterprets its class as a
// gy_objc_class and logs the cache counters followed by one line per bucket
// for indices below _maybeMask.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        GyPerson *person = [GyPerson alloc];
        [person testfunction1];
        // [person testfunction2];
        // [person testfunction3];
        // [person testfunction4];
        // [person testfunction5];

        // View the class object through the mirror struct to reach its cache.
        struct gy_objc_class *mirror = (__bridge struct gy_objc_class *)person.class;
        struct gy_cache_t *cache = &mirror->cache;
        NSLog(@"%hu-%u", cache->_occupied, cache->_maybeMask);

        // Walk the buckets at indices [0, _maybeMask); the bound is re-read on each pass.
        mask_t index = 0;
        while (index < cache->_maybeMask) {
            struct gy_bucket_t *slot = &cache->_buckets[index];
            NSLog(@"%@ - %p", NSStringFromSelector(slot->_sel), slot->_imp);
            index += 1;
        }
    }
    return 0;
}
输出如下:
1-3
(null) - 0x0
(null) - 0x0
testfunction1 - 0xbe10
上面的代码把注释掉的方法testfunction2、testfunction3、testfunction4、testfunction5打开,输出如下:
3-7
(null) - 0x0
(null) - 0x0
testfunction5 - 0xbe20
(null) - 0x0
testfunction4 - 0xbed0
(null) - 0x0
testfunction3 - 0xbec0
分析:
构造cache_t结构体
。参考源码构造objc_class结构体,即objc_class
结构体含有isa、superclass、cache、bits
成员变量。cache、bit结构体同样参考源码构造。- 构造
bucket_t
结构体主要指令环境,macos下SEL在前
IMP在后 - cache_t的构造中,第一个元素应该是bucketsAndMaybeMask,但由于
bucket()
的获取需要通过bucketsAndMaybeMask.load()
,再加上我们分析的就是bucket,所以直接替换成指向bucket_t的指针。 - 随着方法的追加,
_maybeMask
和_occupied的
值都发生了改变,说明maybeMask和occupied与缓存方法的个数有关。
调试总结:
- cache_t缓存着SEL和IMP,SEL、IMP存储在bucket中
bucket有很多个
,bucketsAndMaybeMask的值
是第一个bucket的指针地址
。
疑问?追加方法后打印buckets中SEL,发现testfunction1不见了
,也没有testfunction2?testfunction5方法的前面为什么有两个空的SEL
?
insert
要想弄清楚疑问,那么就要看它是如何插入缓存的,试着全局搜索一下insert,果然有相关方法。使用上面的GyPerson,初始化对象并添加testfunction1,testfunction2,testfunction3,testfunction4,testfunction5方法,断点调试
。
// Inserts (sel, imp) into this cache. Allocates the bucket array on first use,
// grows it when the fill threshold would be exceeded, then probes from the
// hashed index for an empty slot. Returns early if the class is not yet
// initialized or if the selector is already present.
void cache_t::insert(SEL sel, IMP imp, id receiver)
{
runtimeLock.assertLocked();
// Never cache before +initialize is done
if (slowpath(!cls()->isInitialized())) {
return;
}
if (isConstantOptimizedCache()) {
_objc_fatal("cache_t::insert() called with a preoptimized cache for %s",
cls()->nameForLogging());
}
#if DEBUG_TASK_THREADS
return _collecting_in_critical();
#else
#if CONFIG_USE_CACHE_LOCK
mutex_locker_t lock(cacheUpdateLock);
#endif
ASSERT(sel != 0 && cls()->isInitialized());
// Observed under the debugger: the first call sees occupied=0 (newOccupied=1); the second call sees occupied=1
mask_t newOccupied = occupied() + 1;
// On the first call capacity() is 0, so oldCapacity=0 and capacity=0
unsigned oldCapacity = capacity(), capacity = oldCapacity;
// The cache is empty: this is the first insertion
if (slowpath(isConstantEmptyCache())) {
// Cache is read-only. Replace it.
// In the x86_64 macOS run traced here INIT_CACHE_SIZE is 1 << 2 = 4, so capacity becomes 4
if (!capacity) capacity = INIT_CACHE_SIZE;
// Allocate the bucket array: oldCapacity=0, capacity=4; freeOld says whether the old array is released
reallocate(oldCapacity, capacity, /* freeOld */false);
}
// Keep the current array while newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity).
// NOTE(review): the original note here gave CACHE_END_MARKER as 1 on device and 0 otherwise, which
// contradicts the note on the next branch (0 on device, 1 otherwise) — confirm against objc-cache.mm
else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
//Cache is less than 3/4 or 7/8 full. Use it as-is.
}
#if CACHE_ALLOW_FULL_UTILIZATION
// capacity <= 8 && newOccupied + CACHE_END_MARKER <= capacity; per the original note CACHE_END_MARKER is 0 on device and 1 otherwise
else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
// Allow 100% cache utilization for small buckets. Use it as-is.
// Taken when the cache is allowed to fill completely
}
#endif
else {
// NOTE(review): the original note said INIT_CACHE_SIZE is 4 on device and 2 otherwise; that contradicts the
// capacity of 4 observed in this non-device x86_64 run — presumably the two values are swapped, confirm
// Double capacity when it is non-zero, otherwise start from INIT_CACHE_SIZE
capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
if (capacity > MAX_CACHE_SIZE) {// clamp to MAX_CACHE_SIZE (original note said 2^15; objc4-818 appears to use 1 << 16 — TODO confirm)
capacity = MAX_CACHE_SIZE;
}
reallocate(oldCapacity, capacity, true);
}
bucket_t *b = buckets();
mask_t m = capacity - 1;
// Hash sel against the mask (capacity - 1) to get the starting index
mask_t begin = cache_hash(sel, m);
mask_t i = begin;
// Scan for the first unused slot and insert there.
// There is guaranteed to be an empty slot.
do {
// Empty slot at this index: store the entry; fastpath marks this as the likely case
if (fastpath(b[i].sel() == 0)) {
incrementOccupied();
b[i].set<Atomic, Encoded>(b, sel, imp, cls());
return;
}
// This selector is already stored at this index: nothing to do
if (b[i].sel() == sel) {
// The entry was added to the cache by some other thread
// before we grabbed the cacheUpdateLock.
return;
}
// Slot holds a different selector: cache_next picks the next index to probe (hash-collision handling)
} while (fastpath((i = cache_next(i, m)) != begin));
bad_cache(receiver, (SEL)sel);
#endif // !DEBUG_TASK_THREADS
}
断点调试分析(x86 macos下):
首次
插入testfunction1,capacity=4
,occupied=0,reallocate
开辟了4个bucket内存空间,再插入
方法occupied++
,进入insert时occupied
的值等于已缓存的方法数(即本次插入的方法序号-1)
,从0开始
- 插入testfunction3第三个方法时,capacity扩容为原来的两倍即8,
reallocate重新开辟
了8个bucket内存空间 - SEL,IMP插入是先用
cache_hash
(sel,capacity-1)计算出下标
,再根据下标
找到指定
的bucket,指定的bucket调用set()
方法插入SEL和IMP。这也是为什么buckets中方法不是按插入先后排序的
原因,buckets
本质是一个开放寻址的哈希表(不是链表)
,通过hash计算出的下标存储,下标冲突时用cache_next探测下一个下标。
- 调用set()插入SEL和IMP之前,insert
先判断该下标的bucket里有没有sel:为空就插入;已经是同一个sel就直接返回;是别的sel则换下一个下标继续查找
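- bucket内存开辟规则(x86_64 macOS下):
  - 首次:开辟4个bucket内存空间(INIT_CACHE_SIZE)。
  - 3/4规则:(occupied+1)+CACHE_END_MARKER<=capacity * 3 / 4时不重新开辟;x86_64下CACHE_END_MARKER=1,即(occupied+1)+1<=capacity * 3 / 4。当occupied为1时,2+1<=3成立,即第二次插入方法不会开辟bucket内存空间。
  - 允许存满:该分支只在CACHE_ALLOW_FULL_UTILIZATION=1时才参与编译;CACHE_ALLOW_FULL_UTILIZATION为1时代表小容量(capacity<=FULL_UTILIZATION_CACHE_SIZE)的缓存允许存满,比如开辟了4个bucket大小的内存,正好存满。
  - 两倍扩容规则:超过3/4就扩容,occupied=2即第三次插入方法时,3+1>3,触发扩容。capacity有值时扩容为原来的2倍,没有值时取INIT_CACHE_SIZE;有占位bucket的架构(如x86_64)INIT_CACHE_SIZE=4,arm64真机INIT_CACHE_SIZE=2;容量上限为MAX_CACHE_SIZE(objc4-818中为2^16)。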
reallocate()
// Replaces the bucket array with a newly allocated one of newCapacity buckets.
// Nothing here copies entries from the old array into the new one; the old
// array is passed to collect_free() only when freeOld is true.
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
bucket_t *oldBuckets = buckets();// address of the current (old) bucket array
bucket_t *newBuckets = allocateBuckets(newCapacity);// address of the new bucket array
ASSERT(newCapacity > 0);
ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);
// Store the new array address and the new mask (newCapacity - 1)
setBucketsAndMask(newBuckets, newCapacity - 1);
if (freeOld) {// growing passes freeOld=true: release the old array
collect_free(oldBuckets, oldCapacity);
}
}
分析:
- allocateBuckets开辟内存,内存大小为newCapacity * sizeof(bucket_t)
- 扩容时freeOld=true,释放旧的buckets,旧buckets里的SEL和IMP不会拷贝到新的buckets中。比如我们上面写的测试方法,插入testfunction1和testfunction2并没有达到扩容条件,当插入testfunction3时,达到扩容条件,释放了旧的bucket,这就是扩容后打印不到testfunction1和testfunction2的原因。
allocateBuckets()
// Allocates a zero-filled bucket array sized by bytesForCapacity(newCapacity)
// and writes an end-marker entry (sel = 1) into the bucket returned by
// endMarker(). Returns the address of the new array.
bucket_t *cache_t::allocateBuckets(mask_t newCapacity)
{
// Allocate one extra bucket to mark the end of the list.
// This can't overflow mask_t because newCapacity is a power of 2.
// Allocate memory for newCapacity buckets
bucket_t *newBuckets = (bucket_t *)calloc(bytesForCapacity(newCapacity), 1);
// Get the address of the last bucket
bucket_t *end = endMarker(newBuckets, newCapacity);
#if __arm__
// End marker's sel is 1 and imp points BEFORE the first bucket.
// This saves an instruction in objc_msgSend.
end->set<NotAtomic, Raw>(newBuckets, (SEL)(uintptr_t)1, (IMP)(newBuckets - 1), nil);
#else
// End marker's sel is 1 and imp points to the first bucket.
// The last bucket is a placeholder whose sel is 1
end->set<NotAtomic, Raw>(newBuckets, (SEL)(uintptr_t)1, (IMP)newBuckets, nil);
#endif
if (PrintCaches) recordNewCache(newCapacity);
return newBuckets;
}
分析:
- 开辟
多少个bucket
内存,是由Capacity值
决定的,首次
开辟Capacity为4
。 endMarker()
定位到最后一个bucket
,end->set(),把最后bucket的SEL设置为1
,IMP
设置为首个bucket
的地址,即最后一个bucket为占位bucket
,没有存储着真正的SEL和IMP。
setBucketsAndMask()
// Stores the new bucket array address and the new mask into the cache, then
// resets _occupied to 0. The arm path separates the stores with barriers; the
// x86 path uses release stores instead.
void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
{
#ifdef __arm__
// ensure other threads see buckets contents before buckets pointer
mega_barrier();
_bucketsAndMaybeMask.store((uintptr_t)newBuckets, memory_order_relaxed);
// ensure other threads see new buckets before new mask
mega_barrier();
_maybeMask.store(newMask, memory_order_relaxed);
_occupied = 0;
#elif __x86_64__ || i386
// ensure other threads see buckets contents before buckets pointer
// _bucketsAndMaybeMask receives the address of the first bucket
_bucketsAndMaybeMask.store((uintptr_t)newBuckets, memory_order_release);
// ensure other threads see new buckets before new mask
// newMask is newCapacity - 1 when called from reallocate()
_maybeMask.store(newMask, memory_order_release);
_occupied = 0;
#else
#error Don't know how to do setBucketsAndMask on this architecture.
#endif
}
分析:
bucketsAndMaybeMask
存储着bucket首地址
maybemask
为Capacity-1
,首次插入方法,Capacity为4,maybemask即为3。maybemask
的值也是真正意义上bucket个数
,因为最后一个bucket是占位bucket。
bucket_t::set、cache_hash
// Maps a selector to a bucket index: the selector's address (optionally mixed
// with its own bits shifted right by 7) ANDed with mask.
// mask is capacity - 1 at the call site in cache_t::insert.
static inline mask_t cache_hash(SEL sel, mask_t mask)
{
uintptr_t value = (uintptr_t)sel;
#if CONFIG_USE_PREOPT_CACHES // per the original note, CONFIG_USE_PREOPT_CACHES is 1 on device builds
value ^= value >> 7;// value = value ^ (value >> 7)
#endif
return (mask_t)(value & mask);// keep only the bits selected by mask
}
// cache_next returns the index to probe after i when a slot is taken.
// The #endif closing this conditional is elided in this listing.
#if CACHE_END_MARKER // non-device builds, per the original note
// Steps forward by one and wraps via the mask.
static inline mask_t cache_next(mask_t i, mask_t mask) {
return (i+1) & mask;
}
#elif __arm64__ // device builds, per the original note
// Steps backward by one; from index 0 it wraps to mask.
static inline mask_t cache_next(mask_t i, mask_t mask) {
return i ? i-1 : mask;
}
// Writes newSel and newImp into this bucket. The slot must be empty or already
// hold newSel. This is the variant whose static_assert requires the IMP-first
// layout (the message names arm64).
void bucket_t::set(bucket_t *base, SEL newSel, IMP newImp, Class cls)
{
ASSERT(_sel.load(memory_order_relaxed) == 0 ||
_sel.load(memory_order_relaxed) == newSel);
static_assert(offsetof(bucket_t,_imp) == 0 &&
offsetof(bucket_t,_sel) == sizeof(void *),
"bucket_t layout doesn't match arm64 bucket_t::set()");
// Encode the IMP when impEncoding is Encoded; otherwise store the raw pointer value
uintptr_t encodedImp = (impEncoding == Encoded
? encodeImp(base, newImp, newSel, cls)
: (uintptr_t)newImp);
// stp() is given the encoded IMP first and the selector second, matching the asserted field order
stp(encodedImp, (uintptr_t)newSel, this);
}
// Produces the value stored in a bucket's _imp field for newImp, according to
// CACHE_IMP_ENCODING. Excerpt: the opening branch of the conditional is elided.
uintptr_t encodeImp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, IMP newImp, UNUSED_WITHOUT_PTRAUTH SEL newSel, Class cls) const {
/*
* Some code is omitted here.
*/
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
// XOR the IMP with the class pointer to get the stored value
return (uintptr_t)newImp ^ (uintptr_t)cls;
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
// No encoding: store the IMP's address unchanged
return (uintptr_t)newImp;
#else
#error Unknown method cache IMP encoding.
#endif
}
分析:
cache_hash
主要是生成hash下标
,cache_next
主要是解决hash冲突
encodeImp
方法会对imp进行编码
(uintptr_t)newImp ^ (uintptr_t)cls即异或运算
。cls有值
imp进行编码
,cls没有值
imp相当于没编码
。cls即isa,上面lldb最后面的调试可以看出,imp编码后的value异或了isa,最后还原了IMP地址。
通过上面的分析我们再补充一个图方便理解:
总结:
cache_t
中bucketsAndMaybeMask的值是bucket()首地址,即第一个bucket地址cache_t
中bucket()结构体指针函数缓存着SEL与IMP- bucket个数等于maybeMask+1
最后一个
bucket是占位bucket,SEL=1,IMP=bucket首地址
bucket
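开辟内存空间以3/4(x86_64;arm64为7/8)
为界限,x86_64下首次开辟4个bucket内存大小(arm64真机为2个)。超过界限时需要扩容,每次扩容为原来的2倍(如4扩到8),容量上限为MAX_CACHE_SIZE(objc4-818中为2的16次方)
。bucket
存储是通过哈希计算下标方式存储
,所以SEL的存储不是连续的,本质是开放寻址的哈希表(冲突时由cache_next探测相邻下标),扩容时旧缓存不会迁移而是直接丢弃。IMP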
是imp^isa
异或运算的结果,结果再异或isa
可以还原imp指针
。