源码
objc_class结构体
// Excerpt of the runtime's class structure. The "......" line below marks
// members that this listing omits.
struct objc_class : objc_object {
// The isa field is commented out in this struct; presumably it is supplied
// by the objc_object base -- confirm against the full source.
// Class ISA;
// The class this class inherits from.
Class superclass;
cache_t cache; // formerly cache pointer and vtable
class_data_bits_t bits; // class_rw_t * plus custom rr/alloc flags
// Accessor that forwards to bits.data() and returns its class_rw_t pointer.
class_rw_t *data() const {
return bits.data();
}
......
}
cache_t结构体
// Per-class method cache header (excerpt; "......" marks omitted members).
// The way the bucket pointer and the mask are stored is chosen at compile
// time by CACHE_MASK_STORAGE; exactly one of the three layouts is compiled.
struct cache_t {
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED
// Layout 1: bucket pointer and mask live in two separate atomic fields.
explicit_atomic<struct bucket_t *> _buckets;
explicit_atomic<mask_t> _mask;
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
// Layout 2: mask and bucket pointer are packed into one atomic word.
explicit_atomic<uintptr_t> _maskAndBuckets;
mask_t _mask_unused;
// How much the mask is shifted by.
static constexpr uintptr_t maskShift = 48;
// Additional bits after the mask which must be zero. msgSend
// takes advantage of these additional bits to construct the value
// `mask << 4` from `_maskAndBuckets` in a single instruction.
static constexpr uintptr_t maskZeroBits = 4;
// The largest mask value we can store.
// With maskShift == 48 this is (1 << 16) - 1, i.e. 0xffff.
static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;
// The mask applied to `_maskAndBuckets` to retrieve the buckets pointer.
// With the constants above this keeps the low 44 bits (48 - 4).
static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;
// Ensure we have enough bits for the buckets pointer.
static_assert(bucketsMask >= MACH_VM_MAX_ADDRESS, "Bucket field doesn't have enough bits for arbitrary pointers.");
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
// Layout 3:
// _maskAndBuckets stores the mask shift in the low 4 bits, and
// the buckets pointer in the remainder of the value. The mask
// shift is the value where (0xffff >> shift) produces the correct
// mask. This is equal to 16 - log2(cache_size).
explicit_atomic<uintptr_t> _maskAndBuckets;
mask_t _mask_unused;
// Number of low bits reserved for the mask shift.
static constexpr uintptr_t maskBits = 4;
// Selects those low 4 bits (0xf).
static constexpr uintptr_t maskMask = (1 << maskBits) - 1;
// Selects everything except the low 4 bits, i.e. the buckets pointer.
static constexpr uintptr_t bucketsMask = ~maskMask;
#else
#error Unknown cache mask storage type.
#endif
// _flags exists only on LP64 builds; its meaning is not shown in this excerpt.
#if __LP64__
uint16_t _flags;
#endif
// Number of occupied slots; incremented by incrementOccupied().
uint16_t _occupied;
public:
// Accessors and mutators. Only the declarations are visible here; the notes
// below are inferred from the names and from how cache_t::insert and
// cache_t::reallocate use them -- confirm against the definitions.
static bucket_t *emptyBuckets();
// Current bucket array.
struct bucket_t *buckets();
mask_t mask();
// Current value of the occupied-slot count.
mask_t occupied();
void incrementOccupied();
// Installs a new bucket array together with its mask.
void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);
void initializeToEmpty();
// Number of buckets currently allocated.
unsigned capacity();
bool isConstantEmptyCache();
bool canBeFreed();
......
};
整体结构
流程分析
部分重要变量
_buckets
_buckets是struct bucket_t类型的散列表,方法的缓存(以散列表的形式存储bucket_t)
散列表(哈希表)是以空间换时间,例如:刚开始为cache_t分配一定的容量(如4个bucket),当容量不够用时扩大为原来的2倍,依次类推。示意图中左边是索引,右边是该索引对应的bucket_t结构体
_mask
_mask是指掩码数据,用于在哈希算法或者哈希冲突算法中计算哈希下标,其中mask等于capacity- 1(_buckets的数组长度-1(容量的临界值))
为什么按位&_mask?
_mask等于capacity - 1,任何值与_mask按位与之后,结果一定落在0到_mask之间(即<=_mask),这样计算出的下标就不会超出已分配的空间
_occupied
表示散列表中已占用的容量
散列表存储原理
- 初始时,为对象的cache_t分配一个空间,值为NULL
- 调用方法时,为对象发送一个SEL消息,如@selector(test),将这个方法缓存
- 系统用SEL与_mask作按位与计算:@selector(test) & _mask,假设其值==2
- 检查索引2对应的空间是否为NULL,如果为NULL就将这个bucket_t缓存在索引2对应空间
- 如果不为空,索引减1,再检查是否为NULL,依次类推。如果索引已经为0,则使索引回绕到_mask(即最后一个位置),直至找到索引对应空间为NULL,再缓存
部分重要函数
incrementOccupied()
// Records that one more cache slot is in use by bumping _occupied by one.
void cache_t::incrementOccupied()
{
    _occupied += 1;
}
该函数只会在void cache_t::insert(Class cls, SEL sel, IMP imp, id receiver)函数中被调用
void cache_t::insert(Class cls, SEL sel, IMP imp, id receiver)
理解为cache_t的插入,而cache中存储的就是sel-imp,所以cache的原理从insert方法开始
// Inserts the (sel, imp) pair into this cache.
//
// Steps:
//   1. Compute the occupancy the cache would have after this insert.
//   2. Allocate or grow the bucket array if the cache is the constant empty
//      cache or would be more than 3/4 full.
//   3. Probe from the hashed index for an empty slot and store the entry.
//
// cls      - class passed to the bucket setter and to bad_cache().
// sel      - selector to cache; must be non-zero.
// imp      - implementation to store for sel.
// receiver - only used when reporting a corrupt cache via bad_cache().
ALWAYS_INLINE
void cache_t::insert(Class cls, SEL sel, IMP imp, id receiver)
{
// The caller must already hold the lock selected by CONFIG_USE_CACHE_LOCK.
#if CONFIG_USE_CACHE_LOCK
cacheUpdateLock.assertLocked();
#else
runtimeLock.assertLocked();
#endif
ASSERT(sel != 0 && cls->isInitialized());
// Use the cache as-is if it is less than 3/4 full
mask_t newOccupied = occupied() + 1;
// capacity starts as the current capacity and is updated below if the
// bucket array is (re)allocated.
unsigned oldCapacity = capacity(), capacity = oldCapacity;
if (slowpath(isConstantEmptyCache())) {
// Cache is read-only. Replace it.
if (!capacity) capacity = INIT_CACHE_SIZE;
// freeOld is false here: the old buckets are not handed to cache_collect_free().
reallocate(oldCapacity, capacity, /* freeOld */false);
}
else if (fastpath(newOccupied <= capacity / 4 * 3)) {
// Cache is less than 3/4 full. Use it as-is.
}
else {
// Double the capacity (or start at INIT_CACHE_SIZE), clamped to MAX_CACHE_SIZE.
capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
if (capacity > MAX_CACHE_SIZE) {
capacity = MAX_CACHE_SIZE;
}
// reallocate() does not carry old entries over to the new bucket array.
reallocate(oldCapacity, capacity, true);
}
bucket_t *b = buckets();
// The mask is capacity - 1; cache_hash() ANDs the selector with it.
mask_t m = capacity - 1;
mask_t begin = cache_hash(sel, m);
mask_t i = begin;
// Scan for the first unused slot and insert there.
// There is guaranteed to be an empty slot because the
// minimum size is 4 and we resized at 3/4 full.
do {
if (fastpath(b[i].sel() == 0)) {
// Empty slot: count it as occupied, then store the entry.
incrementOccupied();
b[i].set<Atomic, Encoded>(sel, imp, cls);
return;
}
if (b[i].sel() == sel) {
// The entry was added to the cache by some other thread
// before we grabbed the cacheUpdateLock.
return;
}
// cache_next() yields the next index to probe; stop after a full lap back to begin.
} while (fastpath((i = cache_next(i, m)) != begin));
// Reached only if every slot was occupied by a different selector.
cache_t::bad_cache(receiver, (SEL)sel, cls);
}
主要分为以下几部分
1、计算出当前的缓存占用量
2、根据缓存占用量判断执行的操作
3、针对需要存储的bucket进行内部imp和sel赋值
计算出当前的缓存占用量
根据occupied的值计算出当前的缓存占用量,当属性未赋值及无方法调用时,此时的occupied()为0,而newOccupied为1,如下所示
// Occupancy the cache would have after this insert: the current count plus the new entry.
mask_t newOccupied = occupied() + 1;
alloc申请空间时,此时的对象已经创建,如果再调用init方法,occupied也会+1
当有属性赋值时,会隐式调用set方法,occupied也会增加,即有几个属性赋值,occupied就会在原有的基础上加几个
当有方法调用时,occupied也会增加,即有几次调用,occupied就会在原有的基础上加几个
根据缓存占用量判断执行的操作
如果是第一次创建,则默认开辟4个
// Branch 1 of cache_t::insert: the cache is still the constant empty cache,
// so a real bucket array must be allocated before anything can be stored.
if (slowpath(isConstantEmptyCache())) {
// Cache is read-only. Replace it.
// INIT_CACHE_SIZE = (1<<2) = 4
if (!capacity) capacity = INIT_CACHE_SIZE;
// freeOld is false: the old buckets are not handed to cache_collect_free().
reallocate(oldCapacity, capacity, /* freeOld */false);
}
如果缓存占用量小于等于3/4,则不作任何处理
// Branch 2 of cache_t::insert: integer arithmetic, so capacity / 4 * 3 is 3
// when capacity is 4. No reallocation happens on this path.
else if (fastpath(newOccupied <= capacity / 4 * 3)) {
// Cache is less than 3/4 full. Use it as-is.
}
如果缓存占用量超过3/4,则需要进行两倍扩容以及重新开辟空间
// Branch 3 of cache_t::insert: the cache would exceed 3/4 full, so grow it.
else {
// Double the capacity (or start at INIT_CACHE_SIZE when it is 0).
capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
// Never grow past MAX_CACHE_SIZE.
if (capacity > MAX_CACHE_SIZE) {
capacity = MAX_CACHE_SIZE;
}
// freeOld is true: the old bucket array is passed to cache_collect_free().
reallocate(oldCapacity, capacity, true);
}
reallocate方法:开辟空间
// Replaces the cache's bucket array with a newly allocated one.
//
// oldCapacity - capacity of the current array; only forwarded to
//               cache_collect_free() when freeOld is true.
// newCapacity - number of buckets to allocate; must be > 0.
// freeOld     - when true, the previous array is handed to cache_collect_free().
//
// Entries in the old array are not copied into the new one.
ALWAYS_INLINE
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
// Capture the current array before it is replaced below.
bucket_t *oldBuckets = buckets();
bucket_t *newBuckets = allocateBuckets(newCapacity);
// Cache's old contents are not propagated.
// This is thought to save cache memory at the cost of extra cache fills.
// fixme re-measure this
ASSERT(newCapacity > 0);
// Checks that newCapacity - 1 is unchanged by a round trip through mask_t,
// i.e. the mask value is representable.
ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);
// Install the new array; the mask is always capacity - 1.
setBucketsAndMask(newBuckets, newCapacity - 1);
if (freeOld) {
cache_collect_free(oldBuckets, oldCapacity);
}
}
allocateBuckets方法:向系统申请开辟内存,即开辟bucket,此时的bucket只是一个临时变量
setBucketsAndMask方法:将临时的bucket存入缓存中
cache_collect_free方法:清理之前的缓存
// Registers an old bucket array as garbage and then triggers a collection pass.
//
// data     - the bucket array that is no longer in use.
// capacity - the number of buckets in that array; used for size accounting.
static void cache_collect_free(bucket_t *data, mask_t capacity)
{
// The caller must already hold the lock selected by CONFIG_USE_CACHE_LOCK.
#if CONFIG_USE_CACHE_LOCK
cacheUpdateLock.assertLocked();
#else
runtimeLock.assertLocked();
#endif
if (PrintCaches) recordDeadCache(capacity);
// Presumably ensures garbage_refs has room for one more entry -- the
// definition is not shown here; confirm.
_garbage_make_room ();
// Account for the bytes held by this dead array, then append it to the garbage list.
garbage_byte_size += cache_t::bytesForCapacity(capacity);
garbage_refs[garbage_count++] = data;
// Definition not shown; presumably frees queued garbage when it is safe to
// do so -- confirm what the `false` argument selects.
cache_collect(false);
}
如果是第一次,需要分配回收空间
如果不是第一次,则将内存段加大,即原有内存*2
记录存储这次的bucket
cache_collect方法:垃圾回收,清理旧的bucket
针对需要存储的bucket进行内部imp和sel赋值
- 如果哈希下标的位置未存储sel,即该下标位置获取sel等于0,此时将sel-imp存储进去,并将occupied占用大小加1
- 如果当前哈希下标存储的sel等于即将插入的sel,则直接返回
- 如果当前哈希下标存储的sel不等于即将插入的sel,则重新经过cache_next方法 即哈希冲突算法,重新进行哈希计算,得到新的下标,再去对比进行存储
cache_hash哈希算法
// Computes the starting bucket index for a selector: the selector value,
// viewed as an integer and narrowed to mask_t, ANDed with the mask.
// The result is therefore never greater than mask.
static inline mask_t cache_hash(SEL sel, mask_t mask)
{
    const uintptr_t selBits = (uintptr_t)sel;
    return (mask_t)selBits & mask;
}
cache_next哈希冲突算法
为什么是减1的算法?因为不同的值与_mask按位与之后,可能得到相同的下标(哈希冲突)。如果该下标已经被占用,就将下标减1继续查找;当下标已经为0时,则回绕到mask(即最后一个位置)
// Collision step: returns the index to probe after i. Probing walks
// downwards one slot at a time and wraps from index 0 back to mask.
static inline mask_t cache_next(mask_t i, mask_t mask) {
    if (i == 0) {
        return mask;
    }
    return i - 1;
}
总结
1、_mask是指掩码数据,用于在哈希算法或者哈希冲突算法中计算哈希下标,其中mask等于capacity - 1
2、_occupied表示哈希表中sel-imp的占用大小(即可以理解为分配的内存中已经存储了sel-imp的的个数)
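- init会导致occupied变化
- 属性赋值,也会隐式调用set方法,导致occupied变化
- 方法调用,会导致occupied变化
3、cache初始化时,分配的空间是4个,随着方法调用的增多,当存储的sel-imp个数超过总容量的3/4时,就需要对cache的内存进行两倍扩容。按上文insert源码中的判断条件 newOccupied <= capacity / 4 * 3,容量为4时阈值为3,即occupied等于3、再插入第4个时触发扩容;而在判断条件为 newOccupied + CACHE_END_MARKER(等于1) <= capacity / 4 * 3 的源码版本中,容量为4时,当occupied等于2、再插入第3个时就会触发扩容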
4、在哈希这种数据结构里面,有一个用来表示空位多少的概念,叫做装载因子(也称负载因子)——装载因子越大,说明空闲位置越少,冲突越多,散列表的性能会下降。装载因子取3/4时,空间利用率比较高,同时避免了相当多的哈希冲突,在空间利用率和查找效率之间取得了平衡
5、sel-imp的存储是通过哈希算法计算下标的,其计算的下标有可能已经存储了其他sel,所以又需要通过哈希冲突算法重新计算下标。因此sel-imp在散列表中的位置与插入顺序无关,看起来是无序的,并不是按顺序依次存放的。