在 类的原理 中我们查看 objc_class 看到了 cache 成员,这次我们就来看看其中的结构
1、cache结构
1.1、cache_t
// Per-class method cache (excerpt). "......" marks members left out of this listing.
// The byte sizes noted below are the article's figures for a 64-bit build.
struct cache_t {
private:
    explicit_atomic<uintptr_t> _bucketsAndMaybeMask; // holds the base address of the bucket_t array, 8 bytes
    union {
        struct {
            // _maybeMask (number of bucket_t slots - 1)
            explicit_atomic<mask_t> _maybeMask; // bucket_t count - 1, 4 bytes
#if __LP64__ // LP64 data model: long and pointers are 64-bit
            uint16_t _flags; // 2 bytes
#endif
            uint16_t _occupied; // number of slots in use, 2 bytes
        };
        explicit_atomic<preopt_cache_t *> _originalPreoptCache; // 8 bytes
    };
    ......
    // insert() takes a SEL and an IMP; it is the method to focus on
    void insert(SEL sel, IMP imp, id receiver);
    void copyCacheNolock(objc_imp_cache_entry *buffer, int len);
    void destroy();
    void eraseNolock(const char *func);
    static void init();
    static void collectNolock(bool collectALot);
    static size_t bytesForCapacity(uint32_t cap);
    ......
}
- _bucketsAndMaybeMask: bucket_t 首地址
- _maybeMask: bucket_t 容器容量 - 1(掩码)
- _occupied: bucket_t 中已存储方法的数量
1.2、insert方法 : cache -- 缓存方法(核心)
- 通过该方法将方法进行缓存
fastpath:大概率执行,slowpath:小概率执行
// Stores the (sel, imp) pair in this cache. Before storing, the bucket array is
// created or grown when adding one more entry would exceed the allowed fill level.
// `receiver` is only used for the bad_cache() report if no slot can be found.
void cache_t::insert(SEL sel, IMP imp, id receiver)
{
    runtimeLock.assertLocked();
    // Never cache before +initialize is done
    if (slowpath(!cls()->isInitialized())) {
        return;
    }
    if (isConstantOptimizedCache()) {
        _objc_fatal("cache_t::insert() called with a preoptimized cache for %s",
                    cls()->nameForLogging());
    }
#if DEBUG_TASK_THREADS
    return _collecting_in_critical();
#else
#if CONFIG_USE_CACHE_LOCK
    mutex_locker_t lock(cacheUpdateLock);
#endif
    ASSERT(sel != 0 && cls()->isInitialized());
    // The part worth studying starts here.
    // Use the cache as-is if until we exceed our expected fill ratio.
    mask_t newOccupied = occupied() + 1;
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    // First insertion: allocate INIT_CACHE_SIZE buckets
    // (2 on 64-bit arm64, 4 on x86_64, per the INIT_CACHE_SIZE definition).
    if (slowpath(isConstantEmptyCache())) {
        // Cache is read-only. Replace it.
        // Pick the architecture-dependent initial bucket count (2 on 64-bit arm64, 4 on x86_64)
        if (!capacity) capacity = INIT_CACHE_SIZE;
        // Allocate a fresh bucket_t array; nothing is freed here
        reallocate(oldCapacity, capacity, /* freeOld */false);
    }
    // Growth check: if the new occupancy stays within cache_fill_ratio(capacity)
    // (7/8 of the bucket count on 64-bit arm64, 3/4 on x86_64), do nothing.
    else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
        // Cache is less than 3/4 or 7/8 full. Use it as-is.
    }
#if CACHE_ALLOW_FULL_UTILIZATION
    else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
        // Allow 100% cache utilization for small buckets. Use it as-is.
    }
#endif
    else {
        // Grow: double the capacity, capped at MAX_CACHE_SIZE, and free the old buckets
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        reallocate(oldCapacity, capacity, true);
    }
    // The do/while below probes the bucket array `b`, so this is the key part
    bucket_t *b = buckets();
    mask_t m = capacity - 1;
    // cache_hash() is given (bucket count - 1) as the mask, so its result never exceeds that mask
    mask_t begin = cache_hash(sel, m);
    // The insertion index i is derived from sel and the capacity
    // (NOTE(review): the article says mask_t is an alias of uint32_t; its typedef is not shown here)
    mask_t i = begin;
    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot.
    do {
        // Empty slot: cache the method here
        if (fastpath(b[i].sel() == 0)) {
            incrementOccupied();
            // bucket_t::set stores the SEL and IMP in slot i
            b[i].set<Atomic, Encoded>(b, sel, imp, cls());
            return;
        }
        // This selector is already cached: nothing to do
        if (b[i].sel() == sel) {
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
        // Hash collision: move on to the next index given by cache_next()
    } while (fastpath((i = cache_next(i, m)) != begin));
    bad_cache(receiver, (SEL)sel);
#endif // !DEBUG_TASK_THREADS
}
- 在上边源码中,我们主要对几个重要内容进行解释
1.2.1、INIT_CACHE_SIZE
- 获取 bucket_t 初始容量
/* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */
enum {
    // --- log2 of each bucket count ---
#if CACHE_END_MARKER || (__arm64__ && !__LP64__)
    // With an end marker one slot is permanently taken, so a 2-bucket table
    // would leave a single usable slot; begin with 4 buckets instead.
    INIT_CACHE_SIZE_LOG2 = 2,
#else
    // Many classes (metaclasses in particular) cache very few imps, and the
    // table may be filled completely before it is resized, so 2 buckets is
    // a sufficient starting size.
    INIT_CACHE_SIZE_LOG2 = 1,
#endif
    MAX_CACHE_SIZE_LOG2 = 16,
    FULL_UTILIZATION_CACHE_SIZE_LOG2 = 3,

    // --- bucket counts derived from the exponents above (always powers of two) ---
    INIT_CACHE_SIZE = 1 << INIT_CACHE_SIZE_LOG2,
    MAX_CACHE_SIZE = 1 << MAX_CACHE_SIZE_LOG2,
    FULL_UTILIZATION_CACHE_SIZE = 1 << FULL_UTILIZATION_CACHE_SIZE_LOG2,
};
可以看出,INIT_CACHE_SIZE 的大小受 CACHE_END_MARKER 影响,而 CACHE_END_MARKER 是由架构类型决定的
// Per-architecture cache configuration: each branch defines CACHE_END_MARKER
// and a cache_fill_ratio() helper. insert() compares the prospective occupancy
// against cache_fill_ratio(capacity) to decide whether the table must grow.
// NOTE(review): this listing stops after the 64-bit arm64 branch; the closing
// #endif and anything that follows it are not shown here.
#if __arm__ || __x86_64__ || __i386__
// objc_msgSend has few registers available.
// Cache scan increments and wraps at special end-marking bucket.
#define CACHE_END_MARKER 1
// Historical fill ratio of 75% (since the new objc runtime was introduced).
// Returns 3/4 of `capacity` (integer division).
static inline mask_t cache_fill_ratio(mask_t capacity) {
    return capacity * 3 / 4;
}
#elif __arm64__ && !__LP64__
// objc_msgSend has lots of registers available.
// Cache scan decrements. No end marker needed.
#define CACHE_END_MARKER 0
// Historical fill ratio of 75% (since the new objc runtime was introduced).
// Returns 3/4 of `capacity` (integer division).
static inline mask_t cache_fill_ratio(mask_t capacity) {
    return capacity * 3 / 4;
}
#elif __arm64__ && __LP64__
// objc_msgSend has lots of registers available.
// Cache scan decrements. No end marker needed.
#define CACHE_END_MARKER 0
// Allow 87.5% fill ratio in the fast path for all cache sizes.
// Increasing the cache fill ratio reduces the fragmentation and wasted space
// in imp-caches at the cost of potentially increasing the average lookup of
// a selector in imp-caches by increasing collision chains. Another potential
// change is that cache table resizes / resets happen at different moments.
// Returns 7/8 of `capacity` (integer division).
static inline mask_t cache_fill_ratio(mask_t capacity) {
    return capacity * 7 / 8;
}
从中可知,在 arm(不同于arm64架构,为32位架构)、x86_64、i386 架构下 CACHE_END_MARKER 值为 1,arm64 架构下值为 0 ,因此 INIT_CACHE_SIZE 在 64 位 arm64(__LP64__)架构下值为 1 左移1位得到 2,在 x86_64 等架构(以及非 LP64 的 arm64)下值为 1 左移2位得到 4(这段代码也透露了扩容规则:cache_fill_ratio 在 64 位 arm64 下为容量的 7/8,其余架构下为容量的 3/4)
1.2.2、reallocate :
- 再开辟 bucket_t容量,源码中如果进行了扩容会传入 true,释放原有桶子内容
// Swaps in a newly allocated bucket array of `newCapacity` slots.
// When `freeOld` is true the previous array (of `oldCapacity` slots) is handed
// to collect_free(); otherwise it is left alone.
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
    // Remember the current table before it is replaced.
    bucket_t *const previousTable = buckets();
    bucket_t *const freshTable = allocateBuckets(newCapacity);

    // Existing entries are intentionally NOT copied into the new table:
    // this is believed to save cache memory at the price of refilling later.
    // fixme re-measure this
    ASSERT(newCapacity > 0);
    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

    // Publish the new table together with its mask (capacity - 1).
    setBucketsAndMask(freshTable, newCapacity - 1);

    if (!freeOld) {
        return;
    }
    // The table was grown: release the old buckets.
    collect_free(previousTable, oldCapacity);
}
1.2.3、cache_hash()
- 获取插入位置
// Maps a selector to its starting slot index for a table whose mask is
// `mask` (bucket count - 1). The result is always within [0, mask].
static inline mask_t cache_hash(SEL sel, mask_t mask)
{
    // The selector value, reinterpreted as an integer, is the hash input.
    uintptr_t bits = (uintptr_t)sel;
#if CONFIG_USE_PREOPT_CACHES
    // Mix in the bits above bit 7.
    bits = bits ^ (bits >> 7);
#endif
    // AND-ing with the mask keeps only the low bits, so the index
    // can never exceed `mask` regardless of how large `bits` is.
    const uintptr_t slot = bits & mask;
    return (mask_t)slot;
}
1.2.4、cache_next()
- 用于解决哈希冲突时获取新的插入索引值
#if CACHE_END_MARKER
// Next probe index when scanning forward: advance by one slot and let the
// mask wrap the index back to 0 after the last slot.
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return mask & (i + 1);
}
#elif __arm64__
// Next probe index when scanning backward: step down by one slot, and jump
// to the last slot (`mask`) once index 0 has been visited.
static inline mask_t cache_next(mask_t i, mask_t mask) {
    if (i == 0) {
        return mask;
    }
    return i - 1;
}
1.2.5、bucket_t
- 存放 imp指针 与 sel方法名 的容器
// One slot of the method cache: a selector paired with its (possibly encoded) IMP.
struct bucket_t {
private:
    // IMP-first is better for arm64e ptrauth and no worse for arm64.
    // SEL-first is better for armv7* and i386 and x86_64.
#if __arm64__
    // A bucket stores the imp pointer and the sel (method name)
    explicit_atomic<uintptr_t> _imp;
    explicit_atomic<SEL> _sel;
#else
    explicit_atomic<SEL> _sel;
    explicit_atomic<uintptr_t> _imp;
#endif

    // Compute the ptrauth signing modifier from &_imp, newSel, and cls.
    // (Implemented as the XOR of the three values.)
    uintptr_t modifierForSEL(bucket_t *base, SEL newSel, Class cls) const {
        return (uintptr_t)base ^ (uintptr_t)newSel ^ (uintptr_t)cls;
    }

    // Sign newImp, with &_imp, newSel, and cls as modifiers.
    // Returns 0 for a null newImp; otherwise the value to store in _imp under
    // the configured CACHE_IMP_ENCODING (re-signed, XOR-ed with cls, or raw).
    uintptr_t encodeImp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, IMP newImp, UNUSED_WITHOUT_PTRAUTH SEL newSel, Class cls) const {
        if (!newImp) return 0;
#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_PTRAUTH
        return (uintptr_t)
            ptrauth_auth_and_resign(newImp,
                                    ptrauth_key_function_pointer, 0,
                                    ptrauth_key_process_dependent_code,
                                    modifierForSEL(base, newSel, cls));
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
        return (uintptr_t)newImp ^ (uintptr_t)cls;
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
        return (uintptr_t)newImp;
#else
#error Unknown method cache IMP encoding.
#endif
    }

public:
    static inline size_t offsetOfSel() { return offsetof(bucket_t, _sel); }

    // The cached selector (method name), read with relaxed ordering.
    inline SEL sel() const { return _sel.load(memory_order_relaxed); }

#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
#define MAYBE_UNUSED_ISA
#else
#define MAYBE_UNUSED_ISA __attribute__((unused))
#endif
    // Returns the stored imp without ptrauth re-signing: nil when the slot
    // holds 0, the value XOR-ed back with cls under ISA_XOR, and the stored
    // value unchanged under the other encodings.
    inline IMP rawImp(MAYBE_UNUSED_ISA objc_class *cls) const {
        uintptr_t imp = _imp.load(memory_order_relaxed);
        if (!imp) return nil;
#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_PTRAUTH
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
        imp ^= (uintptr_t)cls;
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
#else
#error Unknown method cache IMP encoding.
#endif
        return (IMP)imp;
    }

    // Returns the decoded IMP for this bucket, given the bucket base and the class.
    // The return type is IMP (not IMP*): every return path below yields an IMP value.
    inline IMP imp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, Class cls) const {
        uintptr_t imp = _imp.load(memory_order_relaxed);
        if (!imp) return nil;
#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_PTRAUTH
        SEL sel = _sel.load(memory_order_relaxed);
        return (IMP)
            ptrauth_auth_and_resign((const void *)imp,
                                    ptrauth_key_process_dependent_code,
                                    modifierForSEL(base, sel, cls),
                                    ptrauth_key_function_pointer, 0);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
        return (IMP)(imp ^ (uintptr_t)cls);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
        return (IMP)imp;
#else
#error Unknown method cache IMP encoding.
#endif
    }

    // Stores newSel/newImp into this bucket; the definition is not part of this struct body.
    template <Atomicity, IMPEncoding>
    void set(bucket_t *base, SEL newSel, IMP newImp, Class cls);
};
- bucket_t 的 .set方法 :保存SEL与IMP
//保存方法
#if __arm64__
......
#else
//模板创建,传入Atomicity原子性、IMPEncoding编码
template<Atomicity atomicity, IMPEncoding impEncoding>
void bucket_t::set(bucket_t *base, SEL newSel, IMP newImp, Class cls)
{
    ASSERT(_sel.load(memory_order_relaxed) == 0 ||
           _sel.load(memory_order_relaxed) == newSel);
    // objc_msgSend uses sel and imp with no locks.
    // It is safe for objc_msgSend to see new imp but NULL sel
    // (It will get a cache miss but not dispatch to the wrong place.)
    // It is unsafe for objc_msgSend to see old imp and new sel.
    // Therefore we write new imp, wait a lot, then write new sel.
    //通过 encodeImp() 方法 对 imp 进行编码(签名)
    uintptr_t newIMP = (impEncoding == Encoded
                        ? encodeImp(base, newImp, newSel, cls)
                        : (uintptr_t)newImp);
    if (atomicity == Atomic) {
        //.store 向内存中写数据
        _imp.store(newIMP, memory_order_relaxed);
        //.load 从内存中读数据
        if (_sel.load(memory_order_relaxed) != newSel) {
#ifdef __arm__
            mega_barrier();
            _sel.store(newSel, memory_order_relaxed);
#elif __x86_64__ || __i386__
            _sel.store(newSel, memory_order_release);
#else
#error Don't know how to do bucket_t::set on this architecture.
#endif
        }
    } else {
        _imp.store(newIMP, memory_order_relaxed);
        _sel.store(newSel, memory_order_relaxed);
    }
}
- 读 bucket_t 中缓存的方法(通过+1、+2向下读其他缓存方法的 sel 与 imp指针)
总结
- INIT_CACHE_SIZE : 根据架构有不同的默认初始 bucket_t容量值,计算是否要扩容与扩容后大小
- reallocate() : 初始化/扩容时重新创建 bucket_t,并根据传入参数决定是否释放旧的 bucket_t
- cache_hash() : 计算插入位置的索引
- bucket_t 通过 set 方法存储方法(如果通过runtime进行了方法交换,会存sel与交换后的imp指针)
- 源码中 capacity 与 occupied 的区别
capacity :容量,容纳能力
occupied :实际占用的容量
例如,一个10L的瓶子,装了5L水,那么capacity = 10,occupied = 5.
2、cache扩容
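- 根据 1.2 的 insert 与 1.2.1 中的源码可知,先判断存入当前 sel 后的占用数(newOccupied + CACHE_END_MARKER)是否超过阈值:arm64架构 下为分配空间的7/8,x86_64等架构 下为分配空间的3/4;未超过则直接使用现有缓存
- 超过阈值后再判断容量(仅在定义了 CACHE_ALLOW_FULL_UTILIZATION 时):若容量 小于等于8,查看存入当前 sel 后是否超过 当前容量,如果超过就扩容;若容量 大于8 则直接扩容。扩容后的容量为原来的 2 倍,且不超过 MAX_CACHE_SIZE
- 重新创建 bucket_t,更新 cache 的 _bucketsAndMaybeMask 和 _maybeMask,并释放旧的 bucket_t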
总结
- bucket_t 构成一个散列表,通过 sel & (capacity - 1) 的哈希算法定位存储位置
- 为什么要释放旧的 bucket_t :
  - 由于哈希表的特性 -- 地址映射,每次总表扩容时所有元素的映射都会失效,因为总容量变了,下标哈希结果也会改变
- bucket_t 中读已经调用的方法可能读不到,是因为 方法先被执行缓存了,但后续系统调用了其他 respondsToSelector: 、class、retain 或者 dealloc 等方法,触发了扩容,导致先前缓存的bucket_t被释放,缓存的方法会一同消失