OC方法调用之objc_msgSend慢速查找

649 阅读11分钟

在前文《OC方法调用之objc_msgSend快速查找》中我们分析了objc_msgSend的快速查找流程,即从缓存cache中查找到imp并执行。本文我们来分析没有缓存过的方法的查找流程,称之为慢速查找。

承接前文,在快速查找没找到会执行MissLabelDynamic

.macro CacheLookup Mode, Function, MissLabelDynamic, MissLabelConstant

// Fast-path (cache) lookup; the body is elided in this excerpt
.......

// Reached when the fast-path lookup did not find the selector
LLookupEnd\Function:
LLookupRecover\Function:

// Branch to the label that was passed as the MissLabelDynamic macro argument
b \MissLabelDynamic

MissLabelDynamic是CacheLookup宏传入的第三个参数,即__objc_msgSend_uncached

......
LGetIsaDone:
    // calls imp or objc_msgSend_uncached
    CacheLookup NORMAL, _objc_msgSend, __objc_msgSend_uncached
......

__objc_msgSend_uncached

STATIC_ENTRY __objc_msgSend_uncached
UNWIND __objc_msgSend_uncached, FrameWithNoSaves

// THIS IS NOT A CALLABLE C FUNCTION
// Out-of-band p15 is the class to search
// NOTE(review): the MethodTableLookup excerpt quoted in this article copies
// the class from x16, not x15 -- confirm which objc4 version each snippet
// was taken from.

// The whole body is two macro invocations: MethodTableLookup leaves the
// looked-up IMP in x17, then TailCallFunctionPointer branches to it.
MethodTableLookup
TailCallFunctionPointer x17

END_ENTRY __objc_msgSend_uncached

MethodTableLookup

// MethodTableLookup: calls the C++ slow-path lookup _lookUpImpOrForward and
// leaves the IMP it returns in x17.
.macro MethodTableLookup
    SAVE_REGS MSGSEND
    // lookUpImpOrForward(obj, sel, cls, LOOKUP_INITIALIZE | LOOKUP_RESOLVER)
    // receiver and selector already in x0 and x1
    
    // x2 = cls (third argument), copied from x16
    mov x2, x16
    // x3 = behavior (fourth argument); #3 is the value the call comment
    // above spells as LOOKUP_INITIALIZE | LOOKUP_RESOLVER
    mov x3, #3
    
    // Call the lookup function; its return value comes back in x0
    bl _lookUpImpOrForward


    // IMP in x0 
    // Move the returned IMP into x17 before the saved registers are restored
    // (presumably because RESTORE_REGS overwrites x0 -- confirm against the
    // SAVE_REGS/RESTORE_REGS definitions)
    mov x17, x0

    RESTORE_REGS MSGSEND

.endmacro

TailCallFunctionPointer

// TailCallFunctionPointer: branch (without link) to the address held in the
// register given as the macro argument.
.macro TailCallFunctionPointer
    // $0 = function pointer value
    br $0
.endmacro

MethodTableLookup中将imp存储到x17中,随后x17作为参数传给TailCallFunctionPointer宏,即$0 = x17 = imp,宏中的br $0会跳转并执行查找到的imp

通过这两个方法可知方法的慢速查找是在_lookUpImpOrForward方法进行的,也就是我们要研究的重点

_lookUpImpOrForward

这里终于回到了c++的怀抱,相比于汇编语言c++真是太友好了😄

/***********************************************************************
* lookUpImpOrForward
* Slow-path method lookup.
* Searches cls's own method list, then walks the superclass chain
* (superclass cache first, then that superclass's method list on the
* next loop iteration).
* If nothing is found and LOOKUP_RESOLVER is set in behavior, hands off
* to resolveMethod_locked once. Otherwise the result is forward_imp.
* Unless LOOKUP_NOCACHE is set, the result is inserted into cls's cache.
* Returns the IMP, or nil when LOOKUP_NIL is set and the result would
* have been forward_imp.
* Locking: runtimeLock must NOT be held on entry; it is taken and
* released inside.
**********************************************************************/
NEVER_INLINE

IMP lookUpImpOrForward(id inst, SEL sel, Class cls, int behavior)
{
    // Forwarding entry point; used as the result when no implementation
    // is found anywhere in the class hierarchy.
    const IMP forward_imp = (IMP)_objc_msgForward_impcache;
    IMP imp = nil;
    Class curClass;

    runtimeLock.assertUnlocked();

    // Not an error: the class just has not been initialized yet.
    // In that case the lookup result is deliberately not cached.
    if (slowpath(!cls->isInitialized())) {
        // The first message sent to a class is often +new or +alloc, or +self
        // which goes through objc_opt_* or various optimized entry points.
        //
        // However, the class isn't realized/initialized yet at this point,
        // and the optimized entry points fall down through objc_msgSend,
        // which ends up here.
        //
        // We really want to avoid caching these, as it can cause IMP caches
        // to be made with a single entry forever.
        //
        // Note that this check is racy as several threads might try to
        // message a given class for the first time at the same time,
        // in which case we might cache anyway.
        behavior |= LOOKUP_NOCACHE;
    }

    // runtimeLock is held during isRealized and isInitialized checking
    // to prevent races against concurrent realization.
    // runtimeLock is held during method search to make
    // method-lookup + cache-fill atomic with respect to method addition.
    // Otherwise, a category could be added but ignored indefinitely because
    // the cache was re-filled with the old value after the cache flush on
    // behalf of the category.
    runtimeLock.lock();

    // We don't want people to be able to craft a binary blob that looks like
    // a class but really isn't one and do a CFI attack.
    //
    // To make these harder we want to make sure this is a class that was
    // either built into the binary or legitimately registered through
    // objc_duplicateClass, objc_initializeClassPair or objc_allocateClassPair.
    // Validity check only: dies with a fatal error if cls is not a class
    // known to the runtime.
    checkIsKnownClass(cls);

    // Realize the class if it is not realized yet and, when
    // LOOKUP_INITIALIZE is set, initialize it. The returned pointer
    // replaces cls because realization may return a different class.
    cls = realizeAndInitializeIfNeeded_locked(inst, cls, behavior & LOOKUP_INITIALIZE);
    // runtimeLock may have been dropped but is now locked again
    runtimeLock.assertLocked();
    
    // cls has passed the known-class check and is realized; start the
    // search at cls itself.
    curClass = cls;

    // The code used to lookup the class's cache again right after
    // we take the lock but for the vast majority of the cases
    // evidence shows this is a miss most of the time, hence a time loss.
    //
    // The only codepath calling into this without having performed some
    // kind of cache lookup is class_getInstanceMethod().

    // Core of the slow path. The for statement has no loop condition, so
    // the loop is left only through break / goto / _objc_fatal below.
    for (unsigned attempts = unreasonableClassCount();;) {
    
        // Classes whose cache is a constant optimized cache are looked up
        // through cache_getImp instead of through their method list.
        if (curClass->cache.isConstantOptimizedCache(/* strict */true)) {
#if CONFIG_USE_PREOPT_CACHES
            imp = cache_getImp(curClass, sel);
            if (imp) goto done_unlock;
            curClass = curClass->cache.preoptFallbackClass();
#endif
        } else {
            // curClass method list.
            Method meth = getMethodNoSuper_nolock(curClass, sel);
            if (meth) {
                // Found in this class: take its IMP and go fill the cache.
                imp = meth->imp(false);
                goto done;
            }

            // Step to the superclass (curClass is reassigned inside the
            // condition). nil means the whole chain has been searched
            // without finding the selector.
            if (slowpath((curClass = curClass->getSuperclass()) == nil)) {
                // No implementation found, and method resolver didn't help.
                // Use forwarding.
                imp = forward_imp;
                break;
            }
        }

        // Halt if there is a cycle in the superclass chain.
        // attempts counts down from unreasonableClassCount(); reaching zero
        // means the walk has visited more classes than could plausibly
        // exist, which is treated as memory corruption.
        if (slowpath(--attempts == 0)) {
            _objc_fatal("Memory corruption in class list.");
        }

        // Superclass cache.
        // curClass now refers to the superclass. Check its cache first; on
        // a miss the next loop iteration searches its method list.
        imp = cache_getImp(curClass, sel);
        
        // The superclass cache holds the forwarding entry for this selector.
        if (slowpath(imp == forward_imp)) {
            // Found a forward:: entry in a superclass.
            // Stop searching, but don't cache yet; call method
            // resolver for this class first.
            break;
        }
        
        // Cache hit in a superclass.
        if (fastpath(imp)) {
            // Found the method in a superclass. Cache it in this class.
            goto done;
        }
    }

    // No implementation found. Try method resolver once.
    // The flag is known to be set here, so the XOR clears it; the resolver
    // is therefore not attempted again with this behavior value.
    // NOTE(review): this path returns without unlocking runtimeLock here --
    // presumably resolveMethod_locked releases it; confirm.
    if (slowpath(behavior & LOOKUP_RESOLVER)) {
        behavior ^= LOOKUP_RESOLVER;
        return resolveMethod_locked(inst, sel, cls, behavior);
    }

 done:
    if (fastpath((behavior & LOOKUP_NOCACHE) == 0)) {
#if CONFIG_USE_PREOPT_CACHES
        while (cls->cache.isConstantOptimizedCache(/* strict */true)) {
            cls = cls->cache.preoptFallbackClass();
        }
#endif
        // Record the result in the receiver class's cache so the next send
        // of this selector can be served by the fast path.
        log_and_fill_cache(cls, imp, sel, inst, curClass);
    }
 done_unlock:
    runtimeLock.unlock();
    // Callers that passed LOOKUP_NIL get nil instead of the forwarding IMP.
    if (slowpath((behavior & LOOKUP_NIL) && imp == forward_imp)) {
        return nil;
    }
    return imp;
}

_objc_msgForward_impcache

/********************************************************************
*
* id _objc_msgForward(id self, SEL _cmd,...);
*
* _objc_msgForward is the externally-callable
*   function returned by things like method_getImplementation().
* _objc_msgForward_impcache is the function pointer actually stored in
*   method caches.
*
********************************************************************/
STATIC_ENTRY __objc_msgForward_impcache
    // No stret specialization.
    // Unconditional branch: this entry does nothing but forward to
    // __objc_msgForward.
    b __objc_msgForward
END_ENTRY __objc_msgForward_impcache

__objc_msgForward

// __objc_msgForward: loads the pointer stored in the _objc_forward_handler
// variable and tail-calls it.
ENTRY __objc_msgForward
    // x17 = page address of the variable, then load the pointer stored at
    // its page offset
    adrp x17, __objc_forward_handler@PAGE
    ldr p17, [x17, __objc_forward_handler@PAGEOFF]
    // Branch to the loaded handler
    TailCallFunctionPointer x17
END_ENTRY __objc_msgForward

_objc_forward_handler

// Default forward handler halts the process.
// Reports "unrecognized selector sent to instance" through _objc_fatal,
// prefixing the message with '+' when self's class is a metaclass and '-'
// otherwise, followed by the class name, selector name and receiver address.
__attribute__((noreturn, cold)) void
objc_defaultForwardHandler(id self, SEL sel)
{
    _objc_fatal("%c[%s %s]: unrecognized selector sent to instance %p "
                "(no message forward handler is installed)", 
                class_isMetaClass(object_getClass(self)) ? '+' : '-', 
                object_getClassName(self), sel_getName(sel), self);
}
// Handler pointer read by __objc_msgForward; initialized to the default
// handler above.
void *_objc_forward_handler = (void*)objc_defaultForwardHandler;

到这里可知,forward_imp = (IMP)_objc_msgForward_impcache 最终会跳转到_objc_forward_handler所指向的函数;在没有安装消息转发处理函数的默认情况下,它指向objc_defaultForwardHandler,会报unrecognized selector错误并终止进程

checkIsKnownClass

/***********************************************************************
* checkIsKnownClass
* Checks the given class against the list of all known classes. Dies
* with a fatal error if the class is not known.
* Locking: runtimeLock must be held by the caller.
**********************************************************************/
ALWAYS_INLINE
static void
checkIsKnownClass(Class cls)
{
    // Unknown class pointer: abort instead of continuing with it.
    if (slowpath(!isKnownClass(cls))) {
        _objc_fatal("Attempt to use unknown class %p.", cls);
    }
}

isKnownClass

/***********************************************************************
* isKnownClass
* Return true if the class is known to the runtime (located within the
* shared cache, within the data segment of a loaded image, or has been
* allocated with objc_allocateClassPair).
*
* The result of this operation is cached on the class in a "witness"
* value that is cheaply checked in the fastpath.
**********************************************************************/
ALWAYS_INLINE
static bool
isKnownClass(Class cls)
{
    // Fast path: check cls against the data-segment ranges using the
    // witness value stored on the class.
    if (fastpath(objc::dataSegmentsRanges.contains(cls->data()->witness, (uintptr_t)cls))) {
        return true;
    }
    // Slow path: known if it is in the allocatedClasses set, or if
    // dataSegmentsContain() accepts it.
    auto &set = objc::allocatedClasses.get();
    return set.find(cls) != set.end() || dataSegmentsContain(cls);
}

这里判断类是否被注册过,后面研究类的加载流程的时候再详细讨论

realizeAndInitializeIfNeeded_locked

/***********************************************************************
* realizeAndInitializeIfNeeded_locked
* Realize the given class if not already realized, and initialize it if
* not already initialized.
* inst is an instance of cls or a subclass, or nil if none is known.
* cls is the class to initialize and realize.
* initialize is true to initialize the class, false to skip initialization.
* Returns the class to use from here on; both steps below may replace cls.
* Locking: runtimeLock must be held on entry (asserted).
**********************************************************************/
static Class
realizeAndInitializeIfNeeded_locked(id inst, Class cls, bool initialize)
{
    runtimeLock.assertLocked();
    if (slowpath(!cls->isRealized())) {
        // Class has not been realized yet: realize it now.
        cls = realizeClassMaybeSwiftAndLeaveLocked(cls, runtimeLock);
        // runtimeLock may have been dropped but is now locked again
    }

    if (slowpath(initialize && !cls->isInitialized())) {
        // Class is realized but not yet initialized, and the caller asked
        // for initialization: initialize it now.
        cls = initializeAndLeaveLocked(cls, inst, runtimeLock);
        // runtimeLock may have been dropped but is now locked again
        // If sel == initialize, class_initialize will send +initialize and
        // then the messenger will send +initialize again after this
        // procedure finishes. Of course, if this is not being called
        // from the messenger then it won't happen. 2778172
    }
    return cls;
}

realizeClassMaybeSwiftAndLeaveLocked

// Realizes cls via realizeClassMaybeSwiftMaybeRelock with leaveLocked = true.
static Class
realizeClassMaybeSwiftAndLeaveLocked(Class cls, mutex_t& lock)
{
    return realizeClassMaybeSwiftMaybeRelock(cls, lock, true);
}

realizeClassMaybeSwiftMaybeRelock


/***********************************************************************
* realizeClassMaybeSwift (MaybeRelock / AndUnlock / AndLeaveLocked)
* Realize a class that might be a Swift class.
* Returns the real class structure for the class. 
* Locking: 
*   runtimeLock must be held on entry
*   runtimeLock may be dropped during execution
*   ...AndUnlock function leaves runtimeLock unlocked on exit
*   ...AndLeaveLocked re-acquires runtimeLock if it was dropped
* This complication avoids repeated lock transitions in some cases.
**********************************************************************/
static Class
realizeClassMaybeSwiftMaybeRelock(Class cls, mutex_t& lock, bool leaveLocked)
{
    lock.assertLocked();

    if (!cls->isSwiftStable_ButAllowLegacyForNow()) {
        // Non-Swift class. Realize it now with the lock still held.
        // fixme wrong in the future for objc subclasses of swift classes
        
        // The lock is released afterwards only if the caller did not ask
        // for it to stay locked.
        realizeClassWithoutSwift(cls, nil);
        if (!leaveLocked) lock.unlock();
    } else {
        // Swift class. We need to drop locks and call the Swift
        // runtime to initialize it.
        // The lock is re-acquired afterwards only if the caller asked for
        // it to stay locked.
        lock.unlock();
        cls = realizeSwiftClass(cls);
        ASSERT(cls->isRealized());    // callback must have provoked realization
        if (leaveLocked) lock.lock();
    }
    return cls;
}

realizeClassWithoutSwift

这个方法很长,我们本次不深入讨论,只需要知道他是干什么的就行了

/***********************************************************************
* realizeClassWithoutSwift
* Performs first-time initialization on class cls, 
* including allocating its read-write data.
* Does not perform any Swift-side initialization.
* Returns the real class structure for the class. 
* Locking: runtimeLock must be write-locked by the caller
**********************************************************************/
static Class realizeClassWithoutSwift(Class cls, Class previously)
{
    // Elided in this excerpt: setup involving rw, ro, supercls and metacls
    ......
    // Update superclass and metaclass in case of remapping
    // The superclass and isa (metaclass) links are set here, during
    // realization. The quoted realizeAndInitializeIfNeeded_locked calls
    // into realization only when the class is not yet realized, so in
    // that path these links are established lazily.
    cls->setSuperclass(supercls);
    cls->initClassIsa(metacls);
    ......
}

initializeAndLeaveLocked

// Initializes cls via initializeAndMaybeRelock with leaveLocked = true.
// Locking: caller must hold runtimeLock; this may drop and re-acquire it
static Class initializeAndLeaveLocked(Class cls, id obj, mutex_t& lock)
{
    return initializeAndMaybeRelock(cls, obj, lock, true);
}

initializeAndMaybeRelock

/***********************************************************************
* class_initialize.  Send the '+initialize' message on demand to any
* uninitialized class. Force initialization of superclasses first.
* inst is an instance of cls, or nil. Non-nil is better for performance.
* Returns the class pointer. If the class was unrealized then 
* it may be reallocated.
* Locking: 
*   runtimeLock must be held by the caller
*   This function may drop the lock.
*   On exit the lock is re-acquired or dropped as requested by leaveLocked.
**********************************************************************/
static Class initializeAndMaybeRelock(Class cls, id inst,
                                      mutex_t& lock, bool leaveLocked)
{
    // Already initialized: nothing to do except honor leaveLocked.
    if (cls->isInitialized()) {
        if (!leaveLocked) lock.unlock();
        return cls;
    }

    // Find the non-meta class for cls, if it is not already one.
    // The +initialize message is sent to the non-meta class object.
    Class nonmeta = getMaybeUnrealizedNonMetaClass(cls, inst);
    // Remainder of the function is elided in this excerpt
    ......
}

getMethodNoSuper_nolock

这个方法是在本类的method list中进行查找。 前面我们看了这么多方法其实对于今天的主题来说都不是核心内容,这个方法才是今天研究的核心方法之一

/***********************************************************************
 * getMethodNoSuper_nolock
 * Searches cls's own method lists for sel. Superclasses are not searched.
 * Returns the first matching method, or nil if no list contains sel.
 * Locking: runtimeLock must be read- or write-locked by the caller
 **********************************************************************/
static method_t *
getMethodNoSuper_nolock(Class cls, SEL sel)
{
    runtimeLock.assertLocked();

    ASSERT(cls->isRealized());
    // fixme nil cls? 
    // fixme nil sel?
    
    // The class's method-list container
    auto const methods = cls->data()->methods();
    
    // The container holds either one method list or an array of method
    // lists. beginLists()/endLists() yield a range of list pointers in
    // both cases, and each list in the range is searched in turn.
    for (auto mlists = methods.beginLists(),
              end = methods.endLists();
         mlists != end;
         ++mlists)
    {
        // <rdar://problem/46904873> getMethodNoSuper_nolock is the hottest
        // caller of search_method_list, inlining it turns
        // getMethodNoSuper_nolock into a frame-less function and eliminates
        // any store from this codepath.
        
        // Search this one list; stop at the first list that has sel.
        method_t *m = search_method_list_inline(*mlists, sel);
        if (m) return m;
    }

    return nil;
}

beginLists & endLists

// list_array_tt (excerpt): stores either a single list pointer or an array
// of list pointers, and exposes both as a [beginLists(), endLists()) range.
class list_array_tt {
    struct array_t {
        uint32_t count;
        // Array-of-lists form: count list pointers
        Ptr<List> lists[0];
        ......
    };
    
public:
    union {
        // Single-list form
        Ptr<List> list;
        uintptr_t arrayAndFlag;
    };

    // Start of the range of list pointers: the array's first slot when
    // hasArray() is true, otherwise the address of the single list pointer.
    const Ptr<List>* beginLists() const {
        if (hasArray()) {
            return array()->lists;
        } else {
            return &list;
        }
    }
    
    // One past the last list pointer. In the single-list form the range
    // has one element when list is non-null and is empty when it is null.
    const Ptr<List>* endLists() const {
        if (hasArray()) {
            return array()->lists + array()->count;
        } else if (list) {
            return &list + 1;
        } else {
            return &list;
        }
    }
}

从list_array_tt的结构中我们可以知道方法列表可能是一维数组,也可能是二维数组,那么究竟什么时候是一维什么时候是二维我们会在后面类的加载流程中详细讨论

search_method_list_inline

// Searches one method list for sel. Lists that report both isFixedUp() and
// isExpectedSize() go to the sorted-list search; all others are searched
// linearly. Returns the matching method or nil.
ALWAYS_INLINE static method_t *
search_method_list_inline(const method_list_t *mlist, SEL sel)
{
    // These only read two properties of the list; nothing is fixed up here.
    int methodListIsFixedUp = mlist->isFixedUp();
    int methodListHasExpectedSize = mlist->isExpectedSize();

    
    if (fastpath(methodListIsFixedUp && methodListHasExpectedSize)) {
        // Expected common case: search via findMethodInSortedMethodList
        return findMethodInSortedMethodList(sel, mlist);
    } else {
        // Linear search of unsorted method list
        if (auto *m = findMethodInUnsortedMethodList(sel, mlist))
            return m;
    }
    // (The article's author omitted a debug-only section here.)
    return nil;
}

findMethodInSortedMethodList

// Dispatches to the templated sorted-list search, choosing the accessor used
// to read each method's name according to the list's representation.
ALWAYS_INLINE static method_t *
findMethodInSortedMethodList(SEL key, const method_list_t *list)
{
    // "Small" method lists: the name is read with getSmallNameAsSEL() when
    // the list is in the shared cache (and the config flag is on), otherwise
    // with getSmallNameAsSELRef().
    // NOTE(review): the original annotation said this branch is the one
    // taken on M1 Macs -- not verifiable from this excerpt.
    if (list->isSmallList()) {
        if (CONFIG_SHARED_CACHE_RELATIVE_DIRECT_SELECTORS && objc::inSharedCache((uintptr_t)list)) {
            return findMethodInSortedMethodList(key, list, [](method_t &m) { return m.getSmallNameAsSEL(); });
        } else {
            return findMethodInSortedMethodList(key, list, [](method_t &m) { return m.getSmallNameAsSELRef(); });
        }
    } else {
        // "Big" method lists: the name is read from m.big().name.
        return findMethodInSortedMethodList(key, list, [](method_t &m) { return m.big().name; });
    }
}

findMethodInSortedMethodList

/***********************************************************************
 * findMethodInSortedMethodList (templated on the name accessor)
 * Binary search of a sorted method list for the selector `key`, comparing
 * selectors as integer values obtained through getName.
 * Returns the FIRST entry whose name equals key, or nil if none matches.
 **********************************************************************/
template<class getNameFunc>
ALWAYS_INLINE static method_t *
findMethodInSortedMethodList(SEL key, const method_list_t *list, const getNameFunc &getName)
{
    ASSERT(list);

    // first: start of the list; base: start of the range still being searched
    auto first = list->begin();
    auto base = first;
    
    // probe has the same (deduced) type as first
    decltype(first) probe;

    uintptr_t keyValue = (uintptr_t)key;
    uint32_t count;
    
    // count is the number of entries in the range still being searched;
    // it is halved on every iteration.
    for (count = list->count; count != 0; count >>= 1) {
    
        // Probe the middle of the current range: offset floor(count / 2)
        // from base, for both even and odd counts.
        probe = base + (count >> 1);

        // Selector stored at the probe, read through the supplied accessor
        uintptr_t probeValue = (uintptr_t)getName(probe);

        if (keyValue == probeValue) {
            // `probe` is a match.
            // Rewind looking for the *first* occurrence of this value.
            // This is required for correct category overrides.
            
            // Several entries may carry the same selector; step back while
            // the previous entry also matches so the earliest one is
            // returned.
            while (probe > first && keyValue == (uintptr_t)getName((probe - 1))) {
                probe--;
            }
            return &*probe;
        }

        // Key is larger: continue in the right half.
        if (keyValue > probeValue) {
            // The range now starts just after the probe. The extra
            // decrement makes the loop's `count >>= 1` yield exactly the
            // number of entries to the right of the probe.
            base = probe + 1;
            count--;
        }
        
        // Key is smaller: nothing to do here. base stays where it is and
        // the loop's `count >>= 1` shrinks the range to the floor(count / 2)
        // entries to the left of the probe.
    }

    return nil;
}

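通过二分查找搜索方法,我们知道二分法的前提是有序数组,那么这里就可以知道走findMethodInSortedMethodList的method list(即isFixedUp且isExpectedSize的列表)一定是有序的;不满足条件的列表则通过findMethodInUnsortedMethodList进行线性查找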

log_and_fill_cache

/***********************************************************************
* log_and_fill_cache
* Log this method call. If the logger permits it, fill the method cache.
* cls is the class whose cache should be filled. 
* implementer is the class that owns the implementation in question.
**********************************************************************/
static void
log_and_fill_cache(Class cls, IMP imp, SEL sel, id receiver, Class implementer)
{
#if SUPPORT_MESSAGE_LOGGING
    // With message logging enabled, the logger decides whether this send
    // may be cached; if it says no, return without touching the cache.
    if (slowpath(objcMsgLogEnabled && implementer)) {
        bool cacheIt = logMessageSend(implementer->isMetaClass(), 
                                      cls->nameForLogging(),
                                      implementer->nameForLogging(), 
                                      sel);
        if (!cacheIt) return;
    }
#endif

    // Insert the (sel, imp) pair into cls's method cache.
    cls->cache.insert(sel, imp, receiver);
}

我们看到这里执行了cache的insert方法,和前文研究的内容呼应上了

小结

本文中源码比较多,但是主线代码逻辑仍然很简单:如果一个实例对象调用了一个方法,那么实际是通过objc_msgSend给该对象发送了消息。实例对象接收到消息之后先通过isa获取到类对象指针,进而通过内存平移获取到cache,先从cache的buckets中查找方法,我们称之为快速查找流程;如果查询不到,就去类对象的method list中查找,仍找不到则沿继承链依次到父类中查找(先查父类的cache,再查父类的method list),这个过程我们称之为慢速查找流程。

方法的查找流程我们已经看完了,但是仍然还可能找不到,那么后面还有动态方法决议和消息转发,我们后面会继续探索。

参考文章

# objc_msgSend分析-慢速查找