objc库源码分析(1)-加载

270 阅读6分钟

objc库中初始化方法如下

void _objc_init(void)
{
    static bool initialized = false;
    if (initialized) return;
    initialized = true;
    
    // fixme defer initialization until an objc-using image is found?
    environ_init();//初始化运行环境
    tls_init();
    static_init();//初始化静态变量方法
    runtime_init();//初始化运行时环境
    exception_init();//初始化异常
    cache_init();//初始化缓存
    _imp_implementationWithBlock_init();

    _dyld_objc_notify_register(&map_images, load_images, unmap_image);

#if __OBJC2__
    didCallDyldNotifyRegister = true;
#endif
}

其中包含了一些列的初始化,以及image的映射和image的加载。 其中 _dyld_objc_notify_register(&map_images, load_images, unmap_image);是供dyld做回调使用,其中&map_images是方法是处理image的映射的, map_images函数最最终会调用 runtime-new.mm中_read_images的函数, 其中_read_images函数的调用逻辑如下

_read_images

void _read_images(header_info **hList, uint32_t hCount, int totalClasses, int unoptimizedTotalClasses)
{
    header_info *hi;
    uint32_t hIndex;
    size_t count;
    size_t i;
    Class *resolvedFutureClasses = nil;
    size_t resolvedFutureClassCount = 0;
    static bool doneOnce;
    bool launchTime = NO;
    TimeLogger ts(PrintImageTimes);

    runtimeLock.assertLocked();

#define EACH_HEADER \
    hIndex = 0;         \
    hIndex < hCount && (hi = hList[hIndex]); \
    hIndex++

    if (!doneOnce) {
        doneOnce = YES;//控制{}里面的代码只执行一次
        launchTime = YES;

#if SUPPORT_NONPOINTER_ISA
        // Disable non-pointer isa under some conditions.

# if SUPPORT_INDEXED_ISA //如果库中有旧版本swift代码 则不支持isa指针优化
        // Disable nonpointer isa if any image contains old Swift code
        for (EACH_HEADER) {
            if (hi->info()->containsSwift()  &&
                hi->info()->swiftUnstableVersion() < objc_image_info::SwiftVersion3)
            {
                DisableNonpointerIsa = true;
                if (PrintRawIsa) {
                    _objc_inform("RAW ISA: disabling non-pointer isa because "
                                 "the app or a framework contains Swift code "
                                 "older than Swift 3.0");
                }
                break;
            }
        }
# endif

# if TARGET_OS_OSX //如果是 OS X 10.11之前的系统,不支持isa指针优化
        // Disable non-pointer isa if the app is too old
        // (linked before OS X 10.11)
        if (dyld_get_program_sdk_version() < DYLD_MACOSX_VERSION_10_11) {
            DisableNonpointerIsa = true;
            if (PrintRawIsa) {
                _objc_inform("RAW ISA: disabling non-pointer isa because "
                             "the app is too old (SDK version " SDK_FORMAT ")",
                             FORMAT_SDK(dyld_get_program_sdk_version()));
            }
        }

        // Disable non-pointer isa if the app has a __DATA,__objc_rawisa section
        // New apps that load old extensions may need this.
        for (EACH_HEADER) {
            if (hi->mhdr()->filetype != MH_EXECUTE) continue;
            unsigned long size;
            if (getsectiondata(hi->mhdr(), "__DATA", "__objc_rawisa", &size)) {
                DisableNonpointerIsa = true;
                if (PrintRawIsa) {
                    _objc_inform("RAW ISA: disabling non-pointer isa because "
                                 "the app has a __DATA,__objc_rawisa section");
                }
            }
            break;  // assume only one MH_EXECUTE image
        }
# endif

#endif

        if (DisableTaggedPointers) {
            //经过前面的判断,如果不支持指针优化,
            disableTaggedPointers();
        }
        //tagged pointer混淆,让指针地址对开发者看起来像是正常的内存地址
        initializeTaggedPointerObfuscator();

        if (PrintConnecting) {
            _objc_inform("CLASS: found %d classes during launch", totalClasses);
        }

        // namedClasses
        // Preoptimized classes don't go in this table.
        // 4/3 is NXMapTable's load factor
        //创建存储已识别类的哈希表
        //只会执行一次
        int namedClassesSize = 
            (isPreoptimized() ? unoptimizedTotalClasses : totalClasses) * 4 / 3;
        gdb_objc_realized_classes =
            NXCreateMapTable(NXStrValueMapPrototype, namedClassesSize);

        ts.log("IMAGE TIMES: first time tasks");
    }

    // Fix up @selector references
#pragma mark // 注册方法
    static size_t UnfixedSelectors;
    {
        mutex_locker_t lock(selLock);
        for (EACH_HEADER) {
            if (hi->hasPreoptimizedSelectors()) continue;

            bool isBundle = hi->isBundle();
            SEL *sels = _getObjc2SelectorRefs(hi, &count);//获取可执行文件中待注册的方法
           
            for (i = 0; i < count; i++) {
                const char *name = sel_cname(sels[i]);
                SEL sel = sel_registerNameNoLock(name, isBundle);//注册方法
                if (sels[i] != sel) {
                    sels[i] = sel;
                }
            }
        }
    }

    ts.log("IMAGE TIMES: fix up selector references");
#pragma mark// Discover classes. Fix up unresolved future classes. Mark bundle classes.
    bool hasDyldRoots = dyld_shared_cache_some_image_overridden();

    for (EACH_HEADER) {
        if (! mustReadClasses(hi, hasDyldRoots)) {
            // Image is sufficiently optimized that we need not call readClass() 判断是否需要执行readclass()方法
            continue;
        }

        classref_t const *classlist = _getObjc2ClassList(hi, &count);

        bool headerIsBundle = hi->isBundle();
        bool headerIsPreoptimized = hi->hasPreoptimizedClasses();

        for (i = 0; i < count; i++) {
            Class cls = (Class)classlist[i];
            Class newCls = readClass(cls, headerIsBundle, headerIsPreoptimized);

            if (newCls != cls  &&  newCls) {
                // Class was moved but not deleted. Currently this occurs 
                // only when the new class resolved a future class.
                // Non-lazily realize the class below.
                //如果readClass返回的结果不是原来的class,则将class放入带解决的class列表
                resolvedFutureClasses = (Class *)
                    realloc(resolvedFutureClasses, 
                            (resolvedFutureClassCount+1) * sizeof(Class));
                resolvedFutureClasses[resolvedFutureClassCount++] = newCls;//已解决的futurec class
            }
        }
    }

    ts.log("IMAGE TIMES: discover classes");

    // Fix up remapped classes
    // Class list and nonlazy class list remain unremapped.
    // Class refs and super refs are remapped for message dispatching.
    // ref是源文件,才用了ASLR (Address Space Layout Randomization) 地址空间布局随机化技术
    // 所以要映射类的地址,才可以找到真实地址
    if (!noClassesRemapped()) {//如果存在类没有被影射到,则重新影射
        for (EACH_HEADER) {
            Class *classrefs = _getObjc2ClassRefs(hi, &count);
            for (i = 0; i < count; i++) {
                remapClassRef(&classrefs[i]);
            }
            // fixme why doesn't test future1 catch the absence of this?
            classrefs = _getObjc2SuperRefs(hi, &count);
            for (i = 0; i < count; i++) {
                remapClassRef(&classrefs[i]);
            }
        }
    }

    ts.log("IMAGE TIMES: remap classes");

#if SUPPORT_FIXUP
    
#pragma mark    // Fix up old objc_msgSend_fixup call sites 修正引用计数和消息发送相关的方法
    for (EACH_HEADER) {
        message_ref_t *refs = _getObjc2MessageRefs(hi, &count);
        if (count == 0) continue;

        if (PrintVtables) {
            _objc_inform("VTABLES: repairing %zu unsupported vtable dispatch "
                         "call sites in %s", count, hi->fname());
        }
        for (i = 0; i < count; i++) {
            fixupMessageRef(refs+i);
        }
    }

    ts.log("IMAGE TIMES: fix up objc_msgSend_fixup");
#endif
#pragma mark 协议
    bool cacheSupportsProtocolRoots = sharedCacheSupportsProtocolRoots();

    // Discover protocols. Fix up protocol refs. 影射协议和影射协议的修正
    for (EACH_HEADER) {
        extern objc_class OBJC_CLASS_$_Protocol;
        Class cls = (Class)&OBJC_CLASS_$_Protocol;
        ASSERT(cls);
        NXMapTable *protocol_map = protocols();
        bool isPreoptimized = hi->hasPreoptimizedProtocols();

        // Skip reading protocols if this is an image from the shared cache
        // and we support roots
        // Note, after launch we do need to walk the protocol as the protocol
        // in the shared cache is marked with isCanonical() and that may not
        // be true if some non-shared cache binary was chosen as the canonical
        // definition
        if (launchTime && isPreoptimized && cacheSupportsProtocolRoots) { //如果是在运行过程中,而且是动态库的协议,则跳过,不加载该类的协议
            if (PrintProtocols) {
                _objc_inform("PROTOCOLS: Skipping reading protocols in image: %s",
                             hi->fname());
            }
            continue;
        }

        bool isBundle = hi->isBundle();

        //
        protocol_t * const *protolist = _getObjc2ProtocolList(hi, &count);
        for (i = 0; i < count; i++) {
            readProtocol(protolist[i], cls, protocol_map, 
                         isPreoptimized, isBundle);//这一步会把符合条件的协议(之前不存在同名的协议)添加到协议列表
        }
    }

    ts.log("IMAGE TIMES: discover protocols");

    // Fix up @protocol references
    // Preoptimized images may have the right 
    // answer already but we don't know for sure.
    for (EACH_HEADER) {
        // At launch time, we know preoptimized image refs are pointing at the
        // shared cache definition of a protocol.  We can skip the check on
        // launch, but have to visit @protocol refs for shared cache images
        // loaded later.
        if (launchTime && cacheSupportsProtocolRoots && hi->isPreoptimized())
            continue;
        protocol_t **protolist = _getObjc2ProtocolRefs(hi, &count);
        for (i = 0; i < count; i++) {
            remapProtocolRef(&protolist[i]);//重新影射一次协议。如果协议中存在不符合条件的协议,统计出其中的数量
        }
    }

    ts.log("IMAGE TIMES: fix up @protocol references");

    // Discover categories. Only do this after the initial category
    // attachment has been done. For categories present at startup,
    // discovery is deferred until the first load_images call after
    // the call to _dyld_objc_notify_register completes. rdar://problem/53119145
    if (didInitialAttachCategories) {//等所有的分类绑定完以后才影射
        for (EACH_HEADER) {
            load_categories_nolock(hi);//把分类里面的内容添加到类里面
        }
    }

    ts.log("IMAGE TIMES: discover categories");

    // Category discovery MUST BE Late to avoid potential races
    // when other threads call the new category code before
    // this thread finishes its fixups.

    // +load handled by prepare_load_methods()

    // Realize non-lazy classes (for +load methods and static instances)
    for (EACH_HEADER) {
        classref_t const *classlist = 
            _getObjc2NonlazyClassList(hi, &count);
        for (i = 0; i < count; i++) {
            Class cls = remapClass(classlist[i]);
            if (!cls) continue;

            addClassTableEntry(cls);

            if (cls->isSwiftStable()) {
                if (cls->swiftMetadataInitializer()) {
                    _objc_fatal("Swift class %s with a metadata initializer "
                                "is not allowed to be non-lazy",
                                cls->nameForLogging());
                }
                // fixme also disallow relocatable classes
                // We can't disallow all Swift classes because of
                // classes like Swift.__EmptyArrayStorage
            }
            realizeClassWithoutSwift(cls, nil);
        }
    }

    ts.log("IMAGE TIMES: realize non-lazy classes");

    // Realize newly-resolved future classes, in case CF manipulates them 对readclass()以后那些待处理的类,重新创建。以防CoreFoundate去调用
    if (resolvedFutureClasses) {
        for (i = 0; i < resolvedFutureClassCount; i++) {
            Class cls = resolvedFutureClasses[i];
            if (cls->isSwiftStable()) {
                _objc_fatal("Swift class is not allowed to be future");
            }
            realizeClassWithoutSwift(cls, nil);//重新创建类
            cls->setInstancesRequireRawIsaRecursively(false/*inherited*/);//
        }
        free(resolvedFutureClasses);
    }

    ts.log("IMAGE TIMES: realize future classes");

    //如果不存在带修正的变量,则realize所有类
    //什么是带修正的变量?带修正的变量是指类继承了别的类,别的类有变量,则子类的结构要修改
    if (DebugNonFragileIvars) {
        realizeAllClasses();
    }


    // Print preoptimization statistics 输出动态库中由dyld进行了预先优化的类
    if (PrintPreopt) {
        static unsigned int PreoptTotalMethodLists;
        static unsigned int PreoptOptimizedMethodLists;
        static unsigned int PreoptTotalClasses;
        static unsigned int PreoptOptimizedClasses;

        for (EACH_HEADER) {
            if (hi->hasPreoptimizedSelectors()) {
                _objc_inform("PREOPTIMIZATION: honoring preoptimized selectors "
                             "in %s", hi->fname());
            }
            else if (hi->info()->optimizedByDyld()) {//如果由dyld进行了优化,则不用执行
                _objc_inform("PREOPTIMIZATION: IGNORING preoptimized selectors "
                             "in %s", hi->fname());
            }

            classref_t const *classlist = _getObjc2ClassList(hi, &count);
            for (i = 0; i < count; i++) {
                Class cls = remapClass(classlist[i]);
                if (!cls) continue;

                PreoptTotalClasses++;
                if (hi->hasPreoptimizedClasses()) {
                    PreoptOptimizedClasses++;
                }
                
                const method_list_t *mlist;
                if ((mlist = ((class_ro_t *)cls->data())->baseMethods())) {
                    PreoptTotalMethodLists++;
                    if (mlist->isFixedUp()) {
                        PreoptOptimizedMethodLists++;
                    }
                }
                if ((mlist=((class_ro_t *)cls->ISA()->data())->baseMethods())) {
                    PreoptTotalMethodLists++;
                    if (mlist->isFixedUp()) {
                        PreoptOptimizedMethodLists++;
                    }
                }
            }
        }

        _objc_inform("PREOPTIMIZATION: %zu selector references not "
                     "pre-optimized", UnfixedSelectors);
        _objc_inform("PREOPTIMIZATION: %u/%u (%.3g%%) method lists pre-sorted",
                     PreoptOptimizedMethodLists, PreoptTotalMethodLists, 
                     PreoptTotalMethodLists
                     ? 100.0*PreoptOptimizedMethodLists/PreoptTotalMethodLists 
                     : 0.0);
        _objc_inform("PREOPTIMIZATION: %u/%u (%.3g%%) classes pre-registered",
                     PreoptOptimizedClasses, PreoptTotalClasses, 
                     PreoptTotalClasses 
                     ? 100.0*PreoptOptimizedClasses/PreoptTotalClasses
                     : 0.0);
        _objc_inform("PREOPTIMIZATION: %zu protocol references not "
                     "pre-optimized", UnfixedProtocolReferences);
    }

#undef EACH_HEADER
}

_read_images的调用逻辑如下

_read_images执行流程.jpg

最主要做了下面几个事情 1 初始化 2 修正方法 Fix up @selector references 3 发现类Discover classes 4 修正消息发送相关fixupMessageRef 5 发现协议discover protocols 6 加载分类到类中 7 realize实现了load方法的类 8 输出动态库优化的内容

为什么要fix up呢?

在加载所有的动态链接库之后,它们只是处在相互独立的状态,需要将它们绑定起来,这就是 Fix-ups。代码签名使得我们不能修改指令,那样就不能让一个 dylib 的调用另一个 dylib。这时需要加很多间接层。 现代 code-gen 被叫做动态 PIC(Position Independent Code),意味着代码可以被加载到间接的地址上。当调用发生时,code-gen 实际上会在 __DATA 段中创建一个指向被调用者的指针,然后加载指针并跳转过去。所以 dyld 做的事情就是修正(fix-up)指针和数据。Fix-up 有两种类型,rebasing 和 binding。 Rebasing:在镜像内部调整指针的指向 Binding:将指针指向镜像外部的内容

简而言之,fix up就是为了找到调整指针的指向,找到我们所需要的内容。

discover class

#pragma mark// Discover classes. Fix up unresolved future classes. Mark bundle classes.
    bool hasDyldRoots = dyld_shared_cache_some_image_overridden();

    for (EACH_HEADER) {
        if (! mustReadClasses(hi, hasDyldRoots)) {
            // Image is sufficiently optimized that we need not call readClass() 判断是否需要执行readclass()方法
            continue;
        }

        classref_t const *classlist = _getObjc2ClassList(hi, &count);

        bool headerIsBundle = hi->isBundle();
        bool headerIsPreoptimized = hi->hasPreoptimizedClasses();

        for (i = 0; i < count; i++) {
            Class cls = (Class)classlist[i];
            Class newCls = readClass(cls, headerIsBundle, headerIsPreoptimized);

            if (newCls != cls  &&  newCls) {
                // Class was moved but not deleted. Currently this occurs 
                // only when the new class resolved a future class.
                // Non-lazily realize the class below.
                //如果readClass返回的结果不是原来的class,则将class放入带解决的class列表
                resolvedFutureClasses = (Class *)
                    realloc(resolvedFutureClasses, 
                            (resolvedFutureClassCount+1) * sizeof(Class));
                resolvedFutureClasses[resolvedFutureClassCount++] = newCls;//已解决的futurec class
            }
        }
    }

判断是否是动态库的内容,如果不是动态库。则遍历这个可执行文件里的类。


load_images

load_images的实现如下

load_images(const char *path __unused, const struct mach_header *mh)
{
    recursive_mutex_locker_t lock(loadMethodLock);

    // Discover +load methods
    prepare_load_methods((const headerType *)mh);

    // Call +load methods (without classLock - re-entrant)
    call_load_methods();
}

做了两个事情 1 准备load方法prepare_load_methods() 2 执行load方法call_load_methods()

准备load方法prepare_load_methods


void prepare_load_methods(const headerType *mhdr)
{
    size_t count, i;

    runtimeLock.assertLocked();

    classref_t const *classlist = 
        _getObjc2NonlazyClassList(mhdr, &count);//获取有load方法的类
    for (i = 0; i < count; i++) {
    //将实现了load方法的类放到loadable_classes数组中
        schedule_class_load(remapClass(classlist[i]));
    }


    //将实现了load方法的分类放到loadable_categories数组中
    category_t * const *categorylist = _getObjc2NonlazyCategoryList(mhdr, &count);
    for (i = 0; i < count; i++) {
        category_t *cat = categorylist[i];
        Class cls = remapClass(cat->cls);
        if (!cls) continue;  // category for ignored weak-linked class//忽略弱连接的类
        if (cls->isSwiftStable()) {//swfit 的extensions 和 catrogry不支持load方法
            _objc_fatal("Swift class extensions and categories on Swift "
                        "classes are not allowed to have +load methods");
        }
        realizeClassWithoutSwift(cls, nil);
        ASSERT(cls->ISA()->isRealized());
        add_category_to_loadable_list(cat);
    }
}


//将实现了load方法的类放到loadable_classes数组中
static void schedule_class_load(Class cls)
{
    if (!cls) return;
    ASSERT(cls->isRealized());  // _read_images should realize

    if (cls->data()->flags & RW_LOADED) return;

    // Ensure superclass-first ordering  递归调用方法,确保父类的load优先执行
    schedule_class_load(cls->superclass);

    add_class_to_loadable_list(cls);
    cls->setInfo(RW_LOADED); 
}

prepare_load_methods方法主要是执行了两个事情

1 将实现了load方法的类放到loadable_classes数组中 2 将实现了load方法的分类放到loadable_categories数组中

_getObjc2NonlazyClassList返回的是image中带有load方法的class的数组,会按照编译的顺序添加到数组loadable_classes当中。 _getObjc2NonlazyCategoryList返回的是image中带有load方法的category的数组,会按照编译的顺序添加到数组loadable_categories当中。

一般来说,先编译的class和category,其的load方法会先执行 注意在prepare_load_methods()中调用schedule_class_load()方法,schedule_class_load()方法是一个递归调用,如果存在父类,则调用schedule_class_load()方法,这样保证了实现了load的方法可以放在loadable_classes数组的前面,这样也就保证了父类的load方法比子类的load方法优先执行

执行load方法call_load_methods()

call_load_methods()实现如下

void call_load_methods(void)
{
    static bool loading = NO;
    bool more_categories;

    loadMethodLock.assertLocked();

    // Re-entrant calls do nothing; the outermost call will finish the job.
    if (loading) return; //保证call_load_methods只执行一次
    loading = YES;

    void *pool = objc_autoreleasePoolPush();//自动释放池

    do {
        // 1. Repeatedly call class +loads until there aren't any more
        //循环执行类的load的方法
        while (loadable_classes_used > 0) {
            call_class_loads();
        }
        // 2. Call category +loads ONCE
        //执行完类的load方法后才cagetgory中的方法
        //检测是否有未执行load方法的category,如果有,则循环执行
        //为什么会有未执行load方法的category?因为category可能会引入别的有load方法class,别的class的load要先执行,category中的load要等待执行
          more_categories = call_category_loads();

        // 3. Run more +loads if there are classes OR more untried categories
    } while (loadable_classes_used > 0  ||  more_categories);

    objc_autoreleasePoolPop(pool);

    loading = NO;
}

可以看到,通过do while循环中确保所有的类和分类的load方法都被调用。

do while中优先call_class_loads(),再执行call_category_loads(),也就是说类的load方法比分类的load方法先执行

call_class_loads方法会从loadable_classes数组中找到对应的类,并通过直接调用的方法,调用类的loadable_classes方法。

call_category_loads()方法会从loadable_categories中找到相应的分类load方法。并调用load方法.

在前面可知,在类和分类的加载过程中,有三个结果

  • 先编译的class和category,其的load方法会先执行
  • 父类的load方法比子类的load方法优先执行
  • 类的load方法比分类的load方法先执行。