这是我参与8月更文挑战的第2天,活动详情查看: 8月更文挑战
在上篇文章中我们分析了dyld的加载流程,但是最终调用的map_images和load_images究竟做了什么呢?它们又是什么时候被调用的呢?
dyld链接objc的函数执行
在上一篇文章中我们了解到map_images和load_images起到了dyld和objc沟通桥梁的作用,其调用流程为:
recursiveInitialization(dyld) → doInitialization(dyld) → doModInitFunctions(dyld) → libSystem_initializer(libSystem.B.dylib) → libdispatch_init(libdispatch.dylib) → _os_object_init(libdispatch.dylib) → _objc_init(libobjc.A.dylib) → _dyld_objc_notify_register(libobjc.A.dylib)
那么map_images和load_images究竟做了哪些工作呢?
map_images和load_images
我们以_dyld_objc_notify_register(&map_images, load_images, unmap_image);为突破口进行分析
map_images
map_images主要是用来管理文件和动态库中的所有的符号(class,protocol,selector,category),并加载到内存中
/// Callback passed as the first argument to _dyld_objc_notify_register().
/// Constructs a mutex_locker_t over runtimeLock (presumably a scoped lock that
/// is released on return) and forwards every argument unchanged to
/// map_images_nolock().
///
/// @param count  number of entries in `paths` and `mhdrs`
/// @param paths  path string for each image
/// @param mhdrs  Mach-O header for each image
void map_images(unsigned count, const char * const paths[],
                const struct mach_header * const mhdrs[])
{
    mutex_locker_t guard(runtimeLock);
    map_images_nolock(count, paths, mhdrs);
}
// Processes `mhCount` newly-mapped images. map_images() calls this after it
// has constructed its locker over runtimeLock.
//
// Steps, in order:
//   1. On the first call only, run preopt_init().
//   2. Call addHeader() for every incoming Mach-O header; headers for which it
//      returns null carry no objc data and are skipped. The rest are collected
//      in hList, and selector-reference counts are summed for the main
//      executable (MH_EXECUTE).
//   3. On the first call only, run sel_init(selrefCount) and arr_init(), plus
//      the GC-compat and fork-safety checks guarded by the #if blocks.
//   4. If any objc-bearing header was found, hand the list to _read_images().
//   5. Invoke every function in loadImageFuncs once per incoming header.
//
// mhCount - number of entries in mhPaths and mhdrs
// mhPaths - path of each image, index-aligned with mhdrs
// mhdrs   - Mach-O header of each image
void
map_images_nolock(unsigned mhCount, const char * const mhPaths[],
const struct mach_header * const mhdrs[])
{
// Function-local static: stays YES until the end of the first call (see below),
// so the one-time setup blocks run exactly once per process.
static bool firstTime = YES;
// At most one header_info per incoming image; hCount tracks how many are used.
header_info *hList[mhCount];
uint32_t hCount;
size_t selrefCount = 0;
// Perform first-time initialization if necessary.
// This function is called before ordinary library initializers.
// fixme defer initialization until an objc-using image is found?
if (firstTime) {
preopt_init();
}
if (PrintImages) {
_objc_inform("IMAGES: processing %u newly-mapped images...\n", mhCount);
}
// Find all images with Objective-C metadata.
hCount = 0;
// Count classes. Size various table based on the total.
int totalClasses = 0;
int unoptimizedTotalClasses = 0;
{
// Walk the incoming arrays from the last index down to 0.
uint32_t i = mhCount;
while (i--) {
const headerType *mhdr = (const headerType *)mhdrs[i];
// addHeader receives both class counters; they are later passed on to _read_images.
auto hi = addHeader(mhdr, mhPaths[i], totalClasses, unoptimizedTotalClasses);
if (!hi) {
// no objc data in this entry
continue;
}
if (mhdr->filetype == MH_EXECUTE) {
// Size some data structures based on main executable's size
#if __OBJC2__
// If dyld3 optimized the main executable, then there shouldn't
// be any selrefs needed in the dynamic map so we can just init
// to a 0 sized map
if ( !hi->hasPreoptimizedSelectors() ) {
size_t count;
_getObjc2SelectorRefs(hi, &count);
selrefCount += count;
_getObjc2MessageRefs(hi, &count);
selrefCount += count;
}
#else
_getObjcSelectorRefs(hi, &selrefCount);
#endif
#if SUPPORT_GC_COMPAT
// Halt if this is a GC app.
if (shouldRejectGCApp(hi)) {
_objc_fatal_with_reason
(OBJC_EXIT_REASON_GC_NOT_SUPPORTED,
OS_REASON_FLAG_CONSISTENT_FAILURE,
"Objective-C garbage collection "
"is no longer supported.");
}
#endif
}
// Record this objc-bearing header for the later _read_images call.
hList[hCount++] = hi;
if (PrintImages) {
_objc_inform("IMAGES: loading image for %s%s%s%s%s\n",
hi->fname(),
mhdr->filetype == MH_BUNDLE ? " (bundle)" : "",
hi->info()->isReplacement() ? " (replacement)" : "",
hi->info()->hasCategoryClassProperties() ? " (has class properties)" : "",
hi->info()->optimizedByDyld()?" (preoptimized)":"");
}
}
}
// Perform one-time runtime initialization that must be deferred until
// the executable itself is found. This needs to be done before
// further initialization.
// (The executable may not be present in this infoList if the
// executable does not contain Objective-C code but Objective-C
// is dynamically loaded later.)
if (firstTime) {
// selrefCount was accumulated above from the MH_EXECUTE image only.
sel_init(selrefCount);
arr_init();
#if SUPPORT_GC_COMPAT
// Reject any GC images linked to the main executable.
// We already rejected the app itself above.
// Images loaded after launch will be rejected by dyld.
for (uint32_t i = 0; i < hCount; i++) {
auto hi = hList[i];
auto mh = hi->mhdr();
if (mh->filetype != MH_EXECUTE && shouldRejectGCImage(mh)) {
_objc_fatal_with_reason
(OBJC_EXIT_REASON_GC_NOT_SUPPORTED,
OS_REASON_FLAG_CONSISTENT_FAILURE,
"%s requires Objective-C garbage collection "
"which is no longer supported.", hi->fname());
}
}
#endif
#if TARGET_OS_OSX
// Disable +initialize fork safety if the app is too old (< 10.13).
// Disable +initialize fork safety if the app has a
// __DATA,__objc_fork_ok section.
// NOTE: the SDK-version check below is commented out in this excerpt, so only
// the __objc_fork_ok section check is active.
// if (!dyld_program_sdk_at_least(dyld_platform_version_macOS_10_13)) {
// DisableInitializeForkSafety = true;
// if (PrintInitializing) {
// _objc_inform("INITIALIZE: disabling +initialize fork "
// "safety enforcement because the app is "
// "too old.)");
// }
// }
for (uint32_t i = 0; i < hCount; i++) {
auto hi = hList[i];
auto mh = hi->mhdr();
if (mh->filetype != MH_EXECUTE) continue;
unsigned long size;
if (getsectiondata(hi->mhdr(), "__DATA", "__objc_fork_ok", &size)) {
DisableInitializeForkSafety = true;
if (PrintInitializing) {
_objc_inform("INITIALIZE: disabling +initialize fork "
"safety enforcement because the app has "
"a __DATA,__objc_fork_ok section");
}
}
break; // assume only one MH_EXECUTE image
}
#endif
}
// Only call _read_images when at least one header carried objc data.
if (hCount > 0) {
_read_images(hList, hCount, totalClasses, unoptimizedTotalClasses);
}
// From here on, later calls skip the one-time initialization blocks above.
firstTime = NO;
// Call image load funcs after everything is set up.
// Note: this iterates over all mhCount incoming headers, not just the hCount
// entries that had objc data.
for (auto func : loadImageFuncs) {
for (uint32_t i = 0; i < mhCount; i++) {
func(mhdrs[i]);
}
}
}
load_images
load_images主要用来加载执行load方法
// Callback passed as the second argument to _dyld_objc_notify_register().
//
// path - unused
// mh   - Mach-O header of the image being processed
//
// Flow:
//   1. The first time through after didCallDyldNotifyRegister is set, mark
//      didInitialAttachCategories and call loadAllCategories().
//   2. Return immediately if the image has no +load methods.
//   3. Under loadMethodLock: collect +load methods via prepare_load_methods()
//      while runtimeLock is held in an inner scope, then run them via
//      call_load_methods() after that inner scope has closed.
void
load_images(const char *path __unused, const struct mach_header *mh)
{
if (!didInitialAttachCategories && didCallDyldNotifyRegister) {
didInitialAttachCategories = true;
loadAllCategories(); // walk every header and load its categories
}
// Return without taking locks if there are no +load methods here.
if (!hasLoadMethods((const headerType *)mh)) return;
recursive_mutex_locker_t lock(loadMethodLock);
// Discover load methods
{
mutex_locker_t lock2(runtimeLock);
// Queue this image's +load methods (classes first, then categories)
prepare_load_methods((const headerType *)mh);
}
// Call +load methods (without runtimeLock - re-entrant)
// Run everything that was queued above
call_load_methods();
}
loadAllCategories():加载所有的Category;prepare_load_methods((const headerType *)mh):准备load方法;call_load_methods():调用load方法
loadAllCategories
/// Walks the header list starting at FirstHeader and calls
/// load_categories_nolock() on each entry, with a mutex_locker_t constructed
/// over runtimeLock for the whole walk.
static void loadAllCategories() {
    mutex_locker_t guard(runtimeLock);

    auto *image = FirstHeader;
    while (image != NULL) {
        load_categories_nolock(image);
        image = image->getNext();
    }
}
prepare_load_methods
/// Queues the +load methods found in one image.
///
/// Pass 1 schedules every class from the image's non-lazy class list.
/// Pass 2 walks the non-lazy category list, realizes each category's class and
/// queues the category. Asserts that runtimeLock is held.
///
/// @param mhdr  header of the image to scan
void prepare_load_methods(const headerType *mhdr)
{
    runtimeLock.assertLocked();

    // Pass 1: non-lazy classes. schedule_class_load() handles superclass
    // ordering itself.
    size_t classTotal;
    classref_t const *nonlazyClasses =
        _getObjc2NonlazyClassList(mhdr, &classTotal);
    for (size_t idx = 0; idx < classTotal; idx++) {
        schedule_class_load(remapClass(nonlazyClasses[idx]));
    }

    // Pass 2: non-lazy categories.
    size_t categoryTotal;
    category_t * const *nonlazyCategories =
        _getObjc2NonlazyCategoryList(mhdr, &categoryTotal);
    for (size_t idx = 0; idx < categoryTotal; idx++) {
        category_t *category = nonlazyCategories[idx];
        Class cls = remapClass(category->cls);

        // category for ignored weak-linked class
        if (!cls) continue;

        if (cls->isSwiftStable()) {
            _objc_fatal("Swift class extensions and categories on Swift "
                        "classes are not allowed to have +load methods");
        }

        // The target class is realized before its category is queued.
        realizeClassWithoutSwift(cls, nil);
        ASSERT(cls->ISA()->isRealized());
        add_category_to_loadable_list(category);
    }
}
/// Queues `cls` for +load, after first queuing its superclass chain.
///
/// A nil class is ignored. A class whose flags already contain RW_LOADED is
/// ignored as well; the flag is set here once the class has been handed to
/// add_class_to_loadable_list(), so each class is queued at most once.
static void schedule_class_load(Class cls)
{
    if (!cls) return;
    ASSERT(cls->isRealized());  // _read_images should realize

    const bool alreadyScheduled = (cls->data()->flags & RW_LOADED) != 0;
    if (!alreadyScheduled) {
        // Superclass goes into the list before the subclass.
        schedule_class_load(cls->getSuperclass());

        add_class_to_loadable_list(cls);
        cls->setInfo(RW_LOADED);
    }
}
/// Appends (cls, its +load IMP) to the loadable_classes array.
///
/// Asserts that loadMethodLock is held. Returns without touching the array
/// when cls->getLoadMethod() yields no IMP. When the array is full its
/// capacity becomes 2*old + 16 entries via realloc().
void add_class_to_loadable_list(Class cls)
{
    loadMethodLock.assertLocked();

    IMP loadImp = cls->getLoadMethod();
    // Don't bother if cls has no +load method
    if (!loadImp) return;

    if (PrintLoading) {
        _objc_inform("LOAD: class '%s' scheduled for +load",
                     cls->nameForLogging());
    }

    const bool arrayFull = (loadable_classes_used == loadable_classes_allocated);
    if (arrayFull) {
        loadable_classes_allocated = loadable_classes_allocated*2 + 16;
        loadable_classes = static_cast<struct loadable_class *>(
            realloc(loadable_classes,
                    loadable_classes_allocated *
                    sizeof(struct loadable_class)));
    }

    // Fill the next free slot, then publish it by bumping the used count.
    struct loadable_class &slot = loadable_classes[loadable_classes_used];
    slot.cls = cls;
    slot.method = loadImp;
    loadable_classes_used += 1;
}
/// Appends (cat, its +load IMP) to the loadable_categories array.
///
/// Asserts that loadMethodLock is held. Returns without touching the array
/// when _category_getLoadMethod() yields no IMP. When the array is full its
/// capacity becomes 2*old + 16 entries via realloc().
void add_category_to_loadable_list(Category cat)
{
    loadMethodLock.assertLocked();

    IMP loadImp = _category_getLoadMethod(cat);
    // Don't bother if cat has no +load method
    if (!loadImp) return;

    if (PrintLoading) {
        _objc_inform("LOAD: category '%s(%s)' scheduled for +load",
                     _category_getClassName(cat), _category_getName(cat));
    }

    const bool arrayFull =
        (loadable_categories_used == loadable_categories_allocated);
    if (arrayFull) {
        loadable_categories_allocated = loadable_categories_allocated*2 + 16;
        loadable_categories = static_cast<struct loadable_category *>(
            realloc(loadable_categories,
                    loadable_categories_allocated *
                    sizeof(struct loadable_category)));
    }

    // Store the category (not its class) together with the IMP.
    struct loadable_category &slot = loadable_categories[loadable_categories_used];
    slot.cat = cat;
    slot.method = loadImp;
    loadable_categories_used += 1;
}
call_load_methods
// Runs every queued +load method: all class +loads first, then category
// +loads, looping until both queues are empty.
//
// Asserts that loadMethodLock is held. The static `loading` flag turns a
// nested call (one made while an outer call is still running) into a no-op;
// the outer call's loop picks up anything queued in the meantime.
// The whole run is wrapped in an autorelease pool push/pop.
void call_load_methods(void)
{
static bool loading = NO;
bool more_categories;
loadMethodLock.assertLocked();
// Re-entrant calls do nothing; the outermost call will finish the job.
if (loading) return;
loading = YES;
void *pool = objc_autoreleasePoolPush();
do {
// 1. Repeatedly call class +loads until there aren't any more
// This is a loop, not recursion: call_class_loads() is re-run for as long as
// loadable_classes_used is non-zero.
while (loadable_classes_used > 0) {
call_class_loads();
}
// 2. Call category +loads ONCE
// The return value reports whether another category pass is needed.
more_categories = call_category_loads();
// 3. Run more +loads if there are classes OR more untried categories
} while (loadable_classes_used > 0 || more_categories);
objc_autoreleasePoolPop(pool);
loading = NO;
}
/// Calls +load on every class currently in loadable_classes.
///
/// The global list is detached and reset to empty before any +load runs, so
/// entries queued while this function is running go into a fresh list instead
/// of the one being iterated. The detached buffer is freed at the end.
static void call_class_loads(void)
{
    // Take over the current list and leave the globals empty.
    struct loadable_class *detached = loadable_classes;
    const int detachedCount = loadable_classes_used;
    loadable_classes = nil;
    loadable_classes_allocated = 0;
    loadable_classes_used = 0;

    for (int idx = 0; idx < detachedCount; idx++) {
        Class cls = detached[idx].cls;
        if (!cls) continue;

        // The stored IMP is invoked as a plain function taking (Class, SEL).
        load_method_t loadFn =
            reinterpret_cast<load_method_t>(detached[idx].method);

        if (PrintLoading) {
            _objc_inform("LOAD: +[%s load]\n", cls->nameForLogging());
        }
        loadFn(cls, @selector(load));
    }

    // Release the detached buffer.
    if (detached) free(detached);
}
load、C++构造函数和main方法的调用流程
在分析load、C++构造函数以及main方法的调用流程之前,我们先在objc的源码中添加以下代码:
在objc-os.mm文件中,_objc_init函数上方添加C++构造方法:
// Demo constructor function added to objc-os.mm. The constructor attribute makes
// the toolchain run it as an initializer of the objc library, before main().
// It prints its own name (__func__) so the call order can be seen in the log.
__attribute__((constructor)) void objcFunc() {
printf("这是objc的Func--->%s \n", __func__);
}
在main.m文件中添加C++构造方法:
// Demo constructor function added to the app's main.m. The constructor attribute
// makes it run as an initializer of the main executable, before main().
// It prints its own name (__func__) so the call order can be seen in the log.
__attribute__((constructor)) void mineFunc() {
printf("这是我自己的Func--->%s \n", __func__);
}
在类Person中添加load方法的实现:
// Demo class: +load logs its own method name (__func__) via NSLog so that the
// moment the runtime invokes it can be compared with the constructor functions.
@implementation Person
+ (void)load {
NSLog(@"%s", __func__);
}
@end
接下来,运行源码项目,查看打印结果:
这是objc的Func--->objcFunc
+[Person load]
这是我自己的Func--->mineFunc
Hello, World!
结论: 优先执行objc源码中的C++构造函数,其次执行类的load方法,然后执行我们自己的C++构造方法,之后执行了main方法;
为什么会出现这种情况呢?接下来,我们通过断点执行,查看堆栈调用信息的方式来分析一下:
objc源码中的C++构造函数执行堆栈:
* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 2.1
* frame #0: 0x000000010031b6a4 libobjc.A.dylib`objcFunc() at objc-os.mm:923:5 [opt]
frame #1: 0x00000001002e82d4 libobjc.A.dylib`_objc_init [inlined] UnsignedInitializer::operator(this=<unavailable>)() const at objc-file.h:66:9 [opt]
frame #2: 0x00000001002e82c4 libobjc.A.dylib`_objc_init at objc-os.mm:662 [opt]
frame #3: 0x00000001002e82a8 libobjc.A.dylib`_objc_init at objc-os.mm:935 [opt]
frame #4: 0x00000001003bb88f libdispatch.dylib`_os_object_init + 13
frame #5: 0x00000001003cca03 libdispatch.dylib`libdispatch_init + 285
frame #6: 0x00007fff2a55e5ff libSystem.B.dylib`libSystem_initializer + 238
frame #7: 0x00000001000316c7 dyld`ImageLoaderMachO::doModInitFunctions(ImageLoader::LinkContext const&) + 535
frame #8: 0x0000000100031ad2 dyld`ImageLoaderMachO::doInitialization(ImageLoader::LinkContext const&) + 40
frame #9: 0x000000010002c4b6 dyld`ImageLoader::recursiveInitialization(ImageLoader::LinkContext const&, unsigned int, char const*, ImageLoader::InitializerTimingList&, ImageLoader::UninitedUpwards&) + 492
frame #10: 0x000000010002c421 dyld`ImageLoader::recursiveInitialization(ImageLoader::LinkContext const&, unsigned int, char const*, ImageLoader::InitializerTimingList&, ImageLoader::UninitedUpwards&) + 343
frame #11: 0x000000010002a26f dyld`ImageLoader::processInitializers(ImageLoader::LinkContext const&, unsigned int, ImageLoader::InitializerTimingList&, ImageLoader::UninitedUpwards&) + 191
frame #12: 0x000000010002a310 dyld`ImageLoader::runInitializers(ImageLoader::LinkContext const&, ImageLoader::InitializerTimingList&) + 82
frame #13: 0x000000010001686b dyld`dyld::initializeMainExecutable() + 129
frame #14: 0x000000010001ceb2 dyld`dyld::_main(macho_header const*, unsigned long, int, char const**, char const**, char const**, unsigned long*) + 8702
frame #15: 0x0000000100015224 dyld`dyldbootstrap::start(dyld3::MachOLoaded const*, int, char const**, dyld3::MachOLoaded const*, unsigned long*) + 450
frame #16: 0x0000000100015025 dyld`_dyld_start + 37
main.m文件中的C++构造方法的执行堆栈:
* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 3.1
* frame #0: 0x0000000100003ca4 KCObjcBuild`mineFunc at main.m:27:5 [opt]
frame #1: 0x00000001000316c7 dyld`ImageLoaderMachO::doModInitFunctions(ImageLoader::LinkContext const&) + 535
frame #2: 0x0000000100031ad2 dyld`ImageLoaderMachO::doInitialization(ImageLoader::LinkContext const&) + 40
frame #3: 0x000000010002c4b6 dyld`ImageLoader::recursiveInitialization(ImageLoader::LinkContext const&, unsigned int, char const*, ImageLoader::InitializerTimingList&, ImageLoader::UninitedUpwards&) + 492
frame #4: 0x000000010002a26f dyld`ImageLoader::processInitializers(ImageLoader::LinkContext const&, unsigned int, ImageLoader::InitializerTimingList&, ImageLoader::UninitedUpwards&) + 191
frame #5: 0x000000010002a310 dyld`ImageLoader::runInitializers(ImageLoader::LinkContext const&, ImageLoader::InitializerTimingList&) + 82
frame #6: 0x00000001000168b1 dyld`dyld::initializeMainExecutable() + 199
frame #7: 0x000000010001ceb2 dyld`dyld::_main(macho_header const*, unsigned long, int, char const**, char const**, char const**, unsigned long*) + 8702
frame #8: 0x0000000100015224 dyld`dyldbootstrap::start(dyld3::MachOLoaded const*, int, char const**, dyld3::MachOLoaded const*, unsigned long*) + 450
frame #9: 0x0000000100015025 dyld`_dyld_start + 37
可以看出来,objc源码中的C++构造方法的堆栈信息多了一部分调用objc源码的信息,除此之外,在进入main方法之前,两者的调用堆栈基本一致,而且recursiveInitialization的调用在执行objc源码中C++构造方法时执行了两次,这也就从侧面说明了recursiveInitialization是一个递归调用的初始化方法;
在递归初始化的过程中,会优先初始化底层依赖库的相关内容,这也就是objc源码中的C++构造方法先执行的原因;又因为load方法由notifySingle来调用,而notifySingle在doInitialization之前,所以load方法比同一个镜像文件中的C++构造方法要先执行;
那么main函数为什么是最后执行的呢?而且是在dyld执行完毕之后执行?
这就需要我们结合_dyld_start的实现来分析,其汇编实现如下:
# dyld entry point (x86_64, AT&T syntax).
# Sets up a frame, calls dyldbootstrap::start(app_mh, argc, argv, dyld_mh, &startGlue),
# and then jumps to the address that call returned in %rax.
# After the call, the 8-byte slot at -8(%rbp) (whose address was passed as param5)
# selects the path:
#   zero     -> restore the stack and jump straight to the entry point
#   non-zero -> LC_MAIN case (Lnew): push the slot value as a simulated return
#               address, load argc/argv/env/apple into %rdi/%rsi/%rdx/%rcx,
#               then jump to main
__dyld_start:
popq %rdi # param1 = mh of app
pushq $0 # push a zero for debugger end of frames marker
movq %rsp,%rbp # pointer to base of kernel frame
andq $-16,%rsp # force SSE alignment
subq $16,%rsp # room for local variables
# call dyldbootstrap::start(app_mh, argc, argv, dyld_mh, &startGlue)
movl 8(%rbp),%esi # param2 = argc into %esi
leaq 16(%rbp),%rdx # param3 = &argv[0] into %rdx
leaq ___dso_handle(%rip),%rcx # param4 = dyldsMachHeader into %rcx
leaq -8(%rbp),%r8 # param5 = &glue into %r8
call __ZN13dyldbootstrap5startEPKN5dyld311MachOLoadedEiPPKcS3_Pm
# load the glue slot written through param5; non-zero selects the LC_MAIN path
movq -8(%rbp),%rdi
cmpq $0,%rdi
jne Lnew
# clean up stack and jump to "start" in main executable
movq %rbp,%rsp # restore the unaligned stack pointer
addq $8,%rsp # remove the mh argument, and debugger end frame marker
movq $0,%rbp # restore ebp back to zero
jmp *%rax # jump to the entry point
# LC_MAIN case, set up stack for call to main()
Lnew: addq $16,%rsp # remove local variables
pushq %rdi # simulate return address into _start in libdyld
movq 8(%rbp),%rdi # main param1 = argc into %rdi
leaq 16(%rbp),%rsi # main param2 = &argv[0] into %rsi
leaq 0x8(%rsi,%rdi,8),%rdx # main param3 = &env[0] into %rdx
movq %rdx,%rcx
# scan forward from env[0] until the NULL terminator; %rcx ends up just past it
Lapple: movq (%rcx),%r8
add $8,%rcx
testq %r8,%r8 # look for NULL ending env[] array
jne Lapple # main param4 = apple into %rcx
jmp *%rax # jump to main(argc,argv,env,apple) with return address set to _start
根据汇编方法的调用分析,在调用流程的过程中,通过给main方法的参数赋值,最后在jmp *%rax调用了main方法,那么究竟是不是这样呢?我们运行项目,查看main函数的汇编代码来看一下:
新建一个iOS工程,给main.m文件中的C++构造函数添加断点(因为其比main方法先执行),使用模拟器执行,因为我们刚才看的是x86_64的汇编代码:
查看汇编:
继续单步向下执行:
_dyld_start执行完毕之后的汇编代码与之前我们在dyld源码工程中看到的汇编代码极为相似,最后jmpq *%rax,那么rax寄存器究竟是不是main方法呢?
通过打印寄存器,发现rax寄存器中存放的确实是main函数:
这也就验证了main方法确实是在最后执行的;
关于_dyld_objc_notify_register方法的补充说明
我们在dyld的源码中搜索_dyld_objc_notify_register发现有多处实现:
第一处:
dyld/src/dyldAPIs.cpp
// dyld/src/dyldAPIs.cpp variant: passes the three objc callbacks straight
// through to dyld::registerObjCNotifiers().
//
// mapped   - callback for mapped images
// init     - callback for image initialization
// unmapped - callback for unmapped images
void _dyld_objc_notify_register(_dyld_objc_notify_mapped mapped,
_dyld_objc_notify_init init,
_dyld_objc_notify_unmapped unmapped)
{
dyld::registerObjCNotifiers(mapped, init, unmapped);
}
第二处:
dyld/src/dyldAPIsInLibSystem.cpp
// dyld/src/dyldAPIsInLibSystem.cpp variant.
// When gUseDyld3 is set, forwards to dyld3::_dyld_objc_notify_register().
// Otherwise it resolves "__dyld_objc_notify_register" through
// dyld_func_lookup_and_resign() and calls the resulting function pointer.
//
// mapped   - callback for mapped images
// init     - callback for image initialization
// unmapped - callback for unmapped images
void _dyld_objc_notify_register(_dyld_objc_notify_mapped mapped,
_dyld_objc_notify_init init,
_dyld_objc_notify_unmapped unmapped)
{
if ( gUseDyld3 )
return dyld3::_dyld_objc_notify_register(mapped, init, unmapped);
// NOTE(review): presumably takes a dyld-wide lock for the rest of this scope; confirm against the macro definition
DYLD_LOCK_THIS_BLOCK;
typedef bool (*funcType)(_dyld_objc_notify_mapped, _dyld_objc_notify_init, _dyld_objc_notify_unmapped);
// Static, so the lookup below runs only while p is still NULL.
static funcType __ptrauth_dyld_function_ptr p = NULL;
if(p == NULL)
dyld_func_lookup_and_resign("__dyld_objc_notify_register", &p);
// The bool result declared in funcType is ignored.
p(mapped, init, unmapped);
}
第三处:
dyld/dyld3/APIs.cpp
// dyld/dyld3/APIs.cpp variant: logs the three callback pointers via log_apis()
// and stores them with gAllImages.setObjCNotifiers().
//
// mapped   - callback for mapped images
// init     - callback for image initialization
// unmapped - callback for unmapped images
void _dyld_objc_notify_register(_dyld_objc_notify_mapped mapped,
_dyld_objc_notify_init init,
_dyld_objc_notify_unmapped unmapped)
{
log_apis("_dyld_objc_notify_register(%p, %p, %p)\n", mapped, init, unmapped);
gAllImages.setObjCNotifiers(mapped, init, unmapped);
}
其中第三处是由第二处方法中的dyld3::_dyld_objc_notify_register触发的,我们可以暂时不讨论;那么第一处和第二处哪一处才是我们项目中真正调用的呢?
因为_dyld_objc_notify_register里边会触发load_images,所以可以在objc的源码的load_images方法中打上断点,然后运行项目,然后通过bt打印堆栈信息:
通过堆栈信息发现,它是由registerObjCNotifiers触发的,也就说明objc源码中调用的是第一处的_dyld_objc_notify_register;
但是需要注意的是,这是一个Mac工程,那么iOS工程呢?
新建一个iOS工程,在工程中添加_dyld_objc_notify_register符号断点,然后运行项目:
发现,在此处_dyld_objc_notify_register里边调用了dyld3::_dyld_objc_notify_register,也就说明此处调用的是第二处的_dyld_objc_notify_register;
结论: Mac系统尚未全面支持dyld3,所以不会走dyld3;iOS支持了dyld3,所以iOS系统会走dyld3;