应用程序加载(上)

398 阅读6分钟

动态库和静态库

  • 静态库:链接时会被完整的复制到可执行文件中,多次使用就会有多份冗余拷贝,oc通常为.a和.framework文件。
  • 动态库: 链接时不复制,程序运行时由系统动态加载到内存,供程序调用,系统只加载一次,多个程序共用,oc通常为.dylib和.framework文件。

应用加载流程引入

// Minimal demo class. It declares no properties or instance methods; it only
// provides a +load method on which a breakpoint can be set.
@interface LKPerson : NSObject
@end

@implementation LKPerson

// Class-level +load hook; logs "load" when it is called.
+ (void)load
{
    NSLog(@"load");
}

@end

// Entry point of the demo program. The autorelease pool body is empty, so
// main does no work of its own and simply returns 0.
int main(int argc, const char * argv[]) {
    @autoreleasepool {

    }
    return 0;
}

load函数处打下断点,bt查看堆栈信息

image.png 可以看到load函数调用之前的流程

  • dyld`_dyld_start ->
  • dyld`dyldbootstrap::start ->
  • dyld`dyld::_main ->
  • dyld`dyld::initializeMainExecutable() ->
  • dyld`ImageLoader::runInitializers ->
  • dyld`ImageLoader::processInitializers ->
  • dyld`ImageLoader::recursiveInitialization ->
  • dyld`dyld::notifySingle ->
  • libobjc.A.dylib`load_images

可以看到,整个应用加载的流程是从 `_dyld_start` 开始的。

dyld:动态链接器 历程

WWDC2017:App Startup Time: Past, Present, and Future

引用自 BBLV-应用程序加载(上)
dyld 1.0(1996-2004)

包含在NeXTStep3.3中
作用并不是特别大,其历史早于标准化POSIX dlopen调用
他是在大多数使用C++动态库的系统之前编写的
在macOS Cheetah(10.0)添加了预绑定功能(Prebinding)

dyld 2.0(2004-2007)

包含在macOS Tiger 中
dyld2是完全重写的
正确支持C++初始化器的语义
具有完整的dlopen和dlsym实现
dyld2的设计目标是提高速度,因此进行了有限的健全性检查
安全性增强
由于速度的大幅提升,因此可以减少预绑定的工作量
不同于dyld1编辑你的程序数据,此次只编辑系统库,并且仅在软件更新的时候更新预绑定。

dyld 2.x(2007-2017)

增加了大量基础结构和平台(x86、x86_64、arm、arm64)
通过多种途径增强安全性
增加代码签名和ASLR(地址空间布局随机化)
增强了性能(消除了预绑定,转而使用了共享缓存 )

dyld 3(2017-至今)

dyld3是全新的动态链接器
2017以后的apple所有的系统都将使用dyld3
为了性能,尽可能地提高启动速度
在设计上提升了安全性
可测试性与可靠性

dyld加载流程

相关源码 libdispatch、Libsystem、objc4、dyld 可以在 opensource.apple.com/tarballs/ 上下载
打开dyld-852源码,全局搜索_dyld_start

#if __arm64__ && !TARGET_OS_SIMULATOR
	// __dyld_start: arm64 entry point of dyld.
	// Saves the incoming stack pointer in x28, builds an aligned terminating
	// frame, calls dyldbootstrap::start(), and then transfers control to the
	// address that call returned (kept in x16). Two exit paths exist:
	//   - startGlue slot == 0 : LC_UNIXTHREAD way, jump straight to the result
	//   - startGlue slot != 0 : LC_MAIN case, set up argc/argv/env/apple first
	.text
	.align 2
	.globl __dyld_start
__dyld_start:
	mov 	x28, sp
	and     sp, x28, #~15		// force 16-byte alignment of stack
	mov	x0, #0
	mov	x1, #0
	stp	x1, x0, [sp, #-16]!	// make aligned terminating frame
	mov	fp, sp			// set up fp to point to terminating frame
	sub	sp, sp, #16             // make room for local variables
#if __LP64__
	ldr     x0, [x28]               // get app's mh into x0
	ldr     x1, [x28, #8]           // get argc into x1 (kernel passes 32-bit int argc as 64-bits on stack to keep alignment)
	add     x2, x28, #16            // get argv into x2
#else
	ldr     w0, [x28]               // get app's mh into x0
	ldr     w1, [x28, #4]           // get argc into x1 (kernel passes 32-bit int argc as 64-bits on stack to keep alignment)
	add     w2, w28, #8             // get argv into x2
#endif
	adrp	x3,___dso_handle@page
	add 	x3,x3,___dso_handle@pageoff // get dyld's mh in to x3
	mov	x4,sp                   // x4 has &startGlue

	// call dyldbootstrap::start(app_mh, argc, argv, dyld_mh, &startGlue)
	bl	__ZN13dyldbootstrap5startEPKN5dyld311MachOLoadedEiPPKcS3_Pm
	mov	x16,x0                  // save entry point address in x16
	// reload the startGlue slot (its address was passed as the 5th argument)
#if __LP64__
	ldr     x1, [sp]
#else
	ldr     w1, [sp]
#endif
	cmp	x1, #0                  // non-zero startGlue selects the LC_MAIN path
	b.ne	Lnew

	// LC_UNIXTHREAD way, clean up stack and jump to result
#if __LP64__
	add	sp, x28, #8             // restore unaligned stack pointer without app mh
#else
	add	sp, x28, #4             // restore unaligned stack pointer without app mh
#endif
#if __arm64e__
	braaz   x16                     // jump to the program's entry point
#else
	br      x16                     // jump to the program's entry point
#endif

	// LC_MAIN case, set up stack for call to main()
Lnew:	mov	lr, x1		    // simulate return address into _start in libdyld.dylib
#if __LP64__
	ldr	x0, [x28, #8]       // main param1 = argc
	add	x1, x28, #16        // main param2 = argv
	add	x2, x1, x0, lsl #3  // x2 = argv + argc * 8
	add	x2, x2, #8          // main param3 = &env[0]
	mov	x3, x2
	// scan forward from &env[0] until a zero entry has been read; x3 then
	// points one slot past it
Lapple:	ldr	x4, [x3]
	add	x3, x3, #8
#else
	ldr	w0, [x28, #4]       // main param1 = argc
	add	x1, x28, #8         // main param2 = argv
	add	x2, x1, x0, lsl #2  // x2 = argv + argc * 4
	add	x2, x2, #4          // main param3 = &env[0]
	mov	x3, x2
	// scan forward from &env[0] until a zero entry has been read; x3 then
	// points one slot past it
Lapple:	ldr	w4, [x3]
	add	x3, x3, #4
#endif
	cmp	x4, #0
	b.ne	Lapple		    // main param4 = apple
#if __arm64e__
	braaz   x16
#else
	br      x16
#endif

#endif // __arm64__ && !TARGET_OS_SIMULATOR

根据注释// call dyldbootstrap::start(app_mh, argc, argv, dyld_mh, &startGlue)定位到dyldbootstrap::start。搜索dyldbootstrap,找到start函数

//
// dyldbootstrap::start: bootstraps dyld itself and then hands off to dyld::_main.
//
// Parameters: the app's mach header, argc/argv, dyld's own mach header, and a
// pointer to the startGlue slot. Returns whatever dyld::_main returns.
//
// Steps, in order: emit a kdebug marker, rebase dyld, derive envp and apple
// from argv, set up the stack canary, optionally run dyld's own C++
// initializers, call _subsystem_init, then call dyld::_main.
//
uintptr_t start(const dyld3::MachOLoaded* appsMachHeader, int argc, const char* argv[],
				const dyld3::MachOLoaded* dyldsMachHeader, uintptr_t* startGlue)
{

    // Emit kdebug tracepoint to indicate dyld bootstrap has started <rdar://46878536>
    dyld3::kdebug_trace_dyld_marker(DBG_DYLD_TIMING_BOOTSTRAP_START, 0, 0, 0, 0);

	// if kernel had to slide dyld, we need to fix up load sensitive locations
	// we have to do this before using any global variables
    rebaseDyld(dyldsMachHeader);

	// kernel sets up env pointer to be just past end of argv array
	// (argc+1: one slot beyond argv[argc])
	const char** envp = &argv[argc+1];
	
	// kernel sets up apple pointer to be just past end of envp array
	// walk to envp's NULL terminator, then step over it
	const char** apple = envp;
	while(*apple != NULL) { ++apple; }
	++apple;

	// set up random value for stack canary
	__guard_setup(apple);

#if DYLD_INITIALIZER_SUPPORT
	// run all C++ initializers inside dyld
	runDyldInitializers(argc, argv, envp, apple);
#endif

	_subsystem_init(apple);

	// now that we are done bootstrapping dyld, call dyld's main
	uintptr_t appsSlide = appsMachHeader->getSlide();
	return dyld::_main((macho_header*)appsMachHeader, appsSlide, argc, argv, envp, apple, startGlue);
}

最终返回值调用dyld::_main函数

dyld::_main函数

main函数过长,代码略

// Excerpt of dyld::_main: only the key calls are listed, in the order shown.

// get host architecture information
getHostInfo(mainExecutableMH, mainExecutableSlide);

// instantiate the main executable
sMainExecutable = instantiateFromLoadedImage(mainExecutableMH, mainExecutableSlide, sExecPath);

// load an inserted dylib
loadInsertedDylib(*lib);

// link the main executable
link(sMainExecutable, sEnv.DYLD_BIND_AT_LAUNCH, true, ImageLoader::RPathChain(NULL, NULL), -1);

// link an inserted dylib
link(image, sEnv.DYLD_BIND_AT_LAUNCH, true, ImageLoader::RPathChain(NULL, NULL), -1);

// weak-bind the main executable
sMainExecutable->weakBind(gLinkContext);

// run initializers
initializeMainExecutable(); 

// notify any monitoring processes that this process is about to enter main()
notifyMonitoringDyldMain();

initializeMainExecutable流程

//
// Runs initializers for every image root and then for the main executable,
// registers the static-terminator handler, and optionally prints statistics.
// All timing is accumulated into initializerTimes[0].
//
void initializeMainExecutable()
{
	// record that we've reached this step
	gLinkContext.startedInitializingMainExecutable = true;

	// run initializers for any inserted dylibs
	// (the array is sized by a runtime call to allImagesCount())
	ImageLoader::InitializerTimingList initializerTimes[allImagesCount()];
	initializerTimes[0].count = 0;
	const size_t rootCount = sImageRoots.size();
	if ( rootCount > 1 ) {
		// starts at index 1; NOTE(review): index 0 is presumably the main
		// executable, which is initialized separately below -- confirm
		for(size_t i=1; i < rootCount; ++i) {
			sImageRoots[i]->runInitializers(gLinkContext, initializerTimes[0]);
		}
	}
	
	// run initializers for main executable and everything it brings up 
	sMainExecutable->runInitializers(gLinkContext, initializerTimes[0]);
	
	// register cxa_atexit() handler to run static terminators in all loaded images when this process exits
	if ( gLibSystemHelpers != NULL ) 
		(*gLibSystemHelpers->cxa_atexit)(&runAllStaticTerminators, NULL, NULL);

	// dump info if requested
	if ( sEnv.DYLD_PRINT_STATISTICS )
		ImageLoader::printStatistics((unsigned int)allImagesCount(), initializerTimes[0]);
	if ( sEnv.DYLD_PRINT_STATISTICS_DETAILS )
		ImageLoaderMachO::printStatisticsDetails((unsigned int)allImagesCount(), initializerTimes[0]);
}

根据sMainExecutable->runInitializers(gLinkContext, initializerTimes[0]);,得到runInitializers函数

//
// Runs initializers starting from this image: builds a one-element
// UninitedUpwards list containing this image and its path, passes it to
// processInitializers, then sends a dyld_image_state_initialized batch
// notification. The elapsed time is added to fgTotalInitTime.
//
void ImageLoader::runInitializers(const LinkContext& context, InitializerTimingList& timingInfo)
{
	uint64_t t1 = mach_absolute_time();
	mach_port_t thisThread = mach_thread_self();
	// seed the work list with just this image
	ImageLoader::UninitedUpwards up;
	up.count = 1;
	up.imagesAndPaths[0] = { this, this->getPath() };
	processInitializers(context, thisThread, timingInfo, up); 
	context.notifyBatch(dyld_image_state_initialized, false);
	// release the port obtained from mach_thread_self() above
	mach_port_deallocate(mach_task_self(), thisThread);
	uint64_t t2 = mach_absolute_time();
	fgTotalInitTime += (t2 - t1);
}

查看runInitializers,找到processInitializers函数

//
// Excerpt of ImageLoader::processInitializers: calls recursiveInitialization
// on every entry of `images`.
// NOTE(review): `ups` is not declared anywhere in this excerpt; presumably it
// is set up in the omitted part of the function -- check the full source.
//
void ImageLoader::processInitializers(const LinkContext& context, mach_port_t thisThread,
									 InitializerTimingList& timingInfo, ImageLoader::UninitedUpwards& images)
{
        // recurse over every `image` in the list; any image that has not been initialized yet gets initialized
	for (uintptr_t i=0; i < images.count; ++i) {
		images.imagesAndPaths[i].first->recursiveInitialization(context, thisThread, images.imagesAndPaths[i].second, timingInfo, ups);
	}
        // remaining code omitted
}

全局搜索recursiveInitialization,找到对应函数

void ImageLoader::recursiveInitialization(const LinkContext& context, mach_port_t this_thread, const char* pathToInitialize,
										  InitializerTimingList& timingInfo, UninitedUpwards& uninitUps)
        //其它代码省略
        context.notifySingle(dyld_image_state_dependents_initialized, this, &timingInfo);
			
        // initialize this image
        bool hasInitializers = this->doInitialization(context);
}

全局搜索notifySingle

//
// Excerpt of dyld's notifySingle. When `state` is
// dyld_image_state_dependents_initialized, an ObjC init callback has been
// registered (sNotifyObjCInit != NULL) and the image reports notifyObjC(),
// the callback is invoked with the image's real path and mach header.
// The time spent in the callback is optionally recorded in `timingInfo`.
//
static void notifySingle(dyld_image_states state, const ImageLoader* image, ImageLoader::InitializerTimingList* timingInfo)
{
        // other code omitted
	if ( (state == dyld_image_state_dependents_initialized) && (sNotifyObjCInit != NULL) && image->notifyObjC() ) {
		uint64_t t0 = mach_absolute_time();
		dyld3::ScopedTimer timer(DBG_DYLD_TIMING_OBJC_INIT, (uint64_t)image->machHeader(), 0, 0);
		(*sNotifyObjCInit)(image->getRealPath(), image->machHeader());
		// t1 and t2 are read back-to-back: (t2-t1) is the cost of an empty
		// measurement, and 100x that value serves as the threshold below
		uint64_t t1 = mach_absolute_time();
		uint64_t t2 = mach_absolute_time();
		uint64_t timeInObjC = t1-t0;
		uint64_t emptyTime = (t2-t1)*100;
		// only record the time when it exceeds the threshold and a sink was supplied
		if ( (timeInObjC > emptyTime) && (timingInfo != NULL) ) {
			timingInfo->addTime(image->getShortName(), timeInObjC);
		}
	}
}

全局搜索sNotifyObjCInit

//
// Stores the three ObjC notification callbacks in dyld's globals:
// `mapped` -> sNotifyObjCMapped, `init` -> sNotifyObjCInit,
// `unmapped` -> sNotifyObjCUnmapped.
//
void registerObjCNotifiers(_dyld_objc_notify_mapped mapped, _dyld_objc_notify_init init, _dyld_objc_notify_unmapped unmapped)
{
	// record functions to call
	sNotifyObjCMapped	= mapped;
	sNotifyObjCInit		= init;
	sNotifyObjCUnmapped = unmapped;
}

//
// Thin wrapper that forwards its three callbacks, unchanged and in the same
// order, to dyld::registerObjCNotifiers.
//
void _dyld_objc_notify_register(_dyld_objc_notify_mapped    mapped,
                                _dyld_objc_notify_init      init,
                                _dyld_objc_notify_unmapped  unmapped)
{
	dyld::registerObjCNotifiers(mapped, init, unmapped);
}

最终得到_dyld_objc_notify_register函数,发现只有定义,没有调用。
添加符号断点_dyld_objc_notify_register image.png 整个流程为

  • dyld`_dyld_start
  • dyld`dyldbootstrap::start
  • dyld`dyld::_main
  • dyld`dyld::initializeMainExecutable()
  • dyld`ImageLoader::runInitializers
  • dyld`ImageLoader::processInitializers
  • dyld`ImageLoader::recursiveInitialization
  • dyld`ImageLoader::recursiveInitialization
  • dyld`ImageLoaderMachO::doInitialization
  • dyld`ImageLoaderMachO::doModInitFunctions
  • libSystem.B.dylib`libSystem_initializer
  • libdispatch.dylib`libdispatch_init
  • libdispatch.dylib`_os_object_init
  • libobjc.A.dylib`_objc_init
  • libdyld.dylib`_dyld_objc_notify_register

在 `_dyld_objc_notify_register` 之前,是 libobjc.A.dylib`_objc_init 函数。

//
// One-time bootstrap of the ObjC runtime. A function-local static flag makes
// every call after the first return immediately. After running the runtime's
// init routines it registers map_images, load_images and unmap_image with
// dyld through _dyld_objc_notify_register.
//
void _objc_init(void)
{
    // run the body at most once
    static bool initialized = false;
    if (initialized) return;
    initialized = true;
    
    // fixme defer initialization until an objc-using image is found?
    environ_init();
    tls_init();
    static_init();
    runtime_init();
    exception_init();
#if __OBJC2__
    cache_t::init();
#endif
    _imp_implementationWithBlock_init();

    // argument order is (mapped, init, unmapped): load_images is the init callback
    _dyld_objc_notify_register(&map_images, load_images, unmap_image);

#if __OBJC2__
    didCallDyldNotifyRegister = true;
#endif
}

发现sNotifyObjCInit实际就是第二个参数load_images函数。