iOS-启动优化-创建进程
准备工作
正文
对于操作系统而言,执行进程就是执行拿到的指令集合,而这个指令集合是从可执行文件中获取的,那么就可以将进程的加载简化为:某种操作 -> 读取可执行文件(MachO) -> 执行可执行文件的指令
Launchd进程
首先第一个关注的进程是Launchd,这个进程是用户态的第一个进程。在MacOS中会存在多个Launchd:用户范围的Launchd是在用户登录的时候启动的,通过SSH远程登录也会创建Launchd;而在iOS中只有一个Launchd进程。
Launchd创建过程
-
在
bsd_init.c文件中我们可以找到void bsdinit_task(void)方法,这个方法里面调用了load_init_program(p);就是用来初始化Launchd进程的,而void bsdinit_task(void)则是在内核启动时调用的。void bsdinit_task(void) { proc_t p = current_proc(); // 'init'进程其实就是'Launchd'进程 process_name("init", p); /* Set up exception-to-signal reflection */ ux_handler_setup(); //...此处省略若干代码... // todo: iOS虚拟内存解析 vm_init_before_launchd(); //...此处省略若干代码... load_init_program(p); lock_trace = 1; }
-
调用到
void load_init_program(proc_t p)函数,可以看到launchd的路径直接以static数组的形式写在代码中,其中区分了DEBUG \ DEVELOPMENT \ RELEASE,因为DEBUG和DEVELOPMENT环境下会有多个候选路径,所以用了个for循环依次尝试,只要成功运行一个即可。static const char * init_programs[] = { #if DEBUG "/usr/appleinternal/sbin/launchd.debug", #endif #if DEVELOPMENT || DEBUG "/usr/appleinternal/sbin/launchd.development", #endif "/sbin/launchd", }; // 传入的 'p' 其实就是 'struct proc *' void load_init_program(proc_t p) { int error = ENOENT; for (i = 0; i < sizeof(init_programs) / sizeof(init_programs[0]); i++) { error = load_init_program_at_path(p, (user_addr_t)scratch_addr, init_programs[i]); if (!error) { return; } //...此处省略若干代码... } }
-
在
load_init_program中还有另外一个调用static int load_init_program_at_path(proc_t p, user_addr_t scratch_addr, const char* path),我的理解是,这一步主要做了一些对齐操作并设置环境变量。static int load_init_program_at_path(proc_t p, user_addr_t scratch_addr, const char* path) { //...此处省略若干代码... return execve(p, &init_exec_args, retval); }
-
其中
int execve(proc_t p, struct execve_args *uap, int32_t *retval)会创建新的线程和Task,将调用该函数的进程转换成指定的新进程,涉及到加载MachO文件到内存, 其中需要重点关注proc_t结构体的内容。struct filedesc { struct fileproc **fd_ofiles; /* file structures for open files */ lck_mtx_t fd_kqhashlock; /* lock for dynamic kqueue hash */ u_long fd_kqhashmask; /* size of dynamic kqueue hash */ struct kqwllist *fd_kqhash; /* hash table for dynamic kqueues */ struct kqworkq *fd_wqkqueue; /* the workq kqueue */ char *fd_ofileflags; /* per-process open file flags */ struct vnode *fd_cdir; /* current directory */ struct vnode *fd_rdir; /* root directory */ int fd_nfiles; /* number of open files allocated */ int fd_lastfile; /* high-water mark of fd_ofiles */ int fd_freefile; /* approx. next free file */ mode_t fd_cmask; /* mask for file creation */ int fd_flags; int fd_knlistsize; /* size of knlist */ struct klist *fd_knlist; /* list of attached knotes */ u_long fd_knhashmask; /* size of knhash */ struct klist *fd_knhash; /* hash table for attached knotes */ lck_mtx_t fd_knhashlock; /* lock for hash table for attached knotes */ }; struct proc { struct proc *p_forw; /* Doubly-linked run/sleep queue. */ struct proc *p_back; struct proc *p_next; /* Linked list of active procs */ struct proc **p_prev; /* and zombies. */ // 重点关注 'filedesc'从名字我们可以知道这个结构体为文件描述符。 /* substructures: */ struct pcred *p_cred; /* Process owner's identity. */ struct filedesc *p_fd; /* Ptr to open files structure. */ struct pstats *p_stats; /* Accounting/statistics (PROC ONLY). */ struct plimit *p_limit; /* Process limits. */ struct vmspace *p_vmspace; /* Address space. */ struct sigacts *p_sigacts; /* Signal actions, state (PROC ONLY). */ //...此处省略若干代码... }; typedef struct proc *proc_t; //********************************************************************* int execve(proc_t p, struct execve_args *uap, int32_t *retval) { //...初始化操作... //...此处省略若干代码... 
err = __mac_execve(p, &muap, retval); return err; } // 主要用于创建新的Task和线程然后调用底层的 exec_activate_image int __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) { //...主要用于初始化imgp结构体中的一些通用数据... //...此处省略若干代码... // 创新新的线程和Task uthread = get_bsdthread_info(current_thread()); if (uthread->uu_flag & UT_VFORK) { // 这里有个位操作,标记vfork imgp->ip_flags |= IMGPF_VFORK_EXEC; in_vfexec = TRUE; } else { imgp->ip_flags |= IMGPF_EXEC; imgp->ip_new_thread = fork_create_child(old_task, NULL, p, FALSE, p->p_flag & P_LP64, task_get_64bit_data(old_task), TRUE); if (imgp->ip_new_thread == NULL) { error = ENOMEM; goto exit_with_error; } new_task = get_threadtask(imgp->ip_new_thread); context.vc_thread = imgp->ip_new_thread; } // exec_activate_image函数,内存映射函数 error = exec_activate_image(imgp); //...资源释放等... return error; } -
根据上面的函数调用得知
static int exec_activate_image(struct image_params *imgp)会将MachO映射到内存。struct execsw { int(*const ex_imgact)(struct image_params *); // 函数指针 const char *ex_name; } const execsw[] = { { exec_mach_imgact, "Mach-o Binary" }, // MachO文件 { exec_fat_imgact, "Fat Binary" }, // 多指令集MachO文件(例如 Launchd就是多指令集,找到Launchd和手机App的可执行文件分别丢入烂苹果中瞅一眼) { exec_shell_imgact, "Interpreter Script" }, { NULL, NULL} }; static int exec_activate_image(struct image_params *imgp) { //...省略若干代码... //因为判断条件中有判断是不是为NULL,而在定义数组的时候最后一个元素为NULL,所以不会有数组越界, for (i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) { // 因为主要观察 iOS加载所以先看 单指令集的调用 (exec_mach_imgact) error = (*execsw[i].ex_imgact)(imgp); } } -
exec_activate_image主要映射MachO文件到内存并设置权限。static int exec_mach_imgact(struct image_params *imgp) { //...此处省略若干代码... // We are being called to activate an image subsequent to a vfork() // operation; in this case, we know that our task, thread, and // uthread are actually those of our parent, and our proc, which we // obtained indirectly from the image_params vfs_context_t, is the // new child process. if (imgp->ip_flags & IMGPF_VFORK_EXEC) { imgp->ip_new_thread = fork_create_child(task, NULL, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT_ADDR), (imgp->ip_flags & IMGPF_IS_64BIT_DATA), FALSE); /* task and thread ref returned, will be released in __mac_execve */ if (imgp->ip_new_thread == NULL) { error = ENOMEM; goto bad; } } /* reset local idea of thread, uthread, task */ thread = imgp->ip_new_thread; uthread = get_bsdthread_info(thread); task = new_task = get_threadtask(thread); lret = load_machfile(imgp, mach_header, thread, &map, &load_result); //...此处省略若干代码... vm_map_set_user_wire_limit(map, (vm_size_t)proc_limitgetcur(p, RLIMIT_MEMLOCK, FALSE)); //...此处省略若干代码... } -
接下来就是开始加载
MachO文件,执行的函数为load_machfile(...)。load_return_t load_machfile(struct image_params *imgp, struct mach_header *header, thread_t thread, vm_map_t *mapp, load_result_t *result) { // 为task分配内存 pmap = pmap_create_options(get_task_ledger(ledger_task), (vm_map_size_t) 0, pmap_flags); if (pmap == NULL) { return LOAD_RESOURCE; } // 分配虚拟内存 map = vm_map_create(pmap, 0, vm_compute_max_offset(result->is_64bit_addr), TRUE); /* Forcibly disallow execution from data pages on even if the arch * normally permits it. */ if ((header->flags & MH_NO_HEAP_EXECUTION) && !(imgp->ip_flags & IMGPF_ALLOW_DATA_EXEC)) { vm_map_disallow_data_exec(map); } /* * Compute a random offset for ASLR, and an independent random offset for dyld. */ if (!(imgp->ip_flags & IMGPF_DISABLE_ASLR)) { vm_map_get_max_aslr_slide_section(map, &aslr_section_offset, &aslr_section_size); aslr_section_offset = (random() % aslr_section_offset) * aslr_section_size; // MachO的ASLR aslr_page_offset = random(); aslr_page_offset %= vm_map_get_max_aslr_slide_pages(map); aslr_page_offset <<= vm_map_page_shift(map); // dyld的ASLR dyld_aslr_page_offset = random(); dyld_aslr_page_offset %= vm_map_get_max_loader_aslr_slide_pages(map); dyld_aslr_page_offset <<= vm_map_page_shift(map); aslr_page_offset += aslr_section_offset; } // 解析MachO文件 lret = parse_machfile(vp, map, thread, header, file_offset, macho_size, 0, aslr_page_offset, dyld_aslr_page_offset, result, NULL, imgp); //...此处省略若干代码... return LOAD_SUCCESS; } -
接下来进入到
static load_return_t parse_machfile开始解析MachO,但是此函数会执行两次分别用于解析MachO和dyldstatic load_return_t parse_machfile(struct vnode *vp, vm_map_t map, thread_t thread, struct mach_header *header, off_t file_offset, off_t macho_size, int depth, int64_t aslr_offset, int64_t dyld_aslr_offset, load_result_t *result, load_result_t *binresult, struct image_params *imgp) { // parse_machfile函数会执行两次 // 第一次:用于解析MachO文件 // 第二次:用于解析dyld if (depth > 2) { return LOAD_FAILURE; } depth++; //...省略若干代码 //检查MachO的文件类型 switch (header->filetype) { case MH_EXECUTE: //...... case MH_DYLINKER: //...... default: return LOAD_FAILURE; } /* * Scan through the commands, processing each one as necessary. * We parse in three passes through the headers: * 0: determine if TEXT and DATA boundary can be page-aligned, load platform version * 1: thread state, uuid, code signature * 2: segments * 3: dyld, encryption, check entry point */ // PASS = 0: 确定文本和数据边界是否可以页对齐,加载平台的版本号 // PASS = 1: 检查线程的状态,uuid, 代码签名等 // PASS = 2: 扫描并处理需要在内核态执行的LoadCommands // PASS = 3: 动态连接器,检查程序入口 for (pass = 0; pass <= 3; pass++) { //...此处省略若干代码... /* * Loop through each of the load_commands indicated by the * Mach-O header; if an absurd value is provided, we just * run off the end of the reserved section by incrementing * the offset too far, so we are implicitly fail-safe. */ offset = mach_header_sz; ncmds = header->ncmds; while (ncmds--) { //...此处省略若干代码... lcp = (struct load_command *)(addr + offset); oldoffset = offset; //...此处省略若干代码... // 针对不同指令做不同处理 switch (lcp->cmd) { case LC_SEGMENT: { //...此处省略若干代码... ret = load_segment(lcp, header->filetype, control, file_offset, macho_size, vp, map, slide, result, imgp); //...此处省略若干代码... } case LC_SEGMENT_64: { //...此处省略若干代码... ret = load_segment(lcp, header->filetype, control, file_offset, macho_size, vp, map, slide, result, imgp); //...此处省略若干代码... 
} //之后研究一下 case LC_UNIXTHREAD: { boolean_t is_x86_64_compat_binary = FALSE; if (pass != 1) { break; } ret = load_unixthread( (struct thread_command *) lcp, thread, slide, is_x86_64_compat_binary, result); break; } //MachO中的LC_Main命令,只会对MachO的执行load_main操作,主要找到程序入口 case LC_MAIN: //...此处省略若干代码... //只对栈和线程进行初始化操作 ret = load_main((struct entry_point_command *) lcp, thread, slide, result); break; case LC_LOAD_DYLINKER: //...此处省略若干代码... //在处理MachO的时候将dyld的结构体指针存储到dlp中 if ((depth == 1) && (dlp == 0)) { dlp = (struct dylinker_command *)lcp; } else { ret = LOAD_FAILURE; } break; case LC_UUID: //...此处省略若干代码... case LC_CODE_SIGNATURE: /* CODE SIGNING */ //...此处省略若干代码... break; #if CONFIG_CODE_DECRYPTION case LC_ENCRYPTION_INFO: case LC_ENCRYPTION_INFO_64: //...此处省略若干代码... break; #endif case LC_VERSION_MIN_IPHONEOS: case LC_VERSION_MIN_MACOSX: case LC_VERSION_MIN_WATCHOS: case LC_VERSION_MIN_TVOS: { //...此处省略若干代码... } case LC_BUILD_VERSION: { //...此处省略若干代码... } default: /* Other commands are ignored by the kernel */ //其他操作不在内核态执行 ret = LOAD_SUCCESS; break; } if (ret != LOAD_SUCCESS) { break; } } if (ret != LOAD_SUCCESS) { break; } } if (ret == LOAD_SUCCESS) { //...此处省略若干代码... if ((ret == LOAD_SUCCESS) && (dlp != 0)) { /* * load the dylinker, and slide it by the independent DYLD ASLR * offset regardless of the PIE-ness of the main binary. */ // MachO文件命令加载完毕后去加载dyld的可执行文件 ret = load_dylinker(dlp, header->cputype, map, thread, depth, dyld_aslr_offset, result, imgp); } //...此处省略若干代码... } //...此处省略若干代码... kfree(addr, alloc_size); return ret; } -
第一次
MachO解析完成后,会紧接着解析dyld,会进入到load_dylinker(...)的流程然后回调到parse_machfile解析dyldstruct dylinker_command { uint32_t cmd; /* LC_ID_DYLINKER, LC_LOAD_DYLINKER or LC_DYLD_ENVIRONMENT */ uint32_t cmdsize; /* includes pathname string */ // 用于指定动态链接器的名称 union lc_str name; /* dynamic linker's path name */ }; static load_return_t load_dylinker(struct dylinker_command *lcp, cpu_type_t cputype, vm_map_t map, thread_t thread, int depth, int64_t slide, load_result_t *result, struct image_params *imgp) { //...此处省略若干代码... // 读取Dyld ret = get_macho_vnode(name, cputype, header, &file_offset, &macho_size, macho_data, &vp, imgp); //...此处省略若干代码... // 解析Dyld的MachO格式 ret = parse_machfile(vp, map, thread, header, file_offset, macho_size, depth, slide, 0, myresult, result, imgp); if (ret == LOAD_SUCCESS) { if (result->threadstate) { /* don't use the app's threadstate if we have a dyld */ kfree(result->threadstate, result->threadstate_sz); } result->threadstate = myresult->threadstate; result->threadstate_sz = myresult->threadstate_sz; result->dynlinker = TRUE; // 解析完成后将result->entry_point赋值为myresult->entry_point,就拿到了dyld的入口 result->entry_point = myresult->entry_point; result->validentry = myresult->validentry; result->all_image_info_addr = myresult->all_image_info_addr; result->all_image_info_size = myresult->all_image_info_size; if (myresult->platform_binary) { result->csflags |= CS_DYLD_PLATFORM; } } //...此处省略若干代码... return ret; } -
执行完
load_dylinker(...)后我们拿到了result->entry_point也就是dyld的入口,可是链路虽然走完,但还没形成闭环。此时我们返回到exec_mach_imgact(...)函数,可以在下面看到另外一个调用activate_exec_state(...)。static int exec_mach_imgact(struct image_params *imgp) { //...此处省略若干代码... // load_result中包含dyld的入口点 lret = activate_exec_state(task, p, thread, &load_result); //...此处省略若干代码... return error; } static int activate_exec_state(task_t task, proc_t p, thread_t thread, load_result_t *result) { //...此处省略若干代码... //开始走_dyld_start的流程 thread_setentrypoint(thread, result->entry_point); return KERN_SUCCESS; }
总结
-
通过某种方式分配进程空间
-
Launchd-
bsdinit_task(void)
void load_init_program(proc_t p)static int load_init_program_at_path(...)
-
-
SpringBoard点击,通知Launchd进行fork()操作等。 -
执行
int execve(...) -
执行
__mac_execve(...)-
执行
exec_mach_imgact(...)-
执行
load_machfile(...) -
第一次执行
parse_machfile(...)解析MachO-
执行
load_dylinker(...) -
第二次执行
parse_machfile(...)解析dyld- 赋值
entry_point拿到dyld的入口
- 赋值
-
-
执行
activate_exec_state(...)进入到_dyld_start的流程
-
-
-
参考
- XNU源码(7195.81.3)
- 从内核探究Mac OS X和iOS App 进程的创建原理
- MachO && dyld
- XNU加载Mach-O和dyld
- Mach-O 格式
- 《深入解析 Mac OS X & iOS 操作系统》
推荐文章/书籍
后话
操作系统是一个长期学习的东西,有新的理解会继续更新
欢迎大家指出任何形式的错误,与更好的思路等,感谢