对于 iOS 上的 Mach-O 文件,其数据段中的标号(懒加载和非懒加载),都可通过 fishhook 实现标号的动态绑定,即可在程序运行的任何时刻进行多次绑定,且最后一次绑定有效。
被 dyld 处理过的标号, fishhook 再绑定过程分为三个步骤:
- 根据传入的原标号名称,找到替换的位置
- 读取原函数指针,写入传入的引用变量
- 写入新的替换标号,完成 hook
标号名查找
在数据段中有两个节与动态标号绑定相关,即__nl_symbol_ptr 和__la_symbol_ptr:
- __nl_symbol_ptr 中存储非懒加载的标号指针数组,在加载时完成绑定
- __la_symbol_ptr 数组中的函数指针,在首次调用标号时由 dyld_stub_binder 动态绑定,也可以在启动时让 dyld 绑定
__la_symbol_ptr 中记录动态链接的 C 函数。应用启动之前,__la_symbol_ptr 中各标号对应的函数指针指向 __stub_helper 例程。在首次调用标号对应的函数时,应用调用 __stub_helper 取得绑定信息,并通过 dyld_stub_binder 更新 __la_symbol_ptr 中该标号的函数指针地址。
struct section(位于 <mach-o/loader.h> 中)结构体中的 offset 字段,提供标号指针表的偏移值。
/*
 * 64-bit Mach-O section header, as laid out in <mach-o/loader.h>.
 * For symbol pointer sections, the rebinding code shown later in this file
 * uses three fields: `addr` (plus the image slide) to locate the pointer
 * array, `size` to count its entries, and `reserved1` as the starting index
 * of the section's entries in the indirect symbol table.
 */
struct section_64 { /* for 64-bit architectures */
char sectname[16]; /* name of this section */
char segname[16]; /* segment this section goes in */
uint64_t addr; /* memory address of this section */
uint64_t size; /* size in bytes of this section */
uint32_t offset; /* file offset of this section */
uint32_t align; /* section alignment (power of 2) */
uint32_t reloff; /* file offset of relocation entries */
uint32_t nreloc; /* number of relocation entries */
uint32_t flags; /* flags (section type and attributes)*/
uint32_t reserved1; /* reserved (for offset or index) */
uint32_t reserved2; /* reserved (for count or sizeof) */
uint32_t reserved3; /* reserved */
};
标号指针表的偏移值 == 0x1020,请参考下面的标号指针表部分
链接原理
对于 text 段内部的代码之间的引用是通过基址+偏移的方式解决。对于多个 text 段之间的代码引用则需要借助 data段 来辅助完成,即(nonlazy, lazy, weak)bind 过程。对于采用 PIC 机制的 text 段内部的代码之间的引用也需要借助data段 来辅助完成,即 rebase 过程。
指针是运行时进行内存操作中的概念
递归库加载过程可参考 void ImageLoader::link(const LinkContext& context, bool forceLazysBound, bool preflightOnly, bool neverUnload, const RPathChain& loaderRPaths, const char* imagePath)
查找过程
为了找到标号的名称位于哪个节中的哪个位置,需要经过几个间接层的查找。下面是查找标号指针表中的函数指针所对应的标号名称的过程图:
标号指针表
有两种标号指针表,即__nl_symbol_ptr 和 __la_symbol_ptr。标号指针表与间接符号表中的记录的 index 保持一对一映射。
下面我们以 __la_symbol_ptr 中的 _objc_autoreleasePoolPop 为例,验证整个查找过程。
_objc_autoreleasePoolPop 在标号指针表中 index == 0
标号指针表的偏移值为 0x1020,由 struct section 中的 offset 字段确定
间接标号表
_objc_autoreleasePoolPop 在间接标号表中的 index == 0
间接标号表中存储的是标号表中记录的索引(下标),这两张表都位于 __LINKEDIT 段。
0x63 = 99,即 _objc_autoreleasePoolPop 在标号表中的下标
标号表
/*
 * 64-bit symbol table entry (<mach-o/nlist.h>).
 * `n_un.n_strx` is the offset of the symbol's name inside the string table;
 * the rebinding code shown later in this file adds it to the string table
 * base address to obtain the symbol name.
 */
struct nlist_64 {
union {
uint32_t n_strx; /* index into the string table */
} n_un;
uint8_t n_type; /* type flag, see below */
uint8_t n_sect; /* section number or NO_SECT */
uint16_t n_desc; /* see <mach-o/stab.h> */
uint64_t n_value; /* value of this symbol (or stab offset) */
};
标号表中存储 struct nlist(<mach-o/nlist.h>)数组,每个元素包含一个指向 __LINKEDIT 段中字符串表的索引,即 **n_strx** 字段。
_objc_autoreleasePoolPop 的 n_strx 索引值为 0x141(十六进制)
字符串表
字符串表偏移值:10824 = 0x2a48
字符串表中存储实际的标号名称。根据 n_strx 字段的值,对于 __nl_symbol_ptr 和 __la_symbol_ptr 中的每一个指针,都可以在字符串表中找到对应的标号名称,与传入的标号名称比较成功之后,便可以进行替换 __nl_symbol_ptr 或 __la_symbol_ptr 中的指针。
n_strx + 字符串表基址偏移 ( string table offset ) = 0x141 + 0x2a48 = 0x2b89
源码分析
fishhook API
/* One rebinding request: which symbol to hook and what to replace it with. */
struct rebinding {
const char *name;// name of the function (symbol) to hook
void *replacement;// address of the new function to bind in its place
void **replaced;// out-pointer that receives the original function address; may be NULL
};
/* Node of a singly linked list; each node wraps one array of rebinding requests. */
struct rebindings_entry {
struct rebinding *rebindings; /* array of rebinding requests held by this node */
size_t rebindings_nel; /* number of elements in `rebindings` */
struct rebindings_entry *next; /* next node in the list, or NULL at the end */
};
/* Head of the list of rebinding requests; passed to the per-image rebinding routine. */
static struct rebindings_entry *_rebindings_head;
/*
 * Registers the `rebindings_nel` requests in `rebindings` and applies them to
 * the images loaded in the process.
 * Returns a negative value when the requests could not be recorded; otherwise
 * returns the status produced while recording them.
 * NOTE(review): the exact success value comes from prepend_rebindings, which
 * is not shown here - presumably 0; confirm.
 */
FISHHOOK_VISIBILITY
int rebind_symbols(struct rebinding rebindings[],
size_t rebindings_nel);
每次传入的 struct rebinding[] 都会经 struct rebindings_entry 封装,加入到以 _rebindings_head 为头结点的单链表中(新条目插入表头)。
发起 hook 请求
/*
 * Records a new batch of rebinding requests and applies the pending requests
 * to the images in the process.
 *
 * rebindings:     array of rebinding requests.
 * rebindings_nel: number of elements in `rebindings`.
 *
 * Returns the status of prepend_rebindings(); a negative status is returned
 * immediately and nothing is rebound.
 */
int rebind_symbols(struct rebinding rebindings[],
                   size_t rebindings_nel) {
  const int status = prepend_rebindings(&_rebindings_head, rebindings,
                                        rebindings_nel);
  if (status < 0) {
    return status;
  }

  if (_rebindings_head->next == NULL) {
    /*
     * The list holds a single entry, so this is the first call: register the
     * add-image callback. dyld invokes it right away for every image that is
     * already loaded, and again for each image loaded afterwards.
     */
    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
    return status;
  }

  /* The callback is already registered: walk the currently loaded images by hand. */
  const uint32_t image_count = _dyld_image_count();
  for (uint32_t idx = 0; idx < image_count; ++idx) {
    _rebind_symbols_for_image(_dyld_get_image_header(idx),
                              _dyld_get_image_vmaddr_slide(idx));
  }
  return status;
}
/*
 * Hook entry point, used as the dyld add-image callback.
 *
 * header: Mach-O header of the image.
 * slide:  offset of the image inside the process address space.
 *
 * Forwards to rebind_symbols_for_image() with the global list of requests.
 */
static void _rebind_symbols_for_image(const struct mach_header *header,
                                      intptr_t slide) {
  struct rebindings_entry *pending = _rebindings_head;
  rebind_symbols_for_image(pending, header, slide);
}
二进制映像信息查找
/* Result record filled in by dladdr() for a given address. */
typedef struct dl_info {
const char *dli_fname; /* path of the Mach-O file containing the address */
void *dli_fbase; /* base (start) address of that Mach-O image */
const char *dli_sname; /* name of the nearest symbol */
void *dli_saddr; /* address of the nearest symbol */
} Dl_info;
使用 dladdr() 读取 mach_header 所代表的 Mach-O 的有效信息到 dl_info 中:
- dli_fname:Mach-O 文件路径
- dli_fbase:该 Mach-O 的起始地址,即基地址
- dli_sname:最接近的标号名称
- dli_saddr:最接近的标号地址
#define SEG_LINKEDIT "__LINKEDIT" 该段包含由链接编辑器(link editor)创建与维护的所有结构体
static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
const struct mach_header *header,
intptr_t slide) {
Dl_info info;
if (dladdr(header, &info) == 0) {
//在进程内寻址header信息
//如果 mach_header 指定的 Mach-O 不在当前进程的地址空间中,返回 0
return;
}
//准备从 Mach-O 中寻址的变量
segment_command_t *cur_seg_cmd;//当前命令,存在多个 LC_SEGMENT_64 段
segment_command_t *linkedit_segment = NULL;//编译器段,结合 slide 计算基址
struct symtab_command* symtab_cmd = NULL;//标号表,
struct dysymtab_command* dysymtab_cmd = NULL;//动态标号表
//遍历命令,获取段指针
uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);//load command 位置
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) { //架构依赖
if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
linkedit_segment = cur_seg_cmd;
}
} else if (cur_seg_cmd->cmd == LC_SYMTAB) {
symtab_cmd = (struct symtab_command*)cur_seg_cmd;
} else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
}
}
if (!symtab_cmd
|| !dysymtab_cmd
|| !linkedit_segment
|| !dysymtab_cmd->nindirectsyms) {
return;
}
// Find base symbol/string table addresses
//链接时程序的基址
uintptr_t linkedit_base = (uintptr_t)slide
+ linkedit_segment->vmaddr
- linkedit_segment->fileoff;
//符号表的地址
nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
//字符串表的地址
char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
//间接(动态)符号表地址
uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
cur = (uintptr_t)header + sizeof(mach_header_t);
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
//寻找到data段
if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
continue;
}
for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
// + sizeof(section_t) * j
section_t *sect = (section_t *)(cur + sizeof(segment_command_t)) + j;
if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
//懒加载表
perform_rebinding_with_section(rebindings, sect, slide,
symtab, strtab, indirect_symtab);
}
if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
//非懒加载表
perform_rebinding_with_section(rebindings, sect, slide,
symtab, strtab, indirect_symtab);
}
}
}
}
}
File Offset 为当前的静态偏移
符号表的地址 = 基址 + 符号表偏移量
字符串表的地址 = 基址 + 字符串表偏移量
间接符号表的地址 = 基址 + 间接符号表偏移量
hook 流程代码
/*
 * Rebinds every pointer of one symbol pointer section whose symbol name
 * matches a pending rebinding request.
 *
 * rebindings:      head of the list of rebinding requests.
 * section:         the symbol pointer section to process.
 * slide:           slide of the image; added to section->addr.
 * symtab:          symbol table of the image.
 * strtab:          string table of the image.
 * indirect_symtab: indirect symbol table of the image.
 *
 * For each pointer slot, the first matching request (in list order) wins:
 * the current pointer is stored through `replaced` when that is non-NULL and
 * the slot does not already hold the replacement, then the slot is
 * overwritten with the replacement.
 */
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  /* reserved1 is the index of this section's first indirect symbol entry. */
  uint32_t *symbol_indices = indirect_symtab + section->reserved1;
  void **bindings = (void **)((uintptr_t)slide + section->addr);

  for (uint slot = 0; slot < section->size / sizeof(void *); slot++) {
    const uint32_t symtab_index = symbol_indices[slot];
    if (symtab_index == INDIRECT_SYMBOL_ABS
        || symtab_index == INDIRECT_SYMBOL_LOCAL
        || symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
      continue;
    }

    const char *symbol_name = strtab + symtab[symtab_index].n_un.n_strx;
    /* Names shorter than two characters can never match a request. */
    if (!symbol_name[0] || !symbol_name[1]) {
      continue;
    }

    bool rebound = false;
    for (struct rebindings_entry *entry = rebindings;
         entry && !rebound;
         entry = entry->next) {
      for (uint k = 0; k < entry->rebindings_nel; k++) {
        struct rebinding *request = &entry->rebindings[k];
        /* The first character of the symbol name is skipped when comparing. */
        if (strcmp(&symbol_name[1], request->name) != 0) {
          continue;
        }
        if (request->replaced != NULL &&
            bindings[slot] != request->replacement) {
          *(request->replaced) = bindings[slot];
        }
        bindings[slot] = request->replacement;
        rebound = true;
        break;
      }
    }
  }
}
参考资料
blog.cnbluebox.com/blog/2017/1… 动态链接过程分析