Mach-O:fishhook 剖析

2,571 阅读5分钟

对于 iOS 上的 Mach-O 文件,其数据段中的标号(懒加载和非懒加载),都可通过 fishhook 实现标号的动态绑定,即可在程序运行的任何时刻进行多次绑定,且最后一次绑定有效。

被 dyld 处理过的标号, fishhook 再绑定过程分为三个步骤:

  1. 根据传入的原标号名称,找到替换的位置
  2. 读取原函数指针,写入传入的引用变量
  3. 写入新的替换标号,完成 hook

标号名查找

在数据段中有两个节与动态标号绑定相关,即 __nl_symbol_ptr 和 __la_symbol_ptr:

  •  __nl_symbol_ptr  中存储非懒加载的标号指针数组,在加载时完成绑定
  • __la_symbol_ptr 数组中的函数指针,在首次调用标号时由  dyld_stub_binder  动态绑定,也可以在启动时让 dyld 绑定

__la_symbol_ptr 中记录动态链接的 C 函数。应用启动之前,__la_symbol_ptr 中各标号对应的函数指针指向 __stub_helper 例程。在首次调用标号对应的函数时,应用调用 __stub_helper 取得绑定信息,并通过 dyld_stub_binder 更新 __la_symbol_ptr 中该标号的函数指针地址。

struct section(位于 <mach-o/loader.h> 中)结构体中的 offset 字段,提供标号指针表的偏移值。

/*
 * Section header used on 64-bit architectures.
 * Field order and widths define the binary layout and must not change.
 */
struct section_64 {
	char     sectname[16]; /* section name */
	char     segname[16];  /* name of the segment that holds this section */
	uint64_t addr;         /* memory address of the section */
	uint64_t size;         /* section size, in bytes */
	uint32_t offset;       /* file offset of the section */
	uint32_t align;        /* alignment, expressed as a power of 2 */
	uint32_t reloff;       /* file offset of the relocation entries */
	uint32_t nreloc;       /* count of relocation entries */
	uint32_t flags;        /* section type and attributes */
	uint32_t reserved1;    /* reserved (offset or index) */
	uint32_t reserved2;    /* reserved (count or sizeof) */
	uint32_t reserved3;    /* reserved */
};

标号指针表的偏移值 == 0x1020,请参考下面的标号指针表部分

链接原理

对于 text 段内部的代码之间的引用是通过基址+偏移的方式解决。对于多个 text 段之间的代码引用则需要借助 data段 来辅助完成,即(nonlazy, lazy, weak)bind 过程。对于采用 PIC 机制的 text 段内部的代码之间的引用也需要借助data段 来辅助完成,即 rebase 过程。

指针是运行时进行内存操作中的概念

递归库加载过程可参考 void ImageLoader::link(const LinkContext& context, bool forceLazysBound, bool preflightOnly, bool neverUnload, const RPathChain& loaderRPaths, const char* imagePath)

查找过程

为了找到标号的名称在哪个节中的哪个位置上,需要经过几个间接层的查找。下面是查找标号指针表中的函数指针所对应的标号名称的过程图:

标号指针表

有两种标号指针表,即 __nl_symbol_ptr 和 __la_symbol_ptr。标号指针表与间接符号表中的记录的 index 保持一对一映射。

下面我们以 __la_symbol_ptr 中的 _objc_autoreleasePoolPop 为例,验证整个查找过程。

_objc_autoreleasePoolPop 在标号指针表中 index == 0

标号指针表的偏移值为 0x1020,由 struct section 中的 offset 字段确定

间接标号表

_objc_autoreleasePoolPop 在间接标号表中的 index == 0

间接标号表中存储的是标号表中记录的索引,间接标号表与标号表都属于 __LINKEDIT 段。

0x63 = 99 标号表中 _objc_autoreleasePoolPop 下标

标号表

/*
 * One symbol-table record (64-bit form).
 * The symbol's name is located through n_un.n_strx, an index into the
 * string table.
 */
struct nlist_64 {
    union {
        uint32_t n_strx;   /* string-table index of the symbol name */
    } n_un;
    uint8_t  n_type;       /* type flag */
    uint8_t  n_sect;       /* section number, or NO_SECT */
    uint16_t n_desc;       /* see <mach-o/stab.h> */
    uint64_t n_value;      /* symbol value (or stab offset) */
};

标号表中存储 struct nlist (<mach-o/nlist.h>)数组,每个元素包含一个指向 __LINKEDIT 段中的字符串表的索引,即 **n_strx** 字段

_objc_autoreleasePoolPop 索引值:0x141  是16进制

字符串表

字符串表偏移值:10824 = 0x2a48

字符串表中存储实际的标号名称。根据 n_strx 字段的值,对于 __nl_symbol_ptr 和 __la_symbol_ptr 中的每一个指针,都可以在字符串表中找到对应的标号名称;与传入的标号名称比较成功之后,便可以替换 __nl_symbol_ptr 或 __la_symbol_ptr 中的对应指针。

n_strx + 字符串表基址偏移 ( string table offset ) = 0x141 + 0x2a48 = 0x2b89

源码分析

fishhook API

/*
 * Describes a single hook request: which symbol to rebind, what to bind it
 * to, and where to hand back the previous binding.
 */
struct rebinding {
  const char *name;     /* name of the function being hooked */
  void *replacement;    /* address of the new function */
  void **replaced;      /* out-pointer that receives the original function's address */
};
/*
 * Node of a singly linked list; each node carries one array of rebindings
 * together with its element count.
 */
struct rebindings_entry {
  struct rebinding *rebindings;    /* array of rebinding requests */
  size_t rebindings_nel;           /* number of elements in `rebindings` */
  struct rebindings_entry *next;   /* following node, or NULL at the tail */
};

/* Head of the list of registered rebindings; empty (NULL) until the first registration. */
static struct rebindings_entry *_rebindings_head = NULL;

/*
 * Public entry point for requesting hooks.
 *
 * rebindings      array of rebinding requests
 * rebindings_nel  number of elements in `rebindings`
 *
 * Returns an int status code.
 */
FISHHOOK_VISIBILITY
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel);

struct rebinding[] 都会经 struct rebindings_entry 封装,加入到以 _rebindings_head 为头的链表的头部

发起 hook 请求

/*
 * Records the requested rebindings and applies them.
 *
 * The request is first handed to prepend_rebindings() together with the
 * address of _rebindings_head; a negative result from that call is returned
 * unchanged and nothing else happens.
 *
 * Returns the value produced by prepend_rebindings().
 */
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  if (_rebindings_head->next == NULL) {
    /*
     * The list holds a single node, i.e. this is the first call: register
     * the add-image callback with dyld.
     *   1. images dyld has already loaded trigger the callback right away;
     *   2. images loaded later trigger it when dyld loads them.
     */
    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
    return status;
  }

  /* Not the first call: walk every image that is already loaded. */
  uint32_t image_total = _dyld_image_count();
  uint32_t image_idx = 0;
  while (image_idx < image_total) {
    _rebind_symbols_for_image(_dyld_get_image_header(image_idx),
                              _dyld_get_image_vmaddr_slide(image_idx));
    image_idx++;
  }
  return status;
}

/*
 * Hook entry point for one image. Forwards to rebind_symbols_for_image()
 * using the global list head _rebindings_head.
 *
 * header  Mach-O header of the image
 * slide   the image's offset within the process
 */
static void _rebind_symbols_for_image(const struct mach_header *header, intptr_t slide) {
  rebind_symbols_for_image(_rebindings_head, header, slide);
}

二进制映像信息查找 

/* Result record filled in by dladdr(). */
typedef struct dl_info {
        const char *dli_fname;   /* path of the Mach-O file */
        void       *dli_fbase;   /* base (start) address of that Mach-O */
        const char *dli_sname;   /* name of the closest symbol */
        void       *dli_saddr;   /* address of the closest symbol */
} Dl_info;

使用 dladdr() 读取 mach_header 所代表的 Mach-O 中的有效信息到 dl_info 中:

  • dli_fname:Mach-O文件路径 
  • dli_fbase:该Mach-O的起始地址,即基地址
  • dli_sname:最相似的标号名称 
  • dli_saddr :最相似的标号地址 

#define SEG_LINKEDIT "__LINKEDIT"  // 该段包含链接编辑器(link editor)创建与维护的所有结构体

static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
                                     const struct mach_header *header,
                                     intptr_t slide) {
  Dl_info info;
  if (dladdr(header, &info) == 0) {
  //在进程内寻址header信息
  //如果 mach_header 指定的 Mach-O 不在当前进程的地址空间中,返回 0 
    return;
  }

  //准备从 Mach-O 中寻址的变量
  segment_command_t *cur_seg_cmd;//当前命令,存在多个 LC_SEGMENT_64 段
  segment_command_t *linkedit_segment = NULL;//编译器段,结合 slide 计算基址
  struct symtab_command* symtab_cmd = NULL;//标号表,
  struct dysymtab_command* dysymtab_cmd = NULL;//动态标号表

  //遍历命令,获取段指针
  uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);//load command 位置
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {    //架构依赖
      if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
        linkedit_segment = cur_seg_cmd;
      }
    } else if (cur_seg_cmd->cmd == LC_SYMTAB) {            
      symtab_cmd = (struct symtab_command*)cur_seg_cmd;
    } else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {          
      dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
    }
  }
  if (!symtab_cmd 
        || !dysymtab_cmd 
        || !linkedit_segment 
        || !dysymtab_cmd->nindirectsyms) {
    return;
  }

  // Find base symbol/string table addresses
  //链接时程序的基址
  uintptr_t linkedit_base = (uintptr_t)slide 
                                + linkedit_segment->vmaddr 
                                - linkedit_segment->fileoff;
  //符号表的地址
  nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
  //字符串表的地址
  char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
  //间接(动态)符号表地址
  uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);

  cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
        //寻找到data段
      if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
          strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
        continue;
      }
      for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
        // + sizeof(section_t) * j
        section_t *sect = (section_t *)(cur + sizeof(segment_command_t)) + j;
        if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
          //懒加载表
          perform_rebinding_with_section(rebindings, sect, slide, 
                                         symtab, strtab, indirect_symtab);
        }
        if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
          //非懒加载表
          perform_rebinding_with_section(rebindings, sect, slide, 
                                         symtab, strtab, indirect_symtab);
        }
      }

    }
  }
}

File Offset 为当前的静态偏移

符号表的地址 = 基址 + 符号表偏移量 字符串表的地址 = 基址 + 字符串表偏移量

间接(动态)符号表地址 = 基址 + 动态符号表偏移量

hook 流程代码

/*
 * Rebinds the pointers of one symbol pointer section.
 *
 * rebindings       head of the rebinding list to match against
 * section          the symbol pointer section to process
 * slide            offset added to section->addr to reach the pointer array
 * symtab           symbol table
 * strtab           string table holding the symbol names
 * indirect_symtab  indirect symbol table
 *
 * For each pointer slot, the symbol name is looked up via the indirect
 * symbol table -> symbol table -> string table chain and compared, minus its
 * first character, with every requested name. On the first match the current
 * pointer is stored through `replaced` (when `replaced` is non-NULL and the
 * slot does not already hold the replacement), the slot is overwritten with
 * the replacement, and matching stops for that slot.
 *
 * NOTE(review): the slot is written directly. If the section's memory is
 * mapped read-only (this may apply to SEG_DATA_CONST on some OS versions;
 * confirm), the store would fault. No protection change is made here.
 */
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  /* reserved1 is this section's starting index in the indirect symbol table. */
  uint32_t *sym_indices = indirect_symtab + section->reserved1;
  void **slots = (void **)((uintptr_t)slide + section->addr);

  for (uint slot = 0; slot < section->size / sizeof(void *); slot++) {
    uint32_t sym_idx = sym_indices[slot];
    if (sym_idx == INDIRECT_SYMBOL_ABS ||
        sym_idx == INDIRECT_SYMBOL_LOCAL ||
        sym_idx == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
      continue;
    }

    char *sym_name = strtab + symtab[sym_idx].n_un.n_strx;
    /* Names shorter than two characters can never match, so skip them. */
    if (!(sym_name[0] && sym_name[1])) {
      continue;
    }

    bool rebound = false;
    for (struct rebindings_entry *entry = rebindings; entry && !rebound; entry = entry->next) {
      for (uint k = 0; k < entry->rebindings_nel && !rebound; k++) {
        struct rebinding *request = &entry->rebindings[k];
        /* Compare from the second character (presumably skipping a leading '_'). */
        if (strcmp(sym_name + 1, request->name) != 0) {
          continue;
        }
        if (request->replaced != NULL && slots[slot] != request->replacement) {
          *(request->replaced) = slots[slot];
        }
        slots[slot] = request->replacement;
        rebound = true;  /* first match wins; move on to the next slot */
      }
    }
  }
}

参考资料

github.com/facebook/fi… 

blog.cnbluebox.com/blog/2017/1… 动态链接过程分析

objccn.io/issue-19-4/