APM - iOS 基础功能 Hook - Fishhook 原理解析

816 阅读9分钟

简介

fishhook是动态重新绑定iOS中Mach-O二进制文件中符号的库。

这是一个很好的用于hook libSystem的方式,以便调试和追踪。iOS系统中多数系统库都是动态库的形式,由dyld在运行时加载。可以应用于hook load方法,hook GCD的方法等。

功能类似于在OS X中使用的DYLD_INTERPOSE。

DYLD_INTERPOSE

iOS/OS X系统中,进程首先加载可执行文件,然后从Mach-O文件的Load Command中找到dyld的加载路径,之后交给dyld进行动态链接,通常采用/usr/lib/dyld作为动态链接器。

dyld提供了一些独有的特性,如函数拦截等。DYLD_INTERPOSE宏定义允许一个库将其函数实现替换为另一个函数实现。

 #if !defined(_DYLD_INTERPOSING_H_)
 #define _DYLD_INTERPOSING_H_

 /*
  * DYLD_INTERPOSE(_replacement, _replacee)
  *
  * Emits a static {replacement, replacee} pair of function addresses into the
  * "__DATA,__interpose" section. The struct variable is named after the
  * replacee (_interpose_<replacee>) and marked `used` so that it is kept even
  * though nothing in the translation unit references it.
  *
  *   _replacement  function that should be called instead
  *   _replacee     function whose implementation is being replaced
  */
 #define DYLD_INTERPOSE(_replacement,_replacee) \
    __attribute__((used)) static struct{ const void* replacement; const void* replacee; } _interpose_##_replacee \
             __attribute__ ((section ("__DATA,__interpose"))) = { (const void*)(unsigned long)&_replacement, (const void*)(unsigned long)&_replacee };

 #endif

一、Mach-O结构

注意一下Mach-O的结构,以及Segment Command指向Section X Data

Section是编译器对.o内容的划分,将同类资源在逻辑上划分到一起

Segment是按照虚拟内存访问权限来分配的,是权限属性相同section的集合

  • TEXT(VM_PROT_READ/VM_PROT_EXECUTE)

  • DATA(VM_PROT_READ/VM_PROT_WRITE)

  • LINKEDIT(VM_PROT_READ)

二、Fishhook代码结构

1. Public API

 /*
  * A structure representing a particular intended rebinding from a symbol
  * name to its replacement.
  *
  * name:        symbol name to look for; the rebinding code compares it
  *              against each symbol-table name with that name's first
  *              character skipped.
  * replacement: pointer that is written into the matching symbol pointer slot.
  * replaced:    optional out-pointer; when it is not NULL the pointer that was
  *              previously stored in the slot is written to *replaced.
  */ struct rebinding {

  const char *name;

  void *replacement;

  void **replaced;

};



 /*
  * For each rebinding in rebindings, rebinds references to external, indirect
  * symbols with the specified name to instead point at replacement for each
  * image in the calling process as well as for all future images that are loaded
  * by the process. If rebind_symbols is called more than once, the symbols to
  * rebind are added to the existing list of rebindings, and if a given symbol
  * is rebound more than once, the later rebinding will take precedence.
  */
 // Replaces the symbols in every image.
 int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel);

 /*
  * Rebinds as above, but only in the specified image. The header should point
  * to the mach-o header, the slide should be the slide offset. Others as above.
  */
 // Replaces the symbols in the specified image only.
 int rebind_symbols_image(void *header,
                          intptr_t slide,
                          struct rebinding rebindings[],
                          size_t rebindings_nel);

**

2. 内部实现

/* Copies `rebindings` into a new list node and pushes it onto *rebindings_head. */
static int prepend_rebindings(struct rebindings_entry **rebindings_head,
                              struct rebinding rebindings[],
                              size_t nel);

/* Applies every matching rebinding to the symbol pointers of one section. */
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab);

/* Locates the symbol tables of one image and rebinds its symbol pointer sections. */
static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
                                     const struct mach_header *header,
                                     intptr_t slide);

/*
 * Callback handed to dyld:
 *   _dyld_register_func_for_add_image(_rebind_symbols_for_image);
 */
static void _rebind_symbols_for_image(const struct mach_header *header,
                                      intptr_t slide);

/* Public entry point: rebinds only inside the given image. */
int rebind_symbols_image(void *header,
                         intptr_t slide,
                         struct rebinding rebindings[],
                         size_t rebindings_nel);

/* Public entry point: rebinds inside every image. */
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel);

三、Fishhook使用

#import "ViewController.h"

#import "fishhook.h"

@interface ViewController ()

@end

/// Receives the pointer that was bound to NSLog before the rebinding; written by
/// rebind_symbols through the `replaced` field. Declared ahead of the
/// implementation so that viewDidLoad can take its address.
static void (*old_nslog)(NSString *format, ...);

/// Replacement installed over NSLog. Forward-declared so that viewDidLoad can
/// reference it before its definition.
void myNSLog(NSString *format, ...);

@implementation ViewController

- (void)viewDidLoad {
    [super viewDidLoad];

    // Logged before the hook is installed.
    NSLog(@"hello world");

    struct rebinding nslogBind;
    nslogBind.name = "NSLog";
    nslogBind.replacement = (void *)myNSLog;
    nslogBind.replaced = (void **)&old_nslog;

    struct rebinding rebs[] = {nslogBind};
    rebind_symbols(rebs, sizeof(rebs) / sizeof(rebs[0]));
}

/// Expands the caller's format and variadic arguments, appends the marker
/// suffix, and forwards the finished text to the saved implementation.
/// Forwarding through "%@" keeps '%' characters in the expanded message from
/// being interpreted a second time.
void myNSLog(NSString *format, ...) {
    if (format == nil) {
        // Nothing to expand; hand the nil format straight to the saved pointer.
        old_nslog(format);
        return;
    }

    va_list arguments;
    va_start(arguments, format);
    NSString *message = [[NSString alloc] initWithFormat:format arguments:arguments];
    va_end(arguments);

    old_nslog(@"%@", [message stringByAppendingString:@"勾上了"]);
}

- (void)touchesBegan:(NSSet<UITouch *> *)touches withEvent:(UIEvent *)event {
    NSLog(@"点击了屏幕!");
}

@end

四、Fishhook原理

dyld通过更新Mach-O二进制文件__DATA段中特定section里的指针,来绑定懒加载和非懒加载符号。fishhook重新绑定这些符号的方式是:先为每个传入rebind_symbols的符号名确定需要更新的位置,再把对应的替换实现写入这些位置。

在给定的Image中,__DATA段包括两个跟动态符号绑定的区域__nl_symbol_ptr和__la_symbol_ptr.

__nl_symbol_ptr是一个指针数组,指向非懒加载绑定的数据(这些数据在库被加载的时候完成绑定)

__la_symbol_ptr是一个指针数组,指向导入的函数,通常由一个叫做dyld_stub_binder的例程在符号首次被调用的时候绑定(也可以指定dyld在启动的时候完成绑定)

为了在这些section中找到指定符号名字的位置,我们必须跳转几个间接层。

对于这两个section,section header(<mach-o/loader.h>中的struct section结构体)在reserved1字段中提供了一个偏移量,指向间接符号表(indirect symbol table)中的起始位置。间接符号表位于二进制文件的__LINKEDIT Segment中,它只是一个索引数组,其中每个索引值指向符号表(同样位于__LINKEDIT)中的一项,并且这些索引的排列顺序与非懒加载、懒加载符号section中指针的排列顺序完全一致。因此,给定结构体struct section nl_symbol_ptr,该section中第一个地址在符号表中对应的索引就是indirect_symbol_table[nl_symbol_ptr->reserved1]。

这个符号表本身是一个struct nlist的数组(结构体在<mach-o/nlist.h>),并且每个nlist包括了一个索引到__LINKEDIT string table,那里是实际存储符号名称的地方。

对于每个__nl_symbol_ptr和__la_symbol_ptr的指针,我们可以找到对应的符号名称字符串,逐个遍历和需要替换的符号名字做比较,并且如果匹配的话,我们就用指定的新的函数指针,替换这个指针。

从一个给定的懒加载或者非懒加载符号表中寻找给定的entry名称的过程,如图所示:

__got

对于程序段__text里的代码,对数据型符号的引用,指向到了__got;可以把__got看作是一个表,每个条目是一个地址值。

在符号绑定(binding)前,__got里所有条目的内容都是 0,当镜像被加载时,dyld 会对__got每个条目所对应的符号进行重定位,将其真正的地址填入,作为条目的内容。__got各个条目的具体值,在加载期会被 dyld 重写,这也是为啥这个 section 被分配在 __DATA segment 的原因。

dyld_stub_binder

函数符号在第一次使用时才进行绑定,是通过dyld_stub_binder来进行符号查找和地址重定位。绑定好之后函数地址会被写到__la_symbol_ptr条目中,再次访问的时候可以直接跳转。

dyld_stub_binder使用汇编实现的,在 dyld_stub_binder.s 中。

调用链路如下:

 // 汇编中调用 fastBindLazySymbol

1. dyld::fastBindLazySymbol



 // 调用 ImageLoader 处理

2. ImageLoaderMachOCompressed::doBindFastLazySymbol



 // 符号绑定

3. ImageLoaderMachOCompressed::bindAt



 // 符号地址解析

4. ImageLoaderMachOCompressed::resolve



 // 符号地址更新

5. ImageLoaderMachO::bindLocation

五、Fishhook代码解析

示例代码

print.c

 #include <stdio.h>



/* Global string; main.c refers to it through an extern declaration. */
char *global = "hello";

/* Writes str followed by a newline to standard output using printf. */
void print(char *str)

{

 printf("%s\n", str);

}

main.c

/* Declared here only; the definition is in print.c. */
void print(char *str);

/* Data symbol defined in print.c. */
extern char *global;

/* Calls print with the external string and returns 0. */
int main()

{

 print(global);

  return  0;

}

1. 注册镜像加载回调,遍历所有的Image

/*
 * Adds `rebindings` to the global rebinding list and applies the list to the
 * process's images.
 *
 * Returns the result of prepend_rebindings: a negative value when the new list
 * node could not be created, otherwise that function's success value.
 */
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  const int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  if (_rebindings_head->next == NULL) {
    // The list holds a single node, so this is the first call: register the
    // add-image callback (which is also invoked for existing images).
    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
    return status;
  }

  // Later calls: the callback is already registered, so walk the images that
  // are currently known to dyld by hand.
  const uint32_t image_count = _dyld_image_count();
  uint32_t index = 0;
  while (index < image_count) {
    _rebind_symbols_for_image(_dyld_get_image_header(index),
                              _dyld_get_image_vmaddr_slide(index));
    index++;
  }
  return status;
}



/*
 * Two-argument adapter with the signature expected by the add-image callback:
 * forwards the image header and slide together with the global rebinding list.
 */
static void _rebind_symbols_for_image(const struct mach_header *header, intptr_t slide) {
  struct rebindings_entry *registered = _rebindings_head;
  rebind_symbols_for_image(registered, header, slide);
}

_dyld_register_func_for_add_image

注册了一个镜像被加载时的回调函数

  • 当镜像被加载但未初始化前就会调用注册的回调函数,回调函数的两个入参分别表示加载的镜像的头结构和对应的Slide值
  • 如果在调用_dyld_register_func_for_add_image时系统已经加载了某些映像,则会分别对这些加载完毕的每个映像调用注册的回调函数

2. prepend_rebindings方法使用头插法把结构体数组转成了单链表

/* Head of the global singly linked list of registered rebinding batches. */
static struct rebindings_entry *_rebindings_head;



 /*
  * One node of that list; each node stores one input rebindings array.
  *
  * rebindings:     the array of rebindings held by this node
  * rebindings_nel: number of elements in `rebindings`
  * next:           next node in the list (NULL at the tail)
  */ struct rebindings_entry {

  struct rebinding *rebindings;

  size_t rebindings_nel;

  struct rebindings_entry *next;

};



/*
 * Pushes a private copy of `rebindings` onto the front of the list.
 *
 * rebindings_head: in/out pointer to the list head; updated on success.
 * rebindings:      array to copy.
 * nel:             number of elements in `rebindings`.
 *
 * Returns 0 on success, -1 if either allocation fails (nothing is leaked and
 * the list is left untouched in that case).
 */
static int prepend_rebindings(struct rebindings_entry **rebindings_head,
                              struct rebinding rebindings[],
                              size_t nel) {
  struct rebindings_entry *node = malloc(sizeof(*node));
  if (node == NULL) {
    return -1;
  }

  node->rebindings = malloc(sizeof(struct rebinding) * nel);
  if (node->rebindings == NULL) {
    free(node);
    return -1;
  }
  memcpy(node->rebindings, rebindings, sizeof(struct rebinding) * nel);
  node->rebindings_nel = nel;

  // Head insertion: the newest batch becomes the first node.
  node->next = *rebindings_head;
  *rebindings_head = node;
  return 0;
}

3. rebind_symbols_for_image从Image中寻找S_LAZY_SYMBOL_POINTERS和S_NON_LAZY_SYMBOL_POINTERS这两个section

在LC_SYMTAB中找到Symbol Table Offset,String Table Offset

在LC_DYSYMTAB找到IndSym Table Offset

static void rebind_symbols_for_image(struct rebindings_entry *rebindings,

                                     const struct mach_header *header,

                                     intptr_t slide) {



  // 4.1 从LC_Command寻找linkedit_segment, symtab_cmd, dysymtab_cmd地址

  Dl_info info;

  if (dladdr(header, &info) == 0) {

    return;

  }



  segment_command_t *cur_seg_cmd;

  segment_command_t *linkedit_segment = NULL;

  struct symtab_command* symtab_cmd = NULL;

  struct dysymtab_command* dysymtab_cmd = NULL;



  uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);

  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {

    cur_seg_cmd = (segment_command_t *)cur;

    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {

      if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {

        linkedit_segment = cur_seg_cmd;

      }

    } else if (cur_seg_cmd->cmd == LC_SYMTAB) {

      symtab_cmd = (struct symtab_command*)cur_seg_cmd;

    } else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {

      dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;

    }

  }



  // 4.2 寻找symtab, strtab, indirect_symtab地址

  if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||

      !dysymtab_cmd->nindirectsyms) {

    return;

  }



  // Find base symbol/string table addresses

  uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;

  nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);

  char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);



  // Get indirect symbol table (array of uint32_t indices into symbol table)

  uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);



  // 4.3 遍历LC_Command寻找SEG_DATA, SEG_DATA_CONST这两个Segment;从这两个Segment寻找S_LAZY_SYMBOL_POINTERS, S_NON_LAZY_SYMBOL_POINTERS这两个Section,执行perform_rebinding_with_section.

  cur = (uintptr_t)header + sizeof(mach_header_t);

  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {

    cur_seg_cmd = (segment_command_t *)cur;

    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {

      if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&

          strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {

        continue;

      }

      for (uint j = 0; j < cur_seg_cmd->nsects; j++) {

        section_t *sect =

          (section_t *)(cur + sizeof(segment_command_t)) + j;

        if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {

          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);

        }

        if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {

          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);

        }

      }

    }

  }

}

4. perform_rebinding_with_section函数的参数分别是需要执行替换和被替换的结构体的单链表rebindings,S_LAZY_SYMBOL_POINTERS, S_NON_LAZY_SYMBOL_POINTERS这两个Section的地址,偏移量,symtab地址,strtab地址,indirect_symtab地址

在S_NON_LAZY_SYMBOL_POINTERS中找到Address,找到__got Section中的符号绑定表。同样方式,S_LAZY_SYMBOL_POINTERS中找到__la_symbol_ptr符号绑定表。

这就是符号绑定表,最后会修改这边的Data,来填充最终执行的函数指针。

在S_NON_LAZY_SYMBOL_POINTERS中找到Indirect Sym Index为1

在Indirect Symbols中找到Index为1的符号,这个就是__got这个section的开始位置。

遍历这个section,获取到Data值,即符号Index。

使用Index在Symbol Table找到对应的符号。

如上图,对应的index3和index5的符号,String Table Index的Data是0x0000001C和0x0000002B。这也是这个符号名字在String Table中存储的位置。从String Table中找到这个名字。

可以对照16进制ASCII码表看一下,0x5F是_,0x67是g,0x6C是l,0x6F是o,下一行后面几个分别是b,a,l,正是图中Value显示的值。

最终,遍历符号绑定表,遍历需要重新绑定的单链表,使用符号名称做匹配,匹配之后,**replaced指向原来indirect_symbol_bindings[i]中记录的指针,indirect_symbol_bindings[i]指针替换为cur->rebindings[j].replacement

/*
 * Applies the rebinding list to one symbol pointer section.
 *
 * rebindings:      list of rebinding batches; earlier nodes win over later ones.
 * section:         the symbol pointer section being processed.
 * slide:           slide of the image, added to section->addr.
 * symtab:          symbol table of the image.
 * strtab:          string table of the image.
 * indirect_symtab: indirect symbol table of the image.
 *
 * For each pointer slot, the symbol name is looked up through the indirect
 * symbol table and the symbol table. When a rebinding with that name exists,
 * the old pointer is saved to *replaced (if requested) and the slot is
 * overwritten with the replacement.
 *
 * NOTE(review): the slot is written directly. If the section is mapped
 * read-only at run time the store would fault — confirm whether write
 * permission has to be raised before this function is used on such sections.
 */
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  // reserved1 is this section's starting index in the indirect symbol table.
  uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;
  void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);
  const size_t slot_count = section->size / sizeof(void *);

  for (size_t i = 0; i < slot_count; i++) {
    const uint32_t symtab_index = indirect_symbol_indices[i];
    if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
      // These markers are not indices into the symbol table.
      continue;
    }

    const char *symbol_name = strtab + symtab[symtab_index].n_un.n_strx;
    // The comparison below skips the first character of the stored name, so a
    // name needs at least two characters to be a candidate. Checked once per
    // slot instead of calling strlen for every rebinding.
    if (symbol_name[0] == '\0' || symbol_name[1] == '\0') {
      continue;
    }

    for (struct rebindings_entry *cur = rebindings; cur != NULL; cur = cur->next) {
      for (size_t j = 0; j < cur->rebindings_nel; j++) {
        struct rebinding *candidate = &cur->rebindings[j];
        if (strcmp(&symbol_name[1], candidate->name) != 0) {
          continue;
        }
        // Save the previous pointer unless the slot already holds the
        // replacement (which would make the "original" point at the hook).
        if (candidate->replaced != NULL &&
            indirect_symbol_bindings[i] != candidate->replacement) {
          *(candidate->replaced) = indirect_symbol_bindings[i];
        }
        indirect_symbol_bindings[i] = candidate->replacement;
        goto next_slot;  // first match wins for this slot
      }
    }
  next_slot:;
  }
}

六、Fishhook应用

由于可以hook系统函数,使用场景很多,可以用于监控和调试、安全和逆向等场景。这边只做简单介绍。

  1. Wechat-Matrix

耗电监控中,Hook GCD函数,用于获取异步任务堆栈

// Rebinds `func` to WRAP(func) and stores the previously bound pointer in
// ORIFUNC(func). The symbol name is produced by stringizing `func`.
#define BEGIN_HOOK(func) \
ks_rebind_symbols((struct ks_rebinding[2]){{#func, WRAP(func), (void *)&ORIFUNC(func)}}, 1);

// Declares `func`, its wrapper WRAP(func) and the ORIFUNC(func) pointer, then
// opens the wrapper's definition. The user supplies the body and the closing
// brace after the macro invocation.
#define HOOK_FUNC(ret_type, func, ...) \
ret_type func(__VA_ARGS__); \
static ret_type WRAP(func)(__VA_ARGS__); \
static ret_type (*ORIFUNC(func))(__VA_ARGS__); \
ret_type WRAP(func)(__VA_ARGS__) {



    // 1. hook dispatch

    BEGIN_HOOK(dispatch_async);

    BEGIN_HOOK(dispatch_after);

    BEGIN_HOOK(dispatch_barrier_async);

    

    BEGIN_HOOK(dispatch_async_f);

    BEGIN_HOOK(dispatch_after_f);

    BEGIN_HOOK(dispatch_barrier_async_f);



// Wrapper for dispatch_async: the submitted block is first wrapped by
// blockRecordAsyncTrace, then handed to the original implementation.
HOOK_FUNC(void, dispatch_async, dispatch_queue_t queue, dispatch_block_t block)
    dispatch_block_t tracedBlock = blockRecordAsyncTrace(block);
    orig_dispatch_async(queue, tracedBlock);
}



/// Wraps `block` so that the stack captured at submission time is registered in
/// asyncOriginThreadDict, keyed by the executing thread, while `block` runs.
///
/// @param block The block that was submitted by the caller.
/// @return A new block that registers the captured frames, runs `block`, and
///         then removes the registration again.
static inline dispatch_block_t blockRecordAsyncTrace(dispatch_block_t block)
{
    // 1. Snapshot the submitting stack and box each frame address.
    AsyncStackTrace capturedTrace = getCurAsyncStackTrace();
    NSMutableArray *frames = [[NSMutableArray alloc] init];
    int frameIndex = 0;
    while (frameIndex < capturedTrace.size) {
        unsigned long address = (unsigned long)capturedTrace.backTrace[frameIndex];
        [frames addObject:[NSNumber numberWithUnsignedLong:address]];
        frameIndex++;
    }
    // The raw buffer is no longer needed once the addresses are boxed.
    free(capturedTrace.backTrace);
    capturedTrace.backTrace = NULL;

    // 2. Build the wrapper that runs on the target queue.
    dispatch_block_t wrappedBlock = ^() {
        // Register the frames under the executing thread's identifier.
        pthread_mutex_lock(&m_threadLock);
        thread_t runningThread = (thread_t)ksthread_self();
        NSNumber *threadKey = [[NSNumber alloc] initWithInt:runningThread];
        [asyncOriginThreadDict setObject:frames forKey:threadKey];
        pthread_mutex_unlock(&m_threadLock);

        block();

        // Drop the registration once the caller's block has finished.
        pthread_mutex_lock(&m_threadLock);
        BOOL stillRegistered = (threadKey != nil) &&
                               ([asyncOriginThreadDict objectForKey:threadKey] != nil);
        if (stillRegistered) {
            [asyncOriginThreadDict removeObjectForKey:threadKey];
        }
        pthread_mutex_unlock(&m_threadLock);
    };

    // 3. Hand the wrapper back to the hook.
    return wrappedBlock;
}

2. KSCrash

hook __cxa_throw ,用于track CPP抛出异常函数

static void setEnabled(bool isEnabled)

{

    if(isEnabled != g_isEnabled)

    {

        g_isEnabled = isEnabled;

        if(isEnabled)

        {

            initialize();



            ksid_generate(g_eventID);

            g_originalTerminateHandler = std::set_terminate(CPPExceptionTerminate);

            

            ks_rebinding item;

            item.name = "__cxa_throw";

            item.replacement = (void *)my_cxa_throw;

            item.replaced = (void **)&orig_cxa_throw;

            ks_rebind_symbols(&item, 1);

//            int ret_val = ks_rebind_symbols(&item, 1);

//            KSLOG_DEBUG("rebind ret: %d", ret_val);

        }

        else

        {

            std::set_terminate(g_originalTerminateHandler);

        }

        g_captureNextStackTrace = isEnabled;

    }

}

3. MLeaksFinder

hook objc_setAssociatedObject 用于track强引用

/// Installs the association-tracking hooks. Compiled to an empty method unless
/// _INTERNAL_RCD_ENABLED is set.
+ (void)hook
{
#if _INTERNAL_RCD_ENABLED
  // Hold the hook mutex for the rest of the method.
  std::lock_guard<std::mutex> guard(*FB::AssociationManager::hookMutex);

  // Both runtime functions are rebound in one call; each entry stores the
  // previous implementation in the matching fb_orig_* pointer.
  struct rcd_rebinding associationHooks[2] = {
    {
      "objc_setAssociatedObject",
      (void *)FB::AssociationManager::fb_objc_setAssociatedObject,
      (void **)&FB::AssociationManager::fb_orig_objc_setAssociatedObject
    },
    {
      "objc_removeAssociatedObjects",
      (void *)FB::AssociationManager::fb_objc_removeAssociatedObjects,
      (void **)&FB::AssociationManager::fb_orig_objc_removeAssociatedObjects
    }
  };
  rcd_rebind_symbols(associationHooks, 2);

  FB::AssociationManager::hookTaken = true;
#endif //_INTERNAL_RCD_ENABLED
}



/**
 * Replacement for objc_setAssociatedObject: updates the tracking data under the
 * association mutex, then forwards the call to the original implementation.
 */
static void fb_objc_setAssociatedObject(id object, void *key, id value, objc_AssociationPolicy policy) {
    // Only strong (retaining) associations are tracked.
    const bool retainsValue = (policy == OBJC_ASSOCIATION_RETAIN ||
                               policy == OBJC_ASSOCIATION_RETAIN_NONATOMIC);

    {
      std::lock_guard<std::mutex> guard(*_associationMutex);
      if (!retainsValue) {
        // The policy for this key may have changed, so forget the key.
        _threadUnsafeResetAssociationAtKey(object, key);
      } else {
        _threadUnsafeSetStrongAssociation(object, key, value);
      }
    }

    /*
     The original call is made after the scoped lock above has been released;
     holding the lock here could deadlock. When a reference is set to nil the
     runtime releases the old value, which may dealloc it, and the released
     object may itself clean up associated objects and re-enter this hook.

     The price is that the tracking is not 100% accurate: two threads setting
     the same object and key (one a value, the other nil) can race, and the
     value can be missed.
     */
    fb_orig_objc_setAssociatedObject(object, key, value, policy);
}



/**
 * Records `key` as a strong association of `object`; a nil `value` resets the
 * key instead. Takes no lock itself (see the "threadUnsafe" name), so the
 * caller is expected to provide synchronisation.
 */
void _threadUnsafeSetStrongAssociation(id object, void *key, id value) {
    if (!value) {
      _threadUnsafeResetAssociationAtKey(object, key);
      return;
    }

    // Reuse the object's key set if it exists, otherwise create and register one.
    auto found = _associationMap->find(object);
    ObjectAssociationSet *keys = nullptr;
    if (found == _associationMap->end()) {
      keys = new ObjectAssociationSet;
      (*_associationMap)[object] = keys;
    } else {
      keys = found->second;
    }
    keys->insert(key);
}
4. 调试与反调试

Hook method_getImplementation、method_setImplementation函数、sysctl函数、dlsym函数,用于调试和反调试

#import "InjectCode.h"

#import "fishhook.h"

#import <sys/sysctl.h>

@implementation InjectCode

// Pointer to the original sysctl; filled in by rebind_symbols in +load.
int  (*sysctl_p)(int *, u_int, void *, size_t *, void *, size_t);

// Replacement for sysctl. Every call is forwarded to the original. For a
// successful CTL_KERN / KERN_PROC / KERN_PROC_PID query whose output buffer is
// exactly one struct kinfo_proc, the P_TRACED flag is cleared in the result.
int my_sysctl(int *name, u_int namelen, void *info, size_t *infosize, void *newInfo, size_t newInfoSize) {
    // Decide before the call: *infosize is the caller's buffer size on input.
    // Pointer checks come first so nothing is dereferenced through NULL.
    BOOL isProcessQuery = (namelen == 4
                           && name != NULL
                           && name[0] == CTL_KERN
                           && name[1] == KERN_PROC
                           && name[2] == KERN_PROC_PID
                           && info != NULL
                           && infosize != NULL
                           && *infosize == sizeof(struct kinfo_proc));

    int err = sysctl_p(name, namelen, info, infosize, newInfo, newInfoSize);

    // Only touch the buffer when the original call reported success;
    // otherwise its contents were not produced by sysctl.
    if (isProcessQuery && err == 0) {
        struct kinfo_proc *processInfo = (struct kinfo_proc *)info;
        // Clear the traced bit; a no-op when it is not set.
        processInfo->kp_proc.p_flag &= ~P_TRACED;
    }

    return err;
}

// Installs the hook when the class is loaded.
+(void)load
{
    rebind_symbols((struct rebinding[1]){{"sysctl", (void *)my_sysctl, (void **)&sysctl_p}}, 1);
}

@end