记 boost 协程切换 bug 发现和分析

在macosx i386和mips平台上实现协程切换时,发现boost那套汇编实现是有问题的,如果放到tbox切换demo上运行,会直接挂掉。


static tb_void_t func1(tb_context_from_t from)
    // check
    tb_context_ref_t* contexts = (tb_context_ref_t*)from.priv;

    // 先保存下主函数入口context,方便之后切换回去
    contexts[0] = from.context;

    // 初始化切换到func2
    from.context = contexts[2];

    // loop
    tb_size_t count = 10;
    while (count--)
        // trace
        tb_trace_i("func1: %lu", count);

        // 切换到func2,返回后更新from中的context地址
        from = tb_context_jump(from.context, contexts);

    // 切换回主入口函数
    tb_context_jump(contexts[0], tb_null);
static tb_void_t func2(tb_context_from_t from)
    // check
    tb_context_ref_t* contexts = (tb_context_ref_t*)from.priv;

    // loop
    tb_size_t count = 10;
    while (count--)
        // trace
        tb_trace_i("func2: %lu", count);

        // 切换到func1,返回后更新from中的context地址
        from = tb_context_jump(from.context, contexts);

    // 切换回主入口函数
    tb_context_jump(contexts[0], tb_null);
static tb_void_t test()
    // 定义全局堆栈
    static tb_context_ref_t contexts[3];
    static tb_byte_t        stacks1[8192];
    static tb_byte_t        stacks2[8192];

    // 生成两个context上下文,绑定对应函数和堆栈
    contexts[1] = tb_context_make(stacks1, sizeof(stacks1), func1);
    contexts[2] = tb_context_make(stacks2, sizeof(stacks2), func2);

    // 切换到func1并传递私有参数:context数组
    tb_context_jump(contexts[1], contexts);





  1. macosx i386下,从func2切换回到func1时发生了崩溃
  2. mips32下,在执行完10次来回切换后,切回主函数时,发生了崩溃

macosx i386下的问题分析

我们先来分析下macosx i386的这个问题,由于之前tbox已经参考了boost的linux i386下的实现,完成了上下文切换,是能正常运行的。


大体可以猜到,应该是调用栈布局的不同导致的问题,因此我们看下macosx上的boost jump实现:

.globl _jump_fcontext
.align 2
    pushl  %ebp  /* save EBP */
    pushl  %ebx  /* save EBX */
    pushl  %esi  /* save ESI */
    pushl  %edi  /* save EDI */

    /* store fcontext_t in ECX */
    movl  %esp, %ecx

    /* first arg of jump_fcontext() == context jumping to */
    movl  0x18(%esp), %eax

    /* second arg of jump_fcontext() == data to be transferred */
    movl  0x1c(%esp), %edx

    /* restore ESP (pointing to context-data) from EAX */
    movl  %eax, %esp

    /* address of returned transport_t */
    movl 0x14(%esp), %eax
    /* return parent fcontext_t */
    movl  %ecx, (%eax)
    /* return data */
    movl %edx, 0x4(%eax)

    popl  %edi  /* restore EDI */
    popl  %esi  /* restore ESI */
    popl  %ebx  /* restore EBX */
    popl  %ebp  /* restore EBP */

    /* jump to context */
    ret $4

jump_fcontext的参数原型是:struct(context, data) = jump_fcontext(context, data),跟tbox的tb_context_jump差不多


而从上面的代码中可以看到,从esp + 0x18处取了第一个参数context,esp + 0x1c取的是第二个参数data,换算到_jump_fcontext的入口处,栈布局如下:


esp + 12: data参数
esp + 8:  context参数
esp + 4:  ??
esp    :  _jump_fcontext的返回地址

按照i386的调用栈布局,函数入口处第一个参数,应该是通过 esp + 4 访问的,那为什么context参数却是在esp + 8处呢,esp + 4指向的内容又是什么?


pushl data
pushl context 
pushl hidden 
call _jump_fcontext
addl $12, %esp

其实编译器在调用_jump_fcontext处,实际压入了三个参数,其中esp + 4指向的hidden数据,就是用来存放_jump_fcontext返回的struct数据的栈空间地址

用于在_jump_fcontext内部,设置返回struct(context, data)的数据,也就是:

/* address of returned transport_t */
movl 0x14(%esp), %eax
/* return parent fcontext_t */
movl %ecx, (%eax)
/* return data */
movl %edx, 0x4(%eax)

说白了,linux i386上返回struct数据,是通过传入一个指向栈空间的变量指针,作为隐藏的第一个参数,用于设置struct数据返回。

而boost在macosx i386上,也直接照搬了这种布局来实现,那macosx上是否真的也是这么做的呢?


static tb_context_from_t test()
    tb_context_from_t from = {0};
    return from;


__text:00051BD0 _test           proc near               
__text:00051BD0 var_10          = dword ptr -10h
__text:00051BD0 var_C           = dword ptr -0Ch
__text:00051BD0 var_8           = dword ptr -8
__text:00051BD0 var_4           = dword ptr -4
__text:00051BD0                 push    ebp
__text:00051BD1                 mov     ebp, esp
__text:00051BD3                 sub     esp, 10h
__text:00051BD6                 mov     [ebp+var_C], 0
__text:00051BDD                 mov     [ebp+var_10], 0
__text:00051BE4                 mov     [ebp+var_4], 0
__text:00051BEB                 mov     [ebp+var_8], 0
__text:00051BF2                 mov     eax, [ebp+var_8]
__text:00051BF5                 mov     edx, [ebp+var_4]
__text:00051BF8                 add     esp, 10h
__text:00051BFB                 pop     ebp
__text:00051BFC                 retn
__text:00051BFC _test           endp

可以看到,实际上并没有像linux上那样通过一个struct指针来返回,而是直接将struct(context, data),通过 eax, edx 进行返回。



1. 调整jump实现,改用eax,edx直接返回from结构体
2. 由于不再像linux那样通过保留一个额外的栈空间返回struct,可以把linux那种跳板实现去掉,改为直接jump到实际位置(提升切换效率)



static tb_void_t func1(tb_context_from_t from)
    // check
    tb_context_ref_t* contexts = (tb_context_ref_t*)from.priv;

    // 先保存下主函数入口context,方便之后切换回去
    contexts[0] = from.context;

    // 初始化切换到func2
    from.context = contexts[2];

    // loop
    tb_size_t count = 10;
    while (count--)
        // trace
        tb_trace_i("func1: %lu", count);

        // 切换到func2,返回后更新from中的context地址
        from = tb_context_jump(from.context, contexts);

    // 切换回主入口函数
    tb_context_jump(contexts[0], tb_null);   <-----  此处发生崩溃


可能是传入jump的参数不对导致的问题,最有可能的是 contexts[0] 指向的主函数上下文地址已经不对了。

通过printf确认,确实值不对了,那么在func1入口处这个contexts[0],是否正确呢,我又继续printf了下,居然还是不对。 = =

然后,我又继续打印contexts[0], contexts[1], contexts[2]这三个在func1入口处的值,发现只有contexts[2]是对的



1. contexts这块buffer的前两处数据,在jump切换到func1的时候被自动改写了
2. 而且改写后的数据值,正好是from里面的context和data


那什么情况下, contexts指向的数据会发生越界呢,可以先看下contexts的定义:

static tb_void_t test()
    // 定义全局堆栈
    static tb_context_ref_t contexts[3];
    static tb_byte_t        stacks1[8192];
    static tb_byte_t        stacks2[8192];

    // 生成两个context上下文,绑定对应函数和堆栈
    contexts[1] = tb_context_make(stacks1, sizeof(stacks1), func1);
    contexts[2] = tb_context_make(stacks2, sizeof(stacks2), func2);

    // 切换到func1并传递私有参数:context数组
    tb_context_jump(contexts[1], contexts);



.globl make_fcontext
.align 2
.type make_fcontext,@function
.ent make_fcontext
#ifdef __PIC__
.set    noreorder
.cpload $t9
.set    reorder
    # first arg of make_fcontext() == top address of context-stack
    move $v0, $a0

    # shift address in A0 to lower 16 byte boundary
    move $v1, $v0
    li $v0, -16 # 0xfffffffffffffff0
    and $v0, $v1, $v0

    # reserve space for context-data on context-stack
    # including 48 byte of shadow space (sp % 16 == 0)
    addiu $v0, $v0, -112

    # third arg of make_fcontext() == address of context-function
    sw  $a2, 44($v0)
    # save global pointer in context-data
    sw  $gp, 48($v0)

    # compute address of returned transfer_t
    addiu $t0, $v0, 52
    sw  $t0, 36($v0)

    # compute abs address of label finish
    la  $t9, finish
    # save address of finish as return-address for context-function
    # will be entered after context-function returns
    sw  $t9, 40($v0)

    jr  $ra # return pointer to context-data

    lw $gp, 0($sp)
    # allocate stack space (contains shadow space for subroutines)
    addiu  $sp, $sp, -32
    # save return address
    sw  $ra, 28($sp)

    # restore GP (global pointer)
#    move  $gp, $s1
    # exit code is zero
    move  $a0, $zero
    # address of exit
    lw  $t9, %call16(_exit)($gp)
    # exit application
    jalr  $t9
.end make_fcontext
.size make_fcontext, .-make_fcontext

.globl jump_fcontext
.align 2
.type jump_fcontext,@function
.ent jump_fcontext
    # reserve space on stack
    addiu $sp, $sp, -112

    sw  $s0, ($sp)  # save S0
    sw  $s1, 4($sp)  # save S1
    sw  $s2, 8($sp)  # save S2
    sw  $s3, 12($sp)  # save S3
    sw  $s4, 16($sp)  # save S4
    sw  $s5, 20($sp)  # save S5
    sw  $s6, 24($sp)  # save S6
    sw  $s7, 28($sp)  # save S7
    sw  $fp, 32($sp)  # save FP
    sw  $a0, 36($sp)  # save hidden, address of returned transfer_t
    sw  $ra, 40($sp)  # save RA
    sw  $ra, 44($sp)  # save RA as PC

    # store SP (pointing to context-data) in A0
    move  $a0, $sp

    # restore SP (pointing to context-data) from A1
    move  $sp, $a1

    lw  $s0, ($sp)  # restore S0
    lw  $s1, 4($sp)  # restore S1
    lw  $s2, 8($sp)  # restore S2
    lw  $s3, 12($sp)  # restore S3
    lw  $s4, 16($sp)  # restore S4
    lw  $s5, 20($sp)  # restore S5
    lw  $s6, 24($sp)  # restore S6
    lw  $s7, 28($sp)  # restore S7
    lw  $fp, 32($sp)  # restore FP
    lw  $t0, 36($sp)  # restore hidden, address of returned transfer_t
    lw  $ra, 40($sp)  # restore RA

    # load PC
    lw  $t9, 44($sp)

    # adjust stack
    addiu $sp, $sp, 112
    # return transfer_t from jump
    sw  $a0, ($t0)  # fctx of transfer_t
    sw  $a1, 4($t0) # data of transfer_t
    # pass transfer_t as first arg in context function
    # A0 == fctx, A1 == data
    move  $a1, $a2 

    # jump to context
    jr  $t9
.end jump_fcontext
.size jump_fcontext, .-jump_fcontext







.text:00453F04 func1:     
.text:00453F04 var_30          = -0x30
.text:00453F04 var_2C          = -0x2C
.text:00453F04 var_28          = -0x28
.text:00453F04 var_20          = -0x20
.text:00453F04 var_18          = -0x18
.text:00453F04 var_14          = -0x14
.text:00453F04 var_10          = -0x10
.text:00453F04 var_8           = -8
.text:00453F04 var_4           = -4
.text:00453F04 arg_0           =  0
.text:00453F04 arg_4           =  4
.text:00453F04                 addiu   $sp, -0x40
.text:00453F08                 sw      $ra, 0x40+var_4($sp)
.text:00453F0C                 sw      $fp, 0x40+var_8($sp)
.text:00453F10                 move    $fp, $sp
.text:00453F14                 la      $gp, unk_5706A0
.text:00453F1C                 sw      $gp, 0x40+var_20($sp)
.text:00453F20                 sw      $a0, 0x40+arg_0($fp)    <------------ 此处发生越界,改写了contexts[0] = from.context
.text:00453F24                 sw      $a1, 0x40+arg_4($fp)    <------------ 此处发生越界,改写了contexts[1] = from.data
.text:00453F28                 lw      $v0, 0x40+arg_4($fp)
.text:00453F2C                 sw      $v0, 0x40+var_14($fp)
.text:00453F30                 lw      $v0, 0x40+var_14($fp)
.text:00453F34                 sltu    $v0, $zero, $v0
.text:00453F38                 andi    $v0, 0xFF
.text:00453F3C                 move    $v1, $v0





| other args |
|   a0-a3    | <- 参数传递使用a0-a3,但是还是会为这四个参数保留栈空间出来
|     ra     | <- 返回地址
| fp gp s0-7 | <- 保存的一些其他寄存器
|   locals   |



.globl make_fcontext

    # reserve space for context-data on context-stack
    # including 48 byte of shadow space (sp % 16 == 0)
#    addiu $v0, $v0, -112
    addiu $v0, $v0, -146




    # reserve space on stack
    addiu $sp, $sp, -112

    sw  $s0, ($sp)  # save S0
    sw  $s1, 4($sp)  # save S1
    sw  $s2, 8($sp)  # save S2
    sw  $s3, 12($sp)  # save S3
    sw  $s4, 16($sp)  # save S4
    sw  $s5, 20($sp)  # save S5
    sw  $s6, 24($sp)  # save S6
    sw  $s7, 28($sp)  # save S7
    sw  $fp, 32($sp)  # save FP
    sw  $a0, 36($sp)  # save hidden, address of returned transfer_t
    sw  $ra, 40($sp)  # save RA
    sw  $ra, 44($sp)  # save RA as PC
                      <-------------------- 此处boost虽然为gp保留了48($sp)空间,但是却没去保存gp寄存器

    # store SP (pointing to context-data) in A0
    move  $a0, $sp

    # restore SP (pointing to context-data) from A1
    move  $sp, $a1

    lw  $s0, ($sp)  # restore S0
    lw  $s1, 4($sp)  # restore S1
    lw  $s2, 8($sp)  # restore S2
    lw  $s3, 12($sp)  # restore S3
    lw  $s4, 16($sp)  # restore S4
    lw  $s5, 20($sp)  # restore S5
    lw  $s6, 24($sp)  # restore S6
    lw  $s7, 28($sp)  # restore S7
    lw  $fp, 32($sp)  # restore FP
    lw  $t0, 36($sp)  # restore hidden, address of returned transfer_t
    lw  $ra, 40($sp)  # restore RA
                      <-------------------- 此处boost也没去恢复gp寄存器

    # load PC
    lw  $t9, 44($sp)

    # adjust stack
    addiu $sp, $sp, 112
    # return transfer_t from jump
    sw  $a0, ($t0)  # fctx of transfer_t
    sw  $a1, 4($t0) # data of transfer_t  <------------- 此处应该使用 a2 而不是 a1 
    # pass transfer_t as first arg in context function
    # A0 == fctx, A1 == data
    move  $a1, $a2 

    # jump to context
    jr  $t9
.end jump_fcontext

最后说一下,本文是针对boost 1.62.0 版本做的分析,如有不对之处,欢迎指正哈。。