源码分析go调度器四: 非main goroutine的调度循环

328 阅读4分钟

本节的重点主要有:

  1. 非main goroutine如何返回到goexit的
  2. mcall如何切换到g0继续执行的
  3. 调度循环

有如下代码

package main

import "time"

func hello() {
	// Built-in println; it compiles to runtime.printlock, runtime.printstring and runtime.printunlock calls.
	println("msg")
}

// main starts hello on a separate goroutine and then sleeps, so the process
// stays alive while that goroutine runs.
func main() {
	// Run hello on a new (non-main) goroutine.
	go hello()
	// Block the main goroutine for one hour; presumably this keeps the
	// program from exiting before hello can be inspected in the debugger.
	time.Sleep(time.Hour * 1)
}

编译:go build -gcflags "-N -l" -ldflags=-compressdwarf=false main.go
使用gdb调试:gdb main

(gdb) b main.hello     
Breakpoint 1 at 0x1057240: file /Users/zixuan.xu/workspace/study/learn-go-plan9/goroutine/main.go, line 5.
(gdb) r
Starting program: /Users/zixuan.xu/workspace/study/learn-go-plan9/goroutine/main
[New Thread 0x1a03 of process 88861]
[New Thread 0x1b03 of process 88861]
warning: unhandled dyld version (17)
[New Thread 0x1d03 of process 88861]
[New Thread 0x1f07 of process 88861]
[New Thread 0x2307 of process 88861]
[New Thread 0x5403 of process 88861]
[Switching to Thread 0x2307 of process 88861]

Thread 5 hit Breakpoint 1, main.hello () at /Users/zixuan.xu/workspace/study/learn-go-plan9/goroutine/main.go:5
5	func hello() {
(gdb) disas
Dump of assembler code for function main.hello:
=> 0x0000000001057240 <+0>:	cmp    0x10(%r14),%rsp
   0x0000000001057244 <+4>:	jbe    0x1057279 <main.hello+57>
   0x0000000001057246 <+6>:	sub    $0x18,%rsp
   0x000000000105724a <+10>:	mov    %rbp,0x10(%rsp)
   0x000000000105724f <+15>:	lea    0x10(%rsp),%rbp
   0x0000000001057254 <+20>:	call   0x102d2e0 <runtime.printlock>
   0x0000000001057259 <+25>:	lea    0xcc55(%rip),%rax        # 0x1063eb5 <go.string.*+309>
   0x0000000001057260 <+32>:	mov    $0x4,%ebx
   0x0000000001057265 <+37>:	call   0x102dc00 <runtime.printstring>
   0x000000000105726a <+42>:	call   0x102d360 <runtime.printunlock>
   0x000000000105726f <+47>:	mov    0x10(%rsp),%rbp
   0x0000000001057274 <+52>:	add    $0x18,%rsp
   0x0000000001057278 <+56>:	ret
   0x0000000001057279 <+57>:	call   0x1052da0 <runtime.morestack_noctxt>
   0x000000000105727e <+62>:	xchg   %ax,%ax
   0x0000000001057280 <+64>:	jmp    0x1057240 <main.hello>
End of assembler dump.
(gdb) b *0x0000000001057278
Breakpoint 2 at 0x1057278: file /Users/zixuan.xu/workspace/study/learn-go-plan9/goroutine/main.go, line 7.
(gdb) c
Continuing.
msg

Thread 5 hit Breakpoint 2, 0x0000000001057278 in main.hello () at /Users/zixuan.xu/workspace/study/learn-go-plan9/goroutine/main.go:7
7	}
(gdb) disas
Dump of assembler code for function main.hello:
   0x0000000001057240 <+0>:	cmp    0x10(%r14),%rsp
   0x0000000001057244 <+4>:	jbe    0x1057279 <main.hello+57>
   0x0000000001057246 <+6>:	sub    $0x18,%rsp
   0x000000000105724a <+10>:	mov    %rbp,0x10(%rsp)
   0x000000000105724f <+15>:	lea    0x10(%rsp),%rbp
   0x0000000001057254 <+20>:	call   0x102d2e0 <runtime.printlock>
   0x0000000001057259 <+25>:	lea    0xcc55(%rip),%rax        # 0x1063eb5 <go.string.*+309>
   0x0000000001057260 <+32>:	mov    $0x4,%ebx
   0x0000000001057265 <+37>:	call   0x102dc00 <runtime.printstring>
   0x000000000105726a <+42>:	call   0x102d360 <runtime.printunlock>
   0x000000000105726f <+47>:	mov    0x10(%rsp),%rbp
   0x0000000001057274 <+52>:	add    $0x18,%rsp
=> 0x0000000001057278 <+56>:	ret
   0x0000000001057279 <+57>:	call   0x1052da0 <runtime.morestack_noctxt>
   0x000000000105727e <+62>:	xchg   %ax,%ax
   0x0000000001057280 <+64>:	jmp    0x1057240 <main.hello>
End of assembler dump.
(gdb) si
runtime.goexit () at /Users/zixuan.xu/.gvm/gos/go1.17/src/runtime/asm_amd64.s:1582
1582		CALL	runtime·goexit1(SB)	// does not return
(gdb) disas
Dump of assembler code for function runtime.goexit:
   0x0000000001053060 <+0>:	nop
=> 0x0000000001053061 <+1>:	call   0x10551e0 <runtime.goexit1>
   0x0000000001053066 <+6>:	nop
End of assembler dump.
  1. 在main.hello下断点,并执行到这里
  2. 在ret处下断点,并执行到ret位置
  3. 使用si单步执行ret指令,能看到返回后进入的是goexit这个函数

asm_amd64.s:1580

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// goexit itself only calls goexit1, which does not return.
TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME,$0-0
   BYTE   $0x90  // NOP; the goroutine's top-most function returns to the instruction right after this byte
   CALL   runtime·goexit1(SB)    // does not return
   // traceback from goexit1 must hit code range of goexit
   BYTE   $0x90  // NOP; the return address of the CALL above points here, still inside goexit

其实si执行完后,指向的指令就是goexit的第二条指令(goexit+1,即CALL runtime·goexit1),这个返回地址就是上一节中保存在newg.sched.sp所指向的栈顶位置的值。
现在也可以证明非main goroutine执行完后并不会直接退出,而是返回到goexit继续执行

proc.go:3622

接下来调用goexit1

// Finishes execution of the current goroutine.
// It calls racegoend when raceenabled is set and traceGoEnd when
// trace.enabled is set, then hands off to goexit0 through mcall.
func goexit1() {
   if raceenabled {
      racegoend()
   }
   if trace.enabled {
      traceGoEnd()
   }
   // mcall switches to the m's g0 stack and calls goexit0 with the
   // exiting g as its argument; goexit0 must never return here.
   mcall(goexit0)
}

这里通过mcall从goroutine的栈切换到g0的栈

asm_amd64.s:282

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
#ifdef GOEXPERIMENT_regabiargs
TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
   MOVQ   AX, DX // DX = fn (the argument arrives in AX)

   // save state in g->sched (R14 holds the current g)
   MOVQ   0(SP), BX  // BX = caller's PC (return address at the top of the stack)
   MOVQ   BX, (g_sched+gobuf_pc)(R14) // g.sched.pc = caller's PC
   LEAQ   fn+0(FP), BX   // BX = &fn, i.e. caller's SP
   MOVQ   BX, (g_sched+gobuf_sp)(R14)   // g.sched.sp = caller's SP
   MOVQ   BP, (g_sched+gobuf_bp)(R14)   // g.sched.bp = BP

   // switch to m->g0 & its stack, call fn
   MOVQ   g_m(R14), BX     // BX = g.m
   MOVQ   m_g0(BX), SI   // SI = g.m.g0
   CMPQ   SI, R14    // if g == m->g0 call badmcall
   JNE    goodm
   JMP    runtime·badmcall(SB)
goodm:
   MOVQ   R14, AX       // AX (and arg 0) = g
   MOVQ   SI, R14       // g = g.m.g0
   get_tls(CX)       // Set G in TLS
   MOVQ   R14, g(CX)
   MOVQ   (g_sched+gobuf_sp)(R14), SP    // sp = g0.sched.sp
   PUSHQ  AX // open up space for fn's arg spill slot
   MOVQ   0(DX), R12 // R12 = first word of fn; NOTE(review): presumably the code pointer of the func value, confirm
   CALL   R12       // fn(g)
   POPQ   AX
   JMP    runtime·badmcall2(SB) // reached only if fn returned, which it must never do
   RET
#else

通过MOVQ (g_sched+gobuf_sp)(R14), SP就能看出,sp被切换到g0.sched.sp,所以每次切换到g0栈时,都会从同样的位置开始

proc.go:3633

// goexit continuation on g0.
// gp is the goroutine that is exiting; it is passed in by mcall, which has
// already switched to the g0 stack. goexit0 marks gp dead, resets its
// fields, hands it to gfput and then calls schedule.
func goexit0(gp *g) {
   _g_ := getg()

   // gp is expected to be in _Grunning; move it to _Gdead.
   casgstatus(gp, _Grunning, _Gdead)
   if isSystemGoroutine(gp, false) {
      // A system goroutine is exiting, so decrement the counter.
      atomic.Xadd(&sched.ngsys, -1)
   }
   // Reset gp's fields. Whether gp was locked to a thread is recorded
   // before lockedm is cleared, because it selects the path taken below.
   gp.m = nil
   locked := gp.lockedm != 0
   gp.lockedm = 0
   _g_.m.lockedg = 0
   gp.preemptStop = false
   gp.paniconfault = false
   gp._defer = nil // should be true already but just in case.
   gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data.
   gp.writebuf = nil
   gp.waitreason = 0
   gp.param = nil
   gp.labels = nil
   gp.timer = nil

   if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 {
      // Flush assist credit to the global pool. This gives
      // better information to pacing if the application is
      // rapidly creating and exiting goroutines.
      assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte))
      scanCredit := int64(assistWorkPerByte * float64(gp.gcAssistBytes))
      atomic.Xaddint64(&gcController.bgScanCredit, scanCredit)
      gp.gcAssistBytes = 0
   }

   // NOTE(review): dropg is not shown here; presumably it detaches gp
   // from the current m. Confirm against its definition.
   dropg()

   if GOARCH == "wasm" { // no threads yet on wasm
      // Hand gp to gfput and go straight to schedule; since schedule
      // never returns, nothing below runs on wasm.
      gfput(_g_.m.p.ptr(), gp)
      schedule() // never returns
   }

   if _g_.m.lockedInt != 0 {
      print("invalid m->lockedInt = ", _g_.m.lockedInt, "\n")
      throw("internal lockOSThread error")
   }
   // Hand gp to gfput, passing the current P.
   gfput(_g_.m.p.ptr(), gp)
   if locked {
      // The goroutine may have locked this thread because
      // it put it in an unusual kernel state. Kill it
      // rather than returning it to the thread pool.

      // Return to mstart, which will release the P and exit
      // the thread.
      if GOOS != "plan9" { // See golang.org/issue/22227.
         gogo(&_g_.m.g0.sched)
      } else {
         // Clear lockedExt on plan9 since we may end up re-using
         // this thread.
         _g_.m.lockedExt = 0
      }
   }
   // Re-enter the scheduler; as noted at the call above, schedule
   // never returns.
   schedule()
}
  1. 清理g的信息
  2. 将g放入gfree
  3. 继续调用schedule

总结下流程:

image.png