程序启动流程 | 文章大纲
1 先启动G0,M0并绑定(汇编代码)
2 初始化调度器 | schedinit
2-1 设置m的最大数量为10000
2-2 初始化各类资源:stack,malloc,cpu,m0,gc
2-3 设置p数量为 gomaxprocs值
2-4 初始化p
3 新建goroutine(main) | newProc
3-1 创建新goroutine
3-2 挂到当前p的运行队列
4 启动当前的m | mstart
4-1 新建m | newm
4-2 初始化g0 (stackguard、pc、sp)| mstart0,mstart1
4-3 初始化m | mint
5 调度goroutine | schedule
SchedInit函数 | 只在golang启动时调用
// The bootstrap sequence is:
//
// call osinit
// call schedinit
// make & queue new G
// call runtime·mstart
//
// The new G calls runtime·main.
func schedinit() {
lockInit(&sched.lock, lockRankSched)
lockInit(&sched.sysmonlock, lockRankSysmon)
lockInit(&sched.deferlock, lockRankDefer)
lockInit(&sched.sudoglock, lockRankSudog)
lockInit(&deadlock, lockRankDeadlock)
lockInit(&paniclk, lockRankPanic)
lockInit(&allglock, lockRankAllg)
lockInit(&allpLock, lockRankAllp)
lockInit(&reflectOffs.lock, lockRankReflectOffs)
lockInit(&finlock, lockRankFin)
lockInit(&cpuprof.lock, lockRankCpuprof)
allocmLock.init(lockRankAllocmR, lockRankAllocmRInternal, lockRankAllocmW)
execLock.init(lockRankExecR, lockRankExecRInternal, lockRankExecW)
traceLockInit()
// Enforce that this lock is always a leaf lock.
// All of this lock's critical sections should be
// extremely short.
lockInit(&memstats.heapStats.noPLock, lockRankLeafRank)
// raceinit must be the first call to race detector.
// In particular, it must be done before mallocinit below calls racemapshadow.
gp := getg()
if raceenabled {
gp.racectx, raceprocctx0 = raceinit()
}
sched.maxmcount = 10000
// The world starts stopped.
worldStopped()
ticks.init() // run as early as possible
moduledataverify()
stackinit()
mallocinit()
godebug := getGodebugEarly()
initPageTrace(godebug) // must run after mallocinit but before anything allocates
cpuinit(godebug) // must run before alginit
randinit() // must run before alginit, mcommoninit
alginit() // maps, hash, rand must not be used before this call
mcommoninit(gp.m, -1)
modulesinit() // provides activeModules
typelinksinit() // uses maps, activeModules
itabsinit() // uses activeModules
stkobjinit() // must run before GC starts
sigsave(&gp.m.sigmask)
initSigmask = gp.m.sigmask
goargs()
goenvs()
secure()
checkfds()
parsedebugvars()
gcinit()
// Allocate stack space that can be used when crashing due to bad stack
// conditions, e.g. morestack on g0.
gcrash.stack = stackalloc(16384)
gcrash.stackguard0 = gcrash.stack.lo + 1000
gcrash.stackguard1 = gcrash.stack.lo + 1000
// if disableMemoryProfiling is set, update MemProfileRate to 0 to turn off memprofile.
// Note: parsedebugvars may update MemProfileRate, but when disableMemoryProfiling is
// set to true by the linker, it means that nothing is consuming the profile, it is
// safe to set MemProfileRate to 0.
if disableMemoryProfiling {
MemProfileRate = 0
}
lock(&sched.lock)
sched.lastpoll.Store(nanotime())
procs := ncpu
if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 {
procs = n
}
if procresize(procs) != nil {
throw("unknown runnable goroutine during bootstrap")
}
unlock(&sched.lock)
// World is effectively started now, as P's can run.
worldStarted()
if buildVersion == "" {
// Condition should never trigger. This code just serves
// to ensure runtime·buildVersion is kept in the resulting binary.
buildVersion = "unknown"
}
if len(modinfo) == 1 {
// Condition should never trigger. This code just serves
// to ensure runtime·modinfo is kept in the resulting binary.
modinfo = ""
}
}
堆栈-内存分配
栈 | stackinit()
scheduledInit在初始化时,会初始化 stackPool和stackLarge,一个用于分配小栈,一个用于分配大栈
func stackinit() {
if _StackCacheSize&_PageMask != 0 {
throw("cache size must be a multiple of page size")
}
for i := range stackpool {
stackpool[i].item.span.init()
lockInit(&stackpool[i].item.mu, lockRankStackpool)
}
for i := range stackLarge.free {
stackLarge.free[i].init()
lockInit(&stackLarge.lock, lockRankStackLarge)
}
}
堆 | mallocinit()
:::info page、Span、ThreadCache、CentralCache、PageHeap 在malloc.go中初始化mheap(centrals,pages),mCache
- mheap
- central字段:存放mcentral数组,下标为spanclass
- pages字段:存放chunks二层数组,里面有pallocData,其用于 用于跟踪页面的分配情况
:::
func mallocinit() {
if class_to_size[_TinySizeClass] != _TinySize {
throw("bad TinySizeClass")
}
if heapArenaBitmapWords&(heapArenaBitmapWords-1) != 0 {
// heapBits expects modular arithmetic on bitmap
// addresses to work.
throw("heapArenaBitmapWords not a power of 2")
}
// Check physPageSize.
if physPageSize == 0 {
// The OS init code failed to fetch the physical page size.
throw("failed to get system page size")
}
if physPageSize > maxPhysPageSize {
print("system page size (", physPageSize, ") is larger than maximum page size (", maxPhysPageSize, ")\n")
throw("bad system page size")
}
if physPageSize < minPhysPageSize {
print("system page size (", physPageSize, ") is smaller than minimum page size (", minPhysPageSize, ")\n")
throw("bad system page size")
}
if physPageSize&(physPageSize-1) != 0 {
print("system page size (", physPageSize, ") must be a power of 2\n")
throw("bad system page size")
}
if physHugePageSize&(physHugePageSize-1) != 0 {
print("system huge page size (", physHugePageSize, ") must be a power of 2\n")
throw("bad system huge page size")
}
if physHugePageSize > maxPhysHugePageSize {
// physHugePageSize is greater than the maximum supported huge page size.
// Don't throw here, like in the other cases, since a system configured
// in this way isn't wrong, we just don't have the code to support them.
// Instead, silently set the huge page size to zero.
physHugePageSize = 0
}
if physHugePageSize != 0 {
// Since physHugePageSize is a power of 2, it suffices to increase
// physHugePageShift until 1<<physHugePageShift == physHugePageSize.
for 1<<physHugePageShift != physHugePageSize {
physHugePageShift++
}
}
if pagesPerArena%pagesPerSpanRoot != 0 {
print("pagesPerArena (", pagesPerArena, ") is not divisible by pagesPerSpanRoot (", pagesPerSpanRoot, ")\n")
throw("bad pagesPerSpanRoot")
}
if pagesPerArena%pagesPerReclaimerChunk != 0 {
print("pagesPerArena (", pagesPerArena, ") is not divisible by pagesPerReclaimerChunk (", pagesPerReclaimerChunk, ")\n")
throw("bad pagesPerReclaimerChunk")
}
if goexperiment.AllocHeaders {
// Check that the minimum size (exclusive) for a malloc header is also
// a size class boundary. This is important to making sure checks align
// across different parts of the runtime.
minSizeForMallocHeaderIsSizeClass := false
for i := 0; i < len(class_to_size); i++ {
if minSizeForMallocHeader == uintptr(class_to_size[i]) {
minSizeForMallocHeaderIsSizeClass = true
break
}
}
if !minSizeForMallocHeaderIsSizeClass {
throw("min size of malloc header is not a size class boundary")
}
// Check that the pointer bitmap for all small sizes without a malloc header
// fits in a word.
if minSizeForMallocHeader/goarch.PtrSize > 8*goarch.PtrSize {
throw("max pointer/scan bitmap size for headerless objects is too large")
}
}
if minTagBits > taggedPointerBits {
throw("taggedPointerbits too small")
}
// Initialize the heap.
mheap_.init()
mcache0 = allocmcache()
lockInit(&gcBitsArenas.lock, lockRankGcBitsArenas)
lockInit(&profInsertLock, lockRankProfInsert)
lockInit(&profBlockLock, lockRankProfBlock)
lockInit(&profMemActiveLock, lockRankProfMemActive)
for i := range profMemFutureLock {
lockInit(&profMemFutureLock[i], lockRankProfMemFuture)
}
lockInit(&globalAlloc.mutex, lockRankGlobalAlloc)
// Create initial arena growth hints.
if goarch.PtrSize == 8 {
// On a 64-bit machine, we pick the following hints
// because:
//
// 1. Starting from the middle of the address space
// makes it easier to grow out a contiguous range
// without running in to some other mapping.
//
// 2. This makes Go heap addresses more easily
// recognizable when debugging.
//
// 3. Stack scanning in gccgo is still conservative,
// so it's important that addresses be distinguishable
// from other data.
//
// Starting at 0x00c0 means that the valid memory addresses
// will begin 0x00c0, 0x00c1, ...
// In little-endian, that's c0 00, c1 00, ... None of those are valid
// UTF-8 sequences, and they are otherwise as far away from
// ff (likely a common byte) as possible. If that fails, we try other 0xXXc0
// addresses. An earlier attempt to use 0x11f8 caused out of memory errors
// on OS X during thread allocations. 0x00c0 causes conflicts with
// AddressSanitizer which reserves all memory up to 0x0100.
// These choices reduce the odds of a conservative garbage collector
// not collecting memory because some non-pointer block of memory
// had a bit pattern that matched a memory address.
//
// However, on arm64, we ignore all this advice above and slam the
// allocation at 0x40 << 32 because when using 4k pages with 3-level
// translation buffers, the user address space is limited to 39 bits
// On ios/arm64, the address space is even smaller.
//
// On AIX, mmaps starts at 0x0A00000000000000 for 64-bit.
// processes.
//
// Space mapped for user arenas comes immediately after the range
// originally reserved for the regular heap when race mode is not
// enabled because user arena chunks can never be used for regular heap
// allocations and we want to avoid fragmenting the address space.
//
// In race mode we have no choice but to just use the same hints because
// the race detector requires that the heap be mapped contiguously.
for i := 0x7f; i >= 0; i-- {
var p uintptr
switch {
case raceenabled:
// The TSAN runtime requires the heap
// to be in the range [0x00c000000000,
// 0x00e000000000).
p = uintptr(i)<<32 | uintptrMask&(0x00c0<<32)
if p >= uintptrMask&0x00e000000000 {
continue
}
case GOARCH == "arm64" && GOOS == "ios":
p = uintptr(i)<<40 | uintptrMask&(0x0013<<28)
case GOARCH == "arm64":
p = uintptr(i)<<40 | uintptrMask&(0x0040<<32)
case GOOS == "aix":
if i == 0 {
// We don't use addresses directly after 0x0A00000000000000
// to avoid collisions with others mmaps done by non-go programs.
continue
}
p = uintptr(i)<<40 | uintptrMask&(0xa0<<52)
default:
p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
}
// Switch to generating hints for user arenas if we've gone
// through about half the hints. In race mode, take only about
// a quarter; we don't have very much space to work with.
hintList := &mheap_.arenaHints
if (!raceenabled && i > 0x3f) || (raceenabled && i > 0x5f) {
hintList = &mheap_.userArena.arenaHints
}
hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
hint.addr = p
hint.next, *hintList = *hintList, hint
}
} else {
// On a 32-bit machine, we're much more concerned
// about keeping the usable heap contiguous.
// Hence:
//
// 1. We reserve space for all heapArenas up front so
// they don't get interleaved with the heap. They're
// ~258MB, so this isn't too bad. (We could reserve a
// smaller amount of space up front if this is a
// problem.)
//
// 2. We hint the heap to start right above the end of
// the binary so we have the best chance of keeping it
// contiguous.
//
// 3. We try to stake out a reasonably large initial
// heap reservation.
const arenaMetaSize = (1 << arenaBits) * unsafe.Sizeof(heapArena{})
meta := uintptr(sysReserve(nil, arenaMetaSize))
if meta != 0 {
mheap_.heapArenaAlloc.init(meta, arenaMetaSize, true)
}
// We want to start the arena low, but if we're linked
// against C code, it's possible global constructors
// have called malloc and adjusted the process' brk.
// Query the brk so we can avoid trying to map the
// region over it (which will cause the kernel to put
// the region somewhere else, likely at a high
// address).
procBrk := sbrk0()
// If we ask for the end of the data segment but the
// operating system requires a little more space
// before we can start allocating, it will give out a
// slightly higher pointer. Except QEMU, which is
// buggy, as usual: it won't adjust the pointer
// upward. So adjust it upward a little bit ourselves:
// 1/4 MB to get away from the running binary image.
p := firstmoduledata.end
if p < procBrk {
p = procBrk
}
if mheap_.heapArenaAlloc.next <= p && p < mheap_.heapArenaAlloc.end {
p = mheap_.heapArenaAlloc.end
}
p = alignUp(p+(256<<10), heapArenaBytes)
// Because we're worried about fragmentation on
// 32-bit, we try to make a large initial reservation.
arenaSizes := []uintptr{
512 << 20,
256 << 20,
128 << 20,
}
for _, arenaSize := range arenaSizes {
a, size := sysReserveAligned(unsafe.Pointer(p), arenaSize, heapArenaBytes)
if a != nil {
mheap_.arena.init(uintptr(a), size, false)
p = mheap_.arena.end // For hint below
break
}
}
hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
hint.addr = p
hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
// Place the hint for user arenas just after the large reservation.
//
// While this potentially competes with the hint above, in practice we probably
// aren't going to be getting this far anyway on 32-bit platforms.
userArenaHint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
userArenaHint.addr = p
userArenaHint.next, mheap_.userArena.arenaHints = mheap_.userArena.arenaHints, userArenaHint
}
// Initialize the memory limit here because the allocator is going to look at it
// but we haven't called gcinit yet and we're definitely going to allocate memory before then.
gcController.memoryLimit.Store(maxInt64)
}
GMP模型初始化(无G)
CPU | cpuinit(godebug)
根据操作系统和cpu架构,初始化cpu,(带上调试信息 -E)
// cpuinit sets up CPU feature flags and calls internal/cpu.Initialize. env should be the complete
// value of the GODEBUG environment variable.
func cpuinit(env string) {
switch GOOS {
case "aix", "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux":
cpu.DebugOptions = true
}
cpu.Initialize(env)
// Support cpu feature variables are used in code generated by the compiler
// to guard execution of instructions that can not be assumed to be always supported.
switch GOARCH {
case "386", "amd64":
x86HasPOPCNT = cpu.X86.HasPOPCNT
x86HasSSE41 = cpu.X86.HasSSE41
x86HasFMA = cpu.X86.HasFMA
case "arm":
armHasVFPv4 = cpu.ARM.HasVFPv4
case "arm64":
arm64HasATOMICS = cpu.ARM64.HasATOMICS
}
}
Machine | 系统级线程 mcommoninit(gp.m, -1)
这里m0变为普通M了
// Pre-allocated ID may be passed as 'id', or omitted by passing -1.
func mcommoninit(mp *m, id int64) {
gp := getg()
// g0 stack won't make sense for user (and is not necessary unwindable).
if gp != gp.m.g0 {
callers(1, mp.createstack[:])
}
lock(&sched.lock)
if id >= 0 {
mp.id = id
} else {
mp.id = mReserveID()
}
mrandinit(mp)
mpreinit(mp)
if mp.gsignal != nil {
mp.gsignal.stackguard1 = mp.gsignal.stack.lo + stackGuard
}
// Add to allm so garbage collector doesn't free g->m
// when it is just in a register or thread-local storage.
mp.alllink = allm
// NumCgoCall() and others iterate over allm w/o schedlock,
// so we need to publish it safely.
atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp))
unlock(&sched.lock)
// Allocate memory to hold a cgo traceback if the cgo call crashes.
if iscgo || GOOS == "solaris" || GOOS == "illumos" || GOOS == "windows" {
mp.cgoCallers = new(cgoCallers)
}
}
Procressor | procresize(nprocs int32)
:::warning Procre 在初始化时会判断allp数组里是否还有其他P,如有则还是会创建,但会丢出错误
- 更新allp数组大小
- 初始化新的p
- 获取当前goroutine(g0)
- 删除没有的老p(>当前的nprocs)
- 修剪allp数组
- 判断除了当前goroutine以外的p是否使用过,若使用过,则在遍历完所有p后返回最后一个使用过的p(正常应该返回nil,因为 在schedinit里已经stopWorld了)若没使用过调用pidleput(pp *p, now int64)函数将p加入到sched对象的 :::
if procresize(procs) != nil {
throw("unknown runnable goroutine during bootstrap")
}
// Change number of processors.
//
// sched.lock must be held, and the world must be stopped.
//
// gcworkbufs must not be being modified by either the GC or the write barrier
// code, so the GC must not be running if the number of Ps actually changes.
//
// Returns list of Ps with local work, they need to be scheduled by the caller.
func procresize(nprocs int32) *p {
assertLockHeld(&sched.lock)
assertWorldStopped()
old := gomaxprocs
if old < 0 || nprocs <= 0 {
throw("procresize: invalid arg")
}
trace := traceAcquire()
if trace.ok() {
trace.Gomaxprocs(nprocs)
traceRelease(trace)
}
// update statistics
now := nanotime()
if sched.procresizetime != 0 {
sched.totaltime += int64(old) * (now - sched.procresizetime)
}
sched.procresizetime = now
maskWords := (nprocs + 31) / 32
// Grow allp if necessary.
if nprocs > int32(len(allp)) {
// Synchronize with retake, which could be running
// concurrently since it doesn't run on a P.
lock(&allpLock)
if nprocs <= int32(cap(allp)) {
allp = allp[:nprocs]
} else {
nallp := make([]*p, nprocs)
// Copy everything up to allp's cap so we
// never lose old allocated Ps.
copy(nallp, allp[:cap(allp)])
allp = nallp
}
if maskWords <= int32(cap(idlepMask)) {
idlepMask = idlepMask[:maskWords]
timerpMask = timerpMask[:maskWords]
} else {
nidlepMask := make([]uint32, maskWords)
// No need to copy beyond len, old Ps are irrelevant.
copy(nidlepMask, idlepMask)
idlepMask = nidlepMask
ntimerpMask := make([]uint32, maskWords)
copy(ntimerpMask, timerpMask)
timerpMask = ntimerpMask
}
unlock(&allpLock)
}
// initialize new P's
for i := old; i < nprocs; i++ {
pp := allp[i]
if pp == nil {
pp = new(p)
}
pp.init(i)
atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp))
}
gp := getg()
if gp.m.p != 0 && gp.m.p.ptr().id < nprocs {
// continue to use the current P
gp.m.p.ptr().status = _Prunning
gp.m.p.ptr().mcache.prepareForSweep()
} else {
// release the current P and acquire allp[0].
//
// We must do this before destroying our current P
// because p.destroy itself has write barriers, so we
// need to do that from a valid P.
if gp.m.p != 0 {
trace := traceAcquire()
if trace.ok() {
// Pretend that we were descheduled
// and then scheduled again to keep
// the trace sane.
trace.GoSched()
trace.ProcStop(gp.m.p.ptr())
traceRelease(trace)
}
gp.m.p.ptr().m = 0
}
gp.m.p = 0
pp := allp[0]
pp.m = 0
pp.status = _Pidle
acquirep(pp)
trace := traceAcquire()
if trace.ok() {
trace.GoStart()
traceRelease(trace)
}
}
// g.m.p is now set, so we no longer need mcache0 for bootstrapping.
mcache0 = nil
// release resources from unused P's
for i := nprocs; i < old; i++ {
pp := allp[i]
pp.destroy()
// can't free P itself because it can be referenced by an M in syscall
}
// Trim allp.
if int32(len(allp)) != nprocs {
lock(&allpLock)
allp = allp[:nprocs]
idlepMask = idlepMask[:maskWords]
timerpMask = timerpMask[:maskWords]
unlock(&allpLock)
}
var runnablePs *p
for i := nprocs - 1; i >= 0; i-- {
pp := allp[i]
if gp.m.p.ptr() == pp {
continue
}
pp.status = _Pidle
if runqempty(pp) {
pidleput(pp, now)
} else {
pp.m.set(mget())
pp.link.set(runnablePs)
runnablePs = pp
}
}
stealOrder.reset(uint32(nprocs))
var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs))
if old != nprocs {
// Notify the limiter that the amount of procs has changed.
gcCPULimiter.resetCapacity(now, nprocs)
}
return runnablePs
}
newProc函数 | go 关键字调用
// Create a new g running fn.
// Put it on the queue of g's waiting to run.
// The compiler turns a go statement into a call to this.
func newproc(fn *funcval) {
gp := getg()
pc := getcallerpc()
systemstack(func() {
newg := newproc1(fn, gp, pc)
pp := getg().m.p.ptr()
runqput(pp, newg, true)
if mainStarted {
wakep()
}
})
}
newProc1函数创建新goroutine
先从当前p的空闲队列取,当前p取不到再去schedt里一次性拿32个出来 获取goroutine的主要语句:newg := gfget(pp) ,其他都是在为goroutine做初始化
// Create a new g in state _Grunnable, starting at fn. callerpc is the
// address of the go statement that created this. The caller is responsible
// for adding the new g to the scheduler.
func newproc1(fn *funcval, callergp *g, callerpc uintptr) *g {
if fn == nil {
fatal("go of nil func value")
}
mp := acquirem() // disable preemption because we hold M and P in local vars.
pp := mp.p.ptr()
newg := gfget(pp)
if newg == nil {
newg = malg(stackMin)
casgstatus(newg, _Gidle, _Gdead)
allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
}
if newg.stack.hi == 0 {
throw("newproc1: newg missing stack")
}
if readgstatus(newg) != _Gdead {
throw("newproc1: new g is not Gdead")
}
totalSize := uintptr(4*goarch.PtrSize + sys.MinFrameSize) // extra space in case of reads slightly beyond frame
totalSize = alignUp(totalSize, sys.StackAlign)
sp := newg.stack.hi - totalSize
if usesLR {
// caller's LR
*(*uintptr)(unsafe.Pointer(sp)) = 0
prepGoExitFrame(sp)
}
if GOARCH == "arm64" {
// caller's FP
*(*uintptr)(unsafe.Pointer(sp - goarch.PtrSize)) = 0
}
memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
newg.sched.sp = sp
newg.stktopsp = sp
newg.sched.pc = abi.FuncPCABI0(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function
newg.sched.g = guintptr(unsafe.Pointer(newg))
gostartcallfn(&newg.sched, fn)
newg.parentGoid = callergp.goid
newg.gopc = callerpc
newg.ancestors = saveAncestors(callergp)
newg.startpc = fn.fn
if isSystemGoroutine(newg, false) {
sched.ngsys.Add(1)
} else {
// Only user goroutines inherit pprof labels.
if mp.curg != nil {
newg.labels = mp.curg.labels
}
if goroutineProfile.active {
// A concurrent goroutine profile is running. It should include
// exactly the set of goroutines that were alive when the goroutine
// profiler first stopped the world. That does not include newg, so
// mark it as not needing a profile before transitioning it from
// _Gdead.
newg.goroutineProfiled.Store(goroutineProfileSatisfied)
}
}
// Track initial transition?
newg.trackingSeq = uint8(cheaprand())
if newg.trackingSeq%gTrackingPeriod == 0 {
newg.tracking = true
}
gcController.addScannableStack(pp, int64(newg.stack.hi-newg.stack.lo))
// Get a goid and switch to runnable. Make all this atomic to the tracer.
trace := traceAcquire()
casgstatus(newg, _Gdead, _Grunnable)
if pp.goidcache == pp.goidcacheend {
// Sched.goidgen is the last allocated id,
// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
// At startup sched.goidgen=0, so main goroutine receives goid=1.
pp.goidcache = sched.goidgen.Add(_GoidCacheBatch)
pp.goidcache -= _GoidCacheBatch - 1
pp.goidcacheend = pp.goidcache + _GoidCacheBatch
}
newg.goid = pp.goidcache
pp.goidcache++
newg.trace.reset()
if trace.ok() {
trace.GoCreate(newg, newg.startpc)
traceRelease(trace)
}
// Set up race context.
if raceenabled {
newg.racectx = racegostart(callerpc)
newg.raceignore = 0
if newg.labels != nil {
// See note in proflabel.go on labelSync's role in synchronizing
// with the reads in the signal handler.
racereleasemergeg(newg, unsafe.Pointer(&labelSync))
}
}
releasem(mp)
return newg
}
runqput函数将新创建的goroutine放到p中的运行队列
根据next为true,则新增goroutine加到p的待运行goroutine队列队首; 若next为false,则加到p的队尾 若队列满了,则将p的半个运行队列加到全局队列,通过h,t(uint32索引)
// runqput tries to put g on the local runnable queue.
// If next is false, runqput adds g to the tail of the runnable queue.
// If next is true, runqput puts g in the pp.runnext slot.
// If the run queue is full, runnext puts g on the global queue.
// Executed only by the owner P.
func runqput(pp *p, gp *g, next bool) {
if randomizeScheduler && next && randn(2) == 0 {
next = false
}
if next {
retryNext:
oldnext := pp.runnext
if !pp.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) {
goto retryNext
}
if oldnext == 0 {
return
}
// Kick the old runnext out to the regular run queue.
gp = oldnext.ptr()
}
retry:
h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with consumers
t := pp.runqtail
if t-h < uint32(len(pp.runq)) {
pp.runq[t%uint32(len(pp.runq))].set(gp)
atomic.StoreRel(&pp.runqtail, t+1) // store-release, makes the item available for consumption
return
}
if runqputslow(pp, gp, h, t) {
return
}
// the queue is not full, now the put above must succeed
goto retry
}
mstart函数 | newm函数调用
newm函数获取系统线程,mstart初始化m,mstart为空,mstart的执行交给mstart0
// Create a new m. It will start off with a call to fn, or else the scheduler.
// fn needs to be static and not a heap allocated closure.
// May run with m.p==nil, so write barriers are not allowed.
//
// id is optional pre-allocated m ID. Omit by passing -1.
//
//go:nowritebarrierrec
func newm(fn func(), pp *p, id int64) {
// allocm adds a new M to allm, but they do not start until created by
// the OS in newm1 or the template thread.
//
// doAllThreadsSyscall requires that every M in allm will eventually
// start and be signal-able, even with a STW.
//
// Disable preemption here until we start the thread to ensure that
// newm is not preempted between allocm and starting the new thread,
// ensuring that anything added to allm is guaranteed to eventually
// start.
acquirem()
mp := allocm(pp, fn, id)
mp.nextp.set(pp)
mp.sigmask = initSigmask
if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" {
// We're on a locked M or a thread that may have been
// started by C. The kernel state of this thread may
// be strange (the user may have locked it for that
// purpose). We don't want to clone that into another
// thread. Instead, ask a known-good thread to create
// the thread for us.
//
// This is disabled on Plan 9. See golang.org/issue/22227.
//
// TODO: This may be unnecessary on Windows, which
// doesn't model thread creation off fork.
lock(&newmHandoff.lock)
if newmHandoff.haveTemplateThread == 0 {
throw("on a locked thread with no template thread")
}
mp.schedlink = newmHandoff.newm
newmHandoff.newm.set(mp)
if newmHandoff.waiting {
newmHandoff.waiting = false
notewakeup(&newmHandoff.wake)
}
unlock(&newmHandoff.lock)
// The M has not started yet, but the template thread does not
// participate in STW, so it will always process queued Ms and
// it is safe to releasem.
releasem(getg().m)
return
}
newm1(mp)
releasem(getg().m)
}
mstart0 | 再初始化G0,保证G0函数栈帧安全
// mstart0 is the Go entry-point for new Ms.
// This must not split the stack because we may not even have stack
// bounds set up yet.
//
// May run during STW (because it doesn't have a P yet), so write
// barriers are not allowed.
//
//go:nosplit
//go:nowritebarrierrec
func mstart0() {
gp := getg()
osStack := gp.stack.lo == 0
if osStack {
// Initialize stack bounds from system stack.
// Cgo may have left stack size in stack.hi.
// minit may update the stack bounds.
//
// Note: these bounds may not be very accurate.
// We set hi to &size, but there are things above
// it. The 1024 is supposed to compensate this,
// but is somewhat arbitrary.
size := gp.stack.hi
if size == 0 {
size = 16384 * sys.StackGuardMultiplier
}
gp.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
gp.stack.lo = gp.stack.hi - size + 1024
}
// Initialize stack guard so that we can start calling regular
// Go code.
gp.stackguard0 = gp.stack.lo + stackGuard
// This is the g0, so we can also call go:systemstack
// functions, which check stackguard1.
gp.stackguard1 = gp.stackguard0
mstart1()
// Exit this thread.
if mStackIsSystemAllocated() {
// Windows, Solaris, illumos, Darwin, AIX and Plan 9 always system-allocate
// the stack, but put it in gp.stack before mstart,
// so the logic above hasn't set osStack yet.
osStack = true
}
mexit(osStack)
}
mstart1 | 将pc,sp赋值到goroutine上,
// The go:noinline is to guarantee the getcallerpc/getcallersp below are safe,
// so that we can set up g0.sched to return to the call of mstart1 above.
//
//go:noinline
func mstart1() {
gp := getg()
if gp != gp.m.g0 {
throw("bad runtime·mstart")
}
// Set up m.g0.sched as a label returning to just
// after the mstart1 call in mstart0 above, for use by goexit0 and mcall.
// We're never coming back to mstart1 after we call schedule,
// so other calls can reuse the current frame.
// And goexit0 does a gogo that needs to return from mstart1
// and let mstart0 exit the thread.
gp.sched.g = guintptr(unsafe.Pointer(gp))
gp.sched.pc = getcallerpc()
gp.sched.sp = getcallersp()
asminit()
minit()
// Install signal handlers; after minit so that minit can
// prepare the thread to be able to handle the signals.
if gp.m == &m0 {
mstartm0()
}
if fn := gp.m.mstartfn; fn != nil {
fn()
}
if gp.m != &m0 {
acquirep(gp.m.nextp.ptr())
gp.m.nextp = 0
}
schedule()
}
minit 初始化 m
// Called to initialize a new m (including the bootstrap m).
// Called on the new thread, cannot allocate memory.
func minit() {
var thandle uintptr
if stdcall7(_DuplicateHandle, currentProcess, currentThread, currentProcess, uintptr(unsafe.Pointer(&thandle)), 0, 0, _DUPLICATE_SAME_ACCESS) == 0 {
print("runtime.minit: duplicatehandle failed; errno=", getlasterror(), "\n")
throw("runtime.minit: duplicatehandle failed")
}
mp := getg().m
lock(&mp.threadLock)
mp.thread = thandle
mp.procid = uint64(stdcall0(_GetCurrentThreadId))
// Configure usleep timer, if possible.
if mp.highResTimer == 0 && haveHighResTimer {
mp.highResTimer = createHighResTimer()
if mp.highResTimer == 0 {
print("runtime: CreateWaitableTimerEx failed; errno=", getlasterror(), "\n")
throw("CreateWaitableTimerEx when creating timer failed")
}
}
unlock(&mp.threadLock)
// Query the true stack base from the OS. Currently we're
// running on a small assumed stack.
var mbi memoryBasicInformation
res := stdcall3(_VirtualQuery, uintptr(unsafe.Pointer(&mbi)), uintptr(unsafe.Pointer(&mbi)), unsafe.Sizeof(mbi))
if res == 0 {
print("runtime: VirtualQuery failed; errno=", getlasterror(), "\n")
throw("VirtualQuery for stack base failed")
}
// The system leaves an 8K PAGE_GUARD region at the bottom of
// the stack (in theory VirtualQuery isn't supposed to include
// that, but it does). Add an additional 8K of slop for
// calling C functions that don't have stack checks and for
// lastcontinuehandler. We shouldn't be anywhere near this
// bound anyway.
base := mbi.allocationBase + 16<<10
// Sanity check the stack bounds.
g0 := getg()
if base > g0.stack.hi || g0.stack.hi-base > 64<<20 {
print("runtime: g0 stack [", hex(base), ",", hex(g0.stack.hi), ")\n")
throw("bad g0 stack")
}
g0.stack.lo = base
g0.stackguard0 = g0.stack.lo + stackGuard
g0.stackguard1 = g0.stackguard0
// Sanity check the SP.
stackcheck()
}
schedule | 调度goroutine执行
// One round of scheduler: find a runnable goroutine and execute it.
// Never returns.
func schedule() {
mp := getg().m
if mp.locks != 0 {
throw("schedule: holding locks")
}
if mp.lockedg != 0 {
stoplockedm()
execute(mp.lockedg.ptr(), false) // Never returns.
}
// We should not schedule away from a g that is executing a cgo call,
// since the cgo call is using the m's g0 stack.
if mp.incgo {
throw("schedule: in cgo")
}
top:
pp := mp.p.ptr()
pp.preempt = false
// Safety check: if we are spinning, the run queue should be empty.
// Check this before calling checkTimers, as that might call
// goready to put a ready goroutine on the local run queue.
if mp.spinning && (pp.runnext != 0 || pp.runqhead != pp.runqtail) {
throw("schedule: spinning with local work")
}
gp, inheritTime, tryWakeP := findRunnable() // blocks until work is available
if debug.dontfreezetheworld > 0 && freezing.Load() {
// See comment in freezetheworld. We don't want to perturb
// scheduler state, so we didn't gcstopm in findRunnable, but
// also don't want to allow new goroutines to run.
//
// Deadlock here rather than in the findRunnable loop so if
// findRunnable is stuck in a loop we don't perturb that
// either.
lock(&deadlock)
lock(&deadlock)
}
// This thread is going to run a goroutine and is not spinning anymore,
// so if it was marked as spinning we need to reset it now and potentially
// start a new spinning M.
if mp.spinning {
resetspinning()
}
if sched.disable.user && !schedEnabled(gp) {
// Scheduling of this goroutine is disabled. Put it on
// the list of pending runnable goroutines for when we
// re-enable user scheduling and look again.
lock(&sched.lock)
if schedEnabled(gp) {
// Something re-enabled scheduling while we
// were acquiring the lock.
unlock(&sched.lock)
} else {
sched.disable.runnable.pushBack(gp)
sched.disable.n++
unlock(&sched.lock)
goto top
}
}
// If about to schedule a not-normal goroutine (a GCworker or tracereader),
// wake a P if there is one.
if tryWakeP {
wakep()
}
if gp.lockedm != 0 {
// Hands off own p to the locked m,
// then blocks waiting for a new p.
startlockedm(gp)
goto top
}
execute(gp, inheritTime)
}
找到可运行的goroutine
每61次,取1次在全局队列,其他都是优先在p的本地队列取
// Finds a runnable goroutine to execute.
// Tries to steal from other P's, get g from local or global queue, poll network.
// tryWakeP indicates that the returned goroutine is not normal (GC worker, trace
// reader) so the caller should try to wake a P.
func findRunnable() (gp *g, inheritTime, tryWakeP bool) {
mp := getg().m
// The conditions here and in handoffp must agree: if
// findrunnable would return a G to run, handoffp must start
// an M.
top:
pp := mp.p.ptr()
if sched.gcwaiting.Load() {
gcstopm()
goto top
}
if pp.runSafePointFn != 0 {
runSafePointFn()
}
// now and pollUntil are saved for work stealing later,
// which may steal timers. It's important that between now
// and then, nothing blocks, so these numbers remain mostly
// relevant.
now, pollUntil, _ := checkTimers(pp, 0)
// Try to schedule the trace reader.
if traceEnabled() || traceShuttingDown() {
gp := traceReader()
if gp != nil {
trace := traceAcquire()
casgstatus(gp, _Gwaiting, _Grunnable)
if trace.ok() {
trace.GoUnpark(gp, 0)
traceRelease(trace)
}
return gp, false, true
}
}
// Try to schedule a GC worker.
if gcBlackenEnabled != 0 {
gp, tnow := gcController.findRunnableGCWorker(pp, now)
if gp != nil {
return gp, false, true
}
now = tnow
}
// Check the global runnable queue once in a while to ensure fairness.
// Otherwise two goroutines can completely occupy the local runqueue
// by constantly respawning each other.
if pp.schedtick%61 == 0 && sched.runqsize > 0 {
lock(&sched.lock)
gp := globrunqget(pp, 1)
unlock(&sched.lock)
if gp != nil {
return gp, false, false
}
}
// Wake up the finalizer G.
if fingStatus.Load()&(fingWait|fingWake) == fingWait|fingWake {
if gp := wakefing(); gp != nil {
ready(gp, 0, true)
}
}
if *cgo_yield != nil {
asmcgocall(*cgo_yield, nil)
}
// local runq
if gp, inheritTime := runqget(pp); gp != nil {
return gp, inheritTime, false
}
// global runq
if sched.runqsize != 0 {
lock(&sched.lock)
gp := globrunqget(pp, 0)
unlock(&sched.lock)
if gp != nil {
return gp, false, false
}
}
// Poll network.
// This netpoll is only an optimization before we resort to stealing.
// We can safely skip it if there are no waiters or a thread is blocked
// in netpoll already. If there is any kind of logical race with that
// blocked thread (e.g. it has already returned from netpoll, but does
// not set lastpoll yet), this thread will do blocking netpoll below
// anyway.
if netpollinited() && netpollAnyWaiters() && sched.lastpoll.Load() != 0 {
if list, delta := netpoll(0); !list.empty() { // non-blocking
gp := list.pop()
injectglist(&list)
netpollAdjustWaiters(delta)
trace := traceAcquire()
casgstatus(gp, _Gwaiting, _Grunnable)
if trace.ok() {
trace.GoUnpark(gp, 0)
traceRelease(trace)
}
return gp, false, false
}
}
// Spinning Ms: steal work from other Ps.
//
// Limit the number of spinning Ms to half the number of busy Ps.
// This is necessary to prevent excessive CPU consumption when
// GOMAXPROCS>>1 but the program parallelism is low.
if mp.spinning || 2*sched.nmspinning.Load() < gomaxprocs-sched.npidle.Load() {
if !mp.spinning {
mp.becomeSpinning()
}
gp, inheritTime, tnow, w, newWork := stealWork(now)
if gp != nil {
// Successfully stole.
return gp, inheritTime, false
}
if newWork {
// There may be new timer or GC work; restart to
// discover.
goto top
}
now = tnow
if w != 0 && (pollUntil == 0 || w < pollUntil) {
// Earlier timer to wait for.
pollUntil = w
}
}
// We have nothing to do.
//
// If we're in the GC mark phase, can safely scan and blacken objects,
// and have work to do, run idle-time marking rather than give up the P.
if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) && gcController.addIdleMarkWorker() {
node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
if node != nil {
pp.gcMarkWorkerMode = gcMarkWorkerIdleMode
gp := node.gp.ptr()
trace := traceAcquire()
casgstatus(gp, _Gwaiting, _Grunnable)
if trace.ok() {
trace.GoUnpark(gp, 0)
traceRelease(trace)
}
return gp, false, false
}
gcController.removeIdleMarkWorker()
}
// wasm only:
// If a callback returned and no other goroutine is awake,
// then wake event handler goroutine which pauses execution
// until a callback was triggered.
gp, otherReady := beforeIdle(now, pollUntil)
if gp != nil {
trace := traceAcquire()
casgstatus(gp, _Gwaiting, _Grunnable)
if trace.ok() {
trace.GoUnpark(gp, 0)
traceRelease(trace)
}
return gp, false, false
}
if otherReady {
goto top
}
// Before we drop our P, make a snapshot of the allp slice,
// which can change underfoot once we no longer block
// safe-points. We don't need to snapshot the contents because
// everything up to cap(allp) is immutable.
allpSnapshot := allp
// Also snapshot masks. Value changes are OK, but we can't allow
// len to change out from under us.
idlepMaskSnapshot := idlepMask
timerpMaskSnapshot := timerpMask
// return P and block
lock(&sched.lock)
if sched.gcwaiting.Load() || pp.runSafePointFn != 0 {
unlock(&sched.lock)
goto top
}
if sched.runqsize != 0 {
gp := globrunqget(pp, 0)
unlock(&sched.lock)
return gp, false, false
}
if !mp.spinning && sched.needspinning.Load() == 1 {
// See "Delicate dance" comment below.
mp.becomeSpinning()
unlock(&sched.lock)
goto top
}
if releasep() != pp {
throw("findrunnable: wrong p")
}
now = pidleput(pp, now)
unlock(&sched.lock)
// Delicate dance: thread transitions from spinning to non-spinning
// state, potentially concurrently with submission of new work. We must
// drop nmspinning first and then check all sources again (with
// #StoreLoad memory barrier in between). If we do it the other way
// around, another thread can submit work after we've checked all
// sources but before we drop nmspinning; as a result nobody will
// unpark a thread to run the work.
//
// This applies to the following sources of work:
//
// * Goroutines added to the global or a per-P run queue.
// * New/modified-earlier timers on a per-P timer heap.
// * Idle-priority GC work (barring golang.org/issue/19112).
//
// If we discover new work below, we need to restore m.spinning as a
// signal for resetspinning to unpark a new worker thread (because
// there can be more than one starving goroutine).
//
// However, if after discovering new work we also observe no idle Ps
// (either here or in resetspinning), we have a problem. We may be
// racing with a non-spinning M in the block above, having found no
// work and preparing to release its P and park. Allowing that P to go
// idle will result in loss of work conservation (idle P while there is
// runnable work). This could result in complete deadlock in the
// unlikely event that we discover new work (from netpoll) right as we
// are racing with _all_ other Ps going idle.
//
// We use sched.needspinning to synchronize with non-spinning Ms going
// idle. If needspinning is set when they are about to drop their P,
// they abort the drop and instead become a new spinning M on our
// behalf. If we are not racing and the system is truly fully loaded
// then no spinning threads are required, and the next thread to
// naturally become spinning will clear the flag.
//
// Also see "Worker thread parking/unparking" comment at the top of the
// file.
wasSpinning := mp.spinning
if mp.spinning {
mp.spinning = false
if sched.nmspinning.Add(-1) < 0 {
throw("findrunnable: negative nmspinning")
}
// Note the for correctness, only the last M transitioning from
// spinning to non-spinning must perform these rechecks to
// ensure no missed work. However, the runtime has some cases
// of transient increments of nmspinning that are decremented
// without going through this path, so we must be conservative
// and perform the check on all spinning Ms.
//
// See https://go.dev/issue/43997.
// Check global and P runqueues again.
lock(&sched.lock)
if sched.runqsize != 0 {
pp, _ := pidlegetSpinning(0)
if pp != nil {
gp := globrunqget(pp, 0)
if gp == nil {
throw("global runq empty with non-zero runqsize")
}
unlock(&sched.lock)
acquirep(pp)
mp.becomeSpinning()
return gp, false, false
}
}
unlock(&sched.lock)
pp := checkRunqsNoP(allpSnapshot, idlepMaskSnapshot)
if pp != nil {
acquirep(pp)
mp.becomeSpinning()
goto top
}
// Check for idle-priority GC work again.
pp, gp := checkIdleGCNoP()
if pp != nil {
acquirep(pp)
mp.becomeSpinning()
// Run the idle worker.
pp.gcMarkWorkerMode = gcMarkWorkerIdleMode
trace := traceAcquire()
casgstatus(gp, _Gwaiting, _Grunnable)
if trace.ok() {
trace.GoUnpark(gp, 0)
traceRelease(trace)
}
return gp, false, false
}
// Finally, check for timer creation or expiry concurrently with
// transitioning from spinning to non-spinning.
//
// Note that we cannot use checkTimers here because it calls
// adjusttimers which may need to allocate memory, and that isn't
// allowed when we don't have an active P.
pollUntil = checkTimersNoP(allpSnapshot, timerpMaskSnapshot, pollUntil)
}
// Poll network until next timer.
if netpollinited() && (netpollAnyWaiters() || pollUntil != 0) && sched.lastpoll.Swap(0) != 0 {
sched.pollUntil.Store(pollUntil)
if mp.p != 0 {
throw("findrunnable: netpoll with p")
}
if mp.spinning {
throw("findrunnable: netpoll with spinning")
}
delay := int64(-1)
if pollUntil != 0 {
if now == 0 {
now = nanotime()
}
delay = pollUntil - now
if delay < 0 {
delay = 0
}
}
if faketime != 0 {
// When using fake time, just poll.
delay = 0
}
list, delta := netpoll(delay) // block until new work is available
// Refresh now again, after potentially blocking.
now = nanotime()
sched.pollUntil.Store(0)
sched.lastpoll.Store(now)
if faketime != 0 && list.empty() {
// Using fake time and nothing is ready; stop M.
// When all M's stop, checkdead will call timejump.
stopm()
goto top
}
lock(&sched.lock)
pp, _ := pidleget(now)
unlock(&sched.lock)
if pp == nil {
injectglist(&list)
netpollAdjustWaiters(delta)
} else {
acquirep(pp)
if !list.empty() {
gp := list.pop()
injectglist(&list)
netpollAdjustWaiters(delta)
trace := traceAcquire()
casgstatus(gp, _Gwaiting, _Grunnable)
if trace.ok() {
trace.GoUnpark(gp, 0)
traceRelease(trace)
}
return gp, false, false
}
if wasSpinning {
mp.becomeSpinning()
}
goto top
}
} else if pollUntil != 0 && netpollinited() {
pollerPollUntil := sched.pollUntil.Load()
if pollerPollUntil == 0 || pollerPollUntil > pollUntil {
netpollBreak()
}
}
stopm()
goto top
}
执行该goroutine
execute:绑定g和m
mp := getg().m
mp.curg = gp gp.m = mp
// Schedules gp to run on the current M.
// If inheritTime is true, gp inherits the remaining time in the
// current time slice. Otherwise, it starts a new time slice.
// Never returns.
//
// Write barriers are allowed because this is called immediately after
// acquiring a P in several places.
//
//go:yeswritebarrierrec
func execute(gp *g, inheritTime bool) {
mp := getg().m
if goroutineProfile.active {
// Make sure that gp has had its stack written out to the goroutine
// profile, exactly as it was when the goroutine profiler first stopped
// the world.
tryRecordGoroutineProfile(gp, osyield)
}
// Assign gp.m before entering _Grunning so running Gs have an
// M.
mp.curg = gp
gp.m = mp
casgstatus(gp, _Grunnable, _Grunning)
gp.waitsince = 0
gp.preempt = false
gp.stackguard0 = gp.stack.lo + stackGuard
if !inheritTime {
mp.p.ptr().schedtick++
}
// Check whether the profiler needs to be turned on or off.
hz := sched.profilehz
if mp.profilehz != hz {
setThreadCPUProfiler(hz)
}
trace := traceAcquire()
if trace.ok() {
// GoSysExit has to happen when we have a P, but before GoStart.
// So we emit it here.
if !goexperiment.ExecTracer2 && gp.syscallsp != 0 {
trace.GoSysExit(true)
}
trace.GoStart()
traceRelease(trace)
}
gogo(&gp.sched)
}
gogo: 执行goroutine
无代码,函数为空,func gogo(buf *gobuf)
退出该goroutine
exit:解绑g和m
- M切换 G 为 g0
- 清空G的信息
- 取消M和G的关联
- 将G放入P的空闲G列表
- 调用runtime.schedule()开启新一轮调度