必知必会系列-sync.Mutex

174 阅读7分钟

Mutex锁分为两种模式,正常模式 和 饥饿模式

正常模式下, 新加入的协程有两种结果: 要么抢到锁 (抢锁前可能先经过有限次自旋), 直接结束; 要么抢不到锁, 追加到等待队列尾部, 等待被唤醒。被唤醒的协程不会直接获得锁, 而是要与新加入的协程重新竞争, 竞争失败则重新排到等待队列头部

饥饿模式下, 对于新加入的协程只能追加到等待队列尾部, 等待被唤醒。在该模式下, 所有锁竞争者都不能自旋

主要字段

image.png

// A Mutex is a mutual exclusion lock.
// The zero value for a Mutex is an unlocked mutex.
//
// A Mutex must not be copied after first use.
//
// In the terminology of the Go memory model,
// the n'th call to Unlock “synchronizes before” the m'th call to Lock
// for any n < m.
// A successful call to TryLock is equivalent to a call to Lock.
// A failed call to TryLock does not establish any “synchronizes before”
// relation at all.
type Mutex struct {
	// state packs the whole lock state into one word that is updated
	// atomically: the low bits are the mutexLocked, mutexWoken and
	// mutexStarving flags, and state>>mutexWaiterShift is the number of
	// goroutines blocked waiting for the lock. A value of 0 therefore means
	// unlocked, no flags, no waiters.
	state int32
	// sema is the semaphore word handed to runtime_SemacquireMutex and
	// runtime_Semrelease to block and wake waiting goroutines.
	sema  uint32
}

// A Locker represents an object that can be locked and unlocked.
//
// *Mutex satisfies Locker through its Lock and Unlock methods.
type Locker interface {
	Lock()
	Unlock()
}

const (
	mutexLocked = 1 << iota            // bit 0 (value 1): the mutex is held
	mutexWoken                         // bit 1 (value 2): a goroutine is already awake (spinning or just woken), so Unlock need not wake another
	mutexStarving                      // bit 2 (value 4): the mutex is in starvation mode
	mutexWaiterShift = iota            // equals 3: state>>mutexWaiterShift is the number of goroutines blocked waiting for the lock

	// Mutex fairness.
	//
	// Mutex can be in 2 modes of operations: normal and starvation.
	// In normal mode waiters are queued in FIFO order, but a woken up waiter
	// does not own the mutex and competes with new arriving goroutines over
	// the ownership. New arriving goroutines have an advantage -- they are
	// already running on CPU and there can be lots of them, so a woken up
	// waiter has good chances of losing. In such case it is queued at front
	// of the wait queue. If a waiter fails to acquire the mutex for more than 1ms,
	// it switches mutex to the starvation mode.
	//
	// In starvation mode ownership of the mutex is directly handed off from
	// the unlocking goroutine to the waiter at the front of the queue.
	// New arriving goroutines don't try to acquire the mutex even if it appears
	// to be unlocked, and don't try to spin. Instead they queue themselves at
	// the tail of the wait queue.
	//
	// If a waiter receives ownership of the mutex and sees that either
	// (1) it is the last waiter in the queue, or (2) it waited for less than 1 ms,
	// it switches mutex back to normal operation mode.
	//
	// Normal mode has considerably better performance as a goroutine can acquire
	// a mutex several times in a row even if there are blocked waiters.
	// Starvation mode is important to prevent pathological cases of tail latency.
	starvationThresholdNs = 1e6  // 1ms expressed in nanoseconds: a waiter that has waited longer than this switches the mutex to starvation mode
)

Lock

// Lock locks m.
// If the lock is already in use, the calling goroutine
// blocks until the mutex is available.
func (m *Mutex) Lock() {
	// Fast path: grab unlocked mutex.
	// The CAS only succeeds when state is exactly 0, i.e. the mutex is
	// unlocked, no woken/starving flag is set and no goroutine is waiting.
	if atomic.CompareAndSwapInt32(&m.state, 0, mutexLocked) {
		if race.Enabled {
			race.Acquire(unsafe.Pointer(m))
		}
		return
	}
	// Slow path (outlined so that the fast path can be inlined)
	m.lockSlow()
}

lockSlow

// lockSlow is the contended path of Lock, entered when the fast-path CAS
// from 0 to mutexLocked fails. It loops -- spinning, queueing on m.sema and
// retrying -- until the calling goroutine owns the mutex.
func (m *Mutex) lockSlow() {
	// waitStartTime: time at which this goroutine first blocked on the
	//                semaphore (0 until then).
	// starving:      this goroutine has waited longer than starvationThresholdNs.
	// awoke:         this goroutine set mutexWoken while spinning, or was woken
	//                from the semaphore in normal mode.
	// iter:          number of spin rounds done since the last (re)start.
	var waitStartTime int64
	starving := false
	awoke := false
	iter := 0
	old := m.state
	for {
		// Spin for a bounded number of rounds, instead of queueing, when:
		// 1. the mutex is locked and NOT in starvation mode, and
		// 2. runtime_canSpin(iter) reports that spinning is worthwhile.
		if old&(mutexLocked|mutexStarving) == mutexLocked && runtime_canSpin(iter) {
			// If we have not claimed the woken flag yet, the flag is clear, and
			// there ARE goroutines in the wait queue, try to set mutexWoken via
			// CAS so that Unlock does not wake another blocked goroutine while
			// we are spinning.
			if !awoke && old&mutexWoken == 0 && old>>mutexWaiterShift != 0 &&
				atomic.CompareAndSwapInt32(&m.state, old, old|mutexWoken) {
				awoke = true
			}
			// runtime_doSpin is implemented in the runtime and not shown here.
			// NOTE(review): it reportedly executes a short burst of PAUSE
			// instructions (30 on amd64) -- confirm against the runtime source.
			runtime_doSpin()
			iter++
			old = m.state
			continue
		}
		new := old
		// Only try to take the lock when the mutex is not in starvation mode;
		// in starvation mode new arrivals must queue.
		if old&mutexStarving == 0 {
			new |= mutexLocked
		}
		// The mutex is already locked or starving, so this goroutine is going
		// to wait: add one to the waiter count.
		if old&(mutexLocked|mutexStarving) != 0 {
			new += 1 << mutexWaiterShift
		}
		// The current goroutine switches mutex to starvation mode.
		// But if the mutex is currently unlocked, don't do the switch.
		// Unlock expects that starving mutex has waiters, which will not
		// be true in this case.
		if starving && old&mutexLocked != 0 {
			new |= mutexStarving
		}
		// awoke is set either by the spin branch above or after a wakeup from
		// the semaphore below. Either way this goroutine is about to acquire
		// the lock or go (back) to sleep, so the woken flag must be cleared.
		if awoke {
			// The goroutine has been woken from sleep,
			// so we need to reset the flag in either case.
			if new&mutexWoken == 0 {
				throw("sync: inconsistent mutex state")
			}
			new &^= mutexWoken
		}
		// Publish the new state; if another goroutine changed state in the
		// meantime the CAS fails and we retry from the top.
		if atomic.CompareAndSwapInt32(&m.state, old, new) {
			// old had neither the locked nor the starving bit, so the CAS above
			// set mutexLocked on our behalf: we own the mutex.
			if old&(mutexLocked|mutexStarving) == 0 {
				break // locked the mutex with CAS
			}
			// If we were already waiting before, queue at the front of the queue.
			queueLifo := waitStartTime != 0
			if waitStartTime == 0 {
				waitStartTime = runtime_nanotime()
			}
			// We did not get the lock: block on the semaphore until woken.
			// A first-time waiter is queued at the tail; a goroutine that was
			// already woken once and lost the race is queued at the head.
			runtime_SemacquireMutex(&m.sema, queueLifo, 1)

			// Execution resumes here once this goroutine has been woken.
			// If it has now waited longer than 1ms in total it is starving and
			// will try to switch the mutex to starvation mode (better fairness).
			starving = starving || runtime_nanotime()-waitStartTime > starvationThresholdNs
			old = m.state
			// Woken while the mutex is in starvation mode: ownership was handed
			// directly to this goroutine.
			if old&mutexStarving != 0 {
				// If this goroutine was woken and mutex is in starvation mode,
				// ownership was handed off to us but mutex is in somewhat
				// inconsistent state: mutexLocked is not set and we are still
				// accounted as waiter. Fix that.
				if old&(mutexLocked|mutexWoken) != 0 || old>>mutexWaiterShift == 0 {
					throw("sync: inconsistent mutex state")
				}
				// One atomic add that both sets mutexLocked and removes this
				// goroutine from the waiter count.
				delta := int32(mutexLocked - 1<<mutexWaiterShift)
				if !starving || old>>mutexWaiterShift == 1 {
					// Exit starvation mode.
					// Critical to do it here and consider wait time.
					// Starvation mode is so inefficient, that two goroutines
					// can go lock-step infinitely once they switch mutex
					// to starvation mode.
					// Two exit conditions: this goroutine itself is not starving
					// (it waited no longer than the threshold), or it is the
					// last waiter in the queue.
					delta -= mutexStarving
				}
				atomic.AddInt32(&m.state, delta)
				break
			}
			// Normal mode: being woken does not grant ownership. Record that we
			// are the woken goroutine (unlockSlow set mutexWoken when it woke
			// us), reset the spin counter, and loop again to compete with newly
			// arriving goroutines.
			awoke = true
			iter = 0
		} else {
			// Another goroutine modified state first; reload and retry.
			old = m.state
		}
	}

	if race.Enabled {
		race.Acquire(unsafe.Pointer(m))
	}
}

Unlock

// Unlock unlocks m.
// It is a run-time error if m is not locked on entry to Unlock.
//
// A locked Mutex is not associated with a particular goroutine.
// It is allowed for one goroutine to lock a Mutex and then
// arrange for another goroutine to unlock it.
func (m *Mutex) Unlock() {
	if race.Enabled {
		// NOTE(review): this discarded read of m.state has no visible effect
		// here; presumably it exists for the race-detector build -- confirm.
		_ = m.state
		race.Release(unsafe.Pointer(m))
	}

	// Fast path: drop lock bit.
	// new is the state after clearing mutexLocked. It is 0 only when no flag
	// is set and nobody is waiting, in which case there is nothing more to do.
	new := atomic.AddInt32(&m.state, -mutexLocked)
	if new != 0 {
		// Outlined slow path to allow inlining the fast path.
		// To hide unlockSlow during tracing we skip one extra frame when tracing GoUnblock.
		m.unlockSlow(new)
	}
}

unlockSlow

// unlockSlow is the slow path of Unlock. new is the value of m.state right
// after Unlock subtracted mutexLocked from it. Depending on the mode it
// either wakes one waiter (normal mode) or hands ownership directly to the
// waiter at the head of the queue (starvation mode).
func (m *Mutex) unlockSlow(new int32) {
	// Unlocking a mutex that was not locked is fatal: adding mutexLocked back
	// to new reconstructs the state before the decrement, which must have had
	// the locked bit set.
	if (new+mutexLocked)&mutexLocked == 0 {
		fatal("sync: unlock of unlocked mutex")
	}
	if new&mutexStarving == 0 {
		old := new
		for {
			// If there are no waiters or a goroutine has already
			// been woken or grabbed the lock, no need to wake anyone.
			// In starvation mode ownership is directly handed off from unlocking
			// goroutine to the next waiter. We are not part of this chain,
			// since we did not observe mutexStarving when we unlocked the mutex above.
			// So get off the way.
			//
			// Return without waking anyone when:
			// 1. no goroutine is blocked waiting; or
			// 2. there are waiters, but mutexWoken is set (some goroutine is
			//    already awake and competing for the lock); or
			// 3. there are waiters, but the mutex is locked again. This can be
			//    observed on a later loop iteration: the first CAS failed
			//    because a newly arrived goroutine grabbed the lock first; or
			// 4. there are waiters, but the mutex is now in starvation mode.
			//    It was not starving when we entered the loop, so a waiter
			//    switched it to starvation mode in the meantime (for example
			//    after repeated Lock/Unlock calls by other goroutines kept a
			//    woken waiter from getting the lock). Leave the handoff to a
			//    later Unlock call.
			if old>>mutexWaiterShift == 0 || old&(mutexLocked|mutexWoken|mutexStarving) != 0 {
				return
			}
			// Grab the right to wake someone.
			// Decrement the waiter count and set mutexWoken in one CAS.
			new = (old - 1<<mutexWaiterShift) | mutexWoken
			if atomic.CompareAndSwapInt32(&m.state, old, new) {
				runtime_Semrelease(&m.sema, false, 1)
				return
			}
			old = m.state
		}
	} else {
		// Starving mode: handoff mutex ownership to the next waiter, and yield
		// our time slice so that the next waiter can start to run immediately.
		// Note: mutexLocked is not set, the waiter will set it after wakeup.
		// But mutex is still considered locked if mutexStarving is set,
		// so new coming goroutines won't acquire it.
		// handoff=true passes the semaphore directly to the first waiter.
		runtime_Semrelease(&m.sema, true, 1)
	}
}

runtime_canSpin

可以自旋的条件:

  • 自旋次数小于4 (active_spin)
  • 运行在多核机器上 (ncpu>1)
  • GOMAXPROCS>1
  • 除当前P之外, 至少还有一个处于running状态的P, 并且当前P的本地runq为空。可见自旋获取锁的条件很苛刻了
// src/runtime/proc.go

active_spin     = 4


// Active spinning for sync.Mutex.
//
// sync_runtime_canSpin reports whether the caller may do one more round of
// active spinning. i is the number of spin rounds the caller has already
// done (lockSlow passes its iter counter).
//
//go:linkname sync_runtime_canSpin sync.runtime_canSpin
//go:nosplit
func sync_runtime_canSpin(i int) bool {
	// sync.Mutex is cooperative, so we are conservative with spinning.
	// Spin only few times and only if running on a multicore machine and
	// GOMAXPROCS>1 and there is at least one other running P and local runq is empty.
	// As opposed to runtime mutex we don't do passive spinning here,
	// because there can be work on global runq or on other Ps.
	//
	// Refuse to spin when any of the following holds:
	//   - i >= active_spin: the caller has used up its spin budget;
	//   - ncpu <= 1: single-core machine;
	//   - gomaxprocs <= npidle+nmspinning+1: apart from the current P, every
	//     P is idle or spinning, so no other P is running.
	if i >= active_spin || ncpu <= 1 || gomaxprocs <= int32(sched.npidle+sched.nmspinning)+1 {
		return false
	}
	// The current P still has runnable goroutines in its local run queue;
	// do not spin in that case.
	if p := getg().m.p.ptr(); !runqempty(p) {
		return false
	}
	return true
}

阻塞与唤醒语义

// SemacquireMutex is like Semacquire, but for profiling contended Mutexes.
// If lifo is true, queue waiter at the head of wait queue.
// skipframes is the number of frames to omit during tracing, counting from
// runtime_SemacquireMutex's caller.
//
// Mutex.lockSlow passes lifo=true only for a goroutine that has already
// waited once (its waitStartTime is non-zero), so a re-queued waiter goes to
// the head while a first-time waiter goes to the tail.
// The declaration has no body here; the implementation is provided elsewhere.
func runtime_SemacquireMutex(s *uint32, lifo bool, skipframes int)

// Semrelease atomically increments *s and notifies a waiting goroutine
// if one is blocked in Semacquire.
// It is intended as a simple wakeup primitive for use by the synchronization
// library and should not be used directly.
// If handoff is true, pass count directly to the first waiter.
// skipframes is the number of frames to omit during tracing, counting from
// runtime_Semrelease's caller.
//
// Mutex.unlockSlow passes handoff=false in normal mode and handoff=true in
// starvation mode.
// The declaration has no body here; the implementation is provided elsewhere.
func runtime_Semrelease(s *uint32, handoff bool, skipframes int)