Go源码解读-sync.Map的实现

751 阅读11分钟

前言

我有一个朋友, 最近困扰于map的线程安全问题, 每次都要单独定义个结构体加锁处理, 例如以下结构体

// SafeMap is the hand-rolled alternative the article starts from: a plain map
// declared together with a sync.RWMutex.
type SafeMap struct {
	// m is the underlying map from string keys to arbitrary values.
	m map[string]interface{}
	// mu is declared next to m; presumably every access to m is expected to
	// hold it — no locking code is shown here, confirm against the callers.
	mu sync.RWMutex
}

每次都要加锁解锁太麻烦, 问我有没有其他的实现方式

这不巧了吗, 官方考虑到了这种情况已经实现了sync.Map 供使用,让我们看看它是怎么实现的

正文

存储结构体

// Map is the storage structure walked through in this article: a read map
// that is loaded atomically plus a mutex-guarded dirty map.
type Map struct {
	// mu is held while operating on the dirty map and the miss counter.
	mu Mutex

	// read holds the read map; the stored value is a readOnly.
	read atomic.Value // readOnly
	
	// dirty is the write map. When it is not nil it holds all of the data
	// except the deleted entries.
	dirty map[interface{}]*entry

	// misses is the miss counter. Once it reaches len(dirty) the dirty map is
	// promoted to become the read map.
	misses int
}

// readOnly is the value stored in Map.read.
type readOnly struct {
	// m is the read-only map from key to entry.
	m       map[interface{}]*entry

	// amended is true when the dirty map contains a key that m does not.
	// When it is false the dirty map is nil.
	amended bool 
}

// entry is the slot a key maps to in both the read map and the dirty map.
type entry struct {
	// p holds the address of the stored value, so that it can be compared
	// and replaced with the atomic pointer operations.
	p unsafe.Pointer
}

Load 获取指定key的值

优先去读map中获取值, 如果没有并且读写map不一致(amended为true), 则加锁后去写map中获取一次, 并增加一次miss计数

// Load returns the value stored for key and whether it was found.
//
// The read map is consulted first without locking. Only when the key is
// missing there and the read map is marked amended (the dirty map has keys the
// read map lacks) does Load take m.mu and look in the dirty map.
func (m *Map) Load(key interface{}) (value interface{}, ok bool) {
	snap, _ := m.read.Load().(readOnly)
	ent, found := snap.m[key]
	if found {
		return ent.load()
	}
	if !snap.amended {
		// The dirty map has nothing the read map lacks: the key is absent.
		return nil, false
	}

	m.mu.Lock()
	// The dirty map may have been promoted while we waited for the lock, so
	// check the read map once more before falling back to the dirty map.
	snap, _ = m.read.Load().(readOnly)
	if ent, found = snap.m[key]; !found && snap.amended {
		ent, found = m.dirty[key]
		// A miss is recorded whether or not the dirty map had the key.
		m.missLocked()
	}
	m.mu.Unlock()

	if !found {
		return nil, false
	}
	return ent.load()
}

// load returns the value the entry currently points at. An entry whose
// pointer is nil or expunged counts as deleted and yields (nil, false).
func (e *entry) load() (value interface{}, ok bool) {
	switch ptr := atomic.LoadPointer(&e.p); ptr {
	case nil, expunged:
		return nil, false
	default:
		return *(*interface{})(ptr), true
	}
}


// missLocked records one miss. Once the miss count reaches the size of the
// dirty map, the dirty map is promoted to become the read map, the dirty map
// is reset to nil and the miss count starts over at zero.
// Load calls it while holding m.mu.
func (m *Map) missLocked() {
	m.misses++
	if m.misses >= len(m.dirty) {
		// Promote: the dirty map becomes the (unamended) read map.
		m.read.Store(readOnly{m: m.dirty})
		m.dirty = nil
		m.misses = 0
	}
}

Store 添加/修改

  1. 读map存在key且未被标记为expunged时, 不加锁直接修改值的地址
  2. 读map存在key但已被标记为expunged(该entry不在写map中), 则先把entry加回写map, 然后修改值的地址
  3. 读map不存在key且写map存在, 直接修改值的地址
  4. 读写map都不存在key, 写map为nil的话复制读map, 写map新增值
// Store sets the value for key.
//
// If the key is in the read map and its entry is not expunged, the value is
// swapped in without locking. Otherwise the update is done under m.mu: the
// entry is revived, updated in the dirty map, or inserted as a new key.
func (m *Map) Store(key, value interface{}) {
	snap, _ := m.read.Load().(readOnly)
	if ent, found := snap.m[key]; found && ent.tryStore(&value) {
		return
	}

	m.mu.Lock()
	// The dirty map may have been promoted while we waited for the lock, so
	// reload the read map.
	snap, _ = m.read.Load().(readOnly)

	if ent, inRead := snap.m[key]; inRead {
		// An expunged entry was left out of the dirty map when it was built;
		// put it back before giving it a value again.
		if ent.unexpungeLocked() {
			m.dirty[key] = ent
		}
		ent.storeLocked(&value)
		m.mu.Unlock()
		return
	}

	if ent, inDirty := m.dirty[key]; inDirty {
		// Known only to the dirty map: update it in place.
		ent.storeLocked(&value)
		m.mu.Unlock()
		return
	}

	// The key is in neither map.
	if !snap.amended {
		// First new key since the last promotion: build the dirty map from
		// the live entries of the read map and mark the read map as amended.
		m.dirtyLocked()
		m.read.Store(readOnly{m: snap.m, amended: true})
	}
	m.dirty[key] = newEntry(value)
	m.mu.Unlock()
}

// tryStore points the entry at i unless the entry is expunged, in which case
// it returns false and leaves the entry untouched. The swap is retried until
// it succeeds or the entry is observed to be expunged.
func (e *entry) tryStore(i *interface{}) bool {
	next := unsafe.Pointer(i)
	for cur := atomic.LoadPointer(&e.p); cur != expunged; cur = atomic.LoadPointer(&e.p) {
		if atomic.CompareAndSwapPointer(&e.p, cur, next) {
			return true
		}
	}
	return false
}


// unexpungeLocked atomically changes an expunged entry back to nil and
// reports whether the entry was expunged. Any other pointer is left as is.
func (e *entry) unexpungeLocked() (wasExpunged bool) {
	wasExpunged = atomic.CompareAndSwapPointer(&e.p, expunged, nil)
	return wasExpunged
}

// storeLocked atomically points the entry at i, whatever it held before.
func (e *entry) storeLocked(i *interface{}) {
	ptr := unsafe.Pointer(i)
	atomic.StorePointer(&e.p, ptr)
}

// dirtyLocked builds the dirty map from the read map when there is none.
// Entries that tryExpungeLocked reports as expunged are left out; all others
// are shared (same *entry) between the two maps.
//
// NOTE(review): the only caller shown invokes this when read.amended is
// false, where the dirty map is expected to be nil already, so the non-nil
// case looks purely defensive.
func (m *Map) dirtyLocked() {
	if m.dirty == nil {
		snap, _ := m.read.Load().(readOnly)
		m.dirty = make(map[interface{}]*entry, len(snap.m))
		for key, ent := range snap.m {
			if ent.tryExpungeLocked() {
				// Deleted entry: do not carry it over.
				continue
			}
			m.dirty[key] = ent
		}
	}
}

// tryExpungeLocked turns a nil (deleted) entry into an expunged one and
// reports whether the entry is expunged afterwards. An entry that holds a
// value is left unchanged and reported as not expunged.
func (e *entry) tryExpungeLocked() (isExpunged bool) {
	for {
		cur := atomic.LoadPointer(&e.p)
		if cur != nil {
			return cur == expunged
		}
		// Still nil: try to mark it expunged; on failure the pointer changed
		// underneath us, so look at it again.
		if atomic.CompareAndSwapPointer(&e.p, nil, expunged) {
			return true
		}
	}
}

Delete 删除

  1. 读map有的话直接修改值的地址为nil;
  2. 读map没有并且与写map不一致时, 强删key
// Delete removes the value for key.
//
// A key found in the read map has its entry cleared to nil. A key missing
// from the read map is removed from the dirty map outright, under m.mu, when
// the read map is marked amended.
func (m *Map) Delete(key interface{}) {
	snap, _ := m.read.Load().(readOnly)
	if ent, found := snap.m[key]; found {
		ent.delete()
		return
	}
	if !snap.amended {
		// The dirty map has nothing the read map lacks: nothing to delete.
		return
	}

	m.mu.Lock()
	// The dirty map may have been promoted while we waited for the lock, so
	// check the read map once more.
	snap, _ = m.read.Load().(readOnly)
	ent, found := snap.m[key]
	if !found && snap.amended {
		// Removes the key from the dirty map; a no-op if it is not there.
		delete(m.dirty, key)
	}
	m.mu.Unlock()

	if found {
		ent.delete()
	}
}

// delete clears the entry to nil and reports whether it held a value.
// An entry that is already nil or expunged is left alone and reported false.
func (e *entry) delete() (hadValue bool) {
	for {
		cur := atomic.LoadPointer(&e.p)
		switch {
		case cur == nil, cur == expunged:
			return false
		case atomic.CompareAndSwapPointer(&e.p, cur, nil):
			return true
		}
		// The pointer changed between the load and the swap; retry.
	}
}

Range 循环

  1. 读写map不一致, 则写map升级为读map
  2. 循环读map进行操作
// Range calls f for every key whose entry still holds a value, and stops as
// soon as f returns false.
//
// If the read map is marked amended, the dirty map is first promoted to be
// the read map (under m.mu), so that the map being iterated holds every key.
func (m *Map) Range(f func(key, value interface{}) bool) {
	snap, _ := m.read.Load().(readOnly)
	if snap.amended {
		m.mu.Lock()
		// The dirty map may have been promoted while we waited for the lock.
		if snap, _ = m.read.Load().(readOnly); snap.amended {
			// Promote the dirty map, then reset the dirty map and the miss
			// counter.
			snap = readOnly{m: m.dirty}
			m.read.Store(snap)
			m.dirty, m.misses = nil, 0
		}
		m.mu.Unlock()
	}

	for key, ent := range snap.m {
		// Deleted entries are skipped; f is only called for live ones.
		if val, live := ent.load(); live && !f(key, val) {
			return
		}
	}
}

// load reads the entry's pointer atomically and returns the value behind it.
// A nil or expunged pointer means the key was deleted: (nil, false).
func (e *entry) load() (value interface{}, ok bool) {
	ptr := atomic.LoadPointer(&e.p)
	if ptr != nil && ptr != expunged {
		return *(*interface{})(ptr), true
	}
	return nil, false
}

总结

  1. sync.Map 是用读写分离的方式实现的, 用空间换时间, 最多不超过一倍的内存占用(读写map共享同一批entry指针; 当miss次数达到写map的长度, 或者调用Range时, 就会把写map升级成读map, 写map置空);
  2. 只适用于少量写的方式, 或者在初始化写后少量写入, 不然写map频繁升级为读map, 每次升级后再写入新key时又要把读map全量复制到写map, 性能可能会拉胯;
  3. 相对的, 对于大量读, 绝对比上述自己加锁的方式性能要好上很多;

思考

  1. len方法没有实现, 可能是因为并发操作下数据更新比较快, 统计出来的长度没有什么参考意义; 想自己实现的话, 参考Range方法遍历并统计数量就可以了;
  2. 线程安全的map 性能瓶颈主要在加锁这块, 在大量写的情况下肯定是不能用sync.Map的, 最好的方法应该是使锁的粒度尽可能的小, 也是对map进行分组操作(这不就跟数据库优化方案一样了, 先读写分离, 再分表分库);
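  3. expunged 的设计点, 我自己尝试修改了源码删除了expunged, 发现也可以正常使用, 也不会出现其他博主说的会造成脏内存的情况. 不过下面的测试没有覆盖"key已经在读map中, 被Delete之后再次Store"的场景, 所以还不能说明expunged是多余的: 原实现用nil表示"已删除", 用expunged表示"已删除并且该entry没有被复制进写map", 区分这两种状态后, nil状态的entry可以不加锁直接通过tryStore复用. 这块还得再想想, 或者哪位大神可以解释下, 下面是我的测试代码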

map.go

package main

import (
	"sync"
	"sync/atomic"
	"unsafe"
)

// Map is like a Go map[interface{}]interface{} but is safe for concurrent use
// by multiple goroutines without additional locking or coordination.
// Loads, stores, and deletes run in amortized constant time.
//
// This is a modified copy of the standard library's sync.Map in which the
// expunged sentinel has been removed: a deleted entry is represented only by
// a nil pointer.
//
// The Map type is specialized. Most code should use a plain Go map instead,
// with separate locking or coordination, for better type safety and to make it
// easier to maintain other invariants along with the map content.
//
// The Map type is optimized for two common use cases: (1) when the entry for a given
// key is only ever written once but read many times, as in caches that only grow,
// or (2) when multiple goroutines read, write, and overwrite entries for disjoint
// sets of keys. In these two cases, use of a Map may significantly reduce lock
// contention compared to a Go map paired with a separate Mutex or RWMutex.
//
// The zero Map is empty and ready for use. A Map must not be copied after first use.
type Map struct {
	// mu guards dirty and misses, and is held for every store to read.
	mu sync.Mutex

	// read contains the portion of the map's contents that are safe for
	// concurrent access (with or without mu held).
	//
	// The read field itself is always safe to load, but must only be stored with
	// mu held.
	//
	// Entries stored in read that hold a value may be updated concurrently
	// without mu. An entry that has been deleted (p == nil) is refused by
	// tryStore and is only given a new value by Store with mu held.
	read atomic.Value // readOnly

	// dirty contains the portion of the map's contents that require mu to be
	// held. To ensure that the dirty map can be promoted to the read map quickly,
	// it also includes the entries of the read map that held a value when the
	// dirty map was built.
	//
	// Entries whose p is nil at the time dirtyLocked builds the dirty map are
	// not copied into it.
	//
	// If the dirty map is nil, the next store of a new key will initialize it
	// by making a shallow copy of the read map, omitting deleted entries.
	dirty map[interface{}]*entry

	// misses counts the number of loads since the read map was last updated that
	// needed to lock mu to determine whether the key was present.
	//
	// Once misses reaches len(dirty), the dirty map is promoted to the read map
	// (in the unamended state) and the next store of a new key will make a new
	// dirty copy.
	misses int
}

// readOnly is an immutable struct stored atomically in the Map.read field.
type readOnly struct {
	// m maps each key to its entry; the entries are shared with Map.dirty.
	m       map[interface{}]*entry
	amended bool // true if the dirty map contains some key not in m.
}

// An entry is a slot in the map corresponding to a particular key.
type entry struct {
	// p points to the interface{} value stored for the entry.
	//
	// If p == nil, the entry has been deleted. This modified copy has no
	// expunged state, so a nil entry may or may not be present in m.dirty:
	// dirtyLocked leaves out entries that are nil when the dirty map is built,
	// while an entry deleted after that stays in m.dirty.
	//
	// Otherwise, the entry is valid and holds the value for its key.
	//
	// An entry can be deleted by atomic replacement with nil (see delete).
	//
	// An entry's associated value can be updated by atomic replacement,
	// provided p != nil (see tryStore). If p == nil, the value is only set by
	// Store with m.mu held, via storeLocked.
	p unsafe.Pointer // *interface{}
}

// newEntry allocates an entry whose pointer refers to a copy of i.
func newEntry(i interface{}) *entry {
	ent := new(entry)
	ent.p = unsafe.Pointer(&i)
	return ent
}

// Load returns the value stored in the map for a key, or nil if no
// value is present.
// The ok result indicates whether value was found in the map.
func (m *Map) Load(key interface{}) (value interface{}, ok bool) {
	view, _ := m.read.Load().(readOnly)
	hit, present := view.m[key]
	if !present && view.amended {
		m.mu.Lock()
		// m.dirty may have been promoted while we were blocked on m.mu;
		// re-reading the read map avoids counting a miss that is no longer one.
		view, _ = m.read.Load().(readOnly)
		if hit, present = view.m[key]; !present && view.amended {
			hit, present = m.dirty[key]
			// Present or not, this lookup needed the lock, so it counts as a
			// miss towards promoting the dirty map.
			m.missLocked()
		}
		m.mu.Unlock()
	}
	if present {
		return hit.load()
	}
	return nil, false
}

// load returns the value the entry points at, or (nil, false) when the
// entry's pointer is nil, i.e. the entry has been deleted.
func (e *entry) load() (value interface{}, ok bool) {
	if ptr := atomic.LoadPointer(&e.p); ptr != nil {
		return *(*interface{})(ptr), true
	}
	return nil, false
}

// Store sets the value for a key.
//
// If the key is in the read map and its entry holds a value, the new value is
// swapped in without taking m.mu. Otherwise the store is done with m.mu held:
// a deleted entry of the read map is revived, an entry known only to the
// dirty map is updated, or a new entry is added to the dirty map.
func (m *Map) Store(key, value interface{}) {
	read, _ := m.read.Load().(readOnly)
	if e, ok := read.m[key]; ok && e.tryStore(&value) {
		return
	}

	m.mu.Lock()
	defer m.mu.Unlock()

	read, _ = m.read.Load().(readOnly)
	if e, ok := read.m[key]; ok {
		// tryStore refused the entry because it was deleted (p == nil).
		// Without an expunged state we cannot tell whether dirtyLocked left
		// this entry out of the dirty map, so make sure it is there. The
		// dirty map is nil after a promotion; writing to it then would panic,
		// and is not needed because the next dirtyLocked copies the entry
		// from the read map.
		if m.dirty != nil {
			m.dirty[key] = e
		}
		e.storeLocked(&value)
		return
	}

	if e, ok := m.dirty[key]; ok {
		e.storeLocked(&value)
		return
	}

	if !read.amended {
		// We're adding the first new key to the dirty map.
		// Make sure it is allocated and mark the read-only map as incomplete.
		m.dirtyLocked()
		m.read.Store(readOnly{m: read.m, amended: true})
	}
	m.dirty[key] = newEntry(value)
}

// tryStore stores a value if the entry has not been deleted.
//
// If the entry's pointer is nil, tryStore returns false and leaves the entry
// unchanged. Otherwise it retries the swap until it succeeds.
func (e *entry) tryStore(i *interface{}) bool {
	for {
		switch old := atomic.LoadPointer(&e.p); {
		case old == nil:
			return false
		case atomic.CompareAndSwapPointer(&e.p, old, unsafe.Pointer(i)):
			return true
		}
	}
}

// storeLocked unconditionally stores a value to the entry.
//
// Store calls it with m.mu held.
func (e *entry) storeLocked(i *interface{}) {
	ptr := unsafe.Pointer(i)
	atomic.StorePointer(&e.p, ptr)
}

// Delete deletes the value for a key.
//
// An entry found in the read map is cleared to nil; a key known only to the
// dirty map is removed from it with m.mu held.
func (m *Map) Delete(key interface{}) {
	cur, _ := m.read.Load().(readOnly)
	hit, inRead := cur.m[key]
	if needLock := !inRead && cur.amended; needLock {
		m.mu.Lock()
		// Re-check under the lock in case the dirty map was promoted meanwhile.
		cur, _ = m.read.Load().(readOnly)
		if hit, inRead = cur.m[key]; !inRead && cur.amended {
			delete(m.dirty, key)
		}
		m.mu.Unlock()
	}
	if !inRead {
		return
	}
	hit.delete()
}

// delete clears the entry's pointer to nil and reports whether the entry held
// a value beforehand. An entry that is already nil is reported as false.
func (e *entry) delete() (hadValue bool) {
	for old := atomic.LoadPointer(&e.p); old != nil; old = atomic.LoadPointer(&e.p) {
		if atomic.CompareAndSwapPointer(&e.p, old, nil) {
			return true
		}
	}
	return false
}

// Range calls f sequentially for each key and value present in the map.
// If f returns false, range stops the iteration.
//
// Range iterates over the read map as it was loaded at the start of the call
// (after promoting the dirty map if needed), so it is not a consistent
// snapshot: a value stored or deleted concurrently may or may not be seen.
func (m *Map) Range(f func(key, value interface{}) bool) {
	view, _ := m.read.Load().(readOnly)
	if view.amended {
		// m.dirty has keys that view.m lacks. Promote it right away so that
		// the map we iterate over holds every key.
		m.mu.Lock()
		if view, _ = m.read.Load().(readOnly); view.amended {
			view = readOnly{m: m.dirty}
			m.read.Store(view)
			m.dirty, m.misses = nil, 0
		}
		m.mu.Unlock()
	}

	for k, e := range view.m {
		v, present := e.load()
		if present {
			if keepGoing := f(k, v); !keepGoing {
				break
			}
		}
	}
}

// missLocked counts one miss and, once the count reaches len(m.dirty),
// promotes the dirty map to the read map and resets the dirty map and the
// counter. Load calls it with m.mu held.
func (m *Map) missLocked() {
	if m.misses++; m.misses < len(m.dirty) {
		return
	}
	promoted := readOnly{m: m.dirty}
	m.read.Store(promoted)
	m.dirty, m.misses = nil, 0
}

// dirtyLocked allocates the dirty map, if it is nil, as a shallow copy of the
// read map that leaves out every entry tryExpungeLocked reports as deleted.
func (m *Map) dirtyLocked() {
	if m.dirty != nil {
		return
	}

	src, _ := m.read.Load().(readOnly)
	m.dirty = make(map[interface{}]*entry, len(src.m))
	for key, ent := range src.m {
		if skip := ent.tryExpungeLocked(); skip {
			continue
		}
		m.dirty[key] = ent
	}
}

// tryExpungeLocked reports whether the entry is deleted, i.e. whether its
// pointer is nil. In this modified copy it only reads the pointer and never
// changes it.
func (e *entry) tryExpungeLocked() (isExpunged bool) {
	isExpunged = atomic.LoadPointer(&e.p) == nil
	return isExpunged
}

map_test.go

package main

import (
	"fmt"
	"log"
	"strconv"
	"sync"
	"testing"
)

// TestMap stores, loads, deletes and ranges over a Map. The stores and
// deletes of the numeric keys race each other, so only the outcomes that do
// not depend on goroutine scheduling are asserted: "name", "age" and "sex"
// are never deleted, and key "99" is stored but has no matching Delete.
func TestMap(t *testing.T) {
	m := Map{}
	m.Store("name", "fly")
	name, ok := m.Load("name")
	log.Println(name, ok)
	if !ok || name != "fly" {
		t.Fatalf(`Load("name") = %v, %v; want fly, true`, name, ok)
	}

	wg := sync.WaitGroup{}
	for i := 0; i < 100; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			m.Store(strconv.Itoa(i), i)
		}(i)
	}
	for i := 0; i < 99; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			m.Delete(strconv.Itoa(i))
		}(i)
	}
	wg.Wait()

	m.Store("age", "25")
	seen := make(map[interface{}]interface{})
	m.Range(func(key, value interface{}) bool {
		fmt.Println(key, value)
		if _, dup := seen[key]; dup {
			t.Errorf("Range visited key %v more than once", key)
		}
		seen[key] = value
		return true
	})
	// Keys "0".."98" may or may not survive, depending on whether their
	// Delete ran before or after their Store; these three must be there.
	for key, want := range map[interface{}]interface{}{"name": "fly", "age": "25", "99": 99} {
		if got, found := seen[key]; !found || got != want {
			t.Errorf("Range: key %v = %v, %v; want %v, true", key, got, found, want)
		}
	}

	m.Store("sex", 1)
	sex, ok := m.Load("sex")
	log.Println(sex, ok)
	if !ok || sex != 1 {
		t.Fatalf(`Load("sex") = %v, %v; want 1, true`, sex, ok)
	}
}