剖析sync.Map为什么不适合写多读少的情况

110 阅读3分钟

sync.Map的数据结构

// Map is the struct behind sync.Map as quoted in this article. It combines a
// mutex (mu), a read snapshot held in an atomic.Value (read), a dirty map
// that requires mu (dirty), and a counter of read misses (misses). The field
// comments below describe how these pieces interact.
type Map struct {
   mu Mutex

   // read contains the portion of the map's contents that are safe for
   // concurrent access (with or without mu held).
   //
   // The read field itself is always safe to load, but must only be stored with
   // mu held.
   //
   // Entries stored in read may be updated concurrently without mu, but updating
   // a previously-expunged entry requires that the entry be copied to the dirty
   // map and unexpunged with mu held.
   read atomic.Value // readOnly

   // dirty contains the portion of the map's contents that require mu to be
   // held. To ensure that the dirty map can be promoted to the read map quickly,
   // it also includes all of the non-expunged entries in the read map.
   //
   // Expunged entries are not stored in the dirty map. An expunged entry in the
   // clean map must be unexpunged and added to the dirty map before a new value
   // can be stored to it.
   //
   // If the dirty map is nil, the next write to the map will initialize it by
   // making a shallow copy of the clean map, omitting stale entries.
   dirty map[any]*entry

   // misses counts the number of loads since the read map was last updated that
   // needed to lock mu to determine whether the key was present.
   //
   // Once enough misses have occurred to cover the cost of copying the dirty
   // map, the dirty map will be promoted to the read map (in the unamended
   // state) and the next store to the map will make a new dirty copy.
   misses int
}
  • read 实现了无锁化的只读map -> readOnly struct 中含有两个变量 1.map[any]*entry 2.amended(bool型变量 用来判断read map中的数据是否有缺失)
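  • dirty 可读可写map 操作dirty map的时候需要加互斥锁来访问;当dirty不为nil时 它包含read map中所有未被标记为expunged的数据 也就是说read map中的有效数据是dirty map的子集;当read.amended == false时 dirty中没有read map之外的key(此时dirty也可能为nil 例如dirty刚被提升为read map之后)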
  • misses 未命中read的次数 -> 读操作:我们读map中的数据时 会先去查看read 如果read中有该数据 那么直接返回 相当于一个缓存的作用。若未命中read map 但是read.amended为false 说明read中的数据就是这个sync.Map中的所有数据 在read中没有访问到 就说明sync.Map中没有这个数据 直接返回 nil,false。只有当未命中read且read map中数据有缺失(read.amended为true)的时候 才会去加锁访问dirty map。由于从判断read中是否有该key到加锁成功之间有一段时间空白 可能有别的goroutine写了数据 所以加锁后需要double check 再次判断read map中是否有该key。如果还是没有 只能死了在read map中寻找该key的心思了 老老实实去dirty map中查看 此时无论dirty map中是否有该key 都会触发missLocked方法 使得misses++。misses也是有阈值的:read map的未命中次数在len(dirty)之下 还可以忍受;一旦达到或超过len(dirty) 那还要你这个read map干嘛 我忍不了了。但是为了保持你只读的人设 我只能通过原子操作用dirty map把你整个覆盖掉 而不是一个一个地更新(这不就是写操作了嘛)
// Excerpt only: the enclosing function is not shown here. Presumably ok
// reports whether the key was found in read; confirm against the full source.
// When the key was not found and read is marked amended, look in the dirty map.
if !ok && read.amended {
   e, ok = m.dirty[key]
   // Regardless of whether the entry was present, record a miss: this key
   // will take the slow path until the dirty map is promoted to the read
   // map.
   m.missLocked()
}
// missLocked counts one more miss and, once the miss count is no longer
// smaller than the size of the dirty map, promotes the dirty map: it is
// stored as the new read snapshot (with amended left false), after which the
// dirty map is dropped and the counter is reset.
//
// This method does not take m.mu itself; the "Locked" suffix suggests the
// caller is expected to hold it.
func (m *Map) missLocked() {
   m.misses++
   if m.misses >= len(m.dirty) {
      // Replace the whole read snapshot in one store rather than copying
      // entries one at a time.
      m.read.Store(readOnly{m: m.dirty})
      m.dirty, m.misses = nil, 0
   }
}

sync.Map写数据的Store方法

// Store sets the value for key.
//
// It first tries to update an entry that is already present in the read
// snapshot without taking m.mu. If that does not succeed it takes m.mu,
// reloads the snapshot, and then handles three cases in order: the key is in
// read, the key is only in dirty, or the key is new.
func (m *Map) Store(key, value any) {
   // Attempt without the lock: tryStore (defined elsewhere) reports whether
   // the update was applied.
   snapshot, _ := m.read.Load().(readOnly)
   if existing, found := snapshot.m[key]; found && existing.tryStore(&value) {
      return
   }

   m.mu.Lock()

   // Reload the snapshot now that the lock is held; it may have been
   // replaced since the first load.
   snapshot, _ = m.read.Load().(readOnly)

   if existing, found := snapshot.m[key]; found {
      if existing.unexpungeLocked() {
         // The entry was previously expunged, which implies that there is a
         // non-nil dirty map and this entry is not in it.
         m.dirty[key] = existing
      }
      existing.storeLocked(&value)
      m.mu.Unlock()
      return
   }

   if existing, found := m.dirty[key]; found {
      existing.storeLocked(&value)
      m.mu.Unlock()
      return
   }

   // The key is in neither map.
   if !snapshot.amended {
      // This is the first new key added to the dirty map: make sure the
      // dirty map is allocated and mark the read snapshot as incomplete.
      m.dirtyLocked()
      m.read.Store(readOnly{m: snapshot.m, amended: true})
   }
   m.dirty[key] = newEntry(value)
   m.mu.Unlock()
}

处于写多读少的情况时 当需要插入一条新数据(注意 这里不是更新map中原有的数据)时 会进入sync.Map.Store方法 由于我们插入的是一条新数据 需要加锁后对dirty map进行操作 此时如果read.amended是false(即dirty map中没有read map之外的数据 dirty可能与read一致 也可能为nil) 会进入dirtyLocked()方法 在此方法中 如果dirty map == nil (例如刚刚发生过missLocked中的提升操作 -> 在read map中读取不到key的次数已经大于等于len(dirty) 于是用dirty map覆盖了read map 并且将dirty map置空) 会遍历整个read map 将所有软删除态(值为nil)的数据全部置为expunged 并将read map中所有状态不为expunged的数据全部加载到dirty map中 完成数据的迁移

也就是说dirty map中的数据都是未被删除的数据(这里的删除包括软删除和硬删除) 这里注意 发生missLocked操作之后的dirtyLocked操作会遍历整个read map 此时的时间复杂度就是O(n)了 -> 此时的写操作时间复杂度是O(n) 取决于read map的容量 与map初衷的读写时间复杂度都是O(1)相悖

// dirtyLocked makes sure m.dirty is allocated. If it already exists nothing
// happens. Otherwise a new map sized to the current read snapshot is created
// and every entry of the snapshot is visited: entries for which
// tryExpungeLocked reports true are left out, all others are copied over.
//
// The loop visits every entry in the read snapshot, so the cost grows with
// the snapshot's size. This method does not take m.mu itself; the "Locked"
// suffix suggests the caller is expected to hold it.
func (m *Map) dirtyLocked() {
   if m.dirty != nil {
      return
   }

   snapshot, _ := m.read.Load().(readOnly)
   m.dirty = make(map[any]*entry, len(snapshot.m))
   for key, ent := range snapshot.m {
      if ent.tryExpungeLocked() {
         // Expunged entries are not carried into the dirty map.
         continue
      }
      m.dirty[key] = ent
   }
}
// tryExpungeLocked reports whether the entry ends up in the expunged state.
//
// While the entry's pointer is nil it attempts to swap in the expunged
// marker atomically, retrying if another writer changes the pointer in
// between. A non-nil pointer is left untouched and the result is simply
// whether that pointer is the expunged marker.
func (e *entry) tryExpungeLocked() (isExpunged bool) {
   for {
      cur := atomic.LoadPointer(&e.p)
      if cur != nil {
         return cur == expunged
      }
      if atomic.CompareAndSwapPointer(&e.p, nil, expunged) {
         return true
      }
      // The swap failed because e.p changed; reload and re-evaluate.
   }
}

只有在写多读少的情况下 才会高频触发上述操作 使得时间复杂度提高 所以处于写多读少情况下的时候 尽量不要使用sync.Map