# 关于hotkey的那些事儿

背景

某一日, 葫芦去面试，面试官-十三

十三: 你们项目中的热key是如何处理的

葫芦: 热key我们会放到本地缓存中，如果缓存中不存在，就用单飞（singleflight）从redis获取，再加载到缓存，缓存用的go-cache

十三: 那你了解go-cache的实现原理吗

葫芦: 没看过哎

十三: 回去等通知吧

葫芦GG

go-cache 实现

go-cache 是一个基于内存的缓存库

结构体定义

cache定义

// cache is the unexported in-memory store that the exported Cache wraps.
type cache struct {
   // defaultExpiration is the duration set substitutes when a caller
   // passes DefaultExpiration; newCache stores -1 here when given 0.
   defaultExpiration time.Duration
   // items holds the cached entries, keyed by string.
   items             map[string]Item
   // mu guards items; OnEvicted also takes it while replacing onEvicted.
   mu                sync.RWMutex
   // onEvicted, when non-nil, is called with the key and value of entries
   // removed by DeleteExpired. It is installed through OnEvicted.
   onEvicted         func(string, interface{})
   // janitor is never assigned in the visible code.
   // NOTE(review): presumably set by runJanitor — confirm.
   janitor           *janitor
}

// Item is a single cache entry: the stored value plus its expiry deadline.
type Item struct {
   // Object is the cached value.
   Object     interface{}
   // Expiration is the deadline as a UnixNano timestamp. Values <= 0 are
   // treated as "never expires" by Get and DeleteExpired.
   Expiration int64
}

mu 读写锁，保证并发安全
defaultExpiration 默认过期时间
janitor 缓存项清理控制器
items 实际存储
onEvicted 回调函数，缓存项被删除（淘汰）时会以 key 和 value 为参数调用它

核心方法

New

// newCache builds a cache backed by the supplied map m. No janitor is
// attached here. A zero de is stored as -1; since set only computes a
// deadline for positive durations, entries then never expire by default.
func newCache(de time.Duration, m map[string]Item) *cache {
	expiry := de
	if expiry == 0 {
		expiry = -1
	}
	return &cache{defaultExpiration: expiry, items: m}
}

// newCacheWithJanitor creates the inner cache and wraps it in the exported
// Cache type. When ci is positive it calls runJanitor with that interval and
// registers stopJanitor as a finalizer on the wrapper (not on the inner
// cache). With ci <= 0 no janitor is started.
func newCacheWithJanitor(de time.Duration, ci time.Duration, m map[string]Item) *Cache {
	inner := newCache(de, m)
	wrapper := &Cache{inner}
	if ci <= 0 {
		return wrapper
	}
	runJanitor(inner, ci)
	runtime.SetFinalizer(wrapper, stopJanitor)
	return wrapper
}

// New returns a Cache backed by a fresh, empty item map. defaultExpiration
// and cleanupInterval are forwarded unchanged to newCacheWithJanitor.
func New(defaultExpiration, cleanupInterval time.Duration) *Cache {
	return newCacheWithJanitor(defaultExpiration, cleanupInterval, make(map[string]Item))
}

New 方法返回一个Cache指针，如果未设置过期时间defaultExpiration,默认为永不过期

cleanupInterval 为清理周期，如果cleanupInterval > 0 会异步使用一个goroutine 来删除过期的key-value, 该过程会上锁

// Run calls c.DeleteExpired once per j.Interval until something is received
// from j.stop, at which point the ticker is stopped and Run returns. It
// blocks for its whole lifetime.
func (j *janitor) Run(c *cache) {
	tick := time.NewTicker(j.Interval)
	defer tick.Stop()
	for {
		select {
		case <-j.stop:
			return
		case <-tick.C:
			c.DeleteExpired()
		}
	}
}

// DeleteExpired removes every item whose Expiration is positive and already
// in the past, then reports the removals to the eviction callback.
//
// The callback is snapshotted while c.mu is held and invoked only after the
// lock is released. Reading c.onEvicted again after Unlock would race with
// OnEvicted, and a concurrent OnEvicted(nil) would make the loop call a nil
// function and panic. Running the callbacks outside the lock also lets them
// call back into the cache.
func (c *cache) DeleteExpired() {
	var evictedItems []keyAndValue
	now := time.Now().UnixNano()

	c.mu.Lock()
	onEvicted := c.onEvicted
	for k, v := range c.items {
		// "Inlining" of expired: a non-positive Expiration never expires.
		if v.Expiration > 0 && now > v.Expiration {
			if ov, evicted := c.delete(k); evicted {
				evictedItems = append(evictedItems, keyAndValue{k, ov})
			}
		}
	}
	c.mu.Unlock()

	if onEvicted == nil {
		return
	}
	for _, v := range evictedItems {
		onEvicted(v.key, v.value)
	}
}

一个小细节：DeleteExpired 在持有写锁期间只负责删除过期条目，并把 delete 返回的被淘汰键值对收集起来；等释放锁之后，才逐个调用回调函数 onEvicted，这样回调执行时不会占着写锁

onEvicted 配置方式

// OnEvicted installs f as the optional eviction callback. f receives the key
// and value whenever an item is evicted from the cache; this includes manual
// deletion but not overwriting an existing key. Passing nil disables it.
func (c *cache) OnEvicted(f func(string, interface{})) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.onEvicted = f
}

Add 添加元素

// Add stores x under k for duration d, but only when the key is absent or
// its existing item has expired. If c.get still finds the key, nothing is
// written and an error is returned. The write lock is held for both the
// check and the store.
func (c *cache) Add(k string, x interface{}, d time.Duration) error {
	c.mu.Lock()
	defer c.mu.Unlock()

	if _, exists := c.get(k); exists {
		return fmt.Errorf("Item %s already exists", k)
	}
	c.set(k, x, d)
	return nil
}

// set writes x under k without taking any lock itself; Add calls it while
// holding c.mu. A duration equal to DefaultExpiration is replaced by the
// cache-wide default. Only a positive duration yields a UnixNano deadline;
// otherwise Expiration stays 0 and the entry is never treated as expired.
func (c *cache) set(k string, x interface{}, d time.Duration) {
	ttl := d
	if ttl == DefaultExpiration {
		ttl = c.defaultExpiration
	}
	entry := Item{Object: x}
	if ttl > 0 {
		entry.Expiration = time.Now().Add(ttl).UnixNano()
	}
	c.items[k] = entry
}

如果map中不存在或已过期，set到map中，否则报错

Get 获取元素

// Get looks k up under the read lock and returns the stored value with true.
// It returns (nil, false) when the key is absent or its deadline has passed.
// An expired entry is only reported as missing; it is not removed here, so
// no write lock is needed.
func (c *cache) Get(k string) (interface{}, bool) {
	c.mu.RLock()
	entry, ok := c.items[k]
	c.mu.RUnlock()

	// entry is a copy of the map value, so it is safe to inspect unlocked.
	if !ok {
		return nil, false
	}
	if entry.Expiration > 0 && time.Now().UnixNano() > entry.Expiration {
		return nil, false
	}
	return entry.Object, true
}

加了读锁，从map中获取，会进行过期时间的校验，但并没有进行lazy del，以避免加写锁

高级一些的用法

go-cache 提供了sharded cache 的模式，按key进行djb33的算法后，分到不同的bucket中，可以减少加锁的范围，提高查询效率

// shardedCache spreads keys over several independent cache instances.
type shardedCache struct {
   // NOTE(review): presumably the seed mixed into the djb33 key hash — confirm.
   seed    uint32
   // NOTE(review): presumably the number of shards (hash modulus) — confirm.
   m       uint32
   // cs holds the per-shard caches.
   cs      []*cache
   // NOTE(review): presumably the cleaner that walks all shards — confirm.
   janitor *shardedJanitor
}

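go-cache 可能会导致内存一直增长：它没有容量上限，也没有 LRU 之类的淘汰策略；Get 发现条目过期时并不会删除，过期条目只能靠 janitor 周期性清理，而 cleanupInterval <= 0 时不会启动 janitor；未过期或永不过期的 key 则会一直留在 map 中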

Kratos-Hotkey

local cache

基于LRU 实现的本地缓存

// Cache is an LRU cache. It is not safe for concurrent access.
type Cache struct {
   // MaxEntries is the maximum number of cache entries before
   // an item is evicted. Zero means no limit.
   MaxEntries int

   // OnEvicted optionally specifies a callback function to be
   // executed when an entry is purged from the cache.
   OnEvicted func(key Key, value interface{})

   // ll is the doubly linked list that keeps the entries in order.
   // NOTE(review): presumably most recently used at the front — confirm.
   ll    *list.List
   // cache maps a key to its list element so an entry can be located
   // without walking ll.
   cache map[interface{}]*list.Element
}

MaxEntries: 最大元素数量
OnEvicted: 可选的回调函数，缓存条目被淘汰（清除）时执行
ll: 缓存列表，使用双向链表实现。保存缓存键值对的顺序，最近访问的条目在前面
cache: 缓存映射，用于快速查找键对应的值在链表中的位置

hotkey

Option 配置选项

// Option configures NewHotkey.
type Option struct {
   HotKeyCnt     int                  // K of the top-k tracker; hot-key tracking is enabled only when > 0
   LocalCacheCnt int                  // size argument passed to NewLocalCache when LocalCache is nil
   AutoCache     bool                 // when true, keys reported hot are cached automatically unless blacklisted
   CacheMs       int                  // default cache TTL in milliseconds (auto-cached keys, and rules with TTLMs == 0)
   MinCount      int                  // passed to the top-k tracker; presumably keys counted fewer times are not recorded — confirm
   WhileList     []*CacheRuleConfig   // whitelist rules: matching keys are cached with the rule's TTL
   BlackList     []*CacheRuleConfig   // blacklist rules: matching keys are never auto-cached
   LocalCache    LocalCache           // optional custom local cache; NewLocalCache is used when nil
}

NewHotkey 设置热点缓存

// NewHotkey builds a HotKeyWithCache from option.
//
//   - A top-k tracker is created only when option.HotKeyCnt > 0. Its width
//     is 1024 * max(1, floor(ln(HotKeyCnt))).
//   - Whitelist and blacklist rules are compiled with initCacheRules; a
//     compile error aborts construction and is returned.
//   - A local cache is attached only when AutoCache is on or at least one
//     whitelist rule exists. option.LocalCache is preferred, otherwise
//     NewLocalCache(option.LocalCacheCnt) is used.
//
// option must be non-nil.
func NewHotkey(option *Option) (*HotKeyWithCache, error) {
	h := &HotKeyWithCache{option: option}

	if k := option.HotKeyCnt; k > 0 {
		factor := uint32(math.Log(float64(k)))
		if factor < 1 {
			factor = 1
		}
		h.topk = topk.NewHeavyKeeper(uint32(k), 1024*factor, 4, 0.925, uint32(option.MinCount))
	}

	if wl := option.WhileList; len(wl) > 0 {
		rules, err := h.initCacheRules(wl)
		if err != nil {
			return nil, err
		}
		h.whilelist = rules
	}
	if bl := option.BlackList; len(bl) > 0 {
		rules, err := h.initCacheRules(bl)
		if err != nil {
			return nil, err
		}
		h.blacklist = rules
	}

	if !option.AutoCache && len(h.whilelist) == 0 {
		// Nothing will ever be cached, so no local cache is needed.
		return h, nil
	}
	if custom := option.LocalCache; custom != nil {
		h.localCache = custom
	} else {
		h.localCache = NewLocalCache(int(option.LocalCacheCnt))
	}
	return h, nil
}

首先，如果传入的option参数中HotKeyCnt大于0，就据此初始化一个topk数据结构的实例，用于统计热门的关键词。如果WhileList或BlackList不为空，则通过initCacheRules函数初始化白名单和黑名单的缓存规则，规则初始化失败时直接返回错误。如果option.AutoCache为true或whilelist列表不为空，则需要本地缓存：优先使用option.LocalCache；若未提供，则以LocalCacheCnt为参数调用NewLocalCache创建一个本地缓存。最后返回初始化好的HotKeyWithCache类型结构体的指针。

initCacheRules 初始化缓存规则

// initCacheRules converts rule configs into cacheRule values.
//
// A rule whose TTLMs is 0 inherits h.option.CacheMs. ruleTypeKey rules keep
// the literal Value. ruleTypePattern rules compile Value as a regular
// expression. Any other mode, or a pattern that fails to compile, aborts
// the conversion and returns an error with a nil slice.
func (h *HotKeyWithCache) initCacheRules(rules []*CacheRuleConfig) ([]*cacheRule, error) {
	compiled := make([]*cacheRule, 0, len(rules))
	for _, cfg := range rules {
		ttl := cfg.TTLMs
		if ttl == 0 {
			ttl = uint32(h.option.CacheMs)
		}
		entry := &cacheRule{ttl: ttl}

		switch cfg.Mode {
		case ruleTypeKey:
			entry.value = cfg.Value
		case ruleTypePattern:
			re, err := regexp.Compile(cfg.Value)
			if err != nil {
				return nil, fmt.Errorf("localcache: add rule pattern failed, err:%v", err)
			}
			entry.regexp = re
		default:
			return nil, fmt.Errorf("invalid local cache rule mode")
		}
		compiled = append(compiled, entry)
	}
	return compiled, nil
}

规则匹配，白名单类似

// inBlacklist reports whether key matches any blacklist rule.
//
// Each rule is checked by its own kind. A pattern rule (regexp set) is
// matched with the regular expression only. A literal rule is matched by
// string equality. initCacheRules leaves value empty on pattern rules, so
// comparing value for every rule would wrongly report the empty key as
// blacklisted whenever a pattern rule exists.
//
// MatchString is used instead of Match([]byte(key)) to avoid allocating a
// byte slice on every lookup.
func (h *HotKeyWithCache) inBlacklist(key string) bool {
	for _, rule := range h.blacklist {
		if rule.regexp != nil {
			if rule.regexp.MatchString(key) {
				return true
			}
			continue
		}
		if rule.value == key {
			return true
		}
	}
	return false
}

Add

// Add records incr occurrences of key in the top-k tracker and reports
// whether the tracker now considers key hot. It returns false without
// locking when no tracker is configured.
func (h *HotKeyWithCache) Add(key string, incr uint32) bool {
	if h.topk == nil {
		return false
	}

	h.mutex.Lock()
	defer h.mutex.Unlock()

	_, isHot := h.topk.Add(key, incr)
	return isHot
}

// AddWithValue records incr occurrences of key and keeps the local cache in
// step. It returns true when the tracker reports key as hot.
//
// Under the mutex it does the following:
//  1. Adds key to the top-k tracker, if one exists. Any key the tracker
//     expelled is removed from the local cache.
//  2. With AutoCache on and key hot, caches value for option.CacheMs unless
//     key is blacklisted, then returns. The whitelist is not consulted.
//  3. Otherwise, caches value with the rule's TTL if key is whitelisted.
//
// It returns false immediately when neither a tracker nor a local cache
// exists.
func (h *HotKeyWithCache) AddWithValue(key string, value interface{}, incr uint32) bool {
	if h.topk == nil && h.localCache == nil {
		return false
	}

	h.mutex.Lock()
	defer h.mutex.Unlock()

	isHot := false
	if h.topk != nil {
		evicted, hot := h.topk.Add(key, incr)
		isHot = hot
		if evicted != "" && h.localCache != nil {
			h.localCache.Remove(evicted)
		}
		if hot && h.option.AutoCache {
			if h.inBlacklist(key) {
				return isHot
			}
			h.localCache.Add(key, value, uint32(h.option.CacheMs))
			return isHot
		}
	}

	ttl, whitelisted := h.inWhitelist(key)
	if whitelisted {
		h.localCache.Add(key, value, ttl)
	}
	return isHot
}

Add方法接收一个键和一个增量作为参数，将该键加入到TopK中，并返回一个布尔值表示是否为热点。AddWithValue方法还额外接收该键对应的值作为参数，在更新TopK的同时维护LocalCache：如果有键被TopK剔除出去，就把这个键从LocalCache中清除；如果启用了自动缓存（option.AutoCache）且该键是热点，那么只要它不在黑名单中，就以option.CacheMs作为过期时间写入LocalCache，然后直接返回；否则再检查该键是否命中白名单，命中则按白名单规则指定的过期时间写入LocalCache。最终返回一个布尔值表示该键是否为热点。

Get

// Get returns the locally cached value for key and true on a hit. On a miss,
// or when no local cache is configured, it returns ("", false). Note that
// the miss value is an empty string, not nil. The lookup runs under the
// mutex.
func (h *HotKeyWithCache) Get(key string) (interface{}, bool) {
	if h.localCache == nil {
		return "", false
	}

	h.mutex.Lock()
	defer h.mutex.Unlock()

	cached, hit := h.localCache.Get(key)
	if !hit {
		return "", false
	}
	return cached, true
}

互斥锁
从localcache中获取

Fading 热度衰落

// Fading delegates to the top-k tracker's Fading method under the mutex.
// It is a no-op when no tracker is configured.
func (h *HotKeyWithCache) Fading() {
	if h.topk != nil {
		h.mutex.Lock()
		defer h.mutex.Unlock()
		h.topk.Fading()
	}
}

遍历topk，将元素计数值/2

kratos 的hotKey 通过 lru list 和 topk 组合的机制实现了固定空间的hotkey 本地缓存

通过 minCount、whitelist、blacklist 机制来过滤进入 topk 和本地缓存的数据

参考

go-cache: github.com/patrickmn/go-cache

kratos-hotkey: github.com/go-kratos/aegis