通常情况下,通过 sync.Pool 复用 []byte 时,每次都会产生额外的 24 字节内存分配,这是堆逃逸导致的。
空间换时间
当需要使用的 []byte 总数固定时,我们可以预先分配好,然后复用:
package utils
import (
"reflect"
"sync/atomic"
"unsafe"
)
// BytePool is a fixed-size pool of equally sized byte buffers that can be
// obtained and released concurrently without allocating.
//
// All buffers are carved out of one contiguous backing array, so the slot a
// buffer belongs to can be recovered from its address alone.
type BytePool struct {
	// i is an ever-increasing cursor; Get reduces it modulo the number of
	// buffers to choose the next slot to try.
	i atomic.Int64
	// caches[k] is the zero-length view of slot k, capped at cap bytes.
	caches [][]byte
	// used[k] is true while slot k is handed out to a caller.
	used []atomic.Bool
	// head is the address of the first byte of the backing array. The array
	// itself stays reachable through caches.
	head uintptr
	// cap is the capacity in bytes of every buffer (0 for an empty pool).
	cap int
}

// NewBytePool creates a pool of max byte buffers, each with a capacity of cap.
//
// If sync.Pool is used, at least 24 bytes of memory will be allocated each time,
// see https://blog.mike.norgate.xyz/unlocking-go-slice-performance-navigating-sync-pool-for-enhanced-efficiency-7cb63b0b453e.
// This pool trades space for time instead: one large array of cap*max bytes is
// allocated up front and divided into max consecutive buffers. Each buffer has
// an atomic.Bool that records whether it is currently handed out; Put finds
// that flag from the distance between the buffer's address and the start of
// the large array.
//
// Every buffer is limited to exactly cap bytes of capacity, so appending more
// than cap bytes makes append allocate a fresh slice instead of overwriting
// the neighbouring buffer.
//
// If cap or max is not positive the returned pool holds no buffers: Get
// returns nil and Put does nothing.
func NewBytePool(cap, max int) *BytePool {
	if cap <= 0 || max <= 0 {
		return &BytePool{}
	}
	data := make([]byte, cap*max)
	caches := make([][]byte, max)
	for i := range caches {
		start := i * cap
		// Full slice expression: length 0, capacity exactly cap.
		caches[i] = data[start : start : start+cap]
	}
	rt := (*reflect.SliceHeader)(unsafe.Pointer(&data))
	return &BytePool{
		caches: caches,
		used:   make([]atomic.Bool, max),
		head:   rt.Data,
		cap:    cap,
	}
}

// Get returns an empty buffer (length 0, capacity cap) from the pool.
//
// Slots are probed in round-robin order. If every buffer is in use, Get keeps
// probing until another goroutine calls Put. Get returns nil when the pool
// holds no buffers.
func (b *BytePool) Get() []byte {
	n := uint64(len(b.caches))
	if n == 0 {
		return nil
	}
	for {
		// The unsigned conversion keeps the index valid even if the counter
		// ever wraps around.
		slot := uint64(b.i.Add(1)-1) % n
		if b.used[slot].CompareAndSwap(false, true) {
			return b.caches[slot][:0]
		}
	}
}

// Put hands a buffer obtained from Get back to the pool.
//
// x may be the slice returned by Get or any reslice of it. Slices that do not
// point into the pool's backing array (nil, foreign slices, or a slice that
// append reallocated after outgrowing cap) are ignored; in the reallocation
// case the original buffer stays marked as used until it is itself passed to
// Put.
func (b *BytePool) Put(x []byte) {
	if b.cap == 0 {
		return
	}
	rt := (*reflect.SliceHeader)(unsafe.Pointer(&x))
	if rt.Data < b.head {
		return
	}
	off := rt.Data - b.head
	if off >= uintptr(b.cap)*uintptr(len(b.caches)) {
		return
	}
	b.used[off/uintptr(b.cap)].Store(false)
}
压测
压测结果:比常规的 sync.Pool 快约一倍(30.97 ns/op 对比 61.58 ns/op),并且零内存分配。
// goos: linux
// goarch: amd64
// pkg: pool
// cpu: Intel(R) Xeon(R) Platinum 8269CY CPU @ 2.50GHz
// Benchmark/byte_pool-32 38809202 30.97 ns/op 0 B/op 0 allocs/op
// Benchmark/pool-32 20152437 61.58 ns/op 24 B/op 1 allocs/op
package utils
import (
"testing"
"time"
"github.com/bytedance/gopkg/lang/mcache"
"github.com/stretchr/testify/require"
)
// Benchmark measures one acquire/release round trip per iteration for two
// buffer sources: the BytePool from this package ("byte pool") and
// mcache.Malloc/mcache.Free ("pool"). Allocation counts are reported for both.
func Benchmark(b *testing.B) {
	// Get followed by Put on a pool of ten 10-byte buffers.
	bytePoolCase := func(b *testing.B) {
		pool := NewBytePool(10, 10)
		b.ReportAllocs()
		for n := 0; n < b.N; n++ {
			buf := pool.Get()
			pool.Put(buf)
		}
	}

	// Malloc followed by Free of a zero-length, 10-capacity buffer.
	mcacheCase := func(b *testing.B) {
		b.ReportAllocs()
		for n := 0; n < b.N; n++ {
			buf := mcache.Malloc(0, 10)
			mcache.Free(buf)
		}
	}

	b.Run("byte pool", bytePoolCase)
	b.Run("pool", mcacheCase)
}
// TestConcurrent has several goroutines take a buffer from a shared BytePool,
// append to it and hand it back, then checks what each goroutine observed.
//
// The workers only report their observations over a channel; every assertion
// runs on the test goroutine, because require stops the test via t.FailNow,
// which must not be called from other goroutines. The test waits for each
// worker explicitly (bounded by a timeout) instead of sleeping for a fixed
// time.
func TestConcurrent(t *testing.T) {
	const workers = 10

	// result is what one worker saw while holding its buffer.
	type result struct {
		initialLen int
		content    []byte
	}

	x := NewBytePool(65000, 100)
	results := make(chan result, workers) // buffered so workers never block
	for i := 0; i < workers; i++ {
		go func() {
			data := x.Get()
			initialLen := len(data)
			data = append(data, []byte("test")...)
			// Copy before the buffer goes back to the pool and may be reused.
			content := append([]byte(nil), data...)
			x.Put(data)
			results <- result{initialLen: initialLen, content: content}
		}()
	}

	timeout := time.After(time.Second)
	for i := 0; i < workers; i++ {
		select {
		case r := <-results:
			require.Equal(t, 0, r.initialLen)
			require.Equal(t, []byte("test"), r.content)
		case <-timeout:
			t.Fatalf("timed out after %d of %d workers finished", i, workers)
		}
	}
}