理论
-
CPU 通常有三级缓存:L1、L2、L3。数字越大,缓存容量越大,但访问速度越慢。
-
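缓存以缓存行(cache line)为单位读写,一行通常只有 64 字节(64B,不是 64 bit;部分平台如 Apple Silicon 为 128 字节),所以这里涉及伪共享(false sharing)的问题:两个 goroutine 在不同核心上各自修改不同的变量,但这些变量落在同一个缓存行里,两个核心就会互相抢夺这一行。常见的避免方法就是不要让它们共用同一个缓存行。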
BenchmarkFalseSharing_Bad BenchmarkFalseSharing_Bad-10 3 424660431 ns/op
BenchmarkFalseSharing_Good BenchmarkFalseSharing_Good-10 9 115935042 ns/op
代码,跑跑看我有没有骗你
package main
import (
"runtime"
"sync"
"sync/atomic"
"testing"
)
// loops is the number of atomic increments each goroutine performs per
// benchmark iteration (ten million).
const loops = 10_000_000
// bad is the false-sharing-prone layout: two 8-byte counters declared back
// to back, so they sit next to each other in memory and will usually end up
// in the same cache line.
type bad struct {
	a, b int64
}
// shard is the false-sharing-resistant layout: a single counter followed by
// enough padding to make the whole struct 128 bytes.
//
// When shards are laid out consecutively (struct fields or array elements),
// their n fields are 128 bytes apart, so two counters cannot fall into the
// same cache line on CPUs with 64-byte lines or on CPUs with 128-byte lines
// (for example Apple Silicon). The previous 64-byte shard only covered the
// 64-byte case.
//
// Note that nothing here aligns a shard to a cache-line boundary, so n may
// still share a line with unrelated neighbouring data; the padding only keeps
// the counters of adjacent shards apart from each other.
type shard struct {
	n int64
	_ [15]int64 // 15*8 = 120 B of padding; 8 B + 120 B = 128 B per shard
}
// BenchmarkFalseSharing_Bad measures two goroutines that each atomically
// increment their own field of a single bad value. The fields are distinct,
// but they are adjacent in memory, which is the layout this benchmark uses to
// demonstrate false sharing.
//
// Every benchmark iteration starts two goroutines, has each perform loops
// increments, and waits for both to finish.
func BenchmarkFalseSharing_Bad(b *testing.B) {
	var counters bad

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		var wg sync.WaitGroup

		// spin adds 1 to *ctr loops times and then marks itself done.
		spin := func(ctr *int64) {
			defer wg.Done()
			for left := loops; left > 0; left-- {
				atomic.AddInt64(ctr, 1)
			}
		}

		wg.Add(2)
		go spin(&counters.a)
		go spin(&counters.b)
		wg.Wait()
	}
}
// BenchmarkFalseSharing_Good measures two goroutines that each atomically
// increment a counter held in its own shard. The two shards are adjacent
// fields of one struct, so the counters are separated by shard's padding
// rather than sitting directly next to each other.
//
// Every benchmark iteration starts two goroutines, has each perform loops
// increments, and waits for both to finish.
func BenchmarkFalseSharing_Good(b *testing.B) {
	var counters struct {
		a, b shard
	}

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		var wg sync.WaitGroup

		// spin adds 1 to *ctr loops times and then marks itself done.
		spin := func(ctr *int64) {
			defer wg.Done()
			for left := loops; left > 0; left-- {
				atomic.AddInt64(ctr, 1)
			}
		}

		wg.Add(2)
		go spin(&counters.a.n)
		go spin(&counters.b.n)
		wg.Wait()
	}
}
// init sets GOMAXPROCS to the number of logical CPUs reported by the runtime
// so the benchmark goroutines can run in parallel on separate cores.
//
// NOTE(review): modern Go runtimes already default GOMAXPROCS to the CPU
// count, so this call is likely redundant and would override a GOMAXPROCS
// environment setting — confirm before relying on or removing it.
func init() {
	cpus := runtime.NumCPU()
	runtime.GOMAXPROCS(cpus)
}