Kafka - Sarama(go 客户端)

7,093 阅读3分钟

Sarama github 地址 : github.com/Shopify/sarama

kafka相关文章分享:

kafka 快速开始

Kafka -- 内部原理

Kafka ISR 副本同步机制

kafka实战分享,微信文章

《深入理解Kafka:核心设计与实践原理》

1、前期环境

1、zookeeper环境, version : 3.4.14

➜  software ls |grep zoo
zookeeper-3.4.14
zookeeper-3.4.14.tar.gz

2、kafka环境, version :kafka_2.11-0.11.0.1

➜  software ls |grep kafka
kafka_2// 机器2
kafka_2.11-0.11.0.1// 机器1
kafka_2.11-0.11.0.1.tgz
kafka_3// 机器3

启动三台,分别是"localhost:9092,localhost:9093,localhost:9094" ,记住改一下配置文件server.properties(broker.id、端口),顺便改一下日志文件位置。基本就启动起来了

新建topic, 3分区,2副本

/Users/dong/software/zookeeper-3.4.14/bin/zkServer.sh start
/Users/dong/software/kafka_1/bin/kafka-server-start.sh  -daemon /Users/dong/software/kafka_1/config/server.properties
/Users/dong/software/kafka_2/bin/kafka-server-start.sh  -daemon /Users/dong/software/kafka_2/config/server.properties
/Users/dong/software/kafka_3/bin/kafka-server-start.sh  -daemon /Users/dong/software/kafka_3/config/server.properties

启动成功后

创建三个分区,两个备份。

bin/kafka-topics.sh --create --topic user_event_dev --partitions 3  --replication-factor 2 --zookeeper localhost:2181

查看已创建的 topic:

bin/kafka-topics.sh --list --zookeeper localhost:2181
user_event_dev

2、down源码 , 最新源码,下载依赖

curl https://codeload.github.com/Shopify/sarama/zip/master -o ./sarama-master

cd ./sarama-master

go mod vendor

3、快速开始

// no care
const (
	topic = "user_event_dev"
	group = "user_event_dev_1"
	host  = "localhost:9092,localhost:9093,localhost:9094"
)

func getAddr() []string {
	return strings.Split(host, ",")
}
func panicError(err error) {
	if err != nil {
		panic(err)
	}
}
// handler,核心的消费者业务实现
type exampleConsumerGroupHandler struct{}
func (exampleConsumerGroupHandler) Setup(s sarama.ConsumerGroupSession) error {
	fmt.Println("set up ....") // 当连接完毕的时候会通知这个,start
	return nil
}
func (exampleConsumerGroupHandler) Cleanup(s sarama.ConsumerGroupSession) error {
	fmt.Println("Cleanup") // end,当这一次消费完毕,会通知,这里最好commit
	return nil
}
func (h exampleConsumerGroupHandler) ConsumeClaim(sess sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error { // consume
	for msg := range claim.Messages() { // 接受topic消息
		fmt.Printf("[Consumer] Message topic:%q partition:%d offset:%d add:%d\n", msg.Topic, msg.Partition, msg.Offset, claim.HighWaterMarkOffset()-msg.Offset)
		sess.MarkMessage(msg, "") // 必须设置这个,不然你的偏移量无法提交。
	}
	return nil
}

func main() {
	go func() {
		http.ListenAndServe(":8888", http.DefaultServeMux) // pprof
	}()
	sarama.Logger = log.New(os.Stderr, "[SARAMA] ", log.LstdFlags) // 可以使用自定义日志存储,全局
	wg := sync.WaitGroup{}
	wg.Add(2)
	producer(&wg) // 生产者
	consumer(&wg)// 消费者
	wg.Wait()
}

func consumer(wg *sync.WaitGroup) {
	go func() {
		defer wg.Done()
    client, err := sarama.NewConsumerGroup(getAddr(), group, newKafkaConfig())// broker_ip,消费者组(broker记录偏移量),kafka 配置设置
		panicError(err)
		for { // for循环的目的是因为存在重平衡,他会重新启动
			handler := new(exampleConsumerGroupHandler) // 必须传递一个handler
			err = client.Consume(context.TODO(), []string{topic}, handler) // consume 操作,死循环。exampleConsumerGroupHandler的ConsumeClaim不允许退出,也就是操作到完毕。
			panicError(err)
			fmt.Println("re  balance") 
		}
	}()
}

func newKafkaConfig() *sarama.Config {
	config := sarama.NewConfig()
	config.ClientID = "sarama_demo" // 
	config.Version = sarama.V0_11_0_1 // kafka server的版本号
	config.Producer.Return.Successes = true // sync必须设置这个
	config.Producer.RequiredAcks = sarama.WaitForAll // 也就是等待foolower同步,才会返回
	config.Producer.Return.Errors = true
	config.Consumer.Return.Errors = true
	config.Metadata.Full = false // 不用拉取全部的信息
	config.Consumer.Offsets.AutoCommit.Enable = true // 自动提交偏移量,默认开启,说时候,我没找到手动提交。
	config.Consumer.Offsets.AutoCommit.Interval = time.Second // 这个看业务需求,commit提交频率,不然容易down机后造成重复消费。
	config.Consumer.Offsets.Initial = sarama.OffsetOldest // 从最开始的地方消费,业务中看有没有需求,新业务重跑topic。
	config.Consumer.Group.Rebalance.Strategy = sarama.BalanceStrategyRange // rb策略,默认就是range
	return config
}

func producer(wg *sync.WaitGroup) {
	go func() {
		config := newKafkaConfig()
		defer wg.Done()
		producer, err := sarama.NewSyncProducer(getAddr(), config) // producer,就很简单了
		panicError(err)
		buffer := bytes.Buffer{}
		for {
			buffer.Reset()
			time.Sleep(time.Millisecond * 100)
			buffer.WriteString(fmt.Sprintf("curent: %v", time.Now().UnixNano()))
			partition, offset, err := producer.SendMessage(&sarama.ProducerMessage{
				Topic: topic, // 需要指定topic
				Value: sarama.ByteEncoder(buffer.Bytes()), // value,对于kafka来说不推荐传递key,因为容易造成分区不均匀。
			})
			panicError(err)
			fmt.Fprintf(os.Stdout, "[Producer] partition: %v, offset: %v, topic: %v\n", partition, offset, topic)
		}
	}()
}

关于 sarama的一些参数设置

func NewConfig() *Config {
	c := &Config{}

	c.Admin.Retry.Max = 5
	c.Admin.Retry.Backoff = 100 * time.Millisecond
	c.Admin.Timeout = 3 * time.Second

	c.Net.MaxOpenRequests = 5
	c.Net.DialTimeout = 30 * time.Second
	c.Net.ReadTimeout = 30 * time.Second
	c.Net.WriteTimeout = 30 * time.Second
	c.Net.SASL.Handshake = true
	c.Net.SASL.Version = SASLHandshakeV0

	c.Metadata.Retry.Max = 3
	c.Metadata.Retry.Backoff = 250 * time.Millisecond
	c.Metadata.RefreshFrequency = 10 * time.Minute
	c.Metadata.Full = true

	c.Producer.MaxMessageBytes = 1000000
	c.Producer.RequiredAcks = WaitForLocal
	c.Producer.Timeout = 10 * time.Second
	c.Producer.Partitioner = NewHashPartitioner
	c.Producer.Retry.Max = 3
	c.Producer.Retry.Backoff = 100 * time.Millisecond
	c.Producer.Return.Errors = true
	c.Producer.CompressionLevel = CompressionLevelDefault

	c.Consumer.Fetch.Min = 1
	c.Consumer.Fetch.Default = 1024 * 1024
	c.Consumer.Retry.Backoff = 2 * time.Second
	c.Consumer.MaxWaitTime = 250 * time.Millisecond // 这里是每次消费者请求broker时,拉取消息的最大超时时间,比如我要拉取配置的单次最小是1m的数据,此时broker在250ms内并未发现1m的数据产生,则broker发现超时后,有多少数据返回多少数据。
	c.Consumer.MaxProcessingTime = 100 * time.Millisecond
	c.Consumer.Return.Errors = false
	c.Consumer.Offsets.AutoCommit.Enable = true
	c.Consumer.Offsets.AutoCommit.Interval = 1 * time.Second
	c.Consumer.Offsets.Initial = OffsetNewest
	c.Consumer.Offsets.Retry.Max = 3

	c.Consumer.Group.Session.Timeout = 10 * time.Second
	c.Consumer.Group.Heartbeat.Interval = 3 * time.Second
	c.Consumer.Group.Rebalance.Strategy = BalanceStrategyRange
	c.Consumer.Group.Rebalance.Timeout = 60 * time.Second
	c.Consumer.Group.Rebalance.Retry.Max = 4
	c.Consumer.Group.Rebalance.Retry.Backoff = 2 * time.Second

	c.ClientID = defaultClientID
	c.ChannelBufferSize = 256
	c.Version = MinVersion
	c.MetricRegistry = metrics.NewRegistry()

	return c
}