kafka主流golang客户端sdk多维度对比(IBM、confluent、segmentio)

904 阅读4分钟

大家好,这里是小奏,觉得文章不错可以关注公众号小奏技术

背景

之前基于 danielqsj/kafka-exporter 搭建了 kafka 相关的监控

发现部分监控指标不满足自己的需求,就打算基于kafka-exporter二开一下。

简单看了一下 danielqsj/kafka-exporter 的源码,发现它使用的 kafka golang sdk 竟然不是官方的,而是 Shopify 的 sarama

我还以为 kafka 官方会有一个 golang sdk,但是竟然完全没有,全是三方组织自己开源出来的

Shopify sarama

之前说过 danielqsj/kafka-exporter 使用的是 Shopify 的 sarama

不过这个仓库不知道什么时候给移动到IBM组织下了

所以你这里就算点击 https://github.com/Shopify/sarama 链接也会自动跳转到 IBM 的 sarama

IBM sarama

IBM sarama 和 Shopify sarama 本质来说是同一个,所以上面没有详细讨论 Shopify sarama

这里重点来看看IBM sarama的项目情况

IBM sarama 的 star 数量目前是所有开源 kafka golang sdk 中最多的,有 11.6k

已知在github上使用了该sdk的有3.5k左右

项目维护非常活跃

使用

首先引入依赖

go get github.com/IBM/sarama

发送消息

// TestKafkaSendMessage demonstrates producing a single message with a
// sarama synchronous producer against the brokers in kafkaBrokers.
//
// Fix vs. original: log.Fatalf calls os.Exit, which skips the deferred
// producer.Close() and kills the entire test binary; log.Printf bypasses
// the testing framework entirely. Inside a test, t.Fatalf / t.Errorf /
// t.Logf are the correct reporting primitives.
func TestKafkaSendMessage(t *testing.T) {

	// Producer configuration.
	config := sarama.NewConfig()
	config.Producer.Return.Successes = true // required by SyncProducer
	config.Producer.Return.Errors = true
	config.Producer.RequiredAcks = sarama.WaitForAll // wait for all in-sync replicas
	config.Producer.Retry.Max = 5

	// Create the synchronous producer.
	producer, err := sarama.NewSyncProducer(kafkaBrokers, config)
	if err != nil {
		t.Fatalf("Error creating producer: %v", err)
	}
	defer producer.Close()

	// Build the message.
	msg := &sarama.ProducerMessage{
		Topic: "xiao-zou-topic",
		Value: sarama.StringEncoder("Hello, Kafka!"),
	}

	// Send it; on success report where it landed (partition/offset).
	partition, offset, err := producer.SendMessage(msg)
	if err != nil {
		t.Errorf("Error sending message: %v", err)
	} else {
		t.Logf("Message sent successfully! Partition: %d, Offset: %d", partition, offset)
	}

}

消费消息

 // TestKafkaConsumerMessage demonstrates a sarama consumer-group client.
 // It joins the consumer group, consumes the configured topics in a
 // background goroutine, and then blocks handling signals: SIGUSR1 toggles
 // pause/resume of consumption, SIGINT/SIGTERM (or context cancellation)
 // triggers a clean shutdown.
 func TestKafkaConsumerMessage(t *testing.T) {

	keepRunning := true
	log.Println("Starting a new Sarama consumer")

	config := sarama.NewConfig()

	// Round-robin partition assignment across group members.
	config.Consumer.Group.Rebalance.GroupStrategies = []sarama.BalanceStrategy{sarama.NewBalanceStrategyRoundRobin()}
	// Start from the oldest available offset when no committed offset exists.
	config.Consumer.Offsets.Initial = sarama.OffsetOldest

	// ready is closed by Consumer.Setup once the first session is established.
	consumer := Consumer{
		ready: make(chan bool),
	}

	ctx, cancel := context.WithCancel(context.Background())
	client, err := sarama.NewConsumerGroup(kafkaBrokers, group, config)
	if err != nil {
		log.Panicf("Error creating consumer group client: %v", err)
	}

	consumptionIsPaused := false
	wg := &sync.WaitGroup{}
	wg.Add(1)
	go func() {
		defer wg.Done()
		for {

			// Consume blocks for the lifetime of one session; after a
			// server-side rebalance it returns and must be called again.
			if err := client.Consume(ctx, strings.Split(topics, ","), &consumer); err != nil {
				if errors.Is(err, sarama.ErrClosedConsumerGroup) {
					return
				}
				log.Panicf("Error from consumer: %v", err)
			}
			// check if context was cancelled, signaling that the consumer should stop
			if ctx.Err() != nil {
				return
			}
			// Re-arm the ready channel for the next session (Setup closes it).
			consumer.ready = make(chan bool)
		}
	}()

	// Block until the first rebalance has completed and consumption started.
	<-consumer.ready
	log.Println("Sarama consumer up and running!...")

	sigusr1 := make(chan os.Signal, 1)
	signal.Notify(sigusr1, syscall.SIGUSR1)

	sigterm := make(chan os.Signal, 1)
	signal.Notify(sigterm, syscall.SIGINT, syscall.SIGTERM)

	for keepRunning {
		select {
		case <-ctx.Done():
			log.Println("terminating: context cancelled")
			keepRunning = false
		case <-sigterm:
			log.Println("terminating: via signal")
			keepRunning = false
		case <-sigusr1:
			// SIGUSR1 toggles pause/resume of all claimed partitions.
			toggleConsumptionFlow(client, &consumptionIsPaused)
		}
	}
	// Shutdown order matters: cancel the consume loop, wait for the
	// goroutine to exit, then close the group client.
	cancel()
	wg.Wait()
	if err = client.Close(); err != nil {
		log.Panicf("Error closing client: %v", err)
	}

}

// toggleConsumptionFlow flips the pause state of the consumer group:
// if it is currently paused, all partitions are resumed, and vice versa.
// The flag pointed to by isPaused is updated to reflect the new state.
func toggleConsumptionFlow(client sarama.ConsumerGroup, isPaused *bool) {
	switch {
	case *isPaused:
		client.ResumeAll()
		log.Println("Resuming consumption")
	default:
		client.PauseAll()
		log.Println("Pausing consumption")
	}

	*isPaused = !*isPaused
}

// Consumer implements sarama.ConsumerGroupHandler. The ready channel is
// closed once the first session's Setup has run, signalling the caller
// that consumption is established.
type Consumer struct {
	ready chan bool
}

// Setup runs at the beginning of a new session, before ConsumeClaim.
// Closing ready unblocks whoever is waiting for consumer start-up.
func (c *Consumer) Setup(sarama.ConsumerGroupSession) error {
	close(c.ready)
	return nil
}

// Cleanup runs at the end of a session, once all ConsumeClaim goroutines
// have exited. Nothing to tear down here.
func (c *Consumer) Cleanup(sarama.ConsumerGroupSession) error {
	return nil
}

// ConsumeClaim processes a single partition claim, looping until either
// the message channel is closed (rebalance) or the session context ends.
func (c *Consumer) ConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
	for {
		select {
		case <-session.Context().Done():
			return nil

		case msg, ok := <-claim.Messages():
			if !ok {
				log.Printf("message channel was closed")
				return nil
			}
			log.Printf("Message claimed: value = %s, timestamp = %v, topic = %s", string(msg.Value), msg.Timestamp, msg.Topic)
			session.MarkMessage(msg, "")
		}
	}
}

管理kafka集群

如果我们想要管理kafka集群,也可以创建一个admin client

admin, err := sarama.NewClusterAdmin(kafkaBrokers, config)

confluent-kafka-go

confluent-kafka-go 是 confluent 公司开发的 kafka golang sdk,由 confluent 公司维护

star数量在4.7k左右, github公开仓库使用该sdk的数量在6.2k左右

项目维护也是比较活跃的

confluent-kafka-go 的 star 数量比 IBM sarama 少,但是使用量比 IBM sarama 多

IBM sarama是纯golang编写的,confluent-kafka-go底层是cgo的方式实现的,所以confluent-kafka-go的性能更好

使用

首先引入依赖

go get github.com/confluentinc/confluent-kafka-go

发送消息

	// Create an asynchronous producer pointed at the local broker.
	p, err := kafka.NewProducer(&kafka.ConfigMap{"bootstrap.servers": "localhost"})
	if err != nil {
		panic(err)
	}

	defer p.Close()

	// Delivery report handler for produced messages
	go func() {
		for e := range p.Events() {
			switch ev := e.(type) {
			case *kafka.Message:
				if ev.TopicPartition.Error != nil {
					fmt.Printf("Delivery failed: %v\n", ev.TopicPartition)
				} else {
					fmt.Printf("Delivered message to %v\n", ev.TopicPartition)
				}
			}
		}
	}()

	// Produce messages to topic (asynchronously)
	topic := topics
	for _, word := range []string{"Welcome", "to", "the", "Confluent", "Kafka", "Golang", "client"} {
		// Fix: Produce returns an error synchronously (e.g. when the local
		// queue is full); the original discarded it, silently dropping
		// messages. Report it instead.
		if err := p.Produce(&kafka.Message{
			TopicPartition: kafka.TopicPartition{Topic: &topic, Partition: kafka.PartitionAny},
			Value:          []byte(word),
		}, nil); err != nil {
			fmt.Printf("Produce failed: %v\n", err)
		}
	}

	// Wait for message deliveries before shutting down
	p.Flush(15 * 1000)

消费消息

	// Create a consumer in group `group`, starting from the earliest
	// offset when no committed offset exists.
	c, err := kafka.NewConsumer(&kafka.ConfigMap{
		"bootstrap.servers": "localhost",
		"group.id":          group,
		"auto.offset.reset": "earliest",
	})

	if err != nil {
		panic(err)
	}

	err = c.SubscribeTopics([]string{topics}, nil)

	if err != nil {
		panic(err)
	}

	run := true

	for run {
		// ReadMessage returns a kafka.Error whose IsTimeout() is true when
		// no message arrives within the poll timeout — that is the normal
		// idle case and should not be reported.
		msg, err := c.ReadMessage(time.Second)
		if err == nil {
			fmt.Printf("Message on %s: %s\n", msg.TopicPartition, string(msg.Value))
		} else if kerr, ok := err.(kafka.Error); !ok || !kerr.IsTimeout() {
			// Fix vs. original: IsRetriable() is not set for the routine
			// poll timeout, so the old check logged "Consumer error" every
			// second while idle; the upstream example filters on IsTimeout().
			// The comma-ok assertion also avoids a panic on non-kafka errors.
			fmt.Printf("Consumer error: %v (%v)\n", err, msg)
		}
	}
	c.Close()

segmentio/kafka-go

segmentio/kafka-go 是 segmentio 公司开发的 kafka golang sdk

目前star数量是7.7k,公开的github使用仓库是7.8k。也是纯golang编写

项目活跃情况相比前面两个要低一点

具体的使用方式大同小异,这里就不演示了

总结

目前主流的kafka golang sdk有两个,一个是IBM sarama,一个是confluent-kafka-go

这里用一个表格对比一下

| 项目 | star | github仓库公开使用量 | 语言实现 | 优点 | 缺点 |
| --- | --- | --- | --- | --- | --- |
| IBM sarama | 11.6k | 3.5k | 纯golang编写 | 跨平台能力更强,不依赖c语言,纯开源项目 | 性能不如confluent-kafka-go,使用相对更复杂,因为更偏底层 |
| confluent-kafka-go | 4.7k | 6.2k | 底层是cgo实现(即c) | 性能更好,学习使用成本更低 | 商业化团队维护 |
| segmentio/kafka-go | 7.7k | 7.8k | 纯golang编写 | 使用更简洁、简单 | 没有太明显的缺点 |

总的来说三个库各有千秋,还是要结合实际的生产情况来选择

比如需要最佳性能可以选择confluent-kafka-go,如果想要最简单入手使用可以选择segmentio/kafka-go。 如果想要更灵活更定制化,跨平台,可以选择IBM sarama

参考