Building a confluent-kafka cluster


1. What is confluent-kafka?

Confluent Platform is a streaming data platform: it organizes and manages data from many different sources in one stable, high-performance system.
It provides not just the data-transport layer but the full tooling around it: connectors for data sources, applications, and data sinks.
Confluent is built on top of Kafka and comes in two editions: an enterprise edition with a 30-day trial, and an open-source edition.

Here we use the open-source edition of confluent-kafka. For the full tool list and the components included in each edition, see the official site: www.confluent.io/download

An overview of what each component does: www.cnblogs.com/dadadecheng…

2. Setting up confluent-kafka

2.1 Install the latest confluent-kafka

yum install curl which -y
rpm --import https://packages.confluent.io/rpm/5.3/archive.key

Configure the yum repository:

cat > /etc/yum.repos.d/confluent.repo <<EOF
[Confluent.dist]
name=Confluent repository (dist)
baseurl=https://packages.confluent.io/rpm/5.3/7
gpgcheck=1
gpgkey=https://packages.confluent.io/rpm/5.3/archive.key
enabled=1

[Confluent]
name=Confluent repository
baseurl=https://packages.confluent.io/rpm/5.3
gpgcheck=1
gpgkey=https://packages.confluent.io/rpm/5.3/archive.key
enabled=1
EOF

Install the Confluent Platform using only Confluent Community components:

yum clean all &&  yum install confluent-community-2.12 -y
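
To confirm what the meta-package pulled in, you can list the installed Confluent packages (the exact set varies by version):

rpm -qa | grep confluent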

2.2 Install JDK 1.8

Download the JDK from the Oracle website, upload it to the server, and run the following command to install it:

rpm -ivh jdk-8u231-linux-x64.rpm
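
Verify the JDK is installed and on the PATH (the version string will match the rpm you installed):

java -version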

Oracle JDK 1.8 download page: www.oracle.com/technetwork…

2.3 Configure ZooKeeper

vim /etc/kafka/zookeeper.properties

Cluster configuration:

dataDir=/data/zookeeper/data/
dataLogDir=/data/zookeeper/logs/
maxClientCnxns=500
clientPort=2181
initLimit=5
syncLimit=2
server.1=zoo1:2888:3888
server.2=zoo2:2888:3888
server.3=zoo3:2888:3888
autopurge.snapRetainCount=3
autopurge.purgeInterval=24

Note: zoo1, zoo2, and zoo3 are hostnames; add them to /etc/hosts in advance. In each server.N entry, 2888 is the quorum port and 3888 is the leader-election port.

echo "1" > /data/zookeeper/data/myid   #请在zoo1 机器上执行echo "2" > /data/zookeeper/data/myid   #请在zoo2 机器上执行echo "3" > /data/zookeeper/data/myid   #请在zoo3 机器上执行启

Set directory permissions:

chmod 777 -R /data/zookeeper/logs/
chmod 777 -R /data/zookeeper/data/

Enable ZooKeeper at boot and start it:

systemctl enable confluent-zookeeper && systemctl start confluent-zookeeper

Check its status:

systemctl status confluent-zookeeper

Watch the logs:

tail -f /var/log/messages
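
Once all three nodes are up, each one can report its role via ZooKeeper's four-letter commands (available by default on the ZooKeeper version bundled with Confluent 5.3); one node should report Mode: leader and the other two Mode: follower:

echo srvr | nc zoo1 2181 | grep Mode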

2.4 Configure Kafka

vim /etc/kafka/server.properties  # modify the following two options

zookeeper.connect=zoo1:2181,zoo2:2181,zoo3:2181
broker.id.generation.enable=true

With broker.id.generation.enable=true each broker assigns itself a unique broker.id automatically (generated ids start above reserved.broker.max.id, 1000 by default), so you do not need to set a distinct broker.id on each node.

Enable Kafka at boot and start it:

systemctl enable confluent-kafka && systemctl start confluent-kafka

Check its status:

systemctl status confluent-kafka

Watch the logs:

tail -f /var/log/messages
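
Before producing anything, it is worth checking that all three brokers have registered in ZooKeeper. A minimal check with the bundled zookeeper-shell (with auto-generated broker ids you would typically see something like [1001, 1002, 1003]):

zookeeper-shell zoo1:2181 ls /brokers/ids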

3. Testing Kafka message production

3.1 Create a topic

kafka-topics --create --bootstrap-server zoo1:9092,zoo2:9092,zoo3:9092 --replication-factor 1 --partitions 5 --topic kafkatest
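
To see how the 5 partitions and their replicas were spread across the brokers, describe the topic:

kafka-topics --describe --bootstrap-server zoo1:9092,zoo2:9092,zoo3:9092 --topic kafkatest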

3.2 Produce messages

kafka-console-producer --broker-list zoo1:9092,zoo2:9092,zoo3:9092 --topic kafkatest

3.3 Consume messages

kafka-console-consumer --bootstrap-server zoo1:9092,zoo2:9092,zoo3:9092 --topic kafkatest --from-beginning
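
For a quick non-interactive round trip, you can also pipe a single message into the console producer and then re-run the consumer command above:

echo "hello kafka" | kafka-console-producer --broker-list zoo1:9092,zoo2:9092,zoo3:9092 --topic kafkatest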

4. Using a Go demo to measure message-production time

package main

import (
	"fmt"
	"log"
	"time"

	"github.com/Shopify/sarama"
)

func main() {
	var address = []string{"ip1:9092", "ip2:9092", "ip3:9092"}
	producer(address)
}

func producer(address []string) {
	config := sarama.NewConfig()
	config.Producer.Return.Successes = true // required by SyncProducer
	config.Producer.Timeout = 5 * time.Second
	p, err := sarama.NewSyncProducer(address, config)
	if err != nil {
		log.Fatalln(err) // no producer to close yet, so exit here
	}
	defer p.Close()
	strKey := "key: "
	srcValue := "testKafka: test message, index=%d"
	start := time.Now()
	log.Println("start")
	for i := 0; i < 10000; i++ {
		value := fmt.Sprintf(srcValue, i)
		msg := &sarama.ProducerMessage{
			Key:   sarama.StringEncoder(strKey),
			Topic: "kafkatest", // the topic created in section 3.1
			Value: sarama.ByteEncoder(value),
		}
		part, offset, err := p.SendMessage(msg)
		if err != nil {
			log.Println(err, value, part, offset)
		}
	}
	log.Println("end, elapsed:", time.Since(start))
}

4.1 Time taken for 10,000 and 100,000 messages

10,000 messages: (timing screenshot in the original post)

100,000 messages: (timing screenshot in the original post)

Throughput is quite fast!
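
If you prefer a reproducible benchmark over an ad-hoc demo, the Confluent packages also install Kafka's standard perf tool; a sketch of a comparable run (tune --num-records and --record-size to match your test):

kafka-producer-perf-test --topic kafkatest --num-records 100000 --record-size 100 --throughput -1 --producer-props bootstrap.servers=zoo1:9092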

5. Writing a consumer

package main

import (
	"fmt"
	"sync"

	"github.com/Shopify/sarama"
)

func main() {
	consumer, err := sarama.NewConsumer([]string{"ip:9092"}, nil)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer consumer.Close()
	// list all partitions of the topic created in section 3.1
	partitionList, err := consumer.Partitions("kafkatest")
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(partitionList)
	var wg sync.WaitGroup
	for _, partition := range partitionList { // iterate over all partitions
		// create a partition consumer for each partition
		pc, err := consumer.ConsumePartition("kafkatest", partition, sarama.OffsetNewest)
		if err != nil {
			fmt.Printf("failed to start consumer for partition %d, err:%v\n", partition, err)
			return
		}
		defer pc.AsyncClose()
		wg.Add(1) // one goroutine per partition
		// consume each partition asynchronously
		go func(pc sarama.PartitionConsumer) {
			defer wg.Done()
			for msg := range pc.Messages() {
				fmt.Printf("Partition:%d Offset:%d Key:%s Value:%s\n", msg.Partition, msg.Offset, msg.Key, msg.Value)
			}
		}(pc)
	}
	wg.Wait()
}
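
Note that sarama.OffsetNewest starts each partition consumer at the end of its partition, so launch this consumer first and then run the producer from section 4 to see messages arrive; use sarama.OffsetOldest instead to replay every partition from the beginning.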