1. What is confluent-kafka?
Confluent Platform is a streaming data platform that organizes and manages data from many different sources on a stable, efficient system.
Beyond the data transport itself, Confluent Platform provides all the surrounding tooling: tools for connecting data sources, applications, and data sinks.
Confluent is software built on top of Kafka. It comes in two editions: an enterprise edition with a 30-day trial, and an open-source community edition.
Here we use the open-source edition of confluent-kafka.
For the complete tool list and the free-trial components, see the official site: www.confluent.io/download
Component feature overview: www.cnblogs.com/dadadecheng…
2. Setting up confluent-kafka
2.1 Install the latest confluent-kafka
yum install curl which -y
rpm --import https://packages.confluent.io/rpm/5.3/archive.key
Configure the yum repository:
cat > /etc/yum.repos.d/confluent.repo <<EOF
[Confluent.dist]
name=Confluent repository (dist)
baseurl=https://packages.confluent.io/rpm/5.3/7
gpgcheck=1
gpgkey=https://packages.confluent.io/rpm/5.3/archive.key
enabled=1
[Confluent]
name=Confluent repository
baseurl=https://packages.confluent.io/rpm/5.3
gpgcheck=1
gpgkey=https://packages.confluent.io/rpm/5.3/archive.key
enabled=1
EOF
To install Confluent Platform using only the Confluent Community components:
yum clean all && yum install confluent-community-2.12 -y
2.2 Install JDK 1.8
Download the JDK from the Oracle website, upload it to the server, and install it with the following command:
rpm -ivh jdk-8u231-linux-x64.rpm
Oracle JDK 1.8 download page: www.oracle.com/technetwork…
2.3 Configure ZooKeeper
vim /etc/kafka/zookeeper.properties
Cluster configuration:
dataDir=/data/zookeeper/data/
dataLogDir=/data/zookeeper/logs/
maxClientCnxns=500
clientPort=2181
initLimit=5
syncLimit=2
server.1=zoo1:2888:3888
server.2=zoo2:2888:3888
server.3=zoo3:2888:3888
autopurge.snapRetainCount=3
autopurge.purgeInterval=24
Note: zoo1, zoo2, and zoo3 are hostnames; add them to /etc/hosts on every node in advance. In server.X=host:2888:3888, 2888 is the peer/quorum port and 3888 is the leader-election port.
echo "1" > /data/zookeeper/data/myid #请在zoo1 机器上执行echo "2" > /data/zookeeper/data/myid #请在zoo2 机器上执行echo "3" > /data/zookeeper/data/myid #请在zoo3 机器上执行启
Set directory permissions:
chmod 777 -R /data/zookeeper/logs/
chmod 777 -R /data/zookeeper/data/
Enable ZooKeeper to start at boot and start it now:
systemctl enable confluent-zookeeper && systemctl start confluent-zookeeper
Check the status:
systemctl status confluent-zookeeper
Tail the logs:
tail -f /var/log/messages
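As a quick health check from code, the sketch below sends ZooKeeper's four-letter "ruok" command over a raw TCP connection and prints the reply ("imok" means the server is healthy). This is a minimal example using only the Go standard library; it assumes the four-letter-word commands are enabled (they are by default on the ZooKeeper 3.4.x line bundled with this release, while 3.5+ requires whitelisting them via 4lw.commands.whitelist).

package main

import (
	"fmt"
	"net"
	"time"
)

// ruok sends ZooKeeper's four-letter "ruok" command and returns the reply.
func ruok(addr string) (string, error) {
	conn, err := net.DialTimeout("tcp", addr, 3*time.Second)
	if err != nil {
		return "", err
	}
	defer conn.Close()
	conn.SetDeadline(time.Now().Add(3 * time.Second))
	if _, err := conn.Write([]byte("ruok")); err != nil {
		return "", err
	}
	buf := make([]byte, 16)
	n, err := conn.Read(buf)
	if err != nil {
		return "", err
	}
	return string(buf[:n]), nil // "imok" when healthy
}

func main() {
	for _, addr := range []string{"zoo1:2181", "zoo2:2181", "zoo3:2181"} {
		reply, err := ruok(addr)
		fmt.Println(addr, reply, err)
	}
}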
2.4 Configure Kafka
vim /etc/kafka/server.properties   # modify the following two options
zookeeper.connect=zoo1:2181,zoo2:2181,zoo3:2181
broker.id.generation.enable=true
With broker.id.generation.enable=true each broker auto-generates a unique broker.id, so you do not need to assign one per node.
Enable Kafka to start at boot and start it now:
systemctl enable confluent-kafka && systemctl start confluent-kafka
Check the status:
systemctl status confluent-kafka
Tail the logs:
tail -f /var/log/messages
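Before moving on, it is worth confirming that the brokers are reachable from code. The sketch below uses the sarama client (the same library as the demos in sections 4 and 5) to list the brokers and topics the cluster reports; the addresses are the hostnames from this setup, so swap in your own.

package main

import (
	"fmt"
	"log"

	"github.com/Shopify/sarama"
)

func main() {
	// a nil config means sarama's defaults, same as the consumer demo below
	client, err := sarama.NewClient([]string{"zoo1:9092", "zoo2:9092", "zoo3:9092"}, nil)
	if err != nil {
		log.Fatalln(err)
	}
	defer client.Close()

	// print every broker the cluster metadata reports
	for _, b := range client.Brokers() {
		fmt.Println("broker:", b.ID(), b.Addr())
	}
	topics, err := client.Topics()
	fmt.Println("topics:", topics, err)
}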
3. Test Kafka message production
3.1 Create a topic
kafka-topics --create --bootstrap-server zoo1:9092,zoo2:9092,zoo3:9092 --replication-factor 1 --partitions 5 --topic kafkatest
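The same topic can also be created programmatically. Below is a minimal sketch using sarama's ClusterAdmin API; the broker addresses and the protocol version (sarama.V2_0_0_0, compatible with the Kafka 2.x that Confluent 5.3 ships) are assumptions to adjust for your cluster.

package main

import (
	"log"

	"github.com/Shopify/sarama"
)

func main() {
	config := sarama.NewConfig()
	// the admin API needs a Kafka version new enough for CreateTopics requests
	config.Version = sarama.V2_0_0_0

	admin, err := sarama.NewClusterAdmin([]string{"zoo1:9092", "zoo2:9092", "zoo3:9092"}, config)
	if err != nil {
		log.Fatalln(err)
	}
	defer admin.Close()

	// mirror the CLI flags above: 5 partitions, replication factor 1
	err = admin.CreateTopic("kafkatest", &sarama.TopicDetail{
		NumPartitions:     5,
		ReplicationFactor: 1,
	}, false)
	if err != nil {
		log.Fatalln(err)
	}
	log.Println("topic kafkatest created")
}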
3.2 Produce messages
kafka-console-producer --broker-list zoo1:9092,zoo2:9092,zoo3:9092 --topic kafkatest
3.3 Consume messages
kafka-console-consumer --bootstrap-server zoo1:9092,zoo2:9092,zoo3:9092 --topic kafkatest --from-beginning
4. Measure message production time with a Golang demo
package main

import (
	"fmt"
	"log"
	"time"

	"github.com/Shopify/sarama"
)

func main() {
	var address = []string{"ip1:9092", "ip2:9092", "ip3:9092"}
	producer(address)
}

func producer(address []string) {
	config := sarama.NewConfig()
	// SyncProducer requires Return.Successes = true
	config.Producer.Return.Successes = true
	config.Producer.Timeout = 5 * time.Second

	p, err := sarama.NewSyncProducer(address, config)
	if err != nil {
		log.Fatalln(err) // there is no producer to close if creation failed
	}
	defer p.Close()

	strKey := "key: "
	srcValue := "testKafka: test message, index=%d"
	log.Println("start")
	for i := 0; i < 10000; i++ {
		value := fmt.Sprintf(srcValue, i)
		msg := &sarama.ProducerMessage{
			Key:   sarama.StringEncoder(strKey),
			Topic: "kafkatest", // the topic created in section 3.1
			Value: sarama.ByteEncoder(value),
		}
		part, offset, err := p.SendMessage(msg)
		if err != nil {
			log.Println(err, value, part, offset)
		}
	}
	log.Println("end") // the start/end log timestamps give the elapsed time
}
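The elapsed time can be read straight off the start/end log timestamps. To measure it explicitly, a small helper like the hypothetical timeIt below can wrap the call; this main is a drop-in replacement for the demo's main above, assuming it lives next to the producer function.

// timeIt runs fn and logs how long it took (hypothetical helper, not part of sarama).
func timeIt(label string, fn func()) {
	start := time.Now()
	fn()
	log.Printf("%s took %v", label, time.Since(start))
}

func main() {
	address := []string{"ip1:9092", "ip2:9092", "ip3:9092"}
	timeIt("sending 10000 messages", func() { producer(address) })
}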
4.1 Time taken for 10,000 and 100,000 messages
10,000 messages: (timing screenshot)
100,000 messages: (timing screenshot)
Quite fast!
5. Writing a consumer
package main

import (
	"fmt"
	"sync"

	"github.com/Shopify/sarama"
)

func main() {
	consumer, err := sarama.NewConsumer([]string{"ip:9092"}, nil)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer consumer.Close()

	partitionList, err := consumer.Partitions("test-golang-kafka")
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(partitionList)

	var wg sync.WaitGroup
	for _, partition := range partitionList { // iterate over all partitions
		// create a partition consumer for each partition
		pc, err := consumer.ConsumePartition("test-golang-kafka", partition, sarama.OffsetNewest)
		if err != nil {
			fmt.Printf("failed to start consumer for partition %d, err:%v\n", partition, err)
			return
		}
		defer pc.AsyncClose()

		// consume messages from each partition asynchronously;
		// Add must be called once per goroutine, not once overall
		wg.Add(1)
		go func(pc sarama.PartitionConsumer) {
			defer wg.Done()
			for msg := range pc.Messages() {
				fmt.Printf("Partition:%d Offset:%d Key:%v Value:%v\n", msg.Partition, msg.Offset, msg.Key, msg.Value)
			}
		}(pc)
	}
	wg.Wait()
}
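Note that sarama.OffsetNewest starts each partition consumer at the end of its partition, so only messages produced after the consumer starts are printed; use sarama.OffsetOldest to replay from the beginning, like --from-beginning in section 3.3. For real applications, sarama also provides a ConsumerGroup API that handles partition assignment and offset commits automatically; the per-partition loop above is just the simplest way to watch messages flow.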