Introduction to the protobuf protocol
Protocol Buffers is a data description language developed by Google. Like XML, it serializes structured data, and it is used for data storage, communication protocols, and similar scenarios. It is language- and platform-neutral and highly extensible.
Compared with XML, Protocol Buffers has several advantages when serializing structured data:
- Simpler to use;
- Data description files are only 1/10 to 1/3 the size;
- Parsing is 20 to 100 times faster;
- Less ambiguity;
- Generates data access classes that are easier to use programmatically;
- Supports multiple programming languages;
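As a quick illustration of the serialize/parse round trip, here is a minimal sketch in Python. The Example message and its example.proto file are hypothetical stand-ins, assumed to have been compiled beforehand with protoc --python_out=.:
# example.proto (hypothetical):
#   syntax = "proto2";
#   message Example { optional int32 id = 1; }
from example_pb2 import Example

msg = Example(id=42)
payload = msg.SerializeToString()  # compact binary encoding
decoded = Example()
decoded.ParseFromString(payload)   # lossless round trip
assert decoded.id == 42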
Starting the local services
1. Start the local ZooKeeper service (port 2181):
bin/zookeeper-server-start etc/kafka/zookeeper.properties
2. Start a single-node Kafka broker (port 9092):
bin/kafka-server-start etc/kafka/server.properties
3. Start the Schema Registry service (port 8081):
bin/schema-registry-start etc/schema-registry/schema-registry.properties
4. Start the Kafka REST Proxy service (port 8082):
bin/kafka-rest-start etc/kafka-rest/kafka-rest.properties
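Before moving on, it can help to confirm that all four services are actually listening. A minimal sketch using only the Python standard library (service names and ports as listed above):
import socket

for name, port in [("zookeeper", 2181), ("kafka", 9092),
                   ("schema-registry", 8081), ("rest-proxy", 8082)]:
    # connect_ex returns 0 when the TCP connection succeeds.
    with socket.socket() as s:
        s.settimeout(2)
        status = "up" if s.connect_ex(("localhost", port)) == 0 else "down"
        print(f"{name}: port {port} is {status}")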
Testing
The official documentation covers the full API; here we use the more feature-rich REST Proxy API v3 for testing.
1 Get the cluster_id
import json

import requests

header = {"Content-Type": "application/json"}

def test_clusters():
    # List the clusters known to the REST proxy and print each cluster id.
    host = "http://localhost:8082/v3/clusters"
    r = requests.get(host, headers=header)
    print("=================")
    for data in json.loads(r.content.decode("utf-8"))["data"]:
        print(data['cluster_id'])
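The cluster id printed here is where the hardcoded value in the next step comes from. A hypothetical helper that fetches it programmatically (assuming a single-cluster proxy) might look like:
def get_cluster_id():
    # Hypothetical helper: return the id of the first (only) cluster.
    r = requests.get("http://localhost:8082/v3/clusters", headers=header)
    return json.loads(r.content.decode("utf-8"))["data"][0]["cluster_id"]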
2 Create a topic
def test_add_topic():
    # Create the topic through the v3 API; the cluster id comes from step 1.
    host = "http://localhost:8082/v3/clusters/1CprZ1uLRMaWqGrZiPopCQ/topics"
    data = {"topic_name": "pb_test_topic_v2"}
    r = requests.post(host, data=json.dumps(data), headers=header)
    print("=================")
    print(r.status_code)
    print(r.content.decode("utf-8"))

def test_list_topics():
    # List all topics in the cluster to confirm the creation succeeded.
    host = "http://localhost:8082/v3/clusters/1CprZ1uLRMaWqGrZiPopCQ/topics"
    r = requests.get(host, headers=header)
    print("=================")
    for data in json.loads(r.content.decode("utf-8"))["data"]:
        print(data['topic_name'])
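To check a single topic rather than listing them all, the v3 API also exposes a topic by name. A small sketch, using the same hardcoded cluster id as above:
def test_get_topic():
    # Fetch metadata for the newly created topic by name.
    host = "http://localhost:8082/v3/clusters/1CprZ1uLRMaWqGrZiPopCQ/topics/pb_test_topic_v2"
    r = requests.get(host, headers=header)
    print(r.status_code)
    print(r.content.decode("utf-8"))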
3 Register the schema
schema_header = {"Content-Type": "application/vnd.schemaregistry.v1+json"}

def test_registry_new_schema():
    # Register a protobuf schema under the value subject for the topic.
    host = "http://localhost:8081/subjects/pb_test_topic_v2-value/versions"
    data = {"schemaType": "PROTOBUF", "references": [],
            "schema": """
syntax = "proto2";

package com.testcompany.schema;

option java_multiple_files = true;
option go_package = "./pb_test";
option java_package = "com.testcompany.schema.feature";

message UserSample {
  optional int32 label = 1;
  optional string impid = 2;
  optional int64 userid = 3;
  optional int64 ts = 4;
  optional int32 weekly_act = 5;
}
"""
            }
    data_str = json.dumps(data)
    r = requests.post(host, data=data_str, headers=schema_header)
    print("=================")
    print(r.content.decode('utf-8'))

def test_list_schema_subjects():
    # List all subjects registered in the Schema Registry.
    host = "http://localhost:8081/subjects"
    r = requests.get(host, headers=schema_header)
    print("=================")
    print(r.content.decode('utf-8'))

def test_get_subject_id():
    # Fetch the latest version of the subject and print its schema id.
    host = "http://localhost:8081/subjects/pb_test_topic_v2-value/versions/latest"
    r = requests.get(host, headers=schema_header)
    print("=================")
    print(json.loads(r.content.decode('utf-8'))['id'])
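The registry can also return a schema by its global id, which is a handy sanity check before producing. A sketch assuming the registration above returned id 1:
def test_get_schema_by_id():
    # Fetch the registered schema back by its global id (assumed to be 1 here).
    host = "http://localhost:8081/schemas/ids/1"
    r = requests.get(host, headers=schema_header)
    print(r.content.decode('utf-8'))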
4 Produce data
The documentation does not appear to cover producing protobuf records through the v3 API, so here we fall back to the v2 API to send data.
def test_produce_protobuf_topic_record():
    # Produce a protobuf record through the v2 API; value_schema_id is the id
    # returned when the schema was registered in step 3.
    host = "http://localhost:8082/topics/pb_test_topic_v2/"
    records = {
        "records": [{"value": {"label": 9, "impid": "impid666", "userid": 9, "ts": 7, "weekly_act": 9}}],
        "value_schema_id": 1
    }
    records_str = json.dumps(records)
    r = requests.post(host, headers={"Content-Type": "application/vnd.kafka.protobuf.v2+json"}, data=records_str)
    print("=================")
    print(r.status_code)
    print(r.content.decode('utf-8'))
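If you would rather bypass the REST proxy, a client-side sketch using confluent_kafka's ProtobufSerializer should produce an equivalent, Schema-Registry-framed record. It assumes the UserSample class generated in the next step is already available; topic and URLs match the setup above:
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.protobuf import ProtobufSerializer
from confluent_kafka.serialization import SerializationContext, MessageField
from kafka import KafkaProducer

from my_confluent_kafka.pb_test.UserSample_pb2 import UserSample

# The serializer looks up the schema in the registry and prefixes
# the Confluent wire-format framing to the protobuf payload.
sr_client = SchemaRegistryClient({"url": "http://localhost:8081"})
serializer = ProtobufSerializer(UserSample, sr_client, {"use.deprecated.format": False})

record = UserSample(label=9, impid="impid666", userid=9, ts=7, weekly_act=9)
value = serializer(record, SerializationContext("pb_test_topic_v2", MessageField.VALUE))

producer = KafkaProducer(bootstrap_servers=["localhost:9092"])
producer.send("pb_test_topic_v2", value=value)
producer.flush()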
5 Consume data
First compile the registered .proto file into a Python module (-I is just an alias for --proto_path, so one flag is enough):
protoc --proto_path=. --python_out=. *.proto
from confluent_kafka.serialization import SerializationContext, MessageField
from confluent_kafka.schema_registry.protobuf import ProtobufDeserializer
from kafka import KafkaConsumer

from my_confluent_kafka.pb_test.UserSample_pb2 import UserSample

if __name__ == '__main__':
    protobuf_deserializer = ProtobufDeserializer(UserSample, {'use.deprecated.format': False})
    consumer = KafkaConsumer(
        "pb_test_topic_v2",
        bootstrap_servers=["localhost:9092"],
        auto_offset_reset="earliest"
    )
    for msg in consumer:
        # msg.value is not plain UTF-8: the Confluent wire format prefixes a
        # magic byte and the schema id to the protobuf payload, so the raw
        # bytes must go through the deserializer.
        user = protobuf_deserializer(msg.value, SerializationContext("pb_test_topic_v2", MessageField.VALUE))
        if user is not None:
            print("User record:\n{}".format(user))
        exit(0)  # stop after the first record