Confluent Kafka Protobuf 测试(rest api)

38 阅读2分钟

protobuf协议简介

Protocol Buffers 是 Google 公司开发的一种数据描述语言,类似于 XML,能够将结构化数据序列化,可用于数据存储、通信协议等方面。它不依赖于语言和平台,并且可扩展性极强。

与 XML 相比,Protocol Buffers 在序列化结构化数据方面有许多优点:

  1. 更简单;
  2. 数据描述文件的大小只有 XML 的1/10至1/3;
  3. 解析速度是 XML 的20倍至100倍;
  4. 减少了二义性;
  5. 能够生成更易于在程序中使用的数据访问类;
  6. 支持多种编程语言;

启动本地服务

1.启动本地zookeeper服务,端口2181

bin/zookeeper-server-start etc/kafka/zookeeper.properties

2.启动单节点的kafka服务,端口9092

bin/kafka-server-start etc/kafka/server.properties

3.启动schema-registry服务,端口8081

bin/schema-registry-start etc/schema-registry/schema-registry.properties

4.启动rest服务,端口8082

bin/kafka-rest-start etc/kafka-rest/kafka-rest.properties

测试

官网文档地址。这里我们使用功能更丰富的 REST Proxy API v3 进行测试。

1 获取cluster_id

# Headers sent with the REST Proxy requests (localhost:8082) in the v3 test
# functions of this file; those requests use JSON bodies.
header = {"Content-Type": "application/json"}


def test_clusters():
    """Query the v3 ``/clusters`` endpoint and print every ``cluster_id`` it returns."""
    url = "http://localhost:8082/v3/clusters"
    response = requests.get(url, headers=header)
    print("=================")
    # The response body is JSON with the cluster entries under the "data" key.
    payload = json.loads(response.content.decode("utf-8"))
    for cluster in payload["data"]:
        print(cluster['cluster_id'])

2 创建topic

def test_add_topic():
    """Create the topic ``pb_test_topic_v2`` via the v3 topics endpoint.

    Prints the HTTP status code followed by the decoded response body.
    """
    url = "http://localhost:8082/v3/clusters/1CprZ1uLRMaWqGrZiPopCQ/topics"
    body = json.dumps({"topic_name": "pb_test_topic_v2"})
    response = requests.post(url, data=body, headers=header)
    print("=================")
    print(response.status_code)
    print(response.content.decode("utf-8"))


def test_list_topics():
    """Fetch the topic list from the v3 topics endpoint and print each ``topic_name``."""
    url = "http://localhost:8082/v3/clusters/1CprZ1uLRMaWqGrZiPopCQ/topics"
    response = requests.get(url, headers=header)
    print("=================")
    # Topic entries are listed under the "data" key of the JSON response.
    payload = json.loads(response.content.decode("utf-8"))
    for topic in payload["data"]:
        print(topic['topic_name'])

3 注册schema

# Headers sent with the Schema Registry requests (localhost:8081) in this file.
schema_header = {"Content-Type": "application/vnd.schemaregistry.v1+json"}

def test_registry_new_schema():
    """Register a proto2 ``UserSample`` schema under ``pb_test_topic_v2-value``.

    Posts the schema text as a PROTOBUF schema with no references and prints
    the decoded response body.
    """
    # Schema text is sent verbatim, including its leading indentation.
    proto_definition = """
                syntax = "proto2";

                package com.testcompany.schema;

                option java_multiple_files = true;
                option go_package = "./pb_test";
                option java_package = "com.testcompany.schema.feature";

                message UserSample {
                    optional int32 label = 1;
                    optional string impid = 2;
                    optional int64 userid = 3;
                    optional int64 ts = 4;
                    optional int32 weekly_act = 5;
                }
                """
    url = "http://localhost:8081/subjects/pb_test_topic_v2-value/versions"
    payload = {
        "schemaType": "PROTOBUF",
        "references": [],
        "schema": proto_definition,
    }
    response = requests.post(url, data=json.dumps(payload), headers=schema_header)
    print("=================")
    print(response.content.decode('utf-8'))


def test_list_schema_subjects():
    """Request ``/subjects`` from the Schema Registry and print the decoded response body."""
    url = "http://localhost:8081/subjects"
    response = requests.get(url, headers=schema_header)
    print("=================")
    body_text = response.content.decode('utf-8')
    print(body_text)
    

def test_get_subject_id():
    """Print the ``id`` of the latest version under ``pb_test_topic_v2-value``.

    Requests ``/subjects/pb_test_topic_v2-value/versions/latest`` from the
    Schema Registry and prints the ``id`` field of the JSON response.

    Raises:
        KeyError: if the JSON response has no ``id`` field.
    """
    # The URL must not carry leading whitespace; keep the literal clean rather
    # than relying on the HTTP client to strip it.
    host = "http://localhost:8081/subjects/pb_test_topic_v2-value/versions/latest"
    r = requests.get(host, headers=schema_header)
    print("=================")
    print(json.loads(r.content.decode('utf-8'))['id'])

4 生产数据

文档里面没看到使用v3发送protobuf数据的方式,此处我们使用v2的api发送数据

def test_produce_protobuf_topic_record():
    """Post one record to ``pb_test_topic_v2`` using the protobuf v2 content type.

    The record value is supplied as JSON and the request names schema id 1;
    the HTTP status code and the decoded response body are printed.
    """
    url = "http://localhost:8082/topics/pb_test_topic_v2/"
    content_type = {"Content-Type": "application/vnd.kafka.protobuf.v2+json"}
    sample_value = {"label": 9, "impid": "impid666", "userid": 9, "ts": 7, "weekly_act": 9}
    payload = json.dumps({
        "records": [{"value": sample_value}],
        "value_schema_id": 1,
    })

    response = requests.post(url, headers=content_type, data=payload)
    print("=================")
    print(response.status_code)
    print(response.content.decode('utf-8'))

5 消费数据

protoc -I=. --proto_path=. --python_out=. *.proto

from confluent_kafka.serialization import SerializationContext, MessageField
from confluent_kafka.schema_registry.protobuf import ProtobufDeserializer
from kafka import KafkaConsumer

from my_confluent_kafka.pb_test.UserSample_pb2 import UserSample


if __name__ == '__main__':
    # Consume pb_test_topic_v2 from the earliest offset and print every record
    # that deserializes into a UserSample message.
    topic = "pb_test_topic_v2"

    protobuf_deserializer = ProtobufDeserializer(UserSample, {'use.deprecated.format': False})

    consumer = KafkaConsumer(
        topic,
        bootstrap_servers=["localhost:9092"],
        auto_offset_reset="earliest",
    )

    for msg in consumer:
        # msg.value is binary protobuf data: pass the raw bytes straight to the
        # deserializer. It must not be decoded as UTF-8 text, which can raise
        # UnicodeDecodeError on non-ASCII payload bytes.
        user = protobuf_deserializer(msg.value, SerializationContext(topic, MessageField.VALUE))

        if user is not None:
            print("User record {}:\n"
                  .format(user))
    # Reached only if iteration over the consumer ends.
    exit(0)