vector调研

35 阅读6分钟

一、部署

# Deploy Vector with Docker on physical host 131.
# The host-side config file is mounted read-only; only the API port 8686 is published.
docker run \
  --detach \
  --name vector \
  --publish 8686:8686 \
  --volume /opt/vector/config/vector.yaml:/etc/vector/vector.yaml:ro \
  harbor.hwwt2.com/ai-engineering/base/vector:0.52.0-debian


# Edit the host-side config file (the path mounted read-only into the container at /etc/vector/vector.yaml)
vi /opt/vector/config/vector.yaml
# Contents of /opt/vector/config/vector.yaml:
# Consume JSON-encoded log events from the Kafka cluster.
sources:
  kafka_source:
    type: kafka
    bootstrap_servers: "172.21.241.185:9092,172.21.241.186:9092,172.21.241.187:9092,172.21.241.188:9092,172.21.241.189:9092"
    group_id: vector
    topics:
      - mid-uh-go-server
    # Each Kafka record is decoded as a JSON object.
    decoding:
      codec: json
    # Offset strategy used when the consumer group has no committed offset.
    auto_offset_reset: latest
# Normalize the message field so every event carries `msg`.
transforms:
  message_normalizer:
    type: remap
    inputs: [kafka_source]
    source: |
      # Only `message` is present: move its value to `msg`.
      if exists(.message) && !exists(.msg) {
        .msg = del(.message)
      }
      # Neither field is present: add a placeholder `msg`.
      if !(exists(.msg) || exists(.message)) {
        .msg = "No message field found"
      }

sinks:
  # Ship normalized events to the VictoriaLogs JSON-lines ingestion endpoint.
  victorialogs_sink:
    type: http
    inputs: [message_normalizer]
    uri: "http://172.21.20.131:9481/insert/jsonline?_msg_field=msg&_time_field=timestamp"
    compression: zstd
    # One JSON object per line in the request body.
    encoding: {codec: json}
    framing: {method: newline_delimited}
    healthcheck: {enabled: true}

# Vector API, bound to all interfaces on port 8686, with the playground turned on.
api:
  enabled: true
  address: "0.0.0.0:8686"
  playground: true



二、vector与logstash性能对比

  1. 启动1个topic

- mid-uh-go-server                                                                                                                                           

5 分钟内写入 23,351,550 条,Vector 的平均吞吐量约为 77,839 条/秒;CPU 峰值约 20 核,内存峰值约 11 GB。

追平 kafka 数据后,Vector 的平均吞吐量不变,CPU 约为 4 核,内存约为 3 GB。

  2. 对比

说明: vector 和 logstash同时收 uh的日志, vector的速率是 logstash的2倍。

17:05-17:06之间的消费速率比较

vector的速率

logstash的速率

说明:logstash 每秒写入3w,vector每秒写入7w+




三、vector调优

# Tuned HTTP sink: explicit batching, a disk buffer and request limits.
sinks:
  victorialogs_sink:
    type: http
    inputs:
      - message_normalizer
    uri: "http://172.21.20.131:9481/insert/jsonline?_msg_field=msg&_time_field=timestamp"
    compression: zstd
    encoding:
      codec: json
    framing:
      method: newline_delimited
    healthcheck:
      enabled: true
    # Flush a batch at 1000 events or after 5 seconds, whichever comes first.
    batch:
      max_events: 1000
      timeout_secs: 5
    # Disk-backed buffer of about 512 MiB; when it is full, block upstream
    # instead of dropping events.
    buffer:
      type: disk
      max_size: 536870976
      when_full: block
    request:
      concurrency: 10
      # Vector's request options use a `_secs` suffix: `timeout_secs` and
      # `retry_max_duration_secs` are the documented names (not `timeout` /
      # `retry_max_duration`).
      timeout_secs: 30
      retry_max_duration_secs: 300
启用Vector自监控
# Self-monitoring: expose Vector's internal metrics in Prometheus format.
# When merging into the main vector.yaml, add these entries under the existing
# `sources` / `sinks` keys instead of repeating the top-level keys.
sources:
  vector_metrics:
    # These properties must be nested under `vector_metrics`; at the same
    # indent they would be parsed as separate (invalid) sources.
    type: internal_metrics
    scrape_interval_secs: 10

sinks:
  prometheus:
    type: prometheus_exporter
    inputs:
      - vector_metrics
    # NOTE(review): the docker run command in the deployment section only
    # publishes 8686 - port 9090 presumably needs to be published as well; confirm.
    address: "0.0.0.0:9090"



四、优化结果与结论

  1. 三个节点部署vector实例数量

172.21.20.133 上2个vector

172.21.20.132上4个vector

172.21.20.131上4个vector

  2. vector的配置文件

# Consume JSON-encoded log events for all listed topics from the Kafka cluster.
sources:
  kafka_source:
    type: kafka
    bootstrap_servers: "172.21.241.185:9092,172.21.241.186:9092,172.21.241.187:9092,172.21.241.188:9092,172.21.241.189:9092"
    group_id: vector
    topics:
      - mid-uh-go-server
      - ec-kfcp-orderingcore-server
      - br-kfc-promotion-server
      - st-cpos2-server
      - ec-yumchina-d3-server
      - ec-kfcp-menu-server
      - ec-dso-open-server
      - br-kfcapp-ms-service
      - mid-payhub
      - ec-kfcp-orderingapi-server
      - mid-techub-sonic-server
      - ec-superapp-kfcappadvert-service
      - br-primeserver-service
      - ec-kfc-3rd-server
      - ec-d3-commonconf-server
      - ec-kfcpre-order-server
      - envoy_gateway_audit_ks-gateway
      - ec-kfcd-orderingcore-server
      - st-ph-ssporder-server
      - infra-s-innerflowcontrol-janus-nginx
      - st-coc-server
      - bd-datadrivenrewards-floatingbar-server
      - ec-kfc-menu-server
    # Each Kafka record is decoded as a JSON object.
    decoding:
      codec: json
    # Offset strategy used when the consumer group has no committed offset.
    auto_offset_reset: latest
# Rename `message` to `msg` for events that only carry `message`.
transforms:
  message_normalizer:
    type: remap
    inputs: [kafka_source]
    source: |
      # Only `message` is present: move its value to `msg`.
      if exists(.message) && !exists(.msg) {
        .msg = del(.message)
      }

# Final HTTP sink towards VictoriaLogs, with larger batches and higher request concurrency.
sinks:
  victorialogs_sink:
    type: http
    inputs:
      - message_normalizer
    uri: "http://172.21.20.131:9481/insert/jsonline?_msg_field=msg&_time_field=timestamp"
    compression: zstd
    encoding:
      codec: json
    framing:
      method: newline_delimited
    healthcheck:
      enabled: true
    batch:
      max_events: 5000        # at most 5000 events per batch (tune to VictoriaLogs capacity)
      timeout_secs: 1         # flush after at most 1 second
    request:
      # Up to 100 concurrent HTTP requests. `concurrency` is the current option
      # name; `in_flight_limit` is its legacy name.
      concurrency: 100
      timeout_secs: 10
      retry_attempts: 3
      rate_limit_duration_secs: 1
      # Upper bound on requests (batches, not individual events) per
      # rate-limit window; tune as needed.
      rate_limit_num: 200000
# Vector API, bound to all interfaces on port 8686, with the playground turned on.
api:
  enabled: true
  address: "0.0.0.0:8686"
  playground: true

  3. 三个节点上分别部署nginx

通过9481 转发到各自节点的4个insert上。

  4. 每个节点上的vlinsert拆分成4个


# ###
#  131 和 132 拆解 victorialogs
# ###

172.21.20.131
172.21.20.132
172.21.20.133

# ###
# Node 131 (172.21.20.131)
# vlstorage: 2 storage instances, each with its own data directory and log file
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9491 -storageDataPath=/opt/victoria-logs-data1 -retentionPeriod=2d > /opt/victoria/vlstorage01.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9492 -storageDataPath=/opt/victoria-logs-data2 -retentionPeriod=2d > /opt/victoria/vlstorage02.log 2>&1 &

# vlinsert: 4 insert instances (ports 9501-9504).
# They write to this node's own two vlstorage instances, matching the layout on
# nodes 132 and 133 (the targets previously pointed at 172.21.20.133).
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9501 -storageNode=172.21.20.131:9491,172.21.20.131:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9501.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9502 -storageNode=172.21.20.131:9491,172.21.20.131:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9502.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9503 -storageNode=172.21.20.131:9491,172.21.20.131:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9503.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9504 -storageNode=172.21.20.131:9491,172.21.20.131:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9504.log 2>&1 &

# vlselect: must list every vlstorage instance (both ports on all three nodes);
# data held by an unlisted instance is not returned by queries.
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9471 -storageNode=172.21.20.131:9491,172.21.20.131:9492,172.21.20.132:9491,172.21.20.132:9492,172.21.20.133:9491,172.21.20.133:9492 > /opt/victoria/vlselect.log 2>&1 &

# ###
# Node 132 (172.21.20.132)
# vlstorage: 2 storage instances.
# Each instance needs its own data directory and its own log file: the second
# path was missing the "/" after /opt, and both instances redirected into the
# same vlstorage.log. Paths follow the data1/data2 + vlstorage01/02 naming used
# on the other nodes.
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9491 -storageDataPath=/opt/victoria-logs-data1 -retentionPeriod=2d > /opt/victoria/vlstorage01.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9492 -storageDataPath=/opt/victoria-logs-data2 -retentionPeriod=2d > /opt/victoria/vlstorage02.log 2>&1 &

# vlinsert: 4 insert instances (ports 9501-9504) writing to this node's two vlstorage instances
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9501 -storageNode=172.21.20.132:9491,172.21.20.132:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9501.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9502 -storageNode=172.21.20.132:9491,172.21.20.132:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9502.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9503 -storageNode=172.21.20.132:9491,172.21.20.132:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9503.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9504 -storageNode=172.21.20.132:9491,172.21.20.132:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9504.log 2>&1 &

# vlselect: must list every vlstorage instance (both ports on all three nodes);
# data held by an unlisted instance is not returned by queries.
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9471 -storageNode=172.21.20.131:9491,172.21.20.131:9492,172.21.20.132:9491,172.21.20.132:9492,172.21.20.133:9491,172.21.20.133:9492 > /opt/victoria/vlselect.log 2>&1 &

# ###
# Node 133 (172.21.20.133)
# vlstorage: 2 storage instances, each with its own data directory and log file
# (the data paths were missing the "/" after /opt)
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9491 -storageDataPath=/opt/victoria-logs-data1 -retentionPeriod=2d > /opt/victoria/vlstorage01.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9492 -storageDataPath=/opt/victoria-logs-data2 -retentionPeriod=2d > /opt/victoria/vlstorage02.log 2>&1 &

# vlinsert: 4 insert instances (ports 9501-9504) writing to this node's two vlstorage instances
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9501 -storageNode=172.21.20.133:9491,172.21.20.133:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9501.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9502 -storageNode=172.21.20.133:9491,172.21.20.133:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9502.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9503 -storageNode=172.21.20.133:9491,172.21.20.133:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9503.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9504 -storageNode=172.21.20.133:9491,172.21.20.133:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9504.log 2>&1 &

# vlselect: must list every vlstorage instance (both ports on all three nodes);
# data held by an unlisted instance is not returned by queries.
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9471 -storageNode=172.21.20.131:9491,172.21.20.131:9492,172.21.20.132:9491,172.21.20.132:9492,172.21.20.133:9491,172.21.20.133:9492 > /opt/victoria/vlselect.log 2>&1 &
  5. 通过nginx实现轮询vlinsert

stream部分


# Key part: L4 (TCP) load balancing in front of the local VictoriaLogs insert processes
stream {
    # Access-log format (optional)
    log_format vlogs_stream '$remote_addr [$time_local] '
                            'proxy_upstream_addr=$upstream_addr '
                            'bytes_sent=$bytes_sent bytes_received=$bytes_received '
                            'session_time=$session_time';

    access_log /var/log/nginx/victorialogs_stream.log vlogs_stream;
    error_log /var/log/nginx/victorialogs_stream_error.log warn;

    # Upstream pool: the four vlinsert processes on this node
    upstream victorialogs_insert {
        # Round-robin is the default balancing method; `least_conn;` may be used instead.
        server 127.0.0.1:9501 max_fails=3 fail_timeout=10s;
        server 127.0.0.1:9502 max_fails=3 fail_timeout=10s;
        server 127.0.0.1:9503 max_fails=3 fail_timeout=10s;
        server 127.0.0.1:9504 max_fails=3 fail_timeout=10s;

        # Note: the upstream `keepalive` directive belongs to the http upstream
        # module and is not available in a stream upstream, so it is not used here.
    }

    # Listener for the insert port that Vector targets
    server {
        listen 9481;               # Vector keeps writing to this port
        listen [::]:9481;

        proxy_pass victorialogs_insert;

        # Maximum idle time between two successive reads/writes on a proxied
        # connection (tune to the log batch size / flush interval).
        proxy_timeout 10s;
        # `proxy_responses` is intentionally absent: it only applies to UDP
        # datagrams and has no effect on these TCP listeners.

        # Enable TCP keepalive (SO_KEEPALIVE) on connections to the backends
        proxy_socket_keepalive on;
    }

    # Optional: expose an HTTP health-check port.
    # The example below uses http-module syntax (`return 200 ...`, `add_header`),
    # so it must be placed in the `http {}` context, not inside `stream {}`.
    # server {
    #     listen 9482;
    #     return 200 "OK\n";
    #     add_header Content-Type text/plain;
    # }
}

  6. 晚餐期消费能力

  1. topic的lag

  1. 131、132和133主机上此时的CPU、内存、磁盘IO监控情况:

  1. 三台主机的CPU、内存和磁盘IO:

说明:CPU、内存和磁盘IO均没到瓶颈

  1. kafka-ui中的实时lag信息

  1. 0121总结疯四的压力测试结果

昨天疯四,vector的消费延迟比较严重。

后来在 16:20 左右扩容了 vector 实例(4个),vlinsert 进程也扩容了一倍(总32个,每个节点8个;注:按3个节点、每节点8个计算应为24个,总数待核实)。vector 的消费能力有所提升,但仍然落后于 kafka 的生产速率。

  1. 2个节点的磁盘IO到瓶颈

  1. 测试的3个节点的victorialogs的磁盘写入到瓶颈

说明:磁盘IO到瓶颈

  1. 单节点的最高写入58w/s

  1. 三个节点总写入速率149w/s

  1. 涉及的topic
- mid-uh-go-server
- ec-kfcp-orderingcore-server
- br-kfc-promotion-server
- st-cpos2-server
- ec-yumchina-d3-server
- ec-kfcp-menu-server
- ec-dso-open-server
- br-kfcapp-ms-service
- mid-payhub
- ec-kfcp-orderingapi-server
- mid-techub-sonic-server
- ec-superapp-kfcappadvert-service
- br-primeserver-service
- ec-kfc-3rd-server
- ec-d3-commonconf-server
- ec-kfcpre-order-server
- envoy_gateway_audit_ks-gateway
- ec-kfcd-orderingcore-server
- st-ph-ssporder-server
- infra-s-innerflowcontrol-janus-nginx
- st-coc-server
- bd-datadrivenrewards-floatingbar-server
- ec-kfc-menu-server

【victorialogs结论】

3个vlstorage

3个vlselect

32个vlinsert

victorialogs最大的insert速率149w/s,但写入性能不稳定,原因在磁盘IO到瓶颈。

所以平时流量可以支撑,但疯四的量顶不住