一、部署
# 131物理机上docker部署vector
docker run --name vector -v /opt/vector/config/vector.yaml:/etc/vector/vector.yaml:ro -p 8686:8686 -d harbor.hwwt2.com/ai-engineering/base/vector:0.52.0-debian
#
vi /opt/vector/config/vector.yaml
#
# Initial Vector pipeline: Kafka -> remap (normalize msg field) -> VictoriaLogs HTTP sink.
sources:
  kafka_source:
    type: kafka
    bootstrap_servers: 172.21.241.185:9092,172.21.241.186:9092,172.21.241.187:9092,172.21.241.188:9092,172.21.241.189:9092
    group_id: vector
    topics:
      - mid-uh-go-server
    decoding:
      codec: "json"
    # Start from the newest offsets when the group has no committed offset.
    auto_offset_reset: "latest"

transforms:
  message_normalizer:
    type: remap
    inputs:
      - kafka_source
    source: |
      # 如果存在message字段但不存在msg字段,将message重命名为msg
      if exists(.message) && !exists(.msg) {
        .msg = .message
        del(.message)
      }
      # 如果两个字段都不存在,创建一个默认的msg字段
      if !exists(.msg) && !exists(.message) {
        .msg = "No message field found"
      }

sinks:
  victorialogs_sink:
    type: http
    inputs:
      - message_normalizer
    # VictoriaLogs jsonline insert endpoint; msg/timestamp mapped via query args.
    uri: http://172.21.20.131:9481/insert/jsonline?_msg_field=msg&_time_field=timestamp
    compression: zstd
    encoding:
      codec: json
    framing:
      method: newline_delimited
    healthcheck:
      enabled: true

# Vector management API (used by `vector top`, health probes, etc.).
api:
  enabled: true
  address: "0.0.0.0:8686"
  playground: true
二、vector与logstash性能对比
-
启动1个topic
- mid-uh-go-server
5 分钟写入 23,351,550 条,Vector 的平均吞吐量约为 77,839 条/秒(23,351,550 ÷ 300s)。CPU 峰值约 20c,内存峰值约 11g。
追平 kafka 数据后,Vector 的平均吞吐量不变,CPU 约 4c,内存约 3g。
-
对比
说明: vector 和 logstash同时收 uh的日志, vector的速率是 logstash的2倍。
17:05-17:06之间的消费速率比较
vector的速率
logstash的速率
说明:logstash 每秒写入3w,vector每秒写入7w+
三、vector调优
# Tuned sink: batching, a disk buffer for durability, and bounded request concurrency.
sinks:
  victorialogs_sink:
    type: http
    inputs:
      - message_normalizer
    uri: http://172.21.20.131:9481/insert/jsonline?_msg_field=msg&_time_field=timestamp
    compression: zstd
    encoding:
      codec: json
    framing:
      method: newline_delimited
    healthcheck:
      enabled: true
    batch:
      max_events: 1000
      timeout_secs: 5
    buffer:
      type: "disk"
      # ~512 MiB on-disk buffer; block the pipeline (apply backpressure) when full.
      max_size: 536870976
      when_full: "block"
    request:
      concurrency: 10
      # NOTE(review): the original used `timeout` and `retry_max_duration`;
      # Vector's http sink request options are `timeout_secs` and
      # `retry_max_duration_secs` — the unsuffixed keys would be rejected.
      timeout_secs: 30
      retry_max_duration_secs: 300
启用Vector自监控
# Vector self-monitoring: expose internal metrics to Prometheus.
sources:
  vector_metrics:
    type: internal_metrics
    scrape_interval_secs: 10

sinks:
  prometheus:
    type: prometheus_exporter
    inputs:
      - vector_metrics
    # Prometheus scrapes this endpoint (non-default port; default is 9598).
    address: "0.0.0.0:9090"
四、优化结果与结论
-
三个节点部署vector实例数量
172.21.20.133 上2个vector
172.21.20.132上4个vector
172.21.20.131上4个vector
-
vector的配置文件
# Final scaled-out Vector config: consume 23 Kafka topics, normalize, write to VictoriaLogs.
sources:
  kafka_source:
    type: kafka
    bootstrap_servers: 172.21.241.185:9092,172.21.241.186:9092,172.21.241.187:9092,172.21.241.188:9092,172.21.241.189:9092
    group_id: vector
    topics:
      - mid-uh-go-server
      - ec-kfcp-orderingcore-server
      - br-kfc-promotion-server
      - st-cpos2-server
      - ec-yumchina-d3-server
      - ec-kfcp-menu-server
      - ec-dso-open-server
      - br-kfcapp-ms-service
      - mid-payhub
      - ec-kfcp-orderingapi-server
      - mid-techub-sonic-server
      - ec-superapp-kfcappadvert-service
      - br-primeserver-service
      - ec-kfc-3rd-server
      - ec-d3-commonconf-server
      - ec-kfcpre-order-server
      - envoy_gateway_audit_ks-gateway
      - ec-kfcd-orderingcore-server
      - st-ph-ssporder-server
      - infra-s-innerflowcontrol-janus-nginx
      - st-coc-server
      - bd-datadrivenrewards-floatingbar-server
      - ec-kfc-menu-server
    decoding:
      codec: "json"
    auto_offset_reset: "latest"

transforms:
  message_normalizer:
    type: remap
    inputs:
      - kafka_source
    source: |
      # 如果存在message字段但不存在msg字段,将message重命名为msg
      if exists(.message) && !exists(.msg) {
        .msg = .message
        del(.message)
      }

sinks:
  victorialogs_sink:
    type: http
    inputs:
      - message_normalizer
    uri: http://172.21.20.131:9481/insert/jsonline?_msg_field=msg&_time_field=timestamp
    compression: zstd
    encoding:
      codec: json
    framing:
      method: newline_delimited
    healthcheck:
      enabled: true
    batch:
      max_events: 5000   # up to 5000 events per batch (tune to VictoriaLogs capacity)
      timeout_secs: 1    # flush at least once per second
    request:
      # NOTE(review): the original used `in_flight_limit`, the deprecated name;
      # current Vector releases call this option `concurrency`.
      concurrency: 100   # allow 100 in-flight HTTP requests
      timeout_secs: 10
      retry_attempts: 3
      rate_limit_duration_secs: 1
      # NOTE(review): rate_limit_num limits REQUESTS per window, not events;
      # the original comment said "20w 条/秒" (events) — confirm intent.
      rate_limit_num: 200000

api:
  enabled: true
  address: "0.0.0.0:8686"
  playground: true
五、三个节点上分别部署nginx
通过9481 转发到各自节点的4个insert上。
-
每个节点上的vlinsert拆分成4个
# ###
# 131、132 和 133 三个节点拆解 victorialogs(原文写"131 和 132",与下方三节点配置不符)
# ###
172.21.20.131
172.21.20.132
172.21.20.133
# ###
# Node 172.21.20.131
# vlstorage: 2 storage instances, each with its own data dir and log file.
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9491 -storageDataPath=/opt/victoria-logs-data1 -retentionPeriod=2d > /opt/victoria/vlstorage01.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9492 -storageDataPath=/opt/victoria-logs-data2 -retentionPeriod=2d > /opt/victoria/vlstorage02.log 2>&1 &
# vlinsert: 4 instances writing to the LOCAL storage pair.
# NOTE(review): the original pointed these at 172.21.20.133; nodes 132 and 133
# each write to their own local storage, so 133 here looked like a copy-paste
# error — confirm before applying.
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9501 -storageNode=172.21.20.131:9491,172.21.20.131:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9501.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9502 -storageNode=172.21.20.131:9491,172.21.20.131:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9502.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9503 -storageNode=172.21.20.131:9491,172.21.20.131:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9503.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9504 -storageNode=172.21.20.131:9491,172.21.20.131:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9504.log 2>&1 &
# vlselect: query ALL storage instances on all three nodes.
# NOTE(review): the original listed only the :9491 storage of each node, which
# would leave the :9492 instances' data unqueryable.
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9471 -storageNode=172.21.20.131:9491,172.21.20.131:9492,172.21.20.132:9491,172.21.20.132:9492,172.21.20.133:9491,172.21.20.133:9492 > /opt/victoria/vlselect.log 2>&1 &
# ###
# Node 172.21.20.132
# vlstorage: 2 storage instances.
# NOTE(review): original used the same base dir for both (one with a missing
# "/" — "/optvictoria-logs-data") and redirected BOTH to the same
# vlstorage.log; split into data1/data2 and per-instance logs like 131/133.
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9491 -storageDataPath=/opt/victoria-logs-data1 -retentionPeriod=2d > /opt/victoria/vlstorage01.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9492 -storageDataPath=/opt/victoria-logs-data2 -retentionPeriod=2d > /opt/victoria/vlstorage02.log 2>&1 &
# vlinsert: 4 instances writing to the local storage pair.
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9501 -storageNode=172.21.20.132:9491,172.21.20.132:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9501.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9502 -storageNode=172.21.20.132:9491,172.21.20.132:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9502.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9503 -storageNode=172.21.20.132:9491,172.21.20.132:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9503.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9504 -storageNode=172.21.20.132:9491,172.21.20.132:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9504.log 2>&1 &
# vlselect: query ALL storage instances on all three nodes (original listed
# only :9491 of each node, hiding the :9492 instances' data).
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9471 -storageNode=172.21.20.131:9491,172.21.20.131:9492,172.21.20.132:9491,172.21.20.132:9492,172.21.20.133:9491,172.21.20.133:9492 > /opt/victoria/vlselect.log 2>&1 &
# ###
# Node 172.21.20.133
# vlstorage: 2 storage instances.
# NOTE(review): original paths were "/optvictoria-logs-data1/2" — missing the
# "/" after /opt; fixed to /opt/victoria-logs-data1/2.
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9491 -storageDataPath=/opt/victoria-logs-data1 -retentionPeriod=2d > /opt/victoria/vlstorage01.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9492 -storageDataPath=/opt/victoria-logs-data2 -retentionPeriod=2d > /opt/victoria/vlstorage02.log 2>&1 &
# vlinsert: 4 instances writing to the local storage pair.
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9501 -storageNode=172.21.20.133:9491,172.21.20.133:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9501.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9502 -storageNode=172.21.20.133:9491,172.21.20.133:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9502.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9503 -storageNode=172.21.20.133:9491,172.21.20.133:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9503.log 2>&1 &
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9504 -storageNode=172.21.20.133:9491,172.21.20.133:9492 -insert.disableCompression=false -insert.maxLineSizeBytes=1048576 > /opt/victoria/vlinsert9504.log 2>&1 &
# vlselect: query ALL storage instances on all three nodes (original listed
# only :9491 of each node, hiding the :9492 instances' data).
nohup /opt/victoria/victoria-logs-prod -httpListenAddr=:9471 -storageNode=172.21.20.131:9491,172.21.20.131:9492,172.21.20.132:9491,172.21.20.132:9492,172.21.20.133:9491,172.21.20.133:9492 > /opt/victoria/vlselect.log 2>&1 &
-
通过nginx实现轮询vlinsert
stream部分
# 👇 关键:VictoriaLogs Insert 的 TCP 负载均衡(L4)
# L4 (TCP) load balancing of VictoriaLogs insert traffic across local vlinsert instances.
stream {
    # Access-log format for the stream proxy (optional).
    log_format vlogs_stream '$remote_addr [$time_local] '
                            'proxy_upstream_addr=$upstream_addr '
                            'bytes_sent=$bytes_sent bytes_received=$bytes_received '
                            'session_time=$session_time';
    access_log /var/log/nginx/victorialogs_stream.log vlogs_stream;
    error_log /var/log/nginx/victorialogs_stream_error.log warn;

    # Upstream pool: the four local vlinsert instances.
    upstream victorialogs_insert {
        # Round-robin by default; least_conn is an alternative.
        server 127.0.0.1:9501 max_fails=3 fail_timeout=10s;
        server 127.0.0.1:9502 max_fails=3 fail_timeout=10s;
        server 127.0.0.1:9503 max_fails=3 fail_timeout=10s;
        server 127.0.0.1:9504 max_fails=3 fail_timeout=10s;
        # Optional: backend keepalive (helps with long-lived connections).
        # keepalive 32;
    }

    # Listen on the port Vector writes to and fan out to vlinsert.
    server {
        listen 9481;
        listen [::]:9481;
        proxy_pass victorialogs_insert;
        # Proxy timeout; tune to the log batch size.
        proxy_timeout 10s;
        # NOTE(review): proxy_responses applies to UDP proxying only; it is a
        # no-op on this TCP listener and can be removed.
        proxy_responses 1;
        # Enable TCP keepalive towards the backends.
        proxy_socket_keepalive on;
    }

    # Optional health-check endpoint.
    # NOTE(review): `return` and `add_header` are http-context directives and
    # are NOT valid inside a stream server — this block would fail to load if
    # uncommented as-is; serve the health check from an http{} server instead.
    # server {
    #     listen 9482;
    #     return 200 "OK\n";
    #     add_header Content-Type text/plain;
    # }
}
六、晚餐期消费能力
- topic的lag
- 131、132和133主机上此时的CPU、内存、磁盘IO监控情况:
- 三台主机的CPU、内存和磁盘IO:
说明:CPU、内存和磁盘IO均没到瓶颈
- kafka-ui中的实时lag信息
-
0121总结疯四的压力测试结果
昨天疯四,vector的消费延迟比较严重。
后来16:20左右 扩容的vector实例(4个),vlinsert进程扩容了一倍(总32个,每个节点8个) 。 vector消费能力提升了点,但仍然落后kafka的生产速率。
- 2个节点的磁盘IO到瓶颈
- 测试的3个节点的victorialogs的磁盘写入到瓶颈
说明:磁盘IO到瓶颈
- 单节点的最高写入58w/s
- 三个节点总写入速率149w/s
- 涉及的topic
- mid-uh-go-server
- ec-kfcp-orderingcore-server
- br-kfc-promotion-server
- st-cpos2-server
- ec-yumchina-d3-server
- ec-kfcp-menu-server
- ec-dso-open-server
- br-kfcapp-ms-service
- mid-payhub
- ec-kfcp-orderingapi-server
- mid-techub-sonic-server
- ec-superapp-kfcappadvert-service
- br-primeserver-service
- ec-kfc-3rd-server
- ec-d3-commonconf-server
- ec-kfcpre-order-server
- envoy_gateway_audit_ks-gateway
- ec-kfcd-orderingcore-server
- st-ph-ssporder-server
- infra-s-innerflowcontrol-janus-nginx
- st-coc-server
- bd-datadrivenrewards-floatingbar-server
- ec-kfc-menu-server
【victorialogs结论】
3个vlstorage
3个vlselect
32个vlinsert
victorialogs最大的insert速率149w/s,但写入性能不稳定,原因在磁盘IO到瓶颈。
所以平时流量可以支撑,但疯四的量顶不住