This document belongs to the production-system K8s migration series; it is number 9 of the set and the final one. It covers using the ELK stack to process the logs of services running in K8s.
Architecture
- filebeat collects the business containers' logs, running in sidecar mode
- filebeat ships the collected logs to Kafka first, separated by topic
- logstash consumes the topics from Kafka and uploads the events to Elasticsearch
- in ES, environments are distinguished by index pattern and exposed to Kibana
- Kibana handles the data display
Install elasticsearch
Elasticsearch is very resource-hungry; in production it should run on dedicated machines. Give a single node at most 32 GB of heap; going above 32 GB is not recommended because GC pauses get too long. Versions below 7 run on JDK 8; from 7 onward JDK 11 is required.
wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-6.8.23.tar.gz
mkdir -p /server/data/es /server/logs/es
tar xf elasticsearch-6.8.23.tar.gz -C /server && mv /server/elasticsearch-6.8.23 /server/elasticsearch
vim elasticsearch.yml
cluster.name: es.ylls.com
node.name: an0-11
path.data: /server/data/es
path.logs: /server/logs/es
bootstrap.memory_lock: true
network.host: 10.0.0.11
http.port: 9200
vim jvm.options
Adjust the heap to your needs; 512m is used here.
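In 6.x the heap is set with the paired -Xms/-Xmx lines in jvm.options; at the 512m used here they would read:
-Xms512m
-Xmx512m
Keeping -Xms equal to -Xmx avoids heap resizing at runtime.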
useradd -s /bin/bash -M es
chown -R es:es /server/logs/es
chown -R es:es /server/data/es
chown -R es:es /server/elasticsearch
vim /etc/security/limits.d/es.conf
es soft nofile 65535
es hard nofile 65535
es soft fsize unlimited
es hard memlock unlimited
es soft memlock unlimited
echo " vm.max_map_count=262144" >> /etc/sysctl.conf
sysctl -p
su -c "/server/elasticsearch/bin/elasticsearch -d" es
Systemd auto-start unit
vim /etc/systemd/system/es.service
[Unit]
Description=ElasticSearch
Requires=network.target
After=network.target
[Service]
User=es
Group=es
LimitNOFILE=65536
LimitMEMLOCK=infinity
Environment=JAVA_HOME=/server/jdk8
ExecStart=/server/elasticsearch/bin/elasticsearch
SuccessExitStatus=143
[Install]
WantedBy=multi-user.target
chmod +x /etc/systemd/system/es.service
systemctl daemon-reload
systemctl start es
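Enable it at boot and confirm the node responds (the IP is the network.host set above):
systemctl enable es
curl http://10.0.0.11:9200/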
Adjust the ES index template
Note that from ES 6.0 the old "template" field was replaced by "index_patterns", so only the latter is used:
curl -H "Content-Type:application/json" -XPUT http://10.0.0.11:9200/_template/k8s -d '{
  "index_patterns": ["k8s*"],
  "settings": {
    "number_of_shards": 5,
    "number_of_replicas": 0
  }
}'
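To verify the template landed, read it back:
curl http://10.0.0.11:9200/_template/k8s?pretty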
kafka
Kafka should not go above version 2.2.0, because Yahoo's open-source kafka-manager is needed and it supports Kafka only up to 2.2.0. Yahoo no longer maintains it under that name; it is now called CMAK and requires JDK 11. So use Kafka 2.2.0 together with kafka-manager 2.0.0.2.
wget https://archive.apache.org/dist/kafka/2.2.0/kafka_2.12-2.2.0.tgz
vim kafka/config/server.properties
log.dirs=/server/logs/kafka
zookeeper.connect=localhost:2181
log.flush.interval.messages=10000
log.flush.interval.ms=1000
delete.topic.enable=true
host.name=an0-11.ylls.com
kafka/bin/kafka-server-start.sh -daemon kafka/config/server.properties
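As a sanity check, the broker can be exercised with the bundled CLI; the topic name smoke-test here is arbitrary:
kafka/bin/kafka-topics.sh --zookeeper localhost:2181 --create --topic smoke-test --partitions 1 --replication-factor 1
kafka/bin/kafka-topics.sh --zookeeper localhost:2181 --list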
kafka-manager
curl https://bintray.com/sbt/rpm/rpm > bintray-sbt-rpm.repo
mv bintray-sbt-rpm.repo /etc/yum.repos.d/
yum install sbt
cd /server/dockerfile/kafka-manager
vim Dockerfile
FROM hseeberger/scala-sbt:11.0.2-oraclelinux7_1.3.13_2.13.3
ENV ZK_HOSTS=10.0.0.11:2181 KM_VERSION=2.0.0.2
RUN yum -y install wget unzip && \
    mkdir -p /tmp && cd /tmp && \
    wget https://github.com/yahoo/kafka-manager/archive/${KM_VERSION}.tar.gz && \
    tar xf ${KM_VERSION}.tar.gz && \
    cd /tmp/CMAK-${KM_VERSION} && \
    sbt clean dist && \
    unzip -d / /tmp/CMAK-${KM_VERSION}/target/universal/cmak-${KM_VERSION}.zip && \
    rm -fr /tmp/${KM_VERSION}.tar.gz /tmp/CMAK-${KM_VERSION}
WORKDIR /cmak-${KM_VERSION}
EXPOSE 9000
ENTRYPOINT ["./bin/cmak","-Dconfig.file=conf/application.conf"]
dp.yaml
kind: Deployment
apiVersion: extensions/v1beta1
metadata:
  name: kafka-manager
  namespace: infra
  labels:
    name: kafka-manager
spec:
  replicas: 1
  selector:
    matchLabels:
      name: kafka-manager
  template:
    metadata:
      labels:
        app: kafka-manager
        name: kafka-manager
    spec:
      containers:
      - name: kafka-manager
        image: harbor.ylls.com/infra/kafka-manager:v2.0.0.2
        ports:
        - containerPort: 9000
          protocol: TCP
        env:
        - name: ZK_HOSTS
          value: zk1.ylls.com:2181
        - name: APPLICATION_SECRET
          value: letmein
        imagePullPolicy: IfNotPresent
      imagePullSecrets:
      - name: harbor
      restartPolicy: Always
      terminationGracePeriodSeconds: 30
      securityContext:
        runAsUser: 0
      schedulerName: default-scheduler
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
      maxSurge: 1
  revisionHistoryLimit: 7
  progressDeadlineSeconds: 600
svc.yaml
kind: Service
apiVersion: v1
metadata:
  name: kafka-manager
  namespace: infra
spec:
  ports:
  - protocol: TCP
    port: 9000
    targetPort: 9000
  selector:
    app: kafka-manager
  clusterIP: None
  type: ClusterIP
  sessionAffinity: None
ingress.yaml
kind: Ingress
apiVersion: extensions/v1beta1
metadata:
  name: kafka-manager
  namespace: infra
spec:
  rules:
  - host: km.ylls.com
    http:
      paths:
      - path: /
        backend:
          serviceName: kafka-manager
          servicePort: 9000
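Apply the three manifests (filenames as named above) and check the pod:
kubectl apply -f dp.yaml -f svc.yaml -f ingress.yaml
kubectl -n infra get pods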
Open km.ylls.com and add the cluster:
- Add Cluster
- Cluster Name = ylls.com
- Cluster Zookeeper Hosts = 10.0.0.11:2181,10.0.0.12:2181
- Kafka Version = 2.2.0
- Save
filebeat base image
Download page:
https://www.elastic.co/downloads/past-releases#filebeat
7.5.0 is used here.
Dockerfile
FROM debian:jessie
ENV FILEBEAT_VERSION=7.5.0 \
    FILEBEAT_SHA512=31c56bcf63548be284cd512f162563a25d5176551caa1c34cffbc69fbe8fae4a8abaabac20abb6ce41aea9281fd674c716983d1ad23fb210b5b85d4aa2da6dd4
RUN set -x && \
    apt-get update && \
    apt-get install -y wget && \
    wget https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-${FILEBEAT_VERSION}-linux-x86_64.tar.gz -O /opt/filebeat.tar.gz && \
    cd /opt && \
    echo "${FILEBEAT_SHA512}  filebeat.tar.gz" | sha512sum -c - && \
    tar xzvf filebeat.tar.gz && \
    cd filebeat-* && \
    cp filebeat /bin && \
    cd /opt && \
    rm -rf filebeat* && \
    apt-get purge -y wget && \
    apt-get autoremove -y && \
    apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
COPY entrypoint.sh /
ENTRYPOINT ["/entrypoint.sh"]
entrypoint.sh
#!/bin/bash
set -xe
ENV=${ENV:-"test"}
PROJ_NAME=${PROJ_NAME:-"no-define"}
MULTILINE=${MULTILINE:-"^\d{2}"}
cat > /etc/filebeat.yaml << EOF
filebeat.inputs:
- type: log
  fields_under_root: true
  fields:
    topic: logm-${PROJ_NAME}
  paths:
    - /logm/*.log
    - /logm/*/*.log
    - /logm/*/*/*.log
    - /logm/*/*/*/*.log
    - /logm/*/*/*/*/*.log
  scan_frequency: 120s
  max_bytes: 10485760
  multiline.pattern: '$MULTILINE'
  multiline.negate: true
  multiline.match: after
  multiline.max_lines: 100
- type: log
  fields_under_root: true
  fields:
    topic: logu-${PROJ_NAME}
  paths:
    - /logu/*.log
    - /logu/*/*.log
    - /logu/*/*/*.log
    - /logu/*/*/*/*.log
    - /logu/*/*/*/*/*.log
output.kafka:
  hosts: ["10.0.0.12:9092"]
  topic: k8s-fb-$ENV-%{[topic]}
  version: 2.0.0
  required_acks: 0
  max_message_bytes: 10485760
EOF
# If the user does not provide a command, run filebeat;
# otherwise exec whatever was passed (e.g. bash).
if [[ "$1" == "" ]]; then
    exec filebeat -c /etc/filebeat.yaml
else
    exec "$@"
fi
ENV=${ENV:-"test"}: the environment the logs belong to.
PROJ_NAME=${PROJ_NAME:-"no-define"}: the project name, kept consistent with the Kafka topic.
MULTILINE=${MULTILINE:-"^\d{2}"}: the multiline match pattern. The regex ^\d{2} matches lines starting with two digits; a Java exception, for example, prints many lines that do not start with a timestamp, and under this pattern they are folded into a single event. This is only the default rule; adjust it to the actual log format in real use.
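For example, with the default ^\d{2} and multiline.negate/match set as below, a (made-up) Java log would fold like this:
21:05:33.012 ERROR request failed           <- matches ^\d{2}: starts a new event
java.lang.NullPointerException              <- no match: appended to the event above
    at com.example.Foo.bar(Foo.java:42)     <- no match: appended as well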
logm: multiline log collection; every .log file up to five directory levels under /logm is collected.
The logm input carries these accompanying settings:
scan_frequency: 120s
max_bytes: 10485760
multiline.pattern: '$MULTILINE'
multiline.negate: true
multiline.match: after
multiline.max_lines: 100
logu: single-line log collection; every .log file up to five directory levels under /logu is collected.
Kafka output settings:
topic: k8s-fb-$ENV-%{[topic]} means k8s-filebeat-<environment>-<project>; the topic value here is the field defined in the /etc/filebeat.yaml generated by the script.
version must be rounded down to the nearest protocol version filebeat supports; with Kafka 2.2.0, write 2.0.0.
output.kafka:
  hosts: ["10.0.0.12:9092"]
  topic: k8s-fb-$ENV-%{[topic]}
  version: 2.0.0
  required_acks: 0
  max_message_bytes: 10485760
chmod +x entrypoint.sh
docker build . -t harbor.ylls.com/infra/filebeat:v7.5.0
docker push harbor.ylls.com/infra/filebeat:v7.5.0
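Because entrypoint.sh writes /etc/filebeat.yaml before exec'ing any passed command, the rendered config can be inspected without starting filebeat (demo is a placeholder project name):
docker run --rm -e ENV=test -e PROJ_NAME=demo harbor.ylls.com/infra/filebeat:v7.5.0 cat /etc/filebeat.yaml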
Modify an app's dp.yaml to run filebeat in sidecar mode; add the following (the filebeat container goes under containers, the volume under the pod spec):
      - name: filebeat
        image: harbor.ylls.com/infra/filebeat:v7.5.0
        imagePullPolicy: IfNotPresent
        env:
        - name: ENV
          value: test
        - name: PROJ_NAME
          value: dubbo-provider
        volumeMounts:
        - mountPath: /logm
          name: logm
      volumes:
      - emptyDir: {}
        name: logm
      imagePullSecrets:
      - name: harbor
      restartPolicy: Always
      terminationGracePeriodSeconds: 30
      securityContext:
        runAsUser: 0
      schedulerName: default-scheduler
The config above declares a volume named logm of type emptyDir: {}: the kubelet allocates a scratch directory on the host and mounts it into the container, and when the pod is destroyed the directory is destroyed with it.
At the same time, the app container adds a volumeMounts entry mounting the same logm volume, which means the two containers share the host directory allocated for logm:
        volumeMounts:
        - mountPath: /server/logs/dubbo-provider
          name: logm
The full file after the change:
kind: Deployment
apiVersion: extensions/v1beta1
metadata:
  name: dubbo-provider
  namespace: app
  labels:
    name: dubbo-provider
spec:
  replicas: 1
  selector:
    matchLabels:
      name: dubbo-provider
  template:
    metadata:
      labels:
        app: dubbo-provider
        name: dubbo-provider
      annotations:
        blackbox_port: "20880"
        blackbox_scheme: "tcp"
        prometheus_io_scrape: "true"
        prometheus_io_port: "12346"
        prometheus_io_path: "/"
    spec:
      containers:
      - name: dubbo-provider
        image: harbor.ylls.com/app/dubbo-provider:master_2210131619
        ports:
        - containerPort: 20880
          protocol: TCP
        env:
        - name: JAR_NAME
          value: dubbo-samples-spring-boot-provider-1.0-SNAPSHOT.jar
        volumeMounts:
        - mountPath: /server/logs/dubbo-provider
          name: logm
        imagePullPolicy: IfNotPresent
      - name: filebeat
        image: harbor.ylls.com/infra/filebeat:v7.5.0
        imagePullPolicy: IfNotPresent
        env:
        - name: ENV
          value: test
        - name: PROJ_NAME
          value: dubbo-provider
        volumeMounts:
        - mountPath: /logm
          name: logm
      volumes:
      - emptyDir: {}
        name: logm
      imagePullSecrets:
      - name: harbor
      restartPolicy: Always
      terminationGracePeriodSeconds: 30
      securityContext:
        runAsUser: 0
      schedulerName: default-scheduler
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
      maxSurge: 1
  revisionHistoryLimit: 7
  progressDeadlineSeconds: 600
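Roll it out and confirm both containers come up (READY 2/2) and filebeat started cleanly:
kubectl apply -f dp.yaml
kubectl -n app get pods
kubectl -n app logs -l name=dubbo-provider -c filebeat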
logstash
Per the Elastic support matrix, 6.8.23 is used here:
https://www.elastic.co/support/matrix
docker pull logstash:6.8.23
logstash config file for the test environment:
/etc/logstash/logstash-test.conf
input {
  kafka {
    bootstrap_servers => "10.0.0.13:9092" # Kafka address
    client_id => "10.0.0.11"              # this host's address
    consumer_threads => 4
    group_id => "k8s_test"
    topics_pattern => "k8s-fb-test-.*"    # topic pattern for the test environment
  }
}
filter {
  json {
    source => "message"
  }
}
output {
  elasticsearch {
    hosts => ["10.0.0.12:9200"]           # ES address
    index => "k8s-test-%{+YYYY.MM.dd}"
  }
}
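The config can be parsed without starting the pipeline; --config.test_and_exit only validates it and exits:
docker run --rm -v /etc/logstash:/etc/logstash logstash:6.8.23 -f /etc/logstash/logstash-test.conf --config.test_and_exit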
docker run -d --restart=always --name logstash-test -v /etc/logstash:/etc/logstash logstash:6.8.23 -f /etc/logstash/logstash-test.conf
Use curl http://10.0.0.12:9200/_cat/indices?v to check whether ES is receiving logs; a green status is normal. The pipeline is asynchronous, so this can take a little while.
kibana
docker pull kibana:6.8.23
dp.yaml
kind: Deployment
apiVersion: extensions/v1beta1
metadata:
  name: kibana
  namespace: infra
  labels:
    name: kibana
spec:
  replicas: 1
  selector:
    matchLabels:
      name: kibana
  template:
    metadata:
      labels:
        app: kibana
        name: kibana
    spec:
      containers:
      - name: kibana
        image: harbor.ylls.com/infra/kibana:6.8.23
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 5601
          protocol: TCP
        env:
        - name: ELASTICSEARCH_URL
          value: http://es.ylls.com:9200
      imagePullSecrets:
      - name: harbor
      securityContext:
        runAsUser: 0
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
      maxSurge: 1
  revisionHistoryLimit: 7
  progressDeadlineSeconds: 600
svc.yaml
kind: Service
apiVersion: v1
metadata:
  name: kibana
  namespace: infra
spec:
  ports:
  - port: 5601
    targetPort: 5601
    protocol: TCP
  selector:
    app: kibana
ingress.yaml
kind: Ingress
apiVersion: extensions/v1beta1
metadata:
  name: kibana
  namespace: infra
spec:
  rules:
  - host: kibana.ylls.com
    http:
      paths:
      - path: /
        backend:
          serviceName: kibana
          servicePort: 5601
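As with kafka-manager, apply the three files and wait for the rollout:
kubectl apply -f dp.yaml -f svc.yaml -f ingress.yaml
kubectl -n infra rollout status deployment/kibana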
Open kibana.ylls.com:
- Explore on my own
- Monitoring -> Turn on monitoring
- Management -> Index Patterns -> k8s-test-* -> Next step -> @timestamp -> Create index pattern
- Discover