Reading RDS Audit Logs and Storing Them in ElasticSearch


Background

Our company's databases are mainly Alibaba Cloud RDS for MySQL instances. Since we need to run statistical analysis on user queries, the query statements have to be captured into ElasticSearch, where the analysis is done.

Implementation

Dependencies

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>aliyun-rds</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <dependencies>
        <dependency>
            <groupId>com.aliyun.openservices</groupId>
            <artifactId>aliyun-log</artifactId>
            <version>0.6.57</version>
        </dependency>

        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.12</version>
        </dependency>

        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.25</version>
        </dependency>

        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>jcl-over-slf4j</artifactId>
            <version>1.7.25</version>
        </dependency>

        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>2.13.2</version>
        </dependency>

        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-api</artifactId>
            <version>2.13.2</version>
        </dependency>

        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j-impl</artifactId>
            <version>2.13.2</version>
        </dependency>

        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>elasticsearch-rest-high-level-client</artifactId>
            <version>7.2.1</version>
        </dependency>
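        <!-- com.alibaba.fastjson is used directly by the ES persistence class further below,
             so it is declared explicitly here instead of being relied on as a transitive
             dependency; the version is an assumption, align it with the one you already use -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.68</version>
        </dependency>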
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <version>3.1.1</version>
                <executions>
                    <execution>
                        <id>copy-dependencies</id>
                        <phase>package</phase>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <outputDirectory>${project.build.directory}/lib</outputDirectory>
                            <overWriteReleases>false</overWriteReleases>
                            <overWriteSnapshots>false</overWriteSnapshots>
                            <overWriteIfNewer>true</overWriteIfNewer>
                        </configuration>
                    </execution>
                </executions>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.0.0</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.plf.aliyun.log.RdsSearch</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id> <!-- this is used for inheritance merges -->
                        <phase>package</phase> <!-- 指定在打包节点执行jar包合并操作 -->
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

Some project-specific configuration is needed here. This is not a web project; it runs as a scheduled task, so the third-party dependencies have to be packaged into the jar and the main class has to be declared so the program can be executed directly (the assembly plugin produces target/aliyun-rds-1.0-SNAPSHOT-jar-with-dependencies.jar, which can be started with java -jar).

Model Class

package com.plf.aliyun.log;

import lombok.Data;

@Data
public class RdsLog {
    private String sql;
    private String ip;
    private String user;
    private String db;
    private long hash;
    private int root;
    private long return_rows;
    private int isbind;
    private long latency;
    private int fail;
    private long check_rows;
    private long update_rows;
    private long tid;
    // in microseconds
    private long origin_time;
    // in milliseconds
    private long createtime;
}

These are the fields that can be extracted from the RDS audit log.
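
For reference, here is a minimal sketch (the class name RdsLogJsonDemo and the field values are made up for illustration) of the flat JSON document that fastjson produces from this model, which is what later ends up in the rdslog index:

package com.plf.aliyun.log;

import com.alibaba.fastjson.JSON;

public class RdsLogJsonDemo {
    public static void main(String[] args) {
        RdsLog rdsLog = new RdsLog();
        // sample values for illustration only
        rdsLog.setSql("select id, name from t_user limit 10");
        rdsLog.setDb("demo_db");
        rdsLog.setUser("app_user");
        rdsLog.setLatency(1200L);
        rdsLog.setCreatetime(System.currentTimeMillis());
        // prints something like {"check_rows":0,"createtime":...,"db":"demo_db","sql":"select ...",...}
        System.out.println(JSON.toJSONString(rdsLog));
    }
}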

ES Persistence Class

package com.plf.aliyun.log;

import com.alibaba.fastjson.JSON;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.xcontent.XContentType;

import java.io.IOException;
import java.util.List;

public class RdsLogSaveEs {
    private static RestHighLevelClient client;
    private static final String IP = "";
    private static final int PORT = 9200; // fill in your ES port; 9200 is the ElasticSearch default
    private static final String NAME = "";
    private static final String PASSWORD = "";
	
    static {
        final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        credentialsProvider.setCredentials(AuthScope.ANY,
                new UsernamePasswordCredentials(NAME, PASSWORD));
        client = new RestHighLevelClient(RestClient.builder(new HttpHost(IP, PORT, "http"))
                .setHttpClientConfigCallback(new RestClientBuilder.HttpClientConfigCallback() {
                    public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder httpClientBuilder) {
                        httpClientBuilder.disableAuthCaching();
                        return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
                    }
                }));
    }

    public IndexResponse save(RdsLog rdsLog) {
        if(rdsLog==null){
            return null;
        }
        IndexRequest request = new IndexRequest("rdslog");
        // request.id("2");
        String jsonString = JSON.toJSONString(rdsLog);
        request.source(jsonString, XContentType.JSON);
        IndexResponse indexResponse = null;
        try {
            indexResponse = client.index(request, RequestOptions.DEFAULT);
            // note: do not close the shared static client here, or subsequent calls will fail
        } catch (IOException e) {
            e.printStackTrace();
        }

        return indexResponse;
    }

    public BulkResponse saveBulk(List<RdsLog> rdsLogList) {
        if(rdsLogList==null || rdsLogList.size()==0){
            return null;
        }

        BulkRequest bulkRequest = new BulkRequest();
        for (RdsLog rdsLog : rdsLogList) {
            bulkRequest.add(new IndexRequest("rdslog").source(JSON.toJSONString(rdsLog), XContentType.JSON));
        }
        BulkResponse bulkResponse = null;
        try {
            bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return bulkResponse;
    }
}
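
Neither save() nor saveBulk() above inspects the response it returns. Below is a minimal sketch of how a caller might verify a bulk save, using a hypothetical helper class RdsLogSaveChecker that is not part of the original code:

package com.plf.aliyun.log;

import org.elasticsearch.action.bulk.BulkResponse;

import java.util.List;

public class RdsLogSaveChecker {
    private static final RdsLogSaveEs esService = new RdsLogSaveEs();

    /** Save the batch and report whether every document was indexed successfully. */
    public static boolean saveAndCheck(List<RdsLog> rdsLogList) {
        BulkResponse response = esService.saveBulk(rdsLogList);
        if (response == null) {
            // the whole request failed (the IOException was swallowed inside saveBulk) or the list was empty
            return false;
        }
        if (response.hasFailures()) {
            // some documents were rejected; buildFailureMessage() aggregates the per-item errors
            System.err.println(response.buildFailureMessage());
            return false;
        }
        return true;
    }
}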

Reading the Logs and Saving Them

package com.plf.aliyun.log;

import com.aliyun.openservices.log.Client;
import com.aliyun.openservices.log.common.*;
import com.aliyun.openservices.log.request.PullLogsRequest;
import com.aliyun.openservices.log.response.*;
import lombok.extern.slf4j.Slf4j;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

@Slf4j
public class RdsSearch {
    private final static RdsLogSaveEs esService = new RdsLogSaveEs();
    /**
     * Fill these in for your own environment
     */
    private final static String accessId = "";
    private final static String accessKey = "";
    private final static String project = "";
    private final static String endpoint = "";
    private final static String logStore = "";

    private static Client client = null;

    static {
        client = new Client(endpoint, accessId, accessKey);
    }

    public static void main(String[] args) {
        ScheduledExecutorService scheduledExecutorService =
                Executors.newScheduledThreadPool(5);

        /**
         * Runs every 4 hours, pulling the data from four hours ago up to now
         */
        scheduledExecutorService.scheduleAtFixedRate(new Runnable() {
            @Override
            public void run() {
                log.info("now data {} schedule the parse data", System.currentTimeMillis());
                try {
                    int shardId = 0;  // only read data from shard 0
                    GetCursorResponse response;

                    // get the cursor of the first batch of logs received within the last 4 hours
                    long fromTime = Double.valueOf(System.currentTimeMillis() / 1000.0 - 14400).longValue();
                    response = client.GetCursor(project, logStore, shardId, fromTime);

                    String cursor = response.GetCursor();
                    long startNowTime = System.currentTimeMillis();
                    log.info("start time : {}", startNowTime);
                    while (true) {
                        List<RdsLog> rdsLogList = new ArrayList<>();
                        PullLogsResponse pullLogsResponse = client.pullLogs(new PullLogsRequest(project, logStore, shardId, 100, cursor));
                        log.info("pullLogsResponse count : {}", pullLogsResponse.getCount());
                        List<LogGroupData> logGroupDataList = pullLogsResponse.getLogGroups();
                        log.info("logGroupDataList size : {}", logGroupDataList.size());
                        for (LogGroupData logGroupData : logGroupDataList) {
                            Logs.LogGroup logGroup = logGroupData.GetLogGroup();
                            List<Logs.Log> logList = logGroup.getLogsList();
                            for (Logs.Log logText : logList) {
                                List<Logs.Log.Content> contentList = logText.getContentsList();
                                RdsLog rdsLog = packageRdsLog(contentList);
                                if (rdsLog != null) {
                                    if (isSave(rdsLog)) {
                                        rdsLogList.add(rdsLog);
                                    }
                                }
                            }
                        }
                        log.info("prepare to save rdsLog , rdsLogList size : {}", rdsLogList.size());
                        esService.saveBulk(rdsLogList);
                        String nextCursor = pullLogsResponse.getNextCursor();
                        if (cursor.equals(nextCursor)) {
                            break;
                        }
                        cursor = nextCursor;
                    }
                    long endNowTime = System.currentTimeMillis();
                    log.info("end time : {},all count {} ms", endNowTime, (endNowTime - startNowTime));
                } catch (Exception e) {
                    log.error("exception time : {} ms", System.currentTimeMillis());
                    e.printStackTrace();
                }
            }
        }, 0, 4, TimeUnit.HOURS);
    }

    /**
     * Filter rule; customize as needed
     * @param rdsLog
     * @return
     */
    private static boolean isSave(RdsLog rdsLog){
        if(rdsLog == null){
            return false;
        }
        return isPrefixToSave(rdsLog.getSql());
    }

    private static boolean isPrefixToSave(String sql){
        if(sql==null || sql.length()==0){
            return false;
        }
        return (sql.startsWith("select") || sql.startsWith("SELECT"));
    }

    /**
     * Build an RdsLog from the log content list
     * @param contentList
     * @return
     */
    private static RdsLog packageRdsLog(List<Logs.Log.Content> contentList){
        RdsLog rdsLog = new RdsLog();
        if(contentList==null || contentList.size()<=0){
            return null;
        }
        rdsLog.setCreatetime(System.currentTimeMillis());
        for(Logs.Log.Content content:contentList){
            if("ip".equals(content.getKey())){
                rdsLog.setIp(content.getValue());
            } else if("db".equals(content.getKey())){
                rdsLog.setDb(content.getValue());
            }else if("sql".equals(content.getKey())){
                rdsLog.setSql(content.getValue());
            }else if("user".equals(content.getKey())){
                rdsLog.setUser(content.getValue());
            }else if("origin_time".equals(content.getKey())){
                rdsLog.setOrigin_time(Long.valueOf(content.getValue()));
            }else if("hash".equals(content.getKey())){
                rdsLog.setHash(Long.valueOf(content.getValue()));
            }else if("root".equals(content.getKey())){
                rdsLog.setRoot(Integer.valueOf(content.getValue()));
            }else if("return_rows".equals(content.getKey())){
                rdsLog.setReturn_rows(Long.valueOf(content.getValue()));
            }else if("isbind".equals(content.getKey())){
                rdsLog.setIsbind(Integer.valueOf(content.getValue()));
            }else if("fail".equals(content.getKey())){
                rdsLog.setFail(Integer.valueOf(content.getValue()));
            }else if("latency".equals(content.getKey())){
                rdsLog.setLatency(Long.valueOf(content.getValue()));
            }else if("tid".equals(content.getKey())){
                rdsLog.setTid(Long.valueOf(content.getValue()));
            }else if("check_rows".equals(content.getKey())){
                rdsLog.setCheck_rows(Long.valueOf(content.getValue()));
            }else if("update_rows".equals(content.getKey())){
                rdsLog.setUpdate_rows(Long.valueOf(content.getValue()));
            }else{
                continue;
            }
        }
        return rdsLog;
    }
}

Summary

That is the feature as implemented, but there is still plenty of room for optimization. For example, the program that saves to ES can end up with an unusable client after network jitter, which puts the data being saved at risk. Also, the RDS audit log is very large; the messages could first be buffered in Kafka and then consumed gradually by a downstream program.
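
For the client issue in particular, one simple mitigation is sketched below (the method retrySaveBulk and the retry counts are assumptions, written as a helper that could be added to RdsSearch, which already holds the esService field):

    // Hypothetical helper for RdsSearch: retry a bulk save a few times with a short
    // linear backoff to ride out transient network problems.
    // Requires importing org.elasticsearch.action.bulk.BulkResponse in RdsSearch.
    private static BulkResponse retrySaveBulk(List<RdsLog> rdsLogList) {
        final int MAX_RETRIES = 3;             // assumption: three attempts
        for (int attempt = 1; attempt <= MAX_RETRIES; attempt++) {
            BulkResponse response = esService.saveBulk(rdsLogList);
            if (response != null && !response.hasFailures()) {
                return response;               // every document was indexed
            }
            try {
                Thread.sleep(1000L * attempt); // back off 1s, 2s, 3s between attempts
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                break;
            }
        }
        return null;                           // caller decides how to handle a lost batch
    }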