Background
Our databases are mostly Alibaba Cloud RDS for MySQL instances. To do statistical analysis of user queries, we need to pull the SQL audit logs into Elasticsearch and analyze them there.
Implementation
Dependencies
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>aliyun-rds</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>com.aliyun.openservices</groupId>
<artifactId>aliyun-log</artifactId>
<version>0.6.57</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.12</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.25</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
<version>1.7.25</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.13.2</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.13.2</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>2.13.2</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.2.1</version>
</dependency>
<!-- the save class below uses fastjson directly, so declare it explicitly
     instead of relying on the copy that aliyun-log pulls in transitively -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.68</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>3.1.1</version>
<executions>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/lib</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.0.0</version>
<configuration>
<archive>
<manifest>
<mainClass>com.plf.aliyun.log.RdsSearch</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id> <!-- this is used for inheritance merges -->
<phase>package</phase> <!-- bind the single goal to the package phase so the merged jar is built on mvn package -->
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
A bit of project-specific configuration is needed here. This is not a web project; it runs as a scheduled task, so the third-party dependencies have to be packaged into the jar and a main class declared so it can be run directly. After mvn clean package, the assembly plugin produces target/aliyun-rds-1.0-SNAPSHOT-jar-with-dependencies.jar, which can be started with java -jar.
Model class
package com.plf.aliyun.log;
import lombok.Data;
@Data
public class RdsLog {
private String sql;
private String ip;
private String user;
private String db;
private long hash;
private int root;
private long return_rows;
private int isbind;
private long latency;
private int fail;
private long check_rows;
private long update_rows;
private long tid;
// in microseconds
private long origin_time;
// in milliseconds
private long createtime;
}
These are the fields I can obtain from the RDS SQL audit log.
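By default Elasticsearch derives a dynamic mapping from the first JSON documents it sees. If you want sql to be analyzed full text and the identifier fields to be keywords, you can create the index with an explicit mapping up front. Below is a minimal sketch using the same 7.2.1 high-level client; the field types chosen here are my assumptions and should be adjusted to your analysis needs:

package com.plf.aliyun.log;

import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.common.xcontent.XContentType;

public class RdsLogIndexInit {

    /**
     * One-off initializer: creates the rdslog index with an explicit mapping.
     * The type choices below are assumptions, not requirements.
     */
    public static void createIndex(RestHighLevelClient client) throws Exception {
        CreateIndexRequest request = new CreateIndexRequest("rdslog");
        request.mapping(
                "{" +
                "  \"properties\": {" +
                "    \"sql\":         { \"type\": \"text\" }," +
                "    \"ip\":          { \"type\": \"keyword\" }," +
                "    \"user\":        { \"type\": \"keyword\" }," +
                "    \"db\":          { \"type\": \"keyword\" }," +
                "    \"latency\":     { \"type\": \"long\" }," +
                "    \"return_rows\": { \"type\": \"long\" }," +
                "    \"createtime\":  { \"type\": \"date\", \"format\": \"epoch_millis\" }" +
                "  }" +
                "}", XContentType.JSON);
        client.indices().create(request, RequestOptions.DEFAULT);
    }
}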
ES save class
package com.plf.aliyun.log;
import com.alibaba.fastjson.JSON;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.xcontent.XContentType;
import java.io.IOException;
import java.util.List;
public class RdsLogSaveEs {
private static RestHighLevelClient client;
// fill these in for your environment; 9200 below is just the default ES HTTP port
private static final String IP = "";
private static final int PORT = 9200;
private static final String NAME = "";
private static final String PASSWORD = "";
static {
final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
credentialsProvider.setCredentials(AuthScope.ANY,
new UsernamePasswordCredentials(NAME, PASSWORD));
client = new RestHighLevelClient(RestClient.builder(new HttpHost(IP, PORT, "http"))
.setHttpClientConfigCallback(new RestClientBuilder.HttpClientConfigCallback() {
public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder httpClientBuilder) {
httpClientBuilder.disableAuthCaching();
return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
}
}));
}
public IndexResponse save(RdsLog rdsLog) {
if(rdsLog==null){
return null;
}
IndexRequest request = new IndexRequest("rdslog");
// no explicit id: let Elasticsearch generate one (request.id("2") would set it manually)
String jsonString = JSON.toJSONString(rdsLog);
request.source(jsonString, XContentType.JSON);
IndexResponse indexResponse = null;
try {
indexResponse = client.index(request, RequestOptions.DEFAULT);
// do not close the shared client here: it is static and reused,
// so closing it would break every later save
} catch (IOException e) {
e.printStackTrace();
}
return indexResponse;
}
public BulkResponse saveBulk(List<RdsLog> rdsLogList) {
if(rdsLogList == null || rdsLogList.isEmpty()){
return null;
}
BulkRequest bulkRequest = new BulkRequest();
for (RdsLog rdsLog : rdsLogList) {
bulkRequest.add(new IndexRequest("rdslog").source(JSON.toJSONString(rdsLog), XContentType.JSON));
}
BulkResponse bulkResponse = null;
try {
bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
} catch (IOException e) {
e.printStackTrace();
}
return bulkResponse;
}
}
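One thing to watch with the bulk API: the request as a whole can succeed while individual documents fail, so callers should check hasFailures() on the response rather than relying only on the IOException catch. A small usage sketch (the sample entry is made up):

package com.plf.aliyun.log;

import org.elasticsearch.action.bulk.BulkResponse;
import java.util.Collections;

public class RdsLogSaveEsDemo {
    public static void main(String[] args) {
        RdsLogSaveEs esService = new RdsLogSaveEs();

        // made-up sample entry just to exercise the API
        RdsLog sample = new RdsLog();
        sample.setSql("select 1");
        sample.setCreatetime(System.currentTimeMillis());

        BulkResponse response = esService.saveBulk(Collections.singletonList(sample));
        // a bulk call can succeed overall while single items fail
        if (response != null && response.hasFailures()) {
            System.err.println(response.buildFailureMessage());
        }
    }
}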
Reading the logs and saving them
package com.plf.aliyun.log;
import com.aliyun.openservices.log.Client;
import com.aliyun.openservices.log.common.*;
import com.aliyun.openservices.log.request.PullLogsRequest;
import com.aliyun.openservices.log.response.*;
import lombok.extern.slf4j.Slf4j;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
@Slf4j
public class RdsSearch {
private final static RdsLogSaveEs esService = new RdsLogSaveEs();
/**
 * Fill these in according to your environment
 */
private final static String accessId = "";
private final static String accessKey = "";
private final static String project = "";
private final static String endpoint = "";
private final static String logStore = "";
private static Client client = null;
static {
client = new Client(endpoint, accessId, accessKey);
}
public static void main(String[] args) {
ScheduledExecutorService scheduledExecutorService =
Executors.newScheduledThreadPool(5);
/**
 * Run every 4 hours, pulling the data from four hours ago up to now
 */
scheduledExecutorService.scheduleAtFixedRate(new Runnable() {
@Override
public void run() {
log.info("now data {} schedule the parse data", System.currentTimeMillis());
try {
int shardId = 0; // only read data from shard 0
GetCursorResponse response;
// get the cursor of the first batch of logs received within the last 4 hours
long fromTime = System.currentTimeMillis() / 1000 - 14400; // 14400 s = 4 h
response = client.GetCursor(project, logStore, shardId, fromTime);
String cursor = response.GetCursor();
long startNowTime = System.currentTimeMillis();
log.info("start time : {}", startNowTime);
while (true) {
List<RdsLog> rdsLogList = new ArrayList<>();
PullLogsResponse pullLogsResponse = client.pullLogs(new PullLogsRequest(project, logStore, shardId, 100, cursor));
log.info("pullLogsResponse count : {}", pullLogsResponse.getCount());
List<LogGroupData> logGroupDataList = pullLogsResponse.getLogGroups();
log.info("logGroupDataList size : {}", logGroupDataList.size());
for (LogGroupData logGroupData : logGroupDataList) {
Logs.LogGroup logGroup = logGroupData.GetLogGroup();
List<Logs.Log> logList = logGroup.getLogsList();
for (Logs.Log logText : logList) {
List<Logs.Log.Content> contentList = logText.getContentsList();
RdsLog rdsLog = packageRdsLog(contentList);
if (rdsLog != null) {
if (isSave(rdsLog)) {
rdsLogList.add(rdsLog);
}
}
}
}
log.info("prepare to save rdsLog , rdsLogList size : {}", rdsLogList.size());
esService.saveBulk(rdsLogList);
String nextCursor = pullLogsResponse.getNextCursor();
if (cursor.equals(nextCursor)) {
break;
}
cursor = nextCursor;
}
long endNowTime = System.currentTimeMillis();
log.info("end time : {},all count {} ms", endNowTime, (endNowTime - startNowTime));
} catch (Exception e) {
log.error("exception time : {} ms", System.currentTimeMillis());
e.printStackTrace();
}
}
}, 0, 4, TimeUnit.HOURS);
}
/**
 * Filter: decide whether a log entry should be saved; customize as needed
 * @param rdsLog the parsed log entry
 * @return true if the entry should be saved
 */
private static boolean isSave(RdsLog rdsLog){
return rdsLog != null && isPrefixToSave(rdsLog.getSql());
}
private static boolean isPrefixToSave(String sql){
if(sql == null || sql.isEmpty()){
return false;
}
// case-insensitive prefix check, so mixed-case forms like "Select" also match
return sql.regionMatches(true, 0, "select", 0, 6);
}
/**
 * Build an RdsLog from the key/value contents of one audit log entry
 * @param contentList the contents of a single log entry
 * @return the populated RdsLog, or null if there is nothing to parse
 */
private static RdsLog packageRdsLog(List<Logs.Log.Content> contentList){
if(contentList == null || contentList.isEmpty()){
return null;
}
RdsLog rdsLog = new RdsLog();
rdsLog.setCreatetime(System.currentTimeMillis());
for(Logs.Log.Content content : contentList){
String value = content.getValue();
switch (content.getKey()){
case "ip": rdsLog.setIp(value); break;
case "db": rdsLog.setDb(value); break;
case "sql": rdsLog.setSql(value); break;
case "user": rdsLog.setUser(value); break;
case "origin_time": rdsLog.setOrigin_time(Long.parseLong(value)); break;
case "hash": rdsLog.setHash(Long.parseLong(value)); break;
case "root": rdsLog.setRoot(Integer.parseInt(value)); break;
case "return_rows": rdsLog.setReturn_rows(Long.parseLong(value)); break;
case "isbind": rdsLog.setIsbind(Integer.parseInt(value)); break;
case "fail": rdsLog.setFail(Integer.parseInt(value)); break;
case "latency": rdsLog.setLatency(Long.parseLong(value)); break;
case "tid": rdsLog.setTid(Long.parseLong(value)); break;
case "check_rows": rdsLog.setCheck_rows(Long.parseLong(value)); break;
case "update_rows": rdsLog.setUpdate_rows(Long.parseLong(value)); break;
default: break; // ignore any other keys
}
}
return rdsLog;
}
return rdsLog;
}
}
Summary
That is the whole implementation, but there is still plenty of room for optimization. For example, the program that saves to ES can lose data when network jitter invalidates the client. Also, the RDS audit logs are huge; it would be better to buffer the messages in Kafka first and let a consumer drain them at its own pace.
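For the network-jitter problem, a lightweight mitigation short of introducing Kafka is to retry failed bulk writes with a small backoff. A minimal sketch, assuming the RdsLogSaveEs class above; the attempt count and delays are arbitrary choices:

package com.plf.aliyun.log;

import org.elasticsearch.action.bulk.BulkResponse;
import java.util.List;
import java.util.concurrent.TimeUnit;

public class RetryingSaver {
    private final RdsLogSaveEs esService = new RdsLogSaveEs();

    /** Retries the bulk save a few times with linear backoff. */
    public BulkResponse saveWithRetry(List<RdsLog> batch, int maxAttempts) {
        for (int attempt = 1; attempt <= maxAttempts; attempt++) {
            BulkResponse response = esService.saveBulk(batch);
            if (response != null && !response.hasFailures()) {
                return response;
            }
            try {
                TimeUnit.SECONDS.sleep(attempt); // back off 1 s, 2 s, 3 s ...
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                break;
            }
        }
        return null; // caller could spill the batch to a local file here
    }
}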