SpringBoot 整合elasticsearch

1,666 阅读5分钟

1、前言

es的客户端连接有几种方式:

  • REST:

执行REST风格的HTTP请求。7.x版本开始弃用TransportClient,并用高级rest客户端替代。

  • TransportClient:

作为外部访问者,请求ES的集群。5.x前主要的连接方式,作为rest客户端的过渡版本。

  • NodeClient

作为ES集群的一个节点,它是ES中的一环,其他的节点对它是感知的。通常不建议使用。

Spring Boot本身提供了spring-data-elasticsearch来连接es,但由于es版本更新较快,目前最新版的spring-data-elasticsearch也只有3.1.10.RELEASE,对应的es版本是6.4.3。它提供的ElasticsearchTemplate类实际上仍是使用TransportClient或NodeClient来进行操作(spring-data-elasticsearch虽然也提供了elasticsearch-rest-client包,但版本同样是6.4.3,该版本的api不全,而且和直接引用rest包一样,需要自己另写方法才能使用)。因此不太建议使用spring-data-elasticsearch,更建议直接使用elasticsearch-rest-high-level-client。下文也是基于elasticsearch-rest-high-level-client来操作es。注意es客户端的版本需要与es集群版本对应,我这里使用的是7.3.1。

2、引入依赖

        <!-- Note: if the project inherits from spring-boot-starter-parent, the Elasticsearch version must be pinned here.
		The parent already manages the versions of the Elasticsearch artifacts, so some transitive dependencies of
		the high-level client would otherwise be overridden with older versions and cause hard-to-diagnose errors. -->
	<dependencyManagement>
		<dependencies>
			<dependency>
				<groupId>org.elasticsearch.client</groupId>
				<artifactId>elasticsearch-rest-high-level-client</artifactId>
				<version>7.3.1</version>
			</dependency>
			<!-- https://mvnrepository.com/artifact/org.elasticsearch/elasticsearch -->
			<dependency>
				<groupId>org.elasticsearch</groupId>
				<artifactId>elasticsearch</artifactId>
				<version>7.3.1</version>
			</dependency>
			<!-- https://mvnrepository.com/artifact/org.elasticsearch.client/elasticsearch-rest-client -->
			<dependency>
				<groupId>org.elasticsearch.client</groupId>
				<artifactId>elasticsearch-rest-client</artifactId>
				<version>7.3.1</version>
			</dependency>
		</dependencies>
	</dependencyManagement>

<dependencies>
        <!-- No version element here: the version (7.3.1) is resolved from the dependencyManagement section of this pom. -->
        <dependency>
			<groupId>org.elasticsearch.client</groupId>
			<artifactId>elasticsearch-rest-high-level-client</artifactId>
		</dependency>
</dependencies>

3、编写config配置类

/**
 * Spring configuration that assembles the application's {@link RestHighLevelClient}
 * from the {@code elasticsearch.*} properties.
 */
@Configuration
public class EsConfig {

	/** Cluster nodes, each entry written as {@code host:port}. */
	@Value("${elasticsearch.nodes}")
	private List<String> nodes;

	/** Value passed as the scheme argument of every {@link HttpHost}. */
	@Value("${elasticsearch.schema}")
	private String schema;

	/** Upper bound on connections of the underlying HTTP client. */
	@Value("${elasticsearch.max-connect-total}")
	private Integer maxConnectTotal;

	/** Upper bound on connections per route of the underlying HTTP client. */
	@Value("${elasticsearch.max-connect-per-route}")
	private Integer maxConnectPerRoute;

	/** Connection-request timeout in milliseconds. */
	@Value("${elasticsearch.connection-request-timeout-millis}")
	private Integer connectionRequestTimeoutMillis;

	/** Socket timeout in milliseconds. */
	@Value("${elasticsearch.socket-timeout-millis}")
	private Integer socketTimeoutMillis;

	/** Connect timeout in milliseconds. */
	@Value("${elasticsearch.connect-timeout-millis}")
	private Integer connectTimeoutMillis;

	/**
	 * Builds the high-level REST client for all configured nodes, applying the
	 * configured timeouts and connection limits.
	 *
	 * @return the client bean
	 */
	@Bean
	public RestHighLevelClient getRestHighLevelClient() {
		// One HttpHost per configured "host:port" entry, in configuration order.
		HttpHost[] targets = nodes.stream()
				.map(this::toHttpHost)
				.toArray(HttpHost[]::new);

		RestClientBuilder clientBuilder = RestClient.builder(targets);

		// Request-level timeouts.
		clientBuilder.setRequestConfigCallback(requestConfig -> requestConfig
				.setConnectTimeout(connectTimeoutMillis)
				.setSocketTimeout(socketTimeoutMillis)
				.setConnectionRequestTimeout(connectionRequestTimeoutMillis));

		// Connection limits.
		clientBuilder.setHttpClientConfigCallback(httpClient -> httpClient
				.setMaxConnTotal(maxConnectTotal)
				.setMaxConnPerRoute(maxConnectPerRoute));

		return new RestHighLevelClient(clientBuilder);
	}

	/** Parses one {@code host:port} entry into an {@link HttpHost} using the configured scheme. */
	private HttpHost toHttpHost(String node) {
		String[] hostAndPort = StringUtils.split(node, ":");
		Assert.notNull(hostAndPort, "Must defined");
		Assert.state(hostAndPort.length == 2, "Must be defined as 'host:port'");
		String host = hostAndPort[0];
		int port = Integer.parseInt(hostAndPort[1]);
		return new HttpHost(host, port, schema);
	}

}

4、编写操作工具类

/**
 * Helper component wrapping common document operations of {@link RestHighLevelClient}:
 * single and bulk indexing, bulk deletion, plain search and scroll-based full reads.
 *
 * <p>All methods are best-effort: failures are logged and not rethrown, so callers
 * receive an empty or partial result instead of an exception.
 */
@Component
public class EsUtils {
    private static final Logger logger = LoggerFactory.getLogger(EsUtils.class);

    /** Number of documents requested per scroll round trip. */
    private static final int SCROLL_PAGE_SIZE = 10000;

    /** Keep-alive of the scroll context in minutes; renewed on every round trip. */
    private static final long SCROLL_KEEP_ALIVE_MINUTES = 1L;

    /** Maximum time, in seconds, to wait for the result of one slice worker. */
    private static final long SLICE_TIMEOUT_SECONDS = 60L;

    @Resource
    private RestHighLevelClient client;

    /**
     * Indexes one document; an existing document with the same id is replaced.
     *
     * @param index target index name
     * @param es    entity providing the document id and the data serialized as the source
     */
    public void insertOrUpdateOne(String index, EsEntity es) {
        IndexRequest request = new IndexRequest(index);
        request.id(es.getId());
        request.source(JSON.toJSONString(es.getData()), XContentType.JSON);
        try {
            client.index(request, RequestOptions.DEFAULT);
        } catch (Exception e) {
            logger.error("更新数据失败", e);
        }
    }

    /**
     * Indexes a batch of documents with a single bulk request.
     *
     * @param index target index name
     * @param list  entities to index; {@code null} or empty is a no-op
     */
    public void insertBatch(String index, List<EsEntity> list) {
        // An empty bulk request is rejected by request validation, so skip it up front.
        if (list == null || list.isEmpty()) {
            return;
        }
        BulkRequest request = new BulkRequest();
        for (EsEntity item : list) {
            request.add(new IndexRequest(index)
                .id(item.getId())
                .source(JSON.toJSONString(item.getData()), XContentType.JSON));
        }
        executeBulk(request, "批量插入数据失败");
    }

    /**
     * Deletes a batch of documents by id with a single bulk request.
     *
     * @param index  target index name
     * @param idList ids of the documents to delete; {@code null} or empty is a no-op
     */
    public void deleteBatch(String index, List<String> idList) {
        // An empty bulk request is rejected by request validation, so skip it up front.
        if (idList == null || idList.isEmpty()) {
            return;
        }
        BulkRequest request = new BulkRequest();
        for (String id : idList) {
            request.add(new DeleteRequest(index, id));
        }
        executeBulk(request, "批量删除数据失败");
    }

    /**
     * Runs a single search request and maps every hit to {@code c}.
     *
     * <p>No size is set on the request, so the default page size applies (10 documents,
     * capped at 10000); this method is not suitable for reading a full data set.
     *
     * @param <T>          element type of the result
     * @param index        index name
     * @param queryBuilder query to execute
     * @param c            class each hit's JSON source is parsed into
     * @return the mapped hits; empty if nothing matched or the request failed
     */
    public <T> List<T> search(String index, QueryBuilder queryBuilder, Class<T> c) {
        List<T> res = new ArrayList<>();
        try {
            SearchSourceBuilder searchBuilder = new SearchSourceBuilder();
            searchBuilder.query(queryBuilder);
            SearchRequest request = new SearchRequest(index);
            request.source(searchBuilder);
            SearchResponse response = client.search(request, RequestOptions.DEFAULT);
            collectHits(response.getHits().getHits(), c, res);
        } catch (Exception e) {
            logger.error("批量查询数据失败", e);
        }
        return res;
    }

    /**
     * Reads all matching documents using the scroll API.
     *
     * <p>The scroll context is cleared when the read finishes or fails, so it does not
     * stay open on the cluster until its keep-alive expires.
     *
     * @param <T>          element type of the result
     * @param index        index name
     * @param queryBuilder query to execute
     * @param sliceBuilder slice to read; {@code null} reads all matching documents
     * @param c            class each hit's JSON source is parsed into
     * @return the mapped hits; possibly partial if a request failed midway
     */
    public <T> List<T> searchByScroll(String index, QueryBuilder queryBuilder, SliceBuilder sliceBuilder, Class<T> c) {
        List<T> res = new ArrayList<>();
        String scrollId = null;
        try {
            SearchSourceBuilder searchBuilder = new SearchSourceBuilder();
            if (sliceBuilder != null) {
                searchBuilder.slice(sliceBuilder);
            }
            searchBuilder.query(queryBuilder);
            searchBuilder.size(SCROLL_PAGE_SIZE);

            SearchRequest searchRequest = new SearchRequest(index);
            searchRequest.source(searchBuilder);
            searchRequest.scroll(TimeValue.timeValueMinutes(SCROLL_KEEP_ALIVE_MINUTES));

            // First page opens the scroll context.
            SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
            scrollId = searchResponse.getScrollId();
            SearchHit[] searchHits = searchResponse.getHits().getHits();
            logger.info("总数{}", searchResponse.getHits().getTotalHits());

            // Keep pulling pages until one comes back empty.
            while (searchHits != null && searchHits.length > 0) {
                collectHits(searchHits, c, res);
                SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId);
                scrollRequest.scroll(TimeValue.timeValueMinutes(SCROLL_KEEP_ALIVE_MINUTES));
                searchResponse = client.scroll(scrollRequest, RequestOptions.DEFAULT);
                scrollId = searchResponse.getScrollId();
                searchHits = searchResponse.getHits().getHits();
            }
        } catch (Exception e) {
            logger.error("滚动查询数据失败", e);
        } finally {
            clearScroll(scrollId);
        }
        return res;
    }

    /**
     * Reads all matching documents with several sliced scrolls running in parallel;
     * intended for full reads of large data sets.
     *
     * @param <T>          element type of the result
     * @param index        index name
     * @param queryBuilder query to execute
     * @param sliceNum     number of slices and worker threads, ideally the number of shards
     *                     of the index; values of 1 or less fall back to one unsliced scroll
     * @param c            class each hit's JSON source is parsed into
     * @return the merged hits of all slices that completed within the timeout
     */
    public <T> List<T> searchBySliceScroll(String index, QueryBuilder queryBuilder, int sliceNum, Class<T> c) {
        // A slice count of 1 or less is invalid for sliced scrolling and a pool cannot
        // have zero threads, so read everything with one plain scroll instead.
        if (sliceNum <= 1) {
            return searchByScroll(index, queryBuilder, null, c);
        }

        List<T> totalRes = new ArrayList<>();
        List<Future<List<T>>> futureList = new ArrayList<>(sliceNum);
        ExecutorService sliceTask = Executors.newFixedThreadPool(sliceNum);
        try {
            for (int i = 0; i < sliceNum; i++) {
                final int sliceId = i;
                futureList.add(sliceTask.submit(
                    () -> searchByScroll(index, queryBuilder, new SliceBuilder(sliceId, sliceNum), c)));
            }
        } catch (Exception e) {
            logger.error("多线程滚动查询数据失败", e);
        } finally {
            // No new tasks are accepted; already submitted slices keep running.
            sliceTask.shutdown();
        }

        for (Future<List<T>> fs : futureList) {
            try {
                totalRes.addAll(fs.get(SLICE_TIMEOUT_SECONDS, TimeUnit.SECONDS));
            } catch (InterruptedException e) {
                // Restore the flag so callers can observe the interruption; the remaining
                // futures then fail fast and are cancelled in their own finally block.
                Thread.currentThread().interrupt();
                logger.error("整合数据失败", e);
            } catch (Exception e) {
                logger.error("整合数据失败", e);
            } finally {
                // No effect on completed tasks; interrupts a slice that timed out.
                fs.cancel(true);
            }
        }
        return totalRes;
    }

    /** Parses the JSON source of every hit into {@code c} and appends it to {@code target}. */
    private <T> void collectHits(SearchHit[] hits, Class<T> c, List<T> target) {
        for (SearchHit hit : hits) {
            target.add(JSON.parseObject(hit.getSourceAsString(), c));
        }
    }

    /** Sends a bulk request and logs both transport errors and per-item failures. */
    private void executeBulk(BulkRequest request, String failureMessage) {
        try {
            org.elasticsearch.action.bulk.BulkResponse response = client.bulk(request, RequestOptions.DEFAULT);
            // A bulk call can succeed as a whole while individual items fail.
            if (response.hasFailures()) {
                logger.error("{}: {}", failureMessage, response.buildFailureMessage());
            }
        } catch (Exception e) {
            logger.error(failureMessage, e);
        }
    }

    /** Clears the given scroll context; a {@code null} id is ignored and failures are only logged. */
    private void clearScroll(String scrollId) {
        if (scrollId == null) {
            return;
        }
        org.elasticsearch.action.search.ClearScrollRequest clearRequest =
            new org.elasticsearch.action.search.ClearScrollRequest();
        clearRequest.addScrollId(scrollId);
        try {
            client.clearScroll(clearRequest, RequestOptions.DEFAULT);
        } catch (Exception e) {
            logger.warn("清除滚动游标失败", e);
        }
    }

}

注意es每次查询默认返回10条数据,最多也只能返回10000条(可以通过修改集群配置调整,但是不建议),因此对于大数据量的查询,建议使用scroll游标分多次查询;如果想进一步提高效率,还可以结合slice分片多线程查询:slice会在查询时根据分片总数、分片id和文档id,将索引的文档数据划分到各个分片,每个请求只返回对应分片下的文档数据。同时,查询分片总数建议等于索引分片数,不建议大于索引分片数,因为会影响数据分类的效率。