大数据开发Elasticsearch查询详解(第五十九篇)

482 阅读6分钟

这是我参与「掘金日新计划 · 2 月更文挑战」的第 8 天,点击查看活动详情

一、Elasticsearch查询详解

  1. 查询单条数据可以使用Get
  2. 查询一批满足条件的数据需要使用Search
准备数据:

curl -XPUT 'http://192.168.234.100:9200/user'

curl -H "Content-Type:application/json" -XPOST 'http://192.168.234.100:9200/user/_doc/1' -d '{"name":"zhangsan","age":18}'

1.1、Java代码
  1. pom文件引入

    <dependency>
        <groupId>org.elasticsearch.client</groupId>
        <artifactId>elasticsearch-rest-high-level-client</artifactId>
        <version>7.17.6</version>
    </dependency><dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
        <version>2.20.0</version>
    </dependency>
    
  2. log4j2配置

    appender.console.type = Console
    appender.console.name = LogToConsole
    appender.console.layout.type = PatternLayout
    appender.console.layout.pattern = [%-5level] %d{yyyy-MM-dd HH:mm:ss.SSS} [%t] %c{1} - %msg%n
    ​
    rootLogger.level = info
    rootLogger.appenderRef.stdout.ref = LogToConsole
    
  3. java代码

    package com.strivelearn.es;
    ​
    import java.io.IOException;
    ​
    import org.apache.http.HttpHost;
    import org.apache.lucene.search.TotalHits;
    import org.elasticsearch.action.search.SearchRequest;
    import org.elasticsearch.action.search.SearchResponse;
    import org.elasticsearch.client.RequestOptions;
    import org.elasticsearch.client.RestClient;
    import org.elasticsearch.client.RestHighLevelClient;
    import org.elasticsearch.search.SearchHit;
    import org.elasticsearch.search.SearchHits;
    ​
    /**
     * @author strivelearn
     * @version EsSearchOp.java, 2023年02月26日
     */
    public class EsSearchOp {
        public static void main(String[] args) throws IOException {
            RestHighLevelClient client = new RestHighLevelClient(RestClient.builder(new HttpHost("192.168.234.100", 9200)));
            SearchRequest searchRequest = new SearchRequest();
            // 指定索引库,支持指定一个或者多个,也支持通配符,例如:user*
            searchRequest.indices("user");
            SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
            // 获取查询返回的结果
            SearchHits hits = searchResponse.getHits();
            // 获取数据总量
            TotalHits totalHits = hits.getTotalHits();
            System.out.println("totalHits = " + totalHits);
            // 迭代解析具体的内容
            for (SearchHit hit : hits.getHits()) {
                String sourceAsString = hit.getSourceAsString();
                System.out.println("sourceAsString = " + sourceAsString);
            }
            client.close();
        }
    }
    
  4. 执行结果

    totalHits = 1 hits
    sourceAsString = {"name":"zhangsan","age":18}
    

二、ES查询扩展

在ES查询数据的时候,我们可以在searchRequest中指定一些参数,实现过滤、分页、排序、高亮等功能。

2.1、过滤功能

使用到的测试数据

curl -H "Content-Type:application/json" -XPOST 'http://192.168.234.100:9200/user/_doc/100' -d '{"name":"刘德华","age":36}'
curl -H "Content-Type:application/json" -XPOST 'http://192.168.234.100:9200/user/_doc/200' -d '{"name":"刘能","age":41}'
curl -H "Content-Type:application/json" -XPOST 'http://192.168.234.100:9200/user/_doc/300' -d '{"name":"zhangsan","age":18}'
package com.strivelearn.es;
​
import org.apache.http.HttpHost;
import org.apache.lucene.search.TotalHits;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
​
import java.io.IOException;
​
/**
 * The type Es search op.
 *
 * @author strivelearn
 * @version EsSearchOp.java, 2023年02月26日
 */
public class EsSearchOp {
   /**
     * The entry point of application.
     *
     * @param args the input arguments
     * @throws IOException the io exception
     */
    public static void main(String[] args) throws IOException {
        RestHighLevelClient client = new RestHighLevelClient(RestClient.builder(new HttpHost("192.168.234.100", 9200)));
        SearchRequest searchRequest = new SearchRequest();
        // 指定索引库,支持指定一个或者多个,也支持通配符,例如:user*
        searchRequest.indices("user");
​
        SearchSourceBuilder searchSourceBuilder = getSearchSourceDontParticiple();
​
        searchRequest.source(searchSourceBuilder);
​
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        // 获取查询返回的结果
        SearchHits hits = searchResponse.getHits();
        // 获取数据总量
        TotalHits totalHits = hits.getTotalHits();
        System.out.println("totalHits = " + totalHits);
        // 迭代解析具体的内容
        for (SearchHit hit : hits.getHits()) {
            String sourceAsString = hit.getSourceAsString();
            System.out.println("sourceAsString = " + sourceAsString);
        }
        client.close();
    }
​
    /**
     * 查询条件
     *
     * @return the search source builder
     */
    private static SearchSourceBuilder getSearchSourceBuilder() {
        // 执行查询条件
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        // 查询所有,可以不指定,默认是查询索引库中所有的数据
        searchSourceBuilder.query(QueryBuilders.matchAllQuery());
        // 对指定字段的值进行过滤。在查询数据的时候会对数据进行分词
        // 如果指定多个query,后面的query会覆盖前面的query
        // 针对字符串类型内容的查询,不支持通配符
        // searchSourceBuilder.query(QueryBuilders.matchQuery("name", "zhangsan"));
​
        // 针对字符串类型内容的查询,支持通配符,但是性能比较差,类似于mysql的全表扫描
        searchSourceBuilder.query(QueryBuilders.wildcardQuery("name", "zhang*"));
        return searchSourceBuilder;
    }
​
    /**
     * 区间查询
     * 有from+to
     * 或者
     * gt,gte,lt,lte
     *
     * @return the search source builder
     */
    private static SearchSourceBuilder getSearchSourceIntervalBuilder() {
        // 执行查询条件
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        // to的值设置为null,表示不设置边界
        searchSourceBuilder.query(QueryBuilders.rangeQuery("age")
                                               .from(18)
                                               .to(20));
        return searchSourceBuilder;
    }
​
    /**
     * 多个条件,and对应es里面是(must) 或者or 操作 对应es里面是(should)
     *
     * @return the search source multi builder
     */
    private static SearchSourceBuilder getSearchSourceMultiBuilder() {
        // // 执行查询条件
        // SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        // searchSourceBuilder.query(QueryBuilders.boolQuery()
        //                                        .should(QueryBuilders.matchQuery("name", "zhangsan"))
        //                                        .should(QueryBuilders.matchQuery("age", "18")));
​
        // // 执行or查询条件,设置权重。权重越大排在前面
        // SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        // searchSourceBuilder.query(QueryBuilders.boolQuery()
        //                                        .should(QueryBuilders.matchQuery("name", "zhangsan")
        //                                                             .boost(5.0f))
        //                                        .should(QueryBuilders.matchQuery("age", "18")
        //                                                             .boost(1.0f)));
​
        // 执行and查询条件
        // 对多个指定字段的值进行过滤,注意:多个字段的数据类型必须一致,否则会报错,如果查询的字段不存在不会报错
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.query(QueryBuilders.multiMatchQuery("zhangsan", "name", "name2"));
        return searchSourceBuilder;
    }
​
    /**
     * es查询基于Lucene语法的测试
     *
     * @return the search source multi builder
     */
    private static SearchSourceBuilder getSearchSourceByLucene() {
        // 执行and查询条件
        // 对多个指定字段的值进行过滤,注意:多个字段的数据类型必须一致,否则会报错,如果查询的字段不存在不会报错
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        // searchSourceBuilder.query(QueryBuilders.queryStringQuery("name:zhangsan AND age:[15 TO 18]"));
        // 上面的lucene的语法对应的是es语法如下
        // searchSourceBuilder.query(QueryBuilders.boolQuery()
        //                                        .must(QueryBuilders.matchQuery("name", "zhangsan"))
        //                                        .must(QueryBuilders.rangeQuery("age")
        //                                                           .from(15)
        //                                                           .to(18)));
​
        // lucene支持通配符
        searchSourceBuilder.query(QueryBuilders.queryStringQuery("name:zhang*"));
        return searchSourceBuilder;
    }
}
// Match query on the analyzed "name" field, configured in one fluent expression.
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
        .query(QueryBuilders.matchQuery("name", "刘德华"));
return searchSourceBuilder;

ES在查询的时候会对name进行分词,查询出来的结果:

totalHits = 2 hits
sourceAsString = {"name":"刘德华","age":36}
sourceAsString = {"name":"刘能","age":41}

因为matchQuery默认会根据分词的结果进行or操作,满足任意一个词语的数据都会查询出来。

正常情况下,想要使用termQuery实现精确查询,对应的字段不能被分词。但有时某个字段已经按分词方式建立了索引,后期又需要精确查询,而重新建立索引并不现实。此时可以使用queryStringQuery(把查询内容用双引号括起来),也可以使用matchQuery并把operator设置为AND。两者的原理都是对分词后的结果执行and操作,即所有词语都必须匹配。

// Option 1: Lucene query-string syntax. The inner double quotes must be escaped in Java;
// they wrap the value so that it is searched as a quoted phrase.
searchSourceBuilder.query(QueryBuilders.queryStringQuery("name:\"刘德华\""));
// Option 2: match query whose analyzed terms are combined with AND instead of the default OR.
// A later query(...) call replaces the earlier one, so keep only the variant you need.
searchSourceBuilder.query(QueryBuilders.matchQuery("name", "刘德华")
                                       .operator(Operator.AND));
2.2、分页、排序功能
// Pagination
// Offset of the first hit to return (default 0)
searchSourceBuilder.from(0);
// Number of hits to return per page (default 10)
searchSourceBuilder.size(10);

// Sorting: order the hits by the "age" field, descending
searchSourceBuilder.sort("age", SortOrder.DESC);

值得注意的是:数字类型的字段不需要分词,可以直接用于排序;而字符串类型的字段默认会被分词,因此默认不支持排序和聚合操作。如果想要根据这些会被分词的字段进行排序或者聚合,需要使用它们的keyword子字段(例如name.keyword),keyword类型不会对数据进行分词。

searchSourceBuilder.sort("name.keyword", SortOrder.DESC);

keyword类型的特性其实也适用于精确查询的场景:可以在matchQuery中指定字段的keyword子字段(例如 matchQuery("name.keyword", "刘德华"))实现精确查询,不管该字段本身在建立索引的时候有没有被分词。

2.3、高亮功能

针对用户搜索的一些关键词,如果结果里面匹配到了,那最终在页面展示结果的时候,把这些关键字标红,高亮显示,这样看起来比较清晰

// Highlight matches in "name"; chain further field(...) calls to highlight more fields.
HighlightBuilder highlightBuilder = new HighlightBuilder()
        .field("name")
        // Wrap every highlighted term in a red <font> element.
        .preTags("<font color='red'>")
        .postTags("</font>");
searchSourceBuilder.highlighter(highlightBuilder);
// Walk every hit and print its source plus the highlighted "name" value, if present.
for (SearchHit hit : hits.getHits()) {
    String sourceAsString = hit.getSourceAsString();
    System.out.println("sourceAsString = " + sourceAsString);
    // Highlight results of this hit, keyed by field name.
    Map<String, HighlightField> highlightFields = hit.getHighlightFields();
    HighlightField highlightField = highlightFields.get("name");
    // The field is absent from the map when nothing was highlighted for it.
    if (highlightField != null) {
        // Join all fragments; StringBuilder avoids creating a new String per fragment.
        StringBuilder nameValue = new StringBuilder();
        for (Text fragment : highlightField.getFragments()) {
            nameValue.append(fragment);
        }
        System.out.println("高亮内容:" + nameValue);
    }
}
totalHits = 1 hits
sourceAsString = {"name":"刘德华","age":36}
高亮内容:<font color='red'>刘</font><font color='red'>德</font><font color='red'>华</font>
2.4、聚合查询
| 姓名 | 科目 | 成绩 |
| --- | --- | --- |
| zhangsan | 语文 | 59 |
| zhangsan | 数学 | 88 |
| lisi | 语文 | 89 |
| lisi | 数学 | 88 |
curl -H "Content-Type:application/json" -XPOST 'http://192.168.234.100:9200/score/_doc/1' -d '{"name":"zhangsan","subject":"语文","score":59}'
curl -H "Content-Type:application/json" -XPOST 'http://192.168.234.100:9200/score/_doc/2' -d '{"name":"zhangsan","subject":"数学","score":88}'
curl -H "Content-Type:application/json" -XPOST 'http://192.168.234.100:9200/score/_doc/3' -d '{"name":"lisi","subject":"语文","score":89}'
curl -H "Content-Type:application/json" -XPOST 'http://192.168.234.100:9200/score/_doc/4' -d '{"name":"lisi","subject":"数学","score":88}'
package com.strivelearn.es;
​
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.Sum;
import org.elasticsearch.search.builder.SearchSourceBuilder;
​
import java.io.IOException;
import java.util.List;
​
/**
 * @author strivelearn
 * @version EsAggOp.java, 2023年02月28日
 */
public class EsAggOp {
    public static void main(String[] args) throws IOException {
        RestHighLevelClient client = new RestHighLevelClient(RestClient.builder(new HttpHost("192.168.234.100", 9200)));
        SearchRequest searchRequest = new SearchRequest();
        // 指定索引库,支持指定一个或者多个,也支持通配符,例如:user*
        searchRequest.indices("score");
​
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        TermsAggregationBuilder aggregation = AggregationBuilders.terms("name_term")
                                                                 // 指定分组字段,如果是字符串类型,需要指定使用keyword类型
                                                                 .field("name.keyword")
                                                                 // 指定求sum,也支持avg、min、max
                                                                 .subAggregation(AggregationBuilders.sum("sum_score")
                                                                                                    .field("score"));
        searchSourceBuilder.aggregation(aggregation);
​
        searchRequest.source(searchSourceBuilder);
​
        // 执行查询操作
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        Terms nameTerm = searchResponse.getAggregations()
                                       .get("name_term");
        List<? extends Terms.Bucket> buckets = nameTerm.getBuckets();
        for (Terms.Bucket bucket : buckets) {
            Sum sumScore = bucket.getAggregations()
                                 .get("sum_score");
            System.out.println(bucket.getKey() + "--" + sumScore.getValue());
        }
        client.close();
    }
}
lisi--177.0
zhangsan--147.0

需要注意的是:terms聚合默认只返回前10个分组,要获取所有分组的数据,需要通过size额外指定。下面示例中的20适用于已经知道大概有多少个分组的情况;如果不知道分组数量,可以指定Integer.MAX_VALUE,但实际工作中不推荐这样做。

// Terms aggregation named "age_term" that groups documents by the "age" field.
AggregationBuilders.terms("age_term")
                   .field("age")
                   // Number of groups (buckets) to return
                   .size(20);