elasticsearch 基础DSL查询语句示例和对应Java API使用

501 阅读9分钟

ES基础查询语法以及对应Java API(基于org.elasticsearch.client#rest-high-level;7.10.2版本)简单示例

查看所有索引信息

GET /_cat/indices?v

查看索引字段mapping

GET /[indexName]/_mapping

根据id查询记录

GET /[indexName]/_doc/[id]  // 这里是根据id精确匹配的

按条件查询(全部)

GET /[indexName]/_search
{
  "query": {
    "match_all": {}
  }
}
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
MatchAllQueryBuilder matchAllQueryBuilder = QueryBuilders.matchAllQuery();
searchSourceBuilder.query(matchAllQueryBuilder);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

分页查询

GET /[indexName]/_search
{
  "query": {
    "match_all": {}
  },
  "from": 1,
  "size": 5
}
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
MatchAllQueryBuilder matchAllQueryBuilder = QueryBuilders.matchAllQuery();
searchSourceBuilder.query(matchAllQueryBuilder);
searchSourceBuilder.from(1);
searchSourceBuilder.size(5);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

排序

GET /[indexName]/_search
{
  "query": {
    "match_all": {}
  },
  "sort": [
    {
      "age": {
        "order": "asc/desc"
      }
    }
  ]
}
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
MatchAllQueryBuilder matchAllQueryBuilder = QueryBuilders.matchAllQuery();
searchSourceBuilder.query(matchAllQueryBuilder);
searchSourceBuilder.sort("age", SortOrder.DESC);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

term查询(term代表完全匹配,即不会对搜索词进行分词分析,文档中必须包含整个搜索词)

GET /[indexName]/_search
{
  "query": {
    "term": {
      "age": 20 // 对于text类型的字段,如果要使用term查询,建议查询其keyword子字段(例如 name.keyword)
    }
  }
}
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("age", 20);
searchSourceBuilder.query(termQueryBuilder);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

terms 查询

GET /[indexName]/_search
{
  "query": {
    "terms": {
      "name": ["州", "海"] // terms中相当于or
    }
  }
}
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
TermsQueryBuilder termQueryBuilder = QueryBuilders.termsQuery("name", "州", "海");
searchSourceBuilder.query(termQueryBuilder);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

按分词查询 (Match Query)

GET /[indexName]/_search
{
  "query": {
    "match": {
      "name" : "张三"  // 这里是模糊匹配,例如name字段中张和三分词匹配到任何一个就行
    }
  }
}
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery("name", "张三");
searchSourceBuilder.query(matchQueryBuilder);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

多字段查询(Multi-match Query)

GET /[indexName]/_search
{
  "query": {
    "multi_match": {
      "query": "张三",
      "fields": ["name", "address"]  // 只要name和address任意一个匹配到就可以,匹配规则和match一样
    }
  }
}
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
MultiMatchQueryBuilder matchQueryBuilder = QueryBuilders.multiMatchQuery("张三", "name", "address");
searchSourceBuilder.query(matchQueryBuilder);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

范围查询

GET /[indexName]/_search
{
  "query": {
    "range": {
      "age": {
        "gte": 50,
        "lte": 100
      }
    }
  }
}
// 对应Java代码
// Range query: 50 <= age <= 100 (both bounds inclusive).
RangeQueryBuilder ageBetween = QueryBuilders.rangeQuery("age");
ageBetween.gte(50);
ageBetween.lte(100);
SearchSourceBuilder rangeSource = new SearchSourceBuilder().query(ageBetween);
SearchRequest rangeRequest = new SearchRequest("index").source(rangeSource);
SearchResponse searchResponse = restHighLevelClient.search(rangeRequest, RequestOptions.DEFAULT);

match phrase(它要求查询字符串中的短语按照顺序完整匹配字段内容)

GET /[indexName]/_search
{
  "query": {
    "match_phrase": {
      "name" : "张三"  // 按短语查询:查询内容同样会被分词,但要求分词后的各个词项全部出现在字段中,并且顺序一致、位置相邻。例如索引中name字段值为张三,你用张或张三搜索能搜到,用小张三或张五是无法匹配出结果的
    }
  }
}
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
MatchPhraseQueryBuilder matchPhraseQuery = QueryBuilders.matchPhraseQuery("name", "张三");
searchSourceBuilder.query(matchPhraseQuery);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

布尔查询(Bool Query)

GET /[indexName]/_search
{
  "query": {
    "bool": {
      "must": [  // must 两个条件必须同时满足
        { "match_phrase": { "name": "张三" } },
        { "range": { "age": { "gte": 50 } } }
      ]
    }
  }
}
// 对应Java代码
// bool query with two must clauses: both conditions have to hold for a document to match.
BoolQueryBuilder bothRequired = QueryBuilders.boolQuery();
bothRequired.must(QueryBuilders.matchPhraseQuery("name", "张三"));
bothRequired.must(QueryBuilders.rangeQuery("age").gte(50));
SearchSourceBuilder boolSource = new SearchSourceBuilder().query(bothRequired);
SearchRequest boolRequest = new SearchRequest("index").source(boolSource);
SearchResponse searchResponse = restHighLevelClient.search(boolRequest, RequestOptions.DEFAULT);

组合查询 (must,should,must_not)

GET /[indexName]/_search
{
  "query": {
    "bool": {
      "must": [  // must表示必须满足的条件
        {
          "match_phrase": {"name": "张三"}  
        }
      ],
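      "should": [  // should表示满足其中一个即可(与must同时使用时,必须设置minimum_should_match才会强制生效,否则should只影响评分)
        {
          "range": {"age": {"gte": 100}}
        },
        {
          "range": {"height": {"gt": 150}}
        }
      ],
      "must_not": [ // must_not表示必须不满足的条件
        {
          "term": {"address.keyword": "铜锣湾"}
        }
      ],
      "minimum_should_match": 1 // 至少满足一个should条件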
    }
  }
}
// 对应Java代码
// Combined bool query: one required phrase match, two optional range clauses, one exclusion.
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery()
                // Require at least one should clause to match; when a must clause is present
                // the default is 0, i.e. should clauses would only influence scoring.
                .minimumShouldMatch(1)
                .must(QueryBuilders.matchPhraseQuery("name", "张三"))
                .should(QueryBuilders.rangeQuery("age").gte(100))
                // Strictly greater than 150, matching the "gt" bound used in the DSL request above.
                .should(QueryBuilders.rangeQuery("height").gt(150))
                .mustNot(QueryBuilders.termQuery("address.keyword", "铜锣湾"));
searchSourceBuilder.query(boolQueryBuilder);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

filter

// filter与query的区别:filter只根据搜索条件过滤出符合的文档, 将这些文档的评分固定为1, 不计算相关度分数。query先查询符合搜索条件的文档, 然后计算每个文档对于搜索条件的相关度分数, 再根据评分倒序排序。
// 使用场景:如果对搜索结果有排序的要求, 要将最匹配的文档排在最前面, 就用query;如果只是根据一定的条件筛选出部分数据, 不关注结果的排序, 就用filter
// 原则上来说,全文检索或其他需要相关性评分的场景使用query,其余场景全部使用filter
GET /[indexName]/_search
{
  "query": {
    "bool": {
      "must": {
        "match": {"name": "张三"}
      },
      "filter": {
        "bool": {
          "must": [{"match_phrase": {"address.keyword": "苏州"}}],
          "should": [
            {"range": {"age": {"gte": 20}}},
            {"range": {"height": {"gt": 155}}}],
          "minimum_should_match": 1 // 同时存在must时,should默认可以一个都不满足;设置为1表示至少满足一个should条件
        }
      }
    }
  }
}
// 对应Java代码
// Non-scoring filter part: address phrase is required, and at least one of the range clauses must match.
BoolQueryBuilder filterPart = QueryBuilders.boolQuery();
filterPart.minimumShouldMatch(1);
filterPart.must(QueryBuilders.matchPhraseQuery("address.keyword", "苏州"));
filterPart.should(QueryBuilders.rangeQuery("age").gte(20));
filterPart.should(QueryBuilders.rangeQuery("height").gt(155));

// Scoring part: analyzed match on "name"; the bool query above is attached as a filter clause.
BoolQueryBuilder scoredWithFilter = QueryBuilders.boolQuery();
scoredWithFilter.must(QueryBuilders.matchQuery("name", "张三"));
scoredWithFilter.filter(filterPart);

SearchSourceBuilder filteredSource = new SearchSourceBuilder().query(scoredWithFilter);
SearchRequest filteredRequest = new SearchRequest("index").source(filteredSource);
SearchResponse searchResponse = restHighLevelClient.search(filteredRequest, RequestOptions.DEFAULT);

聚合统计

GET /[indexName]/_search
{
  "size": 0, // size = 0,代表不想返回query查询结果,只要统计结果
  "query": { // 设置query查询条件,后面的aggs统计,仅对query查询结果进行统计
     "range": {
      "age": {
        "gte": 20,
        "lte": 30
      }
    }
  },
  "aggs": { // 统计query查询结果, 默认情况如果不写query语句,则代表统计所有数据
    "avg_height": { // 聚合查询身高,计算身高平均值
      "avg": {
        "field": "height"
      }
    },
    "min_height": { // 聚合查询身高,计算身高最小值
      "min": { 
        "field": "height" 
      }
    },
    "max_height": { // 聚合查询身高,计算身高最大值
      "max": { 
        "field": "height"
      }
    }
  }
}
// 对应Java代码
RangeQueryBuilder rangeQuery = QueryBuilders.rangeQuery("age").gte(20).lte(30);
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
                .size(0)
                .query(rangeQuery)  // stats一次计算出count max min avg sum
                .aggregation(AggregationBuilders.stats("heightStats").field("height"));
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

分组聚合

GET /order/_search
{
  "size": 0, // size = 0,代表不想返回query查询结果,只要统计结果
  "aggs": {
    "shop": { // 聚合查询的名字,随便取个名字
      "terms": { // 聚合类型为: terms
        "field": "shop_id" // 根据shop_id字段值,分桶
      }
    }
  }
}
​
// 等价SQL: select shop_id, count(*) from order group by shop_id
ES常用的桶聚合如下:
Terms聚合 - 类似SQL的group by,根据字段唯一值分组
Histogram聚合 - 根据数值间隔分组,例如: 价格按100间隔分组,0、100、200、300等等
Date histogram聚合 - 根据时间间隔分组,例如:按月、按天、按小时分组
Range聚合 - 按数值范围分组,例如: 0-150一组,150-200一组,200-500一组。
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
                .size(0)
                .aggregation(AggregationBuilders.terms("shop").field("shop_id"));
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

高亮

GET /[indexName]/_search
{
  "query": {
    "match": {
      "name": "张三"
    }
  },
  "highlight": {
    "fields": {
      "name": {}
    },
    "pre_tags": "<font color='red'>",
    "post_tags": "</font>"
  }
}
// 对应Java代码
// Wrap matched fragments of the "name" field in a red <font> tag.
HighlightBuilder nameHighlighter = new HighlightBuilder()
        .preTags("<font color='red'>")
        .postTags("</font>")
        .field("name");
SearchSourceBuilder highlightSource = new SearchSourceBuilder();
highlightSource.query(QueryBuilders.matchQuery("name", "张三"));
highlightSource.highlighter(nameHighlighter);
SearchRequest highlightRequest = new SearchRequest("index").source(highlightSource);
SearchResponse searchResponse = restHighLevelClient.search(highlightRequest, RequestOptions.DEFAULT);

exist query

GET /[indexName]/_search
{
    "query": {
        "exists": {
            "field": "notexistfield" // exists查询,用以返回字段存在值的记录,默认情况下只有字段的值为null或者[]的时候,elasticsearch才会认为字段不存在
        }
    }
}
​
ExistsQueryBuilder builder = QueryBuilders.existsQuery("notexistfield");
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(builder);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

分页查询-scroll(scroll分页不支持跳页查询,且非实时的)

GET /[indexName]/_search?scroll=1m  // 保持游标查询窗口一分钟。这个查询的返回结果包括一个字段 _scroll_id, 它是一个base64编码的长字符串 。传递字段 _scroll_id 到 _search/scroll 查询接口获取下一批结果
{
    "query": { "match_all": {}},
    "sort" : ["_doc"],  // 按 _doc 排序是最高效的排序方式
    "size":  1000  // size 是每个批次返回的最大文档数(所有分片合计),即每批最多返回1000条;只有早期版本的scan模式下size才作用于单个分片(每批最多 size * number_of_primary_shards 条)
}
​
GET /_search/scroll
{
    "scroll": "1m", 
    "scroll_id" : "cXVlcnlUaGVuRmV0Y2g7NTsxMDk5NDpkUmpiR2FjOFNhNnlCM1ZDMWpWYnRROzEwOTk1OmRSamJHYWM4U2E2eUIzVkMxalZidFE7MTA5OTM6ZFJqYkdhYzhTYTZ5QjNWQzFqVmJ0UTsxMTE5MDpBVUtwN2lxc1FLZV8yRGVjWlI2QUVBOzEwOTk2OmRSamJHYWM4U2E2eUIzVkMxalZidFE7MDs="
}
​
DELETE /_search/scroll  // 别忘了清除scroll_id。scroll的搜索上下文会在scroll的保留时间截止后自动清除,但是scroll是非常消耗资源的,所以一个建议就是当不需要了scroll数据的时候,尽可能快的把scroll_id显式删除掉
{
  "scroll_id" : "FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFmR2N2VQTTlDUmZPU0puajA3NlZTX2cAAAAAAApMaxY2ckdQbGZfQlJiU2JLSVpRREtCOERn"
}
// Keep the scroll search context alive for one minute between consecutive batches.
Scroll scroll = new Scroll(TimeValue.timeValueMinutes(1L));
MatchAllQueryBuilder builder = QueryBuilders.matchAllQuery();
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(builder).size(1000).sort("_doc");
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder).scroll(scroll);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

String scrollId = searchResponse.getScrollId();
SearchHit[] hits = searchResponse.getHits().getHits();
try {
    // An empty batch signals that every matching document has been returned.
    while (hits != null && hits.length > 0) {
        // Process the current batch in `hits` here before fetching the next one.
        SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId);
        scrollRequest.scroll(scroll);
        SearchResponse rp = restHighLevelClient.scroll(scrollRequest, RequestOptions.DEFAULT);
        // The scroll id may change between batches: always continue (and finally clear) with the latest one.
        scrollId = rp.getScrollId();
        hits = rp.getHits().getHits();
    }
} finally {
    // Release the server-side search context as soon as we are done, even if a scroll call failed.
    ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
    clearScrollRequest.addScrollId(scrollId);
    restHighLevelClient.clearScroll(clearScrollRequest, RequestOptions.DEFAULT);
}
注意(官方不建议使用scroll):
We no longer recommend using the scroll API for deep pagination. If you need to preserve the index state while paging through more than 10,000 hits, use the search_after parameter with a point in time (PIT).
​
不再建议使用scroll API进行深度分页。如果在分页超过10000条命中结果时需要保留索引状态,请将search_after参数与时间点(PIT)一起使用
​
https://www.elastic.co/guide/en/elasticsearch/reference/7.17/paginate-search-results.html

分页查询-search after(不支持跳页查询)

// search_after 不支持自由跳转到随机页面。它与 scroll API 非常相似,但也有所不同,search_after 参数是无状态的,它始终针对最新版本的搜索器进行解析。因此,排序顺序可能会在执行期间发生变化,具体取决于索引的更新和删除。
GET /[indexName]/_search
{
    "query": {
        "match": {
            "title": "elasticsearch"
        }
    },
    "sort": [
        {"date": "asc"},
        {"tie_breaker_id": "asc"}      
    ]
}
// 返回信息
{
  "took" : 17,
  "timed_out" : false,
  "_shards" : ...,
  "hits" : {
    "total" : ...,
    "max_score" : null,
    "hits" : [
      ...
      {
        "_index" : "index",
        "_id" : "654322",
        "_score" : null,
        "_source" : ...,
        "sort" : [
          1463538855,
          "654322"
        ]
      },
      {
        "_index" : "index",
        "_id" : "654323",
        "_score" : null,
        "_source" : ...,
        "sort" : [                                
          1463538857,
          "654323"
        ]
      }
    ]
  }
}
//  要检索下一页的结果,请重复请求,获取上次命中的最后一条数据的排序值,并将这些值插入search_after数组
GET /[indexName]/_search
{
    "query": {
        "match": {
            "title": "elasticsearch"
        }
    },
    "search_after": [1463538857, "654323"],
    "sort": [
        {"date": "asc"},
        {"tie_breaker_id": "asc"}
    ]
}
// 使用 point in time (PIT) 分页查询。使用search_after需要多个具有相同查询和排序值的搜索请求。如果在这些请求之间进行刷新,结果的顺序可能会发生变化,从而导致页面之间的结果不一致。为了防止这种情况发生,可以创建一个时间点(PIT)来在搜索中保留当前索引状态。
// 创建PIT
POST /my-index-000001/_pit?keep_alive=1m
// 查询
GET /_search
{
  "size": 10000,
  "query": {
    "match" : {
      "user.id" : "elkbee"
    }
  },
  "pit": {
    "id":  "46ToAwMDaWR5BXV1aWQyKwZub2RlXzMAAAAAAAAAACoBYwADaWR4BXV1aWQxAgZub2RlXzEAAAAAAAAAAAEBYQADaWR5BXV1aWQyKgZub2RlXzIAAAAAAAAAAAwBYgACBXV1aWQyAAAFdXVpZDEAAQltYXRjaF9hbGw_gAAAAA==", 
    "keep_alive": "1m"
  },
  "sort": [ 
    {"@timestamp": {"order": "asc", "format": "strict_date_optional_time_nanos", "numeric_type" : "date_nanos" }}
  ]
}
// 返回信息
{
  "pit_id" : "46ToAwMDaWR5BXV1aWQyKwZub2RlXzMAAAAAAAAAACoBYwADaWR4BXV1aWQxAgZub2RlXzEAAAAAAAAAAAEBYQADaWR5BXV1aWQyKgZub2RlXzIAAAAAAAAAAAwBYgACBXV1aWQyAAAFdXVpZDEAAQltYXRjaF9hbGw_gAAAAA==", 
  "took" : 17,
  "timed_out" : false,
  "_shards" : ...,
  "hits" : {
    "total" : ...,
    "max_score" : null,
    "hits" : [
      ...
      {
        "_index" : "my-index-000001",
        "_id" : "FaslK3QBySSL_rrj9zM5",
        "_score" : null,
        "_source" : ...,
        "sort" : [                                
          "2021-05-20T05:30:04.832Z",
          4294967298                              
        ]
      }
    ]
  }
}
// 使用search after查询
GET /_search
{
  "size": 10000,
  "query": {
    "match" : {
      "user.id" : "elkbee"
    }
  },
  "pit": {
    "id":  "46ToAwMDaWR5BXV1aWQyKwZub2RlXzMAAAAAAAAAACoBYwADaWR4BXV1aWQxAgZub2RlXzEAAAAAAAAAAAEBYQADaWR5BXV1aWQyKgZub2RlXzIAAAAAAAAAAAwBYgACBXV1aWQyAAAFdXVpZDEAAQltYXRjaF9hbGw_gAAAAA==", 
    "keep_alive": "1m"
  },
  "sort": [
    {"@timestamp": {"order": "asc", "format": "strict_date_optional_time_nanos"}}
  ],
  "search_after": [                                
    "2021-05-20T05:30:04.832Z",
    4294967298
  ],
  "track_total_hits": false    // 禁用对总数据量的跟踪以加快分页                    
}
// 删除pit
DELETE /_pit
{
    "id" : "46ToAwMDaWR5BXV1aWQyKwZub2RlXzMAAAAAAAAAACoBYwADaWR4BXV1aWQxAgZub2RlXzEAAAAAAAAAAAEBYQADaWR5BXV1aWQyKgZub2RlXzIAAAAAAAAAAAwBYgACBXV1aWQyAAAFdXVpZDEAAQltYXRjaF9hbGw_gAAAAA=="
}

script脚本(未完待续)