ES基础查询语法以及对应Java API(基于org.elasticsearch.client#rest-high-level;7.10.2版本)简单示例
查看所有索引信息
GET /_cat/indices?v
查看索引字段mapping
GET /[indexName]/_mapping
根据id查询记录
GET /[indexName]/_doc/[id] // 这里是根据id精确匹配的
按条件查询(全部)
GET /[indexName]/_search
{
"query": {
"match_all": {}
}
}
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
MatchAllQueryBuilder matchAllQueryBuilder = QueryBuilders.matchAllQuery();
searchSourceBuilder.query(matchAllQueryBuilder);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
分页查询
GET /[indexName]/_search
{
"query": {
"match_all": {}
},
"from": 1,
"size": 5
}
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
MatchAllQueryBuilder matchAllQueryBuilder = QueryBuilders.matchAllQuery();
searchSourceBuilder.query(matchAllQueryBuilder);
searchSourceBuilder.from(1);
searchSourceBuilder.size(5);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
排序
GET /[indexName]/_search
{
"query": {
"match_all": {}
},
"sort": [
{
"age": {
"order": "asc/desc"
}
}
]
}
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
MatchAllQueryBuilder matchAllQueryBuilder = QueryBuilders.matchAllQuery();
searchSourceBuilder.query(matchAllQueryBuilder);
searchSourceBuilder.sort("age", SortOrder.DESC);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
term查询(term代表精确匹配:查询词不会经过分词器分析,字段中索引的词项必须与查询词完全一致)
GET /[indexName]/_search
{
"query": {
"term": {
"age": 20 // 对于text类型的字段,要使用term,建议加上keyword
}
}
}
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("age", 20);
searchSourceBuilder.query(termQueryBuilder);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
terms 查询
GET /[indexName]/_search
{
"query": {
"terms": {
"name": ["州", "海"] // terms中相当于or
}
}
}
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
TermsQueryBuilder termQueryBuilder = QueryBuilders.termsQuery("name", "州", "海");
searchSourceBuilder.query(termQueryBuilder);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
按分词查询 (Match Query)
GET /[indexName]/_search
{
"query": {
"match": {
"name" : "张三" // match会先对查询词分词再做全文匹配(注意不是fuzzy模糊查询);默认operator为or,例如使用默认分词器时,name字段中匹配到"张"或"三"任意一个分词即可
}
}
}
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery("name", "张三");
searchSourceBuilder.query(matchQueryBuilder);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
多字段查询(Multi-match Query)
GET /[indexName]/_search
{
"query": {
"multi_match": {
"query": "张三",
"fields": ["name", "address"] // 只要name和address任意一个匹配到就可以,匹配规则和match一样
}
}
}
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
MultiMatchQueryBuilder matchQueryBuilder = QueryBuilders.multiMatchQuery("张三", "name", "address");
searchSourceBuilder.query(matchQueryBuilder);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
范围查询
GET /[indexName]/_search
{
"query": {
"range": {
"age": {
"gte": 50,
"lte": 100
}
}
}
}
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
RangeQueryBuilder rangeQuery = QueryBuilders.rangeQuery("age").gte(50).lte(100);
searchSourceBuilder.query(rangeQuery);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
match phrase(它要求查询字符串中的短语按照顺序完整匹配字段内容)
GET /[indexName]/_search
{
"query": {
"match_phrase": {
"name" : "张三" // 按短语查询:查询词同样会被分词,但要求所有分词都出现在字段中,且顺序一致、位置相邻。例如索引中name字段值为张三,你用张或张三搜索能搜到,用小张三或张五是无法匹配出结果的
}
}
}
// 对应Java代码
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
MatchPhraseQueryBuilder matchPhraseQuery = QueryBuilders.matchPhraseQuery("name", "张三");
searchSourceBuilder.query(matchPhraseQuery);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
布尔查询(Bool Query)
GET /[indexName]/_search
{
"query": {
"bool": {
"must": [ // must 两个条件必须同时满足
{ "match_phrase": { "name": "张三" } },
{ "range": { "age": { "gte": 50 } } }
]
}
}
}
// Java equivalent: bool query whose two must clauses both have to match.
BoolQueryBuilder requiredClauses = QueryBuilders.boolQuery();
requiredClauses.must(QueryBuilders.matchPhraseQuery("name", "张三"));
requiredClauses.must(QueryBuilders.rangeQuery("age").gte(50));
SearchRequest searchRequest = new SearchRequest("index")
        .source(new SearchSourceBuilder().query(requiredClauses));
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
组合查询 (must,should,must_not)
GET /[indexName]/_search
{
"query": {
"bool": {
"must": [ // must表示必须满足的条件
{
"match_phrase": {"name": "张三"}
}
],
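"should": [ // should表示满足其中一个即可(注意:bool中同时存在must或filter时,minimum_should_match默认为0,should只影响评分、可以一个都不满足;要强制至少满足一个,需显式设置 "minimum_should_match": 1)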
{
"range": {"age": {"gte": 100}}
},
{
"range": {"height": {"gt": 150}}
}
],
"must_not": [ // must_not表示必须不满足的条件
{
"term": {"address.keyword": "铜锣湾"}
}
]
}
}
}
// Java equivalent of the bool query above (must + should + must_not).
// minimumShouldMatch(1) forces at least one should clause to match; without it the
// should clauses are optional whenever a must clause is present and only affect scoring.
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery()
        .minimumShouldMatch(1)
        .must(QueryBuilders.matchPhraseQuery("name", "张三"))
        .should(QueryBuilders.rangeQuery("age").gte(100))
        // Strictly greater than 150, matching "gt" in the request body (previously gte).
        .should(QueryBuilders.rangeQuery("height").gt(150))
        .mustNot(QueryBuilders.termQuery("address.keyword", "铜锣湾"));
searchSourceBuilder.query(boolQueryBuilder);
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
filter
// filter与query的区别:filter只根据搜索条件过滤出符合的文档, 将这些文档的评分固定为1, 不计算相关度分数。query先查询符合搜索条件的文档, 然后计算每个文档对于搜索条件的相关度分数, 再根据评分倒序排序。
// 使用场景:如果对搜索结果有排序的要求, 要将最匹配的文档排在最前面, 就用query;如果只是根据一定的条件筛选出部分数据, 不关注结果的排序, 就用filter
// 原则上来说,使用query做全文本搜索或其他需要进行相关性评分。剩下的全部用filter语句
GET /[indexName]/_search
{
"query": {
"bool": {
"must": {
"match": {"name": "张三"}
},
"filter": {
"bool": {
"must": [{"match_phrase": {"address.keyword": "苏州"}}],
"should": [
{"range": {"age": {"gte": 20}}},
{"range": {"height": {"gt": 155}}}]
}
}
}
}
}
// Java equivalent: a scored match on "name" combined with a non-scoring filter.
// The filter is itself a bool query: the address clause is required and, because of
// minimumShouldMatch(1), at least one of the two range clauses must match as well.
BoolQueryBuilder addressAndAgeOrHeight = QueryBuilders.boolQuery();
addressAndAgeOrHeight.minimumShouldMatch(1);
addressAndAgeOrHeight.must(QueryBuilders.matchPhraseQuery("address.keyword", "苏州"));
addressAndAgeOrHeight.should(QueryBuilders.rangeQuery("age").gte(20));
addressAndAgeOrHeight.should(QueryBuilders.rangeQuery("height").gt(155));

BoolQueryBuilder scoredAndFiltered = QueryBuilders.boolQuery();
scoredAndFiltered.must(QueryBuilders.matchQuery("name", "张三"));
scoredAndFiltered.filter(addressAndAgeOrHeight);

SearchRequest searchRequest = new SearchRequest("index")
        .source(new SearchSourceBuilder().query(scoredAndFiltered));
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
聚合统计
GET /[indexName]/_search
{
"size": 0, // size = 0,代表不想返回query查询结果,只要统计结果
"query": { // 设置query查询条件,后面的aggs统计,仅对query查询结果进行统计
"range": {
"age": {
"gte": 20,
"lte": 30
}
}
},
"aggs": { // 统计query查询结果, 默认情况如果不写query语句,则代表统计所有数据
"avg_height": { // 聚合查询身高,计算身高平均值
"avg": {
"field": "height"
}
},
"min_height": { // 聚合查询身高,计算身高最小值
"min": {
"field": "height"
}
},
"max_height": { // 聚合查询身高,计算身高最大值
"max": {
"field": "height"
}
}
}
}
// 对应Java代码
RangeQueryBuilder rangeQuery = QueryBuilders.rangeQuery("age").gte(20).lte(30);
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
.size(0)
.query(rangeQuery) // stats一次计算出count max min avg sum
.aggregation(AggregationBuilders.stats("heightStats").field("height"));
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
分组聚合
GET /order/_search
{
"size": 0, // size = 0,代表不想返回query查询结果,只要统计结果
"aggs": {
"shop": { // 聚合查询的名字,随便取个名字
"terms": { // 聚合类型为: terms
"field": "shop_id" // 根据shop_id字段值,分桶
}
}
}
}
// 等价SQL:select shop_id, count(*) from order group by shop_id
ES常用的桶聚合如下:
Terms聚合 - 类似SQL的group by,根据字段唯一值分组
Histogram聚合 - 根据数值间隔分组,例如: 价格按100间隔分组,0、100、200、300等等
Date histogram聚合 - 根据时间间隔分组,例如:按月、按天、按小时分组
Range聚合 - 按数值范围分组,例如: 0-150一组,150-200一组,200-500一组。
// Java equivalent: terms (group-by) aggregation that buckets documents by shop_id.
// Targets the "order" index so that it matches the request and SQL shown above.
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
        .size(0) // no hits wanted, only the buckets
        .aggregation(AggregationBuilders.terms("shop").field("shop_id"));
SearchRequest searchRequest = new SearchRequest().indices("order").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
高亮
GET /[indexName]/_search
{
"query": {
"match": {
"name": "张三"
}
},
"highlight": {
"fields": {
"name": {}
},
"pre_tags": "<font color='red'>",
"post_tags": "</font>"
}
}
// Java equivalent: match query on "name" with the matched fragments wrapped in red <font> tags.
HighlightBuilder nameHighlighter = new HighlightBuilder()
        .preTags("<font color='red'>")
        .postTags("</font>")
        .field("name");
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.query(QueryBuilders.matchQuery("name", "张三"));
searchSourceBuilder.highlighter(nameHighlighter);
SearchRequest searchRequest = new SearchRequest("index").source(searchSourceBuilder);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
exist query
GET /[indexName]/_search
{
"query": {
"exists": {
"field": "notexistfield" // exists查询,用以返回字段存在值的记录,默认情况下只有字段的值为null或者[]的时候,elasticsearch才会认为字段不存在
}
}
}
// Java equivalent: exists query on the field "notexistfield".
SearchRequest searchRequest = new SearchRequest("index")
        .source(new SearchSourceBuilder().query(QueryBuilders.existsQuery("notexistfield")));
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
分页查询-scroll(scroll分页不支持跳页查询,且非实时的)
GET /[indexName]/_search?scroll=1m // 保持游标查询窗口一分钟。这个查询的返回结果包括一个字段 _scroll_id, 它是一个base64编码的长字符串 。传递字段 _scroll_id 到 _search/scroll 查询接口获取下一批结果
{
"query": { "match_all": {}},
"sort" : ["_doc"], // 关键字 _doc 是最有效的排序顺序
"size": 1000 // 每个批次最多返回的文档数。在7.x中size作用于整个请求,每批最多返回size条;"每批最多返回 size * number_of_primary_shards 条"的说法只适用于早期版本中已被移除的scan搜索类型
}
GET /_search/scroll
{
"scroll": "1m",
"scroll_id" : "cXVlcnlUaGVuRmV0Y2g7NTsxMDk5NDpkUmpiR2FjOFNhNnlCM1ZDMWpWYnRROzEwOTk1OmRSamJHYWM4U2E2eUIzVkMxalZidFE7MTA5OTM6ZFJqYkdhYzhTYTZ5QjNWQzFqVmJ0UTsxMTE5MDpBVUtwN2lxc1FLZV8yRGVjWlI2QUVBOzEwOTk2OmRSamJHYWM4U2E2eUIzVkMxalZidFE7MDs="
}
DELETE /_search/scroll // 别忘了清除scroll_id。scroll的搜索上下文会在scroll的保留时间截止后自动清除,但是scroll是非常消耗资源的,所以一个建议就是当不需要了scroll数据的时候,尽可能快的把scroll_id显式删除掉
{
"scroll_id" : "FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFmR2N2VQTTlDUmZPU0puajA3NlZTX2cAAAAAAApMaxY2ckdQbGZfQlJiU2JLSVpRREtCOERn"
}
// Java equivalent: page through every document with the scroll API.
// Each scroll context is kept alive for one minute after the request that touched it.
Scroll scroll = new Scroll(TimeValue.timeValueMinutes(1L));
MatchAllQueryBuilder builder = QueryBuilders.matchAllQuery();
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(builder).size(1000).sort("_doc");
SearchRequest searchRequest = new SearchRequest().indices("index").source(searchSourceBuilder).scroll(scroll);
SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
String scrollId = searchResponse.getScrollId();
try {
    SearchHit[] hits = searchResponse.getHits().getHits();
    while (hits != null && hits.length > 0) {
        // Process the current batch of hits here before fetching the next one.
        SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId);
        scrollRequest.scroll(scroll);
        SearchResponse rp = restHighLevelClient.scroll(scrollRequest, RequestOptions.DEFAULT);
        // Always continue with the id from the latest response: it may differ from the
        // previous one, and it is the id that has to be cleared at the end.
        scrollId = rp.getScrollId();
        hits = rp.getHits().getHits();
    }
} finally {
    // Release the scroll context explicitly, even when an iteration failed, instead of
    // waiting for the keep-alive to expire.
    ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
    clearScrollRequest.addScrollId(scrollId);
    restHighLevelClient.clearScroll(clearScrollRequest, RequestOptions.DEFAULT);
}
注意(官方不建议使用scroll):
We no longer recommend using the scroll API for deep pagination. If you need to preserve the index state while paging through more than 10,000 hits, use the search_after parameter with a point in time (PIT).
不再建议使用scroll API进行深度分页。如果需要翻页遍历超过10000条命中结果(hits)并在此期间保留索引状态,请将search_after参数与时间点(PIT)一起使用
https://www.elastic.co/guide/en/elasticsearch/reference/7.17/paginate-search-results.html
分页查询-search after(不支持跳页查询)
// search_after 不支持自由跳转到随机页面。它与 scroll API 非常相似,但也有所不同,search_after 参数是无状态的,它始终针对最新版本的搜索器进行解析。因此,排序顺序可能会在执行期间发生变化,具体取决于索引的更新和删除。
GET /[indexName]/_search
{
"query": {
"match": {
"title": "elasticsearch"
}
},
"sort": [
{"date": "asc"},
{"tie_breaker_id": "asc"}
]
}
// 返回信息
{
"took" : 17,
"timed_out" : false,
"_shards" : ...,
"hits" : {
"total" : ...,
"max_score" : null,
"hits" : [
...
{
"_index" : "index",
"_id" : "654322",
"_score" : null,
"_source" : ...,
"sort" : [
1463538855,
"654322"
]
},
{
"_index" : "index",
"_id" : "654323",
"_score" : null,
"_source" : ...,
"sort" : [
1463538857,
"654323"
]
}
]
}
}
// 要检索下一页的结果,请重复请求,获取上次命中的最后一条数据的排序值,并将这些值插入search_after数组
GET /[indexName]/_search
{
"query": {
"match": {
"title": "elasticsearch"
}
},
"search_after": [1463538857, "654323"],
"sort": [
{"date": "asc"},
{"tie_breaker_id": "asc"}
]
}
// 使用 point in time (PIT) 分页查询。使用search_after需要多个具有相同查询和排序值的搜索请求。如果在这些请求之间进行刷新,结果的顺序可能会发生变化,从而导致页面之间的结果不一致。为了防止这种情况发生,可以创建一个时间点(PIT)来在搜索中保留当前索引状态。
// 创建PIT
POST /my-index-000001/_pit?keep_alive=1m
// 查询
GET /_search
{
"size": 10000,
"query": {
"match" : {
"user.id" : "elkbee"
}
},
"pit": {
"id": "46ToAwMDaWR5BXV1aWQyKwZub2RlXzMAAAAAAAAAACoBYwADaWR4BXV1aWQxAgZub2RlXzEAAAAAAAAAAAEBYQADaWR5BXV1aWQyKgZub2RlXzIAAAAAAAAAAAwBYgACBXV1aWQyAAAFdXVpZDEAAQltYXRjaF9hbGw_gAAAAA==",
"keep_alive": "1m"
},
"sort": [
{"@timestamp": {"order": "asc", "format": "strict_date_optional_time_nanos", "numeric_type" : "date_nanos" }}
]
}
// 返回信息
{
"pit_id" : "46ToAwMDaWR5BXV1aWQyKwZub2RlXzMAAAAAAAAAACoBYwADaWR4BXV1aWQxAgZub2RlXzEAAAAAAAAAAAEBYQADaWR5BXV1aWQyKgZub2RlXzIAAAAAAAAAAAwBYgACBXV1aWQyAAAFdXVpZDEAAQltYXRjaF9hbGw_gAAAAA==",
"took" : 17,
"timed_out" : false,
"_shards" : ...,
"hits" : {
"total" : ...,
"max_score" : null,
"hits" : [
...
{
"_index" : "my-index-000001",
"_id" : "FaslK3QBySSL_rrj9zM5",
"_score" : null,
"_source" : ...,
"sort" : [
"2021-05-20T05:30:04.832Z",
4294967298
]
}
]
}
}
// 使用search after查询
GET /_search
{
"size": 10000,
"query": {
"match" : {
"user.id" : "elkbee"
}
},
"pit": {
"id": "46ToAwMDaWR5BXV1aWQyKwZub2RlXzMAAAAAAAAAACoBYwADaWR4BXV1aWQxAgZub2RlXzEAAAAAAAAAAAEBYQADaWR5BXV1aWQyKgZub2RlXzIAAAAAAAAAAAwBYgACBXV1aWQyAAAFdXVpZDEAAQltYXRjaF9hbGw_gAAAAA==",
"keep_alive": "1m"
},
"sort": [
{"@timestamp": {"order": "asc", "format": "strict_date_optional_time_nanos"}}
],
"search_after": [
"2021-05-20T05:30:04.832Z",
4294967298
],
"track_total_hits": false // 禁用对总数据量的跟踪以加快分页
}
// 删除pit
DELETE /_pit
{
"id" : "46ToAwMDaWR5BXV1aWQyKwZub2RlXzMAAAAAAAAAACoBYwADaWR4BXV1aWQxAgZub2RlXzEAAAAAAAAAAAEBYQADaWR5BXV1aWQyKgZub2RlXzIAAAAAAAAAAAwBYgACBXV1aWQyAAAFdXVpZDEAAQltYXRjaF9hbGw_gAAAAA=="
}