springboot 2.x集成elasticSearch

157 阅读29分钟

依赖

<dependency>
  <groupId>org.elasticsearch.client</groupId>
  <artifactId>elasticsearch-rest-client</artifactId>
  <version>7.17.28</version>
</dependency>

    <dependency>
      <groupId>co.elastic.clients</groupId>
      <artifactId>elasticsearch-java</artifactId>
      <version>7.17.28</version>
      <exclusions>
        <exclusion>
          <groupId>org.elasticsearch.client</groupId>
          <artifactId>elasticsearch-rest-client</artifactId>
        </exclusion>
      </exclusions>
    </dependency>

    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-databind</artifactId>
      <version>2.17.0</version>
    </dependency>
    <dependency>
      <groupId>jakarta.json</groupId>
      <artifactId>jakarta.json-api</artifactId>
      <version>2.0.1</version>
    </dependency>

工具类

package org.example.esbak;

import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Configuration;

/**
 * Holds Elasticsearch connection settings injected from application properties.
 * Values are exposed as public static fields so non-Spring-managed code
 * (e.g. the client factory) can read them.
 *
 * NOTE(review): the static fields are only populated once Spring has
 * initialized this bean — confirm no caller reads them before that.
 */
@Configuration
public class EsConfig {

    // host:port style URL, e.g. "localhost:9200"
    public static String url;

    // Base64-encoded "apiKeyId:apiKeySecret" used for ApiKey auth
    public static String apiKeyValue;

    // Credentials for basic auth
    public static String userName;

    public static String password;

    public static String ip;

    public static Integer port;


    @Value("${elasticsearch.url:localhost:9200}")
    public void setUrl(String url) {
        // Assign via the class name: the field is static, `this.` is misleading here.
        EsConfig.url = url;
    }


    @Value("${elasticsearch.apiKeyValue:aWxkbFVwWUItaVZFbVpXZEJTc0c6U1dnQ0J2OFdULW1CcVE4YTAxN3BvQQ==}")
    public void setApiKey(String apiKeyValue) {
        EsConfig.apiKeyValue = apiKeyValue;
    }


    @Value("${elasticsearch.username:elastic}")
    public void setUserName(String userName) {
        EsConfig.userName = userName;
    }

    @Value("${elasticsearch.password:123456}")
    public void setPassword(String password) {
        EsConfig.password = password;
    }

    @Value("${elasticsearch.ip:localhost}")
    public void setIp(String ip) {
        EsConfig.ip = ip;
    }

    @Value("${elasticsearch.port:9200}")
    public void setPort(Integer port) {
        EsConfig.port = port;
    }
}
package org.example.esbak;

import co.elastic.clients.elasticsearch.ElasticsearchClient;
import co.elastic.clients.json.jackson.JacksonJsonpMapper;
import co.elastic.clients.transport.ElasticsearchTransport;
import co.elastic.clients.transport.rest_client.RestClientTransport;

import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.http.Header;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.message.BasicHeader;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.springframework.stereotype.Component;

/**
 * Lazily builds a shared {@link ElasticsearchClient} singleton, either with
 * basic (username/password) auth or with an ApiKey header. Both factory
 * methods populate the SAME static field, so whichever is called first
 * determines the authentication mode for the whole process.
 */
@Component
public class Es7Client {

    // Original code referenced an undeclared `log`; declare a JDK logger so the class compiles.
    private static final Logger LOG = Logger.getLogger(Es7Client.class.getName());

    // volatile is required for correct double-checked locking.
    private static volatile ElasticsearchClient restClient = null;

    /**
     * Returns the singleton client, creating it on first use with
     * username/password credentials taken from {@link EsConfig}.
     */
    public ElasticsearchClient getInstance() {
        if (restClient == null) {
            // Lock on the class: the guarded field is static, so locking on `this`
            // would not protect it across multiple bean instances.
            synchronized (Es7Client.class) {
                if (restClient == null) { // re-check inside the lock (double-checked locking)
                    final CredentialsProvider credentialsProvider =
                            new BasicCredentialsProvider();
                    credentialsProvider.setCredentials(AuthScope.ANY,
                            new UsernamePasswordCredentials(EsConfig.userName, EsConfig.password));

                    RestClientBuilder builder = RestClient.builder(
                                    new HttpHost(EsConfig.ip, EsConfig.port))
                            .setHttpClientConfigCallback(httpClientBuilder -> httpClientBuilder
                                    .setDefaultCredentialsProvider(credentialsProvider));
                    ElasticsearchTransport transport = new RestClientTransport(
                            builder.build(), new JacksonJsonpMapper());
                    restClient = new ElasticsearchClient(transport);
                }
            }
        }
        return restClient;
    }

    /**
     * Returns the singleton client, creating it on first use with an
     * "Authorization: ApiKey ..." default header. The key value is the
     * Base64 encoding of "apiKeyId:apiKeySecret".
     */
    public ElasticsearchClient getRestClientByApiKey() {
        if (restClient == null) {
            synchronized (Es7Client.class) {
                if (restClient == null) { // re-check inside the lock
                    RestClientBuilder builder = RestClient.builder(
                            new HttpHost(EsConfig.ip, EsConfig.port, "http"));
                    Header[] defaultHeaders =
                            new Header[]{new BasicHeader("Authorization",
                                    "ApiKey " + EsConfig.apiKeyValue)};
                    builder.setDefaultHeaders(defaultHeaders);
                    ElasticsearchTransport transport = new RestClientTransport(
                            builder.build(), new JacksonJsonpMapper());
                    restClient = new ElasticsearchClient(transport);
                }
            }
        }
        return restClient;
    }

    /**
     * Closes the shared client if one was created.
     * NOTE(review): with elasticsearch-java it is usually the transport that
     * is closed — confirm the client version exposes close() as intended.
     */
    public static void closeEsClient() {
        try {
            if (restClient != null) {
                restClient.close();
                restClient = null; // allow recreation after an explicit close
            }
        } catch (Exception e) {
            LOG.log(Level.SEVERE, "关闭失败:", e);
        }
    }

}

创建索引

PUT /index_title
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1   
  },
  "mappings": {
    "properties": {
      "title": { "type": "keyword" },      
      "price": { "type": "float" },    
      "create_time": { 
        "type": "date", 
        "format": "yyyy-MM-dd HH:mm:ss" 
      },
	  "note":{"type":"text"}
    }
  }
}
// Creates the "index_user_info" index with 1 shard / 3 replicas and an explicit
// mapping: id (text), name (text with the ik_max_word analyzer), height (double),
// createTime (date formatted yyyy-MM-dd). Uses the ApiKey-authenticated client.
@PostMapping(value = "/esTest")
public void esTest() {

    ElasticsearchClient instance = esClient.getRestClientByApiKey();
    try {
        // Fluent builder of elasticsearch-java: index name, then settings, then mappings.
        CreateIndexResponse createIndexResponse = instance.indices().create(c ->
                c.index("index_user_info")
                        .settings(s -> s.numberOfShards("1").numberOfReplicas("3"))
                        .mappings(m -> m.properties("id", p -> p.text(d -> d))
                                .properties("name", p -> p.text(t -> t.analyzer("ik_max_word")))
                                .properties("height", p -> p.double_(d -> d))
                                .properties("createTime", p -> p.date(d -> d.format("yyyy-MM-dd")))
                        )
        );
        // acknowledged == true means the cluster accepted the index creation request.
        boolean acknowledged = createIndexResponse.acknowledged();
        log.info("创建index_user_info索引返回结果:{}",acknowledged);
    } catch (IOException e) {
        // Network/serialization failure talking to Elasticsearch — surface as unchecked.
        throw new RuntimeException(e);
    }


}
DELETE /your_index_name

删除索引

/**
 * Deletes the given index, but only if it currently exists.
 */
@PostMapping(value = "/deleteIndex/{indexId}")
public void deleteIndex(@PathVariable(value = "indexId")String indexId) {
    try {
        // Probe for existence first so we never issue a delete against a missing index.
        BooleanResponse existsResponse =
                esClient.getInstance().indices().exists(ExistsRequest.of(r -> r.index(indexId)));
        if (!existsResponse.value()) {
            return;
        }
        // Index is present — remove it.
        DeleteIndexResponse response = esClient.getInstance()
                .indices()
                .delete(DeleteIndexRequest.of(r -> r.index(indexId)));
        if (response.acknowledged()) {
            log.info("删除索引成功{}",indexId);
        }
    } catch (IOException e) {
        log.error("删除索引{}失败",indexId);
        throw new RuntimeException(e);
    }

}

推送数据

PUT /index_title/_doc/1
{
  "title": "John Ming",
  "price": 60,
  "create_time": "2025-05-14 10:00:00",
  "note": "备注1"
}
GET /index_user_info/_search
{
  "query": {
    "match_all": {}  
  },
  "size": 10         
}
{
  "took" : 858,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 3,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "index_user_info",
        "_type" : "_doc",
        "_id" : "03aNcZYB0cCRs9DKk9c0",
        "_score" : 1.0,
        "_source" : {
          "name" : "张名",
          "id" : "1",
          "height" : 172.1,
          "createTime" : "2025-04-26"
        }
      },
      {
        "_index" : "index_user_info",
        "_type" : "_doc",
        "_id" : "1HaNcZYB0cCRs9DKk9c0",
        "_score" : 1.0,
        "_source" : {
          "name" : "刘高",
          "id" : "2",
          "height" : 175.1,
          "createTime" : "2025-04-26"
        }
      },
      {
        "_index" : "index_user_info",
        "_type" : "_doc",
        "_id" : "1XaNcZYB0cCRs9DKk9c0",
        "_score" : 1.0,
        "_source" : {
          "name" : "陈古",
          "id" : "3",
          "height" : 180.1,
          "createTime" : "2025-04-26"
        }
      }
    ]
  }
}

POST /_bulk
{ "index": { "_index": "index_title", "_id": "4" }}
{ "title": "Data Analytics", "price": 59.99 , "create_time": "2025-05-14 10:00:00","note": "备注1"}
{ "index": { "_index": "index_title", "_id": "5" }}
{ "title": "Data AB", "price": 69.99 , "create_time": "2025-05-14 10:00:00", "note": "备注1"}
{ "index": { "_index": "index_title", "_id": "6" }}
{ "title": "Data AC", "price": 80.99 , "create_time": "2025-05-14 10:00:00","note": "备注1"}
{ "index": { "_index": "index_title", "_id": "7" }}
{ "title": "Data AD", "price": 90.99 , "create_time": "2025-05-14 10:00:00","note": "备注1" }
/**
 * Pushes three sample UserInfo documents into the given index via the bulk API.
 * Bulk responses can partially fail, so each failed item is logged individually
 * (consistent with batchDeleteData).
 */
@PostMapping(value = "/pushData/{indexId}")
public void pushData(@PathVariable(value = "indexId")String indexId) {
    // 模拟数据
    DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
    String dateStr = formatter.format(LocalDateTime.now());
    List<UserInfo> userInfoList = new ArrayList<>();
    userInfoList.add(new UserInfo().setId("1").setName("张名").setHeight(172.1).setCreateTime(dateStr));
    userInfoList.add(new UserInfo().setId("2").setName("刘高").setHeight(175.1).setCreateTime(dateStr));
    userInfoList.add(new UserInfo().setId("3").setName("陈古").setHeight(180.1).setCreateTime(dateStr));

    // One index operation per document; ES assigns the _id automatically.
    BulkRequest.Builder builder = new BulkRequest.Builder();
    for (UserInfo userInfo : userInfoList) {
        builder.operations(op -> op.index(idx -> idx.index(indexId).document(userInfo)));
    }
    try {
        BulkResponse bulk = esClient.getInstance().bulk(builder.build());
        if (bulk.errors()) {
            log.error("推送失败{}",  JSON.toJSONString(bulk));
            // Log each failed item so the root cause is visible, not just the whole response.
            bulk.items().forEach(item -> {
                if (item.error() != null) {
                    log.error("推送失败 {} {}", item.id(), item.error().reason());
                }
            });
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

删除数据

  • 已知_id的情况下
DELETE /索引名称/_doc/文档ID
DELETE /index_title/_doc/1
  • 先查询,再删除
  POST /index_title/_delete_by_query
{
  "query" :{
    "term" :{
      "title": "John Do"
    }
  }
}
  • 批量删除
  POST /_bulk
{ "delete": { "_index": "index_title", "_id": "8" }}
{ "delete": { "_index": "index_title", "_id": "9" }}
// 删除数据
/**
 * Demonstrates two deletion styles: first deletes the document whose _id is "1",
 * then deletes all documents whose "id" field equals "1" via delete-by-query.
 */
@PostMapping(value = "/deleteData/{indexId}")
public void  deleteData(@PathVariable(value = "indexId")String indexId) {

    // id为docId:按文档 _id 精确删除
    DeleteRequest deleteRequest = DeleteRequest.of(dr -> dr.index(indexId).id("1"));
    try {
        DeleteResponse delete = esClient.getInstance().
                delete(deleteRequest);
        if (delete != null) {
            // result() tells whether the document was actually Deleted or NotFound.
            log.info("删除结果 {}", delete.result());
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // 先查询,再删除:按字段条件删除
    DeleteByQueryRequest request = DeleteByQueryRequest.of(b -> b
            .index(indexId)
            .query(q -> q
                    .term(t -> t
                            .field("id")
                            .value(v -> v.stringValue("1"))
                    )
            )
    );

    try {
        DeleteByQueryResponse deleteByQueryResponse = esClient.getInstance().deleteByQuery(request);
        Long deleted = deleteByQueryResponse.deleted();
        log.info("删除的个数{}",deleted);
    } catch (Exception e) {
        // 原代码静默吞掉异常;至少记录下来,避免排障困难
        log.error("deleteByQuery失败 index={}", indexId, e);
    }
}

批量删除

// 批量删除
/**
 * Demonstrates batch deletion: first removes documents by _id via the bulk API
 * (logging any per-item failure), then removes documents whose "id" field is in
 * the same list via delete-by-query.
 */
@PostMapping(value = "/batchDeleteData/{indexId}")
public void  batchDeleteData(@PathVariable(value = "indexId")String indexId) {

    // 文档ID删除

    List<String> ids = new ArrayList<>();
    ids.add("4");
    ids.add("5");
    BulkRequest.Builder builder = new BulkRequest.Builder();
    for (String a : ids) {
        // id为文档的_id
        builder.operations(op -> op.delete(idx -> idx.index(indexId).id(a)));
    }
    // 执行批量操作
    try {
        BulkResponse bulk = esClient.getInstance().bulk(builder.build());

        // 异常处理:bulk 可能部分失败,逐项检查
        if (bulk.errors()) {
            bulk.items().forEach(e -> {
                if (e.error() != null) {
                    log.error("删除失败 {} {}", e.id(), e.error().reason());
                }
            });
        }

    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // 先查询,再删除:字段 id 属于 ids 列表即匹配
    DeleteByQueryRequest request = DeleteByQueryRequest.of(b -> b
            .index(indexId)
            .query(q -> q
                    .terms(t -> t
                            .field("id")
                            .terms(ts -> ts
                                    .value(ids.stream()
                                            .map(FieldValue::of).collect(Collectors.toList()))
                            )
                    )
            )
    );
    try {
        DeleteByQueryResponse deleteByQueryResponse = esClient.getInstance().deleteByQuery(request);
        Long deleted = deleteByQueryResponse.deleted();
        log.info("删除的个数{}", deleted);
    } catch (Exception e) {
        // 原代码静默吞掉异常;至少记录下来,避免排障困难
        log.error("deleteByQuery失败 index={}", indexId, e);
    }
}

查询数据

  • 普通查询: Term(精确匹配)、Match(全文匹配)、Range(范围查询)等,通常仅支持单一条件查询
  • BoolQuery查询:通过 must(AND)、should(OR)、must_not(NOT)、filter(过滤,不计算相关性得分)等子句组合多个查询逻辑
  • 区别:Match Query 仅支持分词后的全文检索,Term Query 仅支持未分词的精确匹配,‌BoolQuery‌ 可以混合不同字段、不同查询类型的条件,结合 term(精确匹配 keyword 字段)与 match_phrase(短语匹配 text 字段),在 filter 子句中使用 range 过滤数值或日期范围

分页查询的方式

  • page/size 分页:这是较常用的一种分页方式, 使用简单,page指定页号, size指定返回的文档数, 但有记录数10000的限制(from+size<=10000), 且当越往后翻页时,性能越差,即不适合进行深分页
  • search after 分页: search after 利用游标来帮我们解决实时滚动的问题。 搜索时需要指定排序,并且保证排序是唯一的, 第一排序不唯一时,可使用多排序, 以消除排序不稳定的现象, 若业务字段无法保证排序唯一性, 可以通过最后加入 _id 来保证排序唯一性。 由于是滚动分页,因此不支持跳页
  • scroll 分页: 这也是滚动分页的一种方式,类似search after,但这种分页方式在分页遍历过程中, 新增的数据不会插进结果集中, scroll 分页最大的缺点是不支持高并发场景, 仅适用于管理后台数据导出等少量人员操作的场景,返回值中有scroll_id是有时效的, scroll=1m表示生成后1分钟内有效

查询语法介绍

match语法

GET /index_user_info/_search
{
  "query": {
    "match": {"name":"陈古"}  
  }
}

{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 3,
      "relation" : "eq"
    },
    "max_score" : 2.0572429,
    "hits" : [
      {
        "_index" : "index_user_info",
        "_type" : "_doc",
        "_id" : "1XaNcZYB0cCRs9DKk9c0",
        "_score" : 2.0572429,
        "_source" : {
          "name" : "陈古",
          "id" : "3",
          "height" : 180.1,
          "createTime" : "2025-04-26"
        }
      },
      {
        "_index" : "index_user_info",
        "_type" : "_doc",
        "_id" : "9clkx5YBNRvU4PL57GzP",
        "_score" : 1.7460635,
        "_source" : {
          "name" : "陈古51",
          "id" : "9",
          "height" : 180.1,
          "createTime" : "2025-05-13"
        }
      },
      {
        "_index" : "index_user_info",
        "_type" : "_doc",
        "_id" : "grnOw5YBwm5zQPEO_DwH",
        "_score" : 1.7460635,
        "_source" : {
          "name" : "陈古5",
          "id" : "6",
          "height" : 180.1,
          "createTime" : "2025-05-12"
        }
      }
    ]
  }
}
  • match:基础的全文检索, 对查询文本进行分词后匹配 分词后默认使用 OR 逻辑匹配任意词项(可通过 operator 设置为 AND) 不关注词项顺序和位置
GET /index_user_info/_search
{
  "query": {
    "match": {"name":"陈古"}  
  }
}

  • match_all:匹配所有文档,无查询条件,通常结合过滤或分页使用
GET /index_user_info/_search
{
  "query": {
    "match_all": {}  
  }
}

  • match_phrase:精确短语匹配,要求词项顺序一致且位置相邻, 支持 slop 参数,允许词项间有一定间隔(默认 slop=0)
GET /index_user_info/_search
{
  "query": {
    "match_phrase": {
      "name": {
        "query": "李开复",
        "slop": 1,
        "analyzer": ""
      }
    }  
  }
}
  • match_phrase_prefix:短语匹配,但最后一个词项匹配前缀 适用于实现“搜索即输入”的自动补全功能, 通过 max_expansions 限制前缀扩展数量(默认 50)
GET /index_user_info/_search
{
  "query": {
    "match_phrase_prefix": {
      "name": {
        "query": "陈古",
        "max_expansions": 10
      }
    }  
  }
}
  • multi_match:在多个字段上执行同一查询, 可通过 type 指定匹配策略(如 best_fields、most_fields), 可对字段加权(如 title^3)
GET /index_user_info/_search
{
  "query": {
    "multi_match": {
      "query": "陈古",
      "fields": ["id","name^2"],
      "type": "most_fields"
    }  
  }
}

term语法

  • term: 查询直接匹配索引中的原始词项(未经分词处理), 要求查询值与文档字段值完全一致, 避免对 text 字段使用 term‌ text 字段默认会被分词处理(如转为小写), 直接使用 term 可能无法匹配。若需精确匹配文本,应使用 keyword 类型字段 term 是包含操作,而非等值判断‌ 若字段值为数组(如 "tags": ["search", "open_source"]), term 会匹配包含该词项的文档,而非要求完全相等
GET /index_user_info/_search
{
  "query": {
    "term": {
      "id": {
        "value": "10"
      }
    } 
  }
}
  • terms:terms查询用于匹配字段中包含任意一个指定值的文档, 适用于同时搜索多个精确值的场景, 与 term 查询类似,terms 直接匹配索引中的原始词项(未经分词), 要求查询值与文档字段值完全一致, 查询值需以数组形式传递,支持数值、字符串(keyword 类型)、布尔值等结构化数据 boost‌:调整查询结果的权重,影响相关性评分(需与 bool 查询结合时使用), 避免对 text 字段使用‌ text 类型字段默认分词存储,若需精确匹配多个值,应使用 .keyword 子字段, ‌查询逻辑:包含而非等值‌ terms 匹配的是字段值包含数组中任意值的文档。 若字段值为数组(如 ["apple", "banana"]),查询值与数组元素有交集即匹配, ‌性能优化‌: 使用 constant_score 过滤器跳过评分计算,提高查询速度, 避免一次性传入大量查询值(如超过 1000 个),可能引发性能问题
GET /index_user_info/_search
{
  "query": {
    "terms": {
      "id": [
        "9",
        "10"
      ]
    } 
  }
}
GET /index_user_info/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "terms": {
          "id": [
            "9",
            "10"
          ]
        }
      },
      "boost": 1.2
    }
  }
}

range语法

  • range: range 查询用于匹配字段值在指定区间内的文档, 支持数值、日期、字符串等有序类型字段的筛选 示例:筛选 price 在 100 到 500 之间, 或 register_date 在 2025-01-01 至 2025-05-13 内的文档 支持以下比较操作符定义区间边界: gt:大于 gte:大于等于 lt:小于 lte:小于等于 可组合使用(如 gt 与 lte 结合) 日期类型字段需指定 format 参数匹配索引中的日期格式(如 yyyy-MM-dd),支持时区修正(time_zone) 字段类型匹配‌ 数值类型(integer/long/double)直接比较数值大小 日期类型需严格遵循 format 定义格式,否则查询失败 支持通配符匹配字段名(如 logs-*-timestamp),但需注意性能影响(避免匹配过多字段) 结合 constant_score 过滤器跳过评分计算 避免大范围查询(如时间段跨度过长),建议分页或结合时间分区策略

GET /my_index/_search
{
  "query": {
    "range": {
      "<field>": {
        "gte": "<最小值>",  
        "lte": "<最大值>",  
        "format": "yyyy-MM-dd",  // 日期格式(可选)
        "time_zone": "+08:00",   // 时区(可选)
        "boost": 2.0             // 权重(可选)
      }
    }
  }
}
GET /index_user_info/_search
{
  "query": {
    "range": {
      "height": {
        "gte": 10,
        "lte": 200
      }
    }
  }
}

GET /index_user_info/_search
{
  "query": {
    "range": {
      "createTime": {
        "gte": "2025-05-12",
        "lte": "2025-05-12",
        "format": "yyyy-MM-dd",
        "time_zone": "+08:00"
      }
    }
  }
}

复合查询(bool

  • bool查询:‌布尔逻辑组合‌ bool 查询通过 must(AND)、 should(OR)、 must_not(NOT)、 filter(非评分过滤) 四种子句组合多个独立查询条件, 实现复杂逻辑筛选 示例:筛选 status 为 active(must)、 price 大于 100(must) 且不包含标签 expired(must_not)的商品。 ‌灵活的评分机制‌ must 和 should 子句影响文档相关性评分(_score), 而 filter 和 must_not 仅过滤文档,不参与评分 filter 子句会启用缓存机制,提升重复查询性能。 ‌嵌套查询支持‌ 可在 bool 子句中嵌套其他 bool 查询, 实现多层级逻辑组合(如 (A AND B) OR (C AND D)) ‌性能优化‌: 优先使用 filter 替代 must 处理非评分条件,减少不必要的评分计算 避免在 should 子句中添加过多条件,可能降低查询效率 ‌字段类型匹配‌: 精确值匹配(如 status)需使用 term 查询,且字段应为 keyword 类型 text 类型字段需通过 .keyword 子字段进行精确过滤 ‌嵌套查询深度控制‌ 多层嵌套 bool 查询可能增加复杂度,建议结合业务需求简化逻辑
GET /index/_search
{
  "query": {
    "bool": {
      "must": [         // 必须满足的所有条件(AND)
        { "term": { "status": "active" } }
      ],
      "must_not": [     // 必须不满足的条件(NOT)
        { "term": { "tag": "expired" } }
      ],
      "should": [       // 至少满足一个条件(OR)
        { "match": { "description": "urgent" } }
      ],
      "filter": [       // 过滤条件(不评分)
        { "range": { "price": { "gte": 100 } } }
      ],
      "minimum_should_match": 1  // should 子句最小匹配数
    }
  }
}
GET /index_user_info/_search
{
  "query": {
    "bool": {"must": [
      {"term": {
        "id": {
          "value": "9"
        }
      }}
    ]}
  }
}

GET /index_user_info/_search
{
  "query": {
    "bool": {
      
      "must_not": [
        {"term": {
          "name": {
            "value": "陈古"
          }
        }}
      ],
      "should": [
        {"match_phrase_prefix": {
          "name": "刘"
        }}
      ],
      "filter": [
        {"range": {
          "createTime": {
            "gte": "2025-05-12",
            "lte": "2025-05-12"
          }
        }}
      ], 
      "minimum_should_match": 1
      
    }
  }
}

分页和排序实例

  • ‌默认排序规则‌ Elasticsearch 默认按相关性评分 _score 降序排序 ‌自定义排序‌ 支持对以下字段类型排序: ‌数值类型‌:integer、long、double ‌日期类型‌:按时间戳排序(需指定 format) ‌Keyword类型‌:精确值按字典序排序 ‌地理坐标‌:按距离排序(需使用 geo_distance) ‌Text字段排序‌ text 类型字段需通过 .keyword 子字段排序(未经分词处理)

  • from+size

GET /index_user_info/_search
{
  "from": 0,
  "size": 100,
  "sort": [
    {
      "height": {
        "order": "desc"
      },
      "createTime": {"order": "desc"}
    }
  ],
  "query": {"match": {
    "name": "陈古"
  }}
}


‌使用 .keyword 子字段排序‌
‌原理‌:创建索引时自动生成未分词的 keyword 子字段,保留完整原始值

PUT /my_index
{
  "mappings": {
    "properties": {
      "title": {
        "type": "text",
        "fields": {
          "keyword": {  // 自动生成的 keyword 子字段
            "type": "keyword",
            "ignore_above": 256
          }
        }
      }
    }
  }
}

// 排序时指定 keyword 子字段
GET /my_index/_search
{
  "sort": [
    { "title.keyword": "asc" }  // 按完整原始值排序
  ]
}
‌优点‌:内存占用低,性能稳定。
‌限制‌:原始值超过 ignore_above 长度时会被截断
  • search_after方式排序
GET /index_user_info/_search
{
  
  "size": 2,
  "query":
  { 
    "match_all": {} 
    
  },
  "sort": [
    { 
      
      "createTime": "desc"    // 排序字段
      
    }, 
    { 
      "_id": "asc"   // 辅助字段
      
    }          
  ]

}

{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [
      {
        "_index" : "index_user_info",
        "_type" : "_doc",
        "_id" : "-Mlox5YBNRvU4PL5mmzf",
        "_score" : null,
        "_source" : {
          "name" : "张志明",
          "id" : "10",
          "height" : 172.1,
          "createTime" : "2025-05-13"
        },
        "sort" : [
          1747094400000,
          "-Mlox5YBNRvU4PL5mmzf"
        ]
      },
      {
        "_index" : "index_user_info",
        "_type" : "_doc",
        "_id" : "-clox5YBNRvU4PL5mmzf",
        "_score" : null,
        "_source" : {
          "name" : "李开复",
          "id" : "11",
          "height" : 175.1,
          "createTime" : "2025-05-13"
        },
        "sort" : [
          1747094400000,
          "-clox5YBNRvU4PL5mmzf"
        ]
      }
    ]
  }
}
  • 记录下信息,传递到下一页
 "sort" : [
          1747094400000,
          "-clox5YBNRvU4PL5mmzf"
        ]
GET /index_user_info/_search
{
  
  "size": 2,
  "query":
  { 
    "match_all": {} 
    
  },
  "sort": [
    { 
      
      "createTime": "desc"    
      
    }, 
    { 
      "_id": "asc"   
      
    }          
  ],
  "search_after": [
    1747094400000,
          "-clox5YBNRvU4PL5mmzf"
    ]
}

  • scroll Scroll 的排序规则仅在‌初始化阶段‌生效,后续滚动请求沿用初始排序 ‌适用场景‌ ‌批量导出‌:全量数据离线处理(如日志备份)。 ‌一致性快照‌:保持排序结果不变(即使索引数据更新) ‌性能优化建议‌ ‌避免高开销排序‌:禁用 _score 计算("track_scores": false) ‌简化排序字段‌:优先使用 _doc 或数值/日期字段 ‌控制窗口时间‌:合理设置 scroll 参数(如 scroll=2m)避免资源泄漏
POST /logs/_search?scroll=2m
{
  "size": 500,
  "sort": [
    { "timestamp": "desc" },  // 主排序字段(日期类型)
    { "_doc": "asc" }          // 辅助排序(确保唯一性)
  ],
  "query": {
    "range": { "timestamp": { "gte": "2025-05-01" }}
  },
  "track_scores": false  // 关闭相关性评分计算
}
响应中携带 `_scroll_id`:用于后续滚动请求
POST /_search/scroll { "scroll": "2m", "scroll_id": "DXF1ZXJ5...==_16" }
(后续请求无需重复指定排序参数)
POST /index_user_info/_search?scroll=1m
{
  
  "size": 2,
   "query": {"match_all": {}},
   "sort": [
     {
       "createTime": {
         "order": "desc"
       },
       "_id" :{
         "order": "asc"
       }
     }
   ],
   "track_scores": false

}

{
  "_scroll_id" : "FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFkdySU0ySm9sUmhPVTVfbjl3NnhIRmcAAAAAAAAtHRYxR0Y0Q0kzLVJUcUFCMW5oUnlnV1J3",
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [
      {
        "_index" : "index_user_info",
        "_type" : "_doc",
        "_id" : "-Mlox5YBNRvU4PL5mmzf",
        "_score" : null,
        "_source" : {
          "name" : "张志明",
          "id" : "10",
          "height" : 172.1,
          "createTime" : "2025-05-13"
        },
        "sort" : [
          1747094400000,
          "-Mlox5YBNRvU4PL5mmzf"
        ]
      },
      {
        "_index" : "index_user_info",
        "_type" : "_doc",
        "_id" : "-clox5YBNRvU4PL5mmzf",
        "_score" : null,
        "_source" : {
          "name" : "李开复",
          "id" : "11",
          "height" : 175.1,
          "createTime" : "2025-05-13"
        },
        "sort" : [
          1747094400000,
          "-clox5YBNRvU4PL5mmzf"
        ]
      }
    ]
  }
}
POST /_search/scroll
{
  "scroll": "2m", 
  "scroll_id" : "FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFkdySU0ySm9sUmhPVTVfbjl3NnhIRmcAAAAAAAAt4hYxR0Y0Q0kzLVJUcUFCMW5oUnlnV1J3"
}
{
  "_scroll_id" : "FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFkdySU0ySm9sUmhPVTVfbjl3NnhIRmcAAAAAAAAt4hYxR0Y0Q0kzLVJUcUFCMW5oUnlnV1J3",
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [
      {
        "_index" : "index_user_info",
        "_type" : "_doc",
        "_id" : "-slox5YBNRvU4PL5mmzf",
        "_score" : null,
        "_source" : {
          "name" : "赵子房",
          "id" : "12",
          "height" : 180.1,
          "createTime" : "2025-05-13"
        },
        "sort" : [
          1747094400000,
          "-slox5YBNRvU4PL5mmzf"
        ]
      },
      {
        "_index" : "index_user_info",
        "_type" : "_doc",
        "_id" : "88lkx5YBNRvU4PL57GzP",
        "_score" : null,
        "_source" : {
          "name" : "张名12",
          "id" : "7",
          "height" : 172.1,
          "createTime" : "2025-05-13"
        },
        "sort" : [
          1747094400000,
          "88lkx5YBNRvU4PL57GzP"
        ]
      }
    ]
  }
}

聚合查询(aggs

  • ‌桶聚合(Bucket Aggregations)
  • ‌指标聚合(Metric Aggregations)
  • ‌管道聚合(Pipeline Aggregations)‌
一、聚合分类与核心功能
‌桶聚合(Bucket Aggregations)‌
将文档按规则分组形成「桶」,类似 SQL 的 GROUP BY

‌常见类型‌:
terms:按字段唯一值分组(需使用 .keyword 字段处理文本)
date_histogram:按时间间隔分组(如按月/小时统计)
range:自定义数值或时间范围分组

// 按 job 字段分组统计(需使用 keyword 类型)
GET /test/_search
{
  "size": 0,
  "aggs": {
    "job_count": {
      "terms": { "field": "job.keyword" }
    }
  }
}
‌指标聚合(Metric Aggregations)‌
对数值字段进行统计计算(如求和、求平均等)

‌核心指标‌:
avg、sum、min、max、stats(综合统计)、value_count(计数)


// 计算价格字段的平均值与最大值
"aggs": {
  "avg_price": { "avg": { "field": "price" } },
  "max_price": { "max": { "field": "price" } }
}

‌管道聚合(Pipeline Aggregations)‌
基于其他聚合结果二次计算(如百分比、移动平均等)

// 计算满足条件的文档占比(引用其他聚合结果)
"aggs": {
  "percentage_agg": {
    "bucket_script": {
      "buckets_path": {
        "total": "total_count", 
        "filtered": "filtered_count"
      },
      "script": "params.filtered / params.total * 100"
    }
  }
}
二、组合嵌套与高级用法
‌嵌套聚合‌
桶内嵌套指标或子桶,实现多维分析


// 先按品牌分组,再计算每组平均价格
"aggs": {
  "brand_agg": {
    "terms": { "field": "brand.keyword" },
    "aggs": { "avg_price": { "avg": { "field": "price" } } }
  }
}
‌聚合范围限定‌
结合 query 条件筛选参与聚合的文档


GET /hotel/_search
{
  "query": {
    "range": { "price": { "gte": 200 } }  // 仅统计价格≥200的文档
  },
  "aggs": { ... }
}
‌动态脚本聚合‌
使用 Painless 脚本实现复杂逻辑


// 统计字段非空率(通过脚本判断)
"aggs": {
  "filtered_count": {
    "value_count": {
      "script": "doc['my_field'].size() != 0 ? 1 : 0"
    }
  }
}
三、性能优化与注意事项
‌字段类型限制‌

参与聚合的字段需为 keyword、numeric 或 date 类型,text 类型需转换为 .keyword
‌内存与效率优化‌

设置 size: 0 避免返回原始文档
限制返回桶数量(如 terms 聚合的 size 参数)
‌实时性权衡‌

高频更新的索引聚合结果可能滞后,可通过 refresh_interval 调整刷新频率
  • 例子1
GET /index_title/_search
{
  "size": 0,
  "aggs": {
    "title_count": {
     "terms": {
       "field": "title"
    }
    }
  }
}
{
  "took" : 18,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 9,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "title_count" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "Data AC",
          "doc_count" : 2
        },
        {
          "key" : "Data AD",
          "doc_count" : 2
        },
        {
          "key" : "Data AB",
          "doc_count" : 1
        },
        {
          "key" : "Data Analytics",
          "doc_count" : 1
        },
        {
          "key" : "Elasticsearch Guide",
          "doc_count" : 1
        },
        {
          "key" : "John Doe",
          "doc_count" : 1
        },
        {
          "key" : "John Ming",
          "doc_count" : 1
        }
      ]
    }
  }
}


  • 例子2
GET /index_title/_search
{
  "size": 0,
  "aggs": {
    "title_count": {
     "terms": {
       "field": "note.keyword"
    }
    }
  }
}
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 9,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "title_count" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [ ]
    }
  }
}

  • 例子3
GET /index_title/_search
{
  "size": 0,
  "aggs": {
    "avg_price": {
     "avg": {
       "field": "price"
    }
    }
  }
}
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 9,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "avg_price" : {
      "value" : 71.65777693854437
    }
  }
}

  • 例子4
GET /index_title/_search
{
  "size": 0,
  "aggs": {
    "max_price": {
     "max": {
       "field": "price"
    }
    }
  }
}
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 9,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "max_price" : {
      "value" : 103.98999786376953
    }
  }
}

  • 例子5,先分组,再求平均值
GET /index_title/_search
{
  "size": 0,
  "aggs": {
  "title_agg": {
    "terms": { "field": "title" },
    "aggs": { "avg_price": { "avg": { "field": "price" } } }
  }
}
}
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 9,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "title_agg" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "Data AC",
          "doc_count" : 2,
          "avg_price" : {
            "value" : 91.98999786376953
          }
        },
        {
          "key" : "Data AD",
          "doc_count" : 2,
          "avg_price" : {
            "value" : 97.48999786376953
          }
        },
        {
          "key" : "Data AB",
          "doc_count" : 1,
          "avg_price" : {
            "value" : 69.98999786376953
          }
        },
        {
          "key" : "Data Analytics",
          "doc_count" : 1,
          "avg_price" : {
            "value" : 59.9900016784668
          }
        },
        {
          "key" : "Elasticsearch Guide",
          "doc_count" : 1,
          "avg_price" : {
            "value" : 45.9900016784668
          }
        },
        {
          "key" : "John Doe",
          "doc_count" : 1,
          "avg_price" : {
            "value" : 29.989999771118164
          }
        },
        {
          "key" : "John Ming",
          "doc_count" : 1,
          "avg_price" : {
            "value" : 60.0
          }
        }
      ]
    }
  }
}

模糊查询

  • 通配符查询(Wildcard Query) 匹配包含特定通配符模式的词项,适用于简单模糊匹配
GET /index_title/_search
{
  "query": {
    "wildcard": {
      "title": {
        "value": "John*"
      }
    }
  }
}

{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "index_title",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 1.0,
        "_source" : {
          "title" : "John Ming",
          "price" : 60,
          "create_time" : "2025-05-14 10:00:00",
          "note" : "备注1"
        }
      },
      {
        "_index" : "index_title",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 1.0,
        "_source" : {
          "title" : "John Doe",
          "price" : 29.99,
          "create_time" : "2025-05-14 10:00:00",
          "note" : "备注1"
        }
      }
    ]
  }
}

  • ‌前缀查询(Prefix Query)‌ ‌用途‌:匹配以指定前缀开头的词项,如搜索用户输入提示
GET /index_title/_search
{
  "query": {
    "prefix": {
      "title": {
        "value": "John"
      }
    }
  }
}

{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "index_title",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 1.0,
        "_source" : {
          "title" : "John Ming",
          "price" : 60,
          "create_time" : "2025-05-14 10:00:00",
          "note" : "备注1"
        }
      },
      {
        "_index" : "index_title",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 1.0,
        "_source" : {
          "title" : "John Doe",
          "price" : 29.99,
          "create_time" : "2025-05-14 10:00:00",
          "note" : "备注1"
        }
      }
    ]
  }
}

  • ‌正则表达式查询(Regexp Query)‌ ‌用途‌:使用正则表达式匹配复杂模式,如邮箱或电话号码验证
GET /users/_search
{
  "query": {
    "regexp": { "email": ".*@example\\.com" }
  }
}

  • ‌模糊查询(Fuzzy Query)‌ ‌用途‌:基于编辑距离(Levenshtein 算法)匹配近似词,适合拼写纠错
GET /index_title/_search
{
  "query": {
    "fuzzy": {
      "note": {
        "value": "备注1",
        "fuzziness": 2
      }
    }
  }
}

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 9,
      "relation" : "eq"
    },
    "max_score" : 0.0,
    "hits" : [
      {
        "_index" : "index_title",
        "_type" : "_doc",
        "_id" : "5",
        "_score" : 0.0,
        "_source" : {
          "title" : "Data AB",
          "price" : 69.99,
          "create_time" : "2025-05-14 10:00:00",
          "note" : "备注1"
        }
      },
      {
        "_index" : "index_title",
        "_type" : "_doc",
        "_id" : "7",
        "_score" : 0.0,
        "_source" : {
          "title" : "Data AD",
          "price" : 90.99,
          "create_time" : "2025-05-14 10:00:00",
          "note" : "备注1"
        }
      },
      {
        "_index" : "index_title",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 0.0,
        "_source" : {
          "title" : "John Ming",
          "price" : 60,
          "create_time" : "2025-05-14 10:00:00",
          "note" : "备注1"
        }
      },
      {
        "_index" : "index_title",
        "_type" : "_doc",
        "_id" : "3",
        "_score" : 0.0,
        "_source" : {
          "title" : "Elasticsearch Guide",
          "price" : 45.99,
          "create_time" : "2025-05-14 10:00:00",
          "note" : "备注1"
        }
      },
      {
        "_index" : "index_title",
        "_type" : "_doc",
        "_id" : "4",
        "_score" : 0.0,
        "_source" : {
          "title" : "Data Analytics",
          "price" : 59.99,
          "create_time" : "2025-05-14 10:00:00",
          "note" : "备注1"
        }
      },
      {
        "_index" : "index_title",
        "_type" : "_doc",
        "_id" : "10",
        "_score" : 0.0,
        "_source" : {
          "title" : "Data AC",
          "price" : 102.99,
          "create_time" : "2025-05-14 10:00:00",
          "note" : "备注1"
        }
      },
      {
        "_index" : "index_title",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 0.0,
        "_source" : {
          "title" : "John Doe",
          "price" : 29.99,
          "create_time" : "2025-05-14 10:00:00",
          "note" : "备注1"
        }
      },
      {
        "_index" : "index_title",
        "_type" : "_doc",
        "_id" : "6",
        "_score" : 0.0,
        "_source" : {
          "title" : "Data AC",
          "price" : 80.99,
          "create_time" : "2025-05-14 10:00:00",
          "note" : "备注1"
        }
      },
      {
        "_index" : "index_title",
        "_type" : "_doc",
        "_id" : "11",
        "_score" : 0.0,
        "_source" : {
          "title" : "Data AD",
          "price" : 103.99,
          "create_time" : "2025-05-14 10:00:00",
          "note" : "备注1"
        }
      }
    ]
  }
}

  • 模糊查询总结:注意模糊查询性能问题
 一:自定义分词 + match_phrase‌ 的组合可优化短语匹配的精准性与灵活性
 以下是核心实现策略与注意事项:
 1、‌自定义分词器配置‌
通过 nGram 或 edge_ngram 分词器增强灵活性,例如中文场景结合 ik 分词器
PUT /my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "ik_ngram": {
          "tokenizer": "ik_max_word",  // 中文分词
          "filter": ["ngram_filter"]   // 添加 nGram 切分
        }
      },
      "filter": {
        "ngram_filter": {
          "type": "ngram",
          "min_gram": 2,
          "max_gram": 3
        }
      }
    }
  }
}

‌2、索引数据时应用分词规则‌
定义字段映射时指定自定义分词器:
PUT /my_index/_mapping
{
  "properties": {
    "title": {
      "type": "text",
      "analyzer": "ik_ngram"  // 使用自定义分词器
    }
  }
}

‌3、查询时使用 match_phrase‌
结合分词结果进行短语匹配,保持词序一致:
GET /my_index/_search
{
  "query": {
    "match_phrase": {
      "title": {
        "query": "人大四次会议开幕",
        "slop": 1  // 允许词间间隔 1 个位置
      }
    }
  }
}

二、适用场景
‌中文模糊匹配‌
解决中文分词颗粒度问题(如「人大四次」无法匹配「人大四次会议」的分词结果)
‌顺序敏感型搜索‌
例如法律条文、地址匹配等要求词序严格一致的场景
‌搜索性能优化‌
相比 wildcard 查询,通过预分词减少全字段扫描

三、存在的问题与优化
‌索引体积膨胀‌
nGram 分词导致索引体积显著增大,需权衡存储与查询性能
‌优化‌:针对核心字段使用自定义分词,非核心字段采用默认分词

‌查询性能开销‌
match_phrase 需计算词项位置,大数据量时性能低于普通 match 查询
‌优化‌:

合理设置 slop 参数(允许词间间隔)提升召回率
结合 bool 查询混合 match 过滤初步结果
‌分词策略冲突‌
若查询文本分词结果与索引分词不一致,会导致匹配失败(如「人大四次」分词为 ["人大", "四次"],但索引中为 ["人大四次", "会议"])
‌优化‌:

统一查询与索引的分词器配置
添加同义词扩展词库减少分词差异

四、典型问题案例
// 文档内容
{"title": "人大四次会议开幕"}

// 查询语句
GET /my_index/_search
{
  "query": {
    "match_phrase": {
      "title": "人大四次"  // 默认分词为 ["人大", "四次"]
    }
  }
}
‌问题原因‌:索引中 title 字段可能分词为 ["人大四次会议", "开幕"],无法匹配查询词项顺序

‌解决方案‌:

调整分词器 min_gram=2 并添加 slop 参数
使用 ik_max_word + nGram 组合分词提升细粒度

通过合理配置分词策略与参数调优,可显著提升 match_phrase 的召回率与查询效率
PUT /index_ik_data
{
  "settings": {
    "analysis": {
      "analyzer": {
        "ik_ngram": {
          "tokenizer": "ik_max_word",  
          "filter": ["ngram_filter"]   
        }
      },
      "filter": {
        "ngram_filter": {
          "type": "ngram",
          "min_gram": 2,
          "max_gram": 3
        }
      }
    }
  },
  "mappings": {
  "properties": {
    "title": {
      "type": "text",
      "analyzer": "ik_ngram"  
    }
  }
  }
}

PUT /index_ik_data/_doc/1
{
  "title": "中华人民共和国人大四次会议开幕"
}




GET /index_ik_data/_search
{
  "query": {
    "match_phrase": {
      "title": {
        "query": "人大四次会议开幕",
        "slop": 1  
      }
    }
  }
}

GET /index_ik_data/_search
{
  "query": {
    "match_phrase": {
      "title": "人大四次"  
    }
  }
}

DELETE /index_ik_data

分词器


一、分词器选择与安装
‌内置分词器适用场景‌

‌standard‌:默认分词器,适合英文文本(按空格/标点拆分并转小写)
‌simple‌:仅按非字母字符拆分,适合标准化文本的简单处理
中文场景推荐 ‌IK 分词器‌(细分 ik_smart 和 ik_max_word 模式)
‌IK 分词器安装‌
下载与 Elasticsearch 完全匹配的版本(如 7.17.3 对应插件版本一致)。
解压至 plugins/ik 目录并重启服务,通过 elasticsearch-plugin list 验证安装状态。
二、分词器配置与使用
‌索引 Mapping 定义‌

PUT /my_index
{
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "ik_max_word",  // 索引时分词(细粒度)
        "search_analyzer": "ik_smart"  // 搜索时分词(粗粒度)
      }
    }
  }
}
analyzer 定义索引存储的分词策略,search_analyzer 控制搜索匹配逻辑
‌动态测试分词效果‌

GET /_analyze
{
  "text": "我爱美羊羊",
  "analyzer": "ik_smart"
}
通过 _analyze API 验证分词结果,调整算法参数
三、自定义词库扩展
‌本地词库配置‌

编辑 config/main.dic 文件添加新词(需分词器重启生效)
适用于静态词库维护,如专业术语
‌远程词库(推荐)‌

配置 config/IKAnalyzer.cfg.xml 指定远程 URL:

<entry key="remote_ext_dict">http://your-nginx-server/es_dict.txt</entry>
定时热更新(默认 60 秒轮询),支持动态扩展网络热词
四、注意事项
‌版本兼容性‌:插件版本需与 Elasticsearch 严格匹配,否则启动失败
‌性能权衡‌:ik_max_word 存储开销更高,搜索时建议用 ik_smart 减少计算量
‌热更新限制‌:远程词库更新仅增删词项,不支持修改已有词权重
通过以上策略,可平衡分词准确性、维护成本及系统性能,实现灵活高效的中文搜索体验

四、分词器核心组件
Elasticsearch 的分词器由三部分组成

‌Character Filters(字符过滤器)‌:预处理原始文本(如删除 HTML 标签、符号替换);
‌Tokenizer(分词器)‌:按规则切分文本(如按空格分割);
‌Token Filters(词元过滤器)‌:标准化处理分词结果(如转小写、停用词移除、同义词扩展)
五、自定义分词器实现步骤
1. 基于外部词库扩展
‌适用场景‌:需添加行业术语、新词或停用词。

‌创建词库文件‌:新建文本文件(如 custom_words.txt),每行一个词

特殊术语1
特殊术语2
‌部署词库‌:
将文件放入 $ES_HOME/config/analysis/ 目录
通过配置 synonyms_path 或 keywords_path 指定路径
2. 配置自定义分析器
在索引配置中定义分析器组件


PUT /my_index
{
  "settings": {
    "analysis": {
      "filter": {
        "my_stopwords": {
          "type": "stop",
          "stopwords": ["的", "了", "是"]  // 自定义停用词
        },
        "my_synonyms": {
          "type": "synonym",
          "synonyms_path": "analysis/synonyms.txt"  // 同义词文件路径
        }
      },
      "analyzer": {
        "my_custom_analyzer": {
          "type": "custom",
          "char_filter": ["html_strip"],  // 字符过滤器
          "tokenizer": "ik_max_word",      // 使用 IK 分词器
          "filter": ["lowercase", "my_stopwords", "my_synonyms"]
        }
      }
    }
  }
}
3. 应用自定义分词器
在字段映射中指定分词器

PUT /my_index/_mapping
{
  "properties": {
    "content": {
      "type": "text",
      "analyzer": "my_custom_analyzer"  // 应用自定义分析器
    }
  }
}
4. 重启并验证
‌重启 Elasticsearch 服务‌
‌通过 API 测试分词效果‌

GET /my_index/_analyze
{
  "analyzer": "my_custom_analyzer",
  "text": "测试文本包含特殊术语1"
}
六、高级自定义(插件开发)
‌适用场景‌:需完全定制分词逻辑(如专用算法)

编写 Java 插件类实现 AnalysisPlugin 扩展点,提供自定义的 TokenizerFactory/AnalyzerProvider 实现分词逻辑
打包为插件并部署到 Elasticsearch 的 plugins 目录
在索引配置中引用插件分词器。
七、典型实践推荐
‌中文分词‌:优先集成 IK 分词器,通过 ik_smart(粗粒度)或 ik_max_word(细粒度)模式优化
‌同义词扩展‌:通过 synonym 过滤器加载外部词库文件
‌停用词过滤‌:结合业务场景动态更新停用词列表
通过上述方法,可灵活适配不同语言和业务场景的分词需求。调试时建议结合 _analyze API 实时验证分词效果
PUT /my_index
{
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "ik_max_word",  
        "search_analyzer": "ik_smart" 
      }
    }
  }
}

PUT /my_index/_doc/1
{
  "title": "美羊羊,我爱美羊羊"
}


GET /my_index/_analyze
{
  "text": "我爱美羊羊",
  "analyzer": "ik_smart"
}
  • 倒排索引
1. 倒排索引的基本概念‌
‌倒排索引‌是一种将文档中的‌词项(Term)‌映射到包含这些词项的‌文档集合‌的数据结构。
与传统数据库的“正排索引”(通过文档ID查找内容)不同,
倒排索引是通过‌关键词反向追溯文档‌,
类似于书籍末页的“关键词索引”。

‌举例说明‌
假设有以下三个文档:

‌Doc1‌: "苹果是一种水果"
‌Doc2‌: "苹果公司生产手机"
‌Doc3‌: "水果手机很流行"
倒排索引会将这些文档拆解为词项,并记录每个词项所在的文档信息:

text
Copy Code
词项    | 出现的文档及位置
-------------------------------
苹果    → Doc1(位置0), Doc2(位置0)
水果    → Doc1(位置2), Doc3(位置0)
手机    → Doc2(位置3), Doc3(位置2)
公司    → Doc2(位置1)
生产    → Doc2(位置2)
...
‌2. 倒排索引的核心组成‌
‌词项字典(Term Dictionary)‌:所有文档中唯一的词项列表,通常排序后存储在内存中以加速查找。
‌倒排列表(Postings List)‌:每个词项对应的文档ID列表,以及附加信息(如词频、位置、偏移量等)。
‌文档ID(Doc ID)‌:包含该词项的文档标识。
‌词频(Term Frequency, TF)‌:词项在文档中出现的次数(用于相关性评分)。
‌位置(Position)‌:词项在文档中的位置(用于短语查询或邻近搜索)。
‌偏移量(Offset)‌:词项在文本中的起始和结束字符位置。
‌3. Elasticsearch 中倒排索引的构建流程‌
‌文本分析(Text Analysis)‌:

‌分词(Tokenization)‌:将文本拆分为独立的词项(如“苹果公司” → “苹果”和“公司”)。
‌过滤(Filtering)‌:移除停用词(如“的”、“是”),并进行大小写转换、词干提取(如“running” → “run”)等操作。
最终生成标准化的词项列表。
‌索引写入‌:

将词项与文档的映射关系(包括位置、词频等信息)写入倒排索引。
‌4. 倒排索引的优势‌
‌高效查询‌:直接通过词项定位文档,无需逐条扫描所有文档。
‌支持复杂搜索‌:
‌布尔查询‌(AND/OR/NOT):合并多个词项的倒排列表。
‌短语查询‌:利用位置信息匹配连续的词项序列。
‌模糊查询‌:通过编辑距离(如“appel” → “apple”)匹配近似词项。
‌相关性评分‌:基于词频(TF)、逆文档频率(IDF)等计算文档与查询的相关性(如TF-IDF算法或BM25)。
‌5. 倒排索引的实际应用示例‌
‌搜索“苹果手机”‌:
拆分为词项“苹果”和“手机”。
查询倒排索引,找到包含这两个词项的文档(如Doc2和Doc3)。
根据位置信息判断是否为连续短语(如Doc2中“苹果”在位置0,“公司”在位置1,“生产”在位置2,“手机”在位置3,因此“苹果手机”不连续;Doc3中“水果”在位置0,“手机”在位置2,也不连续)。
返回相关性评分最高的结果。
‌6. 与其他索引的对比‌
‌正排索引(Forward Index)‌:通过文档ID查找内容,适用于文档检索,但无法高效支持关键词搜索。
‌B-Tree索引(传统数据库)‌:适合精确匹配和范围查询,但在全文搜索中性能较低。
‌总结‌
Elasticsearch 的倒排索引通过词项到文档的反向映射,
实现了高效的全文搜索能力。结合分词、过滤、评分等机制,
使其能够快速处理复杂的查询需求(如布尔逻辑、短语匹配、模糊搜索等),
成为大规模文本搜索场景下的核心工具。