Elasticsearch基本操作

253 阅读6分钟

一、docker安装elasticsearch

  1. 安装elasticsearch

    docker pull elasticsearch:7.10.1
    
  2. 安装kibana

    docker pull kibana:7.10.1
    
  3. 新建目录

    mkdir -p /mydata/elasticsearch/config
    mkdir -p /mydata/elasticsearch/data
    
  4. 添加配置文件

    echo "http.host: 0.0.0.0">>/mydata/elasticsearch/config/elasticsearch.yml
    
  5. 修改权限

    chmod -R 777 /mydata/elasticsearch/
    
  6. 运行

    docker run --name elasticsearch --privileged=true -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" -e ES_JAVA_OPTS="-Xms64m -Xmx128m" -v /mydata/elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml -v /mydata/elasticsearch/data:/usr/share/elasticsearch/data -v /mydata/elasticsearch/plugins:/usr/share/elasticsearch/plugins -d elasticsearch:7.10.1
    
  7. 运行Kibana

    docker run --name kibana -e ELASTICSEARCH_HOSTS=http://192.168.1.16:9200 -p 5601:5601 -d kibana:7.10.1
    
  8. 如果访问Kibana时提示“Kibana server is not ready yet”,通常是ELASTICSEARCH_HOSTS中配置的ip不对(应为elasticsearch所在主机的ip)

二、初步检索

1. _cat

  1. GET /_cat/nodes:查看所有节点
  2. GET /_cat/health:查看es健康状态
  3. GET /_cat/master:查看主节点
  4. GET /_cat/indices:查看所有索引

2. 索引一个文档

  1. PUT:请求必须带id,第一次请求为新增,第二次相同请求为更新。

    http://192.168.1.16:9200/mall/product/1
    
    {
        "name":"小米"
    }
    

    返回数据

    {
        "_index": "mall",
        "_type": "product",
        "_id": "1",
        "_version": 1,
        "result": "created",
        "_shards": {
            "total": 2,
            "successful": 1,
            "failed": 0
        },
        "_seq_no": 0,
        "_primary_term": 1
    }
    
  2. POST

    1. 携带id:和PUT相同,第一次请求为新增,之后相同id的请求为更新

    2. 不携带id:每次请求都是新增,es会自动生成一个 _id

      http://192.168.1.16:9200/mall/product/
      
      {
          "_index": "mall",
          "_type": "product",
          "_id": "sH7hqncBShQ_AhSZanfv",
          "_version": 1,
          "result": "created",
          "_shards": {
              "total": 2,
              "successful": 1,
              "failed": 0
          },
          "_seq_no": 1,
          "_primary_term": 1
      }
      
  3. 乐观锁修改

    http://192.168.1.16:9200/mall/product/1?if_seq_no=0&if_primary_term=1
    
    {
        "name":"xiaomi"
    }
    

    结果

    {
        "_index": "mall",
        "_type": "product",
        "_id": "1",
        "_version": 2,
        "result": "updated",
        "_shards": {
            "total": 2,
            "successful": 1,
            "failed": 0
        },
        "_seq_no": 2,
        "_primary_term": 1
    }
    

    再次发送相同请求,此时文档的_seq_no已变为2,与if_seq_no=0不匹配,返回409(版本冲突)

    {
        "error": {
            "root_cause": [
                {
                    "type": "version_conflict_engine_exception",
                    "reason": "[1]: version conflict, required seqNo [0], primary term [1]. current document has seqNo [2] and primary term [1]",
                    "index_uuid": "GvqWxKBsSQCq9NJbDiIhQQ",
                    "shard": "0",
                    "index": "mall"
                }
            ],
            "type": "version_conflict_engine_exception",
            "reason": "[1]: version conflict, required seqNo [0], primary term [1]. current document has seqNo [2] and primary term [1]",
            "index_uuid": "GvqWxKBsSQCq9NJbDiIhQQ",
            "shard": "0",
            "index": "mall"
        },
        "status": 409
    }
    

3. 查询文档

  1. GET

    http://192.168.1.16:9200/mall/product/1
    
    {
        "_index": "mall",	//索引
        "_type": "product",	//类型
        "_id": "1",			//id
        "_version": 1,		//版本号
        "_seq_no": 0,		//并发控制字段,每次更新都会+1,用来做乐观锁
        "_primary_term": 1,	//同上,主分片重新分配,如重启就会变化
        "found": true,
        "_source": {		//内容
            "name": "小米"
        }
    }
    

4. 更新文档

  1. POST,带_update,检查原数据,可增加属性

    http://192.168.1.16:9200/mall/product/1/_update
    
    {
        "doc":{
            "name":"小米1"
        }
    }
    

    结果

    {
        "_index": "mall",
        "_type": "product",
        "_id": "1",
        "_version": 4,
        "result": "updated",
        "_shards": {
            "total": 2,
            "successful": 1,
            "failed": 0
        },
        "_seq_no": 4,
        "_primary_term": 1
    }
    

    再次发送请求,会对比数据是否发生变化,若未发生变化,则不做操作,_version和_seq_no都不变

    {
        "_index": "mall",
        "_type": "product",
        "_id": "1",
        "_version": 4,
        "result": "noop",
        "_shards": {
            "total": 0,
            "successful": 0,
            "failed": 0
        },
        "_seq_no": 4,
        "_primary_term": 1
    }
    
  2. POST,不带_update,不检查原数据,可增加属性

    http://192.168.1.16:9200/mall/product/1
    
    {
        "name":"小米1"
    }
    

    结果

    {
        "_index": "mall",
        "_type": "product",
        "_id": "1",
        "_version": 5,
        "result": "updated",
        "_shards": {
            "total": 2,
            "successful": 1,
            "failed": 0
        },
        "_seq_no": 5,
        "_primary_term": 1
    }
    
  3. PUT,和不带_update的POST一样,可增加属性

    http://192.168.1.16:9200/mall/product/1
    
    {
        "name":"小米1"
    }
    
    {
        "_index": "mall",
        "_type": "product",
        "_id": "1",
        "_version": 6,
        "result": "updated",
        "_shards": {
            "total": 2,
            "successful": 1,
            "failed": 0
        },
        "_seq_no": 6,
        "_primary_term": 1
    }
    

5. 删除文档和索引

  1. 删除文档DELETE

    http://192.168.1.16:9200/mall/product/1
    
  2. 删除索引

    http://192.168.1.16:9200/mall
    

6. 批量操作

  1. _bulk

    POST mall/product/_bulk
    {"index": {"_id": "1"}}
    {"name": "小米"}
    {"index": {"_id": "2"}}
    {"name": "华为"}
    

    结果

    {
      "took" : 8,
      "errors" : false,
      "items" : [
        {
          "index" : {
            "_index" : "mall",
            "_type" : "product",
            "_id" : "1",
            "_version" : 7,
            "result" : "updated",
            "_shards" : {
              "total" : 2,
              "successful" : 1,
              "failed" : 0
            },
            "_seq_no" : 7,
            "_primary_term" : 1,
            "status" : 200
          }
        },
        {
          "index" : {
            "_index" : "mall",
            "_type" : "product",
            "_id" : "2",
            "_version" : 1,
            "result" : "created",
            "_shards" : {
              "total" : 2,
              "successful" : 1,
              "failed" : 0
            },
            "_seq_no" : 8,
            "_primary_term" : 1,
            "status" : 201
          }
        }
      ]
    }
    
  2. 完整的bulk操作

    POST /_bulk
    { "delete": { "_index": "website", "_type": "blog", "_id": "123" }} 
    { "create": { "_index": "website", "_type": "blog", "_id": "123" }}
    { "title":    "My first blog post" }
    { "index":  { "_index": "website", "_type": "blog" }}
    { "title":    "My second blog post" }
    { "update": { "_index": "website", "_type": "blog", "_id": "123"} }
    { "doc" : {"title" : "My updated blog post"} }
    

三、测试

1. 测试数据

github.com/elastic/ela…

2. 检索的两种方式

  1. 请求参数方式检索

    GET bank/_search?q=*&sort=account_number:asc
    
  2. Query DSL

    GET /bank/_search
    {
      "query": { "match_all": {} },
      "sort": [
        { "account_number": "asc" }
      ]
    }
    
  3. match查询

    1. 精确查询

      GET /bank/_search
      {
        "query": { 
            "match": { 
                "balance": 16418
            } 
        }
      }
      
    2. 全文检索

      GET /bank/_search
      {
        "query": { 
            "match": { 
                "address": "mill lane" 
            } 
        }
      }
      
      # 精确匹配
      GET /bank/_search
      {
        "query": { 
            "match": { 
                "address.keyword": "mill lane"
            } 
        }
      }
      
    3. 短语检索

      GET /bank/_search
      {
        "query": { 
            "match_phrase": { 
                "address": "mill lane" 
            } 
        }
      }
      
    4. 多字段匹配

      GET /bank/_search
      {
        "query": { 
            "multi_match": { 
                "query": "mill" ,
                "fields":["address","city"]
            } 
        }
      }
      
    5. 复合查询

      GET /bank/_search
      {
        "query": {
          "bool": {
            "must": [
              { "match": { "age": "40" } }
            ],
            "must_not": [
              { "match": { "state": "ID" } }
            ]
          }
        }
      }
      
    6. filter结果过滤,不计算相关性得分

      GET /bank/_search
      {
        "query": {
          "bool": {
            "must": { "match_all": {} },
            "filter": {
              "range": {
                "balance": {
                  "gte": 20000,
                  "lte": 30000
                }
              }
            }
          }
        }
      }
      
    7. term,根据精确值查找,但是对文本查找比较困难,文本查找推荐使用match。建议:text查询用match,非text查询用term

      GET /_search
      {
        "query": {
          "term": {
            "balance": 19955
          }
        }
      }
      
    8. aggregations

      ## 搜索address中包含mill的所有人的年龄分布及平均年龄
      GET bank/_search
      {
        "query": {
          "match": {
            "address": "mill"
          }
        },
        "aggs": {
          "ageAgg": {
            "terms": {
              "field": "age",
              "size": 10
            }
          },
          "ageAvg": {
            "avg": {
              "field": "age"
            }
          }
        }
      }
      

      结果

        "aggregations" : {
          "ageAgg" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
              {
                "key" : 38,
                "doc_count" : 2
              },
              {
                "key" : 28,
                "doc_count" : 1
              },
              {
                "key" : 32,
                "doc_count" : 1
              }
            ]
          },
          "ageAvg" : {
            "value" : 34.0
          }
        }
      
      ## 按照年龄聚合后,求各年龄段的人的平均薪资
      GET bank/_search
      {
        "query": {
          "match_all": {}
        },
        "aggs": {
          "ageAgg": {
            "terms": {
              "field": "age",
              "size": 100
            },
            "aggs": {
              "balanceAgg": {
                "avg": {
                  "field": "balance"
                }
              }
            }
          }
        }
      }
      
      ## 所有年龄分段,并查出各年龄段gender为M的平均薪资,和gender为F的平均薪资及年龄段总体平均薪资
      GET bank/_search
      {
        "query": {
          "match_all": {}
        },
        "aggs": {
          "ageAgg": {
            "terms": {
              "field": "age",
              "size": 100
            },
            "aggs": {
              "genderAgg": {
                "terms": {
                  "field": "gender.keyword"
                },
                "aggs": {
                  "balanceAvg": {
                    "avg": {
                      "field": "balance"
                    }
                  }
                }
              },
              "totalAvg":{
                "avg": {
                  "field": "balance"
                }
              }
            }
          }
        }
      }
      
    9. mapping

      创建索引并指定映射

      PUT /my-index-000001
      {
        "mappings": {
          "properties": {
            "age":    { "type": "integer" },  
            "email":  { "type": "keyword"  }, 
            "name":   { "type": "text"  }     
          }
        }
      }
      

      添加新的字段映射

      PUT /my-index-000001/_mapping
      {
        "properties": {
          "employee-id": {
            "type": "keyword",
            "index": false //默认为true,false表示不被索引,即不能检索到
          }
        }
      }
      

      已存在字段的映射不能直接修改;如需更新,只能创建一个带有正确映射的新索引,然后将数据重新索引(reindex)到该索引中。

    10. 数据迁移

      固定写法

      POST _reindex
      {
        "source": {
          "index": "my-index-000001"
        },
        "dest": {
          "index": "my-new-index-000001"
        }
      }
      

      将旧索引的type下的数据进行迁移

      POST _reindex
      {
        "source": {
          "index": "bank",
          "type": "account"
        },
        "dest": {
          "index": "newbank"
        }
      }
      
    11. 分词

      POST _analyze
      {
        "analyzer": "standard",
        "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."
      }
      
      1. 安装ik分词器

        wget https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.10.1/elasticsearch-analysis-ik-7.10.1.zip
        --2021-02-18 21:54:10--  https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.10.1/elasticsearch-analysis-ik-7.10.1.zip
        
      2. 测试中文分词

        POST _analyze
        {
          "analyzer": "ik_smart",
          "text": "我是中国人"
        }
        
        POST _analyze
        {
          "analyzer": "ik_max_word",
          "text": "我是中国人"
        }
        
    12. 自定义扩展词库

      1. 启动nginx

        docker run -p 80:80 --name nginx -d nginx:1.18.0
        
      2. 将容器的配置复制到mydata/nginx下

        docker container cp nginx:/etc/nginx .
        
      3. 修改文件夹

        mv nginx conf
        mkdir nginx
        mv conf nginx/
        
      4. 删除第1步的临时容器,挂载目录后重新启动nginx

        docker stop nginx && docker rm nginx
        docker run -p 80:80 --name nginx --privileged=true -v /mydata/nginx/html:/usr/share/nginx/html -v /mydata/nginx/logs:/var/log/nginx -v /mydata/nginx/conf:/etc/nginx -d nginx:1.18.0
        76a4219c3697f643f8c71af730e4e32818cf767893ad602a50a30c6e0866634b
        
      5. 在/mydata/nginx/html目录下创建词库文件

        mkdir es
        cd es
        vi fenci.txt #加入自定义词语
        
      6. 修改es配置

        cd ik/config
        vi IKAnalyzer.cfg.xml #在remote_ext_dict配置项中添加自定义词典地址http://192.168.0.108/es/fenci.txt,保存后重启es
        
        

四、整合rest-high-level-client

1. 导入依赖

<!-- 导入ES的rest-high-level-client -->
        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>elasticsearch-rest-high-level-client</artifactId>
            <version>7.10.1</version>
        </dependency>

2. 配置文件

/**
 * Spring configuration for talking to Elasticsearch through the high-level REST client.
 *
 * <p>Exposes the shared {@link #COMMON_OPTIONS} passed to every request and registers the
 * {@code RestHighLevelClient} as a bean so that other components (and tests) can inject it.
 */
@Configuration
public class GulimallElasticSearchConfig {

    /** Request options shared by all client calls; currently the library defaults, unmodified. */
    public static final RequestOptions COMMON_OPTIONS;

    static {
        RequestOptions.Builder builder = RequestOptions.DEFAULT.toBuilder();
        // Optional customisations, intentionally left disabled:
//        builder.addHeader("Authorization", "Bearer " + TOKEN);
//        builder.setHttpAsyncResponseConsumerFactory(
//                new HttpAsyncResponseConsumerFactory
//                        .HeapBufferedResponseConsumerFactory(30 * 1024 * 1024 * 1024));
        // NOTE(review): if the buffer limit above is ever enabled, the int product
        // 30 * 1024 * 1024 * 1024 overflows; pick a value that fits in an int.
        COMMON_OPTIONS = builder.build();
    }

    /**
     * Creates the Elasticsearch client pointing at the single node {@code 10.138.213.16:9200}
     * over plain HTTP.
     *
     * <p>Annotated with {@code @Bean} (from {@code org.springframework.context.annotation}, the
     * same package as {@code @Configuration}); without it the method is never invoked by the
     * container and no client is available for injection.
     *
     * @return a new client instance managed by the Spring container
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        return new RestHighLevelClient(
                RestClient.builder(
                        new HttpHost("10.138.213.16", 9200, "http")));
    }
}

3. 测试

 @Test
    void indexData() throws IOException {
        // Build the document to store and serialise it to JSON.
        // (Alternative: pass key/value pairs straight to source(...), e.g.
        //  source("userName","张三","gender","男","age",18).)
        User person = new User();
        person.setUserName("张三");
        person.setGender("男");
        person.setAge(18);
        String payload = JSON.toJSONString(person);

        // Target the "users" index with document id "1" and attach the JSON body.
        IndexRequest request = new IndexRequest("users")
                .id("1")
                .source(payload, XContentType.JSON);

        // Send the request using the shared request options and print the raw response.
        IndexResponse response = client.index(request, GulimallElasticSearchConfig.COMMON_OPTIONS);
        System.out.println(response);
    }
    @Data
    class User{
        private String userName;
        private String gender;
        private Integer age;
    }

4. 测试检索

 @Test
    void searchData() throws IOException {
        // 1. Describe the search (DSL): match "mill" in address, bucket hits by age,
        //    and compute the average balance.
        SearchSourceBuilder dsl = new SearchSourceBuilder();
        dsl.query(QueryBuilders.matchQuery("address","mill"));

        TermsAggregationBuilder byAge = AggregationBuilders.terms("ageAgg").field("age").size(10);
        dsl.aggregation(byAge);

        AvgAggregationBuilder avgBalance = AggregationBuilders.avg("balanceAvg").field("balance");
        dsl.aggregation(avgBalance);

        System.out.println("检索条件:"+dsl.toString());

        // 2. Bind the DSL to a request against the "bank" index and execute it.
        SearchRequest request = new SearchRequest();
        request.indices("bank");
        request.source(dsl);
        SearchResponse response = client.search(request, GulimallElasticSearchConfig.COMMON_OPTIONS);

        // 3. Inspect the response.
        System.out.println(response.toString());

        // 3.1 Map every hit's _source JSON onto an Account and print it.
        for (SearchHit hit : response.getHits().getHits()) {
            Account parsed = JSON.parseObject(hit.getSourceAsString(), Account.class);
            System.out.println("账户:"+parsed);
        }

        // 3.2 Read the aggregation results back by the names they were registered under.
        Aggregations aggs = response.getAggregations();

        Terms ageBuckets = aggs.get("ageAgg");
        for (Terms.Bucket bucket : ageBuckets.getBuckets()) {
            System.out.println("年龄"+bucket.getKeyAsString());
        }

        Avg balanceAverage = aggs.get("balanceAvg");
        System.out.println("平均薪资"+balanceAverage.getValue());
    }

    // Target type that searchData() deserialises each search hit's _source JSON into
    // (via JSON.parseObject) before printing it.
    // NOTE(review): field names presumably mirror the keys of the documents in the "bank"
    // index (hence the snake_case account_number) — confirm before renaming any of them.
    // NOTE(review): Lombok's @Data normally generates toString() already, so @ToString looks
    // redundant — confirm.
    @ToString
    @Data
    static class Account {

        private int account_number;
        private int balance;
        private String firstname;
        private String lastname;
        private int age;
        private String gender;
        private String address;
        private String employer;
        private String email;
        private String city;
        private String state;
    }