【JAVA】Springboot整合Elasticsearch

100 阅读3分钟

1. Elasticsearch

1.1 倒排索引

图片.png

  • 左邊的就是常見的如MySQL數據庫:通過id創建索引,可以快速搜索;但如果條件是title,使用 like '%xxx%' 則無法利用索引,只能逐條去篩選(全表掃描),數據量大時會非常慢。

  • 倒排索引就是將title的內容進行分詞,再次存儲起來,通過關鍵字和詞條匹配,來找到數據。

1.2 Index

前面我們將title的內容分成了多個term和document,通過匹配term來找到所有的document

Index在Elasticsearch裡面就是相同類型文檔的集合,類似數據庫的表

图片.png

图片.png

图片.png

1.2.1 Create Index

图片.png

# Create the "employ" index with an explicit mapping:
#   info  - text field analyzed with the "standard" analyzer
#   email - keyword field declared with "index": false (not indexed for search)
#   name  - object field with two keyword sub-fields, firstName and lastName
PUT /employ
{
  "mappings": {
    "properties": {
      "info":{
        "type": "text",
        "analyzer": "standard"
      },
      "email":{
        "type": "keyword",
        "index": false  
      },
      "name":{
        "type": "object",
        "properties": {
          "firstName":{
            "type": "keyword"
          },
          "lastName":{
            "type": "keyword"
          }
        }
      }
    }
  }
}

1.3 IK分詞器

下載解壓,放到ES的Plugins目錄下

github.com/medcl/elast…

Analyzer: ik_smart , ik_max_word , Tokenizer: ik_smart , ik_max_word

1.3.1 Custom

修改ik中的config目录下面的IKAnalyzer.cfg.xml文件

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
	<comment>IK Analyzer 扩展配置</comment>
	<!-- Custom extension dictionaries are configured here -->
	<entry key="ext_dict">my_work.dic</entry>
	 <!-- Custom extension stop-word dictionaries are configured here (none set) -->
	<entry key="ext_stopwords"></entry>
	<!-- A remote extension dictionary can be configured here (disabled) -->
	<!-- <entry key="remote_ext_dict">words_location</entry> -->
	<!-- A remote extension stop-word dictionary can be configured here (disabled) -->
	<!-- <entry key="remote_ext_stopwords">words_location</entry> -->
</properties>

同級目錄新增字典文件my_work.dic(文件名要與上面ext_dict的設定一致),裡面就是自己整理的、希望被當作一個完整詞條的關鍵字,每行一個

比如我把程序員三個字放在一起

1.3.2 Test

步驟參考3.2

# Test the analyzers: run the text through ik_smart
POST /_analyze
{
  "text": "小明打籃球Basketball",
  "analyzer": "ik_smart"
}

# Same API, this time with ik_max_word
POST /_analyze
{
  "text": "程序員很愛使用github",
  "analyzer": "ik_max_word"
}

图片.png

1.4

2. Code

Spring Boot / Spring Data Elasticsearch 的版本要和 Elasticsearch 服務端的版本對應,對照表見下面的官方文檔:

docs.spring.io/spring-data…

图片.png

2.1 Dependency

<!-- Dependency versions are inherited from the Spring Boot parent, so none are declared below -->
<parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>2.5.6</version>
    <relativePath/> <!-- lookup parent from repository -->
</parent>
<groupId>com.example</groupId>
<artifactId>springboot-elasticsearch</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>springboot-elasticsearch</name>
<description>springboot-elasticsearch</description>
<properties>
    <java.version>1.8</java.version>
</properties>
<dependencies>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter</artifactId>
    </dependency>
    <!-- Spring Data Elasticsearch (repositories + ElasticsearchRestTemplate) -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    </dependency>


    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
    </dependency>
    <!-- Lombok is only needed at compile time; optional keeps it out of downstream artifacts -->
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <optional>true</optional>
    </dependency>
    <!-- JUnit 4 is a test framework: test scope keeps it off the application's runtime classpath -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
</dependencies>

2.2 Application.properties

# Port the application listens on
server.port=8080

# Enable Spring Data Elasticsearch repositories
# NOTE(review): creating a missing index is presumably driven by @Document(createIndex = true) on the entity, not by this flag alone - confirm
spring.data.elasticsearch.repositories.enabled=true

# Elasticsearch REST endpoint and credentials
# NOTE(review): 9201 is not the usual Elasticsearch HTTP port (9200) - confirm it matches the node / port mapping in use
spring.elasticsearch.rest.uris=http://localhost:9201
spring.elasticsearch.rest.username=elastic
spring.elasticsearch.rest.password=1234567

2.3 Entity

/**
 * Student document mapped to the {@code studentcluster} index.
 * Lombok's {@code @Data} generates the accessors (e.g. {@code setId}, {@code setName}, {@code setAddress}).
 */
@Document(indexName = "studentcluster")
@Data
public class Student {
    // NOTE(review): there is no explicit @Id; presumably the field named "id" is used as the document id by convention - confirm
    private int id;
    private String name;
    private String address;
}

@Document 註解用來設定索引相關的參數(索引名稱、是否自動創建索引等),其定義如下:

/**
 * Type-level annotation that names the Elasticsearch index an entity class maps to.
 * Only {@link #indexName()} is required; every other attribute declares a default.
 */
@Persistent
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.TYPE})
public @interface Document {
    // Name of the index; the only attribute without a default
    String indexName();

    /** @deprecated */
    @Deprecated
    boolean useServerConfiguration() default false;

    /** @deprecated */
    @Deprecated
    short shards() default 1;

    /** @deprecated */
    @Deprecated
    short replicas() default 1;

    /** @deprecated */
    @Deprecated
    String refreshInterval() default "1s";

    /** @deprecated */
    @Deprecated
    String indexStoreType() default "fs";

    // Defaults to true
    // NOTE(review): presumably controls whether the index is created automatically when missing - confirm
    boolean createIndex() default true;

    // Defaults to external versioning
    VersionType versionType() default VersionType.EXTERNAL;
}
// The generic parameters are the entity type and the id (primary key) type, respectively
@Repository
public interface StudentRepository extends ElasticsearchRepository<Student,Integer> {

}
package com.example.service;

import com.fasterxml.jackson.core.JsonProcessingException;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;


/**
 * Demo service: checks whether the index behind an entity exists and stores sample students.
 */
@Service
public class StudentService {
    @Autowired
    private StudentRepository studentRepository;
    @Autowired
    private ElasticsearchRestTemplate elasticsearchRestTemplate;

    /**
     * Reports whether the index backing the given entity class exists and prints the result.
     *
     * <p>The index name is taken from the template's resolved index coordinates instead of being
     * read reflectively from {@code @Document}. The reflective lookup throws a
     * {@code NullPointerException} when the annotation is not directly present on the class and
     * returns the raw, unresolved value when {@code indexName} is an expression.
     *
     * <p>Note: with repositories enabled, the index for an entity is created at startup only when
     * its {@code @Document} has {@code createIndex = true} (the default).
     *
     * @param cls entity class mapped to an Elasticsearch index
     * @return {@code true} if the index exists, {@code false} otherwise
     */
    public boolean checkIndexExists(Class<?> cls){
        boolean isExist = elasticsearchRestTemplate.indexOps(cls).exists();
        // Resolve the index name the same way the template does for real operations
        String indexName = elasticsearchRestTemplate.getIndexCoordinatesFor(cls).getIndexName();
        System.out.printf("index %s is %s\n", indexName, isExist ? "exist" : "not exist");
        return isExist;
    }

    /**
     * Checks whether the {@link Student} index exists.
     *
     * @return {@code true} if the index exists
     */
    public boolean test() {
        return checkIndexExists(Student.class);
    }

    /**
     * Stores two sample students (ids 1 and 2) through the repository.
     */
    public void save(){
        studentRepository.save(newStudent(1, "Tom", "ShenZhen"));
        studentRepository.save(newStudent(2, "Jom", "GuangZhou"));
    }

    /** Builds a {@link Student} populated with the given values. */
    private static Student newStudent(int id, String name, String address) {
        Student student = new Student();
        student.setId(id);
        student.setName(name);
        student.setAddress(address);
        return student;
    }
}

3. Kibana

www.elastic.co/downloads

图片.png

3.1 UI

兩個箭頭分別是Kibana界面對Elasticsearch的兩類操作:監控,以及增刪改查

如果只是監控,可以使用ElasticHD

图片.png

3.2 隨便玩一玩

# Match every document across all indices
GET _search
{
  "query": {
    "match_all": {}
  }
}

# Index four sample documents into "story" with explicit ids 1-4
PUT story/_doc/1
{
  "name":"张飞",
  "age":30,
  "from": "China",
  "desc": "皮肤黑、武器重、性格直",
  "tags": ["黑", "重", "直"]
}

PUT story/_doc/2
{
  "name":"赵云",
  "age":18,
  "from":"China",
  "desc":"帅气逼人,一身白袍",
  "tags":["帅", "白"]
}

PUT story/_doc/3
{
  "name":"关羽",
  "age":22,
  "from":"England",
  "desc":"大刀重,骑赤兔马,胡子长",
  "tags":["重", "马","长"]
}

PUT story/_doc/4
{
  "name":"馬超",
  "age":22,
  "from":"England",
  "desc":"大刀重,一身白袍",
  "tags":["帅"]
}

# Partially update document 4: only "age" changes, other fields are kept.
# Uses the typeless endpoint; the typed form POST story/_doc/4/_update is deprecated in 7.x and removed in 8.x.
POST story/_update/4
{
  "doc": {
    "age": 18
  }
}


# Find documents whose age is 22.
# Typeless search endpoint; GET story/_doc/_search is deprecated in 7.x and removed in 8.x.
GET story/_search
{
  "query": {
    "match": {
      "age": "22"
    }
  }
}

# Find documents from England, oldest first
GET story/_search
{
  "query": {
    "match": {
      "from": "England"
    }
  },
  "sort": [
    {
      "age": {
        "order": "desc"
      }
    }
  ]
}

3.3 高亮查詢

3.3.1 plain highlight

默认方式,底层lucene highlight

# Highlight query: matches in the "from" field are returned wrapped in highlight tags.
# Typeless search endpoint; GET story/_doc/_search is deprecated in 7.x and removed in 8.x.
GET story/_search
{
  "query": {
    "match": {
      "from": "England"
    }
  },
  "highlight": {
    "fields": {
      "from": {}
    }
  }
}

3.3.2 posting highlight

設置"index_options": "offsets"

性能更好一點

# Create "company": "desc" uses the standard analyzer, "name" uses ik_max_word.
# "name" sets "index_options": "offsets"
# NOTE(review): presumably this stores character offsets in the index so highlighting need not re-analyze the text - confirm
PUT /company
{
  "mappings": {
    "properties": {
      "desc":{
        "type": "text",
        "analyzer": "standard"
      },
      "name":{
        "type": "text",
        "analyzer": "ik_max_word",
        "index_options": "offsets"
      }
    }
  }
}

3.3.3 fast vector highlight

"term_vector": "with_positions_offsets"

# Same mapping shape, but "name" sets "term_vector": "with_positions_offsets" for the fast vector highlighter.
# NOTE(review): an index named "company" is also created by an earlier request in this article; creating it
# again presumably fails unless it is deleted first (DELETE /company) or another index name is used - confirm
PUT /company
{
  "mappings": {
    "properties": {
      "desc":{
        "type": "text",
        "analyzer": "standard"
      },
      "name":{
        "type": "text",
        "analyzer": "ik_max_word",
        "term_vector": "with_positions_offsets"
      }
    }
  }
}