一、docker安装elasticsearch
-
安装elasticsearch
docker pull elasticsearch:7.10.1
-
安装kibana
docker pull kibana:7.10.1
-
新建目录
mkdir -p /mydata/elasticsearch/config && mkdir -p /mydata/elasticsearch/data
-
添加配置文件
echo "http.host: 0.0.0.0">>/mydata/elasticsearch/config/elasticsearch.yml -
修改权限
chmod -R 777 /mydata/elasticsearch/ -
运行
docker run --name elasticsearch --privileged=true -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" -e ES_JAVA_OPTS="-Xms64m -Xmx128m" -v /mydata/elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml -v /mydata/elasticsearch/data:/usr/share/elasticsearch/data -v /mydata/elasticsearch/plugins:/usr/share/elasticsearch/plugins -d elasticsearch:7.10.1
-
运行Kibana
docker run --name kibana -e ELASTICSEARCH_HOSTS=http://192.168.1.16:9200 -p 5601:5601 -d kibana:7.10.1 -
若访问 Kibana 时提示 "Kibana server is not ready yet",通常是 ELASTICSEARCH_HOSTS 中的 ip 不对(应填写 elasticsearch 所在主机的 ip)
二、初步检索
1. _cat
- GET /_cat/nodes:查看所有节点
- GET /_cat/health:查看es健康状态
- GET /_cat/master:查看主节点
- GET /_cat/indices:查看所有索引
2. 索引一个文档
-
PUT:请求必须带id,第一次请求为新增,第二次相同请求为更新。
http://192.168.1.16:9200/mall/product/1{ "name":"小米" }返回数据
{ "_index": "mall", "_type": "product", "_id": "1", "_version": 1, "result": "created", "_shards": { "total": 2, "successful": 1, "failed": 0 }, "_seq_no": 0, "_primary_term": 1 } -
POST
-
携带id(和PUT相同)如果不指定
_id,将会自动生成一个 ID -
不携带id,新增
http://192.168.1.16:9200/mall/product/{ "_index": "mall", "_type": "product", "_id": "sH7hqncBShQ_AhSZanfv", "_version": 1, "result": "created", "_shards": { "total": 2, "successful": 1, "failed": 0 }, "_seq_no": 1, "_primary_term": 1 }
-
-
乐观锁修改
http://192.168.1.16:9200/mall/product/1?if_seq_no=0&if_primary_term=1{ "name":"xioami" }结果
{ "_index": "mall", "_type": "product", "_id": "1", "_version": 2, "result": "updated", "_shards": { "total": 2, "successful": 1, "failed": 0 }, "_seq_no": 2, "_primary_term": 1 }再次发送请求,发生409
{ "error": { "root_cause": [ { "type": "version_conflict_engine_exception", "reason": "[1]: version conflict, required seqNo [0], primary term [1]. current document has seqNo [2] and primary term [1]", "index_uuid": "GvqWxKBsSQCq9NJbDiIhQQ", "shard": "0", "index": "mall" } ], "type": "version_conflict_engine_exception", "reason": "[1]: version conflict, required seqNo [0], primary term [1]. current document has seqNo [2] and primary term [1]", "index_uuid": "GvqWxKBsSQCq9NJbDiIhQQ", "shard": "0", "index": "mall" }, "status": 409 }
3. 查询文档
-
GET
http://192.168.1.16:9200/mall/product/1{ "_index": "mall", //索引 "_type": "product", //类型 "_id": "1", //id "_version": 1, //版本号 "_seq_no": 0, //并发控制字段,每次更新都会+1,用来做乐观锁 "_primary_term": 1, //同上,主分片重新分配,如重启就会变化 "found": true, "_source": { //内容 "name": "小米" } }
4. 更新文档
-
POST,带_update,检查原数据,可增加属性
http://192.168.1.16:9200/mall/product/1/_update{ "doc":{ "name":"小米1" } }结果
{ "_index": "mall", "_type": "product", "_id": "1", "_version": 4, "result": "updated", "_shards": { "total": 2, "successful": 1, "failed": 0 }, "_seq_no": 4, "_primary_term": 1 }再次发送请求,会对比数据是否发生变化,若未发生变化,则不做操作,_version和_seq_no都不变
{ "_index": "mall", "_type": "product", "_id": "1", "_version": 4, "result": "noop", "_shards": { "total": 0, "successful": 0, "failed": 0 }, "_seq_no": 4, "_primary_term": 1 } -
POST,不带_update,不检查原数据,可增加属性
http://192.168.1.16:9200/mall/product/1{ "name":"小米1" }结果
{ "_index": "mall", "_type": "product", "_id": "1", "_version": 5, "result": "updated", "_shards": { "total": 2, "successful": 1, "failed": 0 }, "_seq_no": 5, "_primary_term": 1 } -
PUT,和不带_update的POST一样,可增加属性
http://192.168.1.16:9200/mall/product/1{ "name":"小米1" }{ "_index": "mall", "_type": "product", "_id": "1", "_version": 6, "result": "updated", "_shards": { "total": 2, "successful": 1, "failed": 0 }, "_seq_no": 6, "_primary_term": 1 }
5. 删除文档和索引
-
删除文档DELETE
http://192.168.1.16:9200/mall/product/1 -
删除索引
http://192.168.1.16:9200/mall
6. 批量操作
-
_bulk
POST mall/product/_bulk
{"index": {"_id": "1"}}
{"name": "小米"}
{"index": {"_id": "2"}}
{"name": "华为"}
结果
{ "took" : 8, "errors" : false, "items" : [ { "index" : { "_index" : "mall", "_type" : "product", "_id" : "1", "_version" : 7, "result" : "updated", "_shards" : { "total" : 2, "successful" : 1, "failed" : 0 }, "_seq_no" : 7, "_primary_term" : 1, "status" : 200 } }, { "index" : { "_index" : "mall", "_type" : "product", "_id" : "2", "_version" : 1, "result" : "created", "_shards" : { "total" : 2, "successful" : 1, "failed" : 0 }, "_seq_no" : 8, "_primary_term" : 1, "status" : 201 } } ] } -
完整的bulk操作
POST /_bulk
{ "delete": { "_index": "website", "_type": "blog", "_id": "123" }}
{ "create": { "_index": "website", "_type": "blog", "_id": "123" }}
{ "title": "My first blog post" }
{ "index": { "_index": "website", "_type": "blog" }}
{ "title": "My second blog post" }
{ "update": { "_index": "website", "_type": "blog", "_id": "123"} }
{ "doc" : {"title" : "My updated blog post"} }
三、测试
1. 测试数据
2. 检索的两种方式
-
请求参数方式检索
GET bank/_search?q=*&sort=account_number:asc
-
Query DSL
GET /bank/_search { "query": { "match_all": {} }, "sort": [ { "account_number": "asc" } ] } -
match查询
-
精确查询
GET /bank/_search { "query": { "match": { "balance": 16418 } } } -
全文检索
GET /bank/_search { "query": { "match": { "address": "mill lane" } } } # 精确匹配 GET /bank/_search { "query": { "match": { "address.keyword": "mill lane" } } } -
短语检索
GET /bank/_search { "query": { "match_phrase": { "address": "mill lane" } } } -
多字段匹配
GET /bank/_search { "query": { "multi_match": { "query": "mill" , "fields":["address","city"] } } } -
复合查询
GET /bank/_search { "query": { "bool": { "must": [ { "match": { "age": "40" } } ], "must_not": [ { "match": { "state": "ID" } } ] } } } -
filter结果过滤,不计算相关性得分
GET /bank/_search { "query": { "bool": { "must": { "match_all": {} }, "filter": { "range": { "balance": { "gte": 20000, "lte": 30000 } } } } } } -
term,根据精确值查找,但是对文本查找比较困难,文本查找推荐使用match。建议:text查询用match,非text查询用term
GET /_search { "query": { "term": { "balance": 19955 } } } -
aggregations
## 搜索address中包含mill的所有人的年龄分布及平均年龄
GET bank/_search { "query": { "match": { "address": "mill" } }, "aggs": { "ageAgg": { "terms": { "field": "age", "size": 10 } }, "ageAvg": { "avg": { "field": "age" } } } }
结果
"aggregations" : { "ageAgg" : { "doc_count_error_upper_bound" : 0, "sum_other_doc_count" : 0, "buckets" : [ { "key" : 38, "doc_count" : 2 }, { "key" : 28, "doc_count" : 1 }, { "key" : 32, "doc_count" : 1 } ] }, "ageAvg" : { "value" : 34.0 } }
## 按照年龄聚合后,求各年龄段的人的平均薪资
GET bank/_search { "query": { "match_all": {} }, "aggs": { "ageAgg": { "terms": { "field": "age", "size": 100 }, "aggs": { "balanceAgg": { "avg": { "field": "balance" } } } } } }
## 所有年龄分段,并查出各年龄段gender为M的平均薪资,和gender为F的平均薪资及年龄段总体平均薪资
GET bank/_search { "query": { "match_all": {} }, "aggs": { "ageAgg": { "terms": { "field": "age", "size": 100 }, "aggs": { "genderAgg": { "terms": { "field": "gender.keyword" }, "aggs": { "balanceAvg": { "avg": { "field": "balance" } } } }, "totalAvg":{ "avg": { "field": "balance" } } } } } }
-
mapping
创建索引并指定映射
PUT /my-index-000001 { "mappings": { "properties": { "age": { "type": "integer" }, "email": { "type": "keyword" }, "name": { "type": "text" } } } }添加新的字段映射
PUT /my-index-000001/_mapping
{
  "properties": {
    "employee-id": {
      "type": "keyword",
      "index": false //默认为true,false表示不被索引,即不能检索到
    }
  }
}
已存在字段的映射不能直接修改;如需变更,只能创建新的索引并指定新的映射,然后将数据重新索引(reindex)到新索引中。
-
数据迁移
固定写法
POST _reindex { "source": { "index": "my-index-000001" }, "dest": { "index": "my-new-index-000001" } }将旧索引的type下的数据进行迁移
POST _reindex { "source": { "index": "bank", "type": "account" }, "dest": { "index": "newbank" } } -
分词
POST _analyze { "analyzer": "standard", "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone." }-
安装ik分词器
wget https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.10.1/elasticsearch-analysis-ik-7.10.1.zip --2021-02-18 21:54:10-- https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.10.1/elasticsearch-analysis-ik-7.10.1.zip -
测试中文分词
POST _analyze { "analyzer": "ik_smart", "text": "我是中国人" } POST _analyze { "analyzer": "ik_max_word", "text": "我是中国人" }
-
-
自定义扩展词库
-
启动nginx
docker run -p 80:80 --name nginx -d nginx:1.18.0 -
将容器的配置复制到mydata/nginx下
docker container cp nginx:/etc/nginx . -
修改文件夹
mv nginx conf && mkdir nginx && mv conf nginx/
-
重新启动nginx并挂载目录(需先删除之前的临时容器:docker stop nginx && docker rm nginx)
docker run -p 80:80 --name nginx --privileged=true -v /mydata/nginx/html:/usr/share/nginx/html -v /mydata/nginx/logs:/var/log/nginx -v /mydata/nginx/conf:/etc/nginx -d nginx:1.18.0 76a4219c3697f643f8c71af730e4e32818cf767893ad602a50a30c6e0866634b -
在/html
mkdir es && cd es && vi fenci.txt #加入自定义词语
-
修改es配置
cd ik/config && vi IKAnalyzer.cfg.xml #在 remote_ext_dict 中添加自定义词典地址http://192.168.0.108/es/fenci.txt
-
-
四、整合rest-high-level-client
1. 导入依赖
<!-- 导入ES的rest-high-level-client -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.10.1</version>
</dependency>
2. 配置文件
@Configuration
public class GulimallElasticSearchConfig {
public static final RequestOptions COMMON_OPTIONS;
static {
RequestOptions.Builder builder = RequestOptions.DEFAULT.toBuilder();
// builder.addHeader("Authorization", "Bearer " + TOKEN);
// builder.setHttpAsyncResponseConsumerFactory(
// new HttpAsyncResponseConsumerFactory
// .HeapBufferedResponseConsumerFactory(30 * 1024 * 1024 * 1024));
COMMON_OPTIONS = builder.build();
}
public RestHighLevelClient restHighLevelClient(){
RestHighLevelClient client = new RestHighLevelClient(
RestClient.builder(
new HttpHost("10.138.213.16", 9200, "http")));
return client;
}
}
3. 测试
/**
 * Indexes one sample user document with id "1" into the "users" index
 * and prints the response returned by Elasticsearch.
 */
@Test
void indexData() throws IOException {
    // Payload to store; it is serialized to JSON before being sent.
    User sample = new User();
    sample.setUserName("张三");
    sample.setGender("男");
    sample.setAge(18);

    // Alternative: pass key/value pairs directly, e.g.
    // request.source("userName", "张三", "gender", "男", "age", 18);
    IndexRequest request = new IndexRequest("users")
            .id("1") // document id
            .source(JSON.toJSONString(sample), XContentType.JSON); // document body

    // Execute the request and print the response.
    IndexResponse response = client.index(request, GulimallElasticSearchConfig.COMMON_OPTIONS);
    System.out.println(response);
}
/**
 * Sample document used by the indexing test: it is populated through its
 * setters and serialized to JSON as the body of the index request.
 * Accessors are generated by Lombok's {@code @Data}.
 */
@Data
class User{
// User name of the sample person.
private String userName;
// Gender of the sample person.
private String gender;
// Age of the sample person.
private Integer age;
}
4. 测试检索
/**
 * Searches the "bank" index for documents whose address matches "mill",
 * aggregating the hits by age and computing the average balance, then
 * prints the hits and both aggregation results.
 */
@Test
void searchData() throws IOException {
    // 1. Describe the search: query plus the two aggregations.
    SearchSourceBuilder dsl = new SearchSourceBuilder()
            // 1.1 match query on the address field
            .query(QueryBuilders.matchQuery("address", "mill"))
            // 1.2 terms aggregation over age (top 10 buckets)
            .aggregation(AggregationBuilders.terms("ageAgg").field("age").size(10))
            // 1.3 average of the balance field
            .aggregation(AggregationBuilders.avg("balanceAvg").field("balance"));
    System.out.println("检索条件:" + dsl);

    // Bind the DSL to a request against the target index.
    SearchRequest request = new SearchRequest();
    request.indices("bank");
    request.source(dsl);

    // 2. Run the search.
    SearchResponse response = client.search(request, GulimallElasticSearchConfig.COMMON_OPTIONS);

    // 3. Inspect the response.
    System.out.println(response.toString());

    // 3.1 Convert every hit's source JSON into an Account and print it.
    for (SearchHit hit : response.getHits().getHits()) {
        Account account = JSON.parseObject(hit.getSourceAsString(), Account.class);
        System.out.println("账户:" + account);
    }

    // 3.2 Read the aggregation results by the names given above.
    Aggregations aggs = response.getAggregations();
    Terms ageBuckets = aggs.get("ageAgg");
    for (Terms.Bucket bucket : ageBuckets.getBuckets()) {
        System.out.println("年龄" + bucket.getKeyAsString());
    }
    Avg averageBalance = aggs.get("balanceAvg");
    System.out.println("平均薪资" + averageBalance.getValue());
}
/**
 * Target type that each search hit's source JSON is parsed into by the
 * search test. Accessors and {@code toString} are generated by Lombok.
 *
 * NOTE(review): field names (including the snake_case account_number)
 * presumably mirror the keys of the documents in the "bank" index - confirm
 * against the test data before renaming anything.
 */
@ToString
@Data
static class Account {
private int account_number;
private int balance;
private String firstname;
private String lastname;
private int age;
private String gender;
private String address;
private String employer;
private String email;
private String city;
private String state;
}