Elasticsearch
es、kibana、ik分词器、pinyin分词器的安装
- 需要分词:type=text
- 如果不需要分词,但是需要搜索:type=keyword
- 验证es是否正常启动
- 验证kibana是否正常启动
day02
DSL 查询文档
全文查询、范围查询、精确查询
-- 全文查询
GET /hotel/_search
{
"query":{
"match": {
"all": "如家"
}
}
}
GET /hotel/_search
{
"query":{
"multi_match": {
"query": "如家",
"fields": ["brand","city"]
}
}
}
-- 精确查询
GET /hotel/_search
{
"query":{
"term": {
"brand":{
"value": "如家"
}
}
}
}
-- 范围查询
GET /hotel/_search
{
"query":{
"range": {
"price":{
"gte": "80",
"lte":"200"
}
}
}
}
地理查询
GET /hotel/_search
{
"query":{
"geo_distance": {
"distance":"5km",
"location":"22.569693, 113.860186"
}
}
}
打分算法
- 具体算法
- 目前es 使用的是BM25算法。优点:受词频的影响较小,当一个词频繁出现时,得分的增长会趋于平缓。
算分函数查询(function_score)
GET /hotel/_search
{
"query":{
"function_score": {
"query": {
"match": {
"all": "外滩"
}
},
"functions": [
{
"filter": {
"term": {
"id": "432335"
}
},
"weight": 10
}
],
"boost_mode": "multiply"
}
}
}
复合查询
GET /hotel/_search
{
"query":{
"bool": {
"must": [
{
"term": {
"city": {
"value": "上海"
}
}
}
],
"should": [
{
"term": {
"brand": "华美达"
}
},
{
"term": {
"brand": {
"value": "皇冠假日"
}
}
}
],
"must_not": [
{
"range": {
"price": {
"lt": "50"
}
}
}
],
"filter": [
{
"range": {
"score": {
"gte": 0,
"lte": 200
}
}
}
]
}
}
}
搜索结果处理
排序
- 按照价格进行排序
- 按照经纬度进行排序
GET /hotel/_search
{
"query":{
"match_all": {
}
},
"sort": [
{
"_geo_distance": {
"location": {
"lat": 31.034661,
"lon": 121.612282
},
"order": "asc",
"unit": "km"
}
}
]
}
分页
# 分页
GET /hotel/_search
{
"query":{
"match_all": {
}
},
"from": 0,
"size": 20,
"sort": [
{
"price": {
"order": "desc"
}
}
]
}
高亮
GET /hotel/_search
{
"query":{
"match": {
"all": "如家"
}
},
"from": 0,
"size": 20,
"sort": [
{
"price": {
"order": "desc"
}
}
],
"highlight": {
"fields": {
"name": {
"require_field_match": "false"
}
}
}
}
RestClient文档处理
第三天
聚合
Bucket聚合| Metric聚合
GET /hotel/_search
{
"size":0,
"aggs": {
"brandAgg": {
"terms": {
"field": "brand",
"size": 10
}
}
}
}
# 根据聚合的结果(_count)升序排序
GET /hotel/_search
{
"size":0,
"aggs": {
"brandAgg": {
"terms": {
"field": "brand",
"order": {
"_count": "asc"
},
"size": 10
}
}
}
}
# 限定聚合的范围
GET /hotel/_search
{
"query": {
"range": {
"price": {
"gte": 10,
"lte": 1500
}
}
},
"size":0,
"aggs": {
"brandAgg": {
"terms": {
"field": "brand",
"order": {
"_count": "asc"
},
"size": 10
}
}
}
}
#DSL实现Metrics 聚合
get /hotel/_search
{
"size":0,
"aggs": {
"brandAgg": {
"terms": {
"field": "brand",
"order": {
"_count": "asc"
},
"size": 10
},
"aggs":{
"score_stats":{
"stats": {
"field": "score"
}
}
}
}
}
}
java api聚合
自动补全
拼音分词器
- 安装拼音分词器 juejin.cn/editor/draf…
拼音分词器使用
POST /_analyze
{
"text": "如家酒店",
"analyzer": "pinyin"
}
DELETE /test
# Create the "test" index with a custom analyzer "my_analyzer":
# the ik_max_word tokenizer followed by the "py" pinyin token filter.
# The pinyin option is spelled "none_chinese_pinyin_tokenize" (as in the hotel index
# definition in these notes); the previous "non_chinese_..." spelling is not a plugin option.
PUT /test
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "ik_max_word",
          "filter": "py"
        }
      },
      "filter": {
        "py": {
          "type": "pinyin",
          "keep_full_pinyin": false,
          "keep_joined_full_pinyin": true,
          "keep_original": true,
          "limit_first_letter_length": 16,
          "remove_duplicated_term": true,
          "none_chinese_pinyin_tokenize": false
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name": {
        "type": "text",
        "analyzer": "my_analyzer"
      }
    }
  }
}
# 测试自定义分词器
POST /test/_analyze
{
"text": "如家酒店",
"analyzer": "my_analyzer"
}
# 使用文档进行测试
POST /test/_doc/1
{
"id": 1,
"name": "狮子"
}
POST /test/_doc/2
{
"id": 2,
"name": "虱子"
}
#拼音分词器可以用来创建索引,但是不能用于搜索,搜索的时候应该使用ik_smart分词器
GET /test/_search
{
"query": {
"match": {
"name": "shizi"
}
}
}
# Re-create the "test" index so that pinyin is NOT used at search time:
# documents are indexed with "my_analyzer" (ik_max_word + pinyin filter), while query text
# is analyzed with ik_smart only ("search_analyzer").
# NOTE: "test" was already created earlier in these notes; run DELETE /test first if it still exists.
# The pinyin option is spelled "none_chinese_pinyin_tokenize" (as in the hotel index
# definition in these notes); the previous "non_chinese_..." spelling is not a plugin option.
PUT /test
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "ik_max_word",
          "filter": "py"
        }
      },
      "filter": {
        "py": {
          "type": "pinyin",
          "keep_full_pinyin": false,
          "keep_joined_full_pinyin": true,
          "keep_original": true,
          "limit_first_letter_length": 16,
          "remove_duplicated_term": true,
          "none_chinese_pinyin_tokenize": false
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name": {
        "type": "text",
        "analyzer": "my_analyzer",
        "search_analyzer": "ik_smart"
      }
    }
  }
}
自动补全
# 自动补全
#自动补全的索引库
PUT /test
{
"mappings": {
"properties": {
"title":{
"type": "completion"
}
}
}
}
# 示例数据
POST test/_doc
{
"title": ["Sony", "WH-1000XM3"]
}
POST test/_doc
{
"title": ["SK-II", "PITERA"]
}
POST test/_doc
{
"title": ["Nintendo", "switch"]
}
# 自动补全查询
GET /test/_search
{
"suggest": {
"title_suggest": {
"text": "s",
"completion": {
"field": "title",
"skip_duplicates":true,
"size":10
}
}
}
}
将开始的酒店重新建立索引,支持自动补全
# 酒店数据索引库
PUT /hotel
{
"settings": {
"analysis": {
"analyzer": {
"text_anlyzer": {
"tokenizer": "ik_max_word",
"filter": "py"
},
"completion_analyzer": {
"tokenizer": "keyword",
"filter": "py"
}
},
"filter": {
"py": {
"type": "pinyin",
"keep_full_pinyin": false,
"keep_joined_full_pinyin": true,
"keep_original": true,
"limit_first_letter_length": 16,
"remove_duplicated_term": true,
"none_chinese_pinyin_tokenize": false
}
}
}
},
"mappings": {
"properties": {
"id":{
"type": "keyword"
},
"name":{
"type": "text",
"analyzer": "text_anlyzer",
"search_analyzer": "ik_smart",
"copy_to": "all"
},
"address":{
"type": "keyword",
"index": false
},
"price":{
"type": "integer"
},
"score":{
"type": "integer"
},
"brand":{
"type": "keyword",
"copy_to": "all"
},
"city":{
"type": "keyword"
},
"starName":{
"type": "keyword"
},
"business":{
"type": "keyword",
"copy_to": "all"
},
"location":{
"type": "geo_point"
},
"pic":{
"type": "keyword",
"index": false
},
"all":{
"type": "text",
"analyzer": "text_anlyzer",
"search_analyzer": "ik_smart"
},
"suggestion":{
"type": "completion",
"analyzer": "completion_analyzer"
}
}
}
}
GET /hotel/_search
{
"query": {
"match_all": {
}
}
}
GET /hotel/_search
{
"suggest": {
"sugest_myName": {
"text": "hua",
"completion": {
"field": "suggestion",
"skip_duplicates":true,
"size":10
}
}
}
}
对于上文的分析
使用javaApi
/**
 * Runs a completion-suggest request named "sugest_myName" against the "hotel" index
 * (field "suggestion", prefix "hua", duplicates skipped, at most 10 options) and prints
 * the text of every returned option to standard output.
 *
 * @throws IOException if the search call fails
 */
@Test
public void testAggretion() throws IOException {
    // 1. Describe the suggestion to run.
    SuggestBuilder suggestBuilder = new SuggestBuilder();
    suggestBuilder.addSuggestion(
        "sugest_myName",
        SuggestBuilders.completionSuggestion("suggestion")
            .prefix("hua")
            .skipDuplicates(true)
            .size(10));

    // 2. Attach it to a search request on the hotel index.
    SearchRequest request = new SearchRequest("hotel");
    request.source().suggest(suggestBuilder);

    // 3. Execute the request.
    SearchResponse response = client.search(request, RequestOptions.DEFAULT);

    // 4. Look the result up by the same suggestion name and print each option's text.
    CompletionSuggestion suggestion = response.getSuggest().getSuggestion("sugest_myName");
    suggestion.getOptions()
        .forEach(option -> System.out.println(option.getText().toString()));
}
//或者如下
/**
 * Returns auto-completion candidates for the given prefix from the "hotel" index.
 *
 * <p>Runs a completion suggestion named "hotelSuggest" on the "suggestion" field,
 * skipping duplicates and requesting at most 10 options.
 *
 * @param key the prefix typed by the user; {@code null} or empty yields an empty list
 *            without contacting the cluster
 * @return a mutable list with the text of each returned option, in the order the
 *         response lists them; empty when there is nothing to suggest
 * @throws RuntimeException wrapping the {@link IOException} if the search call fails
 */
public List<String> getSuggestion(String key) {
    // Nothing to complete: skip the round trip instead of sending a request without a usable prefix.
    if (key == null || key.isEmpty()) {
        return new ArrayList<>();
    }
    try {
        // 1. Prepare the request.
        SearchRequest request = new SearchRequest("hotel");
        // 2. Request parameters: completion suggestion on the "suggestion" field.
        request.source().suggest(new SuggestBuilder()
            .addSuggestion(
                "hotelSuggest",
                SuggestBuilders
                    .completionSuggestion("suggestion")
                    .size(10)
                    .skipDuplicates(true)
                    .prefix(key)
            ));
        // 3. Send the request.
        SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
        // 4. Parse the response.
        List<String> list = new ArrayList<>();
        Suggest suggest = response.getSuggest();
        if (suggest == null) {
            // The response carries no suggest section at all.
            return list;
        }
        // 4.1. Look the result up by the suggestion name used in the request.
        CompletionSuggestion suggestion = suggest.getSuggestion("hotelSuggest");
        if (suggestion == null) {
            return list;
        }
        // 4.2. Collect the text of every option.
        for (CompletionSuggestion.Entry.Option option : suggestion.getOptions()) {
            list.add(option.getText().toString());
        }
        return list;
    } catch (IOException e) {
        // Keep the original unchecked exception type, but say what failed and preserve the cause.
        throw new RuntimeException("Failed to fetch hotel suggestions for prefix: " + key, e);
    }
}
数据同步
方案:使用mq进行数据同步
代码
- hotel-admin进行酒店数据的增删
- hotel-demo监听到增删的酒店数据之后将es的数据进行更新
/**
 * RabbitMQ listener for hotel change notifications.
 *
 * <p>Each handler declares its own queue, binds it to the topic exchange named by
 * {@code HotelMqConstants.EXCHANGE_NAME}, and forwards the received hotel id to
 * {@link IHotelService}.
 */
@Component
public class HotelListener {

    @Autowired
    private IHotelService hotelService;

    /**
     * Handles messages from the insert queue (routing key {@code HotelMqConstants.INSERT_KEY}).
     *
     * @param hotelId id carried by the message; passed to {@code hotelService.saveById}
     */
    @RabbitListener(
        bindings = @QueueBinding(
            key = HotelMqConstants.INSERT_KEY,
            exchange = @Exchange(type = ExchangeTypes.TOPIC, name = HotelMqConstants.EXCHANGE_NAME),
            value = @Queue(name = HotelMqConstants.INSERT_QUEUE_NAME)
        )
    )
    public void listenHotelInsert(Long hotelId) {
        // Insert
        hotelService.saveById(hotelId);
    }

    /**
     * Handles messages from the delete queue (routing key {@code HotelMqConstants.DELETE_KEY}).
     *
     * @param hotelId id carried by the message; passed to {@code hotelService.deleteById}
     */
    @RabbitListener(
        bindings = @QueueBinding(
            key = HotelMqConstants.DELETE_KEY,
            exchange = @Exchange(type = ExchangeTypes.TOPIC, name = HotelMqConstants.EXCHANGE_NAME),
            value = @Queue(name = HotelMqConstants.DELETE_QUEUE_NAME)
        )
    )
    public void listenHotelDelete(Long hotelId) {
        // Delete
        hotelService.deleteById(hotelId);
    }
}
ES集群
集群搭建
使用docker-compose搭建集群
- 使用cerebro连接集群
- 使用cerebro创建es索引&分片
- 分布式存储
- 集群查询:查询的时候协调节点并不知道数据在哪个分片中,会去多个node都查询,然后将结果进行聚合后返回给用户
脑裂问题及解决
- 集群异常:
执行 docker-compose stop es01 使es01宕机后,集群会重新选举主节点。
- 集群正常:
docker-compose start es01后数据会恢复,但是es01不是主节点了。