Elasticsearch(springcloud5-7)

113 阅读4分钟

Elasticsearch

es、kibana、ik分词器、pinyin分词器的安装

juejin.cn/editor/draf…

  • 需要分词:type=text
  • 如果不需要分词,但是需要搜索:type=keyword image.png
  • 验证es是否正常启动

image.png

  • 验证kibana是否正常启动

image.png

day02

DSL 查询文档

全文查询、范围查询、精确查询

-- 全文查询
GET /hotel/_search
{
 "query":{
   "match": {
     "all": "如家"
   }
  }
}

GET /hotel/_search
{
 "query":{
   "multi_match": {
     "query": "如家", 
     "fields": ["brand","city"]
   }
  }
}

-- 精确查询
GET /hotel/_search
{
 "query":{
   "term": {
    "brand":{
      "value": "如家"
    }
   }
  }
}

-- 范围查询
GET /hotel/_search
{
 "query":{
   "range": {
    "price":{
      "gte": "80",
      "lte":"200"
    }
   }
  }
}

地理查询

GET /hotel/_search
{
 "query":{
   "geo_distance": {
    "distance":"5km",
    "location":"22.569693, 113.860186"
    
   }
  }
}

打分算法

  • 具体算法

image.png

  • 目前es 使用的是BM25算法。优点:受到词频的影响较小,当一个词频繁的出现的时候,分数趋向平滑。

打分算法:function_score 查询

GET /hotel/_search
{
 "query":{
   "function_score": {
    "query": {
      "match": {
        "all": "外滩"
      }
    },
    "functions": [
      {
        "filter": {
          "term": {
            "id": "432335"
          }
        },
        "weight": 10
        
      }
    ],
    "boost_mode": "multiply"
    }
  }
}

image.png

image.png

复合查询

GET /hotel/_search
{
 "query":{
   "bool": {
    "must": [
      {
        "term": {
          "city": {
            "value": "上海"
          }
        }
      }
    ], 
    "should": [
      {
        "term": {
          "brand": "华美达"
        }
      },
      {
        "term": {
          "brand": {
            "value": "皇冠假日"
          }
        }
      }
    ],
    "must_not": [
      {
        "range": {
          "price": {
            "lt": "50"
          }
        }
      }
       
    ],
    "filter": [
      {
       "range": {
         "score": {
           "gte": 0,
           "lte": 200
         }
       } 
      }
    ]
    
    }
  }
}

image.png

搜索结果处理

排序

  • 按照价格进行排序 image.png
  • 按照经纬度进行排序
GET /hotel/_search
{
 "query":{
   "match_all": {
    
   }
  },
  "sort": [
    {
      "_geo_distance": {
       "location": {
         "lat": 31.034661,
         "lon": 121.612282
       },
       "order": "asc",
       "unit": "km"
      }
    }
  ]
}

image.png

分页

# 分页
GET /hotel/_search
{
 "query":{
   "match_all": {
    
   }
  },
 "from": 0,
 "size": 20,
 "sort": [
   {
     "price": {
       "order": "desc"
     }
   }
 ]
}

image.png

高亮

image.png

GET /hotel/_search
{
 "query":{
   "match": {
     "all": "如家"
   }
  },
 "from": 0,
 "size": 20,
 "sort": [
   {
     "price": {
       "order": "desc"
     }
   }
 ],
 "highlight": {
   "fields": {
     "name": {
      "require_field_match": "false"
     }
   }
 }
}

image.png

RestClient文档处理


第三天

聚合

Bucket聚合| Metric聚合

GET /hotel/_search
{
  "size":0,
  "aggs": {
    "brandAgg": {
      "terms": {
        "field": "brand",
        "size": 10
      }
    }
  }
}

# 根据聚合的结果(_count)升序排序
GET /hotel/_search
{
  "size":0,
  "aggs": {
    "brandAgg": {
      "terms": {
        "field": "brand",
        "order": {
          "_count": "asc"
        }, 
        "size": 10
      }
    }
  }
}

# 限定聚合的范围
GET /hotel/_search
{
  "query": {
    "range": {
      "price": {
        "gte": 10,
        "lte": 1500
      }
    }
  }, 
  "size":0,
  "aggs": {
    "brandAgg": {
      "terms": {
        "field": "brand",
        "order": {
          "_count": "asc"
        }, 
        "size": 10
      }
    }
  }
}


#DSL实现Metrics 聚合
get /hotel/_search
{
  "size":0,
  "aggs": {
    "brandAgg": {
      "terms": {
        "field": "brand",
        "order": {
          "_count": "asc"
        }, 
        "size": 10
      },
      "aggs":{
       "score_stats":{
        "stats": {
          "field": "score"
        }
      }
      
    }
    }
  }
}

java api聚合

自动补全

拼音分词器

拼音分词器使用

POST /_analyze
{
 "text": "如家酒店",
 "analyzer": "pinyin"
}

DELETE /test

# 创建索引库
PUT /test
{
 "settings":{
   "analysis":{
     "analyzer":{   
       "my_analyzer":{
         "tokenizer":"ik_max_word",
         "filter":"py"
       }
     },
     "filter":{
       "py":{
         "type":"pinyin",
         "keep_full_pinyin":false,
         "keep_joined_full_pinyin":true,
         "keep_original":true,
         "limit_first_letter_length":16,
         "remove_duplicated_term":true,
         "none_chinese_pinyin_tokenize":false
       }
     }
   }
 },
 "mappings": {
   "properties": {
     "name":{
       "type": "text",
       "analyzer": "my_analyzer"
     }
   }
 }
}


# 测试自定义分词器
POST /test/_analyze
{
 "text": "如家酒店",
 "analyzer": "my_analyzer"
}


# 使用文档进行测试
POST /test/_doc/1
{
 "id": 1,
 "name": "狮子"
}
POST /test/_doc/2
{
 "id": 2,
 "name": "虱子"
}

#拼音分词器可以用来创建索引,但是不能用于搜索,搜索的时候应该使用ik_smart分词器
GET /test/_search
{
 "query": {
   "match": {
     "name": "shizi"
   }
 }
}


# 创建索引库,搜索的时候不使用拼音
PUT /test
{
 "settings":{
   "analysis":{
     "analyzer":{   
       "my_analyzer":{
         "tokenizer":"ik_max_word",
         "filter":"py"
       }
     },
     "filter":{
       "py":{
         "type":"pinyin",
         "keep_full_pinyin":false,
         "keep_joined_full_pinyin":true,
         "keep_original":true,
         "limit_first_letter_length":16,
         "remove_duplicated_term":true,
         "none_chinese_pinyin_tokenize":false
       }
     }
   }
 },
 "mappings": {
   "properties": {
     "name":{
       "type": "text",
       "analyzer": "my_analyzer",
       "search_analyzer": "ik_smart"
     }
   }
 }
}

自动补全

image.png

# 自动补全
#自动补全的索引库
PUT /test
{
 "mappings": {
   "properties": {
     "title":{
       "type": "completion"
     }
   }
 }
}

# 示例数据
POST test/_doc
{
 "title": ["Sony", "WH-1000XM3"]
}
POST test/_doc
{
 "title": ["SK-II", "PITERA"]
}
POST test/_doc
{
 "title": ["Nintendo", "switch"]
}

# 自动补全查询
GET /test/_search
{
 "suggest": {
   "title_suggest": {
     "text": "s",
     "completion": {
       "field": "title",
       "skip_duplicates":true,
       "size":10
     }
   }
 }
 
}

将开始的酒店重新建立索引,支持自动补全

# 酒店数据索引库
PUT /hotel
{
  "settings": {
    "analysis": {
      "analyzer": {
        "text_anlyzer": {
          "tokenizer": "ik_max_word",
          "filter": "py"
        },
        "completion_analyzer": {
          "tokenizer": "keyword",
          "filter": "py"
        }
      },
      "filter": {
        "py": {
          "type": "pinyin",
          "keep_full_pinyin": false,
          "keep_joined_full_pinyin": true,
          "keep_original": true,
          "limit_first_letter_length": 16,
          "remove_duplicated_term": true,
          "none_chinese_pinyin_tokenize": false
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id":{
        "type": "keyword"
      },
      "name":{
        "type": "text",
        "analyzer": "text_anlyzer",
        "search_analyzer": "ik_smart",
        "copy_to": "all"
      },
      "address":{
        "type": "keyword",
        "index": false
      },
      "price":{
        "type": "integer"
      },
      "score":{
        "type": "integer"
      },
      "brand":{
        "type": "keyword",
        "copy_to": "all"
      },
      "city":{
        "type": "keyword"
      },
      "starName":{
        "type": "keyword"
      },
      "business":{
        "type": "keyword",
        "copy_to": "all"
      },
      "location":{
        "type": "geo_point"
      },
      "pic":{
        "type": "keyword",
        "index": false
      },
      "all":{
        "type": "text",
        "analyzer": "text_anlyzer",
        "search_analyzer": "ik_smart"
      },
      "suggestion":{
          "type": "completion",
          "analyzer": "completion_analyzer"
      }
    }
  }
}





GET /hotel/_search
{
  "query": {
    "match_all": {
      
    }
  }
}


GET /hotel/_search
{
  "suggest": {
    "sugest_myName": {
      "text": "hua",
      "completion": {
        "field": "suggestion",
        "skip_duplicates":true,
        "size":10
      }
    }
  }
}

对于上文的分析 image.png

使用javaApi

/**
 * Sends a completion-suggest request for the prefix "hua" to the "hotel" index
 * and prints the text of every returned option to standard output.
 *
 * @throws IOException as declared; presumably raised by the search call
 */
@Test
public void testAggretion() throws IOException {
    // Assemble the suggest section first so the request itself stays a two-liner.
    SuggestBuilder suggestBuilder = new SuggestBuilder();
    suggestBuilder.addSuggestion(
            "sugest_myName",
            SuggestBuilders.completionSuggestion("suggestion")
                    .prefix("hua")
                    .skipDuplicates(true)
                    .size(10));

    SearchRequest request = new SearchRequest("hotel");
    request.source().suggest(suggestBuilder);

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);

    // The result is looked up under the same name it was registered with above.
    CompletionSuggestion completion = response.getSuggest().getSuggestion("sugest_myName");

    // Print the completed text of each option.
    completion.getOptions().forEach(option -> System.out.println(option.getText().toString()));
}

//或者如下
/**
 * Returns auto-completion texts from the "hotel" index for the given prefix.
 *
 * <p>A completion suggestion is requested on the "suggestion" field with at most
 * 10 options and duplicates skipped; the text of each returned option is collected.
 *
 * @param key the prefix to complete
 * @return the option texts in the order the response lists them; an empty list when
 *         the response carries no suggest section or no entry for this suggestion
 * @throws RuntimeException wrapping the {@link IOException} thrown by the search call
 */
public List<String> getSuggestion(String key) {
    // Single source of truth for the name used both when registering and when reading back.
    final String suggestionName = "hotelSuggest";
    try {
        // 1. Prepare the request.
        SearchRequest request = new SearchRequest("hotel");
        // 2. Attach the completion suggestion.
        request.source().suggest(new SuggestBuilder()
                .addSuggestion(
                        suggestionName,
                        SuggestBuilders
                                .completionSuggestion("suggestion")
                                .size(10)
                                .skipDuplicates(true)
                                .prefix(key)
                ));
        // 3. Execute the search.
        SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);

        // 4. Parse the response; tolerate a missing suggest section instead of throwing an NPE.
        List<String> list = new ArrayList<>();
        Suggest suggest = response.getSuggest();
        if (suggest == null) {
            return list;
        }
        CompletionSuggestion suggestion = suggest.getSuggestion(suggestionName);
        if (suggestion == null) {
            return list;
        }
        for (CompletionSuggestion.Entry.Option option : suggestion.getOptions()) {
            list.add(option.getText().toString());
        }
        return list;
    } catch (IOException e) {
        // Keep the cause so the transport failure is still visible to callers.
        throw new RuntimeException("Failed to fetch suggestions for prefix: " + key, e);
    }
}


数据同步

方案:使用mq进行数据同步

image.png

代码

  • hotel-admin进行酒店数据的增删
  • hotel-demo监听到增删的酒店数据之后将es的数据进行更新
/**
 * Message listener for hotel change events.
 *
 * <p>Each handler receives a hotel id from its queue and forwards it to
 * {@link IHotelService}. According to the surrounding notes the service then
 * updates the Elasticsearch data; the service implementation is not shown here.
 */
@Component
public class HotelListener {

    @Autowired
    private IHotelService hotelService;

    /**
     * Handles a message from the insert queue by passing the id to
     * {@code hotelService.saveById}.
     *
     * @param hotelId id carried by the message
     */
    @RabbitListener(
            bindings = @QueueBinding(
                    value = @Queue(name = HotelMqConstants.INSERT_QUEUE_NAME),
                    exchange = @Exchange(
                            name = HotelMqConstants.EXCHANGE_NAME,
                            type = ExchangeTypes.TOPIC),
                    key = HotelMqConstants.INSERT_KEY))
    public void listenHotelInsert(Long hotelId) {
        hotelService.saveById(hotelId);
    }

    /**
     * Handles a message from the delete queue by passing the id to
     * {@code hotelService.deleteById}.
     *
     * @param hotelId id carried by the message
     */
    @RabbitListener(
            bindings = @QueueBinding(
                    value = @Queue(name = HotelMqConstants.DELETE_QUEUE_NAME),
                    exchange = @Exchange(
                            name = HotelMqConstants.EXCHANGE_NAME,
                            type = ExchangeTypes.TOPIC),
                    key = HotelMqConstants.DELETE_KEY))
    public void listenHotelDelete(Long hotelId) {
        hotelService.deleteById(hotelId);
    }
}

ES集群

集群搭建

使用docker-compose搭建集群 image.png

  • 使用cerebro连接集群

image.png

image.png

  • 使用cerebro创建es索引&分片

image.png

  • 分布式存储

image.png image.png

  • 集群查询 查询的时候协调节点并不知道数据在哪个分片中,会去多个node都查询,然后将结果进行聚合后返回给用户

image.png image.png

脑裂问题及解决

image.png

  • 集群异常: docker-compose stop es01 异常后会重新选择主节点。

image.png

image.png

  • 集群正常: docker-compose start es01 后数据会恢复,但是es01不是主节点了。

image.png