问题
环境:
elasticsearch:7.4.2
elasticsearch-analysis-ik:7.4.2
加载远程字典代码
/**
 * HTTP endpoint that serves the custom dictionary as a newline-separated text body.
 *
 * <p>The body (and the change-detection headers written onto {@code response}) is produced by
 * {@code esService.deploy(response)}.
 *
 * @param response the servlet response that {@code deploy} writes its headers to
 * @return the dictionary text, or an empty string when the service returns {@code null}
 */
@GetMapping(value = "/load-customwords")
public String loadCustomWords(HttpServletResponse response) {
    String res = esService.deploy(response);
    // A Java String is already Unicode. The previous round trip through
    // res.getBytes() (platform default charset) and new String(bytes, "utf-8")
    // corrupted non-ASCII words on any JVM whose default charset is not UTF-8,
    // so the text is returned unchanged instead.
    // A null result previously ended in the catch-all and produced ""; keep that contract.
    return res == null ? "" : res;
}
/**
 * Builds the custom-dictionary payload: every word name from {@code customWordDao.getAllWords()},
 * one per line, joined with {@code '\n'} and without a trailing newline.
 *
 * <p>When at least one word exists, the {@code Last-Modified} and {@code ETag} response headers are
 * set from the created/updated time of the FIRST word in the list.
 * NOTE(review): this only reflects the newest change if the DAO returns the most recently
 * modified word first - confirm the query ordering.
 *
 * @param response servlet response that receives the {@code Last-Modified} / {@code ETag} headers
 * @return the newline-separated word list, or an empty string when there are no words
 */
@Override
public String deploy(HttpServletResponse response) {
    List<CustomWord> allWords = customWordDao.getAllWords();
    // The builder never leaves this method, so the unsynchronized StringBuilder is sufficient.
    StringBuilder buffer = new StringBuilder();
    if (CollectionUtils.isNotEmpty(allWords)) {
        logger.info("开始加载自定义词典...");
        boolean first = true;
        for (CustomWord word : allWords) {
            if (first) {
                try {
                    // "HH" (0-23) instead of "hh" (1-12): without an AM/PM marker the 12-hour
                    // form yields the same header value for timestamps 12 hours apart, which
                    // would hide a dictionary change from a client comparing these headers.
                    // Assumes DateUtil.getDateTime takes SimpleDateFormat-style patterns - TODO confirm.
                    response.setHeader("Last-Modified", DateUtil.getDateTime("yyyy-MM-dd HH:mm:ss", word.getCreatedTime()));
                    response.setHeader("ETag", DateUtil.getDateTime("yyyy-MM-dd HH:mm:ss", word.getUpdatedTime()));
                } catch (Exception e) {
                    // Best effort: a header failure must not prevent the dictionary body from
                    // being returned, but it should reach the application log, not stderr.
                    logger.error("设置自定义词典响应头失败", e);
                }
                first = false;
            } else {
                buffer.append('\n');
            }
            buffer.append(word.getWordName());
        }
        logger.info("加载自定义词典完毕...");
    }
    return buffer.toString();
}
配置
使用
问题
没有加载字典!!!
最关键的是,我用 5.6.2 和 7.12.0 两个版本都试了一下,热更新加载远程词典都没有问题,唯独我们公司使用的 7.4.2 版本无法加载……瞬间气得跳脚。
怎么办?不可能让公司直接换es的版本,那就只能去看下源码了,呜呜...
源码
分别下载v5.6.2,v7.12.0,v7.4.2 的ik源码,不知道怎么下载的可看如下:
- 进入官网地址
- 打开releases
- 选择对应的版本
- 下载源码
v5.6.2
加载远程字典的核心方法getRemoteWords()
/**
 * Downloads custom dictionary entries from a remote server (IK v5.6.2 variant).
 *
 * Issues an HTTP GET to {@code location}. On a 200 response the body is read line by line,
 * with no check on the entity's declared length, and each line becomes one list element.
 *
 * @param location URL of the remote dictionary
 * @return the lines read so far; empty when the status is not 200 or nothing could be read
 */
private static List<String> getRemoteWords(String location) {
List<String> buffer = new ArrayList<String>();
// Timeouts: 10 s to lease a connection, 10 s to connect, 60 s socket timeout.
RequestConfig rc = RequestConfig.custom().setConnectionRequestTimeout(10 * 1000).setConnectTimeout(10 * 1000)
.setSocketTimeout(60 * 1000).build();
// NOTE(review): the client itself is never closed in this method.
CloseableHttpClient httpclient = HttpClients.createDefault();
CloseableHttpResponse response;
BufferedReader in;
HttpGet get = new HttpGet(location);
get.setConfig(rc);
try {
response = httpclient.execute(get);
if (response.getStatusLine().getStatusCode() == 200) {
String charset = "UTF-8";
// Determine the charset; defaults to UTF-8.
// NOTE(review): entity and Content-Type are dereferenced without null checks here;
// if either can be null this throws a NullPointerException that no catch below handles.
if (response.getEntity().getContentType().getValue().contains("charset=")) {
String contentType = response.getEntity().getContentType().getValue();
// Everything after the last '=' in the Content-Type value is taken as the charset name.
charset = contentType.substring(contentType.lastIndexOf("=") + 1);
}
// The body is read unconditionally, whatever length the entity reports.
in = new BufferedReader(new InputStreamReader(response.getEntity().getContent(), charset));
String line;
while ((line = in.readLine()) != null) {
buffer.add(line);
}
// Reader and response are closed only on this success path; an exception above skips both.
in.close();
response.close();
return buffer;
}
// Non-200 status: release the response and fall through to return the empty list.
response.close();
} catch (ClientProtocolException e) {
logger.error("getRemoteWords {} error", e, location);
} catch (IllegalStateException e) {
logger.error("getRemoteWords {} error", e, location);
} catch (IOException e) {
logger.error("getRemoteWords {} error", e, location);
}
return buffer;
}
v7.12.0
加载远程字典的核心方法getRemoteWordsUnprivileged()
/**
 * Downloads custom dictionary entries from a remote server (IK v7.12.0 variant).
 *
 * Issues an HTTP GET to {@code location}. On a 200 response with a non-null entity, the body is
 * read line by line when the entity reports a positive content length OR is chunked.
 *
 * @param location URL of the remote dictionary
 * @return the lines read so far; empty when the status is not 200, the entity is null,
 *         the length/chunked check fails, or nothing could be read
 */
private static List<String> getRemoteWordsUnprivileged(String location) {
List<String> buffer = new ArrayList<String>();
// Timeouts: 10 s to lease a connection, 10 s to connect, 60 s socket timeout.
RequestConfig rc = RequestConfig.custom().setConnectionRequestTimeout(10 * 1000).setConnectTimeout(10 * 1000)
.setSocketTimeout(60 * 1000).build();
// NOTE(review): the client itself is never closed in this method.
CloseableHttpClient httpclient = HttpClients.createDefault();
CloseableHttpResponse response;
BufferedReader in;
HttpGet get = new HttpGet(location);
get.setConfig(rc);
try {
response = httpclient.execute(get);
if (response.getStatusLine().getStatusCode() == 200) {
String charset = "UTF-8";
// Determine the charset; defaults to UTF-8.
HttpEntity entity = response.getEntity();
if(entity!=null){
Header contentType = entity.getContentType();
if(contentType!=null&&contentType.getValue()!=null){
String typeValue = contentType.getValue();
if(typeValue!=null&&typeValue.contains("charset=")){
// Everything after the last '=' in the Content-Type value is taken as the charset name.
charset = typeValue.substring(typeValue.lastIndexOf("=") + 1);
}
}
// Read the body when a positive length is declared OR the entity is chunked.
if (entity.getContentLength() > 0 || entity.isChunked()) {
in = new BufferedReader(new InputStreamReader(entity.getContent(), charset));
String line;
while ((line = in.readLine()) != null) {
buffer.add(line);
}
// Reader and response are closed only on this success path; an exception above skips both.
in.close();
response.close();
return buffer;
}
}
}
// Non-200 status, null entity, or failed length/chunked check: release the response
// and fall through to return the (empty) list.
response.close();
} catch (IllegalStateException | IOException e) {
logger.error("getRemoteWords {} error", e, location);
}
return buffer;
}
v7.4.2
加载远程字典的核心方法getRemoteWordsUnprivileged()
/**
 * Downloads custom dictionary entries from a remote server (IK v7.4.2 variant).
 *
 * Issues an HTTP GET to {@code location}. On a 200 response with a non-null entity, the body is
 * read line by line ONLY when the entity reports a content length greater than zero.
 *
 * @param location URL of the remote dictionary
 * @return the lines read so far; empty when the status is not 200, the entity is null,
 *         the reported content length is not positive, or nothing could be read
 */
private static List<String> getRemoteWordsUnprivileged(String location) {
List<String> buffer = new ArrayList<String>();
// Timeouts: 10 s to lease a connection, 10 s to connect, 60 s socket timeout.
RequestConfig rc = RequestConfig.custom().setConnectionRequestTimeout(10 * 1000).setConnectTimeout(10 * 1000)
.setSocketTimeout(60 * 1000).build();
// NOTE(review): the client itself is never closed in this method.
CloseableHttpClient httpclient = HttpClients.createDefault();
CloseableHttpResponse response;
BufferedReader in;
HttpGet get = new HttpGet(location);
get.setConfig(rc);
try {
response = httpclient.execute(get);
if (response.getStatusLine().getStatusCode() == 200) {
String charset = "UTF-8";
// Determine the charset; defaults to UTF-8.
HttpEntity entity = response.getEntity();
if(entity!=null){
Header contentType = entity.getContentType();
if(contentType!=null&&contentType.getValue()!=null){
String typeValue = contentType.getValue();
if(typeValue!=null&&typeValue.contains("charset=")){
// Everything after the last '=' in the Content-Type value is taken as the charset name.
charset = typeValue.substring(typeValue.lastIndexOf("=") + 1);
}
}
// NOTE(review): only a positive declared length is accepted; unlike the v7.12.0 variant
// there is no isChunked() alternative. A response whose length is reported as
// non-positive (e.g. -1) skips this branch, so the returned list stays empty.
if (entity.getContentLength() > 0) {
in = new BufferedReader(new InputStreamReader(entity.getContent(), charset));
String line;
while ((line = in.readLine()) != null) {
buffer.add(line);
}
// Reader and response are closed only on this success path; an exception above skips both.
in.close();
response.close();
return buffer;
}
}
}
// Non-200 status, null entity, or non-positive length: release the response
// and fall through to return the (empty) list.
response.close();
} catch (IllegalStateException | IOException e) {
logger.error("getRemoteWords {} error", e, location);
}
return buffer;
}
对比三个版本读取响应内容的位置可以发现:v5.6.2 没有对 entity 做任何校验,直接逐行读取(line = in.readLine());v7.12.0 在读取流之前做了校验 entity.getContentLength() > 0 || entity.isChunked();而 v7.4.2 虽然也做了校验,但只校验了内容长度 entity.getContentLength() > 0。接下来我们在 v7.4.2 的源码中把这个值打印出来看看。
重新打包
- 将打包好的插件解压重新安装到es
- 触发远程字典加载
- 日志
此时我们发现,这个值为 -1,所以程序根本没有进入该判断内部去读取响应内容,最后返回的 list 是一个空列表。
- 加上v7.12.0的判断重新打包安装
- 分词加载成功,分词生效
解决方案
从上面的分析来看,源码是存在一点问题的,那我们要么就在线上重新安装自己打包的插件,可是公司的环境,说不定运维人员没被通知到,从而安装了自己从网上下载下来的插件呢?那有没有不污染源码的情况解决这个问题呢?
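首先我们有个疑问:entity.getContentLength() 为什么会等于 -1 呢?原因是响应中没有携带 Content-Length 头(例如服务端使用 chunked 分块传输)时,HttpEntity.getContentLength() 会返回负数,表示长度未知。明白了这一点就好解决了:我们能不能让这个判断成立,从而绕过它呢?
我们在服务端强行设置响应长度 response.setContentLength(allWords.size());(注意:Content-Length 的语义是响应体的字节数,这里用词条数量只是为了让 getContentLength() > 0 成立;更稳妥的做法是把它设置为返回内容按 UTF-8 编码后的字节长度,否则客户端可能只读取到部分内容。)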
然后测试一下,发现,成功解决!!!