扩展ik分词词库支持mysql热部署

939 阅读2分钟

一、代码调整:

  • 1.创建扩展词和停用词的表:
-- Extension-word table read by the IK plugin (one word per row, column `word`).
-- utf8mb4 is used instead of utf8: MySQL's utf8 is the 3-byte utf8mb3 charset and
-- cannot store 4-byte characters (rare CJK ideographs, emoji).
-- The AUTO_INCREMENT start value from the original dump is dropped so that a fresh
-- table starts at 1, like es_ik_stop_word.
CREATE TABLE `es_ik_extensions_word` (
  `id` int NOT NULL AUTO_INCREMENT COMMENT '主键',
  `word` varchar(50) DEFAULT NULL COMMENT '词',
  `create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
  `update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='EsIk分词扩展词词库';

-- Stop-word table read by the IK plugin (one word per row, column `word`).
-- utf8mb4 is used instead of utf8: MySQL's utf8 is the 3-byte utf8mb3 charset and
-- cannot store 4-byte characters (rare CJK ideographs, emoji).
CREATE TABLE `es_ik_stop_word` (
  `id` int NOT NULL AUTO_INCREMENT COMMENT '主键',
  `word` varchar(50) DEFAULT NULL COMMENT '词',
  `create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
  `update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='EsIk分词停用词词库';

<!-- Maven dependency on the MySQL JDBC driver (Connector/J 8.0.13).
     Presumably added to the <dependencies> section of the IK plugin's pom.xml; confirm. -->
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>8.0.13</version>
</dependency>

再在main->assemblies->plugin.xml的描述符文件中,将MySQL驱动添加到依赖集合中,如下所示:

<!-- Assembly dependency set that lists the MySQL driver artifact
     (groupId:artifactId) under <includes>, with "/" as the output directory. -->
<dependencySet>
    <outputDirectory>/</outputDirectory>
    <useProjectArtifact>true</useProjectArtifact>
    <useTransitiveFiltering>true</useTransitiveFiltering>
    <includes>
        <include>mysql:mysql-connector-java</include>
    </includes>
</dependencySet>

  • 4.在config目录下增加jdbc配置文件jdbc-reload.properties(文件名需与后面代码中读取的文件名一致):
# JDBC connection used by the IK plugin to read its word lists from MySQL.
jdbc.url=jdbc:mysql://localhost:3306/testik?allowPublicKeyRetrieval=true&useSSL=false&useUnicode=true&characterEncoding=UTF-8&autoReconnect=true&zeroDateTimeBehavior=convertToNull&serverTimezone=Asia/Shanghai
jdbc.user=root
jdbc.password=123456
# Query that returns the extension words (column `word`)
jdbc.reload.sql=select word from es_ik_extensions_word
# Query that returns the stop words (column `word`)
jdbc.reload.stopword.sql=select word from es_ik_stop_word
# Pause in milliseconds after each load; read by the loader methods via Thread.sleep
jdbc.reload.interval=5000

  • 5.在org.wltea.analyzer.dic.Dictionary类中增加如下代码:创建用于读取配置文件的Properties对象,并在静态块中加载mysql驱动:
    // Holds the settings read from the jdbc-reload.properties configuration file.
    private static Properties prop = new Properties();

    static {
        try {
            // Connector/J 8.x ships its driver as com.mysql.cj.jdbc.Driver; the old
            // com.mysql.jdbc.Driver name is deprecated in that release line.
            Class.forName("com.mysql.cj.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            logger.error("error", e);
        }
    }

  • 6.在该类中增加从mysql加载扩展词库和停用词库的方法:
   /**
    * Loads the extension words stored in MySQL into the main dictionary.
    *
    * <p>Reads {@code jdbc-reload.properties} from the dictionary root, runs the query
    * configured under {@code jdbc.reload.sql} and adds every non-blank {@code word}
    * value to {@code _MainDict}. Afterwards the calling thread sleeps for
    * {@code jdbc.reload.interval} milliseconds (5000 when the key is missing or not a
    * positive number). The sleep also happens after a failed load, so a caller that
    * invokes this method in a loop does not spin. Failures are logged, never thrown.
    */
   private void loadMySqlExtDict() {
        try {
            Path file = PathUtils.get(getDictRoot(), "jdbc-reload.properties");
            // Close the file handle as soon as the properties have been read.
            try (FileInputStream in = new FileInputStream(file.toFile())) {
                prop.load(in);
            }

            logger.info("-------jdbc-reload.properties-------");
            for (Object key : prop.keySet()) {
                String name = String.valueOf(key);
                // The database password must never end up in the log.
                String value = "jdbc.password".equals(name) ? "******" : prop.getProperty(name);
                logger.info("{}={}", name, value);
            }

            String sql = prop.getProperty("jdbc.reload.sql");
            logger.info("------- 查询词典, sql:{}-------", sql);

            // Connection, statement and result set are closed in reverse order when
            // the block exits, whether it completes or throws.
            try (Connection connection = DriverManager.getConnection(
                         prop.getProperty("jdbc.url"),
                         prop.getProperty("jdbc.user"),
                         prop.getProperty("jdbc.password"));
                 Statement statement = connection.createStatement();
                 ResultSet resultSet = statement.executeQuery(sql)) {

                // Add every non-blank word of the result to the main dictionary.
                while (resultSet.next()) {
                    String theWord = resultSet.getString("word");
                    if (theWord != null && !"".equals(theWord.trim())) {
                        logger.info("------热更新词典:{}------", theWord);
                        _MainDict.fillSegment(theWord.trim().toCharArray());
                    }
                }
            }
        } catch (Exception e) {
            logger.error("error", e);
        }

        // Work out the pause; fall back to a default instead of failing on a missing
        // or malformed setting.
        long intervalMillis = 5000L;
        String configured = prop.getProperty("jdbc.reload.interval");
        if (configured != null) {
            try {
                long parsed = Long.parseLong(configured.trim());
                if (parsed > 0) {
                    intervalMillis = parsed;
                }
            } catch (NumberFormatException e) {
                logger.error("invalid jdbc.reload.interval", e);
            }
        }

        try {
            // Sleep only after the JDBC resources have been released.
            Thread.sleep(intervalMillis);
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the caller so it can stop.
            Thread.currentThread().interrupt();
        }
    }


    /**
     * Loads the stop words stored in MySQL into the stop-word dictionary.
     *
     * <p>Reads {@code jdbc-reload.properties} from the dictionary root, runs the query
     * configured under {@code jdbc.reload.stopword.sql} and adds every non-blank
     * {@code word} value to {@code _StopWords}. Afterwards the calling thread sleeps for
     * {@code jdbc.reload.interval} milliseconds (5000 when the key is missing or not a
     * positive number). The sleep also happens after a failed load, so a caller that
     * invokes this method in a loop does not spin. Failures are logged, never thrown.
     */
    private void loadMySqlStopwordDict() {
        try {
            Path file = PathUtils.get(getDictRoot(), "jdbc-reload.properties");
            // Close the file handle as soon as the properties have been read.
            try (FileInputStream in = new FileInputStream(file.toFile())) {
                prop.load(in);
            }

            logger.info("-------jdbc-reload.properties-------");
            for (Object key : prop.keySet()) {
                String name = String.valueOf(key);
                // The database password must never end up in the log.
                String value = "jdbc.password".equals(name) ? "******" : prop.getProperty(name);
                logger.info("-------{}={}", name, value);
            }

            // The original referenced an undefined variable `props` here.
            String sql = prop.getProperty("jdbc.reload.stopword.sql");
            logger.info("-------查询停用词, sql:{}", sql);

            // Connection, statement and result set are closed in reverse order when
            // the block exits, whether it completes or throws.
            try (Connection conn = DriverManager.getConnection(
                         prop.getProperty("jdbc.url"),
                         prop.getProperty("jdbc.user"),
                         prop.getProperty("jdbc.password"));
                 Statement stmt = conn.createStatement();
                 ResultSet rs = stmt.executeQuery(sql)) {

                // Add every non-blank word of the result to the stop-word dictionary.
                while (rs.next()) {
                    String theWord = rs.getString("word");
                    if (theWord != null && !"".equals(theWord.trim())) {
                        logger.info("------- 加载停用词 : {}", theWord);
                        _StopWords.fillSegment(theWord.trim().toCharArray());
                    }
                }
            }
        } catch (Exception e) {
            logger.error("error", e);
        }

        // Work out the pause; fall back to a default instead of failing on a missing
        // or malformed setting.
        long intervalMillis = 5000L;
        String configured = prop.getProperty("jdbc.reload.interval");
        if (configured != null) {
            try {
                long parsed = Long.parseLong(configured.trim());
                if (parsed > 0) {
                    intervalMillis = parsed;
                }
            } catch (NumberFormatException e) {
                logger.error("invalid jdbc.reload.interval", e);
            }
        }

        try {
            // Sleep only after the JDBC resources have been released.
            Thread.sleep(intervalMillis);
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the caller so it can stop.
            Thread.currentThread().interrupt();
        }
    }

  • 7.触发调用逻辑:
查找Dictionary类的 loadMainDict()方法,在方法结尾增加调用加载mysql扩展词的方法:
// Load the MySQL dictionary
this.loadMySqlExtDict();

查找Dictionary类的 loadStopWordDict()方法,在方法结尾增加调用停用词的方法:
// Load the MySQL dictionary
this.loadMySqlStopwordDict();

  • 8.定时更新词库:
增加HotDicReloadThread类,代码如下:

/**
 * Task body that keeps asking the {@code Dictionary} singleton to reload its main
 * dictionary.
 */
public class HotDicReloadThread {
    private static final Logger logger = ESPluginLoggerFactory.getLogger(HotDicReloadThread.class.getName());

    /**
     * Reloads the main dictionary repeatedly until the running thread is interrupted.
     *
     * <p>The loop itself does not pause between iterations; presumably the pause
     * comes from inside the reload call. Confirm against {@code Dictionary}.
     */
    public void initial() {
        // Stop on interruption instead of looping forever, so the pool can shut down.
        while (!Thread.currentThread().isInterrupted()) {
            logger.info("-------重新加载mysql词典--------");
            Dictionary.getSingleton().reLoadMainDict();
        }
    }
}

查找Dictionary类的initial方法,增加开启调度任务:
// Submit the task that keeps reloading the MySQL dictionary
pool.execute(() -> new HotDicReloadThread().initial());

二、更新环境的ik:

  • 1.执行maven构建,在target\releases目录下会生成插件压缩包,对其进行解压:
  • 2.将原有的环境上的 ./elasticsearch-6.8.0/plugins目录下的ik删除,替换成新构建的;

三、问题:

  • 1.更新修改后的ik分词后报错日志如下:
java.security.AccessControlException: access denied (java.net.SocketPermission 127.0.0.1:3306 connect,resolve)

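该异常由Java安全管理器(Security Manager)抛出,表示插件代码没有被授予连接127.0.0.1:3306的网络权限,而不是防火墙的问题;需要按下文的方式在策略文件中授予java.net.SocketPermission的connect,resolve权限(同时也请确认mysql所在服务器的端口可以正常访问);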

  • 2.更新修改后的ik分词后报错日志如下:
java.security.AccessControlException: access denied (permission java.lang.RuntimePermission "setContextClassLoader")

本人的ik使用的版本是:6.6.1,elasticsearch的版本是:6.8.0; 解决的方案是在jdk1.8.0_141/jre/lib/security目录下的java.policy文件中(grant代码块内)增加如下权限:

// Permissions the article adds to java.policy; presumably they belong inside an
// existing grant { ... } block. Confirm.
// NOTE(review): check these RuntimePermission target names against the JDK's
// documented list before relying on them.
permission java.lang.RuntimePermission "createContextClassLoader";
permission java.lang.RuntimePermission "setContextClassLoader";
permission java.lang.RuntimePermission "getContextClassLoader";
// NOTE(review): "*" matches every host; consider narrowing it to the MySQL host and port.
permission java.net.SocketPermission "*", "connect,resolve";