敏感词校验--WordTree+Redis分布式缓存

149 阅读2分钟

ps: 对功能的属性做敏感词校验(即校验表对象的属性),1.自己维护词库。2.实现分布式缓存

1、分析

  • 词库的维护等于CRUD,没有任何难度。
  • 应用方式:1.使用@Valid框架,提供Aop注解。2.在controller注入相关service,调用方法,传入需要校验的属性。
  • 校验方式:使用WordTree(hutool工具,底层是DFA算法,总不能字符串contains吧。。)

2、开干

1、依赖

<!-- Hutool: supplies the WordTree class used below for sensitive-word matching -->
<dependency>
    <groupId>cn.hutool</groupId>
    <artifactId>hutool-all</artifactId>
    <version>5.7.21</version>
</dependency>

<!-- Redisson Spring Boot starter: supplies the RedissonClient / RTopic used below to broadcast cache changes -->
<dependency>
    <groupId>org.redisson</groupId>
    <artifactId>redisson-spring-boot-starter</artifactId>
    <version>3.17.4</version>
</dependency>

2、实现

注意:

  • WordTree 默认关闭了一些特殊字符的校验,点开源码就可以看见:

image.png 初始化树内容的时候,记得调用 wordTreeCache.setCharFilter(character -> true),根据需求放开对特殊字符的校验。

  • 如果需求不区分大小写,wordTree初始化和校验的时候,记得先转为全小写。
/**
 * 分布式 敏感词缓存
 *
 * @author Agao
 * @date 2023/9/7 16:48
 */
@Slf4j
@Service
public class SensitiveWordDistributedCache {

    @Autowired private RedissonClient redissonClient;
    @Autowired private SensitiveRepository sensitiveRepository;
    @Autowired private InterZoneCommunicationService izcService;

    public static final String SENSITIVE_WORD_TOPIC = "sensitive_word_msg";
    private RTopic topic;
    private String localMsgId;
    private Channel<SensitiveWordMsg> channel;

    /** 敏感词 缓存树 */
    private final WordTree wordTreeCache = new WordTree();

    /**
     * 获取命中的敏感词
     *
     * @param words 需要校验的敏感词
     * @return
     */
    public String getSensitiveWords(String... words) {
      if (words.length == 0) {
        return null;
      }
      if (wordTreeCache.isEmpty()) {
        initWordTreeCache();
      }

      Set<String> wordSet = new HashSet<>();
      for (String word : words) {
        if (!StringUtils.hasText(word)) {
          continue;
        }
        List<String> sensitiveWords = wordTreeCache.matchAll(word.toLowerCase());
        if (!CollectionUtils.isEmpty(sensitiveWords)) {
          wordSet.addAll(sensitiveWords);
        }
      }

      if (CollectionUtils.isEmpty(wordSet)) {
        return null;
      }
      return String.join(",", wordSet);
    }

    /** 失效敏感词缓存树 */
    public void invalidateAll() {
      wordTreeCache.clear();
      // 通知其他zone
      sendMsg(null, SensitiveWordMsgAction.INVALIDATE);
    }

    /**
     * 加入缓存
     *
     * @param sensitiveWordId 词库id
     */
    public void addCache(String sensitiveWordId) {
      // 查询数据库,添加到缓存
      wordTreeCache.addWords(xxx);
      // 通知其他zone
      sendMsg(sensitiveWordId, SensitiveWordMsgAction.ADD);
    }

    /**
     * 发送缓存变更消息
     *
     * @param sensitiveWordId 词库id
     * @param action 添加词库,失效缓存
     */
    private void sendMsg(String sensitiveWordId, SensitiveWordMsgAction action) {
      SensitiveWordMsg msg = new SensitiveWordMsg(this.localMsgId, sensitiveWordId, action);
      topic.publishAsync(msg);
      channel.send(msg);
    }

    /** 初始化分布式缓存 */
    @PostConstruct
    private void initDistributedCache() {
      localMsgId = UuidUtils.generateUuid();
      topic = redissonClient.getTopic(SENSITIVE_WORD_TOPIC);
      topic.addListener(
          SensitiveWordMsg.class,
          (charSequence, msg) -> {
            if (msg.getMsgId().equals(localMsgId)) {
              // 本地不处理
              return;
            }
            msgHandler(msg);
          });
      channel = izcService.getChannel(SENSITIVE_WORD_TOPIC);
      channel.registerListener(
          key -> {
            if (key == null) {
              log.error("recv {} notification, key is empty", SENSITIVE_WORD_TOPIC);
              return;
            }
            log.info(
                "recv {} notification, SensitiveWord: {}, Action: {}",
                SENSITIVE_WORD_TOPIC,
                key.getSensitiveWordId(),
                key.getAction());
            msgHandler(key);
          });
      log.info("Start SensitiveWordDistributedCache ......");
      initWordTreeCache();
    }
    /** 初始化敏感词树 */
    private void initWordTreeCache() {
      wordTreeCache.clear();
      // 不过滤特殊字符
      wordTreeCache.setCharFilter(character -> true);

      // 加载敏感词库的资源到树上
      wordTreeCache.addWords(xxx);
    }

    /**
     * 处理来自其他zone的敏感词消息
     *
     * @param msg
     */
    private void msgHandler(SensitiveWordMsg msg) {
      SensitiveWordMsgAction msgAction = msg.getAction();
      String sensitiveWordId = msg.getSensitiveWordId();
      if (msgAction.equals(SensitiveWordMsgAction.ADD)) {
        Sensitive entity = sensitiveRepository.findById(sensitiveWordId).orElse(null);
        if (entity == null) {
          return;
        }
        Set<String> words = entity.distinctSplitWords();
        wordTreeCache.addWords(words);
      }
      if (msgAction.equals(SensitiveWordMsgAction.INVALIDATE)) {
        wordTreeCache.clear();
      }
    }

}

订阅消息Action和 msg

/**
 * Message announcing a change of the sensitive-word cache.
 *
 * @author Agao
 * @date 2023/9/7 17:09
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class SensitiveWordMsg {
  // Id of the sending cache instance; the sender passes its localMsgId here.
  private String msgId;
  // Id of the affected dictionary entry; the sender passes null for an invalidation.
  private String sensitiveWordId;
  // What happened: entry added, or cache invalidated.
  private SensitiveWordMsgAction action;
}
/**
 * Kinds of change carried by a sensitive-word cache message.
 *
 * @author Agao
 * @date 2023/9/7 16:44
 */
public enum SensitiveWordMsgAction {

  /** A dictionary entry was added. */
  ADD,

  /** Everything cached is to be invalidated. */
  INVALIDATE
}

注解校验:

/**
 * Marks a parameter or field whose text is to be checked for sensitive words.
 *
 * <p>The check itself is handed to {@code sensitiveService.validateSensitiveWords}, which treats
 * different roles differently.
 */
@Target({ElementType.FIELD, ElementType.PARAMETER})
@Retention(RetentionPolicy.RUNTIME)
@Documented
@Constraint(validatedBy = ValidatorSensitiveWord.class)
public @interface SensitiveWordValidator {

    /**
     * Placeholder only; the empty default is not actually used.
     *
     * @return the constraint message
     */
    String message() default "";

    Class<?>[] groups() default {};

    Class<? extends Payload>[] payload() default {};
}
/**
 * Constraint validator behind {@link SensitiveWordValidator}.
 *
 * <p>It never reports a violation through its return value; the text is passed to
 * {@code sensitiveService.validateSensitiveWords}, which presumably rejects it on its own (for
 * example by throwing) - confirm against that service.
 *
 * @author Agao
 * @date 2023/9/4 13:59
 */
public class ValidatorSensitiveWord implements ConstraintValidator<SensitiveWordValidator, String> {

  @Autowired private SensitiveService sensitiveService;

  /** Nothing is read from the annotation; the interface default does the work. */
  @Override
  public void initialize(SensitiveWordValidator constraintAnnotation) {
    ConstraintValidator.super.initialize(constraintAnnotation);
  }

  /**
   * Hands non-blank text to the sensitive-word service.
   *
   * @param value text of the annotated element; blank or {@code null} is not checked
   * @param context validator context, unused
   * @return always {@code true}
   */
  @Override
  public boolean isValid(String value, ConstraintValidatorContext context) {
    boolean worthChecking = StringUtils.hasText(value);
    if (worthChecking) {
      sensitiveService.validateSensitiveWords(value);
    }
    return true;
  }
}