ps: 对功能的属性做敏感词校验(即校验表对象的属性),1.自己维护词库。2.实现分布式缓存
1、分析
- 词库的维护等于CRUD,没有任何难度。
- 应用方式:1.使用@Valid框架,提供Aop注解。2.在controller注入相关service,调用方法,传入需要校验的属性。
- 校验方式:使用WordTree(hutool工具,底层是DFA算法,总不能字符串contains吧。。)
2、开干
1、依赖
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.7.21</version>
</dependency>
<dependency>
<groupId>org.redisson</groupId>
<artifactId>redisson-spring-boot-starter</artifactId>
<version>3.17.4</version>
</dependency>
2、实现
注意:
- WordTree 默认关闭了一些特殊字符的校验,点开源码就可以看见:
初始化树内容的时候,记得 wordTreeCache.setCharFilter(character -> true) 根据需求放开。
- 如果需求不区分大小写,wordTree初始化和校验的时候,记得先转为全小写。
/**
 * Distributed sensitive-word cache.
 *
 * <p>Holds an in-memory {@link WordTree} of sensitive words and broadcasts every local cache change
 * (add / invalidate) through a Redisson topic and an inter-zone channel, both named
 * {@link #SENSITIVE_WORD_TOPIC}. Changes received from either transport are applied to the local
 * tree by {@link #msgHandler(SensitiveWordMsg)}.
 *
 * <p>NOTE(review): the tree is mutated from listener callbacks and read from callers of
 * {@link #getSensitiveWords(String...)} without any synchronization visible here; confirm that
 * this is acceptable for the deployment.
 *
 * @author Agao
 * @date 2023/9/7 16:48
 */
@Slf4j
@Service
public class SensitiveWordDistributedCache {
  @Autowired private RedissonClient redissonClient;
  @Autowired private SensitiveRepository sensitiveRepository;
  @Autowired private InterZoneCommunicationService izcService;

  /** Name shared by the Redisson topic and the inter-zone channel. */
  public static final String SENSITIVE_WORD_TOPIC = "sensitive_word_msg";

  private RTopic topic;

  /** Id stamped on every message sent by this instance, used to skip our own Redis messages. */
  private String localMsgId;

  private Channel<SensitiveWordMsg> channel;

  /** Sensitive-word cache tree. */
  private final WordTree wordTreeCache = new WordTree();

  /**
   * Returns the sensitive words hit by the given texts.
   *
   * <p>Each text is lower-cased (locale-independently) before matching, so the tree is expected to
   * contain lower-case words. The tree is (re)loaded first when it is empty.
   *
   * @param words texts to check; {@code null}, empty and blank entries are skipped
   * @return the distinct hits joined with {@code ","}, or {@code null} when nothing was hit
   */
  public String getSensitiveWords(String... words) {
    // A caller can pass an explicit null array through the varargs parameter.
    if (words == null || words.length == 0) {
      return null;
    }
    if (wordTreeCache.isEmpty()) {
      initWordTreeCache();
    }
    Set<String> wordSet = new HashSet<>();
    for (String word : words) {
      if (!StringUtils.hasText(word)) {
        continue;
      }
      // Locale.ROOT keeps the folding independent of the JVM default locale (e.g. Turkish "I").
      List<String> sensitiveWords =
          wordTreeCache.matchAll(word.toLowerCase(java.util.Locale.ROOT));
      if (!CollectionUtils.isEmpty(sensitiveWords)) {
        wordSet.addAll(sensitiveWords);
      }
    }
    if (wordSet.isEmpty()) {
      return null;
    }
    return String.join(",", wordSet);
  }

  /** Clears the local cache tree and tells the other instances to do the same. */
  public void invalidateAll() {
    wordTreeCache.clear();
    // Notify the other zones.
    sendMsg(null, SensitiveWordMsgAction.INVALIDATE);
  }

  /**
   * Adds a word-library entry to the local cache and notifies the other instances.
   *
   * @param sensitiveWordId word-library id
   */
  public void addCache(String sensitiveWordId) {
    // Query the database and add the words to the cache.
    // NOTE(review): `xxx` is a placeholder left by the original text; supply the loaded words.
    wordTreeCache.addWords(xxx);
    // Notify the other zones.
    sendMsg(sensitiveWordId, SensitiveWordMsgAction.ADD);
  }

  /**
   * Sends a cache-change message over both the Redisson topic and the inter-zone channel.
   *
   * @param sensitiveWordId word-library id
   * @param action whether words were added or the cache was invalidated
   */
  private void sendMsg(String sensitiveWordId, SensitiveWordMsgAction action) {
    SensitiveWordMsg msg = new SensitiveWordMsg(this.localMsgId, sensitiveWordId, action);
    topic.publishAsync(msg);
    channel.send(msg);
  }

  /** Initializes the distributed cache: registers both listeners, then loads the word tree. */
  @PostConstruct
  private void initDistributedCache() {
    localMsgId = UuidUtils.generateUuid();
    topic = redissonClient.getTopic(SENSITIVE_WORD_TOPIC);
    topic.addListener(
        SensitiveWordMsg.class,
        (charSequence, msg) -> {
          // Comparing from the non-null local id avoids an NPE on a message without an id.
          if (msg == null || localMsgId.equals(msg.getMsgId())) {
            // Messages published by this instance are not handled again.
            return;
          }
          msgHandler(msg);
        });
    channel = izcService.getChannel(SENSITIVE_WORD_TOPIC);
    channel.registerListener(
        key -> {
          if (key == null) {
            log.error("recv {} notification, key is empty", SENSITIVE_WORD_TOPIC);
            return;
          }
          log.info(
              "recv {} notification, SensitiveWord: {}, Action: {}",
              SENSITIVE_WORD_TOPIC,
              key.getSensitiveWordId(),
              key.getAction());
          msgHandler(key);
        });
    log.info("Start SensitiveWordDistributedCache ......");
    initWordTreeCache();
  }

  /** Rebuilds the sensitive-word tree from scratch. */
  private void initWordTreeCache() {
    wordTreeCache.clear();
    // Do not filter out any characters (special characters take part in matching).
    wordTreeCache.setCharFilter(character -> true);
    // Load the sensitive-word library into the tree.
    // NOTE(review): `xxx` is a placeholder left by the original text; supply the loaded words.
    wordTreeCache.addWords(xxx);
  }

  /**
   * Applies a sensitive-word message received from another zone/instance.
   *
   * @param msg the received message; a message without an action is ignored
   */
  private void msgHandler(SensitiveWordMsg msg) {
    SensitiveWordMsgAction msgAction = msg.getAction();
    String sensitiveWordId = msg.getSensitiveWordId();
    if (msgAction == null) {
      log.warn(
          "recv {} notification without action, SensitiveWord: {}",
          SENSITIVE_WORD_TOPIC,
          sensitiveWordId);
      return;
    }
    // Enum constants are singletons, so identity comparison is exact and null-safe.
    if (msgAction == SensitiveWordMsgAction.ADD) {
      Sensitive entity = sensitiveRepository.findById(sensitiveWordId).orElse(null);
      if (entity == null) {
        return;
      }
      Set<String> words = entity.distinctSplitWords();
      wordTreeCache.addWords(words);
    } else if (msgAction == SensitiveWordMsgAction.INVALIDATE) {
      wordTreeCache.clear();
    }
  }
}
订阅消息Action和 msg
/**
 * Cache-change notification for the sensitive-word cache.
 *
 * <p>Implements {@link java.io.Serializable} so that serialization-based codecs can encode it when
 * it is published to other instances. NOTE(review): confirm which codec the messaging client is
 * configured with; JSON codecs do not need this, Java/JBoss serialization codecs do.
 *
 * @author Agao
 * @date 2023/9/7 17:09
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class SensitiveWordMsg implements java.io.Serializable {
  private static final long serialVersionUID = 1L;

  /** Id of the sending instance. */
  private String msgId;

  /** Word-library id the message refers to. */
  private String sensitiveWordId;

  /** What happened to the cache. */
  private SensitiveWordMsgAction action;
}
/**
 * Actions that a sensitive-word cache message can carry.
 *
 * @author Agao
 * @date 2023/9/7 16:44
 */
public enum SensitiveWordMsgAction {
  /** Add. */
  ADD,

  /** Invalidate everything. */
  INVALIDATE
}
注解校验:
/**
 * Sensitive-word constraint. The actual check is delegated to
 * {@code sensitiveService.validateSensitiveWords}, which treats different roles differently.
 */
@Target({ElementType.PARAMETER, ElementType.FIELD})
@Retention(RetentionPolicy.RUNTIME)
@Documented
@Constraint(validatedBy = {ValidatorSensitiveWord.class})
public @interface SensitiveWordValidator {

  /**
   * The empty default is only a placeholder; it is not actually used.
   *
   * @return the constraint message
   */
  String message() default "";

  Class<?>[] groups() default {};

  Class<? extends Payload>[] payload() default {};
}
/**
 * Constraint validator behind {@code @SensitiveWordValidator}.
 *
 * <p>It never reports a violation itself: non-blank values are handed to
 * {@code sensitiveService.validateSensitiveWords} and the method then returns {@code true}.
 * NOTE(review): rejection presumably happens inside that service call (e.g. by throwing) —
 * confirm against the service.
 *
 * @author Agao
 * @date 2023/9/4 13:59
 */
public class ValidatorSensitiveWord implements ConstraintValidator<SensitiveWordValidator, String> {

  @Autowired private SensitiveService sensitiveService;

  /**
   * Passes any value containing text to the sensitive-word service.
   *
   * @param value the annotated value; {@code null}, empty and blank values are not checked
   * @param context validator context (unused)
   * @return always {@code true}
   */
  @Override
  public boolean isValid(String value, ConstraintValidatorContext context) {
    boolean hasContent = StringUtils.hasText(value);
    if (hasContent) {
      sensitiveService.validateSensitiveWords(value);
    }
    return true;
  }
}