diff --git a/im-platform/src/main/java/com/bx/implatform/service/impl/GroupMessageServiceImpl.java b/im-platform/src/main/java/com/bx/implatform/service/impl/GroupMessageServiceImpl.java index 0e8d8eb..a47dfba 100644 --- a/im-platform/src/main/java/com/bx/implatform/service/impl/GroupMessageServiceImpl.java +++ b/im-platform/src/main/java/com/bx/implatform/service/impl/GroupMessageServiceImpl.java @@ -1,8 +1,7 @@ package com.bx.implatform.service.impl; import cn.hutool.core.collection.CollUtil; -import cn.hutool.core.collection.CollectionUtil; -import cn.hutool.core.util.StrUtil; +import cn.hutool.core.text.CharSequenceUtil; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; import com.baomidou.mybatisplus.core.toolkit.Wrappers; @@ -27,12 +26,13 @@ import com.bx.implatform.service.IGroupService; import com.bx.implatform.session.SessionContext; import com.bx.implatform.session.UserSession; import com.bx.implatform.util.BeanUtils; -import com.bx.implatform.util.DateTimeUtils; +import com.bx.implatform.util.SensitiveFilterUtil; import com.bx.implatform.vo.GroupMessageVO; import com.google.common.base.Splitter; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.time.DateUtils; import org.springframework.data.redis.core.RedisTemplate; import org.springframework.stereotype.Service; @@ -47,6 +47,7 @@ public class GroupMessageServiceImpl extends ServiceImpl redisTemplate; private final IMClient imClient; + private final SensitiveFilterUtil sensitiveFilterUtil; @Override public Long sendMessage(GroupMessageDTO dto) { @@ -55,12 +56,12 @@ public class GroupMessageServiceImpl extends ServiceImpl members = groupMemberService.findByUserId(session.getUserId()); List ids = members.stream().map(GroupMember::getGroupId).collect(Collectors.toList()); - if (CollectionUtil.isEmpty(ids)) { + if (CollUtil.isEmpty(ids)) { return new ArrayList<>(); } // 只能拉取最近1个月的 - Date minDate = DateTimeUtils.addMonths(new Date(), -1); + Date minDate = DateUtils.addMonths(new Date(), -1); LambdaQueryWrapper wrapper = Wrappers.lambdaQuery(); wrapper.gt(GroupMessage::getId, minId).gt(GroupMessage::getSendTime, minDate).in(GroupMessage::getGroupId, ids) .ne(GroupMessage::getStatus, MessageStatus.RECALL.code()).orderByAsc(GroupMessage::getId).last("limit 100"); @@ -154,6 +158,7 @@ public class GroupMessageServiceImpl extends ServiceImpl messages = this.list(wrapper); // 转成vo List vos = messages.stream().map(m -> { + m.setContent(sensitiveFilterUtil.filter(m.getContent())); GroupMessageVO vo = BeanUtils.copyProperties(m, GroupMessageVO.class); // 被@用户列表 if (StringUtils.isNotBlank(m.getAtUserIds())) { @@ -207,7 +212,7 @@ public class GroupMessageServiceImpl extends ServiceImpl messages = this.list(wrapper); List messageInfos = - messages.stream().map(m -> BeanUtils.copyProperties(m, GroupMessageVO.class)).collect(Collectors.toList()); + messages.stream().map(m -> { + m.setContent(sensitiveFilterUtil.filter(m.getContent())); + return BeanUtils.copyProperties(m, GroupMessageVO.class); + }).collect(Collectors.toList()); log.info("拉取群聊记录,用户id:{},群聊id:{},数量:{}", userId, groupId, messageInfos.size()); return messageInfos; } diff --git a/im-platform/src/main/java/com/bx/implatform/service/impl/PrivateMessageServiceImpl.java b/im-platform/src/main/java/com/bx/implatform/service/impl/PrivateMessageServiceImpl.java index 6c1564d..8c2cfa6 100644 --- a/im-platform/src/main/java/com/bx/implatform/service/impl/PrivateMessageServiceImpl.java +++ b/im-platform/src/main/java/com/bx/implatform/service/impl/PrivateMessageServiceImpl.java @@ -22,11 +22,11 @@ import com.bx.implatform.service.IPrivateMessageService; import com.bx.implatform.session.SessionContext; import com.bx.implatform.session.UserSession; import com.bx.implatform.util.BeanUtils; -import com.bx.implatform.util.DateTimeUtils; +import com.bx.implatform.util.SensitiveFilterUtil; import com.bx.implatform.vo.PrivateMessageVO; -import lombok.AllArgsConstructor; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.time.DateUtils; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -40,6 +40,7 @@ public class PrivateMessageServiceImpl extends ServiceImpl sendMessage = new IMPrivateMessage<>(); @@ -122,7 +126,10 @@ public class PrivateMessageServiceImpl extends ServiceImpl messages = this.list(wrapper); - List messageInfos = messages.stream().map(m -> BeanUtils.copyProperties(m, PrivateMessageVO.class)).collect(Collectors.toList()); + List messageInfos = messages.stream().map(m -> { + m.setContent(sensitiveFilterUtil.filter(m.getContent())); + return BeanUtils.copyProperties(m, PrivateMessageVO.class); + }).collect(Collectors.toList()); log.info("拉取聊天记录,用户id:{},好友id:{},数量:{}", userId, friendId, messageInfos.size()); return messageInfos; } @@ -139,7 +146,7 @@ public class PrivateMessageServiceImpl extends ServiceImpl queryWrapper = Wrappers.lambdaQuery(); // 只能拉取最近1个月的 - Date minDate = DateTimeUtils.addMonths(new Date(), -1); + Date minDate = DateUtils.addMonths(new Date(), -1); queryWrapper.gt(PrivateMessage::getId, minId) .ge(PrivateMessage::getSendTime, minDate) .ne(PrivateMessage::getStatus, MessageStatus.RECALL.code()) @@ -164,7 +171,10 @@ public class PrivateMessageServiceImpl extends ServiceImpl BeanUtils.copyProperties(m, PrivateMessageVO.class)).collect(Collectors.toList()); + return messages.stream().map(m -> { + m.setContent(sensitiveFilterUtil.filter(m.getContent())); + return BeanUtils.copyProperties(m, PrivateMessageVO.class); + }).collect(Collectors.toList()); } diff --git a/im-platform/src/main/java/com/bx/implatform/util/SensitiveFilterUtil.java b/im-platform/src/main/java/com/bx/implatform/util/SensitiveFilterUtil.java new file mode 100644 index 0000000..66377b4 --- /dev/null +++ b/im-platform/src/main/java/com/bx/implatform/util/SensitiveFilterUtil.java @@ -0,0 +1,205 @@ +package com.bx.implatform.util; + +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.CharUtils; +import org.apache.commons.lang3.StringUtils; +import org.springframework.stereotype.Component; + +import javax.annotation.PostConstruct; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.HashMap; +import java.util.Map; + +/** + * 敏感词过滤器——SensitiveFilter + * + * @author Andrews + * @date 2023/12/4 11:12 + * @return null + */ +@Slf4j +@Component +public final class SensitiveFilterUtil { + + private SensitiveFilterUtil() { + } + + /** + * 替换符 + */ + private static final String REPLACE_MENT = "***"; + + /** + * 根节点 + */ + private static final TrieNode ROOT_NODE = new TrieNode(); + + /** + * 1、 前缀树 前缀树某一个节点 + * + * @author NXY + * @date 2023/12/4 11:17 + * @return null + */ + private static class TrieNode { + // 关键词结束标识 + private boolean isKeywordEnd = false; + + // 子节点(key是下级字符,value是下级节点) + // 当前节点的子节点 + private final Map subNodes = new HashMap<>(); + + public boolean isKeywordEnd() { + return isKeywordEnd; + } + + public void setKeywordEnd(boolean keywordEnd) { + isKeywordEnd = keywordEnd; + } + + // 添加子节点 + public void addSubNode(Character c, TrieNode node) { + subNodes.put(c, node); + } + + // 获取子节点 + public TrieNode getSubNode(Character c) { + return subNodes.get(c); + } + + } + + /** + * 2、初始化方法,服务器启动时初始化 + * + * @author NXY + * @date 2023/12/4 11:18 + */ + @PostConstruct + public void init() { + try ( + // 类加载器 + InputStream is = this.getClass().getClassLoader().getResourceAsStream("sensitive-words.txt"); + BufferedReader reader = new BufferedReader(new InputStreamReader(is)); + ) { + String keyword; + while ((keyword = reader.readLine()) != null) { + // 添加到前缀树 + this.addKeyword(keyword); + } + } catch (IOException e) { + log.error("加载敏感词文件失败: " + e.getMessage()); + } + } + + /** + * 3、将一个敏感词添加到前缀树中 + * + * @param keyword + * @author NXY + * @date 2023/12/4 11:15 + */ + private void addKeyword(String keyword) { + TrieNode tempNode = ROOT_NODE; + for (int i = 0; i < keyword.length(); i++) { + char c = keyword.charAt(i); + TrieNode subNode = tempNode.getSubNode(c); + if (subNode == null) { + // 初始化子节点 + subNode = new TrieNode(); + tempNode.addSubNode(c, subNode); + } + // 指向子节点,进入下一轮循环 + tempNode = subNode; + // 设置结束标识 + if (i == keyword.length() - 1) { + tempNode.setKeywordEnd(true); + } + } + } + + /** + * 过滤敏感词 + * + * @param text 待过滤的文本 + * @return 过滤后的文本 + */ + public String filter(String text) { + if (StringUtils.isBlank(text)) { + return null; + } + // 结果 + StringBuilder sb = new StringBuilder(); + try { + // 指针1 + TrieNode tempNode = ROOT_NODE; + // 指针2 + int begin = 0; + // 指针3 + int position = 0; + while (begin < text.length()) { + if (position < text.length()) { + char c = text.charAt(position); + // 跳过符号 + if (isSymbol(c)) { + // 若指针1处于根节点,将此符号计入结果,让指针2向下走一步 + if (tempNode == ROOT_NODE) { + sb.append(c); + begin++; + } + // 无论符号在开头或中间,指针3都向下走一步 + position++; + continue; + } + // 检查下级节点 + tempNode = tempNode.getSubNode(c); + if (tempNode == null) { + // 以begin开头的字符串不是敏感词 + sb.append(text.charAt(begin)); + // 进入下一个位置 + position = ++begin; + // 重新指向根节点 + tempNode = ROOT_NODE; + } else if (tempNode.isKeywordEnd()) { + // 发现敏感词,将begin~position字符串替换掉 + sb.append(REPLACE_MENT); + // 进入下一个位置 + begin = ++position; + // 重新指向根节点 + tempNode = ROOT_NODE; + } else { + // 检查下一个字符 + position++; + } + } + // position遍历越界仍未匹配到敏感词 + else { + sb.append(text.charAt(begin)); + position = ++begin; + tempNode = ROOT_NODE; + } + } + } catch (Exception e) { + sb = new StringBuilder(text); + } + return sb.toString(); + } + + /** + * 判断是否为符号 ——特殊符号 + * + * @param c + * @return boolean + * @author NXY + * @date 2023/12/4 11:17 + */ + private boolean isSymbol(Character c) { + // 0x2E80~0x9FFF 是东亚文字范围 + return !CharUtils.isAsciiAlphanumeric(c) && (c < 0x2E80 || c > 0x9FFF); + } +} + + diff --git a/im-platform/src/main/resources/sensitive-words.txt b/im-platform/src/main/resources/sensitive-words.txt new file mode 100644 index 0000000..59cac85 --- /dev/null +++ b/im-platform/src/main/resources/sensitive-words.txt @@ -0,0 +1,3 @@ +杀了你 +傻逼 +去死 \ No newline at end of file