|
@@ -0,0 +1,135 @@
|
|
|
+package com.management.platform.util;
|
|
|
+
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+import org.springframework.data.redis.core.RedisTemplate;
|
|
|
+import org.springframework.stereotype.Component;
|
|
|
+
|
|
|
+import java.util.*;
|
|
|
+
|
|
|
+@Slf4j
|
|
|
+@Component
|
|
|
+public class WordFilter {
|
|
|
+ private final static String WORDS = "WORDS";
|
|
|
+ private final static String REPLACE_CHAR = "*";
|
|
|
+ private static HashMap sensitiveWordMap;
|
|
|
+ /** * 最小匹配规则 */
|
|
|
+ private static int minMatchTYpe = 1;
|
|
|
+ /** * 最大匹配规则 */
|
|
|
+ private static int maxMatchType = 2;
|
|
|
+// @Autowired
|
|
|
+// private RedisTemplate<String, Object> redisTemplate;
|
|
|
+// @Autowired
|
|
|
+// private SensitiveWordMapper sensitiveWordMapper ;
|
|
|
+//
|
|
|
+ public String replaceWords(String text) {
|
|
|
+ if (StringUtils.isBlank(text)) {
|
|
|
+ return text;
|
|
|
+ }
|
|
|
+// List<Object> words = redisTemplate.opsForList().range("SensitiveWord", 0, -1);
|
|
|
+// if (words.size()<=0){
|
|
|
+// words=sensitiveWordMapper.listStr();
|
|
|
+// //更新redis
|
|
|
+// redisTemplate.opsForList().leftPushAll("SensitiveWord",sensitiveWordMapper.listStr());
|
|
|
+// }
|
|
|
+ //缓存获取敏感词汇原记录
|
|
|
+// return WordFilter.replaceSensitiveWord(words, text, WordFilter.minMatchTYpe);
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 替换敏感字字符
|
|
|
+ *
|
|
|
+ * @param data 敏感字集合
|
|
|
+ * @param txt 待检查文本
|
|
|
+ * @param matchType 匹配规则
|
|
|
+ */
|
|
|
+ private static String replaceSensitiveWord(List<Object> data, String txt, int matchType) {
|
|
|
+ if (sensitiveWordMap == null) {
|
|
|
+ addSensitiveWord(data);
|
|
|
+ }
|
|
|
+ String resultTxt = txt;
|
|
|
+ //获取所有的敏感词
|
|
|
+ List<String> set = getSensitiveWord(txt, matchType);
|
|
|
+ Iterator<String> iterator = set.iterator();
|
|
|
+ while (iterator.hasNext()) {
|
|
|
+ resultTxt = resultTxt.replaceAll(iterator.next(), REPLACE_CHAR);
|
|
|
+ }
|
|
|
+ return resultTxt;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 读取敏感词库,将敏感词放入HashSet中,构建一个DFA算法模型:
|
|
|
+ * 说明:该方法来源于互联网
|
|
|
+ */
|
|
|
+ private static void addSensitiveWord(List<Object> datas) {
|
|
|
+ sensitiveWordMap = new HashMap(datas.size());
|
|
|
+ Iterator<Object> iterator = datas.iterator();
|
|
|
+ Map<String, Object> now = null;
|
|
|
+ Map now2 = null;
|
|
|
+ while (iterator.hasNext()) {
|
|
|
+ now2 = sensitiveWordMap;
|
|
|
+ String word = (String)iterator.next(); //敏感词
|
|
|
+ word=word.trim();
|
|
|
+ for (int i = 0; i < word.length(); i++) {
|
|
|
+ char key_word = word.charAt(i);
|
|
|
+ Object obj = now2.get(key_word);
|
|
|
+ if (obj != null) { //存在
|
|
|
+ now2 = (Map) obj;
|
|
|
+ } else { //不存在
|
|
|
+ now = new HashMap<String, Object>();
|
|
|
+ now.put("isEnd", "0");
|
|
|
+ now2.put(key_word, now);
|
|
|
+ now2 = now;
|
|
|
+ }
|
|
|
+ if (i == word.length() - 1) {
|
|
|
+ now2.put("isEnd", "1");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 获取内容中的敏感词
|
|
|
+ *说明:该方法来源于互联网
|
|
|
+ * @param text 内容
|
|
|
+ * @param matchType 匹配规则 1=不最佳匹配,2=最佳匹配
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ private static List<String> getSensitiveWord(String text, int matchType) {
|
|
|
+ List<String> words = new ArrayList<String>();
|
|
|
+ Map now = sensitiveWordMap;
|
|
|
+ int count = 0; //初始化敏感词长度
|
|
|
+ int start = 0; //标志敏感词开始的下标
|
|
|
+ for (int i = 0; i < text.length(); i++) {
|
|
|
+ char key = text.charAt(i);
|
|
|
+ now = (Map) now.get(key);
|
|
|
+ if (now != null) { //存在
|
|
|
+ count++;
|
|
|
+ if (count == 1) {
|
|
|
+ start = i;
|
|
|
+ }
|
|
|
+ if ("1".equals(now.get("isEnd"))) { //敏感词结束
|
|
|
+ now = sensitiveWordMap; //重新获取敏感词库
|
|
|
+ words.add(text.substring(start, start + count)); //取出敏感词,添加到集合
|
|
|
+ count = 0; //初始化敏感词长度
|
|
|
+ }
|
|
|
+ } else { //不存在
|
|
|
+ now = sensitiveWordMap;//重新获取敏感词库
|
|
|
+ if (count == 1 && matchType == 1) { //不最佳匹配
|
|
|
+ count = 0;
|
|
|
+ } else if (count == 1 && matchType == 2) { //最佳匹配
|
|
|
+ words.add(text.substring(start, start + count));
|
|
|
+ count = 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return words;
|
|
|
+ }
|
|
|
+ public WordFilter() {
|
|
|
+ super();
|
|
|
+ }
|
|
|
+
|
|
|
+}
|