Java 敏感词检测的简单实现
本文模仿网上的 DFA 算法实现了敏感词检测。感觉自己写的处理方法效率不高,但总归是自己写的,在此记录一下。
下面是工具类(包含测试用的 main 方法)
package com.htht.business.utils;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Sensitive-word detection backed by a character trie of {@link SentiveObj} nodes.
 *
 * <p>Each node maps a one-character string to its child node; a node whose
 * {@code endFlag} is {@code "0"} terminates a sensitive word.
 *
 * @author linjunwei
 * @version 2024/12/6 9:14
 */
public class SentiveUtils {

    /** {@code endFlag} value marking a node that is the last character of a sensitive word. */
    private static final String END_OF_WORD = "0";

    /** {@code endFlag} value marking a node that does not end a sensitive word. */
    private static final String NOT_END_OF_WORD = "1";

    /**
     * Small manual demo: builds a trie from two words and scans one sentence.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> list = new ArrayList<>();
        list.add("死全家");
        list.add("死全家了");
        SentiveObj sentiveObj = buildMap(list);
        System.out.println(sentiveObj.toString());
        String s = scanSentiveKey("死光光,死全了", sentiveObj);
        System.out.println(s);
    }

    /**
     * Builds a trie containing every word of the given list.
     *
     * @param sentiveList sensitive words; a {@code null} list yields an empty trie and
     *                    {@code null} or empty entries are skipped
     * @return the root node (empty key, empty prefix, not an end-of-word node)
     */
    public static SentiveObj buildMap(List<String> sentiveList) {
        // The root is created by hand: it represents "no characters consumed yet".
        SentiveObj root = new SentiveObj();
        root.setSentiveKey("");
        root.setBeforeSentiveKey("");
        root.setEndFlag(NOT_END_OF_WORD);
        root.setChildMap(new HashMap<>());
        if (sentiveList == null) {
            return root;
        }
        for (String word : sentiveList) {
            buildMap(root, word);
        }
        return root;
    }

    /**
     * Inserts one word into the trie rooted at {@code sentiveObj}, creating missing nodes
     * and flagging the node of the last character as end-of-word.
     *
     * @param sentiveObj root of the trie to extend; nothing happens when {@code null}
     * @param key        the sensitive word; nothing happens when {@code null} or empty
     */
    public static void buildMap(SentiveObj sentiveObj, String key) {
        if (sentiveObj == null || key == null || key.isEmpty()) {
            // An empty word must not flag the root itself as a sensitive word.
            return;
        }
        SentiveObj node = sentiveObj;
        for (int i = 0; i < key.length(); i++) {
            String keyChar = String.valueOf(key.charAt(i));
            Map<String, SentiveObj> children = node.getChildMap();
            if (children == null) {
                // Tolerate nodes that were created without a child map.
                children = new HashMap<>();
                node.setChildMap(children);
            }
            SentiveObj child = children.get(keyChar);
            if (child == null) {
                child = new SentiveObj();
                child.setChildMap(new HashMap<>());
                child.setSentiveKey(keyChar);
                child.setBeforeSentiveKey(key.substring(0, i + 1));
                children.put(keyChar, child);
            }
            node = child;
        }
        // "node" now is the node of the word's last character.
        node.setEndFlag(END_OF_WORD);
    }

    /**
     * Scans a sentence for sensitive words, trying every character position as a
     * possible start of a word.
     *
     * <p>A match found at one position is never discarded because a later position
     * fails to match. When several positions match, the match starting at the
     * right-most position is returned, which is the value the previous implementation
     * produced whenever it reported a word at all.
     *
     * @param jvzi       the sentence to scan; {@code null} yields {@code null}
     * @param sentiveObj root of the trie; {@code null} yields {@code null}
     * @return {@code "敏感词:"} followed by the matched word, or {@code null} when the
     *         sentence contains no sensitive word
     */
    public static String scanSentiveKey(String jvzi, SentiveObj sentiveObj) {
        if (jvzi == null || sentiveObj == null) {
            return null;
        }
        String result = null;
        for (int start = 0; start < jvzi.length(); start++) {
            String hit = matchSentiveKey(sentiveObj, jvzi, start);
            if (hit != null) {
                // Only a real match may replace the remembered one.
                result = hit;
            }
        }
        return result;
    }

    /**
     * Walks the trie along the characters of {@code s} starting at index {@code i} and
     * stops at the first end-of-word node, so the shortest word at that position wins.
     *
     * @param sentiveObj node to start from (normally the root)
     * @param s          the text being scanned
     * @param i          index in {@code s} of the first character to match
     * @return {@code "敏感词:"} followed by the matched word, or {@code null} when no
     *         word starts at {@code i} or an argument is {@code null} / {@code i} is negative
     */
    public static String matchSentiveKey(SentiveObj sentiveObj, String s, int i) {
        if (sentiveObj == null || s == null || i < 0) {
            return null;
        }
        SentiveObj node = sentiveObj;
        for (int pos = i; pos < s.length(); pos++) {
            Map<String, SentiveObj> children = node.getChildMap();
            if (children == null) {
                return null;
            }
            node = children.get(String.valueOf(s.charAt(pos)));
            if (node == null) {
                return null;
            }
            // Constant-first equals: a node with a null flag is simply not a word end.
            if (END_OF_WORD.equals(node.getEndFlag())) {
                return "敏感词:" + node.getBeforeSentiveKey();
            }
        }
        return null;
    }
}
下面是实体类(敏感词树的节点)
package com.htht.business.utils;

import lombok.Data;

import java.util.HashMap;
import java.util.Map;

/**
 * One node of the sensitive-word tree built by {@code SentiveUtils}.
 *
 * <p>Getters, setters, {@code equals}/{@code hashCode} and {@code toString} are
 * generated by Lombok's {@code @Data}.
 *
 * @author linjunwei
 * @version 2024/12/6 9:17
 */
@Data
public class SentiveObj {

    /**
     * Key of this node. {@code SentiveUtils.buildMap} stores the single character this
     * node stands for, and an empty string for the root.
     */
    private String sentiveKey;

    /**
     * Prefix of the sensitive word from its first character up to and including this
     * node's character (empty string for the root).
     */
    private String beforeSentiveKey;

    /**
     * Child nodes keyed by their one-character string. Initialised to an empty map so
     * that a freshly constructed node can be queried without a prior {@code setChildMap}.
     */
    private Map<String, SentiveObj> childMap;

    /**
     * Whether this node is the last character of a sensitive word:
     * {@code "0"} = yes, {@code "1"} = no.
     */
    private String endFlag;

    /** Creates a node that does not end a word and has no children. */
    public SentiveObj() {
        this.endFlag = "1";
        this.childMap = new HashMap<>();
    }
}