Java 敏感词检测的简单实现
模仿网上的 DFA 算法写的,感觉自己写的处理方法效率不高,先记录一下,毕竟是自己写的
下面是工具类(包含测试用的 main 方法)
package com.htht.business.utils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Sensitive-word detection backed by a character tree of {@link SentiveObj} nodes.
 *
 * <p>Each word is inserted one character at a time below a shared root node. A node whose
 * end flag is {@code "0"} marks the last character of a complete word.
 *
 * @author linjunwei
 * @version 2024/12/6 9:14
 */
public class SentiveUtils {

    /** End-flag value of a node that closes a complete sensitive word. */
    private static final String END_OF_WORD = "0";

    /** End-flag value of a node that is only an intermediate character. */
    private static final String NOT_END_OF_WORD = "1";

    /**
     * Small demo: builds a tree from two words, prints it, then scans a sample sentence.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> list = new ArrayList<>();
        list.add("死全家");
        list.add("死全家了");
        SentiveObj sentiveObj = buildMap(list);
        System.out.println(sentiveObj.toString());
        String s = scanSentiveKey("死光光,死全了", sentiveObj);
        System.out.println(s);
    }

    /**
     * Builds the word tree for the given sensitive words.
     *
     * @param sentiveList words to insert; a {@code null} list yields an empty tree, and
     *                    {@code null} or empty entries are skipped
     * @return the root node (empty key, empty prefix, not an end node)
     */
    public static SentiveObj buildMap(List<String> sentiveList) {
        // The root carries no character of its own; it only anchors the first characters.
        SentiveObj result = new SentiveObj();
        result.setSentiveKey("");
        result.setBeforeSentiveKey("");
        result.setEndFlag(NOT_END_OF_WORD);
        result.setChildMap(new HashMap<>());
        if (sentiveList == null) {
            return result;
        }
        for (String s : sentiveList) {
            buildMap(result, s);
        }
        return result;
    }

    /**
     * Inserts one word into the tree, creating any nodes that are missing along its path.
     *
     * @param sentiveObj root node to insert under; its child map must be non-null
     * @param key        word to insert; {@code null} or empty is ignored
     */
    public static void buildMap(SentiveObj sentiveObj, String key) {
        if (key == null || key.isEmpty()) {
            return;
        }
        SentiveObj node = sentiveObj;
        for (int i = 0; i < key.length(); i++) {
            String keyChar = String.valueOf(key.charAt(i));
            Map<String, SentiveObj> children = node.getChildMap();
            SentiveObj child = children.get(keyChar);
            if (child == null) {
                child = new SentiveObj();
                child.setChildMap(new HashMap<>());
                child.setSentiveKey(keyChar);
                // Store the word so far, so a hit can be reported without walking back up.
                child.setBeforeSentiveKey(key.substring(0, i + 1));
                children.put(keyChar, child);
            }
            node = child;
        }
        // The loop ran at least once, so node is the word's last character.
        node.setEndFlag(END_OF_WORD);
    }

    /**
     * Scans a sentence for sensitive words, trying every position as a possible word start.
     *
     * <p>When several positions match, the match starting at the latest position is reported.
     * A failed attempt at a later position never discards an earlier match.
     *
     * @param jvzi       sentence to scan; {@code null} is treated as containing no words
     * @param sentiveObj root node of the word tree
     * @return {@code "敏感词:" + word} for the reported match, or {@code null} if none is found
     */
    public static String scanSentiveKey(String jvzi, SentiveObj sentiveObj) {
        if (jvzi == null || sentiveObj == null) {
            return null;
        }
        String result = null;
        for (int i = 0; i < jvzi.length(); i++) {
            String match = matchSentiveKey(sentiveObj, jvzi, i);
            if (match != null) {
                result = match;
            }
        }
        return result;
    }

    /**
     * Tries to match a sensitive word that starts exactly at index {@code i} of {@code s}.
     *
     * <p>Matching stops at the first end node reached, so when one word is a prefix of
     * another the shorter word is reported.
     *
     * @param sentiveObj root node of the word tree
     * @param s          text being scanned
     * @param i          index in {@code s} at which the word must start
     * @return {@code "敏感词:" + word} on a match, otherwise {@code null}
     */
    public static String matchSentiveKey(SentiveObj sentiveObj, String s, int i) {
        SentiveObj node = sentiveObj;
        for (int pos = i; pos < s.length(); pos++) {
            Map<String, SentiveObj> children = node.getChildMap();
            if (children == null) {
                return null;
            }
            node = children.get(String.valueOf(s.charAt(pos)));
            if (node == null) {
                return null;
            }
            if (END_OF_WORD.equals(node.getEndFlag())) {
                return "敏感词:" + node.getBeforeSentiveKey();
            }
        }
        // Text ended in the middle of a word.
        return null;
    }
}
下面是实体类(敏感词树的节点)
package com.htht.business.utils;
import lombok.Data;
import java.util.Map;
/**
 * One node of the sensitive-word tree: a single character plus the nodes that may follow it.
 *
 * <p>Getters, setters, {@code equals}, {@code hashCode} and {@code toString} are generated
 * by Lombok's {@code @Data}.
 *
 * @author linjunwei
 * @version 2024/12/6 9:17
 */
@Data
public class SentiveObj {

    /** Key (character) of the current node. */
    private String sentiveKey;

    /** Prefix of the sensitive word associated with this node. */
    private String beforeSentiveKey;

    /** Child nodes, keyed by character. */
    private Map<String, SentiveObj> childMap;

    /** Whether this node is the last character of a sensitive word: "0" = yes, "1" = no. */
    private String endFlag = "1";

    /** Creates a node that is not the end of a word; the other fields start as {@code null}. */
    public SentiveObj() {
    }
}

浙公网安备 33010602011771号