Java 敏感词查找和替换

Java 类:

  • SearchNode
  • SensitiveWords
  • SensitiveWordsReplace
  • SensitiveWordsSearch
  • WordsNode
  • TestSensitiveWordsSearch (测试类)
/**
 * One keyword occurrence located inside a searched text.
 *
 * <p>Holds the matched keyword text, the offset at which the match starts
 * ({@code index}), the offset just past its last character ({@code lastIndex})
 * and the id of the keyword that produced it.
 *
 * @date 2020-12-10 010 13:28
 */
public class SearchNode {

    /** Matched keyword text. */
    private String words;
    /** Offset of the first matched character. */
    private int index;
    /** Offset just past the last matched character. */
    private int lastIndex;
    /** Id of the keyword; stays {@code 0} when none is supplied. */
    private long id;

    /** Creates an empty node; every field keeps its Java default value. */
    public SearchNode() {
    }

    /**
     * Creates a node without a keyword id (the id stays {@code 0}).
     *
     * @param words     matched keyword text
     * @param index     offset of the first matched character
     * @param lastIndex offset just past the last matched character
     */
    public SearchNode(String words, int index, int lastIndex) {
        this(words, index, lastIndex, 0L);
    }

    /**
     * Creates a fully populated node.
     *
     * @param words     matched keyword text
     * @param index     offset of the first matched character
     * @param lastIndex offset just past the last matched character
     * @param id        id of the keyword
     */
    public SearchNode(String words, int index, int lastIndex, long id) {
        this.words = words;
        this.index = index;
        this.lastIndex = lastIndex;
        this.id = id;
    }

    /** @return id of the keyword */
    public long getId() {
        return this.id;
    }

    /** @param id id of the keyword */
    public void setId(long id) {
        this.id = id;
    }

    /** @return matched keyword text */
    public String getWords() {
        return this.words;
    }

    /** @param words matched keyword text */
    public void setWords(String words) {
        this.words = words;
    }

    /** @return offset of the first matched character */
    public int getIndex() {
        return this.index;
    }

    /** @param index offset of the first matched character */
    public void setIndex(int index) {
        this.index = index;
    }

    /** @return offset just past the last matched character */
    public int getLastIndex() {
        return this.lastIndex;
    }

    /** @param lastIndex offset just past the last matched character */
    public void setLastIndex(int lastIndex) {
        this.lastIndex = lastIndex;
    }
}
/**
 * A configured sensitive keyword: its id, the text to look for and the text
 * that should be written in its place.
 *
 * @date 2020-12-10 010 13:53
 */
public class SensitiveWords {

    /** Identifier of this keyword. */
    private long id;
    /** Text to search for. */
    private String words;
    /** Text substituted for {@link #words}. */
    private String replace;

    /** Creates an empty entry; every field keeps its Java default value. */
    public SensitiveWords() {
    }

    /**
     * Creates a fully populated entry.
     *
     * @param id      identifier of this keyword
     * @param words   text to search for
     * @param replace text substituted for {@code words}
     */
    public SensitiveWords(long id, String words, String replace) {
        this.id = id;
        this.words = words;
        this.replace = replace;
    }

    /** @return text substituted for the keyword */
    public String getReplace() {
        return this.replace;
    }

    /** @param replace text substituted for the keyword */
    public void setReplace(String replace) {
        this.replace = replace;
    }

    /** @return text to search for */
    public String getWords() {
        return this.words;
    }

    /** @param words text to search for */
    public void setWords(String words) {
        this.words = words;
    }

    /** @return identifier of this keyword */
    public long getId() {
        return this.id;
    }

    /** @param id identifier of this keyword */
    public void setId(long id) {
        this.id = id;
    }
}
/**
 * Rewrites a text by substituting every sensitive keyword found in it.
 *
 * <p>Matches come from {@code SensitiveWordsSearch.getInstance().findWords(text, true)};
 * the replacement text for a match is looked up by keyword id in the table
 * registered through {@link #init(List)}. A match that has no usable
 * replacement is masked with one {@code '*'} per matched character.
 *
 * @date 2020-12-09 009 19:51
 */
public class SensitiveWordsReplace {

    /**
     * All keywords, exactly as passed to {@link #init(List)}.
     */
    protected static List<SensitiveWords> sensitiveWordsList;
    /**
     * Keywords indexed by id; {@code null} until {@link #init(List)} has run.
     */
    protected static Map<Long, SensitiveWords> sensitiveWordsMap;

    /**
     * Registers the keyword table used to look up replacement texts.
     *
     * <p>The id index is built completely before any static field is touched,
     * so a failing call leaves the previously registered table in place.
     * {@code null} elements are skipped; when two entries share an id the
     * later one wins.
     *
     * @param sensitiveWordsList keywords with their replacement texts
     * @throws NullPointerException if {@code sensitiveWordsList} is {@code null}
     */
    public static void init(List<SensitiveWords> sensitiveWordsList) {
        if (null == sensitiveWordsList) {
            throw new NullPointerException("sensitiveWordsList");
        }
        Map<Long, SensitiveWords> byId = new HashMap<>(sensitiveWordsList.size());
        for (SensitiveWords sensitiveWords : sensitiveWordsList) {
            if (null != sensitiveWords) {
                byId.put(sensitiveWords.getId(), sensitiveWords);
            }
        }
        SensitiveWordsReplace.sensitiveWordsList = sensitiveWordsList;
        SensitiveWordsReplace.sensitiveWordsMap = byId;
    }

    /**
     * Returns {@code text} with every matched keyword replaced.
     *
     * @param text text to rewrite; {@code null} is returned unchanged
     * @return the rewritten text, or {@code text} itself when nothing matched
     */
    public static String findReplace(String text) {
        if (null == text) {
            return null;
        }
        // Replacement relies on longest-match search results.
        List<SearchNode> searchNodeList = SensitiveWordsSearch.getInstance().findWords(text, true);
        if (null == searchNodeList || searchNodeList.isEmpty()) {
            return text;
        }
        // Index the matches by their start offset for the single pass below.
        Map<Integer, SearchNode> matchByStart = new HashMap<>(searchNodeList.size());
        for (SearchNode searchNode : searchNodeList) {
            matchByStart.put(searchNode.getIndex(), searchNode);
        }
        final int length = text.length();
        StringBuilder builder = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            SearchNode match = matchByStart.get(i);
            // A match that does not end past its start cannot advance the cursor;
            // copy the character instead of looping forever.
            if (null == match || match.getLastIndex() <= i) {
                builder.append(text.charAt(i));
                i++;
                continue;
            }
            appendReplacement(builder, match);
            i = match.getLastIndex();
        }
        return builder.toString();
    }

    /**
     * Appends the replacement text for {@code match}, or a run of {@code '*'}
     * as long as the match when no replacement text is available (table not
     * initialised, id unknown, or replacement text {@code null}).
     */
    private static void appendReplacement(StringBuilder builder, SearchNode match) {
        Map<Long, SensitiveWords> table = SensitiveWordsReplace.sensitiveWordsMap;
        SensitiveWords sensitiveWords = null == table ? null : table.get(match.getId());
        String replacement = null == sensitiveWords ? null : sensitiveWords.getReplace();
        if (null != replacement) {
            builder.append(replacement);
            return;
        }
        int maskLength = match.getLastIndex() - match.getIndex();
        for (int j = 0; j < maskLength; j++) {
            builder.append('*');
        }
    }
}
/**
 * Singleton keyword finder backed by a character trie of {@code WordsNode}s.
 *
 * <p>Load keywords with {@link #updateKeywords(List)} before calling
 * {@link #findWords(String, boolean)}.
 *
 * @date 2020-12-09 009 19:36
 */
public class SensitiveWordsSearch {

    /**
     * Root of the keyword trie; {@code null} until keywords have been loaded.
     * Volatile so that a trie built by {@link #updateKeywords(List)} is
     * published to searching threads as a whole.
     */
    protected volatile WordsNode rootNode;
    /**
     * All keywords, as last passed to {@link #updateKeywords(List)}.
     */
    protected List<SensitiveWords> sensitiveWordsList;
    /**
     * True while a keyword update is being applied. This is a plain flag, not
     * an atomic guard.
     */
    protected boolean keywordsLoading;

    private SensitiveWordsSearch() {
    }

    /**
     * Returns the shared instance.
     *
     * @return SensitiveWordsSearch
     */
    public static SensitiveWordsSearch getInstance() {
        return SensitiveWordsSearchInstance.INSTANCE;
    }

    /**
     * Builds a fresh trie from {@link #sensitiveWordsList} and installs it.
     *
     * <p>The trie is assembled in a local variable and assigned to
     * {@link #rootNode} only once complete, so a failure while building keeps
     * the previous trie. Entries that are {@code null} or whose text is
     * {@code null} or empty are skipped. When the same text is listed more
     * than once the id of the last entry is kept.
     */
    private void initKeywords() {
        WordsNode root = new WordsNode();
        for (SensitiveWords sensitiveWords : sensitiveWordsList) {
            if (null == sensitiveWords) {
                continue;
            }
            String words = sensitiveWords.getWords();
            if (null == words || words.isEmpty()) {
                continue;
            }
            WordsNode node = root;
            int length = words.length();
            for (int i = 0; i < length; i++) {
                node = node.add(words.charAt(i));
                // Record the depth the first time a node is reached.
                if (node.getLayer() == 0) {
                    node.setLayer(i + 1);
                }
            }
            node.setEnd(true);
            node.setId(sensitiveWords.getId());
        }
        rootNode = root;
    }

    /**
     * Replaces the loaded keywords.
     *
     * <p>The call is ignored when another update is flagged as in progress.
     * The in-progress flag is always cleared again, even when building the
     * trie throws.
     *
     * @param sensitiveWordsList keywords to load
     * @throws NullPointerException if {@code sensitiveWordsList} is {@code null}
     */
    public void updateKeywords(List<SensitiveWords> sensitiveWordsList) {
        if (this.keywordsLoading) {
            return;
        }
        if (null == sensitiveWordsList) {
            throw new NullPointerException("sensitiveWordsList");
        }
        this.keywordsLoading = true;
        try {
            this.sensitiveWordsList = sensitiveWordsList;
            this.initKeywords();
        } finally {
            this.keywordsLoading = false;
        }
    }

    /**
     * Finds keyword occurrences in {@code text}.
     *
     * <p>Every text position is tried as a possible start and the trie is
     * walked from the root for each one, so a keyword is still found when a
     * longer candidate that began earlier turns out not to match.
     *
     * <ul>
     *   <li>{@code maxMatch == true}: at each start the longest keyword wins
     *       and scanning resumes right after it, giving non-overlapping
     *       matches ordered by position.</li>
     *   <li>{@code maxMatch == false}: every keyword occurrence is reported,
     *       including overlapping ones, ordered by start offset and then by
     *       length.</li>
     * </ul>
     *
     * <p>In each result {@code index} is the start offset and
     * {@code lastIndex} the offset just past the match.
     *
     * @param text     text to search; {@code null} or empty yields an empty list
     * @param maxMatch whether to report only the longest match per position
     * @return the matches found, never {@code null}
     * @throws IllegalStateException if no keywords have been loaded yet
     */
    public List<SearchNode> findWords(String text, boolean maxMatch) {
        // Read the field once so the whole search runs against one trie.
        final WordsNode root = rootNode;
        if (null == root) {
            throw new IllegalStateException("SensitiveWordsSearch uninitialized.");
        }
        List<SearchNode> list = new ArrayList<>();
        if (null == text || text.isEmpty()) {
            return list;
        }
        final int length = text.length();
        int start = 0;
        while (start < length) {
            WordsNode longest = null;
            int longestEnd = -1;
            WordsNode node = root;
            for (int pos = start; pos < length; pos++) {
                node = node.getNode(text.charAt(pos));
                if (null == node) {
                    break;
                }
                if (node.isEnd()) {
                    if (maxMatch) {
                        // Keep walking: a longer keyword may still follow.
                        longest = node;
                        longestEnd = pos + 1;
                    } else {
                        list.add(new SearchNode(text.substring(start, pos + 1), start, pos + 1, node.getId()));
                    }
                }
            }
            if (null != longest) {
                list.add(new SearchNode(text.substring(start, longestEnd), start, longestEnd, longest.getId()));
                // Matches must not overlap in longest-match mode.
                start = longestEnd;
            } else {
                start++;
            }
        }
        return list;
    }

    /**
     * Holder class for the shared instance.
     */
    private static class SensitiveWordsSearchInstance {
        /**
         * The shared instance.
         */
        private static final SensitiveWordsSearch INSTANCE = new SensitiveWordsSearch();
    }
}
/**
 * @date 2020-12-09 009 19:29
 */
public class WordsNode {

    private int layer;
    private boolean end;
    private char c;
    private long id;
    private Map<Character, WordsNode> nodeMap;
    private WordsNode parent;

    public WordsNode() {
        nodeMap = new HashMap<>(16);
    }

    /**
     * 新增字符
     *
     * @param c c
     * @return WordsNode
     */
    public WordsNode add(final Character c) {
        if (nodeMap.containsKey(c)) {
            return nodeMap.get(c);
        }
        final WordsNode node = new WordsNode();
        node.parent = this;
        node.c = c;
        nodeMap.put(c, node);
        return node;
    }

    public boolean hasKey(final char c) {
        return nodeMap.containsKey(c);
    }

    public WordsNode getNode(final char c) {
        return nodeMap.get(c);
    }

    /**
     * 获取当前节点的文本
     *
     * @return String
     */
    public String getWords() {
        if ('\u0000' == this.c) {
            return "";
        }
        List<String> words = new ArrayList<>(this.layer);
        words.add(String.valueOf(this.c));
        if (null != this.parent) {
            words.add(this.parent.getWords());
        }
        Collections.reverse(words);
        StringBuilder builder = new StringBuilder();
        for (String word : words) {
            builder.append(word);
        }
        return builder.toString();
    }

    public int getLayer() {
        return layer;
    }

    public void setLayer(int layer) {
        this.layer = layer;
    }

    public boolean isEnd() {
        return end;
    }

    public void setEnd(boolean end) {
        this.end = end;
    }

    public char getC() {
        return c;
    }

    public void setC(char c) {
        this.c = c;
    }

    public long getId() {
        return id;
    }

    public void setId(long id) {
        this.id = id;
    }

    public Map<Character, WordsNode> getNodeMap() {
        return nodeMap;
    }

    public void setNodeMap(Map<Character, WordsNode> nodeMap) {
        this.nodeMap = nodeMap;
    }

    public WordsNode getParent() {
        return parent;
    }

    public void setParent(WordsNode parent) {
        this.parent = parent;
    }
}
/**
 * End-to-end check: loads a small keyword table, runs the replacement over a
 * sample paragraph and verifies the rewritten text.
 *
 * @date 2020-12-10 010 10:15
 */
public class TestSensitiveWordsSearch {


    /**
     * Loads eight keywords into both the searcher and the replacer, rewrites
     * the sample text and fails when the result differs from the expected one.
     */
    @Test
    public void init() {
        SensitiveWordsSearch instance = SensitiveWordsSearch.getInstance();

        List<SensitiveWords> sensitiveWordsList = new ArrayList<>();
        sensitiveWordsList.add(new SensitiveWords(1L, "凌晨两点", "丑时三刻"));
        sensitiveWordsList.add(new SensitiveWords(2L, "国庆", "庆国"));
        sensitiveWordsList.add(new SensitiveWords(3L, "阅兵", "大阅"));
        sensitiveWordsList.add(new SensitiveWords(4L, "七点", "辰时"));
        sensitiveWordsList.add(new SensitiveWords(5L, "战地", "战场"));
        sensitiveWordsList.add(new SensitiveWords(6L, "维和军士", "和平使者"));
        sensitiveWordsList.add(new SensitiveWords(7L, "特警", "使者"));
        sensitiveWordsList.add(new SensitiveWords(8L, "小说", "软文"));
        instance.updateKeywords(sensitiveWordsList);

        System.out.println("已加载关键词:");
        System.out.println(JSON.toJSONString(sensitiveWordsList));

        String text = "凌晨两点毫无睡意,受国庆阅兵影响,七点爬起来看《白色橄榄树》这部小说。无论怎样评价玖月晞,战地记者与维和军士的配置简直招架不住。阿瓒又成为所看过小说里难以忘怀的一个名字,他是柔和淡定的排弹士兵,是机场突然出现的特警,也是在东国苏睿城郊5秒救下她的人。玖月晞轻描淡写,出来的故事却刻骨铭心,真的很喜欢这样力度的小说。";
        System.out.println("查询文本:\n" + text);

        SensitiveWordsReplace.init(sensitiveWordsList);
        String replace = SensitiveWordsReplace.findReplace(text);
        System.out.println(replace);

        // Expected text is derived from the same table with plain sequential
        // replacement. That is a valid oracle here because, for this sample,
        // no replacement text creates a new occurrence of any keyword.
        String expected = text;
        for (SensitiveWords sensitiveWords : sensitiveWordsList) {
            expected = expected.replace(sensitiveWords.getWords(), sensitiveWords.getReplace());
        }
        if (!expected.equals(replace)) {
            throw new AssertionError("expected <" + expected + "> but was <" + replace + ">");
        }
    }
}

 

结果对照:

查询文本:
凌晨两点毫无睡意,受国庆阅兵影响,七点爬起来看《白色橄榄树》这部小说。无论怎样评价玖月晞,战地记者与维和军士的配置简直招架不住。阿瓒又成为所看过小说里难以忘怀的一个名字,他是柔和淡定的排弹士兵,是机场突然出现的特警,也是在东国苏睿城郊5秒救下她的人。玖月晞轻描淡写,出来的故事却刻骨铭心,真的很喜欢这样力度的小说。
丑时三刻毫无睡意,受庆国大阅影响,辰时爬起来看《白色橄榄树》这部软文。无论怎样评价玖月晞,战场记者与和平使者的配置简直招架不住。阿瓒又成为所看过软文里难以忘怀的一个名字,他是柔和淡定的排弹士兵,是机场突然出现的使者,也是在东国苏睿城郊5秒救下她的人。玖月晞轻描淡写,出来的故事却刻骨铭心,真的很喜欢这样力度的软文。

 

posted @ 2020-12-11 21:49  Se7end  阅读(627)  评论(0)  编辑  收藏  举报