Java判断字符串中文乱码工具类

简介

只要包含一个乱码,即返回为true.
该方法正在试用中,请勿直接上生产,暂时记录一下。

工具类

import org.apache.commons.lang3.StringUtils;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Heuristic detector for garbled ("mojibake") text.
 *
 * <p>A string is reported as garbled as soon as it contains one character that,
 * after whitespace and common punctuation/symbols are discarded, is neither a
 * letter/digit nor a character from one of the CJK-related Unicode blocks
 * accepted by {@link #isChinese(int)}.
 *
 * <p>All methods are static; the class cannot be instantiated.
 *
 * @author levi
 */
public final class MessyUtil {

    /**
     * Characters that are ignored before inspection: whitespace, every Unicode
     * punctuation character ({@code \p{P}}) and the ASCII symbols
     * {@code ` ~ $ ^ + = < > |} (which are symbols, not punctuation, in Unicode).
     * Compiled once because the check may be called frequently.
     */
    private static final Pattern IGNORED = Pattern.compile("[\\s\\p{P}`~$^+=<>|]+");

    private MessyUtil() { }

    /**
     * Tells whether a UTF-16 code unit belongs to one of the accepted CJK blocks.
     *
     * <p>Accepted blocks: CJK Unified Ideographs, CJK Compatibility Ideographs,
     * CJK Unified Ideographs Extension A, General Punctuation, CJK Symbols and
     * Punctuation, Halfwidth and Fullwidth Forms.
     *
     * @param c the character to classify
     * @return {@code true} if {@code c} lies in one of the accepted blocks
     */
    public static boolean isChinese(final char c) {
        final Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
        return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Code-point variant of {@link #isChinese(char)}.
     *
     * <p>Accepts the same blocks as the {@code char} overload and, because a code
     * point can lie outside the Basic Multilingual Plane, additionally
     * CJK Unified Ideographs Extension B and CJK Compatibility Ideographs Supplement.
     *
     * @param codePoint a valid Unicode code point
     * @return {@code true} if {@code codePoint} lies in one of the accepted blocks
     * @throws IllegalArgumentException if {@code codePoint} is not a valid code point
     */
    public static boolean isChinese(final int codePoint) {
        final Character.UnicodeBlock ub = Character.UnicodeBlock.of(codePoint);
        return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
                || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Checks a string for garbled characters without any extra diagnostic context.
     *
     * @param strName the text to inspect; may be {@code null}
     * @return {@code true} if at least one garbled character is found
     */
    public static boolean isMessyCode(final String strName) {
        return isMessyCode(strName, "");
    }

    /**
     * Checks a string for garbled characters.
     *
     * <p>Whitespace, punctuation and a fixed set of ASCII symbols are removed first.
     * The remainder is scanned code point by code point (so characters encoded as
     * surrogate pairs are judged as a whole, not as two broken halves). The first
     * code point that is neither a letter/digit nor accepted by
     * {@link #isChinese(int)} makes the method print a diagnostic line to
     * standard output and return {@code true}.
     *
     * @param strName the text to inspect; {@code null}, empty and whitespace-only
     *                input is never considered garbled
     * @param log     free-form context appended to the diagnostic line
     * @return {@code true} if at least one garbled character is found
     */
    public static boolean isMessyCode(final String strName, final String log) {
        if (isBlank(strName)) {
            return false;
        }
        // trim() additionally drops leading/trailing control characters (<= U+0020)
        // that the pattern's \s does not cover.
        final String remaining = IGNORED.matcher(strName).replaceAll("").trim();
        int index = 0;
        while (index < remaining.length()) {
            final int codePoint = remaining.codePointAt(index);
            index += Character.charCount(codePoint);
            if (!Character.isLetterOrDigit(codePoint) && !isChinese(codePoint)) {
                final String offending = new String(Character.toChars(codePoint));
                System.out.println("isMessyCode: " + offending + " -> " + strName + " -> " + log);
                return true;
            }
        }
        return false;
    }

    /**
     * Returns {@code true} for {@code null}, empty, or whitespace-only text
     * (whitespace as defined by {@link Character#isWhitespace(char)}).
     */
    private static boolean isBlank(final String text) {
        if (text == null) {
            return true;
        }
        for (int i = 0; i < text.length(); i++) {
            if (!Character.isWhitespace(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Small manual demo: prints the verdict for a few sample strings.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println(isMessyCode("Դ\u03A2���"));
        System.out.println(isMessyCode("测试测试f123123`~!@:/.asdf!#$%^&*()_+-=[]{};':\",.<>/?\\|/*-+."));
        System.out.println(isMessyCode("�123测试"));
    }

}

posted @ 2021-03-17 16:40  levi125  阅读(925)  评论(0)  编辑  收藏  举报