批量转换文件字符编码(GBK转UTF-8)

今天收到一份代码,拖到IDE中发现乱码,看了下编码是GBK的(😓)。只能手写个小工具批量转换下。代码在这里分享下,需要的可自取。

 

核心文件就两个:

ConverterUtil.java

/**
 * Static helpers used by the converter: positional command-line parsing,
 * whole-file text I/O with an explicit charset, and recursive file discovery.
 */
public class ConverterUtil {

    /**
     * Pairs command-line tokens positionally: tokens 0, 2, 4, ... are keys and
     * the token following each key is its value.
     *
     * @param args raw command-line arguments
     * @return map from key to value; a trailing key with no value maps to {@code ""}
     */
    public static Map<String, String> parseArgs(String[] args) {
        Map<String, String> argsMap = new HashMap<String, String>();
        for (int i = 0; i < args.length; i += 2) {
            String value = (i + 1 < args.length) ? args[i + 1] : "";
            argsMap.put(args[i], value);
        }
        return argsMap;
    }

    /**
     * Overwrites {@code file} with {@code content} encoded in {@code encoding}.
     *
     * @param file     target file (created or truncated)
     * @param content  text to write
     * @param encoding charset name, e.g. {@code "UTF-8"}
     * @throws Exception if the file cannot be opened/written or the charset is unsupported
     */
    public static void writeString(File file, String content, String encoding) throws Exception {
        // Both streams are declared as resources so the file handle is released
        // even if the writer cannot be created or the write fails.
        try (FileOutputStream out = new FileOutputStream(file);
                OutputStreamWriter osw = new OutputStreamWriter(out, encoding)) {
            osw.write(content);
        }
    }

    /**
     * Reads the whole of {@code file}, decoding it with {@code encoding}.
     *
     * @param file     file to read
     * @param encoding charset name, e.g. {@code "GBK"}
     * @return the decoded text, with line terminators exactly as stored in the file
     * @throws Exception if the file cannot be opened/read or the charset is unsupported
     */
    public static String readString(File file, String encoding) throws Exception {
        try (FileInputStream in = new FileInputStream(file);
                InputStreamReader reader = new InputStreamReader(in, encoding)) {
            StringBuilder buff = new StringBuilder();
            char[] chunk = new char[8192];
            int count;
            // Copy raw characters rather than reading line by line: a line-based
            // read would drop the final newline and rewrite every line terminator,
            // changing more than just the encoding.
            while ((count = reader.read(chunk)) != -1) {
                buff.append(chunk, 0, count);
            }
            return buff.toString();
        }
    }

    /**
     * Collects regular files under {@code dir} whose names match {@code ext}.
     *
     * @param dir      directory to start from
     * @param ext      extension without the dot (compared against the lower-cased
     *                 file name), or {@code "*"} to accept every file
     * @param maxDepth number of directory levels to visit; 1 means {@code dir} only,
     *                 0 or less yields an empty list
     * @return matching files; files of a directory precede those of its subdirectories
     */
    public static List<File> listFiles(File dir, String ext, int maxDepth) {
        List<File> files = new ArrayList<File>();
        seekFiles(dir, ext, maxDepth, files);
        return files;
    }

    /** Recursive worker for {@link #listFiles}; appends matches to {@code files}. */
    private static void seekFiles(File dir, String ext, int maxDepth, List<File> files) {
        if (maxDepth <= 0) {
            return;
        }
        File[] entries = dir.listFiles();
        if (entries == null) {
            // listFiles() returns null when dir is not a directory or cannot be read.
            return;
        }
        List<File> subDirs = new ArrayList<File>();
        for (File entry : entries) {
            if (entry.isDirectory()) {
                subDirs.add(entry);
            } else if (entry.isFile() && matches(entry, ext)) {
                files.add(entry);
            }
        }
        // Recurse only after the current level is collected so that a directory's
        // own files come before those of its subdirectories.
        for (File subDir : subDirs) {
            seekFiles(subDir, ext, maxDepth - 1, files);
        }
    }

    /** Tells whether {@code file}'s name satisfies the extension filter {@code ext}. */
    private static boolean matches(File file, String ext) {
        return "*".equals(ext) || file.getName().toLowerCase().endsWith("." + ext);
    }
}
View Code

 

Converter.java

/**
 * Command-line tool that re-encodes GBK text files as UTF-8, in place.
 *
 * <p>Every selected file is decoded as GBK unconditionally; there is no check
 * that the input really is GBK, so back up the originals before running.
 *
 * @author lichmama
 */
public class Converter {
    // Populated by usage(args) before main() starts the conversion.
    private static String sourceDir;
    private static String fileExtension = "*";
    private static int maxDepth = Integer.MAX_VALUE;

    /**
     * Entry point: {@code java -jar gbk2utf.jar --src xxx --ext xxx --max-depth xxx}.
     * Prints the usage text and exits when the arguments are missing or invalid.
     *
     * @param args command-line arguments as key/value pairs
     */
    public static void main(String[] args) {
        if (usage(args)) {
            logger("Usage: java -jar gbk2utf.jar [OPTIONS]");
            logger("Change file's encoding(gbk) to utf-8 (@lichmama)");
            logger("OPTIONS:");
            logger("  --src <source path>, essential");
            logger("  --ext <file extension>, optional");
            logger("  --max-depth <max depth>, optional");
            return;
        }

        logger("source: " + sourceDir);
        logger("  file: " + ("*".equals(fileExtension) ? "*" : "*." + fileExtension));
        logger("====================================");
        Converter converter = new Converter();
        converter.process(sourceDir, fileExtension, maxDepth);
        logger("====================================**DONE");
    }

    /**
     * Converts {@code src} if it is a file, or every matching file beneath it
     * if it is a directory.
     *
     * @param src      path of a file or directory
     * @param ext      extension filter passed to {@code ConverterUtil.listFiles};
     *                 only used when {@code src} is a directory
     * @param maxDepth directory depth limit passed to {@code ConverterUtil.listFiles}
     */
    public void process(String src, String ext, int maxDepth) {
        File source = new File(src);
        if (!source.exists()) {
            logger("the path \"{0}\" not exist!", src);
            return;
        }
        if (!source.isDirectory()) {
            gbk2utf(source);
            return;
        }
        List<File> files = ConverterUtil.listFiles(source, ext, maxDepth);
        if (files.isEmpty()) {
            logger("NO FILES TO BE PROCESSED.");
            return;
        }
        logger("TOTAL FOUND {0} FILES, BEGIN TO PROCESS...\n", files.size());
        for (File file : files) {
            gbk2utf(file);
        }
    }

    /**
     * Reads {@code file} as GBK and writes the same text back as UTF-8,
     * logging the outcome.
     *
     * @param file file to convert in place
     * @return {@code true} on success, {@code false} if reading or writing failed
     */
    public boolean gbk2utf(File file) {
        try {
            String content = ConverterUtil.readString(file, "GBK");
            ConverterUtil.writeString(file, content, "UTF-8");
        } catch (Exception e) {
            // Report and continue so one bad file does not abort a batch run.
            logger("process {0} - FAILURE, reason: {1}", file.getName(), e.getMessage());
            return false;
        }
        logger("process {0} - SUCCESS", file.getName());
        return true;
    }

    /**
     * Parses the arguments into the static settings.
     *
     * @param args command-line arguments
     * @return {@code true} if the usage text should be shown ({@code --src} is
     *         missing or empty, an option has no value, or {@code --max-depth}
     *         is not an integer); {@code false} if the settings are ready
     */
    private static boolean usage(String[] args) {
        Map<String, String> argsMap = ConverterUtil.parseArgs(args);

        String src = argsMap.get("--src");
        if (src == null || "".equals(src)) {
            return true;
        }
        sourceDir = src;

        String ext = argsMap.get("--ext");
        if (ext != null) {
            if ("".equals(ext)) {
                return true;
            }
            fileExtension = ext.toLowerCase();
        }

        String depth = argsMap.get("--max-depth");
        if (depth != null) {
            if ("".equals(depth)) {
                return true;
            }
            try {
                maxDepth = Integer.parseInt(depth);
            } catch (NumberFormatException e) {
                // A malformed number is a usage error, not a crash: say what was
                // wrong and fall through to the usage text.
                logger("invalid --max-depth value: " + depth);
                return true;
            }
        }
        return false;
    }

    /** Prints {@code s} verbatim to standard output. */
    private static void logger(String s) {
        System.out.println(s);
    }

    /** Formats {@code pattern} with {@link MessageFormat} and prints the result. */
    private static void logger(String pattern, Object... arguments) {
        logger(MessageFormat.format(pattern, arguments));
    }
}
View Code

 

编译后打包成Runnable Jar File,不带参数运行可查看用法:

$> java -jar gbk2utf.jar
Usage: java -jar gbk2utf.jar [OPTIONS]
Change file's encoding(gbk) to utf-8 (@lichmama)
OPTIONS:
  --src <source path>, essential
  --ext <file extension>, optional
  --max-depth <max depth>, optional

 

使用示例:

1.转换code目录下java文件的编码
java -jar gbk2utf.jar --src code/ --ext java

2.转换code目录下xml文件的编码,最大遍历深度为3
java -jar gbk2utf.jar --src code/ --ext xml --max-depth 3

3.转换test.html文件的编码
java -jar gbk2utf.jar --src test.html

 

温馨提示:使用前请先备份原始文件以免造成不必要的损失!!!

另,本程序运行环境JDK1.8(及以上)

posted @ 2019-07-26 14:47  lichmama  阅读(5339)  评论(0)  编辑  收藏  举报