java 判断文件的编码格式,用于文本文件读取

使用 juniversalchardet 字符编码检测工具包(Mozilla universalchardet 的 Java 移植版,原托管于 Google Code)

导入依赖

   <!--charset 检测-->
        <!-- https://mvnrepository.com/artifact/com.googlecode.juniversalchardet/juniversalchardet -->
        <dependency>
            <groupId>com.googlecode.juniversalchardet</groupId>
            <artifactId>juniversalchardet</artifactId>
            <version>1.0.3</version>
        </dependency>

方法实现

  /**
   * Detects the character encoding of a file by feeding its bytes to a
   * {@code UniversalDetector}.
   *
   * <p>The file is read in 1 KiB chunks and reading stops as soon as the
   * detector reports that it is done, so large files are usually not read
   * in full.
   *
   * @param fileName path of the file to inspect
   * @return the detected charset name as reported by the detector; per the
   *         juniversalchardet documentation this is {@code null} when no
   *         encoding could be determined, so callers should supply a fallback
   * @throws IOException if the file cannot be opened or read
   */
  public String codeString(String fileName) throws IOException {
      UniversalDetector detector = new UniversalDetector(null);
      byte[] buff = new byte[1024];
      // try-with-resources guarantees the file handle is released even when
      // read() or the detector throws part-way through.
      try (InputStream is = Files.newInputStream(new File(fileName).toPath());
           BufferedInputStream reader = new BufferedInputStream(is)) {
          int len;
          // Check isDone() first so no extra chunk is read (and discarded)
          // once the detector has already made up its mind.
          while (!detector.isDone() && (len = reader.read(buff)) != -1) {
              detector.handleData(buff, 0, len);
          }
      }
      // Signal end of input so the detector can finalise its guess.
      detector.dataEnd();
      return detector.getDetectedCharset();
  }
   // 传入文件路径
   // return 文件字符集字符串
   codeString(filePath)
posted @ 2023-02-28 11:30  lambertlt  阅读(667)  评论(0)    收藏  举报