POI

POI

1. 基本对象

  • 工作簿 ==> excel文件
  • 工作表 ==> sheet

2. POI-写

1. 导入依赖

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.wang</groupId>
    <artifactId>POI</artifactId>
    <version>1.0-SNAPSHOT</version>

    <!--导入依赖-->
    <dependencies>
        <!--xls 03-->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.17</version>
        </dependency>

        <!--xlsx 07-->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.17</version>
        </dependency>

        <!--日期格式化工具-->
        <dependency>
            <groupId>joda-time</groupId>
            <artifactId>joda-time</artifactId>
            <version>2.10.5</version>
        </dependency>

        <!--Junit-->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.13</version>
        </dependency>
    </dependencies>


</project>

2. 编写测试代码

package com.wang;


import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.joda.time.DateTime;
import org.junit.Test;

import java.io.FileOutputStream;

public class ExcelWriteTest {

    public static String PATH = "D:\\Java_Web\\POI\\";

    @Test
    public void testWrite03() throws Exception {
        //创建一个工作簿
        HSSFWorkbook workbook = new HSSFWorkbook();
        //创建一个工作表
        HSSFSheet sheet = workbook.createSheet("我的Excel03");
        //创建一行
        Row row1 = sheet.createRow(0);
        //创建一个单元格 ==> (1,1)
        Cell cell11 = row1.createCell(0);
        //填写数据
        cell11.setCellValue("今日新增bug");
        //(1,2)单元格
        Cell cell12 = row1.createCell(1);
        cell12.setCellValue(666);

        //第二行
        Row row2 = sheet.createRow(1);
        //(2,1)
        Cell cell21 = row2.createCell(0);
        cell21.setCellValue("统计时间");
        //(2,2)
        Cell cell22 = row2.createCell(1);
        //利用 joda-time 工具, toString中可以直接传递时间格式
        String date = new DateTime().toString("yyyy-MM-dd HH:mm:ss");
        cell22.setCellValue(date);

        //生成一张表
        // 03 版本就是使用 xls 结尾!
        FileOutputStream fileOutputStream = new FileOutputStream(PATH + "今日产生bug统计表03.xls");

        workbook.write(fileOutputStream);

        //关闭流
        fileOutputStream.close();

        System.out.println("今日产生bug统计表03.xls 生成完毕!");
    }

    @Test
    public void testWrite07() throws Exception {
        //创建一个工作簿
        XSSFWorkbook workbook = new XSSFWorkbook();
        //创建一个工作表
        XSSFSheet sheet = workbook.createSheet("我的Excel07");
        //创建一行
        Row row1 = sheet.createRow(0);
        //创建一个单元格 ==> (1,1)
        Cell cell11 = row1.createCell(0);
        //填写数据
        cell11.setCellValue("今日新增bug");
        //(1,2)单元格
        Cell cell12 = row1.createCell(1);
        cell12.setCellValue(666);

        //第二行
        Row row2 = sheet.createRow(1);
        //(2,1)
        Cell cell21 = row2.createCell(0);
        cell21.setCellValue("统计时间");
        //(2,2)
        Cell cell22 = row2.createCell(1);
        //利用 joda-time 工具, toString中可以直接传递时间格式
        String date = new DateTime().toString("yyyy-MM-dd HH:mm:ss");
        cell22.setCellValue(date);

        //生成一张表
        // 07 版本就是使用 xlsx 结尾!
        FileOutputStream fileOutputStream = new FileOutputStream(PATH + "今日产生bug统计表03.xlsx");

        workbook.write(fileOutputStream);

        //关闭流
        fileOutputStream.close();

        System.out.println("今日产生bug统计表07.xlsx 生成完毕!");
    }
}

3. 03和07的区别

  • 03 对象为 HSSF
  • 07对象为 XSSF
  • 03的文件后缀为xsl
  • 07的文件后缀会xslx

4. 大量数据的写入

1. 大文件写HSSF

  • 03版excel, 后缀为xsl

  • 缺点: 最多只能处理 65536 行, 否则会抛出异常

  • 优点: 过程中写入缓存, 不操作磁盘, 最后一次性写入磁盘, 速度快

  • 测试写入 65537 行

  • @Test
    public void testWrite03BigData() throws IOException {
        //时间
        long begin = System.currentTimeMillis();
    
        HSSFWorkbook workbook = new HSSFWorkbook();
        HSSFSheet sheet = workbook.createSheet();
        for (int rowNum = 0; rowNum < 65537; rowNum++) {
            HSSFRow row = sheet.createRow(rowNum);
            for (int cellNum = 0; cellNum < 10; cellNum++) {
                HSSFCell cell = row.createCell(cellNum);
                cell.setCellValue(cellNum);
            }
    
        }
        System.out.println("Over!");
        FileOutputStream fileOutputStream = new FileOutputStream(PATH + "testWrite03BigData.xls");
        workbook.write(fileOutputStream);
        fileOutputStream.close();
        long end = System.currentTimeMillis();
        System.out.println((double)(end - begin) / 1000);
    }
    
  • 报异常

  • image-20201028092651619

2. 大文件写XSSF

  • 07版excel, 后缀为xslx

  • 缺点: 写数据时速度非常慢, 非常耗内存, 也会发生内存溢出, 如写入 100万 条

  • 优点: 可以写较大的数据量, 如 20万 条

  • 测试写入 65536 行

  • @Test
    public void testWrite07BigData() throws IOException {
        //时间
        long begin = System.currentTimeMillis();
    
        XSSFWorkbook workbook = new XSSFWorkbook();
        XSSFSheet sheet = workbook.createSheet();
        for (int rowNum = 0; rowNum < 65536; rowNum++) {
            XSSFRow row = sheet.createRow(rowNum);
            for (int cellNum = 0; cellNum < 10; cellNum++) {
                XSSFCell cell = row.createCell(cellNum);
                cell.setCellValue(cellNum);
            }
    
        }
        System.out.println("Over!");
        FileOutputStream fileOutputStream = new FileOutputStream(PATH + "testWrite07BigData.xlsx");
        workbook.write(fileOutputStream);
        fileOutputStream.close();
        long end = System.currentTimeMillis();
        System.out.println((double)(end - begin) / 1000);
    }
    

image-20201028093212113

  • 可以看到, 用时远超HSSF

3. 大文件写SXSSF

  • 优点: 可以写非常大的数据量, 如 100万 条甚至更多条, 写数据速度快, 占用更少的内存

  • 注意

    • 过程中可能会产生临时文件, 需要清理临时文件

    • 默认由 100条 记录被保存在内存中, 如果超过这数量, 则最前面的数据被写入临时文件

    • 如果想自定义内存中数据的数量, 可以使用

      • new SXSSFWorkbook(数量)
        
  • 测试写入 65536行 数据

  • @Test
    public void testWrite07BigDataS() throws IOException {
        //时间
        long begin = System.currentTimeMillis();
    
        SXSSFWorkbook workbook = new SXSSFWorkbook();
        SXSSFSheet sheet = workbook.createSheet();
        for (int rowNum = 0; rowNum < 65536; rowNum++) {
            SXSSFRow row = sheet.createRow(rowNum);
            for (int cellNum = 0; cellNum < 10; cellNum++) {
                SXSSFCell cell = row.createCell(cellNum);
                cell.setCellValue(cellNum);
            }
    
        }
        System.out.println("Over!");
        FileOutputStream fileOutputStream = new FileOutputStream(PATH + "testWrite07BigDataS.xlsx");
        workbook.write(fileOutputStream);
        fileOutputStream.close();
    
        //清除临时文件
        workbook.dispose();
    
        long end = System.currentTimeMillis();
        System.out.println((double)(end - begin) / 1000);
    }
    
  • image-20201028094159715

  • 速度快了许多!

3. POI-读

1. 简单的读取

与上面一样, 同样对于 03 和 07 版本使用不同的对象以及后缀名进行读取!

package com.wang;

import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.Test;

import java.io.FileInputStream;
import java.io.IOException;

public class ExcelReadTest {

    public static String PATH = "D:\\Java_Web\\POI\\";

    @Test
    public void testRead03() throws IOException {

        //获取文件流
        FileInputStream fileInputStream = new FileInputStream(PATH + "今日产生bug统计表03.xls");

        //根据文件流创建一个工作簿
        Workbook workbook = new HSSFWorkbook(fileInputStream);
        //得到表
        Sheet sheet = workbook.getSheetAt(0);
        //得到行
        Row row = sheet.getRow(1);
        //得到列
        Cell cell = row.getCell(1);

        //读取值的时候, 一定要注意读取值的类型
        //getStringCellValue 字符串类型
        System.out.println(cell.getStringCellValue());

        fileInputStream.close();
    }

    @Test
    public void testRead07() throws IOException {

        //获取文件流
        FileInputStream fileInputStream = new FileInputStream(PATH + "今日产生bug统计表03.xlsx");

        //根据文件流创建一个工作簿
        Workbook workbook = new XSSFWorkbook(fileInputStream);
        //得到表
        Sheet sheet = workbook.getSheetAt(0);
        //得到行
        Row row = sheet.getRow(1);
        //得到列
        Cell cell = row.getCell(1);

        //读取值的时候, 一定要注意读取值的类型
        //getStringCellValue 字符串类型
        System.out.println(cell.getStringCellValue());

        fileInputStream.close();
    }
}

注意获取值的类型即可!

2. 读取不同的数据类型

@Test
public void testCellType() throws Exception {
    FileInputStream inputStream = new FileInputStream(PATH + "明细表.xls");
    Workbook workbook = new HSSFWorkbook(inputStream);

    Sheet sheet = workbook.getSheetAt(0);
    //获取标题内容
    Row rowTitle = sheet.getRow(0);
    if (rowTitle != null) {
        //获取列数
        int cellCount = rowTitle.getPhysicalNumberOfCells();
        //遍历
        for (int cellNum = 0; cellNum < cellCount; cellNum++) {
            Cell cell = rowTitle.getCell(cellNum);
            if (cell != null) {
                CellType cellType = cell.getCellTypeEnum();
                String cellValue = cell.getStringCellValue();
                System.out.print(cellValue + " | ");
            }
        }
        System.out.println();
    }

    //获取表中的内容
    //获取行数
    int rowCount = sheet.getPhysicalNumberOfRows();
    //第一行是标题, 从第二行开始
    for (int rowNum = 1; rowNum < rowCount; rowNum++) {
        Row rowData = sheet.getRow(rowNum);
        if (rowData != null) {
            //获得列数
            int cellCount = rowTitle.getPhysicalNumberOfCells();
            //读取列
            for (int cellNum = 0; cellNum < cellCount; cellNum++) {
                System.out.print("[" + (rowNum + 1) + "-" + (cellNum + 1) + "]");
                //获得单元格
                Cell cellData = rowData.getCell(cellNum);

                //对单元格的数据进行非空判断
                if (cellData != null) {
                    CellType cellType = cellData.getCellTypeEnum();
                    String cellValue = "";
                    //按照类型输出为字符串
                    switch (cellType) {
                        //字符串
                        case STRING:
                            System.out.print("[String]");
                            cellValue = cellData.getStringCellValue();
                            break;

                        //数字 (日期, 普通数字)
                        case NUMERIC:
                            System.out.print("[Number]");
                            //如果是一个日期类型的数字
                            if (HSSFDateUtil.isCellDateFormatted(cellData)) {
                                System.out.print("[日期]");
                                Date dateCellValue = cellData.getDateCellValue();
                                //利用 joda 转化时间格式, 输出为字符串
                                cellValue = new DateTime(dateCellValue).toString("yyyy-MM-dd");
                            } else {
                                //如果是一个普通的数字类型
                                System.out.print("[普通的数字类型]");
                                //转换为字符串
                                HSSFDataFormatter hssfDataFormat = new HSSFDataFormatter();
                                cellValue = hssfDataFormat.formatCellValue(cellData);
                            }
                            break;

                        //布尔
                        case BOOLEAN:
                            System.out.print("[Boolean]");
                            cellValue = String.valueOf(cellData.getBooleanCellValue());
                            break;

                        //数据类型错误
                        case ERROR:
                            System.out.print("[数据类型错误]");
                            break;
                    }
                    System.out.println(cellValue);
                } else {
                    System.out.println("[Blank]");
                }
            }
        }
    }
    inputStream.close();
}

注意点

  • 要先对单元格内容进行非空判断, 否则会报空指针异常

  • 用 getCellTypeEnum() 方法获得内容的枚举类型, 在 switch 中判断

  • 数字类型分为普通数字和日期

    • 日期建议格式化后输出

    • 数字使用以下语句使得输出结果和在 Excel 中的一致

    • HSSFDataFormatter hssfDataFormat = new HSSFDataFormatter();
      cellValue = hssfDataFormat.formatCellValue(cellData);
      

可以提取工具类

package com.wang.Util;

import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.*;
import org.joda.time.DateTime;

import java.io.FileInputStream;
import java.io.IOException;
import java.util.Date;

public class HSSFReadUtil {

    public static void ReadWithType(String PATH, int SheetNum) throws IOException {
        FileInputStream inputStream = new FileInputStream(PATH);
        Workbook workbook = new HSSFWorkbook(inputStream);

        Sheet sheet = workbook.getSheetAt(0);
        //获取标题内容
        Row rowTitle = sheet.getRow(0);
        if (rowTitle != null) {
            //获取列数
            int cellCount = rowTitle.getPhysicalNumberOfCells();
            //遍历
            for (int cellNum = 0; cellNum < cellCount; cellNum++) {
                Cell cell = rowTitle.getCell(cellNum);
                if (cell != null) {
                    CellType cellType = cell.getCellTypeEnum();
                    String cellValue = cell.getStringCellValue();
                    System.out.print(cellValue + " | ");
                }
            }
            System.out.println();
        }

        //获取表中的内容
        //获取行数
        int rowCount = sheet.getPhysicalNumberOfRows();
        //第一行是标题, 从第二行开始
        for (int rowNum = 1; rowNum < rowCount; rowNum++) {
            Row rowData = sheet.getRow(rowNum);
            if (rowData != null) {
                //获得列数
                int cellCount = rowTitle.getPhysicalNumberOfCells();
                //读取列
                for (int cellNum = 0; cellNum < cellCount; cellNum++) {
                    System.out.print("[" + (rowNum + 1) + "-" + (cellNum + 1) + "]");
                    //获得单元格
                    Cell cellData = rowData.getCell(cellNum);

                    //对单元格的数据进行非空判断
                    if (cellData != null) {
                        CellType cellType = cellData.getCellTypeEnum();
                        String cellValue = "";
                        //按照类型输出为字符串
                        switch (cellType) {
                            //字符串
                            case STRING:
                                System.out.print("[String]");
                                cellValue = cellData.getStringCellValue();
                                break;

                            //数字 (日期, 普通数字)
                            case NUMERIC:
                                System.out.print("[Number]");
                                //如果是一个日期类型的数字
                                if (HSSFDateUtil.isCellDateFormatted(cellData)) {
                                    System.out.print("[日期]");
                                    Date dateCellValue = cellData.getDateCellValue();
                                    //利用 joda 转化时间格式, 输出为字符串
                                    cellValue = new DateTime(dateCellValue).toString("yyyy-MM-dd");
                                } else {
                                    //如果是一个普通的数字类型
                                    System.out.print("[普通的数字类型]");
                                    //转换为字符串
                                    HSSFDataFormatter hssfDataFormat = new HSSFDataFormatter();
                                    cellValue = hssfDataFormat.formatCellValue(cellData);
                                }
                                break;

                            //布尔
                            case BOOLEAN:
                                System.out.print("[Boolean]");
                                cellValue = String.valueOf(cellData.getBooleanCellValue());
                                break;

                            //数据类型错误
                            case ERROR:
                                System.out.print("[数据类型错误]");
                                break;
                        }
                        System.out.println(cellValue);
                    } else {
                        System.out.println("[Blank]");
                    }
                }
            }
        }
        inputStream.close();

    }
}

测试工具类

@Test
public void testCellTypeUtil() throws IOException {
    HSSFReadUtil.ReadWithType(PATH + "明细表.xls", 0);
}

3. 计算公式

@Test
public void testFormula() throws Exception {
    FileInputStream inputStream = new FileInputStream(PATH + "公式.xls");
    Workbook workbook = new HSSFWorkbook(inputStream);
    Sheet sheet = workbook.getSheetAt(0);

    Row row = sheet.getRow(4);
    Cell cell = row.getCell(0);

    // 拿到计算公式
    FormulaEvaluator formulaEvaluator = new HSSFFormulaEvaluator((HSSFWorkbook) workbook);

    //输出单元格的内容
    CellType cellType = cell.getCellTypeEnum();
    switch (cellType) {
        //公式
        case FORMULA:
            String formula = cell.getCellFormula();
            System.out.println(formula);

            //计算
            CellValue evaluate = formulaEvaluator.evaluate(cell);
            String cellValue = evaluate.formatAsString();
            System.out.println(cellValue);

            break;
    }
}

注意

  • 先指定公式所在的单元格

  • 通过 new HSSFFormulaEvaluator((HSSFWorkbook) workbook) 拿到计算公式

  • 单元格的内容此时是一个公式!

  • 想要计算结果, 使用 formulaEvaluator.evaluate(cell) 计算当前单元格的值, 用 formatAsString() 转为字符串输出!

posted @ 2020-10-28 14:23  山人西来  阅读(878)  评论(0编辑  收藏  举报