使用POI库实现的Excel读取工具类,支持读取文本和图片内容

       <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.17</version>
            <!-- wildfly需要移除exclusions注释 <exclusions> <exclusion> <artifactId>dom4j</artifactId>
                <groupId>dom4j</groupId> </exclusion> </exclusions> -->
        </dependency>

        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.17</version>
        </dependency>

        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-schemas</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.xmlbeans</groupId>
            <artifactId>xmlbeans</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-excelant</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-examples</artifactId>
            <version>3.17</version>
        </dependency>

封装的数据类,可视情况调整

import lombok.Data;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Excel数据实体类,用于封装Excel中的行数据,包括文本和图片。
 */
@Data
public class ExcelData {
    
    /**
     * 行号(从1开始)
     */
    private Integer rowIndex;
    
    /**
     * 文本内容Map,key为列索引(从0开始),value为文本内容
     */
    private Map<Integer, String> textData;
    
    /**
     * 图片内容Map,key为列索引(从0开始),value为图片的Base64字符串
     */
    private Map<Integer, List<String>> imageData;
    
    /**
     * 获取指定列的文本内容
     * @param columnIndex 列索引
     * @return 文本内容,如果不存在则返回null
     */
    public String getTextValue(Integer columnIndex) {
        return textData != null ? textData.get(columnIndex) : null;
    }
    
    /**
     * 获取指定列的图片内容(Base64格式)
     * @param columnIndex 列索引
     * @return 图片的Base64字符串,如果不存在则返回null
     */
    public List<String> getImageValue(Integer columnIndex) {
        return imageData != null ? imageData.get(columnIndex) : Collections.emptyList();
    }
    
    /**
     * 设置文本内容
     * @param columnIndex 列索引
     * @param value 文本内容
     */
    public void setTextValue(Integer columnIndex, String value) {
        if (textData == null) {
            textData = new HashMap<>();
        }
        textData.put(columnIndex, value);
    }
    
    /**
     * 设置图片内容
     * @param columnIndex 列索引
     * @param base64Images 图片的Base64字符串
     */
    public void setImageValue(Integer columnIndex, List<String> base64Images) {
        if (imageData == null) {
            imageData = new HashMap<>();
        }
        imageData.put(columnIndex, base64Images);
    }

具体实现

import org.apache.poi.hssf.usermodel.*;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;


/**
 * 使用POI库实现的Excel读取工具类,支持读取文本和图片内容。
 */
public class ExcelImageReaderUtil {

    private static final Logger log = LoggerFactory.getLogger(ExcelImageReaderUtil.class);

    /**
     * 读取Excel文件内容,包括文本和图片
     *
     * @param inputStream Excel文件输入流
     * @param fileName    文件名(用于判断Excel版本)
     * @return 包含所有行数据的列表
     */
    public static List<ExcelData> readExcel(InputStream inputStream, String fileName) {
        List<ExcelData> dataList = new ArrayList<>();

        try (Workbook workbook = createWorkbook(inputStream, fileName)) {

            Sheet sheet = workbook.getSheetAt(0); // 获取第一个sheet
            // 获取标题行
            Row headerRow = sheet.getRow(0);
            int columnCount = headerRow.getLastCellNum();

            // 遍历数据行
            for (int rowNum = 1; rowNum <= sheet.getLastRowNum(); rowNum++) {
                Row row = sheet.getRow(rowNum);
                if (row == null) {
                    continue;
                }

                ExcelData rowData = new ExcelData();
                // 设置行号(从1开始)
                rowData.setRowIndex(rowNum + 1);

                // 处理每一列的数据 - 修改循环条件确保处理所有列
                for (int colNum = 0; colNum < columnCount; colNum++) {
                    Cell cell = row.getCell(colNum, Row.MissingCellPolicy.RETURN_BLANK_AS_NULL);
                    String textValue = cell != null ? getCellTextValue(cell) : null;

                    // 处理文本内容
                    if (textValue != null && !textValue.trim().isEmpty()) {
                        rowData.setTextValue(colNum, textValue);
                    }
                    // 处理图片内容
                    List<byte[]> imageDatas = getImageData(sheet, rowNum, colNum);
                    if (!imageDatas.isEmpty()) {
                        List<String> base64Images = new ArrayList<>();
                        for (byte[] imageData : imageDatas) {
                            String base64Image = Base64.getEncoder().encodeToString(imageData);
                            base64Images.add(base64Image);
                        }
                        rowData.setImageValue(colNum, base64Images);
                    }else {
                        // 确保所有列都有值,即使是空值
                        rowData.setTextValue(colNum, null);
                    }
                }
                dataList.add(rowData);
            }
        } catch (IOException e) {
            log.error("读取Excel文件失败", e);
            throw new RuntimeException("Excel文件读取失败:" + e.getMessage());
        }
        return dataList;
    }

    /**
     * 根据文件名创建对应版本的Workbook
     */
    private static Workbook createWorkbook(InputStream inputStream, String fileName) throws IOException {
        if (fileName.toLowerCase().endsWith(".xlsx")) {
            return new XSSFWorkbook(inputStream);
        } else if (fileName.toLowerCase().endsWith(".xls")) {
            return new HSSFWorkbook(inputStream);
        } else {
            throw new IllegalArgumentException("不支持的文件格式,仅支持.xlsx和.xls文件");
        }
    }

    /**
     * 获取单元格的文本值
     */
    private static String getCellTextValue(Cell cell) {
        if (cell == null) {
            return null;
        }
        switch (cell.getCellTypeEnum()) {
            case STRING:
                return cell.getStringCellValue();
            case NUMERIC:
                if (DateUtil.isCellDateFormatted(cell)) {
                    return cell.getDateCellValue().toString();
                }
                return String.valueOf(cell.getNumericCellValue());
            case BOOLEAN:
                return String.valueOf(cell.getBooleanCellValue());
            case FORMULA:
                try {
                    return String.valueOf(cell.getNumericCellValue());
                } catch (IllegalStateException e) {
                    return cell.getStringCellValue();
                }
            default:
                return "";
        }
    }

    /**
     * 获取单元格的图片数据
     */
    private static List<byte[]> getImageData(Sheet sheet, int rowNum, int colNum) {
        List<byte[]> bytes = new ArrayList<>();
        if (sheet.getWorkbook() instanceof XSSFWorkbook) {
            XSSFSheet xssfSheet = (XSSFSheet) sheet;
            List<XSSFShape> shapes = xssfSheet.getDrawingPatriarch() != null ? xssfSheet.getDrawingPatriarch().getShapes() : null;
            if (shapes != null) {
                for (XSSFShape shape : shapes) {
                    if (shape instanceof XSSFPicture) {
                        XSSFPicture picture = (XSSFPicture) shape;
                        XSSFClientAnchor anchor = picture.getPreferredSize();
                        if (anchor != null) {
                            int picRow = anchor.getRow1();
                            int picCol = anchor.getCol1();
                            if (picRow == rowNum && picCol == colNum) {
                                bytes.add(picture.getPictureData().getData());
                            }
                        }
                    }
                }
            }
        } else if (sheet.getWorkbook() instanceof HSSFWorkbook) {
            HSSFSheet hssfSheet = (HSSFSheet) sheet;
            HSSFPatriarch patriarch = hssfSheet.getDrawingPatriarch();
            if (patriarch != null) {
                List<HSSFShape> shapes = patriarch.getChildren();
                for (HSSFShape shape : shapes) {
                    if (shape instanceof HSSFPicture) {
                        HSSFPicture picture = (HSSFPicture) shape;
                        HSSFClientAnchor anchor = (HSSFClientAnchor) picture.getAnchor();
                        if (anchor != null) {
                            int picRow = anchor.getRow1();
                            int picCol = anchor.getCol1();
                            if (picRow == rowNum && picCol == colNum) {
                                bytes.add(picture.getPictureData().getData());
                            }
                        }
                    }
                }
            }
        }
        return bytes;
    }

    public static void main(String[] args) throws IOException {
        //测试当前代码是否可用
        String filePath = "C:\\Users\\XXXX\\Desktop\\新建 XLSX 工作表.xlsx";
        try (InputStream inputStream = Files.newInputStream(Paths.get(filePath))) {
            List<ExcelData> excelData = readExcel(inputStream, filePath);
            for (ExcelData excelDatum : excelData) {
                excelDatum.getImageData().forEach((key, value) -> System.out.println("Row: " + excelDatum.getRowIndex()
                        + ", Column: " + key + ", Image Data: " + value.size()));
            }
        }
    }
posted @ 2025-04-28 15:55  lyu6  阅读(198)  评论(0)    收藏  举报