Atitit poi读取大文件内存溢出的解决 目录 1. poi提供了两种读取excel的方式,一种是类似xml的Dom, 1 2. POI官网上提到XSSF有三种读写excel,POI地址 1 3

Atitit poi读取大文件内存溢出的解决

 

目录

1. poi提供了两种读取excel的方式,一种是类似xml的Dom, 1

2. POI官网上提到XSSF有三种读写excel,POI地址 1

3. 可以看到有三种模式:eventmodel方式,基于事件驱动,SAX的方式解析excel 2

4. /OfficeExcelPrj/src/officefile/ExampleEventUserModel.java 2

5. ref 11

 

  1. poi提供了两种读取excel的方式,一种是类似xml的Dom,

把整个文档load到内存中再解析,这种方法在org.apache.poi.xssf.usermodel包中,好用,但占内存;还有一种就是类似xml的Sax解析,每次只读取一条记录,这种方式占用的内存很小,但API用起来没那么方便,专门为读取大文件而设定的,这种方法在org.apache.poi.xssf.eventusermodel包中。

 

 

  1. POI官网上提到XSSF有三种读写excel,POI地址

:http://poi.apache.org/spreadsheet/index.html。官网的图片:

  1. 可以看到有三种模式:eventmodel方式,基于事件驱动,SAX的方式解析excel

1、eventmodel方式,基于事件驱动,SAX的方式解析excel(.xlsx是基于OOXML的),CPU和内存消耗非常低,但是只能读不能写

2、usermodel,就是我们一般使用的方式,这种方式可以读可以写,但是CPU和内存消耗非常大

3、SXSSF,POI3.8以后开始支持,这种方式只能写excel

 

 

  1. /OfficeExcelPrj/src/officefile/ExampleEventUserModel.java

 

package officefile;

 

import java.io.InputStream;

import java.util.Iterator;

import java.util.List;

import java.util.Map;

 

import org.apache.poi.xssf.eventusermodel.XSSFReader;

import org.apache.poi.xssf.model.SharedStringsTable;

import org.apache.poi.xssf.usermodel.XSSFRichTextString;

import org.apache.commons.lang3.StringUtils;

import org.apache.log4j.Logger;

import org.apache.poi.openxml4j.opc.OPCPackage;

import org.apache.poi.util.StringUtil;

import org.xml.sax.Attributes;

import org.xml.sax.ContentHandler;

import org.xml.sax.InputSource;

import org.xml.sax.SAXException;

import org.xml.sax.XMLReader;

import org.xml.sax.helpers.DefaultHandler;

import org.xml.sax.helpers.XMLReaderFactory;

 

import com.attilax.text.StringUtilsT55;

import com.google.common.collect.Lists;

import com.google.common.collect.Maps;

 

//for big file

public class ExampleEventUserModel {

 

public void processOneSheet(String filename, String sheetname) throws Exception {

OPCPackage pkg = OPCPackage.open(filename);

XSSFReader r = new XSSFReader(pkg);

SharedStringsTable sst = r.getSharedStringsTable();

 

XMLReader parser = fetchSheetParser(sst);

 

// To look up the Sheet Name / Sheet Order / rID,

// you need to process the core Workbook stream.

// Normally it's of the form rId# or rSheet#

// String sheetname = "rId2";

InputStream sheet2 = r.getSheet(sheetname);

InputSource sheetSource = new InputSource(sheet2);

parser.parse(sheetSource);

sheet2.close();

}

 

public void processAllSheets(String filename) throws Exception {

OPCPackage pkg = OPCPackage.open(filename);

XSSFReader r = new XSSFReader(pkg);

SharedStringsTable sst = r.getSharedStringsTable();

 

XMLReader parser = fetchSheetParser(sst);

 

Iterator<InputStream> sheets = r.getSheetsData();

while (sheets.hasNext()) {

System.out.println("Processing new sheet:\n");

InputStream sheet = sheets.next();

InputSource sheetSource = new InputSource(sheet);

parser.parse(sheetSource);

sheet.close();

System.out.println("");

}

}

 

public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {

XMLReader parser = XMLReaderFactory.createXMLReader("com.sun.org.apache.xerces.internal.parsers.SAXParser");

ContentHandler handler = new SheetHandler(sst);

parser.setContentHandler(handler);

return parser;

}

 

/**

 * See org.xml.sax.helpers.DefaultHandler javadocs

 */

private static class SheetHandler extends DefaultHandler {

private SharedStringsTable sst;

private String lastContents;

private boolean nextIsString;

Map m = Maps.newLinkedHashMap();

String idx_cell;

CellAti cellx = new CellAti();

 

private SheetHandler(SharedStringsTable sst) {

this.sst = sst;

}

 

public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {

// c => cell

if (name.equals("c")) {

// Print the cell reference

// System.out.print(attributes.getValue("r") + " - ");

idx_cell = attributes.getValue("r");

// logger.info("Print the cell reference::" + attributes.getValue("r") + " - ");

// Figure out if the value is an index in the SST

String cellType = attributes.getValue("t");

if (cellType != null && cellType.equals("s")) {

nextIsString = true;

} else {

nextIsString = false;

}

}

// Clear contents cache

lastContents = "";

}

 

public void endElement(String uri, String localName, String name) throws SAXException {

// Process the last contents as required.

// Do now, as characters() may be called more than once

if (nextIsString) {

int idx = Integer.parseInt(lastContents);

lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();

nextIsString = false;

}

 

// v => contents of a cell

// Output after we've seen the string contents

if (name.equals("v")) {

System.out.println(lastContents);

logger.info("endElement::" + lastContents);

 

int indexofDigit1 = StringUtilsT55.indexofDigit(idx_cell);

int rowindex = Integer.parseInt(idx_cell.substring(indexofDigit1));

String cellIdx = idx_cell.substring(0, indexofDigit1);

Map row;

try {

row = li.get(rowindex-1);

} catch (IndexOutOfBoundsException e) {

row=Maps.newLinkedHashMap();

row.put(cellIdx, lastContents);

row.put("row",rowindex);

li.add(row);

row = li.get(rowindex-1);

}

 

row.put(cellIdx, lastContents);

row.put("row",rowindex);

logger.info(row);

 

}

}

 

public void characters(char[] ch, int start, int length) throws SAXException {

lastContents += new String(ch, start, length);

logger.info("characters::" + lastContents);/// jeig haosyo mayong

// F2433 - G2433 - H2433 - I2433 - J2433 - K2433 - L2433 - M2433 -

// N2433 - O2433 - P2433 - Q2433 - R2433 - S2433 - T2433 - U2433 -

// V2433 - W2433 - X2433 - Y2433 - Z2433 - AA2433 - AB2433 - AC2433

// - AD2433 - AE2433 - AF2433 - AG2433 - AH2433 - AI2433 - AJ2433 -

// AK2433 - AL2433 - AM2433 - AN2433 - AO2433 - 2019-05-23

// 17:02:16,178$INFO,4728,td:main,lg:officefile.excelUtil2007ver,"characters::5391",officefile.ExampleEventUserModel$SheetHandler.characters.119

 

}

}

 

static org.apache.log4j.Logger logger = Logger.getLogger(excelUtil2007ver.class);

public static List<Map> li = Lists.newArrayList();

 

public static void main(String[] args) throws Exception {

 String namesxls = "D:\\0db\\360手机助手导出的联系人.xlsx";

// String namesxls = "D:\\0db\\test.xlsx";

ExampleEventUserModel example = new ExampleEventUserModel();

System.out.println("11");

example.processOneSheet(namesxls, "rId1");

for (Map m : li) {

System.out.println(m);

}

// example.processAllSheets(args[0]);

}

}

 

 

 

{B=头, row=37142, D=猪, E=猪头, F=sh-totoKustm, AO=15066666666}

{B=雅雯, row=37143, D=竺, E=竺雅雯, F=sh-totoKustm, AO=15706266689}

{B=晨宇, row=37144, D=祝, E=祝晨宇, F=sh-totoKustm, AO=13511909092}

{B=成明, row=37145, D=祝, E=祝成明, F=sh-totoKustm, AO=18990934939}

{B=传武, row=37146, D=祝, E=祝传武, F=sh-totoKustm, AO=13883770474}

{B=玲, row=37147, D=祝, E=祝玲, F=sh-totoKustm, AO=13214156025}

  1. ref

POI读写大数据量EXCEL - tootwo2 - 博客园.html

posted @ 2019-05-23 17:57  attilaxAti  阅读(159)  评论(0)    收藏  举报