c# 读取excel数据的两种方法
c# 读取excel数据的两种方法
转载自:http://developer.51cto.com/art/201302/380622.htm,
方法一:OleDb: 用这种方法读取Excel速度还是非常的快的,但这种方式读取数据的时候不太灵活,不过可以在 DataTable 中对数据进行一些删减修改。
优点:读取方式简单、读取速度快
缺点:除了读取过程不太灵活之外,这种读取方式还有个弊端就是,当Excel数据量很大时。会非常占用内存,当内存不够时会抛出内存溢出的异常。
不过一般情况下还是非常不错的。
(代码比原文相较有所修改)
DataTable GetDataFromExcelByConn(bool hasTitle = false)
{
OpenFileDialog openFile = new OpenFileDialog();
openFile.Filter = "Excel(*.xlsx)|*.xlsx|Excel(*.xls)|*.xls";
openFile.InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
openFile.Multiselect = false;
if (openFile.ShowDialog() == DialogResult.Cancel) return null;
var filePath = openFile.FileName;
string fileType = System.IO.Path.GetExtension(filePath);
if (string.IsNullOrEmpty(fileType)) return null;
using (DataSet ds = new DataSet())
{
string strCon = string.Format("Provider=Microsoft.Jet.OLEDB.{0}.0;" +
"Extended Properties=\"Excel {1}.0;HDR={2};IMEX=1;\";" +
"data source={3};",
(fileType == ".xls" ? 4 : 12), (fileType == ".xls" ? 8 : 12), (hasTitle ? "Yes" : "NO"), filePath);
string strCom = " SELECT * FROM [Sheet1$]";
using (OleDbConnection myConn = new OleDbConnection(strCon))
using (OleDbDataAdapter myCommand = new OleDbDataAdapter(strCom, myConn))
{
myConn.Open();
myCommand.Fill(ds);
}
if (ds == null || ds.Tables.Count <= 0) return null;
return ds.Tables[0];
}
}
方法二:Com组件的方式读取Excel
这种方式需要先引用 Microsoft.Office.Interop.Excel 。首选说下这种方式的优缺点
优点:可以非常灵活的读取Excel中的数据
缺点:如果是Web站点部署在IIS上时,还需要服务器机子已安装了Excel,有时候还需要为配置IIS权限。最重要的一点因为是基于单元格方式读取的,所以数据很慢(曾做过试验,直接读取千行、200多列的文件,直接读取耗时15分钟。即使采用多线程分段读取来提高CPU的利用率也需要8分钟。PS:CPU I3)
需要读取大文件的的童鞋们慎重。。。
(代码比原文相较有所修改)
DataTable GetDataFromExcelByCom(bool hasTitle = false)
{
OpenFileDialog openFile = new OpenFileDialog();
openFile.Filter = "Excel(*.xlsx)|*.xlsx|Excel(*.xls)|*.xls";
openFile.InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
openFile.Multiselect = false;
if (openFile.ShowDialog() == DialogResult.Cancel) return null;
var excelFilePath = openFile.FileName;
Excel.Application app = new Excel.Application();
Excel.Sheets sheets;
object oMissiong = System.Reflection.Missing.Value;
Excel.Workbook workbook = null;
DataTable dt = new DataTable();
try
{
if (app == null) return null;
workbook = app.Workbooks.Open(excelFilePath, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong,
oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong);
sheets = workbook.Worksheets;
//将数据读入到DataTable中
Excel.Worksheet worksheet = (Excel.Worksheet)sheets.get_Item(1);//读取第一张表
if (worksheet == null) return null;
int iRowCount = worksheet.UsedRange.Rows.Count;
int iColCount = worksheet.UsedRange.Columns.Count;
//生成列头
for (int i = 0; i < iColCount; i++)
{
var name = "column" + i;
if (hasTitle)
{
var txt = ((Excel.Range)worksheet.Cells[1, i + 1]).Text.ToString();
if (!string.IsNullOrWhiteSpace(txt)) name = txt;
}
while (dt.Columns.Contains(name)) name = name + "_1";//重复行名称会报错。
dt.Columns.Add(new DataColumn(name, typeof(string)));
}
//生成行数据
Excel.Range range;
int rowIdx = hasTitle ? 2 : 1;
for (int iRow = rowIdx; iRow <= iRowCount; iRow++)
{
DataRow dr = dt.NewRow();
for (int iCol = 1; iCol <= iColCount; iCol++)
{
range = (Excel.Range)worksheet.Cells[iRow, iCol];
dr[iCol - 1] = (range.Value2 == null) ? "" : range.Text.ToString();
}
dt.Rows.Add(dr);
}
return dt;
}
catch { return null; }
finally
{
workbook.Close(false, oMissiong, oMissiong);
System.Runtime.InteropServices.Marshal.ReleaseComObject(workbook);
workbook = null;
app.Workbooks.Close();
app.Quit();
System.Runtime.InteropServices.Marshal.ReleaseComObject(app);
app = null;
}
}
原文的方法二还提供了多线程处理数据的代码,一并复制到此(此处出现了一个SheetOptions的类型,无法考证其来源,如果知晓,请留言,谢谢。):
/// <summary>
/// 使用COM,多线程读取Excel(1 主线程、4 副线程)
/// </summary>
/// <param name="excelFilePath">路径</param>
/// <returns>DataTabel</returns>
public System.Data.DataTable ThreadReadExcel(string excelFilePath)
{
Excel.Application app = new Excel.Application();
Excel.Sheets sheets = null;
Excel.Workbook workbook = null;
object oMissiong = System.Reflection.Missing.Value;
System.Data.DataTable dt = new System.Data.DataTable();
try
{
if (app == null)
{
return null;
}
workbook = app.Workbooks.Open(excelFilePath, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong,
oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong);
//将数据读入到DataTable中——Start
sheets = workbook.Worksheets;
Excel.Worksheet worksheet = (Excel.Worksheet)sheets.get_Item(1);//读取第一张表
if (worksheet == null)
return null;
string cellContent;
int iRowCount = worksheet.UsedRange.Rows.Count;
int iColCount = worksheet.UsedRange.Columns.Count;
Excel.Range range;
//负责列头Start
DataColumn dc;
int ColumnID = 1;
range = (Excel.Range)worksheet.Cells[1, 1];
//while (range.Text.ToString().Trim() != "")
while (iColCount >= ColumnID)
{
dc = new DataColumn();
dc.DataType = System.Type.GetType("System.String");
string strNewColumnName = range.Text.ToString().Trim();
if (strNewColumnName.Length == 0) strNewColumnName = "_1";
//判断列名是否重复
for (int i = 1; i < ColumnID; i++)
{
if (dt.Columns[i - 1].ColumnName == strNewColumnName)
strNewColumnName = strNewColumnName + "_1";
}
dc.ColumnName = strNewColumnName;
dt.Columns.Add(dc);
range = (Excel.Range)worksheet.Cells[1, ++ColumnID];
}
//End
//数据大于500条,使用多进程进行读取数据
if (iRowCount - 1 > 500)
{
//开始多线程读取数据
//新建线程
int b2 = (iRowCount - 1) / 10;
DataTable dt1 = new DataTable("dt1");
dt1 = dt.Clone();
SheetOptions sheet1thread = new SheetOptions(worksheet, iColCount, 2, b2 + 1, dt1);
Thread othread1 = new Thread(new ThreadStart(sheet1thread.SheetToDataTable));
othread1.Start();
//阻塞 1 毫秒,保证第一个读取 dt1
Thread.Sleep(1);
DataTable dt2 = new DataTable("dt2");
dt2 = dt.Clone();
SheetOptions sheet2thread = new SheetOptions(worksheet, iColCount, b2 + 2, b2 * 2 + 1, dt2);
Thread othread2 = new Thread(new ThreadStart(sheet2thread.SheetToDataTable));
othread2.Start();
DataTable dt3 = new DataTable("dt3");
dt3 = dt.Clone();
SheetOptions sheet3thread = new SheetOptions(worksheet, iColCount, b2 * 2 + 2, b2 * 3 + 1, dt3);
Thread othread3 = new Thread(new ThreadStart(sheet3thread.SheetToDataTable));
othread3.Start();
DataTable dt4 = new DataTable("dt4");
dt4 = dt.Clone();
SheetOptions sheet4thread = new SheetOptions(worksheet, iColCount, b2 * 3 + 2, b2 * 4 + 1, dt4);
Thread othread4 = new Thread(new ThreadStart(sheet4thread.SheetToDataTable));
othread4.Start();
//主线程读取剩余数据
for (int iRow = b2 * 4 + 2; iRow <= iRowCount; iRow++)
{
DataRow dr = dt.NewRow();
for (int iCol = 1; iCol <= iColCount; iCol++)
{
range = (Excel.Range)worksheet.Cells[iRow, iCol];
cellContent = (range.Value2 == null) ? "" : range.Text.ToString();
dr[iCol - 1] = cellContent;
}
dt.Rows.Add(dr);
}
othread1.Join();
othread2.Join();
othread3.Join();
othread4.Join();
//将多个线程读取出来的数据追加至 dt1 后面
foreach (DataRow dr in dt.Rows)
dt1.Rows.Add(dr.ItemArray);
dt.Clear();
dt.Dispose();
foreach (DataRow dr in dt2.Rows)
dt1.Rows.Add(dr.ItemArray);
dt2.Clear();
dt2.Dispose();
foreach (DataRow dr in dt3.Rows)
dt1.Rows.Add(dr.ItemArray);
dt3.Clear();
dt3.Dispose();
foreach (DataRow dr in dt4.Rows)
dt1.Rows.Add(dr.ItemArray);
dt4.Clear();
dt4.Dispose();
return dt1;
}
else
{
for (int iRow = 2; iRow <= iRowCount; iRow++)
{
DataRow dr = dt.NewRow();
for (int iCol = 1; iCol <= iColCount; iCol++)
{
range = (Excel.Range)worksheet.Cells[iRow, iCol];
cellContent = (range.Value2 == null) ? "" : range.Text.ToString();
dr[iCol - 1] = cellContent;
}
dt.Rows.Add(dr);
}
}
//将数据读入到DataTable中——End
return dt;
}
catch
{
return null;
}
finally
{
workbook.Close(false, oMissiong, oMissiong);
System.Runtime.InteropServices.Marshal.ReleaseComObject(workbook);
System.Runtime.InteropServices.Marshal.ReleaseComObject(sheets);
workbook = null;
app.Workbooks.Close();
app.Quit();
System.Runtime.InteropServices.Marshal.ReleaseComObject(app);
app = null;
GC.Collect();
GC.WaitForPendingFinalizers();
}
}
原文还提供了第三种方法,感兴趣的可以关心一下:
方法三:NPOI方式读取Excel,NPOI是一组开源的组件,类似Java的 POI。包括:NPOI、NPOI.HPSF、NPOI.HSSF、NPOI.HSSF.UserModel、NPOI.POIFS、NPOI.Util,下载的时候别只下一个噢
优点:读取Excel速度较快,读取方式操作灵活性
缺点:只支持03的Excel,xlsx的无法读取。由于这点,使用这种方式的人不多啊,没理由要求客户使用03版Excel吧,再说03版Excel对于行数还有限制,只支持65536行。
(听他们的开发人员说会在2012年底推出新版,支持xlsx的读取。但一直很忙没时间去关注这个事情,有兴趣的同学可以瞧瞧去)
1 using System;
2 using System.Data;
3 using System.IO;
4 using System.Web;
5 using NPOI;
6 using NPOI.HPSF;
7 using NPOI.HSSF;
8 using NPOI.HSSF.UserModel;
9 using NPOI.POIFS;
10 using NPOI.Util;
11 using System.Text;
12 using System.Configuration;
13
14 public class NPOIHelper
15 {
16 private static int ExcelMaxRow = Convert.ToInt32(ConfigurationManager.AppSettings["ExcelMaxRow"]);
17 /// <summary>
18 /// 由DataSet导出Excel
19 /// </summary>
20 /// <param name="sourceTable">要导出数据的DataTable</param>
21 /// <param name="sheetName">工作表名称</param>
22 /// <returns>Excel工作表</returns>
23 private static Stream ExportDataSetToExcel(DataSet sourceDs)
24 {
25 HSSFWorkbook workbook = new HSSFWorkbook();
26 MemoryStream ms = new MemoryStream();
27
28 for (int i = 0; i < sourceDs.Tables.Count; i++)
29 {
30 HSSFSheet sheet = (HSSFSheet)workbook.CreateSheet(sourceDs.Tables[i].TableName);
31 HSSFRow headerRow = (HSSFRow)sheet.CreateRow(0);
32 // handling header.
33 foreach (DataColumn column in sourceDs.Tables[i].Columns)
34 headerRow.CreateCell(column.Ordinal).SetCellValue(column.ColumnName);
35 // handling value.
36 int rowIndex = 1;
37 foreach (DataRow row in sourceDs.Tables[i].Rows)
38 {
39 HSSFRow dataRow = (HSSFRow)sheet.CreateRow(rowIndex);
40 foreach (DataColumn column in sourceDs.Tables[i].Columns)
41 {
42 dataRow.CreateCell(column.Ordinal).SetCellValue(row[column].ToString());
43 }
44 rowIndex++;
45 }
46 }
47 workbook.Write(ms);
48 ms.Flush();
49 ms.Position = 0;
50 workbook = null;
51 return ms;
52 }
53 /// <summary>
54 /// 由DataSet导出Excel
55 /// </summary>
56 /// <param name="sourceTable">要导出数据的DataTable</param>
57 /// <param name="fileName">指定Excel工作表名称</param>
58 /// <returns>Excel工作表</returns>
59 public static void ExportDataSetToExcel(DataSet sourceDs, string fileName)
60 {
61 //检查是否有Table数量超过65325
62 for (int t = 0; t < sourceDs.Tables.Count; t++)
63 {
64 if (sourceDs.Tables[t].Rows.Count > ExcelMaxRow)
65 {
66 DataSet ds = GetdtGroup(sourceDs.Tables[t].Copy());
67 sourceDs.Tables.RemoveAt(t);
68 //将得到的ds插入 sourceDs中
69 for (int g = 0; g < ds.Tables.Count; g++)
70 {
71 DataTable dt = ds.Tables[g].Copy();
72 sourceDs.Tables.Add(dt);
73 }
74 t--;
75 }
76 }
77
78 MemoryStream ms = ExportDataSetToExcel(sourceDs) as MemoryStream;
79 HttpContext.Current.Response.AppendHeader("Content-Disposition", "attachment;filename=" + fileName);
80 HttpContext.Current.Response.BinaryWrite(ms.ToArray());
81 HttpContext.Current.ApplicationInstance.CompleteRequest();
82 //HttpContext.Current.Response.End();
83 ms.Close();
84 ms = null;
85 }
86 /// <summary>
87 /// 由DataTable导出Excel
88 /// </summary>
89 /// <param name="sourceTable">要导出数据的DataTable</param>
90 /// <returns>Excel工作表</returns>
91 private static Stream ExportDataTableToExcel(DataTable sourceTable)
92 {
93 HSSFWorkbook workbook = new HSSFWorkbook();
94 MemoryStream ms = new MemoryStream();
95 HSSFSheet sheet = (HSSFSheet)workbook.CreateSheet(sourceTable.TableName);
96 HSSFRow headerRow = (HSSFRow)sheet.CreateRow(0);
97 // handling header.
98 foreach (DataColumn column in sourceTable.Columns)
99 headerRow.CreateCell(column.Ordinal).SetCellValue(column.ColumnName);
100 // handling value.
101 int rowIndex = 1;
102 foreach (DataRow row in sourceTable.Rows)
103 {
104 HSSFRow dataRow = (HSSFRow)sheet.CreateRow(rowIndex);
105 foreach (DataColumn column in sourceTable.Columns)
106 {
107 dataRow.CreateCell(column.Ordinal).SetCellValue(row[column].ToString());
108 }
109 rowIndex++;
110 }
111 workbook.Write(ms);
112 ms.Flush();
113 ms.Position = 0;
114 sheet = null;
115 headerRow = null;
116 workbook = null;
117 return ms;
118 }
119 /// <summary>
120 /// 由DataTable导出Excel
121 /// </summary>
122 /// <param name="sourceTable">要导出数据的DataTable</param>
123 /// <param name="fileName">指定Excel工作表名称</param>
124 /// <returns>Excel工作表</returns>
125 public static void ExportDataTableToExcel(DataTable sourceTable, string fileName)
126 {
127 //如数据超过65325则分成多个Table导出
128 if (sourceTable.Rows.Count > ExcelMaxRow)
129 {
130 DataSet ds = GetdtGroup(sourceTable);
131 //导出DataSet
132 ExportDataSetToExcel(ds, fileName);
133 }
134 else
135 {
136 MemoryStream ms = ExportDataTableToExcel(sourceTable) as MemoryStream;
137 HttpContext.Current.Response.AppendHeader("Content-Disposition", "attachment;filename=" + fileName);
138 HttpContext.Current.Response.BinaryWrite(ms.ToArray());
139 HttpContext.Current.ApplicationInstance.CompleteRequest();
140 //HttpContext.Current.Response.End();
141 ms.Close();
142 ms = null;
143 }
144 }
145
146 /// <summary>
147 /// 传入行数超过65325的Table,返回DataSet
148 /// </summary>
149 /// <param name="dt"></param>
150 /// <returns></returns>
151 public static DataSet GetdtGroup(DataTable dt)
152 {
153 string tablename = dt.TableName;
154
155 DataSet ds = new DataSet();
156 ds.Tables.Add(dt);
157
158 double n = dt.Rows.Count / Convert.ToDouble(ExcelMaxRow);
159
160 //创建表
161 for (int i = 1; i < n; i++)
162 {
163 DataTable dtAdd = dt.Clone();
164 dtAdd.TableName = tablename + "_" + i.ToString();
165 ds.Tables.Add(dtAdd);
166 }
167
168 //分解数据
169 for (int i = 1; i < ds.Tables.Count; i++)
170 {
171 //新表行数达到最大 或 基表数量不足
172 while (ds.Tables[i].Rows.Count != ExcelMaxRow && ds.Tables[0].Rows.Count != ExcelMaxRow)
173 {
174 ds.Tables[i].Rows.Add(ds.Tables[0].Rows[ExcelMaxRow].ItemArray);
175 ds.Tables[0].Rows.RemoveAt(ExcelMaxRow);
176
177 }
178 }
179
180 return ds;
181 }
182
183 /// <summary>
184 /// 由DataTable导出Excel
185 /// </summary>
186 /// <param name="sourceTable">要导出数据的DataTable</param>
187 /// <param name="fileName">指定Excel工作表名称</param>
188 /// <returns>Excel工作表</returns>
189 public static void ExportDataTableToExcelModel(DataTable sourceTable, string modelpath, string modelName, string fileName, string sheetName)
190 {
191 int rowIndex = 2;//从第二行开始,因为前两行是模板里面的内容
192 int colIndex = 0;
193 FileStream file = new FileStream(modelpath + modelName + ".xls", FileMode.Open, FileAccess.Read);//读入excel模板
194 HSSFWorkbook hssfworkbook = new HSSFWorkbook(file);
195 HSSFSheet sheet1 = (HSSFSheet)hssfworkbook.GetSheet("Sheet1");
196 sheet1.GetRow(0).GetCell(0).SetCellValue("excelTitle"); //设置表头
197 foreach (DataRow row in sourceTable.Rows)
198 { //双循环写入sourceTable中的数据
199 rowIndex++;
200 colIndex = 0;
201 HSSFRow xlsrow = (HSSFRow)sheet1.CreateRow(rowIndex);
202 foreach (DataColumn col in sourceTable.Columns)
203 {
204 xlsrow.CreateCell(colIndex).SetCellValue(row[col.ColumnName].ToString());
205 colIndex++;
206 }
207 }
208 sheet1.ForceFormulaRecalculation = true;
209 FileStream fileS = new FileStream(modelpath + fileName + ".xls", FileMode.Create);//保存
210 hssfworkbook.Write(fileS);
211 fileS.Close();
212 file.Close();
213 }
214 }

浙公网安备 33010602011771号