Excel 数据读入到DataSet

using System;
using System.Collections.Generic;
using System.Linq;
using System.Data;
using System.IO;
using System.Xml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using DocumentFormat.OpenXml;

namespace ECLink.Common
{
    /// <summary>
    /// 采用openxml方式把excel转换成DataSet
    /// </summary>
    public class ExcelHelper
    {
        public ExcelHelper()
        {

        }
        /// <summary>
        /// 将Excel多单一表转化为DataSet数据集对象
        /// </summary>
        /// <param name="filePath">Excel文件路径</param>
        /// <returns>转化的数据集</returns>
        public  DataSet ExcelToDataSet(string filePath)
        {
            DataSet dataSet = new DataSet();
            try
            {
                using (SpreadsheetDocument spreadDocument = SpreadsheetDocument.Open(filePath, false))
                {
                    //指定WorkbookPart对象
                    WorkbookPart workBookPart = spreadDocument.WorkbookPart;
                    //获取Excel中SheetName集合
                    List<string> sheetNames = GetSheetNames(workBookPart);

                    foreach (string sheetName in sheetNames)
                    {
                        DataTable dataTable = WorkSheetToTable(workBookPart, sheetName);
                        if (dataTable != null)
                        {
                            dataSet.Tables.Add(dataTable);//将表添加到数据集
                        }
                    }
                }
            }
            catch (Exception exp)
            {
                throw new Exception("可能Excel正在打开中,请关闭重新操作!");
            }
            return dataSet;
        }

        /// <summary>
        /// 根据WorkbookPart和表名创建DataTable对象
        /// </summary>
        /// <param name="workBookPart">WorkbookPart对象</param>
        /// <param name="tableName">表名</param>
        /// <returns>转化后的DataTable</returns>
        public DataTable WorkSheetToTable(WorkbookPart workBookPart, string sheetName)
        {
            //创建Table
            DataTable dataTable = new DataTable(sheetName);

            //根据WorkbookPart和sheetName获取该Sheet下所有行数据
            IEnumerable<Row> sheetRows = GetWorkBookPartRows(workBookPart, sheetName);
            IEnumerable<Column> sheetColumns = GetWorkBookPartColumns(workBookPart, sheetName);

            if (sheetRows == null || sheetRows.Count() <= 0)
            {
                return null;
            }

            SharedStringTable stringTable = workBookPart.SharedStringTablePart.SharedStringTable;
            //将数据导入DataTable,假定第一行为列名,第二行以后为数据
            foreach (Row row in sheetRows)
            {
                //获取Excel中的列头
                if (row.RowIndex == 1)
                {
                    GetDataColumn(row, stringTable, ref dataTable);
                }
                else
                {
                    GetDataRow(row, stringTable, ref dataTable);
                }
            }
            return dataTable;
        }
 
        /// <summary>
        /// 根据WorkbookPart获取所有SheetName
        /// </summary>
        /// <param name="workBookPart"></param>
        /// <returns>SheetName集合</returns>
        private List<string> GetSheetNames(WorkbookPart workBookPart)
        {
            List<string> sheetNames = new List<string>();
            Sheets sheets = workBookPart.Workbook.Sheets;
            foreach (Sheet sheet in sheets)
            {
                string sheetName = sheet.Name;
                if (!string.IsNullOrEmpty(sheetName))
                {
                    sheetNames.Add(sheetName);
                }
            }
            return sheetNames;
        }

        /// <summary>
        /// 根据WorkbookPart和sheetName获取该Sheet下所有Row数据
        /// </summary>
        /// <param name="workBookPart">WorkbookPart对象</param>
        /// <param name="sheetName">SheetName</param>
        /// <returns>该SheetName下的所有Row数据</returns>
        public IEnumerable<Row> GetWorkBookPartRows(WorkbookPart workBookPart, string sheetName)
        {
            IEnumerable<Row> sheetRows = null;
            //根据表名在WorkbookPart中获取Sheet集合
            IEnumerable<Sheet> sheets = workBookPart.Workbook.Descendants<Sheet>().Where(s => s.Name == sheetName);
            if (sheets.Count() == 0)
            {
                return null;//没有数据
            }

            WorksheetPart workSheetPart = workBookPart.GetPartById(sheets.First().Id) as WorksheetPart;
            //获取Excel中得到的行
            sheetRows = workSheetPart.Worksheet.Descendants<Row>();
            return sheetRows;
        }

        /// <summary>
        /// 根据WorkbookPart和sheetName获取该Sheet下所有Row数据
        /// </summary>
        /// <param name="workBookPart">WorkbookPart对象</param>
        /// <param name="sheetName">SheetName</param>
        /// <returns>该SheetName下的所有Row数据</returns>
        public IEnumerable<Column> GetWorkBookPartColumns(WorkbookPart workBookPart, string sheetName)
        {
            IEnumerable<Column> sheetColumns = null;
            //根据表名在WorkbookPart中获取Sheet集合
            IEnumerable<Sheet> sheets = workBookPart.Workbook.Descendants<Sheet>().Where(s => s.Name == sheetName);
            if (sheets.Count() == 0)
            {
                return null;//没有数据
            }

            WorksheetPart workSheetPart = workBookPart.GetPartById(sheets.First().Id) as WorksheetPart;
            
            sheetColumns = workSheetPart.Worksheet.Descendants<Column>(); //获取Excel中得到的行

            return sheetColumns;
        }


        /// <summary>
        /// 获取Excel中多表的表名
        /// </summary>
        /// <param name="filePath"></param>
        /// <returns></returns>
        private List<string> GetExcelSheetNames(string filePath)
        {
            string sheetName = string.Empty;
            List<string> sheetNames = new List<string>();//所有Sheet表名
            using (SpreadsheetDocument spreadDocument = SpreadsheetDocument.Open(filePath, false))
            {
                WorkbookPart workBook = spreadDocument.WorkbookPart;
                Stream stream = workBook.GetStream(FileMode.Open);
                XmlDocument xmlDocument = new XmlDocument();
                xmlDocument.Load(stream);

                XmlNamespaceManager xmlNSManager = new XmlNamespaceManager(xmlDocument.NameTable);
                xmlNSManager.AddNamespace("default", xmlDocument.DocumentElement.NamespaceURI);
                XmlNodeList nodeList = xmlDocument.SelectNodes("//default:sheets/default:sheet", xmlNSManager);

                foreach (XmlNode node in nodeList)
                {
                    sheetName = node.Attributes["name"].Value;
                    sheetNames.Add(sheetName);
                }
            }
            return sheetNames;
        }

        #region 采用openxml方式把excel转换成dataTable

        /// <summary>
        /// 构建DataTable的列
        /// </summary>
        /// <param name="row">OpenXML定义的Row对象</param>
        /// <param name="stringTablePart"></param>
        /// <param name="dt">须要返回的DataTable对象</param>
        /// <returns></returns>
        public void GetDataColumn(Row row, SharedStringTable stringTable, ref DataTable dt)
        {
            DataColumn col = new DataColumn();
            Dictionary<string, int> columnCount = new Dictionary<string, int>();
            foreach (Cell cell in row)
            {
                string cellVal = GetValue(cell, stringTable);
                col = new DataColumn(cellVal);
                if (IsContainsColumn(dt, col.ColumnName))
                {
                    if (!columnCount.ContainsKey(col.ColumnName))
                        columnCount.Add(col.ColumnName, 0);
                    col.ColumnName = col.ColumnName + (columnCount[col.ColumnName]++);
                }
                dt.Columns.Add(col);
            }
        }
        /// <summary>
        /// 构建DataTable的每一行数据,并返回该Datatable
        /// </summary>
        /// <param name="row">OpenXML的行</param>
        /// <param name="stringTablePart"></param>
        /// <param name="dt">DataTable</param>
        private void GetDataRow(Row row, SharedStringTable stringTable, ref DataTable dt)
        {
            // 读取算法:按行一一读取单位格,若是整行均是空数据
            DataRow dr = dt.NewRow();
            int i = 0;
            int nullRowCount = i;
            foreach (Cell cell in row)
            {
                string cellVal = GetValue(cell, stringTable);
                if (cellVal == string.Empty)
                {
                    nullRowCount++;
                }
                dr[i] = cellVal;
                i++;
            }
            if (nullRowCount != i)
            {
                dt.Rows.Add(dr);
            }
        }
        /// <summary>
        /// 获取单位格的值
        /// </summary>
        /// <param name="cell"></param>
        /// <param name="stringTablePart"></param>
        /// <returns></returns>
        private string GetValue(Cell cell, SharedStringTable stringTable)
        {
            //因为Excel的数据存储在SharedStringTable中,须要获取数据在SharedStringTable 中的索引
            string value = string.Empty;
            try
            {
                if (cell.ChildElements.Count == 0)
                    return value;
                value = double.Parse(cell.CellValue.InnerText).ToString();
                if ((cell.DataType != null) && (cell.DataType == CellValues.SharedString))
                {
                    value = stringTable.ChildElements[Int32.Parse(value)].InnerText;
                }
            }
            catch (Exception)
            {
                value = "N/A";
            }
            return value;
        }
        /// <summary>
        /// 判断网格是否存在列
        /// </summary>
        /// <param name="dt">网格</param>
        /// <param name="columnName">列名</param>
        /// <returns></returns>
        public bool IsContainsColumn(DataTable dt, string columnName)
        {
            if (dt == null || columnName == null)
            {
                return false;
            }
            return dt.Columns.Contains(columnName);
        }

        #endregion 

        #region SaveCell
        private void InsertTextCellValue(Worksheet worksheet, string column, uint row, string value)
        {
            Cell cell = ReturnCell(worksheet, column, row);
            CellValue v = new CellValue();
            v.Text = value;
            cell.AppendChild(v);
            cell.DataType = new EnumValue<CellValues>(CellValues.String);
            worksheet.Save();
        }
        private void InsertNumberCellValue(Worksheet worksheet, string column, uint row, string value)
        {
            Cell cell = ReturnCell(worksheet, column, row);
            CellValue v = new CellValue();
            v.Text = value;
            cell.AppendChild(v);
            cell.DataType = new EnumValue<CellValues>(CellValues.Number);
            worksheet.Save();
        }
        private static Cell ReturnCell(Worksheet worksheet, string columnName, uint row)
        {
            Row targetRow = ReturnRow(worksheet, row);

            if (targetRow == null)
                return null;

            return targetRow.Elements<Cell>().Where(c =>
               string.Compare(c.CellReference.Value, columnName + row,
               true) == 0).First();
        }
        private static Row ReturnRow(Worksheet worksheet, uint row)
        {
            return worksheet.GetFirstChild<SheetData>().
            Elements<Row>().Where(r => r.RowIndex == row).First();
        }
        #endregion
    }
}

 

posted @ 2014-09-15 10:35  Space Tian  阅读(4490)  评论(0编辑  收藏  举报