代码改变世界

C#大文件大数据量导入sql数据库

2010-07-28 13:21  一片温柔  阅读(343)  评论(0)    收藏  举报

          最近,需要做个功能,从大文件【7百万+行】中导入数据到数据库。由于数据文件和DB不在同台服务器上,因此不太方便用BULK INSERT 这种超变态语句。怎么办呢,调用存储过程一条一条导 or 拼凑insert语句一次导入多条?前者肯定不现实的,后则经测试效果也极其不佳【每次导入2w条,大致需要45分钟+】。怎么办啊,my god?看看System.Data.SqlClient有没有什么超级变态的东东,呵呵,不小心,竟然又看到Bulk了,嘿嘿,只不过是SqlBulkCopy而已,太兴奋了,抓紧看msdn。很快就有了下面的实现方法:

             try
            {
                string beg = DateTime.Now.ToLongTimeString();

                string fileName = string.Format("Userinfo-{0}.txt", DateTime.Today.AddDays(DiffDay).ToString("yyyy-MM-dd")); //"Userinfo-2010-07-19.txt";

                FileInfo file = new FileInfo(Server.MapPath("../Download/" + fileName));

                if (!file.Exists)
                {
                    //JScriptHelper.Alert(this.Page, string.Format("要导入的文件[{0}]不存在,请先ftp下载!", fileName));
                    return;
                }

                int nRowPer; //每次导入的最大行数  控制内存
                if (!int.TryParse(ConfigurationManager.AppSettings["ULROWPER"] as string, out nRowPer))
                {
                    nRowPer = 4000000;
                }

                using (SqlConnection con = new SqlConnection(ConfigurationManager.ConnectionStrings["SQLCONN"].ToString()))
                {
                    using (SqlBulkCopy bulkCtrl = new SqlBulkCopy(con))
                    {
                        DataTable dt = new DataTable();
                        dt.Columns.Add(new DataColumn("PTAccount", System.Type.GetType("System.String")));
                        dt.Columns.Add(new DataColumn("GameID", System.Type.GetType("System.String")));
                        dt.Columns.Add(new DataColumn("Area", System.Type.GetType("System.String")));
                        dt.Columns.Add(new DataColumn("Groups", System.Type.GetType("System.String")));
                        dt.Columns.Add(new DataColumn("GameLevel", System.Type.GetType("System.String")));
                        bulkCtrl.DestinationTableName = "Sp_GameLevel";
                        bulkCtrl.ColumnMappings.Add(0, "PTAccount");
                        bulkCtrl.ColumnMappings.Add(1, "GameID");
                        bulkCtrl.ColumnMappings.Add(2, "Area");
                        bulkCtrl.ColumnMappings.Add(3, "Groups");
                        bulkCtrl.ColumnMappings.Add(4, "GameLevel");
                        bulkCtrl.BulkCopyTimeout = int.MaxValue;
                        StreamReader read = new StreamReader(Server.MapPath("../Download/" + fileName), Encoding.Default);
                        UserLevel userCtrl = new UserLevel();
                        userCtrl.TruncateUserLevel();
                        con.Open();
                        int nRow = 0;
                        int nCnt = 0;
                        string line = read.ReadLine();
                        while (line != null)
                        {
                            nRow = dt.Rows.Count;
                            if (nRow > nRowPer) //规避文件太大程序占用内存太多
                            {
                                bulkCtrl.BatchSize = nRow;
                                bulkCtrl.WriteToServer(dt);
                                //bulkCtrl.Close();
                                dt.Rows.Clear();
                                nCnt += nRow;
                            }
                            string[] str = line.Split('/t');
                            if (str.Length == 5)
                            {
                                DataRow dr = dt.NewRow();
                                dr[0] = str[0];
                                dr[1] = str[1];
                                dr[2] = str[2];
                                dr[3] = str[3];
                                dr[4] = str[4];
                                dt.Rows.Add(dr);
                            }
                            line = read.ReadLine();
                        }
                        nRow = dt.Rows.Count;
                        if (nRow > 0)
                        {
                            nCnt += nRow;
                            bulkCtrl.BatchSize = nRow;
                            if (con.State != ConnectionState.Open) con.Open();
                            bulkCtrl.WriteToServer(dt);
                        }
                        bulkCtrl.Close();
                        con.Close();
                        //LogHelper.Info(string.Format("导入完毕 Beg:{0}End{1}[Row]{2}", beg, DateTime.Now.ToLongTimeString(), nCnt));
                        //JScriptHelper.Alert(this.Page, string.Format("共成功导入{0}条记录!", nCnt));
                    }
                }
            }
            catch (Exception ex)
            { 
                //LogHelper.Error(string.Format("UserLevelMain btnInsert_Click [StackTrace]{0}[Message]{1}", ex.StackTrace, ex.Message));
            }

 

         运行之后,日志是这样的:  导入完毕 Beg:16:42:42 End16:46:04 [Row]5452484

        不到4秒5百万+,比以前的30分钟+,快了多少倍啊!

  keywor:大文件,大数据量,导入sql数据库