C# 提取 HTML 页面中的 <data> 内容

  /// <summary>
  /// Click handler: downloads the page, isolates the result table (id="panel"),
  /// and adds one "col1|col2|result" entry to <c>listBox1</c> for every table row
  /// that matches the row pattern.
  /// </summary>
  /// <param name="sender">Event source (unused).</param>
  /// <param name="e">Event arguments (unused).</param>
  private void button1_Click(object sender, EventArgs e)
        {
            string strSource = GetHttpWebRequest("http://www.******.aspx");

            // Nothing to parse; also avoids ArgumentNullException from Regex.Match(null).
            // NOTE(review): assumes GetHttpWebRequest signals failure with null/empty - confirm.
            if (string.IsNullOrEmpty(strSource))
            {
                return;
            }

            // Isolate the result table. The opening tag must match the page markup exactly
            // (apart from letter case), including attribute order and spacing.
            Regex tableRegex = new Regex("<table width=\"936\" border=\"0\" cellpadding=\"0\" cellspacing=\"1\" bgcolor=\"#FFB91F\" align=\"center\" style=\"color:Black;\" id=\"panel\" >" + @"([\S\s]*?)" + "</table>", RegexOptions.Compiled | RegexOptions.IgnoreCase);
            Match tableMatch = tableRegex.Match(strSource);
            if (!tableMatch.Success)
            {
                return;
            }

            // Row pattern. "@all" is a placeholder that is expanded to a lazy "match anything,
            // including newlines" fragment. The three (.*) groups do not cross line breaks:
            //   group 1 = content of the first cell  (height="32")
            //   group 2 = content of the second cell (height="28")
            //   group 3 = text between "chkResult" and ";</script></td>"
            string pattern = "<tr align=\"center\" bgcolor=\"#@all\">@all<td height=\"32\" bgcolor=\"#@all\">(.*)</td>@all<td height=\"28\" bgcolor=\"#@all\">(.*)</td>@all<td bgcolor=\"#@all\">@allchkResult(.*);</script></td>@all</tr>";
            pattern = pattern.Replace("@all", @"[\S\s]*?");
            Regex rowRegex = new Regex(pattern, RegexOptions.Compiled | RegexOptions.IgnoreCase);

            // Add every matched row to the list box.
            foreach (Match row in rowRegex.Matches(tableMatch.Value))
            {
                // Strip the call punctuation ( , ' ( ) ) from the captured chkResult arguments.
                string result = row.Groups[3].Value.Replace(",", "").Replace("'", "").Replace("(", "").Replace(")", "");
                listBox1.Items.Add(row.Groups[1].Value + "|" + row.Groups[2].Value + "|" + result);
            }
        }

带条件的取法（通过 POST 提交分页参数后再提取）
  /// <summary>
  /// Requests one page of results via a form POST, isolates the result table (id="panel"),
  /// and for every matching row adds a "col1|col2|result" entry to <c>listBox1</c> and
  /// stores "col2|result" in <c>One</c> keyed by the integer value of column 1.
  /// </summary>
  /// <param name="cout">Page index sent as the <c>CurrentPageIndex</c> form field.</param>
  private void GetData(int cout)
        {
            // Form-encoded POST body. The original left this variable unassigned (the assignment
            // was commented out), which does not compile.
            // NOTE(review): the __VIEWSTATE value is a hard-coded capture from the page;
            // presumably it must be refreshed if the server rejects it - confirm.
            string postData2 = "__VIEWSTATE=%2FwEPDwUJNzc3MTAxMzU5ZGRoqAvv8WszDJmdGj4cP0O2gODj8g%3D%3D&soundshow=&reloadshow=&CurrentPageIndex=" + cout.ToString();
            byte[] data = Encoding.ASCII.GetBytes(postData2);

            HttpWebRequest requestScore = (HttpWebRequest)WebRequest.Create("http://www.******.aspx");
            requestScore.Method = WebRequestMethods.Http.Post;
            requestScore.ContentType = "application/x-www-form-urlencoded";
            requestScore.ContentLength = data.Length;
            requestScore.KeepAlive = true;

            // To pass a login check, reuse the cookies of an authenticated session here:
            //requestScore.CookieContainer = container;

            // Send the body; 'using' closes the stream even if Write throws.
            using (Stream stream = requestScore.GetRequestStream())
            {
                stream.Write(data, 0, data.Length);
            }

            // Read the whole response; disposing the response releases the connection.
            string strSource;
            using (HttpWebResponse responseSorce = (HttpWebResponse)requestScore.GetResponse())
            using (StreamReader reader = new StreamReader(responseSorce.GetResponseStream(), Encoding.Default))
            {
                strSource = reader.ReadToEnd();
            }

            // Isolate the result table. The opening tag must match the page markup exactly
            // (apart from letter case), including attribute order and spacing.
            Regex tableRegex = new Regex("<table width=\"936\" border=\"0\" cellpadding=\"0\" cellspacing=\"1\" bgcolor=\"#FFB91F\" align=\"center\" style=\"color:Black;\" id=\"panel\" >" + @"([\S\s]*?)" + "</table>", RegexOptions.Compiled | RegexOptions.IgnoreCase);
            Match tableMatch = tableRegex.Match(strSource);
            if (!tableMatch.Success)
            {
                return;
            }

            // Row pattern. "@all" is a placeholder that is expanded to a lazy "match anything,
            // including newlines" fragment. The three (.*) groups do not cross line breaks:
            //   group 1 = content of the first cell  (height="32")
            //   group 2 = content of the second cell (height="28")
            //   group 3 = text between "chkResult" and ";</script></td>"
            string pattern = "<tr align=\"center\" bgcolor=\"#@all\">@all<td height=\"32\" bgcolor=\"#@all\">(.*)</td>@all<td height=\"28\" bgcolor=\"#@all\">(.*)</td>@all<td bgcolor=\"#@all\">@allchkResult(.*);</script></td>@all</tr>";
            pattern = pattern.Replace("@all", @"[\S\s]*?");
            Regex rowRegex = new Regex(pattern, RegexOptions.Compiled | RegexOptions.IgnoreCase);

            foreach (Match row in rowRegex.Matches(tableMatch.Value))
            {
                string first = row.Groups[1].Value;
                // Strip the call punctuation ( , ' ( ) ) from the captured chkResult arguments.
                string result = row.Groups[3].Value.Replace(",", "").Replace("'", "").Replace("(", "").Replace(")", "");
                string rest = row.Groups[2].Value + "|" + result;

                listBox1.Items.Add(first + "|" + rest);
                // Convert.ToInt32 throws FormatException if the first cell is not an integer.
                One.Add(Convert.ToInt32(first), rest);
                // TODO: insert into the database.
            }
        }

主页 www.yundll.com

 


 

posted @ 2014-10-20 19:30  秋千,为谁荡  阅读(325)  评论(0)  编辑  收藏  举报