汤姆熊猫

  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理
/// <summary>
        /// Posts <paramref name="param"/> as a URL-encoded form to
        /// <paramref name="targetURL"/> and returns the response body.
        /// </summary>
        /// <param name="targetURL">Address the form is posted to.</param>
        /// <param name="cc">
        /// Cookie container attached to the request so cookies (and with them the
        /// session) carry over to later requests that reuse the same container.
        /// May be null, in which case no cookies are tracked.
        /// </param>
        /// <param name="param">
        /// Form fields to post as raw name/value pairs. Names and values are
        /// percent-encoded here, so callers must NOT pre-encode them.
        /// May be null or empty, in which case an empty body is posted.
        /// </param>
        /// <returns>The response body decoded as UTF-8.</returns>
        public static string Login(string targetURL, CookieContainer cc, Hashtable param)
        {
            // Build the application/x-www-form-urlencoded body. Each name and value
            // is percent-encoded so characters such as '&', '=', '+' or '%' inside a
            // value cannot break the form apart.
            StringBuilder formData = new StringBuilder();
            if (param != null)
            {
                foreach (DictionaryEntry de in param)
                {
                    if (formData.Length > 0)
                        formData.Append('&');

                    // A null value is posted as an empty field instead of throwing.
                    string value = de.Value == null ? "" : de.Value.ToString();
                    formData.Append(System.Uri.EscapeDataString(de.Key.ToString()));
                    formData.Append('=');
                    formData.Append(System.Uri.EscapeDataString(value));
                }
            }

            byte[] data = Encoding.UTF8.GetBytes(formData.ToString());

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(targetURL);
            request.Method = "POST";
            request.ContentType = "application/x-www-form-urlencoded";
            request.ContentLength = data.Length;
            request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 2.0.1124)";

            // Attach the container before the request stream is opened, so cookies
            // already stored in it can be sent along with this request.
            if (cc != null)
                request.CookieContainer = cc;

            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(data, 0, data.Length);
            }

            // Dispose the response, its stream and the reader so the underlying
            // connection is released even if reading fails.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // Copy the cookies returned by the server into the caller's container.
                if (cc != null)
                    cc.Add(response.Cookies);

                using (Stream stream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(stream, Encoding.UTF8))
                {
                    return reader.ReadToEnd();
                }
            }
        }
/// <summary>
        /// Fetches the content of the given URL with an HTTP GET request.
        /// </summary>
        /// <param name="targetURL">Address to request.</param>
        /// <param name="cc">
        /// Cookie container attached to the request so cookies stored by earlier
        /// requests are sent and new ones are kept. May be null, in which case no
        /// cookies are tracked.
        /// </param>
        /// <param name="param">
        /// Not used: the GET request carries no body and these values are not
        /// appended to the URL. Kept so existing callers continue to compile.
        /// </param>
        /// <returns>The response body decoded as UTF-8.</returns>
        public static string GetHTML(string targetURL, CookieContainer cc, Hashtable param)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(targetURL);
            request.Method = "GET";

            if (cc != null)
                request.CookieContainer = cc;

            // Dispose the response, its stream and the reader so the underlying
            // connection is released even if reading fails.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // Copy the cookies returned by the server into the caller's container.
                if (cc != null)
                    cc.Add(response.Cookies);

                using (Stream stream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(stream, System.Text.Encoding.UTF8))
                {
                    return reader.ReadToEnd();
                }
            }
        }

调用:

private void button1_Click(object sender, EventArgs e)
        {
            // A single container is shared by both requests, so cookies stored by
            // the login response are available to the follow-up request.
            CookieContainer cookies = new CookieContainer();

            // Fields of the login form. The field names have to match the element
            // names found in the source of the site's login page.
            Hashtable loginFields = new Hashtable();
            loginFields["username"] = "你的名字";
            loginFields["password"] = "你的密码";

            // Step 1: post the credentials and show the raw response so the outcome
            // of the login can be inspected.
            textBox1.Text = Login(
                "http://bbs.runsky.com/logging.php?action=login&loginsubmit=yes",
                cookies,
                loginFields);

            // Step 2: request a page with the same cookies. This address is only an
            // example and must be replaced with the page actually needed; no extra
            // parameters are passed for it.
            loginFields.Clear();
            Hashtable noParameters = new Hashtable();
            string memberPage = GetHTML("http://bbs.runsky.com/memcp.php", cookies, noParameters);

            // The returned HTML can be parsed or converted here as required.
            richTextBox1.Text = memberPage;
        }

这个地址:

string urlLogin = "http://bbs.runsky.com/logging.php?action=login&loginsubmit=yes";

即是你要将用户名和密码POST过去的地址,这个地址可以通过抓包工具(Fiddler)获取。

这个地址:

string url2 = "http://bbs.runsky.com/memcp.php";

是用来判断登录是否成功的,因为这个地址如果没有登录成功的话,会返回错误页面,这只是测试用的。

 

可使用http://htmlagilitypack.codeplex.com/ 解析HTML,例子:

http://www.cnblogs.com/bober/archive/2011/10/27/2226794.html

可以通过分析Login方法返回的string结果得到登录后要跳转的页面地址。
posted on 2012-07-14 11:58  汤姆熊猫  阅读(328)  评论(0)  编辑  收藏  举报