君子博学而日参省乎己 则知明而行无过矣

博客园 首页 新随笔 联系 订阅 管理

无论是做网络爬虫(网络蜘蛛)还是做页面测试,表单登录都是很常见的需求。表单登录的核心是将参数包装起来,以POST方式提交给目标页面;但与普通的POST提交不同,表单登录往往还涉及Cookies信息的处理,稍显繁琐。

下面我们将演示如何使用 HttpClient 4.x 执行基于表单的登录,并处理Cookies信息。

具体代码如下:

 

package cn.ysh.studio.crawler.httpclient;import java.util.ArrayList;import java.util.List;import org.apache.http.HttpEntity;import org.apache.http.HttpResponse;import org.apache.http.NameValuePair;import org.apache.http.client.entity.UrlEncodedFormEntity;import org.apache.http.client.methods.HttpGet;import org.apache.http.client.methods.HttpPost;import org.apache.http.cookie.Cookie;import org.apache.http.impl.client.DefaultHttpClient;import org.apache.http.message.BasicNameValuePair;import org.apache.http.util.EntityUtils;/**
 * 演示如何使用HttpClient api执行基于表单的登录
 * 
 * @author Shenghany
 * @date 2013-5-19
 */publicclassClientFormLogin{publicstaticvoid main(String[] args)throwsException{DefaultHttpClient httpclient =newDefaultHttpClient();try{//创建Get请求HttpGet httpget =newHttpGet("https://portal.sun.com/portal/dt");//执行请求HttpResponse response = httpclient.execute(httpget);//获得响应实体HttpEntity entity = response.getEntity();System.out.println("Login form get: "+ response.getStatusLine());//销毁实体EntityUtils.consume(entity);//初始化Cookies设置System.out.println("Initial set of cookies:");List<Cookie> cookies = httpclient.getCookieStore().getCookies();if(cookies.isEmpty()){System.out.println("None");}else{for(int i =0; i < cookies.size(); i++){System.out.println("- "+ cookies.get(i).toString());}}//创建Post请求HttpPost httppost =newHttpPost("https://portal.sun.com/amserver/UI/Login?"+"org=self_registered_users&"+"goto=/portal/dt&"+"gotoOnFail=/portal/dt?error=true");//添加HTTP POST参数List<NameValuePair> nvps =newArrayList<NameValuePair>();
            nvps.add(newBasicNameValuePair("IDToken1","username"));
            nvps.add(newBasicNameValuePair("IDToken2","password"));//将POST参数以UTF-8编码并包装成表单实体对象
            httppost.setEntity(newUrlEncodedFormEntity(nvps,"UTF-8"));//执行Post(登录)请求
            response = httpclient.execute(httppost);//获得响应实体
            entity = response.getEntity();System.out.println("Login form get: "+ response.getStatusLine());//销毁实体EntityUtils.consume(entity);//登录后的Cookies信息System.out.println("Post logon cookies:");
            cookies = httpclient.getCookieStore().getCookies();if(cookies.isEmpty()){System.out.println("None");}else{for(int i =0; i < cookies.size(); i++){System.out.println("- "+ cookies.get(i).toString());}}}finally{// 当不再需要HttpClient实例时,关闭连接管理器以确保释放所有占用的系统资源
            httpclient.getConnectionManager().shutdown();}}}
posted on 2013-07-25 04:12  刺猬的温驯  阅读(205)  评论(0)    收藏  举报