无论是做网络爬虫(网络蜘蛛)还是做页面测试,处理登录表单都是很常见的需求。表单登录的核心是将参数包装起来,以POST方式提交给目标页面;但与普通的POST提交不同的是,表单登录往往还涉及到Cookies信息的处理,稍显繁琐。
下面我们将演示如何使用HttpClient4.x执行基于表单的登录,并处理Cookies信息。
具体代码如下:
package cn.ysh.studio.crawler.httpclient;

import java.util.ArrayList;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

/**
 * Demonstrates how to perform a form-based login with the HttpClient API
 * and how to inspect the cookies held by the client before and after the login.
 *
 * @author Shenghany
 * @date 2013-5-19
 */
public class ClientFormLogin {

    /**
     * Issues a GET request for the portal page, prints the cookies held by the
     * client, then POSTs the login form and prints the cookies again.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if executing a request or consuming a response fails
     */
    public static void main(String[] args) throws Exception {
        DefaultHttpClient httpclient = new DefaultHttpClient();
        try {
            // Create the GET request.
            HttpGet httpget = new HttpGet("https://portal.sun.com/portal/dt");
            // Execute the request.
            HttpResponse response = httpclient.execute(httpget);
            // Obtain the response entity.
            HttpEntity entity = response.getEntity();
            System.out.println("Login form get: " + response.getStatusLine());
            // Consume the entity before the same client issues the next request.
            EntityUtils.consume(entity);

            // Cookies held by the client after the initial GET.
            System.out.println("Initial set of cookies:");
            printCookies(httpclient.getCookieStore().getCookies());

            // Create the POST request.
            HttpPost httppost = new HttpPost("https://portal.sun.com/amserver/UI/Login?"
                    + "org=self_registered_users&"
                    + "goto=/portal/dt&"
                    + "gotoOnFail=/portal/dt?error=true");
            // Add the HTTP POST (form) parameters.
            List<NameValuePair> nvps = new ArrayList<NameValuePair>();
            nvps.add(new BasicNameValuePair("IDToken1", "username"));
            nvps.add(new BasicNameValuePair("IDToken2", "password"));
            // Encode the POST parameters as UTF-8 and wrap them in a form entity.
            httppost.setEntity(new UrlEncodedFormEntity(nvps, "UTF-8"));
            // Execute the POST (login) request.
            response = httpclient.execute(httppost);
            // Obtain the response entity.
            entity = response.getEntity();
            // NOTE(review): the label below says "get" although this is the POST
            // response; the text is kept unchanged from the original listing.
            System.out.println("Login form get: " + response.getStatusLine());
            // Consume the entity.
            EntityUtils.consume(entity);

            // Cookies held by the client after the login request.
            System.out.println("Post logon cookies:");
            printCookies(httpclient.getCookieStore().getCookies());
        } finally {
            // When the HttpClient instance is no longer needed, shut down the
            // connection manager to release all system resources it holds.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Prints each cookie on its own line prefixed with "- ", or "None" when
     * the list is empty.
     *
     * @param cookies the cookies to print
     */
    private static void printCookies(List<Cookie> cookies) {
        if (cookies.isEmpty()) {
            System.out.println("None");
            return;
        }
        for (Cookie cookie : cookies) {
            System.out.println("- " + cookie.toString());
        }
    }
}