为了抓取 Web 上的一些隐私数据,需要先登录,然后才能获取这些数据。用程序来实现时,就需要先实现自动登录,再将登录信息保存在 Cookie 中,这样之后获取数据时就无须再次登录。以网易邮箱为例:
所需 jar 包:commons-codec-1.3.jar、commons-httpclient-3.0.1.jar、commons-logging-1.1.1.jar、jaxen-1.1-beta-6.jar(不确定是否必须;下面的源码没有直接引用 jaxen 中的类)
源码:
import java.io.IOException;
import java.util.Date;

import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;


public class AutoLogin {

    // Account name (masked in this listing); autoLogin() appends "@163.com" to it
    // to build the "username" form field.
    private static String USERNAME = "***";
    // URL that the login form is POSTed to.
    private static String LOGINURL = "http://reg.163.com/login.jsp";
   
    // Cookie text cached by autoLogin() and sent as the "Cookie" request header by
    // getContent(). Declared without an initializer, so it is null until a login sets it.
    private String CookieStr;
    // Cookie expiry date recorded by autoLogin(); getContent() logs in again when this
    // is null or already in the past.
    private Date EndTime;
   
    public void autoLogin(){
        HttpClient httpClient = new HttpClient();
        PostMethod post = new PostMethod(LOGINURL);
        // 注意这里的地址!
        post.setRequestHeader("Accept-Language", "en-us,en;q=0.5");
        post.setRequestHeader("Accept-Encoding","gzip, deflate");
        post.setRequestHeader("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7 (.NET CLR 3.5.30729)");
        post.addRequestHeader("Content-Type","application/x-www-form-urlencoded;charset=utf-8");
       
        NameValuePair user = new NameValuePair("user","***");
        NameValuePair pwd = new NameValuePair("password","***");
        NameValuePair username = new NameValuePair("username",USERNAME + "@163.com");
       
        post.setRequestBody(new NameValuePair[]{username,user,pwd});
        httpClient.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
       
        int status;
        try {
            status = httpClient.executeMethod(post);
            System.out.println("loginUrl:"+status);
            Cookie[] cookies = httpClient.getState().getCookies();
            //获取cookie
            if (cookies.length == 0){
                System.out.println("Cookie:None");
            } else {
                if (cookies.length >=2)
                {
                    EndTime = cookies[1].getExpiryDate();
                    for(Cookie c:cookies){
                        CookieStr +=c.toString()+";";
                    }
                }
            }
            System.out.println(EndTime);
            System.out.println(CookieStr);
            System.out.println(post.getResponseBodyAsString());
        } catch (HttpException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }finally{
            post.releaseConnection();
        }
    }
   
    public String getContent(){
        Date date = new Date();
        String content = null;
       
        //CookieStr的值为空或者cookie已经失效
        if (CookieStr == "" || EndTime == null || date.getTime() > EndTime.getTime())
        {
            autoLogin();
        }
        HttpClient client = new HttpClient();
        client.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
        //get data url
        String url = "*******";

        GetMethod get = new GetMethod(url);
        get.setRequestHeader("Accept-Language", "en-us,en;q=0.5");
        get.setRequestHeader("Accept-Encoding","gzip, deflate");
        get.setRequestHeader("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
        get.setRequestHeader("Accept-Charset","ISO-8859-1,utf-8;q=0.7,*;q=0.7");
        get.setRequestHeader("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7 (.NET CLR 3.5.30729)");
     
        //设置Cookie,必须
        get.setRequestHeader("Cookie", CookieStr);
        try {
            int status = client.executeMethod(get);
            //获取你想要的页面内容
            content = get.getResponseBodyAsString();
            System.out.println("getUrl:"+status);
            System.out.println(content);
        } catch (HttpException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }finally
        {
            get.releaseC