花了两天时间,搞了个豆瓣自动回帖的程序……
原理不难,用到的就是 HttpClient 和 HTMLParser 这两个库。
豆瓣为了防止恶意发帖,在回复或者发新帖的时候,有时候需要验证码,这个验证码还不太好识别(如果有高手,请联系我!),不过,我发现了一个程序上的漏洞,可以绕过去。
先打开IE或者其他浏览器登录豆瓣,然后从浏览器里查到登录后的 Cookie,填到下面代码的 Cookie 请求头中。
贴上部分代码,仅供学习参考:
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
public class DoubanCCSUtils {
	public static void main(String[] args) {
		comment();
	}
public static void comment() {
		// String httpUrl = "http://www.douban.com/group/M-P/new_topic";
		String httpUrl = "http://www.douban.com/group/";   //我的小组里的最新贴子,只取第一页
		// HttpPost连接对象
		HttpGet httpGet = new HttpGet(httpUrl);
		httpGet.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
		// httpGet.addHeader("Accept-Encoding","gzip,deflate,sdch"); 不能压缩,否则乱码,压缩需要浏览器支持
		httpGet.addHeader("Accept-Language", "zh-CN,zh;q=0.8");
		httpGet.addHeader("Cache-Control", "max-age=0");
		httpGet.addHeader("Connection", "keep-alive");
		httpGet.addHeader("Content-Type", "application/x-www-form-urlencoded");
		// ck,dbcly这两个参数会变化
		httpGet.addHeader(
				"Cookie","");  //Cookie,自己查浏览器
		httpGet.addHeader("Host", "www.douban.com");
		httpGet.addHeader("Origin", "http://www.douban.com");
		httpGet.addHeader("Referer", "http://www.douban.com/group/");
		httpGet.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36");
		httpGet.addHeader("Content-type", "text/html; charset=utf-8");
		// 设置字符集
		try {
			// 取得默认的HttpClient
			HttpClient httpclient = new DefaultHttpClient();
			// 取得HttpResponse
			HttpResponse httpResponse = httpclient.execute(httpGet);
			// HttpStatus.SC_OK表示连接成功
			if (httpResponse.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
				// 取得返回的字符串
				String strResult = EntityUtils.toString(httpResponse.getEntity(), HTTP.UTF_8);
				Parser parser = new Parser(strResult);
				NodeFilter filter = new TagNameFilter("A");
				NodeList nodes = parser.extractAllNodesThatMatch(filter);
				if (nodes != null) {
					for (int i = 0; i < nodes.size(); i++) {
						Node textnode = (Node) nodes.elementAt(i);
						String s = textnode.getText();
						if (s.contains("http://www.douban.com/group/topic") && s.contains("title=")) {
							s = getTopicUrl(s);
							addComment(s);
						}
					}
				}
				System.out.println("完成!");
			}
		} catch (Exception e) {
			System.out.println(e.getLocalizedMessage());
		}
	}
	public static String getTopicUrl(String text) {
		text = text.replace("a href=", "");
		text = text.replace("\"", "");
		String[] arr = text.split(" ");
		return arr[0];
	}
public static void addComment(String httpUrl) {
		System.out.println(httpUrl);
		httpUrl = httpUrl + "add_comment#last";
		// HttpPost连接对象
		HttpPost httpPost = new HttpPost(httpUrl);
		httpPost.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
		httpPost.addHeader("Accept-Encoding", "gzip,deflate,sdch");
		httpPost.addHeader("Accept-Language", "zh-CN,zh;q=0.8");
		httpPost.addHeader("Cache-Control", "max-age=0");
		httpPost.addHeader("Connection", "keep-alive");
		httpPost.addHeader("Content-Type", "application/x-www-form-urlencoded");
		// ck,dbcly这两个参数会变化
		httpPost.addHeader(
				"Cookie","");  //这个Cookie,自己根据浏览器去查吧
		httpPost.addHeader("Host", "www.douban.com");
		httpPost.addHeader("Origin", "http://www.douban.com");
		httpPost.addHeader("Referer", "http://www.douban.com/group/");
		httpPost.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36");
		// 使用NameValuePair来保存要传递的Post参数
		List<NameValuePair> params = new ArrayList<NameValuePair>();
		// 添加要传递的参数
		params.add(new BasicNameValuePair("ck", "VdIW"));
		params.add(new BasicNameValuePair("rv_comment", getMyComment()));
		params.add(new BasicNameValuePair("start", "0"));
		params.add(new BasicNameValuePair("submit_btn", "加上去"));
//params.add(new BasicNameValuePair("captcha-solution", "monkey"));
//params.add(new BasicNameValuePair("captcha-id", "YTYPMnsapAJsXw0o2w6T5SY5"));
		// 设置字符集
		try {
			HttpEntity httpentity = new UrlEncodedFormEntity(params, "utf-8");
			// 请求httpPost
			httpPost.setEntity(httpentity);
			// 取得默认的HttpClient
			HttpClient httpclient = new DefaultHttpClient();
			// 取得HttpResponse
			HttpResponse httpResponse = httpclient.execute(httpPost);
			int status = httpResponse.getStatusLine().getStatusCode();
			System.out.println(status);
			if (status == 200) {  //200实际意味着失败,需要验证码
				DoubanVCUtils.getDoubanVC();  //解决验证码问题
			}
			if (status == 302) {  //302转向意味着成功了
}
Thread.currentThread().sleep(5000); // 设置暂停毫秒,防止引起豆瓣注意, 这个时间可长可短,根据需要
		} catch (Exception e) {
			System.out.println(e.getLocalizedMessage());
		}
	}
	public static String getMyComment() {
		String[] comments = new String[20];
		comments[0] = "帮顶一下。中国儿童安全网,关注儿童安全每一天!";
		comments[1] = "支持楼主!中国儿童安全网,关注儿童安全每一天!";
		comments[2] = "占个坑!中国儿童安全网,关注儿童安全每一天!";
省略部分。。。。
		Random r = new Random();
		int k = r.nextInt(20);
		String s = "abcdefghijklmnopqrstuvwxyz";
		char t[] = new char[26];
		for (int x = 0; x < 26; x++) {
			t[x] = s.charAt(x);
		}
		int j = r.nextInt(26);
		return comments[k] + t[j];   //防止内容重复,豆瓣有检查机制
	}
}
 
                    
                 
                
            
         浙公网安备 33010602011771号
浙公网安备 33010602011771号