Java爬虫入门一
总体步骤:
- 创建HttpClient对象
- 输入网址
- 发起请求
- 解析响应
上代码
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
/**
 * Introductory crawler example: fetches a single page with Apache HttpClient
 * and prints the response body to standard output.
 */
public class SpyderTest {

    /** Address of the page to fetch. */
    private static final String TARGET_URL = "https://www.baidu.com";

    /**
     * Sends a GET request to {@link #TARGET_URL} and prints the body when the
     * server answers with HTTP 200; any other status is reported on standard error.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the request fails or the response body cannot be read
     */
    public static void main(String[] args) throws IOException {
        // Build the request for the target address.
        HttpGet httpGet = new HttpGet(TARGET_URL);

        // try-with-resources closes the response first and then the client, so the
        // underlying connection is released even when reading the body throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {

            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode != 200) {
                System.err.println("Unexpected HTTP status " + statusCode + " for " + TARGET_URL);
                return;
            }

            // A response is allowed to carry no entity; there is nothing to print then.
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                return;
            }

            // UTF-8 is only the fallback used when the response declares no charset.
            String content = EntityUtils.toString(entity, StandardCharsets.UTF_8);
            System.out.println(content);
        }
    }
}
=============爬到的结果=======================
<html><script>
var arg1='1F5C36824A0D036DD2B15E0F5964E8D7B1D4EFB6';
// 这表示是经过压缩的,需要使用Gzip流来处理,后面会有解决
var _0x4818=['\x63\x73\x4b\x48\x77\x71\x4d\x49','\x5a\x73\x4b\x4a\x77\x72\x38\x56\x65\x41\x73\x79']
function setCookie(name,value){var expiredate=new Date();expiredate.setTime(expiredate.getTime()+(3600*1000));document.cookie=name+"="+value+";expires="+expiredate.toGMTString()+";max-age=3600;path=/";}
function reload(x) {setCookie("acw_sc__v2", x);document.location.reload();}
</script></html>