Java爬虫入门一

总体步骤: 

  1. 创建HttpClient对象

  2. 输入网址

  3. 发起请求

  4. 解析响应

 上代码

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

/**
 * Introductory crawler example: fetches one page with Apache HttpClient and prints its body.
 */
public class SpyderTest {
    /**
     * Sends a GET request to a fixed URL. When the server answers with HTTP 200 the
     * response body is decoded and printed to standard output; any other status is
     * reported on standard error.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the request fails or the response body cannot be read
     */
    public static void main(String[] args) throws IOException {
        // Target address and the GET request for it
        String url = "https://www.baidu.com";
        HttpGet httpGet = new HttpGet(url);

        // try-with-resources closes the response first and then the client, so the
        // underlying connection is released even if reading the body throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode != 200) {
                System.err.println("Unexpected HTTP status " + statusCode + " for " + url);
                return;
            }

            // A response is allowed to carry no entity at all; nothing to print then.
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                return;
            }

            // UTF-8 is only the fallback: a charset declared by the response takes precedence.
            String content = EntityUtils.toString(entity, StandardCharsets.UTF_8);
            System.out.println(content);
        }
    }
}

=============爬到的结果=======================
<html><script>
var arg1='1F5C36824A0D036DD2B15E0F5964E8D7B1D4EFB6';
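// 注意: 这里拿到的并不是压缩(Gzip)数据, 而是一段经过混淆的JavaScript反爬校验脚本: 它会设置 acw_sc__v2 Cookie 并重新加载页面, 之后才会返回真实内容(HttpClient 默认会自动解压 gzip 响应, 无需手动使用Gzip流), 后面会有解决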
var _0x4818=['\x63\x73\x4b\x48\x77\x71\x4d\x49','\x5a\x73\x4b\x4a\x77\x72\x38\x56\x65\x41\x73\x79']
        
function setCookie(name,value){var expiredate=new Date();expiredate.setTime(expiredate.getTime()+(3600*1000));document.cookie=name+"="+value+";expires="+expiredate.toGMTString()+";max-age=3600;path=/";}
function reload(x) {setCookie("acw_sc__v2", x);document.location.reload();}
</script></html>

 

posted @ 2022-05-26 08:42  初见洞洞拐  阅读(44)  评论(0)    收藏  举报