网络爬虫简单实现

有访问权限(站点允许直接访问,使用 URL.openStream() 即可读取页面)

/**
 * Minimal crawler example for a site that can be fetched directly:
 * downloads a page and prints its content line by line to standard output.
 */
public class SpiderTest {
  /**
   * Opens the target URL, decodes the response as UTF-8 and echoes every line.
   *
   * @param args command-line arguments (unused)
   * @throws IOException if the URL cannot be opened or reading the response fails
   */
  public static void main(String[] args) throws IOException {
    URL url = new URL("https://www.baidu.com");
    // try-with-resources closes the reader and the underlying stream even when
    // readLine() throws, which the original trailing br.close() did not.
    try (InputStream is = url.openStream();
         BufferedReader br = new BufferedReader(new InputStreamReader(is, "utf-8"))) {
      String msg;
      while (null != (msg = br.readLine())) {
        System.out.println(msg);
      }
    }
  }
}

无访问权限(直接访问会被拒绝,需要设置 User-Agent 请求头模拟浏览器)

/**
 * Minimal crawler example that sends a browser-like {@code User-Agent} header
 * with the request, then prints the fetched page line by line to standard output.
 */
public class SpiderTest2 {
  /**
   * Issues a GET request with a custom {@code User-Agent}, decodes the response
   * as UTF-8 and echoes every line.
   *
   * @param args command-line arguments (unused)
   * @throws IOException if the connection cannot be opened or reading the response fails
   */
  public static void main(String[] args) throws IOException {
    URL url = new URL("https://www.jd.com");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestMethod("GET");
    conn.setRequestProperty("User-Agent","Mozilla/5.0 (Windows NT 6.1; W…) Gecko/20100101 Firefox/67.0");
    // Read only from the configured connection. The original also called
    // url.openStream(), which opened a second connection WITHOUT the User-Agent
    // header and never closed it.
    try (InputStream is = conn.getInputStream();
         BufferedReader br = new BufferedReader(new InputStreamReader(is, "utf-8"))) {
      String msg;
      while (null != (msg = br.readLine())) {
        System.out.println(msg);
      }
    } finally {
      // Signal that no further requests to this server are expected.
      conn.disconnect();
    }
  }
}

 

posted @ 2019-06-27 00:21  疯狂的字母  阅读(159)  评论(0)  编辑  收藏  举报