HttpClient

视频

https://www.bilibili.com/video/BV1cE411u7RA?p=4

https://www.bilibili.com/video/BV1W54y1s7BZ?p=4

基础文本框架

https://www.ayulong.cn/blog/12

https支持

https://blog.csdn.net/Sakuraaaaaaa/article/details/107280162

HttpClient

官网:http://hc.apache.org

准备

maven

 <dependencies>
        <!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
        <!-- Apache HttpClient: the HTTP client used by every request example below -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.12</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 -->
        <!-- Logging: SLF4J binding for log4j 1.2 (declared with test scope) -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.25</version>
            <scope>test</scope>
        </dependency>

    </dependencies>

创建 slf4j 日志配置文件

在 resources 目录下创建 log4j.properties 文件, 并添加以下配置

# A1: appender that writes log output to the console
log4j.rootLogger=DEBUG,A1
log4j.logger.cn.itcast = DEBUG

log4j.appender.A1=org.apache.log4j.ConsoleAppender
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
# Line layout: timestamp, [thread], [logger category]-[level], message, newline
log4j.appender.A1.layout.ConversionPattern=%-d{yyyy-MM-dd HH:mm:ss,SSS} [%t] [%c]-[%p] %m%n

最简单的爬虫程序

在java的 cn.ayulong.crawler.test 中创建 CrawlerFirst类

package cn.ayulong.crawler.test;

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import java.io.IOException;

/**
 * Minimal crawler: issues one GET request and prints the response body.
 */
public class CrawlerFirst {

    /**
     * Fetches {@code http://www.baidu.com/} and prints the body when the status is 200.
     *
     * @param args unused
     * @throws IOException if the request fails or the body cannot be read
     */
    public static void main(String[] args) throws IOException {
        // The request object only describes the call; nothing is sent yet.
        HttpGet httpGet = new HttpGet("http://www.baidu.com/");

        // try-with-resources closes the response first and then the client,
        // even when reading the body throws, so no connection is leaked.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            // Only print the page when the server answered 200.
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity httpEntity = response.getEntity();
                String content = EntityUtils.toString(httpEntity, "utf-8");
                System.out.println(content);
            }
        }
    }
}
// 直接运行, 成功抓取到百度首页html并打印到控制台

注意: 此方法只能抓取 http 协议的页面, 如果想抓取 https 的页面, 可以参考本文开头 "https支持" 一节给出的链接。

GET 请求

无参无头

/**
 * GET request without parameters or custom headers.
 */
public class HttpGetTest {

    /**
     * Requests {@code http://www.baidu.com} and prints the length of the body
     * when the status is 200. I/O failures are printed, not rethrown.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Request object carrying the target URL.
        HttpGet httpGet = new HttpGet("http://www.baidu.com");

        // try-with-resources replaces the nested finally/close blocks: the
        // response is closed first, then the client, on every exit path.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity entity = response.getEntity();
                // EntityUtils.toString reads the body fully and closes its stream,
                // so a separate EntityUtils.consume call is not needed.
                String content = EntityUtils.toString(entity, StandardCharsets.UTF_8);
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

带请求头

public class HttpGetTest {

    public static void main(String[] args) {
        // 可关闭的httpclient客户端,相当于你打开的一个浏览器
        CloseableHttpClient httpClient = HttpClients.createDefault();

        // 创建HttpGet对象, 设置url访问地址
        HttpGet httpGet = new HttpGet("http://www.baidu.com");
        // 解决httpclient被认为不是真人行为
		httpGet.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36")
        // 防盗链,url发生防盗链的网站的url
        httpGet.addHeader("Referer","https://www.baidu.com/")
            
        CloseableHttpResponse response = null;
        try {
            // 使用 HttpClient 发起请求, 获取 response
            response = httpClient.execute(httpGet);
            // 解析响应
            if (response.getStatusLine().getStatusCode() == 200) {
                // 获取响应结果
                HttpEntity entity = response.getEntity();
                // 对 HttpEntity 操作的工具类
                //String content = EntityUtils.toString(response.getEntity(), "utf8");
                String content = EntityUtils.toString(entity, StandardCharsets.UTF_8);
                System.out.println(content.length());
                //确保流关闭
                EntityUtils.consume(entity);
            }
        } catch(IOException e) {
            e.printStackTrace();
        } finally {
            // 关闭 response
            if (response != null) {
                try {
                	response.close();
                } catch (IOException e) {
                	e.printStackTrace();
            	}
            }
            if (httpClient != null) {
                try {
                	httpClient.close();
				} catch (IOException e) {
                	e.printStackTrace();
            	}   
            }
            
        }

    }
}

请求带参数

/**
 * GET request whose query string is assembled with {@code URIBuilder}.
 */
public class HttpGetParamTest {

    /**
     * Requests {@code http://yun.itheima.com/search?keys=Java} and prints the
     * length of the body when the status is 200.
     *
     * @param args unused
     * @throws Exception if the URI cannot be built
     */
    public static void main(String[] args) throws Exception {
        // Build the URI first: a malformed URI then fails before any client exists.
        URIBuilder uriBuilder = new URIBuilder("http://yun.itheima.com/search");
        uriBuilder.setParameter("keys", "Java");
        HttpGet httpGet = new HttpGet(uriBuilder.build());

        System.out.println("发起请求的信息: " + httpGet);

        // try-with-resources avoids the NullPointerException the old finally block
        // raised when execute() failed and the response was still null.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                // Report the body size, as the other examples do, instead of discarding it.
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

或者手动转

public class HttpGetParamTest {

    public static void main(String[] args) throws Exception {
        // 创建HttpClient对象
        CloseableHttpClient httpClient = HttpClients.createDefault();
		String urlStr = "https://www.baidu.com";
        String passwordParam = "12+abc 456|789"//
        // 做urlencode:如果是浏览器的话,浏览器会帮我们自动给做了,123%2Babc+456%7c789
        passwordParam = URLEncoder.encode(passwordParam, StandardCharsets.UTF_8.name());
        urlStr = "http://demo/?" + passwordParam
        HttpGet httpGet = new HttpGet(urlStr);

        System.out.println("发起请求的信息: " + httpGet);

        CloseableHttpResponse response = null;
        try {
            // 使用 HttpClient 发起请求, 获取 response
            response = httpClient.execute(httpGet);
            // 解析响应
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
            }
        } catch(IOException e) {
            e.printStackTrace();
        } finally {
            // 关闭 response
            try {
                response.close();
                httpClient.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

    }
}

获取响应头以及相应的Content-Type

/**
 * GET request that prints the response headers and the entity's Content-Type.
 */
public class HttpGetParamTest {

    /**
     * Requests {@code https://www.baidu.com}; on status 200 prints every response
     * header, the entity's Content-Type and the length of the body.
     *
     * @param args unused
     * @throws Exception declared by the original signature; I/O failures are printed
     */
    public static void main(String[] args) throws Exception {
        String urlStr = "https://www.baidu.com";
        HttpGet httpGet = new HttpGet(urlStr);
        System.out.println("发起请求的信息: " + httpGet);

        // try-with-resources avoids the NullPointerException the old finally block
        // raised when execute() failed and the response was still null.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                System.out.println("响应成功");
                // Every response header is printed here, Content-Type included.
                Header[] allHeaders = response.getAllHeaders();
                for (Header header : allHeaders) {
                    System.out.println("响应头" + header.getName() + "的值" + header.getValue());
                }
                // The Content-Type is also available from the entity itself.
                HttpEntity entity = response.getEntity();
                System.out.println("ContentType" + entity.getContentType());
                String content = EntityUtils.toString(entity, StandardCharsets.UTF_8);
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

保存网络图片到本地

public class HttpGetParamTest {

    public static void main(String[] args) throws Exception {
        // 创建HttpClient对象
        CloseableHttpClient httpClient = HttpClients.createDefault();
		String urlStr = "https://img2022.cnblogs.com/blog/1381066/202202/1381066-20220226153714038-1862164179.png";
        HttpGet httpGet = new HttpGet(urlStr);
		// 可关闭的响应
        CloseableHttpResponse response = null;
        try {
            // 使用 HttpClient 发起请求, 获取 response
            response = httpClient.execute(httpGet);
            HttpEntity entity = response.getEntity();
            // image/jpg  image/jpeg  image/png  image/图片的后缀
            String contentType = entity.getContentType.getValue();
            String suffix = '.jpg';
            if (contentType.contains("jpg")||contentType.contains("jpeg")) {
                suffix = '.jpg';
            }else if (contentType.contains("bmp")||contentType.contains("bitbmp")) {
                suffix = '.bmp';
            }else if (contentType.contains("png")) {
                suffix = '.png';
            }else if (contentType.contains("gif")) {
                suffix = '.gif';
            }
            // 获取文件的字节流
            byte[] bytes = EntityUtils.toByteArray(entity);
            String localAbsPath = "e:\\abbc" + suffix;
            FileOutputStream fos = new FileOutputStream(localAbsPath);
            fos.write(bytes);
            fos.close();
            EntityUtils.consume(entity);
            
        } catch(IOException e) {
            e.printStackTrace();
        } finally {
            // 关闭 response
            try {
                response.close();
                httpClient.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

    }
}

设置访问代理

/**
 * GET request routed through an HTTP proxy.
 */
public class HttpGetTest {

    /**
     * Requests {@code http://www.baidu.com} through the configured proxy and
     * prints the length of the body when the status is 200. I/O failures are
     * printed, not rethrown.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        HttpGet httpGet = new HttpGet("http://www.baidu.com");

        // Proxy endpoint used for this request.
        String ip = "114.7.192.253";
        int port = 8080;
        HttpHost proxy = new HttpHost(ip, port);
        // Per-request configuration; it overrides the client's default request config.
        RequestConfig requestConfig = RequestConfig.custom().setProxy(proxy).build();
        httpGet.setConfig(requestConfig);

        // try-with-resources closes the response first, then the client.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity entity = response.getEntity();
                // EntityUtils.toString reads the body fully and closes its stream.
                String content = EntityUtils.toString(entity, StandardCharsets.UTF_8);
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

Post请求带参数

application/x-www-form-urlencoded

这是默认的Content-type

使用 HttpClient 发送不带参数的 post 请求与 发送不带参数的 get 请求类似, 只是 HttpGet 要改为 HttpPost, 所以只记录带参数的 post 请求方式

/**
 * POST request with an {@code application/x-www-form-urlencoded} body.
 */
public class HttpPostParamTest {

    /**
     * Posts the form field {@code keys=Java} to {@code http://yun.itheima.com/search}
     * and prints the length of the body when the status is 200.
     *
     * @param args unused
     * @throws Exception if the form entity cannot be encoded
     */
    public static void main(String[] args) throws Exception {
        HttpPost httpPost = new HttpPost("http://yun.itheima.com/search");

        // Form fields to submit.
        ArrayList<NameValuePair> params = new ArrayList<>();
        params.add(new BasicNameValuePair("keys", "Java"));

        // First argument: the form data; second argument: the charset used to encode it.
        UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(params, "utf8");
        httpPost.setEntity(formEntity);

        // try-with-resources avoids the NullPointerException the old finally block
        // raised when execute() failed and the response was still null.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpPost)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

application/json

/**
 * POST request with an {@code application/json} body.
 */
public class HttpPostParamTest {

    /**
     * Posts a small JSON object to {@code http://yun.itheima.com/search} and prints
     * the length of the body when the status is 200.
     *
     * @param args unused
     * @throws Exception declared by the original signature; I/O failures are printed
     */
    public static void main(String[] args) throws Exception {
        HttpPost httpPost = new HttpPost("http://yun.itheima.com/search");

        // The request body is the JSON text of this object.
        JSONObject jsonObj = new JSONObject();
        jsonObj.put("userName","mingzi");
        jsonObj.put("password","abc123");
        StringEntity jsonEntity = new StringEntity(jsonObj.toString(),Consts.UTF_8);
        // The entity must declare its media type; the charset travels in this header.
        jsonEntity.setContentType(new BasicHeader("Content-Type","application/json; charset=utf-8"));
        // setContentEncoding is deliberately not called: it sets the Content-Encoding
        // header, which names a content coding such as gzip, not a character set.
        httpPost.setEntity(jsonEntity);

        // try-with-resources avoids the NullPointerException the old finally block
        // raised when execute() failed and the response was still null.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpPost)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

multipart/form-data类型上传文件

连接池

/**
 * Issues several requests that share one pooling connection manager.
 */
public class HttpClientPoolTest {

    /**
     * Creates the connection pool, runs two requests against it and shuts the
     * pool down afterwards.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();

        // Upper bound on connections across all hosts.
        cm.setMaxTotal(100);

        // Upper bound on connections per host (route).
        cm.setDefaultMaxPerRoute(10);

        try {
            doGet(cm);
            doGet(cm);
        } finally {
            // Release the pooled connections once all requests are done.
            cm.shutdown();
        }
    }

    /**
     * Sends one GET request to {@code http://www.itcast.cn} over the shared pool
     * and prints the length of the body when the status is 200.
     *
     * @param cm connection manager that supplies the connections
     */
    private static void doGet(PoolingHttpClientConnectionManager cm) {
        // A client is built per call, but its connections come from the shared manager.
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();

        HttpGet httpGet = new HttpGet("http://www.itcast.cn");

        // Only the response is closed here. The client must stay open: closing it
        // would also shut down the connection manager that later calls rely on.
        try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

请求参数配置

  1. setConnectTimeout:连接超时,ms,完成tcp3次握手的时间上限
  2. setSocketTimeout:读取超时,ms,表示从请求的网址处获得响应数据的时间间隔
  3. setConnectionRequestTimeout:指的从连接池里面获取connection 的超时时间
pubsetConnectionRequestTimeousetConnectionRequestTimeouttlic class HttpConfigTest {

    public static void main(String[] args) {
        // 创建HttpClient对象
        CloseableHttpClient httpClient = HttpClients.createDefault();

        // 创建HttpGet对象, 设置url访问地址
        HttpGet httpGet = new HttpGet("http://yun.itheima.com/search?keys=Java");

        // 配置请求信息
        RequestConfig config = RequestConfig.custom().setConnectTimeout(1000) // 创建连接的最长时间, 单位是毫秒
                .setConnectionRequestTimeout(500) // 设置获取连接的最长时间, 单位是毫秒
                .setSocketTimeout(10 * 1000) // 设置数据传输的最长时间, 单位是毫秒
                .build();

        // 给请求设置请求信息
        httpGet.setConfig(config);

        CloseableHttpResponse response = null;
        try {
            // 使用 HttpClient 发起请求, 获取 response
            response = httpClient.execute(httpGet);
            // 解析响应
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content.length());
            }
        } catch(IOException e) {
            e.printStackTrace();
        } finally {
            // 关闭 response
            try {
                response.close();
                httpClient.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

    }
}

Jsoup

jsoup 是一款Java 的HTML解析器,可直接解析某个URL地址、HTML文本内容。它提供了一套非常省力的API,可通过DOM,CSS以及类似于jQuery的操作方法来取出和操作数据。

jsoup的主要功能如下:

  1. 从一个URL,文件或字符串中解析HTML;
  2. 使用DOM或CSS选择器来查找、取出数据;
  3. 可操作HTML元素、属性、文本;( Jsoup一般用于解析爬到的数据并存储, 很少用到操作 )

准备

先在pom中导入依赖, 搭建开发环境

<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
    <!-- Jsoup: HTML parser -->
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.11.3</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/junit/junit -->
    <!-- Testing -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
        <scope>test</scope>
    </dependency>
    <!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
    <!-- File utilities -->
    <dependency>
        <groupId>commons-io</groupId>
        <artifactId>commons-io</artifactId>
        <version>2.4</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
    <!-- String utilities -->
    <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-lang3</artifactId>
        <version>3.4</version>
    </dependency>

Jsoup解析URL

Jsoup可以直接输入url,它会发起请求并获取数据,封装为Document对象

/**
 * Jsoup examples: parsing a document directly from a URL.
 */
public class JsoupFirstTest {

    /**
     * Loads {@code http://www.itcast.cn} and prints the text of its first
     * {@code title} element.
     *
     * @throws Exception if the page cannot be fetched or parsed
     */
    @Test
    public void testUrl() throws Exception {
        // First argument: the page to fetch; second argument: the timeout in milliseconds.
        URL pageUrl = new URL("http://www.itcast.cn");
        Document page = Jsoup.parse(pageUrl, 1000);

        // Tag lookup: take the first title element and print its text.
        System.out.println(page.getElementsByTag("title").first().text());
    }
}

PS:虽然使用Jsoup可以替代HttpClient直接发起请求解析数据,但是往往不会这样用,因为实际的开发过程中,需要使用到多线程,连接池,代理等等方式,而jsoup对这些的支持并不是很好,所以我们一般把jsoup仅仅作为Html解析工具使用

Jsoup解析字符串

 	@Test
    /**
     * Reads the sample HTML file into a string, parses that string and prints the
     * text of the first {@code title} element.
     *
     * @throws Exception if the file cannot be read
     */
    public void testString() throws Exception {
        // Load the whole file as text with the commons-io helper.
        File htmlFile = new File("D:\\360MoveData\\Users\\Administrator\\Desktop\\test.html");
        String html = FileUtils.readFileToString(htmlFile, "utf8");

        // Parse the in-memory HTML and print the title text.
        Document parsed = Jsoup.parse(html);
        System.out.println(parsed.getElementsByTag("title").first().text());

    }
}

Jsoup解析文件

	@Test
    /**
     * Parses the sample HTML file directly and prints the text of the first
     * {@code title} element.
     *
     * @throws Exception if the file cannot be read
     */
    public void testFile() throws Exception {
        // Jsoup reads and decodes the file itself, using the given charset name.
        File htmlFile = new File("D:\\360MoveData\\Users\\Administrator\\Desktop\\test.html");
        Document parsed = Jsoup.parse(htmlFile, "utf8");

        System.out.println(parsed.getElementsByTag("title").first().text());

    }

所需 html文件

<html>
 <head> 
  <title>传智播客官网-一样的教育,不一样的品质</title> 
 </head> 
 <body>
	<div class="city">
		<h3 id="city_bj">北京中心</h3>
		<fb:img src="/2018czgw/images/slogan.jpg" class="slogan"/>
		<div class="city_in">
			<div class="city_con" style="display: none;">
				<ul>
					<li id="test" class="class_a class_b">
						<a href="http://www.itcast.cn" target="_blank">
							<span class="s_name">北京</span>
						</a>
					</li>
					<li>
						<a href="http://sh.itcast.cn" target="_blank">
							<span class="s_name">上海</span>
						</a>
					</li>
					<li>
						<a href="http://gz.itcast.cn" target="_blank">
							<span abc="123" class="s_name">广州</span>
						</a>
					</li>
					<ul>
						<li>天津</li>
					</ul>					
				</ul>
			</div>
		</div>
	</div>
 </body>
</html>

使用Dom方式遍历文档

  @Test
    /**
     * Demonstrates the DOM-style lookup methods on the sample HTML file and prints
     * the text of each match.
     *
     * @throws Exception if the file cannot be read
     */
    public void testDom() throws Exception {
        // Parse the file into a Document.
        File htmlFile = new File("D:\\360MoveData\\Users\\Administrator\\Desktop\\test.html");
        Document page = Jsoup.parse(htmlFile, "utf8");

        // 1. Lookup by id: getElementById.
        Element byId = page.getElementById("city_bj");
        System.out.println("获取到的元素内容是: " + byId.text());

        // 2. Lookup by tag name: getElementsByTag.
        Elements byTag = page.getElementsByTag("span");
        for (int i = 0; i < byTag.size(); i++) {
            System.out.println(byTag.get(i).text());
        }

        // 3. Lookup by class: getElementsByClass.
        Element byClass = page.getElementsByClass("class_a class_b").first();
        System.out.println(byClass.text());

        // 4. Lookup by attribute name: getElementsByAttribute.
        Element byAttribute = page.getElementsByAttribute("abc").first();
        System.out.println("abc.text() = " + byAttribute.text());

        // 5. Lookup by attribute name and value: getElementsByAttributeValue.
        Element byAttributeValue = page.getElementsByAttributeValue("href", "http://sh.itcast.cn").first();
        System.out.println("href.text() = " + byAttributeValue.text());
    }

Selector选择器组合使用

@Test
    /**
     * Demonstrates combined CSS selectors on the sample HTML file.
     *
     * Selector forms covered by this example (only two are active below):
     *   el#id            element + id,         e.g. "h3#city_bj"
     *   el.class         element + class,      e.g. "li.class_a"
     *   el[attr]         element + attribute,  e.g. "span[abc]"
     *   any combination                        e.g. "span[abc].s_name"
     *   ancestor child   descendants,          e.g. ".city_con li"
     *   parent > child   direct children only, e.g. ".city_con > ul > li"
     *   parent > *       all direct children,  e.g. ".city_con > ul > *"
     *
     * @throws Exception if the file cannot be read
     */
    public void testSelector2() throws Exception {
        // Parse the file into a Document.
        File htmlFile = new File("D:\\360MoveData\\Users\\Administrator\\Desktop\\test.html");
        Document page = Jsoup.parse(htmlFile, "utf8");

        // Combined selector: a span that has an "abc" attribute and the class "s_name".
        Element combined = page.select("span[abc].s_name").first();

        // Every direct child of the ul elements that sit directly under ".city_con".
        Elements directChildren = page.select(".city_con > ul > *");

        System.out.println("获取到的内容是: " + combined.text());
        for (int i = 0; i < directChildren.size(); i++) {
            System.out.println("遍历的结果: " + directChildren.get(i).text());
        }
    }
posted @ 2022-04-09 16:27  dongye95  阅读(109)  评论(0编辑  收藏  举报