通过Jsoup,爬取车辆品牌,车系,LOGO等

    /**
     * Crawls the Autohome brand index pages for the letters A-Z and prints, for every
     * {@code <dl>} brand entry found, the logo URL, the main brand, each sub-brand and
     * the series names listed under that sub-brand.
     *
     * <p>Entries that do not have the expected structure (a {@code <dt>} holding an
     * {@code <img>} and at least two links, plus a {@code <dd>}) are skipped instead of
     * aborting the whole crawl.
     *
     * @throws IOException if an index page cannot be fetched or parsed
     */
    @Test
    public void test4() throws IOException {
        for (char letter = 'A'; letter <= 'Z'; letter++) {
            String value = String.valueOf(letter);

            // Per-letter directory; it is the target of the (currently disabled) logo download below.
            FileUtil.mkdir("e://pinpai//" + value);

            System.out.println("***********************" + value);
            String url = "https://www.autohome.com.cn/grade/carhtml/" + value + ".html";
            Document document = Jsoup.parse(new URL(url), 300000);

            for (Element brandEntry : document.getElementsByTag("dl")) {
                Element header = brandEntry.getElementsByTag("dt").first();
                Element body = brandEntry.getElementsByTag("dd").first();
                if (header == null || body == null) {
                    // Not a brand entry in the expected shape; skip it.
                    continue;
                }

                Element logo = header.getElementsByTag("img").first();
                List<Element> headerLinks = header.getElementsByTag("a");
                if (logo == null || headerLinks.size() < 2) {
                    continue;
                }

                // The src attribute is prefixed with the scheme to form the full logo URL.
                String imgUrl = "https:" + logo.attr("src");
                // The second link inside <dt> is read as the brand name
                // (presumably the first one wraps the logo image - confirm against the page).
                String mainBrand = headerLinks.get(1).text();

                // Logo image URL
                System.out.println(imgUrl);
                //System.out.println(HttpUtil.downloadFile(imgUrl, FileUtil.file("e://pinpai//" + value)));
                // Main brand
                System.out.println(mainBrand);

                // Sub-brands: every element with class "h3-tit" inside the first <dd>.
                // Elements.tagName("a") was dropped here: it renames the matched elements
                // in the DOM rather than filtering them.
                for (Element subBrand : body.getElementsByClass("h3-tit")) {
                    System.out.println(subBrand.text());

                    Element seriesContainer = subBrand.nextElementSibling();
                    if (seriesContainer == null) {
                        // No sibling after the sub-brand title, so there is nothing to list.
                        System.out.println();
                        continue;
                    }

                    // Series: the text of every <h4> under the sibling that follows the sub-brand title.
                    List<String> seriesBrand = seriesContainer.getElementsByTag("h4").stream()
                            .map(Element::text)
                            .collect(Collectors.toList());
                    System.out.println(seriesBrand);

                    //vehicleBrandService.save(VehicleBrand.builder().flag(value).logoBrand(imgUrl).mainBrand(mainBrand).subBrands(subBrand.text()).seriesBrand(JSON.toJSONString(seriesBrand)).build());
                }
                System.out.println("-----------");
            }
        }
    }

 

posted @ 2020-11-25 10:30  SweetBaby。  阅读(146)  评论(0)  编辑  收藏  举报