Jsoup爬取JD工具类
导入所需依赖:
<dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.10.2</version> </dependency>
pojo层:
package com.lzyuan.pojo; /** * Encoding:utf-8 * Author: liuzheyuan * Date: 2020-09-24 16:58 * Version:1.0 * Description: */ public class Content { private String title;//描述 private String img;//图片 private String price;//价格 private String scroll;//缩略图 private String stock;//发货地 public Content() { } public Content(String title, String img, String price, String scroll, String stock) { this.title = title; this.img = img; this.price = price; this.scroll = scroll; this.stock = stock; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getImg() { return img; } public void setImg(String img) { this.img = img; } public String getPrice() { return price; } public void setPrice(String price) { this.price = price; } public String getScroll() { return scroll; } public void setScroll(String scroll) { this.scroll = scroll; } public String getStock() { return stock; } public void setStock(String stock) { this.stock = stock; } @Override public String toString() { return "Content{" + "title='" + title + '\'' + ", img='" + img + '\'' + ", price='" + price + '\'' + ", scroll='" + scroll + '\'' + ", stock='" + stock + '\'' + '}'; } }
util包:
package com.lzyuan.util; import com.lzyuan.pojo.Content; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.List; /** * Encoding:utf-8 * Author: liuzheyuan * Date: 2020-09-23 11:30 * Version:1.0 * Description: */ public class GetPic { /** * @author liuzheyuan * Description 主方法 */ public static void main(String[] args) throws Exception { List<Content> java = new GetPic().parmJD("apple"); for (Content content : java) { System.out.println(content); } } public List<Content> parmJD(String keyword) throws Exception { String url="https://search.jd.com/Search?keyword="+keyword; Document document = Jsoup.parse(new URL(url), 3000);//3000毫秒响应 //js代码 Element element = document.getElementById("J_goodsList");//最大的div //System.out.println(list); //拿到所有的li Elements lis = element.getElementsByTag("li"); ArrayList<Content> contentList = new ArrayList<>(); // System.out.println(lis); //继续拿里面信息 for (Element li : lis) { //System.out.println(li); //source-data-lazy-img String img = li.getElementsByTag("img").eq(0).attr("data-lazy-img");//图 String price = li.getElementsByClass("p-price").eq(0).text(); String title = li.getElementsByClass("p-name").eq(0).text(); Content content = new Content(); content.setTitle(title); content.setImg(img); content.setPrice(price); contentList.add(content); } return contentList; }

浙公网安备 33010602011771号