package task;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Extracts records from a locally saved HTML page and writes them to a
 * tab-separated text file.
 *
 * <p>The pipeline is: {@link #getHtml()} parses the saved page,
 * {@link #getData(Document)} turns the matched elements into
 * {@link EntityBean} objects, and {@link #wirteToFile(ArrayList)} writes one
 * line per bean.
 */
public class Crawler {
    // 1. Configuration.
    // Forward slashes are used because Java accepts them on Windows as well as on
    // Unix-like systems, whereas a backslash is an ordinary filename character
    // outside Windows.
    private static final String OUTPUT_FILE = "output/北京景点数据.txt";
    private static final String SOURCE_FILE = "resource/北京景点.htm";

    // 2. Parse the local page and return it as a Document.
    /**
     * Parses the saved HTML page.
     *
     * @return the parsed document
     * @throws IOException if the source file cannot be read
     */
    public static Document getHtml() throws IOException {
        // Jsoup.parse(File, String): the second argument is the charset the page
        // is decoded with.
        return Jsoup.parse(new File(SOURCE_FILE), "utf-8");
    }

    // 3. Extract the data and return it as a list.
    /**
     * Builds one {@link EntityBean} per element matched by
     * {@code #search-list > div > div}.
     *
     * <p>A field whose selector matches nothing is stored as an empty string,
     * because {@code Elements.text()} returns {@code ""} for an empty selection.
     *
     * @param doc the parsed page
     * @return the extracted beans in document order; empty if nothing matched
     * @throws IOException never thrown by this implementation; the clause is
     *         kept so existing callers keep compiling
     */
    public static ArrayList<EntityBean> getData(Document doc) throws IOException {
        Elements items = doc.select("#search-list > div > div");
        ArrayList<EntityBean> list = new ArrayList<>();
        for (Element item : items) { // one list entry per matched element
            String name = item.select("div.sight_item_about > h3 > a").text();
            // Text of the span with class "level".
            String jqdj = item.select("div.sight_item_about > div > div.clrfix > span.level").text();
            // NOTE(review): presumably the address ("dz") - confirm against the page.
            String dz = item.select("div.sight_item_about > div > p > span").text();
            // NOTE(review): "jg" and "yxl" are taken from rows 1 and 4 of the pop-up
            // table; presumably price and sales volume - confirm against the page.
            String jg = item.select("div.sight_item_pop > table > tbody > tr:nth-child(1) > td").text();
            String yxl = item.select("div.sight_item_pop > table > tbody > tr:nth-child(4) > td").text();
            list.add(new EntityBean(name, jqdj, dz, jg, yxl));
        }
        return list;
    }

    // 4. Write the records to the local file system.
    /**
     * Writes every bean as one tab-separated line
     * ({@code name, jqdj, dz, jg, yxl}) to the output file, replacing any
     * existing content.
     *
     * <p>The file is always encoded as UTF-8 (the same charset the input is
     * parsed with) instead of the platform default, so the output does not
     * depend on the machine it is produced on. The parent directory is created
     * when it does not exist yet.
     *
     * <p>The misspelled method name is kept for backward compatibility.
     *
     * @param list the beans to write
     * @throws FileNotFoundException if the output file cannot be created or
     *         opened for writing
     */
    public static void wirteToFile(ArrayList<EntityBean> list) throws FileNotFoundException {
        File target = new File(OUTPUT_FILE);
        File parent = target.getParentFile();
        if (parent != null) {
            // Result deliberately ignored: if the directory still does not exist,
            // opening the stream below fails with FileNotFoundException.
            parent.mkdirs();
        }
        // try-with-resources closes (and flushes) the writer even if the loop throws.
        try (PrintWriter pw = new PrintWriter(
                new OutputStreamWriter(new FileOutputStream(target), StandardCharsets.UTF_8))) {
            for (EntityBean eb : list) {
                pw.println(eb.getName() + "\t" + eb.getJqdj() + "\t" + eb.getDz()
                        + "\t" + eb.getJg() + "\t" + eb.getYxl());
            }
        }
    }

    // 5. Entry point.
    /**
     * Runs the pipeline: parse the page, print its title, extract the records
     * and write them out.
     *
     * @param args unused
     * @throws IOException if the page cannot be read or the output cannot be opened
     */
    public static void main(String[] args) throws IOException {
        Document doc = Crawler.getHtml();
        System.out.println("网页标题:" + doc.title());
        ArrayList<EntityBean> entitys = Crawler.getData(doc);
        Crawler.wirteToFile(entitys);
    }
}
package task;
/**
 * Mutable value holder for one extracted record, made of five string fields.
 *
 * <p>The short field names look like pinyin abbreviations; their exact meaning
 * is not established by this class (NOTE(review): presumably scenic-spot name,
 * level, address, price and sales volume - confirm with the code that fills
 * them).
 */
public class EntityBean {
    // Fields
    private String name;
    private String jqdj;
    private String dz;
    private String jg;
    private String yxl;

    /** No-argument constructor; every field starts as {@code null}. */
    public EntityBean() {
    }

    /**
     * All-argument constructor; stores each argument in the field of the same name.
     *
     * @param name value for {@code name}
     * @param jqdj value for {@code jqdj}
     * @param dz value for {@code dz}
     * @param jg value for {@code jg}
     * @param yxl value for {@code yxl}
     */
    public EntityBean(String name, String jqdj, String dz, String jg, String yxl) {
        this.name = name;
        this.jqdj = jqdj;
        this.dz = dz;
        this.jg = jg;
        this.yxl = yxl;
    }

    // Getters and setters

    /** @return the current {@code name} value */
    public String getName() {
        return this.name;
    }

    /** @param name the new {@code name} value */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the current {@code jqdj} value */
    public String getJqdj() {
        return this.jqdj;
    }

    /** @param jqdj the new {@code jqdj} value */
    public void setJqdj(String jqdj) {
        this.jqdj = jqdj;
    }

    /** @return the current {@code dz} value */
    public String getDz() {
        return this.dz;
    }

    /** @param dz the new {@code dz} value */
    public void setDz(String dz) {
        this.dz = dz;
    }

    /** @return the current {@code jg} value */
    public String getJg() {
        return this.jg;
    }

    /** @param jg the new {@code jg} value */
    public void setJg(String jg) {
        this.jg = jg;
    }

    /** @return the current {@code yxl} value */
    public String getYxl() {
        return this.yxl;
    }

    /** @param yxl the new {@code yxl} value */
    public void setYxl(String yxl) {
        this.yxl = yxl;
    }

    /**
     * Overrides {@code toString()} to list all five fields.
     *
     * @return {@code "EntityBean [name=..., jqdj=..., dz=..., jg=..., yxl=...]"};
     *         a {@code null} field is rendered as the text {@code null}
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("EntityBean [name=");
        text.append(name);
        text.append(", jqdj=").append(jqdj);
        text.append(", dz=").append(dz);
        text.append(", jg=").append(jg);
        text.append(", yxl=").append(yxl);
        text.append("]");
        return text.toString();
    }
}