package task;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Extracts an air-quality table from a locally saved HTML page and writes it
 * to a tab-separated text file.
 *
 * <p>Pipeline: {@link #getHtml()} parses the saved page,
 * {@link #getData(Document)} turns each table row into an {@link EntityBean},
 * and {@link #wirteToFile(ArrayList)} writes the beans to disk.
 */
public class Crawler {
    // 1. Configuration.
    // Forward slashes are used because java.io.File accepts them on Windows as
    // well as on Unix-like systems; a hard-coded backslash only works on Windows.
    private static final String OUTPUT_FILE = "output/空气质量数据.txt";
    private static final String SOURCE_FILE = "resource/2020年7月北京空气质量指数查询.htm";

    /** CSS selector matching the rows of the monthly air-quality table. */
    private static final String ROW_SELECTOR =
            "#content > div.api_month_list > table > tbody > tr";

    /** Number of leading cells read from every table row. */
    private static final int COLUMN_COUNT = 6;

    // 2. Parse the local web page.
    /**
     * Parses the saved HTML page into a jsoup document.
     *
     * @return the parsed document
     * @throws IOException if the source file cannot be read
     */
    public static Document getHtml() throws IOException {
        File source = new File(SOURCE_FILE);
        // The second argument is the character encoding used to decode the page.
        return Jsoup.parse(source, "gbk");
    }

    // 3. Extract the data.
    /**
     * Converts the rows of the air-quality table into beans.
     *
     * <p>Every matched row that has at least {@value #COLUMN_COUNT} cells is
     * converted, including any header row the selector happens to match. Rows
     * with fewer cells are skipped instead of aborting the whole extraction
     * with an {@link IndexOutOfBoundsException}.
     *
     * @param doc the parsed page
     * @return one bean per usable table row, in document order; empty when the
     *         selector matches nothing
     * @throws IOException never thrown by this implementation; kept in the
     *         signature for compatibility with existing callers
     */
    public static ArrayList<EntityBean> getData(Document doc) throws IOException {
        Elements rows = doc.select(ROW_SELECTOR);
        ArrayList<EntityBean> list = new ArrayList<>(rows.size());
        for (Element row : rows) {
            Elements cells = row.children();
            if (cells.size() < COLUMN_COUNT) {
                // Spacer or malformed row: nothing meaningful to extract.
                continue;
            }
            // The first six cells map onto the bean's constructor arguments.
            String date = cells.get(0).text();
            String airQuality = cells.get(1).text();
            String aqi = cells.get(2).text();
            String aqiRanking = cells.get(3).text();
            String pm25 = cells.get(4).text();
            String pm10 = cells.get(5).text();
            list.add(new EntityBean(date, airQuality, aqi, aqiRanking, pm25, pm10));
        }
        return list;
    }

    // 4. Write to the local file system.
    /**
     * Writes the beans to the output file, one tab-separated line per bean.
     *
     * <p>The method name is misspelled ("wirte"); it is kept as is so existing
     * callers continue to compile.
     *
     * @param ebs the beans to write
     * @throws FileNotFoundException if the output file cannot be created or opened
     */
    public static void wirteToFile(ArrayList<EntityBean> ebs) throws FileNotFoundException {
        // Create the output directory on first use; if this fails, opening the
        // file below still reports the problem as a FileNotFoundException.
        File parent = new File(OUTPUT_FILE).getParentFile();
        if (parent != null) {
            parent.mkdirs();
        }
        // NOTE(review): PrintWriter(String) encodes with the platform default
        // charset - confirm whether consumers of this file expect a fixed encoding.
        // try-with-resources closes (and flushes) the character stream even when
        // an exception is thrown while writing.
        try (PrintWriter out = new PrintWriter(OUTPUT_FILE)) {
            for (EntityBean eb : ebs) {
                out.println(eb.getDate() + "\t" + eb.getAirQuality() + "\t" + eb.getAQI()
                        + "\t" + eb.getAQIRanking() + "\t" + eb.getPm2_5() + "\t" + eb.getPm10());
            }
        }
    }

    // 5. Entry point.
    /**
     * Runs the whole pipeline: parse the page, print its title, extract the
     * rows and write them out.
     *
     * @param args unused
     * @throws IOException if the page cannot be read or the output cannot be opened
     */
    public static void main(String[] args) throws IOException {
        Document doc = Crawler.getHtml();
        System.out.println("网页标题:" + doc.title());
        ArrayList<EntityBean> entities = Crawler.getData(doc);
        Crawler.wirteToFile(entities);
    }
}
package task;
/**
 * Plain data holder for one row of air-quality data.
 *
 * <p>All six values are stored as strings exactly as supplied. The class
 * offers a no-argument constructor, an all-arguments constructor, a getter
 * and setter per property, and a {@link #toString()} override.
 */
public class EntityBean {
    // Property storage.
    private String date;
    private String airQuality;
    private String aqi;
    private String aqiRanking;
    private String pm2_5;
    private String pm10;

    /** Creates a bean with every property left as {@code null}. */
    public EntityBean() {
    }

    /**
     * Creates a fully populated bean.
     *
     * @param date       value for the date property
     * @param airQuality value for the airQuality property
     * @param aQI        value for the AQI property
     * @param aQIRanking value for the AQIRanking property
     * @param pm2_5      value for the pm2_5 property
     * @param pm10       value for the pm10 property
     */
    public EntityBean(String date, String airQuality, String aQI, String aQIRanking, String pm2_5, String pm10) {
        this.date = date;
        this.airQuality = airQuality;
        this.aqi = aQI;
        this.aqiRanking = aQIRanking;
        this.pm2_5 = pm2_5;
        this.pm10 = pm10;
    }

    /** @return the date property */
    public String getDate() {
        return this.date;
    }

    /** @param date new value for the date property */
    public void setDate(String date) {
        this.date = date;
    }

    /** @return the airQuality property */
    public String getAirQuality() {
        return this.airQuality;
    }

    /** @param airQuality new value for the airQuality property */
    public void setAirQuality(String airQuality) {
        this.airQuality = airQuality;
    }

    /** @return the AQI property */
    public String getAQI() {
        return this.aqi;
    }

    /** @param aQI new value for the AQI property */
    public void setAQI(String aQI) {
        this.aqi = aQI;
    }

    /** @return the AQIRanking property */
    public String getAQIRanking() {
        return this.aqiRanking;
    }

    /** @param aQIRanking new value for the AQIRanking property */
    public void setAQIRanking(String aQIRanking) {
        this.aqiRanking = aQIRanking;
    }

    /** @return the pm2_5 property */
    public String getPm2_5() {
        return this.pm2_5;
    }

    /** @param pm2_5 new value for the pm2_5 property */
    public void setPm2_5(String pm2_5) {
        this.pm2_5 = pm2_5;
    }

    /** @return the pm10 property */
    public String getPm10() {
        return this.pm10;
    }

    /** @param pm10 new value for the pm10 property */
    public void setPm10(String pm10) {
        this.pm10 = pm10;
    }

    /**
     * Renders all six properties as
     * {@code EntityBean [date=..., airQuality=..., AQI=..., AQIRanking=..., pm2_5=..., pm10=...]}.
     * Unset properties appear as the text {@code null}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("EntityBean [date=");
        text.append(this.date);
        text.append(", airQuality=").append(this.airQuality);
        text.append(", AQI=").append(this.aqi);
        text.append(", AQIRanking=").append(this.aqiRanking);
        text.append(", pm2_5=").append(this.pm2_5);
        text.append(", pm10=").append(this.pm10);
        text.append("]");
        return text.toString();
    }
}