2-1-1

package task;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Extracts the air-quality table from a locally saved HTML page and writes it
 * to a tab-separated text file.
 *
 * <p>Pipeline: {@link #getHtml()} parses the saved page,
 * {@link #getData(Document)} converts each table row into an
 * {@link EntityBean}, and {@link #wirteToFile(ArrayList)} writes one line per
 * bean.
 */
public class Crawler {

    // 1. Fixed input/output locations, resolved against the working directory.
    // NOTE(review): the paths use the Windows separator; on other platforms the
    // backslash becomes part of the file name rather than a directory separator.
    private static final String OUTPUT_FILE = "output\\空气质量数据.txt";
    private static final String SOURCE_FILE = "resource\\2020年7月北京空气质量指数查询.htm";

    /** Number of leading cells read from every table row. */
    private static final int COLUMN_COUNT = 6;

    /**
     * 2. Parses the locally saved page.
     *
     * @return the parsed document
     * @throws IOException if the source file cannot be found or read
     */
    public static Document getHtml() throws IOException {
        // The second argument is the character encoding used to decode the file.
        return Jsoup.parse(new File(SOURCE_FILE), "gbk");
    }

    /**
     * 3. Extracts one {@link EntityBean} per table row.
     *
     * <p>Every row matched by the selector is read, so a header row that lives
     * inside the same {@code tbody} is returned like any other row. Rows with
     * fewer than {@value #COLUMN_COUNT} cells are skipped instead of aborting
     * the whole extraction with an {@link IndexOutOfBoundsException}.
     *
     * @param doc the parsed page
     * @return the extracted rows in document order; empty if nothing matches
     * @throws IOException never thrown by this implementation; the clause is
     *         retained so existing callers keep compiling
     */
    public static ArrayList<EntityBean> getData(Document doc) throws IOException {
        // The original author notes this selects 32 rows for the July page.
        Elements trs = doc.select("#content > div.api_month_list > table > tbody > tr");
        ArrayList<EntityBean> list = new ArrayList<EntityBean>(trs.size());
        for (Element tr : trs) {
            Elements cells = tr.children();
            if (cells.size() < COLUMN_COUNT) {
                // Not a data row (e.g. a spacer or merged-cell row): nothing to read.
                continue;
            }
            // Cell order: date, air quality, AQI, AQI ranking, PM2.5, PM10.
            list.add(new EntityBean(
                    cells.get(0).text(),
                    cells.get(1).text(),
                    cells.get(2).text(),
                    cells.get(3).text(),
                    cells.get(4).text(),
                    cells.get(5).text()));
        }
        return list;
    }

    /**
     * 4. Writes the rows to the output file, one tab-separated line per bean.
     *
     * <p>The misspelled method name is kept for backward compatibility. The
     * parent directory of the output file is created when missing, and the
     * writer is closed even if writing a row fails. Text is encoded with the
     * platform default charset, as before.
     *
     * @param ebs the rows to write
     * @throws FileNotFoundException if the output file cannot be created or opened
     */
    public static void wirteToFile(ArrayList<EntityBean>  ebs) throws FileNotFoundException {
        File target = new File(OUTPUT_FILE);
        File parent = target.getParentFile();
        if (parent != null) {
            // If this fails, opening the writer below reports the problem.
            parent.mkdirs();
        }
        // PrintWriter is a character stream; try-with-resources guarantees close().
        try (PrintWriter pw = new PrintWriter(target)) {
            for (EntityBean eb : ebs) {
                pw.println(eb.getDate() + "\t" + eb.getAirQuality() + "\t" + eb.getAQI()
                        + "\t" + eb.getAQIRanking() + "\t" + eb.getPm2_5() + "\t" + eb.getPm10());
            }
        }
    }

    /**
     * 5. Entry point: parse the page, print its title, extract the rows and
     * write them to the output file.
     *
     * @param args unused
     * @throws IOException if the page cannot be read or the output cannot be opened
     */
    public static void main(String[] args) throws IOException {
        Document doc = Crawler.getHtml();
        System.out.println("网页标题:"+doc.title());
        ArrayList<EntityBean> entitys = Crawler.getData(doc);
        Crawler.wirteToFile(entitys);
    }

}
package task;

/**
 * One row of the air-quality table. Every column is kept as the raw text that
 * was read from the page; no parsing or validation is performed.
 */
public class EntityBean {
    // Column values, in table order.
    private String date;
    private String airQuality;
    private String AQI;
    private String AQIRanking;
    private String pm2_5;
    private String pm10;

    /** Creates a bean with every field left as {@code null}. */
    public EntityBean() {
    }

    /**
     * Creates a fully populated bean.
     *
     * @param date       value of the date column
     * @param airQuality value of the air-quality column
     * @param aQI        value of the AQI column
     * @param aQIRanking value of the AQI-ranking column
     * @param pm2_5      value of the PM2.5 column
     * @param pm10       value of the PM10 column
     */
    public EntityBean(String date, String airQuality, String aQI, String aQIRanking, String pm2_5, String pm10) {
        this.date = date;
        this.airQuality = airQuality;
        this.AQI = aQI;
        this.AQIRanking = aQIRanking;
        this.pm2_5 = pm2_5;
        this.pm10 = pm10;
    }

    // ----- accessors -----

    public String getDate() {
        return this.date;
    }

    public void setDate(String date) {
        this.date = date;
    }

    public String getAirQuality() {
        return this.airQuality;
    }

    public void setAirQuality(String airQuality) {
        this.airQuality = airQuality;
    }

    public String getAQI() {
        return this.AQI;
    }

    public void setAQI(String aQI) {
        this.AQI = aQI;
    }

    public String getAQIRanking() {
        return this.AQIRanking;
    }

    public void setAQIRanking(String aQIRanking) {
        this.AQIRanking = aQIRanking;
    }

    public String getPm2_5() {
        return this.pm2_5;
    }

    public void setPm2_5(String pm2_5) {
        this.pm2_5 = pm2_5;
    }

    public String getPm10() {
        return this.pm10;
    }

    public void setPm10(String pm10) {
        this.pm10 = pm10;
    }

    /**
     * Renders all fields as {@code EntityBean [name=value, ...]}; unset fields
     * appear as the text {@code null}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("EntityBean [");
        text.append("date=").append(date);
        text.append(", airQuality=").append(airQuality);
        text.append(", AQI=").append(AQI);
        text.append(", AQIRanking=").append(AQIRanking);
        text.append(", pm2_5=").append(pm2_5);
        text.append(", pm10=").append(pm10);
        text.append("]");
        return text.toString();
    }
}

 

posted @ 2022-09-13 14:41  aq阿桂  阅读(87)  评论(0)    收藏  举报