2-1-13

package task;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Extracts scenic-spot records from a locally saved HTML page and writes them
 * to a tab-separated text file.
 *
 * <p>Workflow: {@link #getHtml()} parses the saved page, {@link #getData(Document)}
 * builds one {@link EntityBean} per matched list element, and
 * {@link #wirteToFile(ArrayList)} writes one line per bean.
 */
public class Crawler {

    // 1. File locations, relative to the working directory.
    // Forward slashes are used because they are accepted as separators on every
    // platform, whereas a backslash is only a separator on Windows.
    private static final String OUTPUT_FILE = "output/北京景点数据.txt";
    private static final String SOURCE_FILE = "resource/北京景点.htm";

    /**
     * 2. Parses the locally saved web page.
     *
     * @return the parsed document
     * @throws IOException if the source file cannot be found or read
     */
    public static Document getHtml() throws IOException {
        File f = new File(SOURCE_FILE);
        // First argument is the file to parse, second is the charset the page is encoded in.
        return Jsoup.parse(f, "utf-8");
    }

    /**
     * 3. Extracts one record per element matched by {@code #search-list > div > div}.
     *
     * <p>A field whose selector matches nothing is stored as an empty string,
     * because {@code Elements.text()} returns {@code ""} for an empty selection.
     *
     * @param doc the parsed page
     * @return the extracted records, in document order; empty if nothing matched
     * @throws IOException declared for caller compatibility; not thrown by this implementation
     */
    public static ArrayList<EntityBean> getData(Document doc) throws IOException {
        Elements elements = doc.select("#search-list > div > div");
        ArrayList<EntityBean> list = new ArrayList<EntityBean>();
        for (Element e : elements) {  // one iteration per listed scenic spot
            String name = e.select("div.sight_item_about > h3 > a").text();
            // Text of the "level" span.
            String jqdj = e.select("div.sight_item_about > div > div.clrfix > span.level").text();
            String dz = e.select("div.sight_item_about > div > p > span").text();
            // First and fourth rows of the pop-up table.
            String jg = e.select("div.sight_item_pop > table > tbody > tr:nth-child(1) > td").text();
            String yxl = e.select("div.sight_item_pop > table > tbody > tr:nth-child(4) > td").text();
            list.add(new EntityBean(name, jqdj, dz, jg, yxl));
        }
        return list;
    }

    /**
     * 4. Writes the records to the output file, one tab-separated line per record.
     *
     * <p>The file is written as UTF-8 (the same encoding used to read the source page)
     * instead of the platform default charset, so the Chinese text is preserved on any
     * system. The parent directory is created when missing, and the writer is closed
     * even if an exception interrupts the loop.
     *
     * @param list the records to write
     * @throws FileNotFoundException if the output file cannot be created or opened
     */
    public static void wirteToFile(ArrayList<EntityBean> list) throws FileNotFoundException {
        File parent = new File(OUTPUT_FILE).getParentFile();
        if (parent != null) {
            // Best effort: if this fails, opening the stream below reports the problem.
            parent.mkdirs();
        }
        try (PrintWriter pw = new PrintWriter(
                new OutputStreamWriter(new FileOutputStream(OUTPUT_FILE), StandardCharsets.UTF_8))) {
            for (EntityBean eb : list) {
                pw.println(eb.getName() + "\t" + eb.getJqdj() + "\t" + eb.getDz()
                        + "\t" + eb.getJg() + "\t" + eb.getYxl());
            }
        }
    }

    /**
     * 5. Entry point: parse the page, print its title, extract the records and write them out.
     *
     * @param args unused
     * @throws IOException if the source page cannot be read or the output file cannot be opened
     */
    public static void main(String[] args) throws IOException {
        Document doc = Crawler.getHtml();  // parse the page
        System.out.println("网页标题:" + doc.title());
        ArrayList<EntityBean> entitys = Crawler.getData(doc);  // extract the records
        Crawler.wirteToFile(entitys);
    }
}
package task;

/**
 * Mutable value holder for one extracted record: five string fields with
 * plain getters and setters.
 *
 * <p>NOTE(review): the field names look like pinyin abbreviations, presumably
 * jqdj = scenic-spot level, dz = address, jg = price, yxl = monthly sales.
 * Confirm against the code that populates them.
 */
public class EntityBean {

    private String name;
    private String jqdj;
    private String dz;
    private String jg;
    private String yxl;

    /** Creates an empty bean; every field stays {@code null} until set. */
    public EntityBean() {
    }

    /**
     * Creates a bean with all five fields populated.
     *
     * @param name value for the {@code name} field
     * @param jqdj value for the {@code jqdj} field
     * @param dz   value for the {@code dz} field
     * @param jg   value for the {@code jg} field
     * @param yxl  value for the {@code yxl} field
     */
    public EntityBean(String name, String jqdj, String dz, String jg, String yxl) {
        this.name = name;
        this.jqdj = jqdj;
        this.dz = dz;
        this.jg = jg;
        this.yxl = yxl;
    }

    /** @return the {@code name} field */
    public String getName() {
        return this.name;
    }

    /** @param name new value for the {@code name} field */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the {@code jqdj} field */
    public String getJqdj() {
        return this.jqdj;
    }

    /** @param jqdj new value for the {@code jqdj} field */
    public void setJqdj(String jqdj) {
        this.jqdj = jqdj;
    }

    /** @return the {@code dz} field */
    public String getDz() {
        return this.dz;
    }

    /** @param dz new value for the {@code dz} field */
    public void setDz(String dz) {
        this.dz = dz;
    }

    /** @return the {@code jg} field */
    public String getJg() {
        return this.jg;
    }

    /** @param jg new value for the {@code jg} field */
    public void setJg(String jg) {
        this.jg = jg;
    }

    /** @return the {@code yxl} field */
    public String getYxl() {
        return this.yxl;
    }

    /** @param yxl new value for the {@code yxl} field */
    public void setYxl(String yxl) {
        this.yxl = yxl;
    }

    /**
     * Renders all fields in the form
     * {@code EntityBean [name=..., jqdj=..., dz=..., jg=..., yxl=...]}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append("EntityBean [name=").append(name);
        text.append(", jqdj=").append(jqdj);
        text.append(", dz=").append(dz);
        text.append(", jg=").append(jg);
        text.append(", yxl=").append(yxl);
        text.append("]");
        return text.toString();
    }
}

 

posted @ 2022-09-13 15:05  aq阿桂  阅读(33)  评论(0)    收藏  举报