爬虫效果--详细
项目简介:
    此项目是为了方便同学们学习爬虫编写,涉及到的知识点有 HTML、JavaScript、jQuery、
ECharts、Servlet 3.0、Jsoup、FastJson、IO 流。
项目概要设计:
   1、包结构的设计:必须满足 MVC 开发模式,以实现解耦。
1 package com.hyxy.food.entity; 2 public class Food { 3 private int id;//区别不同的商店 4 private String name;//名称 5 private int num;//评论条数 6 public int getId() { 7 return id; 8 } 9 public void setId(int id) { 10 this.id = id; 11 } 12 public String getName() { 13 return name; 14 } 15 public void setName(String name) { 16 this.name = name; 17 } 18 public int getNum() { 19 return num; 20 } 21 public void setNum(int num) { 22 this.num = num; 23 } 24 public Food(int id, String name, int num) { 25 super(); 26 this.id = id; 27 this.name = name; 28 this.num = num; 29 } 30 public Food() { 31 super(); 32 } 33 }
1 package com.hyxy.food.util; 2 3 import java.io.IOException; 4 import java.io.InputStream; 5 import java.sql.Connection; 6 import java.sql.DriverManager; 7 import java.util.Properties; 8 public class ConnectMysqlDB { 9 public static Connection getConnection() { 10 Connection conn=null; 11 Properties p=new Properties(); 12 InputStream in=ConnectMysqlDB.class.getClassLoader().getResourceAsStream("jdbc.properties"); 13 try { 14 p.load(in); 15 String url=p.getProperty("url"); 16 String user=p.getProperty("user"); 17 String password=p.getProperty("pwd"); 18 String driver=p.getProperty("driver"); 19 Class.forName(driver); 20 conn=DriverManager.getConnection(url, user, password); 21 } catch (Exception e) { 22 // TODO Auto-generated catch block 23 e.printStackTrace(); 24 } 25 26 return conn; 27 } 28 public static void main(String[] args) { 29 getConnection(); 30 } 31 }
# jdbc.properties --> 放在 src 根目录下
driver=com.mysql.jdbc.Driver
url=jdbc:mysql://192.168.0.199:3306/spider?useUnicode=true&characterEncoding=utf-8
user=1805
pwd=123456
1 package com.hyxy.food.dao; 2 import java.sql.Connection; 3 import com.hyxy.food.entity.Food; 4 import com.hyxy.food.util.ConnectMysqlDB; 5 import java.sql.PreparedStatement; 6 import java.sql.ResultSet; 7 import java.sql.SQLException; 8 import java.util.ArrayList; 9 import java.util.List; 10 11 public class FoodDao { 12 private Connection conn; 13 public FoodDao() { 14 if(conn==null) { 15 conn=ConnectMysqlDB.getConnection(); 16 } 17 } 18 public boolean addFood(Food f) { 19 String sql="insert into food(name,num) value(?,?)"; 20 PreparedStatement ps; 21 boolean flag=false; 22 try { 23 ps = conn.prepareStatement(sql); 24 ps.setString(1, f.getName()); 25 ps.setInt(2, f.getNum()); 26 ps.executeUpdate(); 27 flag=true; 28 } catch (SQLException e) { 29 // TODO Auto-generated catch block 30 e.printStackTrace(); 31 } 32 return flag; 33 } 34 public List<Food> list(){ 35 String sql="select name,num from food"; 36 List<Food> list=new ArrayList<Food>(); 37 try { 38 PreparedStatement ps=conn.prepareStatement(sql); 39 ResultSet rs= ps.executeQuery(); 40 while(rs.next()) { 41 Food f=new Food(); 42 f.setName(rs.getString("name")); 43 f.setNum(rs.getInt("num")); 44 list.add(f); 45 } 46 } catch (SQLException e) { 47 // TODO Auto-generated catch block 48 e.printStackTrace(); 49 } 50 return list; 51 } 52 }
1 package com.hyxy.food.controller; 2 3 import java.io.IOException; 4 import javax.servlet.ServletException; 5 import javax.servlet.annotation.WebServlet; 6 import javax.servlet.http.HttpServlet; 7 import javax.servlet.http.HttpServletRequest; 8 import javax.servlet.http.HttpServletResponse; 9 10 import org.jsoup.Jsoup; 11 import org.jsoup.nodes.Document; 12 import org.jsoup.nodes.Element; 13 import org.jsoup.select.Elements; 14 15 import com.hyxy.food.dao.FoodDao; 16 import com.hyxy.food.entity.Food; 17 @WebServlet("/food") 18 public class FoodServlet extends HttpServlet { 19 private static final long serialVersionUID = 1L; 20 public FoodServlet() { 21 super(); 22 // TODO Auto-generated constructor stub 23 } 24 protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { 25 doPost(request, response); 26 } 27 protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { 28 String url=request.getParameter("url");//http://www.mafengwo.cn/cy/10035/0-0-0-0-0- 29 int sum=Integer.parseInt(request.getParameter("sum")); 30 //爬虫开始 31 for (int i = 1; i <= sum; i++) { 32 System.out.println("爬取第"+i+"页数据"); 33 //爬虫第一步,获取一个爬虫的Document对象 34 Document d=Jsoup.connect(url+i+".html").get(); 35 //输出Document对象 36 // System.out.println(d.html()); 37 //第二步观察网页具体信息,爬取想要的信息 38 Elements es= d.select("li[class=item clearfix]"); 39 boolean flag=false; 40 for (Element element : es) { 41 String title=element.select("div[class=title]").select("h3").select("a").first().text(); 42 int num=Integer.parseInt(element.select("div[class=grade]").select("p[class=rev-num]").select("em").text()); 43 Food food=new Food(); 44 food.setName(title); 45 food.setNum(num); 46 FoodDao dao=new FoodDao(); 47 flag = dao.addFood(food); 48 } 49 if(flag) { 50 System.out.println("success!!!"); 51 }else { 52 System.out.println("false!!!"); 53 } 54 // request.getRequestDispatcher("list").forward(request, 
response); 55 } 56 } 57 58 }
1 package com.hyxy.food.test; 2 3 import java.io.IOException; 4 5 import org.jsoup.Jsoup; 6 import org.jsoup.nodes.Document; 7 import org.jsoup.nodes.Element; 8 import org.jsoup.select.Elements; 9 10 public class FoodTest { 11 public static void main(String[] args) throws IOException { 12 int sum=20; 13 for (int i = 1; i <= sum; i++) { 14 System.out.println("爬取第"+i+"页数据"); 15 String url="http://www.mafengwo.cn/cy/10035/0-0-0-0-0-"+i+".html"; 16 //爬虫第一步,获取一个爬虫的Document对象 17 Document d=Jsoup.connect(url).get(); 18 //输出Document对象 19 // System.out.println(d.html()); 20 //第二步观察网页具体信息,爬取想要的信息 21 Elements es= d.select("li[class=item clearfix]"); 22 for (Element element : es) { 23 String title=element.select("div[class=title]").select("h3").select("a").first().text(); 24 int num=Integer.parseInt(element.select("div[class=grade]").select("p[class=rev-num]").select("em").text()); 25 System.out.println(title+":"+num); 26 } 27 } 28 } 29 }
 
                    
                     
                    
                 
                    
                
 
                
            
         
         浙公网安备 33010602011771号
浙公网安备 33010602011771号