爬取WACV2020年前十火热数据 并对他们进行增删改查的操作
实现使用pycharm进行数据的爬取并且将数据存入到数据库中
import requests from bs4 import BeautifulSoup import re import pymysql url = 'https://openaccess.thecvf.com/WACV2020?day=2020-06-18' response = requests.get(url) obj1 = re.compile(r'<dt class="ptitle"><br>.*?.html">(?P<name>.*?)</a></dt>.*?' r'\[<a href="(?P<pdf>.*?)">pdf</a>].*?' r'author = {(?P<author>.*?)},<br>.*?' r'title = {(?P<title>.*?)},<br>.*?' r'booktitle = {(?P<booktitle>.*?)},<br>', re.S) result = obj1.finditer(response.text) # 连接数据库 conn = pymysql.connect(host='localhost', user='root', password='123456', database='python', charset='utf8', port=3306) # 创建游标对象 cursor = conn.cursor() sql = 'INSERT INTO shuju(`name`, pdf, author, title, booktitle, `date`) values(%s,%s,%s,%s,%s,%s)' for it in result: try: data = [it.group('name'), it.group('pdf'), it.group('author'), it.group('title'), it.group('booktitle'), 20200618] cursor.execute(sql, data) conn.commit() except Exception as e: print(e) response.close() # 关闭游标 cursor.close() # 关闭连接 conn.close() print('over!!!')
然后是java项目中的目录结构

shu.java
package bean; public class shu { int id; String name; String pdf; String author; String title; String booktitle; String date; public int getId() { return id; } public void setId(int id) { this.id = id; } public String getName() { return name; } public void setName(String name) { this.name = name; } public String getPdf() { return pdf; } public void setPdf(String pdf) { this.pdf = pdf; } public String getAuthor() { return author; } public void setAuthor(String author) { this.author = author; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getBooktitle() { return booktitle; } public void setBooktitle(String booktitle) { this.booktitle = booktitle; } public String getDate() { return date; } public void setDate(String date) { this.date = date; } public shu(int id,String name,String pdf,String author,String title,String booktitle,String date) { this.id = id; this.name = name; this.pdf = pdf; this.author = author; this.title = title; this.booktitle = booktitle; this.date = date; } }
dao.java
package dao; import java.sql.Connection; import java.util.ArrayList; import java.util.List; import java.sql.ResultSet; import java.sql.Statement; import dbutil.DBUtil; import bean.*; import java.util.Collections; public class dao { public static List<shu> selectkzhuti(String zhuti,String zuozhe) { String sql = "select * from shuju where name like '%"+zhuti+"%' and author like '%"+zuozhe+"%' "; Connection conn = DBUtil.getConn();//这里就是从DBUtil类里面得到连接 Statement state =null; List<shu> list = new ArrayList<>(); ResultSet rs = null; try { state = conn.createStatement(); rs=state.executeQuery(sql); shu kc=null; while(rs.next()) { int ID=rs.getInt("ID"); String name=rs.getString("name"); String pdf=rs.getString("pdf"); String author=rs.getString("author"); String title=rs.getString("title"); String booktitle=rs.getString("booktitle"); String date=rs.getString("date"); kc =new shu(ID,name,pdf,author,title,booktitle,date); //System.out.println(name+"========="); list.add(kc); } } catch(Exception e) { e.printStackTrace(); } finally { DBUtil.close(rs, state, conn); } return list; } public static List<shu> selectquanbu() { String sql = "select * from shuju "; Connection conn = DBUtil.getConn();//这里就是从DBUtil类里面得到连接 Statement state =null; List<shu> list = new ArrayList<>(); ResultSet rs = null; try { state = conn.createStatement(); rs=state.executeQuery(sql); shu kc=null; while(rs.next()) { int ID=rs.getInt("ID"); String name=rs.getString("name"); String pdf=rs.getString("pdf"); String author=rs.getString("author"); String title=rs.getString("title"); String booktitle=rs.getString("booktitle"); String date=rs.getString("date"); kc =new shu(ID,name,pdf,author,title,booktitle,date); //System.out.println(name+"========="); list.add(kc); } } catch(Exception e) { e.printStackTrace(); } finally { DBUtil.close(rs, state, conn); } return list; } public static List<shu> xiugai1(int id) { String sql = "select * from shuju where id='"+id+"'"; Connection conn = DBUtil.getConn();//这里就是从DBUtil类里面得到连接 Statement state =null; List<shu> list = new ArrayList<>(); ResultSet rs = null; try { state = conn.createStatement(); rs=state.executeQuery(sql); shu kc=null; while(rs.next()) { int ID=rs.getInt("ID"); String name=rs.getString("name"); String pdf=rs.getString("pdf"); String author=rs.getString("author"); String title=rs.getString("title"); String booktitle=rs.getString("booktitle"); String date=rs.getString("date"); kc =new shu(ID,name,pdf,author,title,booktitle,date); //System.out.println(name+"========="); list.add(kc); } } catch(Exception e) { e.printStackTrace(); } finally { DBUtil.close(rs, state, conn); } return list; } public int adduser(int id,String name,String pdf,String author,String title,String booktitle,String date) { Connection conn = DBUtil.getConn();//这里就是从DBUtil类里面得到连接 Statement state =null; ResultSet rs = null; int flag=0; try { String sql = "update shuju set name='"+name+"',pdf ='"+pdf+"',author='"+author+"',title='"+title+"',booktitle='"+booktitle+"',date='"+date+"' where id='"+id+"'"; state = conn.createStatement(); int count = state.executeUpdate(sql); if(count>0) { System.out.println("数据插入成功"); }else { System.out.println("数据插入失败了QAQ"); } } catch(Exception e) { e.printStackTrace(); } finally { DBUtil.close(rs, state, conn); } return flag; } public int add1(String name,String pdf,String author,String title,String booktitle,String date) { Connection conn = DBUtil.getConn();//这里就是从DBUtil类里面得到连接 Statement state =null; ResultSet rs = null; int flag=0; try { String sql = "insert into shuju (name,pdf,author,title,booktitle,date) values('"+name+"','"+pdf+"','"+author+"','"+title+"','"+booktitle+"','"+date+"')"; state = conn.createStatement(); int count = state.executeUpdate(sql); if(count>0) { System.out.println("数据插入成功"); }else { System.out.println("数据插入失败了QAQ"); } } catch(Exception e) { e.printStackTrace(); } finally { DBUtil.close(rs, state, conn); } return flag; } }
DBUtil.java
package dbutil; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; public class DBUtil {//这个类就是套模板如果需要用的时候只需要改数据库(db)的名字即可******* //public static String db_url="jdbc:mysql://localhost:3306/database?uerUnicode=true&characterEncoding=UTF-8"; public static String db_url="jdbc:mysql://localhost:3306/python?serverTimezone=GMT%2B8&serverTimezone=GMT%2B8&useUnicode=true&characterEncoding=UTF-8";//db是数据库的名字 //这是连接数据库,servlet是数据库的名称,uerUnicode=true&characterEncoding=UTF-8是将字符集设置为utf-8,避免乱码。 public static String db_user="root";//数据的用户名 public static String db_password="UJuqCT-az5(f";//数据库的密码 public static Connection getConn()//获取连接,返回Connection类型,必须设置为static这样才能在其他类中使用 { Connection conn=null; try { Class.forName("com.mysql.cj.jdbc.Driver");//加载驱动 conn=DriverManager.getConnection(db_url,db_user,db_password);//连接数据库 } catch(Exception e) { e.printStackTrace(); } return conn; } public static void close(Statement state,Connection conn)//关闭函数 { if(state!=null)//只有状态和连接时,先关闭状态 { try { state.close(); } catch(SQLException e) { e.printStackTrace(); } } if(conn!=null) { try { conn.close(); } catch(SQLException e) { e.printStackTrace(); } } } public static void close(ResultSet rs,Statement state,Connection conn) { if(rs!=null)//有结果集,状态和连接时,先关闭结果集,在关闭状态,在关闭连接 { try { rs.close(); } catch(SQLException e) { e.printStackTrace(); } } if(state!=null) { try { state.close(); } catch(SQLException e) { e.printStackTrace(); } } if(conn!=null) { try { conn.close(); } catch(SQLException e) { e.printStackTrace(); } } } }
Servler.java
package servlet; import java.io.IOException; import java.util.List; import java.io.PrintWriter; import javax.servlet.ServletException; import javax.servlet.annotation.WebServlet; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import dao.*; import bean.*; @WebServlet("/r2") public class Servlet extends HttpServlet{ public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, java.io.IOException { return; } protected void service(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { req.setCharacterEncoding("utf-8");// 设置字符集,避免乱码 // 获取jsp界面需要进行的操作, String method = req.getParameter("method"); //System.out.println("============"); if (method.equals("zhuti")) { selectzhuti(req,resp); } if (method.equals("quanbu")) { selectquanbu(req,resp); } if (method.equals("xiugai")) { xiugai1(req,resp); } if (method.equals("add")) { add(req,resp); } if (method.equals("add1")) { add1(req,resp); } } private void selectzhuti(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { req.setCharacterEncoding("utf-8"); String zhuti=req.getParameter("zhuti"); String zuozhe=req.getParameter("zuozhe"); // System.out.println(req.getParameter("luxian")+"========="); System .out.println(zhuti+"222"); System .out.println(zuozhe+"3333"); List<shu> kc=dao.selectkzhuti(zhuti,zuozhe); req.setAttribute("kc", kc); req.getRequestDispatcher("chaxun.jsp").forward(req, resp); } private void selectquanbu(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { req.setCharacterEncoding("utf-8"); List<shu> kc=dao.selectquanbu(); req.setAttribute("kc", kc); req.getRequestDispatcher("chaxun.jsp").forward(req, resp); } private void xiugai1(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { req.setCharacterEncoding("utf-8"); int id=Integer.valueOf(req.getParameter("id")); List<shu> kc=dao.xiugai1(id); req.setAttribute("kc", kc); req.getRequestDispatcher("xiugai.jsp").forward(req, resp); } private void add(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { req.setCharacterEncoding("utf-8"); resp.setContentType("text/html;charset=utf-8"); int id=Integer.valueOf(req.getParameter("id")); String name=req.getParameter("name"); String pdf = req.getParameter("pdf"); String author=req.getParameter("author"); String title=req.getParameter("title"); String booktitle = req.getParameter("booktitle"); String date=req.getParameter("date"); dao userdao = new dao(); userdao.adduser(id,name,pdf,author,title,booktitle,date); req.getRequestDispatcher("jiemian.jsp").forward(req,resp); } private void add1(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { req.setCharacterEncoding("utf-8"); resp.setContentType("text/html;charset=utf-8"); String name=req.getParameter("name"); String pdf = req.getParameter("pdf"); String author=req.getParameter("author"); String title=req.getParameter("title"); String booktitle = req.getParameter("booktitle"); String date=req.getParameter("date"); dao userdao = new dao(); userdao.add1(name,pdf,author,title,booktitle,date); req.getRequestDispatcher("jiemian.jsp").forward(req,resp); } }
套用一下layui
<%@page import="bean.shu"%> <%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%> <%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%> <!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <title>Insert title here</title> <link rel="stylesheet" href="layui/css/layui.css"> <script src="layui/layui.js"></script> <style> .body{ background:url(img/1.png)no-repeat; background-size:100%; } .zhuti{ text-align:center; margin:100px; width:888px; } </style> </head> <body class="body"> <div class="zhuti"> <table class="layui-table" layui-data= "{skin: even:true, } "> <tr> <th>ID</th> <th>名字</th> <th>PDF链接</th> <th>作者</th> <th>题目</th> <th>书题目</th> <th>日期</th> </tr> <c:forEach items="${kc}" var="shu"> <tr> <td><c:out value="${shu.id}"></c:out></td> <td><c:out value="${shu.name}"></c:out></td> <td><c:out value="${shu.pdf}"></c:out></td> <td><c:out value="${shu.author}"></c:out></td> <td><c:out value="${shu.title}"></c:out></td> <td><c:out value="${shu.booktitle}"></c:out></td> <td><c:out value="${shu.date}"></c:out></td> <td> <form action="/pachong/r2?method=xiugai" method="post" aligen="center"> <input type="hidden" name="id" value="${shu.id}"> <input type="submit" value="修改" class="layui-btn layui-btn-warm layui-btn-radius"> </form> </td> </tr> </c:forEach> </table></div> </body> </html>
jiamian.jsp
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%> <!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <title>Insert title here</title> <link rel="stylesheet" href="layui/css/layui.css"> <script src="layui/layui.js"></script> <style> .body{ background:url(img/1.png)no-repeat; background-size:100%; } .zhuti{ text-align:center; margin:100px; width:888px; } </style> </head> <body class="body"> <div class="zhuti"> <tabl> <form action="/pachong/r2?method=zhuti" method="post" aligen="center"> <tr colspan="4"> <br> <h3>主题</h3> <input type="text" name="zhuti" class="layui-input" > </tr> <tr colspan="4"> <br> <br> <h3>作者</h3> <input type="text" name="zuozhe" class="layui-input"> </tr> <tr> <td> <input type="submit" vlaue="提交" class="layui-btn layui-btn-warm layui-btn-radius"> </td> <td> <input type="reset" vlaue="重置" class="layui-btn layui-btn-warm layui-btn-radius"> </td> <td> <a herf="action="/pachong/r2?method=quanbu"><button class="layui-btn layui-btn-warm layui-btn-radius">查询全部文献</button></a> </td> <td> <a href="tianjia.jsp" class="layui-btn layui-btn-warm layui-btn-radius">添加文献</a> </td> </tr> </form> </table> </div> </body> </html>
tianjia.jsp
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%> <!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <title>Insert title here</title> <style> .body{ background:url(img/1.png)no-repeat; background-size:100%; .zhuti{ text-align:center; margin:100px; width:888px; } } </style> </head> <link rel="stylesheet" href="layui/css/layui.css"> <script src="layui/layui.js"></script> <body class="body"> <div class="zhuti"> <form action="/pachong/r2?method=add1" method="post" aligen="center"> <br> <div style="text-align:center; "><h3>名字</h3> </div> <input type="text" name="name" class="layui-input" > <br> <div style="text-align:center; "><h3>PDF链接</h3> </div> <input type="text" name="pdf" class="layui-input" > <br> <div style="text-align:center; "><h3>作者: </h3> </div> <input type="text" name="author" class="layui-input" > <br> <div style="text-align:center; "><h3>题目 </h3> </div> <input type="text" name="title" class="layui-input" > <br> <div style="text-align:center; "><h3>书题目 </h3> </div> <input type="text" name="booktitle" class="layui-input" > <br> <div style="text-align:center; "><h3>日期 </h3> </div> <input type="text" name="date" class="layui-input" > <br> <br> <br> <div style="text-align:center; "> <input type="submit" value="提交" class="layui-btn layui-btn-warm layui-btn-radius"> <input type="reset" value="重置" class="layui-btn layui-btn-warm layui-btn-radius"> </div> </form></div> </body> </html>
xiugai.jsp
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%> <%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%> <!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <title>Insert title here</title> <link rel="stylesheet" href="layui/css/layui.css"> <script src="layui/layui.js"></script> <style> .body{ background:url(img/1.png)no-repeat; background-size:100%; } .zhuti{ text-align:center; margin:100px; width:888px; } </style> </head> <body class="body"> <div class="zhuti"> <form action="/pachong/r2?method=add" method="post" aligen="center"> <c:forEach items="${kc}" var="shu"> <input type="hidden" name="id" class="layui-input" value="${shu.id}" placeholder="${shu.id}" > 名字: <input type="text" name="name" class="layui-input" value="${shu.name}" placeholder="${shu.name}"> PDF链接: <input type="text" name="pdf" class="layui-input" value="${shu.pdf}" placeholder="${shu.pdf}"> 作者: <input type="text" name="author" class="layui-input" value="${shu.author}" placeholder="${shu.author}"> 题目: <input type="text" name="title" class="layui-input" value="${shu.title}" placeholder="${shu.title}"> 书题目: <input type="text" name="booktitle" class="layui-input" value="${shu.booktitle}" placeholder="${shu.booktitle}"> 日期: <input type="text" name="date" class="layui-input" value="${shu.date}" placeholder="${shu.date}"> <input type="submit" value="提交" class="layui-btn layui-btn-warm layui-btn-radius"> </c:forEach> </form></div> </body> </html>

浙公网安备 33010602011771号