爬虫作业

今天建民哥让我们自学爬虫,并完成对论文的爬取,如下

其实一开始在课上自己看的时候,并没有看得很明白,一直迷迷糊糊没搞懂。

后来找张可欣帮忙,懂了怎么使用pc去爬取,然后做了该系统,学完了发现也挺简单嘿嘿

eclipse代码如下:

bean包

bean.java

package bean;

/**
 * Mutable value object holding three string properties: an author
 * ({@code zuozhe}), a title and a date.
 */
public class bean {
    private String zuozhe;
    private String title;
    private String date;

    /**
     * Creates a bean with all three properties set.
     *
     * @param zuozhe the author
     * @param title  the title
     * @param date   the date, kept as an uninterpreted string
     */
    public bean(String zuozhe, String title, String date) {
        this.zuozhe = zuozhe;
        this.title = title;
        this.date = date;
    }

    // ---- accessors ----

    /** @return the author */
    public String getZuozhe() {
        return this.zuozhe;
    }

    /** @return the title */
    public String getTitle() {
        return this.title;
    }

    /** @return the date string */
    public String getDate() {
        return this.date;
    }

    // ---- mutators ----

    /** @param zuozhe the new author */
    public void setZuozhe(String zuozhe) {
        this.zuozhe = zuozhe;
    }

    /** @param title the new title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @param date the new date string */
    public void setDate(String date) {
        this.date = date;
    }

    /**
     * Renders the bean as {@code bean{zuozhe='..', title='..', date='..'}};
     * unset properties appear as the text {@code null}.
     */
    @Override
    public String toString() {
        return String.format("bean{zuozhe='%s', title='%s', date='%s'}", zuozhe, title, date);
    }
}

result.java

package bean;

/**
 * One row read from the {@code cvpr} table: paper name, title, date, author
 * list and the proceedings ({@code booktitle}) it appeared in. All values are
 * kept as plain strings.
 */
public class result {
    private String name;
    private String title;
    private String date;
    private String author;
    private String booktitle;

    /**
     * Creates a fully populated row.
     *
     * @param name      the paper name
     * @param title     the title
     * @param date      the date, kept as an uninterpreted string
     * @param author    the author list
     * @param booktitle the proceedings title
     */
    public result(String name, String title, String date, String author, String booktitle) {
        this.name = name;
        this.title = title;
        this.date = date;
        this.author = author;
        this.booktitle = booktitle;
    }

    /** @return the paper name */
    public String getName() {
        return name;
    }

    /** @param name the new paper name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the title */
    public String getTitle() {
        return title;
    }

    /** @param title the new title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the date string */
    public String getDate() {
        return date;
    }

    /** @param date the new date string */
    public void setDate(String date) {
        this.date = date;
    }

    /** @return the author list */
    public String getAuthor() {
        return author;
    }

    /** @param author the new author list */
    public void setAuthor(String author) {
        this.author = author;
    }

    /** @return the proceedings title */
    public String getBooktitle() {
        return booktitle;
    }

    /** @param booktitle the new proceedings title */
    public void setBooktitle(String booktitle) {
        this.booktitle = booktitle;
    }

    /**
     * Renders every field so that printing a row (or a list of rows) shows
     * its contents instead of the default object identity.
     */
    @Override
    public String toString() {
        return "result{" +
                "name='" + name + '\'' +
                ", title='" + title + '\'' +
                ", date='" + date + '\'' +
                ", author='" + author + '\'' +
                ", booktitle='" + booktitle + '\'' +
                '}';
    }
}

dao包

package dao;

import bean.*;
import dbutil.dbutil;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import static dbutil.dbutil.close;

public class dao {
    public  static List<result> show(String name)  {
        Connection con = null;
        PreparedStatement ptst=null;
        List<result> re=new ArrayList<>();
        try {
            con = dbutil.getconn();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        //  System.out.println(s);
        if( con!=null ){

            // System.out.println("succeed");

            try {
                String sql= "select name,author,title,booktitle,date from cvpr where name =?";
                ptst = con.prepareStatement(sql);
                // System.out.println(ptst);
                ptst.setString(1,name);
                ResultSet rs=ptst.executeQuery();

                while (rs.next())
                {
                    String name1=rs.getString(1);
                    String anthor=rs.getString(2);
                    String title=rs.getString(3);
                    String booktitle=rs.getString(4);
                    String date=rs.getString(5);


                   result re1=new result(name1,title,date,anthor,booktitle);
                    //System.out.println(s1);
                    re.add(re1);
                }
                System.out.println(re);
            } catch (SQLException e) {
                e.printStackTrace();
            }
            finally {
                close(ptst,con);
            }



        }
        return  re;
    }
    
    
    public  static List<result> show1(String author)  {
        Connection con = null;
        PreparedStatement ptst=null;
        List<result> re=new ArrayList<>();
        try {
            con = dbutil.getconn();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        //  System.out.println(s);
        if( con!=null ){

            // System.out.println("succeed");

            try {
                String sql= "select name,author,title,booktitle,date from cvpr where author =?";
                ptst = con.prepareStatement(sql);
                // System.out.println(ptst);
                ptst.setString(1,author);
                ResultSet rs=ptst.executeQuery();

                while (rs.next())
                {
                    String name1=rs.getString(1);
                    String anthor=rs.getString(2);
                    String title=rs.getString(3);
                    String booktitle=rs.getString(4);
                    String date=rs.getString(5);


                   result re1=new result(name1,title,date,anthor,booktitle);
                    //System.out.println(s1);
                    re.add(re1);
                }
                System.out.println(re);
            } catch (SQLException e) {
                e.printStackTrace();
            }
            finally {
                close(ptst,con);
            }



        }
        return  re;
    }



dbutil包

package dbutil;

import java.sql.*;


/**
 * JDBC helper: opens connections to the {@code pachong} MySQL database and
 * closes JDBC resources without propagating {@link SQLException}.
 */
public class dbutil {
    private static final String URL = "jdbc:mysql://localhost:3306/pachong?useSSL=true&useUnicode=true&characterEncoding=UTF-8";
    // JDBC driver class name
    private static final String DRIVER = "com.mysql.jdbc.Driver";
    // Database user
    private static final String USER = "root";
    // Database password
    private static final String PASSWORD = "LCRZKA";

    // Load the driver class once when this class is initialised; a missing
    // driver is reported on stderr and otherwise ignored.
    static {
        try {
            Class.forName(DRIVER);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens a new connection using the configured URL, user and password.
     *
     * @return the connection, or {@code null} when it could not be opened
     *         (the failure is printed, not thrown)
     * @throws SQLException never thrown by this implementation; the clause is
     *         kept so existing callers that catch it keep compiling
     */
    public static Connection getconn() throws SQLException {
        Connection conn = null;
        try {
            conn = DriverManager.getConnection(URL, USER, PASSWORD);
        } catch (SQLException e) {
            e.printStackTrace();
        }

        return conn;
    }

    /**
     * Closes a connection, printing instead of throwing on failure.
     *
     * @param con the connection to close; may be {@code null}
     */
    public static void close(Connection con) {
        if (con != null) {
            try {
                con.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Closes a prepared statement, then its connection.
     *
     * @param preparedState the statement to close; may be {@code null}
     * @param conn          the connection to close; may be {@code null}
     */
    public static void close(PreparedStatement preparedState, Connection conn) {
        close((Statement) preparedState, conn);
    }

    /**
     * Closes a result set, then the prepared statement, then the connection.
     *
     * @param rs            the result set to close; may be {@code null}
     * @param preparedState the statement to close; may be {@code null}
     * @param conn          the connection to close; may be {@code null}
     */
    public static void close(ResultSet rs, PreparedStatement preparedState, Connection conn) {
        close(rs, (Statement) preparedState, conn);
    }

    /**
     * Closes a statement, then its connection.
     *
     * @param state the statement to close; may be {@code null}
     * @param conn  the connection to close; may be {@code null}
     */
    public static void close(Statement state, Connection conn) {
        if (state != null) {
            try {
                state.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        close(conn);
    }

    /**
     * Closes a result set, then the statement, then the connection.
     *
     * @param rs    the result set to close; may be {@code null}
     * @param state the statement to close; may be {@code null}
     * @param conn  the connection to close; may be {@code null}
     */
    public static void close(ResultSet rs, Statement state, Connection conn) {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        close(state, conn);
    }
}

servlet包

package servlet;

import java.io.IOException;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import dao.dao;
import bean.*;



@SuppressWarnings("serial")
@WebServlet("/servelt")
public class servlet extends HttpServlet {
    protected void service(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
        req.setCharacterEncoding("utf-8");
        String method = req.getParameter("method");
        if("chaxun".equals(method)) {
            chaxun(req, resp);
        }

    }

    private static   void chaxun(HttpServletRequest req, HttpServletResponse resp) throws IOException, ServletException {
        req.setCharacterEncoding("utf-8");
        String name=req.getParameter("name");
        String author=req.getParameter("zuozhe");
        String date =req.getParameter("date");
        System.out.println(date);
        bean be=new bean(author,name,date);
        System.out.println(be);

        List<result> re= dao.show(name);
       req.setAttribute("rs",re);
        req.getRequestDispatcher("2.jsp").forward(req, resp);


   

servlet1

package servlet;

import java.io.IOException;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import dao.dao;
import bean.*;



@SuppressWarnings("serial")
@WebServlet("/servelt1")
/**
 * Servlet that answers the {@code chaxun} (query) action by looking papers up
 * by author and forwarding the rows to {@code result.jsp}.
 */
public class servlet1 extends HttpServlet {
    /**
     * Routes the request according to its {@code method} parameter; values
     * other than {@code "chaxun"} are ignored.
     */
    protected void service(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
        req.setCharacterEncoding("utf-8");
        final String action = req.getParameter("method");
        if (!"chaxun".equals(action)) {
            return;
        }
        chaxun(req, resp);
    }

    /**
     * Reads the form parameters, queries by author, exposes the rows as the
     * request attribute {@code rs} and forwards to the result page.
     */
    private static void chaxun(HttpServletRequest req, HttpServletResponse resp) throws IOException, ServletException {
        req.setCharacterEncoding("utf-8");

        final String paperName = req.getParameter("name");
        final String authorName = req.getParameter("zuozhe");
        final String dateText = req.getParameter("date");

        // Console trace of the submitted form values.
        System.out.println(dateText);
        System.out.println(new bean(authorName, paperName, dateText));

        final List<result> rows = dao.show1(authorName);
        req.setAttribute("rs", rows);
        req.getRequestDispatcher("result.jsp").forward(req, resp);
    }
}

index.jsp

<%--
  Created by IntelliJ IDEA.
  User: 34723
  Date: 2022/5/17
  Time: 12:51
  To change this template use File | Settings | File Templates.
--%>
<%-- Query form: posts the selected paper name ("name") and author ("zuozhe") to servelt?method=chaxun. --%>
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<html>
  <head>
    <title>查询界面</title>
  </head>
   <style>
        body {
            background: url('image/88.jpg') no-repeat;
            background-size: 100% auto;
        }
        
        #login_box {
            width: 75%;
            height: 400px;
            background-color: #00000060;
            margin: auto;
            margin-top: 10%;
            text-align: center;
            border-radius: 10px;
            padding: 50px 50px;
        }
        
        #login_box input,
        #login_box button {
            outline: none;
        }
        
        #login_box h1 {
            color: #ffffff90;
              margin-top: 3%;
           font-size:300%;
            font-family:"宋体";
        }
        
        
        #login_box #form #input_box {
            margin-top: 5%;
        }
        
       
        #login_box #sign_up {
            margin-top: 8%;
        }
        
        #login_box #sign_up a {
            color: #FFFFFF;
        }
        /* NOTE(review): this selector targets a single element that has both the id
           "Login_box" and the class "aaa"; no element on this page carries both, so
           the rule is currently inert. Left unchanged to keep the rendered layout. */
        #Login_box.aaa{
        width: 20%;
   height: 50px;
   font-size: 14px;
   text-align: center;
   line-height: 50px;
        
        }
        
    </style>
  
  
  <body>
  <div id="login_box">
  <form action="servelt?method=chaxun" method="post">
     <h1>查询界面</h1>
      
      <div class="aaa"> 请选择论文题目:</div> 
        
       

       <div class="aaa">
       <select name="name">

         <option value="IDA-3D: Instance-Depth-Aware 3D Object Detection From Stereo Vision for Autonomous Driving" >IDA-3D: Instance-Depth-Aware 3D Object Detection From Stereo Vision for Autonomous Driving</option>
         <option value="FroDO: From Detections to 3D Objects" >FroDO: From Detections to 3D Objects</option>
         <option value="KeypointNet: A Large-Scale 3D Keypoint Dataset Aggregated From Numerous Human Annotations" >KeypointNet: A Large-Scale 3D Keypoint Dataset Aggregated From Numerous Human Annotations</option>
         <option value="Bridging the Gap Between Anchor-Based and Anchor-Free Detection via Adaptive Training Sample Selection" >Bridging the Gap Between Anchor-Based and Anchor-Free Detection via Adaptive Training Sample Selection</option>
         <option value="Structure Aware Single-Stage 3D Object Detection From Point Cloud" >Structure Aware Single-Stage 3D Object Detection From Point Cloud</option>

       </select>
       </div>
       <br>
<div class="bbb"> 请选择论文作者:</div>
     <div class="bbb"><select name="zuozhe">
          <option value="Peng, Wanli and Pan, Hao and Liu, He and Sun, Yi" >Peng, Wanli and Pan, Hao and Liu, He and Sun, Yi</option>
        
          <option value="Xiong, Peixi and Wu, Ying">Xiong, Peixi and Wu, Ying</option>
          <option value="Zhang, Shifeng and Chi, Cheng and Yao, Yongqiang and Lei, Zhen and Li, Stan Z." >Zhang, Shifeng and Chi, Cheng and Yao, Yongqiang and Lei, Zhen and Li, Stan Z.</option>
          <option value="He, Chenhang and Zeng, Hui and Huang, Jianqiang and Hua, Xian-Sheng and Zhang, Lei" >He, Chenhang and Zeng, Hui and Huang, Jianqiang and Hua, Xian-Sheng and Zhang, Lei</option>

        </select>
     </div>
       
     
        
<br>
    <input type="submit" value="提交查询">



  </form>
  </div>
  
  </body>
</html>

result.jsp

<%@ page import="bean.result" %>
<%@ page import="java.util.List" %><%--
  Created by IntelliJ IDEA.
  User: 34723
  Date: 2022/5/16
  Time: 12:05
  To change this template use File | Settings | File Templates.
--%>
<%-- Result page: renders the List<result> found in the request attribute "rs", one row per entry. --%>
<html>
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<head>
    <title>查询结果</title>
</head>
<style type="text/css">
    body{
         background: url('image/88.jpg') no-repeat;
        /* vendor-prefixed fallbacks for older browsers */
        -webkit-background-size: cover;
        -o-background-size: cover;
         background-size: 100% auto;
         }
.bottom-list .t-b-1{
   border: 1px solid #f0f0f0;
   width: 20%;
   height: 50px;
   font-size: 14px;
   text-align: center;
   line-height: 50px;
}
 .bottom-list .t-b-2{
   border: 1px solid #f0f0f0;
   width: 20%;
   height: 300px;
   text-align: center;
   line-height: 50px;
}
 
 #login_box {
            width: 75%;
            height: 400px;
            background-color: #00000060;
            margin: auto;
            margin-top: 10%;
            text-align: center;
            border-radius: 10px;
            padding: 50px 50px;
        }
        
        #login_box input,
        #login_box button {
            outline: none;
        }
        
        #login_box h1 {
            color: #ffffff90;
              margin-top: 3%;
           font-size:300%;
            font-family:"宋体";
        }
        
        
        #login_box #form #input_box {
            margin-top: 5%;
        }
        
       
        #login_box #sign_up {
            margin-top: 8%;
        }
        
        #login_box #sign_up a {
            color: #FFFFFF;
        }
  
        
</style>
<body>
 <div id="login_box">
<%
    // The "rs" attribute is absent when this page is requested without going
    // through a servlet that sets it; fall back to an empty list so the page
    // renders only the header row instead of failing with a NullPointerException.
    @SuppressWarnings("unchecked")
    List<result> list = (List<result>) request.getAttribute("rs");
    if (list == null) {
        list = java.util.Collections.emptyList();
    }
%>

<div class="bottom-list">
    <div style=" background:#f9f9f9;display: flex;justify-content: center;">
        <div class="t-b-1"> 编号</div>
        <div class="t-b-1">  论文名称</div>
        <div class="t-b-1">作者</div>
        <div class="t-b-1">标题</div>
         <div class="t-b-1"> 发行期刊</div>
         <div class="t-b-1"> 发行日期</div> 
       
    </div>
    <%-- One flex container per result so that each result is laid out as its own row. --%>
    <%
        for (int i = 0; i < list.size(); i++) {
            result row = list.get(i);
    %>
    <div style=" background:#fff;display: flex;justify-content: center;">
        <div class="t-b-2"><%=i + 1%></div>
        <div class="t-b-2"> <%=row.getName()%></div>
        <div class="t-b-2"> <%=row.getAuthor()%></div>
        <div class="t-b-2"><%=row.getTitle()%></div>
         <div class="t-b-2"> <%=row.getBooktitle()%></div>
          <div class="t-b-2"> <%=row.getDate()%></div>
    </div>
    <%
        }
    %>
</div>
 

<div align="center">

    <a href="index.jsp">返回主页面</a>
    </div>
    </div>

</body>
</html>

爬虫代码

import requests
from bs4 import BeautifulSoup
import re
import pymysql

# Crawl one CVPR open-access listing page and store each paper's metadata in
# the `cvpr` table of the local `pachong` MySQL database.

# NOTE(review): the path says CVPR2020 while the query string says 2021-06-18 —
# confirm that this is the intended listing day.
url = 'https://openaccess.thecvf.com/CVPR2020?day=2021-06-18'

# Fail fast on a hung connection or an HTTP error page instead of silently
# matching nothing.
response = requests.get(url, timeout=30)
response.raise_for_status()
html = response.text
response.close()

# One match per paper entry: the title link, the pdf link, then the author /
# title / booktitle fields of the entry's BibTeX block.
obj1 = re.compile(r'<dt class="ptitle"><br>.*?.html">(?P<name>.*?)</a></dt>.*?'
                  r'\[<a href="(?P<pdf>.*?)">pdf</a>].*?'
                  r'author = {(?P<author>.*?)},<br>.*?'
                  r'title = {(?P<title>.*?)},<br>.*?'
                  r'booktitle = {(?P<booktitle>.*?)},<br>', re.S)

result = obj1.finditer(html)

# Connect to the database
conn = pymysql.connect(host='localhost', user='root', password='LCRZKA', database='pachong', charset='utf8', port=3306)
try:
    # Create the cursor
    cursor = conn.cursor()
    try:
        sql = 'INSERT INTO cvpr(`name`, pdf, author, title, booktitle, `date`) values(%s,%s,%s,%s,%s,%s)'

        for it in result:
            # A failing row is reported and skipped so the remaining rows are still stored.
            try:
                # NOTE(review): every row is stored with the fixed date 20100618, which
                # matches neither year that appears in the URL — confirm the intended value.
                data = [it.group('name'), it.group('pdf'), it.group('author'), it.group('title'), it.group('booktitle'), 20100618]
                cursor.execute(sql, data)
                conn.commit()
            except Exception as e:
                print(e)
    finally:
        # Close the cursor
        cursor.close()
finally:
    # Close the connection even if something above failed unexpectedly
    conn.close()

print('over!!!')

 

posted on 2022-05-17 17:53  GHOST-CR  阅读(24)  评论(0)    收藏  举报