本次实验为结对实验,由我和蒲煜凡共同完成。分工如下:我负责数据爬取,蒲煜凡负责热词云构建。实验过程较为顺利。
我负责的部分遇到了一个问题:爬取到的数据在写入数据库时报错。经分析,原因是爬取的数据过长,而数据库字段设计的长度不足。调整数据库字段长度后,问题解决。
代码如下
Yiqing
package com.jdbc.bean;
/**
 * Plain bean that carries a single piece of text, exposed as its "title".
 */
public class Yiqing {

    /** The text held by this bean; may be {@code null}. */
    private String title;

    /**
     * Creates a bean holding the given text.
     *
     * @param title the value to store; kept as-is without validation
     */
    public Yiqing(String title) {
        this.title = title;
    }

    /**
     * Returns the stored text.
     *
     * @return the current title value
     */
    public String getTitle() {
        return this.title;
    }

    /**
     * Replaces the stored text.
     *
     * @param title the new value; kept as-is without validation
     */
    public void setTitle(String title) {
        this.title = title;
    }
}
yiqidao
package com.jdbc.dao;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
import com.jdbc.bean.Yiqing;
import com.jdbc.util.BaseConnection;
/**
 * Data-access helper that reads rows from the {@code lunwen} table.
 */
public class yiqidao {

    /**
     * Reads every row of {@code lunwen} and wraps the fourth column of each row
     * in a {@link Yiqing}.
     *
     * <p>If no database connection can be obtained, an empty list is returned.
     * If a {@link SQLException} occurs while querying, its stack trace is printed
     * and the rows collected so far are returned.
     *
     * @return the collected beans, never {@code null}
     */
    public static List<Yiqing> find2() {
        List<Yiqing> list = new ArrayList<Yiqing>();
        Connection conn = BaseConnection.getConnection();
        if (conn == null) {
            // getConnection() yields null when the driver or the connection fails;
            // without this guard createStatement() would throw a NullPointerException.
            return list;
        }

        Statement statement = null;
        ResultSet rs = null;
        String sql = "SELECT * FROM lunwen ";
        try {
            statement = conn.createStatement();
            rs = statement.executeQuery(sql);
            while (rs.next()) {
                // NOTE(review): the value is read by position (4th column of SELECT *)
                // although the bean calls it a title — confirm against the lunwen schema.
                String title = rs.getString(4);
                list.add(new Yiqing(title));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            BaseConnection.close(rs, statement, conn);
        }
        return list;
    }
}
yiqingServlet
package com.jdbc.servlet;
import java.io.IOException;
import java.util.List;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import com.jdbc.bean.Yiqing;
import com.jdbc.dao.yiqidao;
@WebServlet("/yiqingServlet")
public class yiqingServlet extends HttpServlet {
private static final long serialVersionUID = 1L;
public yiqingServlet() {
super();
}
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
response.setContentType("text/html;charset=UTF-8");
request.setCharacterEncoding("UTF-8");
String method = request.getParameter("method");
//System.out.print(method);
if(method.equals("pc"))
{
add(request,response);
}
}
private void add(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
response.setContentType("text/html;charset=UTF-8");
request.setCharacterEncoding("UTF-8");
List<Yiqing> list = yiqidao.find2();
System.out.println(list);
request.setAttribute("list", list);
request.getRequestDispatcher("ciyuntu.jsp").forward(request,response);
}
protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
// TODO Auto-generated method stub
doGet(request, response);
}
}
BaseConnection
package com.jdbc.util;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
/**
 * Static JDBC helpers: opens a MySQL connection with fixed settings and closes
 * JDBC resources, printing (not propagating) any {@link SQLException} on close.
 */
public class BaseConnection {

    /**
     * Loads the MySQL driver and opens a connection using the settings below.
     *
     * @return the open connection, or {@code null} if loading the driver or
     *         connecting failed (the stack trace is printed in that case)
     */
    public static Connection getConnection() {
        // NOTE(review): connection settings and credentials are hard-coded here.
        final String driverClass = "com.mysql.cj.jdbc.Driver";
        final String jdbcUrl = "jdbc:mysql://localhost:3306/mydate?serverTimezone=UTC&characterEncoding=utf8&useSSL=true";
        final String dbUser = "root";
        final String dbPassword = "123456";
        try {
            Class.forName(driverClass);
            return DriverManager.getConnection(jdbcUrl, dbUser, dbPassword);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Closes a statement and then a connection; {@code null} arguments are skipped.
     *
     * @param state statement to close, may be {@code null}
     * @param conn  connection to close, may be {@code null}
     */
    public static void close(Statement state, Connection conn) {
        // Same steps as the three-argument overload with no result set to close.
        close(null, state, conn);
    }

    /**
     * Closes a result set, then a statement, then a connection. Each
     * {@code null} argument is skipped, and a failure to close one resource
     * does not prevent closing the remaining ones.
     *
     * @param rs    result set to close, may be {@code null}
     * @param state statement to close, may be {@code null}
     * @param conn  connection to close, may be {@code null}
     */
    public static void close(ResultSet rs, Statement state, Connection conn) {
        try {
            if (rs != null) {
                rs.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }

        try {
            if (state != null) {
                state.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }

        try {
            if (conn != null) {
                conn.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}
Lwpq:
# Crawls the CVPR 2019 open-access index, fetches each paper's abstract page,
# and stores title, link and abstract in the MySQL table `lunwen`.
import requests
import pymysql
from bs4 import BeautifulSoup

# Keyword arguments only: recent PyMySQL releases no longer accept a positional host.
db = pymysql.connect(host='127.0.0.1',
                     port=3306,
                     user='root',
                     password='mysjz',
                     db='qsly',
                     charset='utf8')

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
}

url = "http://openaccess.thecvf.com/CVPR2019.py"
# The browser-like User-Agent was defined but never sent; pass it with every request.
html = requests.get(url, headers=headers)
soup = BeautifulSoup(html.content, 'html.parser')
# Every anchor whose text is exactly "pdf" points at one paper's PDF file.
pdfs = soup.findAll(name="a", text="pdf")

lis = []
for pdf in pdfs:
    # The paper name is the PDF file name without extension and without the conference suffix.
    pdf_name = pdf["href"].split('/')[-1]
    name = pdf_name.split('.')[0].replace("_CVPR_2019_paper", "")
    link = "http://openaccess.thecvf.com/content_CVPR_2019/html/" + name + "_CVPR_2019_paper.html"

    html1 = requests.get(link, headers=headers)
    soup1 = BeautifulSoup(html1.content, 'html.parser')
    weizhi = soup1.find('div', attrs={'id': 'abstract'})
    if weizhi:
        jianjie = weizhi.get_text()
        print("ok")
    else:
        # Reset per paper so a page without an abstract does not inherit
        # the abstract of the previously crawled paper.
        jianjie = ""

    info = {}
    info['title'] = name
    info['link'] = link
    info['abstract'] = jianjie
    lis.append(info)

cursor = db.cursor()
try:
    for info in lis:
        cols = ", ".join('`{}`'.format(k) for k in info.keys())
        print(cols)  # e.g. '`title`, `link`, `abstract`'
        val_cols = ', '.join('%({})s'.format(k) for k in info.keys())
        print(val_cols)  # e.g. '%(title)s, %(link)s, %(abstract)s'
        sql = "insert into lunwen(%s) values(%s)"
        res_sql = sql % (cols, val_cols)
        print(res_sql)
        # The dict supplies the values for the named placeholders.
        cursor.execute(res_sql, info)
    db.commit()
finally:
    # Release the database resources even if an insert fails.
    cursor.close()
    db.close()
print("ok")
实验结果如下:
数据库界面

web界面

浙公网安备 33010602011771号