import requests
from bs4 import BeautifulSoup
import re
import pymysql
# Scrape the CVPR 2020 open-access listing for one conference day and store
# every paper's metadata (name, pdf link, author, title, booktitle) in the
# MySQL table `test1`.
url = 'https://openaccess.thecvf.com/CVPR2020?day=2020-06-18'
# Value written to the `date` column; mirrors the `day` query parameter of
# `url` in YYYYMMDD integer form.
paper_date = 20200618

# A timeout keeps the script from hanging forever on a stalled server, and
# raise_for_status() stops an HTTP error page from being parsed as if it were
# the paper listing (which would silently yield zero rows).
with requests.get(url, timeout=30) as response:
    response.raise_for_status()
    page_html = response.text

# One match per paper entry: the title link, the pdf link, and the author /
# title / booktitle fields of the embedded bibtex block. re.S lets `.` span
# the newlines between those pieces.
paper_pattern = re.compile(
    r'<dt class="ptitle"><br>.*?.html">(?P<name>.*?)</a></dt>.*?'
    r'\[<a href="(?P<pdf>.*?)">pdf</a>].*?'
    r'author = {(?P<author>.*?)},<br>.*?'
    r'title = {(?P<title>.*?)},<br>.*?'
    r'booktitle = {(?P<booktitle>.*?)},<br>',
    re.S,
)

sql = 'INSERT INTO test1(`name`, pdf, author, title, booktitle, `date`) values(%s,%s,%s,%s,%s,%s)'

# Connect to the database.
conn = pymysql.connect(host='localhost', user='root', password='123456', database='py', charset='utf8', port=3306)
try:
    # The cursor context manager closes the cursor even if an insert raises.
    with conn.cursor() as cursor:
        for match in paper_pattern.finditer(page_html):
            data = [
                match.group('name'),
                match.group('pdf'),
                match.group('author'),
                match.group('title'),
                match.group('booktitle'),
                paper_date,
            ]
            try:
                cursor.execute(sql, data)
                # Commit per row so one bad record does not discard the
                # rows that were already inserted.
                conn.commit()
            except pymysql.MySQLError as e:
                # Best effort: report the failed row, reset the transaction
                # state, and carry on with the remaining papers.
                conn.rollback()
                print(e)
finally:
    # Always release the connection, including when an unexpected error
    # escapes the loop above.
    conn.close()
print('over!!!')