Python连接MySQL
python连接mysql可以用pymysql模块
pip install pymysql
pymysql基本使用:
import pymysql
# 链接数据库
conn = pymysql.connect(
host='localhost',
port=3306,
user='root',
password='test123456',
database='spider_back'
)
# 创建游标
cursor = conn.cursor()
# 接下来就可以用游标去执行各种操作了
cursor.close() # 断开cursor
conn.close() # 断开连接
pymysql执行增删改操作
添加数据
try:
cursor = conn.cursor()
result = cursor.execute("insert into stu(sname, address, gender) values ('李嘉诚', '八宝山', 1)")
print(cursor.lastrowid) # 获取自增的ID值
print(result) # result是该sql会影响多少条数据
conn.commit() # 提交
except:
conn.rollback() # 回滚
cur.close() # 断开cursor
conn.close() # 断开连接
修改数据
try:
cursor = conn.cursor()
result = cursor.execute("update stu set gender = 2 where sid = 12")
print(result) # result是该sql会影响多少条数据
conn.commit() # 提交
except:
conn.rollback() # 回滚
cur.close() # 断开cursor
conn.close() # 断开连接
删除数据
try:
cursor = conn.cursor()
result = cursor.execute("delete from stu where sid = 12")
print(result) # result是该sql会影响多少条数据
conn.commit() # 提交
except:
conn.rollback() # 回滚
cur.close() # 断开cursor
conn.close() # 断开连接
查询操作
# 查询
from pymysql.cursors import DictCursor
# cursor = conn.cursor(DictCursor) # 使用字典游标. 查询出的结果自动保存在字典中
cursor = conn.cursor() # 默认游标. 查询出的结果自动保存在元组中
sql = """
select * from stu
"""
ret_num = cursor.execute(sql)
# result = cursor.fetchall() # 获取全部结果
# result = cursor.fetchmany(5) # 获取部分结果
result = cursor.fetchone() # 获取单个结果
print(result)
result = cursor.fetchone() # 获取单个结果, 可以连续获取
print(result)
cur.close() # 断开cursor
conn.close() # 断开连接
注意, 一个游标如果被拿空了. 则不能再次获取内容.
案例:爬取 亚洲大学排名数据到MySQL
网址: https://www.webometrics.info/en/asia
CREATE TABLE `university_ranking` (
`ID` bigint(20) NOT NULL AUTO_INCREMENT,
`university` varchar(500) DEFAULT NULL,
`ranking` varchar(50) DEFAULT NULL,
`world_rank` varchar(50) DEFAULT NULL,
PRIMARY KEY (`ID`)
) ENGINE=InnoDB CHARSET=utf8mb4;
import requests
import json
from lxml import etree
import pymysql
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"}
conn = pymysql.connect(
host='192.168.4.63',
port=3306,
user='root',
password='Aa123456',
database='test'
)
def insert_university_ranking(university, ranking, world_rank):
try:
cursor = conn.cursor()
sql = """insert into university_ranking(university, ranking, world_rank) values ("{}", "{}", "{}")""".format(university, ranking, world_rank)
cursor.execute(sql)
conn.commit()
except:
conn.rollback()
cursor.close()
def get_ranking(url):
resp = requests.get(url, headers=headers)
resp.encoding = 'UTF-8'
html_tree = etree.HTML(resp.text)
data = html_tree.xpath('//div[@id="block-system-main"]/div/table//tbody/tr')
for item in data:
university = item.xpath('./td[3]/a/text()')[0]
ranking = item.xpath('./td[1]/center/text()')[0]
world_rank = item.xpath('./td[2]/center/text()')[0]
insert_university_ranking(university, ranking, world_rank)
# 分页
url = 'https://www.webometrics.info/en/asia'
for item in range(0,160):
if item == 0:
get_ranking(url)
else:
full_url = "{}?page={}".format(url,item)
get_ranking(full_url)
浙公网安备 33010602011771号