Python Selenium 爬取 CNVD(国家信息安全漏洞共享平台)剩余部分

# coding = utf-8                     
# @author :今夕
# @Time :2021.08.10 09:22
# @file :main2.py
# @software :PyCharm

import time
from selenium import webdriver
from bs4 import BeautifulSoup
import pymysql
def main(start=882, min_page_length=16975, delay=3):
    """Visit every stored CNVD link and write the parsed details back.

    Reads all ``link`` values from the ``cnvd`` table, opens each one in
    Chrome starting at position ``start``, reloads the page until its HTML is
    longer than ``min_page_length`` characters, then hands the HTML to
    ``Parse`` and the resulting rows to ``update``.

    Args:
        start: Zero-based position in the fetched link list to resume from.
        min_page_length: A page whose source is not longer than this is
            reloaded after a pause.
            NOTE(review): presumably shorter responses are a placeholder or
            anti-crawler page rather than the real detail page -- confirm.
        delay: Seconds to wait after the first load and between reloads.
    """
    # Fetch the work list first and release the connection right away.
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration.
    db = pymysql.connect(user='root', password='123456', host='localhost', database='cnvd')
    try:
        cursor = db.cursor()
        try:
            cursor.execute("select link from cnvd")
            links = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # The connection itself must be closed; closing only the cursor
        # leaves the connection open.
        db.close()

    # Script injected into every new document so that navigator.webdriver
    # reads as undefined.
    hide_webdriver_js = (
        "\n"
        "Object.defineProperty(navigator, 'webdriver', {\n"
        "get: () => undefined\n"
        "})\n"
    )

    driver = webdriver.Chrome()
    try:
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {"source": hide_webdriver_js},
        )
        for i in range(start, len(links)):
            link = links[i][0]
            print("第%s条" % i)
            driver.get(link)
            time.sleep(delay)
            driver.refresh()
            html = driver.page_source
            # Keep reloading until the page source exceeds the threshold.
            while len(html) <= min_page_length:
                print("休眠%s秒" % delay)
                time.sleep(delay)
                print(("休眠结束"))
                driver.refresh()
                html = driver.page_source
            dat = Parse(html)
            update(dat, link)
    finally:
        # Always shut the browser down, even when a page fails to parse.
        driver.quit()
def Parse(html):
    """Return the cleaned text of every ``<tr>`` element found in ``html``.

    The markup is parsed with BeautifulSoup's "html.parser" backend. For each
    table row, its text has all newlines, tabs, spaces and single quotes
    removed.

    Args:
        html: HTML source of a page.

    Returns:
        A list with one cleaned string per ``<tr>``, in document order.
    """
    # One translation table deletes all four unwanted characters in one pass.
    unwanted = str.maketrans("", "", "\n\t '")
    document = BeautifulSoup(html, "html.parser")
    return [row.text.translate(unwanted) for row in document.find_all('tr')]
def _text_after_label(rows, index, label):
    """Return the part of ``rows[index]`` that follows the first ``label``."""
    if index >= len(rows):
        raise IndexError(
            "expected row %d (%s) but only %d rows were parsed" % (index, label, len(rows))
        )
    _, found, value = rows[index].partition(label)
    if not found:
        raise IndexError("row %d does not contain the label %s" % (index, label))
    return value


def update(dat, url):
    """Store the detail fields of one parsed CNVD page in its ``cnvd`` row.

    Each entry of ``dat`` used here is a label immediately followed by its
    value. The row positions depend on ``len(dat)``:

    * 19 rows: CVEID is at index 4 and the description at index 5;
    * 18 rows: there is no CVEID row (stored as an empty string) and the
      description is at index 4;
    * any other count: CVEID is at index 5 and the description at index 6.

    The type, reference link, solution, vendor patch and verification rows
    directly follow the description; the attachment row sits nine rows after
    it. The affected-product row is always read from index 3.

    Args:
        dat: Cleaned table-row texts of the page, as returned by ``Parse``.
        url: Value of the ``link`` column that identifies the row to update.

    Raises:
        IndexError: If an expected row is missing or lacks its label.
    """
    # Parse everything before touching the database so a malformed page
    # cannot leave a connection open.
    row_count = len(dat)
    if row_count == 19:
        description_at = 5
    elif row_count == 18:
        description_at = 4
    else:
        description_at = 6

    affect_product = _text_after_label(dat, 3, "影响产品")
    if row_count == 18:
        cve_id = ""
    else:
        cve_id = _text_after_label(dat, description_at - 1, "CVEID")
    description = _text_after_label(dat, description_at, "漏洞描述")
    hole_type = _text_after_label(dat, description_at + 1, "漏洞类型")
    reference_link = _text_after_label(dat, description_at + 2, "参考链接")
    solution = _text_after_label(dat, description_at + 3, "漏洞解决方案")
    vendor_patch = _text_after_label(dat, description_at + 4, "厂商补丁")
    verification = _text_after_label(dat, description_at + 5, "验证信息")
    attachments = _text_after_label(dat, description_at + 9, "漏洞附件")

    # Placeholders are filled in by the driver, which escapes every value;
    # the scraped text is never spliced into the statement itself.
    sql = (
        "update cnvd set Affectproduct=%s,CVEID=%s,VulnerabilityDescribes=%s,"
        "HoleType=%s,referenceLinking=%s,solution=%s,ManufacturersPatch=%s,"
        "VerificationInformation=%s,Vulnerabilityaccessories=%s where link=%s"
    )
    params = (
        affect_product,
        cve_id,
        description,
        hole_type,
        reference_link,
        solution,
        vendor_patch,
        verification,
        attachments,
        url,
    )

    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration.
    db = pymysql.connect(user='root', password='123456', host='localhost', database='cnvd')
    try:
        cursor = db.cursor()
        try:
            # Show the statement exactly as it will be sent to the server.
            print(cursor.mogrify(sql, params))
            cursor.execute(sql, params)
        finally:
            cursor.close()
        db.commit()
    finally:
        # Close the connection itself; one is opened for every call.
        db.close()
    # Pause after each successful write before the caller loads the next page.
    time.sleep(3)
# Run the crawl only when this file is executed as a script, then report
# that it finished.
if __name__ == '__main__':
    main()
    print("爬取完成")
posted @ 2021-08-10 11:46  今夕要早睡  阅读(488)  评论(0)    收藏  举报