# XML crawl: fetch the Tenable plugin feed and store new entries in MongoDB.
url_str = 'https://www.tenable.com/plugins/feeds?sort=updated'
# Timeout so the crawler cannot hang forever; raise_for_status so an HTTP
# error page is never silently parsed as a feed.
response = requests.get(url_str, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.text, 'xml')
for soup_item in soup.find_all('item'):
    title_tag = soup_item.find("title")
    link_tag = soup_item.find("link")
    if title_tag is None or link_tag is None:
        continue  # malformed item: skip instead of crashing the whole crawl

    # get_text() unescapes HTML entities and handles CDATA correctly,
    # unlike stripping tags out of str(tag) with a regex.
    name = title_tag.get_text(strip=True)
    link = link_tag.get_text(strip=True)
    nessus_id = link.split("/")[-1]  # plugin id is the last URL path segment

    # The <description> payload is itself HTML; its first three <span>
    # elements hold synopsis / description / solution, in that order.
    total_description = soup_item.find("description")
    if total_description is None:
        continue
    desc_soup = BeautifulSoup(total_description.text, "lxml")
    spans = desc_soup.find_all("span")
    if len(spans) < 3:
        continue  # unexpected layout: skip rather than IndexError

    up_dic = {
        "name": name,
        "nessus_id": nessus_id,
        "synopsis": spans[0].get_text(strip=True),
        "description": spans[1].get_text(strip=True),
        "solution": spans[2].get_text(strip=True),
    }

    # Insert only plugins we have not crawled before (nessus_id is the key).
    if not mdb.get_one("CrawlDataForIDbyNessus", {"nessus_id": nessus_id}):
        mdb.add("CrawlDataForIDbyNessus", up_dic)