python3 spider [ urllib.request ]

# # 导入urllib库的urlopen函数
# from urllib.request import urlopen
# # 发出请求,获取html
# html = urlopen("https://www.baidu.com/")
# # 获取的html内容是字节,将其转化为字符串
# html_text = bytes.decode(html.read())
# # 打印html内容
# print(html_text)


from urllib.parse import urljoin
from urllib.request import urlopen, urlretrieve

from bs4 import BeautifulSoup as bf

# Fetch the Baidu home page, locate the logo <img> element and save the
# image it points at to ``logo.png`` in the current working directory.
PAGE_URL = "https://www.baidu.com/"

# Use the response as a context manager so the HTTP connection is released
# as soon as the markup has been read and parsed.
with urlopen(PAGE_URL) as html:
    obj = bf(html.read(), 'html.parser')

# The page title is not needed for the download below; guard against markup
# that has no <head> so the lookup cannot raise AttributeError.
title = obj.head.title if obj.head is not None else None

logo_pic_info = obj.find_all('img', class_="index-logo-src")
if not logo_pic_info:
    # Fail with an explicit message instead of an IndexError on [0].
    raise SystemExit(
        "No <img class=\"index-logo-src\"> element found in " + PAGE_URL
    )

# urljoin resolves protocol-relative ("//host/path"), path-relative and
# already-absolute src values against the page URL; plain "https:" + src
# is only correct for the protocol-relative form.
logo_url = urljoin(PAGE_URL, logo_pic_info[0]['src'])

# download the image
urlretrieve(logo_url, 'logo.png')

 

posted on 2021-06-10 16:00  Karlkiller  阅读(45)  评论(0)  编辑  收藏  举报

导航