Python爬虫之爬取图片(五)
爬取“无聊哦”《姜文,你太皮了!》
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every request so the site is less likely
# to reject the scraper. Adjacent string literals are used instead of a
# backslash continuation inside the literal, which would embed stray
# whitespace in the header value.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
    )
}

# Upper bound (seconds) for each HTTP request; without it requests.get can
# block forever on an unresponsive server.
REQUEST_TIMEOUT = 10


def open_url(url):
    """Fetch a web page and return its HTML as text.

    Args:
        url: Address of the page to download.

    Returns:
        The response body decoded with the encoding detected from its content.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page as content.
    r.raise_for_status()
    # Use the encoding sniffed from the body rather than the header default.
    r.encoding = r.apparent_encoding
    return r.text


def title_name(html):
    """Extract the page title from the first <h1> and wrap it in 《》.

    Args:
        html: HTML source of the page.

    Returns:
        The text of the first <h1> element surrounded by '《' and '》'.

    Raises:
        AttributeError: If the page contains no <h1> element.
    """
    soup = BeautifulSoup(html, 'lxml')
    heading = soup.find('h1')
    if heading is None:
        # Same exception type as the implicit None.text failure, clearer text.
        raise AttributeError('no <h1> element found in page')
    return '《' + heading.text + '》'


def get_url(html):
    """Extract the image address from the page.

    The image is taken from the <img> nested in the second <a> element whose
    href is exactly "http://wuliaoo.com".

    Args:
        html: HTML source of the page.

    Returns:
        The value of the <img> tag's src attribute (None if it has no src).

    Raises:
        IndexError: If fewer than two matching <a> elements exist.
        AttributeError: If the selected <a> element contains no <img>.
    """
    soup = BeautifulSoup(html, 'lxml')
    anchors = soup.find_all('a', href="http://wuliaoo.com")
    if len(anchors) < 2:
        raise IndexError(
            'expected at least two <a href="http://wuliaoo.com"> elements, '
            'found %d' % len(anchors)
        )
    img = anchors[1].img
    if img is None:
        raise AttributeError('selected <a> element contains no <img> tag')
    # Read the src attribute of the <img> nested inside the <a> tag.
    return img.get('src')


def save_img(title, img_url):
    """Download an image and write it to '<title>.jpg' in the working directory.

    Args:
        title: Base name of the output file (without extension).
        img_url: Address of the image to download.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        OSError: If the output file cannot be written.
    """
    filename = title + '.jpg'
    # Send the same headers as the page request and bound the wait time.
    response = requests.get(img_url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Do not write an HTTP error page to disk as if it were an image.
    response.raise_for_status()
    # Image data is binary, so the file is opened in 'wb' mode.
    with open(filename, 'wb') as file:
        file.write(response.content)
    # Report the title actually saved rather than a hard-coded one.
    print(title + '保存完成!')


def main():
    """Download the article page, then save its image under the page title."""
    url = 'http://wuliaoo.com/jiangwentaipi.html'
    res = open_url(url)
    title = title_name(res)
    img_url = get_url(res)
    save_img(title, img_url)


if __name__ == '__main__':
    main()

浙公网安备 33010602011771号