#coding:utf-8
#----------------------------------------------------------------------------------------------------------
# 功能:爬取汽车之家的新闻
#----------------------------------------------------------------------------------------------------------
# pip3 install requests
# pip3 install BeautifulSoup4
import requests
from bs4 import BeautifulSoup
# 获取一个新闻标题
# response = requests.get('http://www.autohome.com.cn/news/')
# response.encoding = 'gbk'
# soup = BeautifulSoup(response.text, 'html.parser')# 解析成对象
# tag = soup.find(id='auto-channel-lazyload-article')
# h3 = tag.find(name='h3')
# print(h3)
#找到所有的新闻,包括标题、简介、url、图片
# Fetch the Autohome news listing page and save each article's thumbnail
# image to the current directory as 1.jpg, 2.jpg, ...
# Fixes vs. the original:
#   * timeouts on every request so the script cannot hang forever
#   * skip <li> entries missing <p>/<a>/<img> instead of crashing with AttributeError
#   * only prepend "https:" to scheme-relative ("//...") image URLs
response = requests.get('http://www.autohome.com.cn/news/', timeout=10)
response.encoding = 'gbk'  # page is GBK-encoded; requests' guess would mojibake the text
soup = BeautifulSoup(response.text, 'html.parser')  # parse into a tree
# All news items live under this lazy-load container, one per <li>.
li_list = soup.find(id='auto-channel-lazyload-article').find_all(name='li')
i = 1  # sequential file name for saved images: 1.jpg, 2.jpg, ...
for li in li_list:
    title = li.find('h3')  # news headline (by tag)
    if not title:
        continue  # not an article item (ads/placeholders carry no <h3>)
    p_tag = li.find('p')      # news summary
    a_tag = li.find('a')      # link to the article
    img_tag = li.find('img')  # thumbnail image
    if not (p_tag and a_tag and img_tag):
        continue  # malformed entry: skip rather than raise AttributeError
    summary = p_tag.text
    # a_tag.attrs['href'] would work too; .get() returns None instead of raising
    url = a_tag.get('href')
    img = img_tag.get('src')  # image URL (possibly scheme-relative)
    # ------------------- download the image and save it locally -------------------
    if img.startswith('//'):
        img = 'https:' + img  # complete a scheme-relative URL only
    res = requests.get(img, timeout=10)  # fetch the image itself
    file_name = "%s.jpg" % (i,)
    i += 1
    with open(file_name, 'wb') as f:  # save into the current working directory
        f.write(res.content)  # image bytes -> use res.content (binary), not res.text