1 先用 select 定位到该数据的上层标签,取第一个结果 [0],然后用 attrs 获取单标签里的属性内容,或者用 get_text() 获取成对标签内的文本
2 如果已经明确目标标签是唯一的,就可以直接使用 find(标签名).get_text(),不需要加 [0](find 只返回第一个匹配的标签)
import requests
from bs4 import BeautifulSoup


def _parse_article(anchor):
    """Extract the fields of one news entry from its ``<a>`` tag.

    Two lookup styles are used on purpose:

    1. ``select_one(css)`` finds a descendant by CSS selector; it is the
       None-safe equivalent of ``select(css)[0]``. Attribute values are then
       read with ``.get(name)`` and text with ``.get_text()``.
    2. ``find(tag_name)`` is used where a single tag of that name is expected
       inside the entry, so no indexing is needed.

    Args:
        anchor: A BeautifulSoup tag for one ``ul li a`` element of the list.

    Returns:
        A 6-tuple ``(link, img, title, publish_time, views, intro)`` of
        strings, or ``None`` when any of the expected sub-elements or
        attributes is missing (e.g. a list item that is not a regular article).
    """
    link = anchor.get('href')
    img_tag = anchor.select_one('.article-pic img')
    title_tag = anchor.find('h3')
    time_tag = anchor.select_one('.fn-left')
    views_tag = anchor.select_one('.fn-right em')
    intro_tag = anchor.find('p')

    required = (link, img_tag, title_tag, time_tag, views_tag, intro_tag)
    if any(part is None for part in required):
        return None

    img = img_tag.get('src')
    if img is None:
        return None

    return (
        link,
        img,
        title_tag.get_text(),
        time_tag.get_text(),
        views_tag.get_text(),
        intro_tag.get_text(),
    )


def search(url, timeout=10):
    """Download a news list page, save it to ``a.html`` and print its entries.

    The page is fetched with ``requests``, decoded as GBK, written verbatim to
    ``a.html`` (UTF-8) in the current directory, and parsed with the ``lxml``
    parser. Every ``ul li a`` element under the element with id
    ``auto-channel-lazyload-article`` is printed twice: once as raw
    space-separated fields and once as a labelled block. Entries lacking any
    expected field are skipped instead of aborting the run.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as the news list.
    response.raise_for_status()
    # Force GBK decoding rather than trusting the encoding requests guesses.
    # NOTE(review): presumably the site serves a GB-family charset - confirm.
    response.encoding = 'gbk'
    text = response.text

    # Keep a local copy of the decoded page for offline inspection.
    with open('a.html', 'wt', encoding='utf-8') as f:
        f.write(text)

    soup = BeautifulSoup(text, 'lxml')
    container = soup.find(id="auto-channel-lazyload-article")
    if container is None:
        # The expected list container is absent: nothing to print.
        return

    for anchor in container.select('ul li a'):
        article = _parse_article(anchor)
        if article is None:
            continue

        print(*article)
        # href/src values are prefixed with "http:" here.
        # NOTE(review): this assumes they are protocol-relative ("//host/...") - confirm.
        print(
            '''
            链接:http:%s
            图片:http:%s
            标题:%s
            发布时间:%s
            浏览数:%s
            介绍:%s
            ''' % article
        )


if __name__ == '__main__':
    url = 'https://www.autohome.com.cn/news'
    search(url)