爬虫实战：爬取58同城房源信息
import requests
from lxml import etree
# Script entry point: download the 58.com second-hand housing listing page,
# extract each listing's title with XPath, print the titles and save them
# (one per line) to ./58.txt.
if __name__ == '__main__':
    # Fetch the raw HTML source of the listing page.
    url = 'https://su.58.com/ershoufang/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36'
    }
    # Without a timeout requests waits indefinitely on a stalled connection.
    response = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of parsing an error page.
    response.raise_for_status()
    page_text = response.text

    # Parse the HTML into an element tree for XPath queries.
    tree = etree.HTML(page_text)
    # Collect the <li> elements of the listing; note the class attribute
    # value is double-quoted inside the single-quoted XPath string.
    li_list = tree.xpath('//ul[@class="house-list-wrap"]/li')

    # The context manager guarantees the file is flushed and closed even if
    # an exception is raised while processing the items.
    with open('./58.txt', 'w', encoding='utf-8') as fp:
        for li in li_list:
            # XPath relative to the current <li>: text of the title span.
            title_nodes = li.xpath('./div[2]/p/span/text()')
            if not title_nodes:
                # Skip items without the expected title markup rather than
                # crashing with IndexError on the [0] lookup.
                continue
            title = title_nodes[0]
            print(title)
            fp.write(title + '\n')

浙公网安备 33010602011771号