爬取北京的租房信息 地址:https://bj.lianjia.com/zufang/rs/

import sys
import time

import requests
from bs4 import BeautifulSoup
from lxml import etree

# @Author  : 熊xiaohui
# @Software: PyCharm
# Crawl Beijing rental listings from Lianjia (https://bj.lianjia.com/zufang/)
# and print one space-separated line per listing, then the total row count.
HEADERS = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.102 Safari/537.36 Edg/104.0.1293.70'}
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get() may block forever
PAGE_DELAY = 0.5  # seconds to pause between consecutive page requests

# One XPath per printed column, in output order. Each expression is evaluated
# against the whole page, so it returns a flat list spanning every listing.
# The sample values are the ones noted by the original author.
FIELD_XPATHS = (
    '//*[@id="content"]/div/div/div/div/p[1]/a/text()',  # e.g. "whole rental - Fangqunyuan Area 3"
    '//*[@id="content"]/div/div/div/div/p[2]/a[1]/text()',  # e.g. "2 bedrooms, 1 living room"
    '//*[@id="content"]/div/div/div/div/p[2]/a[2]/text()',  # e.g. "south/north"
    '//*[@id="content"]/div/div/div/div/p[2]/a[3]/text()',  # e.g. "Fangqunyuan Area 3"
    '//*[@class="content__list--item--des"]/text()[5]',  # 5th text node, e.g. "60.00"
    '//*[@class="content__list--item--des"]/text()[6]',  # 6th text node, e.g. "south north"
    '//*[@class="content__list--item--des"]/text()[7]',  # 7th text node, e.g. "2 bed, 1 living, 1 bath"
    '//*[@id="content"]/div/div/div/div/span/em/text()',  # e.g. "5800"; was a malformed "///*"
)

total_rows = 0
for page in range(1, 101):
    page_url = 'https://bj.lianjia.com/zufang/pg' + str(page) + '/#contentList'
    try:
        response = requests.get(page_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        # Surface HTTP errors instead of silently parsing an error page.
        response.raise_for_status()
    except requests.RequestException as exc:
        # Report on stderr so stdout still carries only listing rows, then move
        # on to the next page rather than aborting the whole crawl.
        print(f'page {page}: request failed ({exc}); skipping', file=sys.stderr)
        time.sleep(PAGE_DELAY)
        continue

    tree = etree.HTML(response.text)
    columns = [tree.xpath(expression) for expression in FIELD_XPATHS]

    # NOTE(review): zip() stops at the shortest column, and the columns are only
    # aligned if every listing contributes exactly one value to each XPath.
    # A listing with a missing field would shift later rows - confirm against
    # the live page structure.
    for row in zip(*columns):
        print(*(str(value).strip() for value in row))
        total_rows += 1

    time.sleep(PAGE_DELAY)

print(total_rows)

 

posted @ 2022-09-09 20:23  python,菜鸟  阅读(133)  评论(0)  编辑  收藏  举报