# 链家二手房爬虫(重庆)
**今日目标**
爬取重庆链家二手房列表页(前 10 页)中每套房源所在的小区/地区名称及对应的总价(单位:万),并把结果追加保存到 `lianjia.csv`。
```python
import requests
import re
import csv
class LianjiaSpider(object):
    """Scrape listing regions and total prices from Lianjia list pages.

    Each list page is downloaded, every (region, total price) pair is
    extracted with a regular expression, and the pairs are appended as
    rows to a CSV file.

    Args:
        csv_path: File that result rows are appended to.
        timeout: Seconds to wait for the server before giving up on a
            request.
    """

    # Compiled once for the whole class instead of once per page.  Group 1
    # is the text of the ``data-el="region"`` link, group 2 is the number
    # inside the ``totalPrice`` span.  ``re.S`` lets ``.`` cross newlines,
    # because one listing spans several lines of HTML.
    _ITEM_PATTERN = re.compile(
        r'<div class="houseInfo"><span.*?data-el="region">(.*?)</a>'
        r'.*?<div class="totalPrice"><span>(.*?)</span>',
        re.S,
    )

    def __init__(self, csv_path='lianjia.csv', timeout=10):
        # ``{}`` is filled with the 1-based page number in ``main``.
        self.url = 'https://cq.lianjia.com/ershoufang/pg{}/'
        self.headers = {'User-Agent': 'Mozilla/5.0'}
        self.csv_path = csv_path
        self.timeout = timeout

    def get_page(self, url):
        """Download ``url`` and hand the decoded HTML to ``parse_page``.

        Raises:
            requests.RequestException: On connection problems, a timeout,
                or an HTTP error status.
        """
        # Without a timeout a stalled connection would block forever.
        res = requests.get(url, headers=self.headers, timeout=self.timeout)
        # Fail loudly instead of silently parsing an error page into nothing.
        res.raise_for_status()
        html = res.content.decode('utf-8')
        self.parse_page(html)

    def parse_page(self, html):
        """Extract all (region, price) pairs from ``html`` and save them."""
        r_list = self._ITEM_PATTERN.findall(html)
        self.write_page(r_list)

    def write_page(self, r_list):
        """Append one CSV row per item of ``r_list``.

        Args:
            r_list: Iterable of sequences whose first element is the region
                text and whose second element is the price string.
        """
        # Strip surrounding whitespace from the region and add the price unit.
        rows = [(r[0].strip(), r[1] + '万') for r in r_list]
        # newline='' is required by the csv module (otherwise blank lines
        # appear between rows on Windows); the explicit encoding keeps the
        # output independent of the platform's default locale.
        with open(self.csv_path, 'a', newline='', encoding='utf-8') as f:
            csv.writer(f).writerows(rows)

    def main(self, pages=10):
        """Crawl list pages 1 through ``pages`` (inclusive) in order."""
        for page in range(1, pages + 1):
            self.get_page(self.url.format(page))
            print('正在打印{}页'.format(page))
if __name__ == '__main__':
    # Script entry point: build a spider and run the full crawl.
    LianjiaSpider().main()
```