1 from selenium import webdriver
2 import os
3 import json
4 import time
5
6
class Douyu:
    """Scrape the room list of a Douyu category page with Selenium.

    Creating an instance launches Chrome and opens the category page.
    ``run()`` then walks the result pages one by one, appending every
    room it finds to ``douyu.json`` in the current working directory.
    """

    def __init__(self):
        # 1. Launch the browser and request the category landing page.
        self.driver = webdriver.Chrome('../chromedriver.exe')
        self.driver.get('https://www.douyu.com/g_yz')

    def get_content(self, wait_seconds=3):
        """Collect the rooms shown on the page currently loaded in the driver.

        Args:
            wait_seconds: How long to sleep before reading the DOM, to give
                the page time to render. Defaults to 3, the delay that was
                previously hard-coded.

        Returns:
            A list of dicts, one per room ``<li>``, with the keys
            '主播', '房间名', '热度' and '封面'.

        Raises:
            IndexError: If any of the field queries returns fewer elements
                than there are room ``<li>`` entries.
        """
        time.sleep(wait_seconds)
        driver = self.driver

        # Query each field once per page instead of once per room: the
        # lookups are document-wide, so repeating them inside the loop only
        # multiplies the WebDriver round trips.
        rooms = driver.find_elements_by_xpath(
            "//*[@class='layout-Cover-list']/li[@class='layout-Cover-item']")
        anchors = driver.find_elements_by_xpath('//h2[@class="DyListCover-user"]')
        titles = driver.find_elements_by_xpath('//h3[@class="DyListCover-intro"]')
        heats = driver.find_elements_by_xpath('//span[@class="DyListCover-hot"]')
        covers = driver.find_elements_by_class_name('DyImg-content')

        contents = []
        # Fields are paired with rooms purely by position in document order,
        # and the number of room <li> entries decides how many items are read.
        for index in range(len(rooms)):
            contents.append({
                '主播': anchors[index].text,
                '房间名': titles[index].get_attribute('title'),
                '热度': heats[index].text,
                '封面': covers[index].get_attribute('src'),
            })
        return contents

    def save_content(self, contents):
        """Append each item of ``contents`` to ``douyu.json`` as indented JSON.

        Every item is dumped as its own JSON object followed by a newline, so
        the file is a sequence of objects rather than one JSON document.

        Args:
            contents: Iterable of JSON-serialisable dicts.
        """
        with open('douyu.json', 'a', encoding='utf-8') as f:
            for content in contents:
                json.dump(content, f, ensure_ascii=False, indent=2)
                # Text mode already translates '\n' to the platform line
                # ending; writing os.linesep here would yield '\r\r\n' on
                # Windows.
                f.write('\n')

    def _next_page_button(self):
        """Return the second 'dy-Pagination-item-custom' element, or None."""
        buttons = self.driver.find_elements_by_class_name('dy-Pagination-item-custom')
        # Index 1 is the control the pagination loop clicks; with fewer than
        # two matches there is nothing to click.
        return buttons[1] if len(buttons) > 1 else None

    def run(self):
        """Scrape the first page, then keep clicking 'next' and scraping.

        The browser is closed when the crawl ends, whether it finishes
        normally or is interrupted by an exception.
        """
        try:
            # 1. The landing page was already requested in __init__.
            # 2. Read and store the first page.
            self.save_content(self.get_content())

            # 3. Keep paging for as long as a next-page control is present.
            # NOTE(review): if the site keeps this control in the DOM on the
            # last page (e.g. merely disabled), this loop will not terminate
            # on its own - confirm against the live markup.
            while True:
                next_button = self._next_page_button()
                if next_button is None:
                    break
                next_button.click()
                # 4. Read and store the page that was just opened.
                self.save_content(self.get_content())
        finally:
            # Always release the browser and the chromedriver process.
            self.driver.quit()
53
54
if __name__ == '__main__':
    # Script entry point: open the browser and crawl every page.
    Douyu().run()