from selenium import webdriver
import time
from bs4 import BeautifulSoup
class douyuSelenium():
#初始化,启动斗鱼浏览器
def setUp(self):
self.driver = webdriver.PhantomJS()
#获取斗鱼房间信息
def testDouyu(self):#'https://www.douyu.com/directory/all'
self.driver.get('https://www.douyu.com/directory/all')
while True:
time.sleep(2)
#指定解析器,生成一个soup对象
soup = BeautifulSoup(self.driver.page_source,'lxml')
#获取当前页面所有的房间标题,观众人数
titles = soup.find_all('h3',{'class':'ellipsis'})
nums = soup.find_all('span',{'class':'dy-num fr'})
for title,num in zip(titles,nums):
info = "房间标题:" + title.get_text().strip() + '\t' + "人气:" + num.get_text().strip()
print(info)
#下一页
#查找下一页 在最后一页会有shark-pager-disable-next元素 表示没有下一页
if self.driver.page_source.find('shark-pager-disable-next') != -1:
break
#点击 页面到下一页
next_page = self.driver.find_element_by_class_name('shark-pager-next')
next_page.click()
def shutdown(self):
print('加载完成...')
#加载完成 退出浏览器
self.driver.quit()
if __name__ == '__main__':
douyu = douyuSelenium()
douyu.setUp()
douyu.testDouyu()
douyu.shutdown()