获取微信热点前十

import requests
from bs4 import BeautifulSoup
import re
# Scrape the tophub.today hot-list page and print the first ten entries
# followed by the first ten matching read-count strings.

# The page cannot be scraped directly, so browser cookies are loaded from
# a local file and attached to the request. cookie.txt is expected to hold
# a browser-style cookie string: "name1=value1; name2=value2".
cookie = {}
with open('cookie.txt', 'r') as f:
    # Cookie pairs are separated by ';'. Splitting on ':' (as before) left
    # the whole string as a single entry and broke on values containing ':'.
    for line in f.read().split(';'):
        line = line.strip()
        if not line:
            # Tolerate a trailing ';' or trailing newline in the file.
            continue
        # Split on the first '=' only: cookie values may contain '='.
        name, value = line.split('=', 1)
        cookie[name] = value

# Must be a dict mapping header name -> value; the previous literal was a
# set containing one string because of mismatched quotes.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
}

# timeout keeps the script from hanging forever on an unresponsive server.
page = requests.get(
    'https://tophub.today/n/WnBe01o371',
    headers=headers,
    cookies=cookie,
    timeout=10,
).content.decode('utf-8')

# Build the HTML parse tree with the lxml parser.
soup = BeautifulSoup(page, 'lxml')

# First ten elements carrying class "al" (presumably the entry titles --
# confirm against the page markup). td.string is None when the element has
# more than one child node, in which case "None" is printed.
for td in soup.find_all(attrs={'class': 'al'})[:10]:
    print(td.string)

# First ten text nodes matching the read-count pattern.
# NOTE(review): in this regex 'W+' means "one or more 'W'", not a literal
# plus sign; confirm whether a literal "10W+" was intended.
for volume in soup.find_all(text=re.compile('10W+.*?[0-9]+.*?'))[:10]:
    print(volume)

附 输出结果 

 

posted @ 2020-03-21 22:22  长林丶  阅读(164)  评论(0)  编辑  收藏  举报