# Scrape the Zhihu trending-search list (爬取知乎搜索热榜)

import requests
from lxml import etree
import re
import pandas as pd
import os
# Tophub page that is scraped for the Zhihu trending-search list.
url = "https://tophub.today/n/mproPpoq6O"

# Extra request headers; the User-Agent value is intentionally left as an
# empty string here, exactly as in the original script.
header = {"User-Agent": ""}
def get(url):
    """Fetch *url* and return the response body as text.

    The request is made with a 30-second timeout.  The body is decoded
    using the encoding that requests detects from the content
    (``apparent_encoding``) rather than the one declared in the headers.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or the string '无法爬取' ("unable to crawl")
        when the request fails or the server answers with an HTTP error
        status.
    """
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are treated as "cannot crawl";
        # KeyboardInterrupt and programming errors are no longer swallowed.
        return '无法爬取'
    response.encoding = response.apparent_encoding
    return response.text
def main(limit=10):
    """Print the leading rows of the Zhihu trending-search list.

    Downloads the page at the module-level ``url`` (sending ``header``),
    pulls three columns out of the HTML table with XPath and prints one
    formatted line per row.

    Args:
        limit: Maximum number of rows to print.  Defaults to 10, the number
            the script always printed.  If the page yields fewer complete
            rows, only those rows are printed instead of raising IndexError.
    """
    print('知乎今日搜索热榜(以及热度)')
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=header, timeout=30)
    page = etree.HTML(response.text)

    ranks = page.xpath('//td[@align="center"]/text()')
    titles = page.xpath('//td[@class="al"]/a/text()')
    # Third cell of each row; presumably the heat value shown next to the title.
    heats = page.xpath('//td[3]/text()')

    # chr(12288) is the full-width (ideographic) space, used as the fill
    # character so that padded CJK text lines up.
    pad = chr(12288)

    # zip() stops at the shortest column, so a short or empty result set
    # simply prints fewer lines.
    rows = list(zip(ranks, titles, heats))[:limit]
    for rank, title, heat in rows:
        print("{0:<1}\t{1:{3}<1}\t{2:{3}>30}".format(rank, title, heat, pad))
if __name__ == "__main__":
    # main() performs its own request, so a separate get(url) call whose
    # return value is discarded would only cost an extra HTTP round trip.
    # The guard keeps the scrape from running when the file is imported.
    main()
# posted @ 2020-03-21 19:28  M_google  阅读(361)  评论(0)    收藏  举报