案例-亚洲大学排行:xpath
网址: https://www.webometrics.info/en/Asia
xpath代码:
import requests
import json
from lxml import etree
# HTTP headers sent with every request made by get_ranking; the User-Agent
# string identifies the client as a desktop Chrome browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/54.0.2840.99 Safari/537.36"
    ),
}
def get_ranking(url, timeout=10):
    """Download one ranking page and print every university found on it.

    The page is requested with the module-level ``headers``, parsed as HTML,
    and for each table row three lines are printed: the university name
    (link text in the 3rd cell), the ranking (1st cell) and the world rank
    (2nd cell).

    Args:
        url: Address of the ranking page to fetch.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without a timeout a stalled connection would
            block the whole crawl indefinitely.

    Returns:
        None. Results are written to standard output.
    """
    resp = requests.get(url, headers=headers, timeout=timeout)
    # Decode resp.text as UTF-8 rather than whatever encoding requests infers.
    resp.encoding = 'UTF-8'
    html_tree = etree.HTML(resp.text)
    rows = html_tree.xpath('//div[@id="block-system-main"]/div/table//tbody/tr')
    for row in rows:
        name = row.xpath('./td[3]/a/text()')
        ranking = row.xpath('./td[1]/center/text()')
        world_rank = row.xpath('./td[2]/center/text()')
        if not (name and ranking and world_rank):
            # A row lacking any expected cell (e.g. a spacer or header row)
            # is skipped instead of aborting the crawl with IndexError.
            continue
        print("University==>", name[0])
        print("ranking==>", ranking[0])
        print("World Rank==>", world_rank[0])
# Pagination: the first page lives at the bare URL, every later page is
# addressed by appending ?page=N (N = 1 .. 159).
url = 'https://www.webometrics.info/en/asia'
get_ranking(url)
for page in range(1, 160):
    get_ranking("{}?page={}".format(url, page))
浙公网安备 33010602011771号