import requests, time
from lxml import etree
def src_tiqu(yeshu):
    """Scrape link texts from the SJTU SRC list pages into ``src.txt``.

    Pages ``1`` through ``int(yeshu)`` are requested in order from
    ``https://src.sjtu.edu.cn/list/?i=<page>``. For every page, the text of
    each ``<a>`` that is a direct child of a ``<td class="">`` is collected,
    split on whitespace, and the resulting tokens are appended one per line
    to ``src.txt`` (UTF-8) in the current working directory.

    A page that cannot be fetched, decoded, parsed or written is reported on
    stdout and skipped, so one bad page does not abort the whole run.

    Args:
        yeshu: Number of pages to fetch; any value accepted by ``int()``
            (the script passes the raw string returned by ``input()``).
            Values below 1 fetch nothing.

    Raises:
        ValueError: If ``yeshu`` is a string that is not a valid integer.
    """
    page_count = int(yeshu)
    for i in range(1, page_count + 1):
        url = 'https://src.sjtu.edu.cn/list/?i=' + str(i)
        print('提取->', str(i) + '页数')
        try:
            # A timeout keeps a stalled connection from hanging the run.
            response = requests.get(url, timeout=10)
            # Do not parse an HTTP error page as if it were a result list.
            response.raise_for_status()
            soup = etree.HTML(response.content.decode('utf-8'))
            result = soup.xpath('//td[@class=""]/a/text()')
            # Flatten to whitespace-separated tokens; this also drops the
            # padding/newlines that surround the link texts in the markup.
            names = [token for text in result for token in text.split()]
            print(names)
            if names:
                # Open the output file once per page rather than per token.
                with open('src.txt', 'a', encoding='utf-8') as f:
                    f.writelines(name + '\n' for name in names)
        except Exception as e:
            # Best-effort scraping: report the failure, pause briefly so a
            # struggling server is not hammered, then go on to the next page.
            print('提取失败->', str(i) + '页数', e)
            time.sleep(0.5)
if __name__ == '__main__':
    # Script entry point: prompt for the page count and hand the raw answer
    # to src_tiqu, which performs the integer conversion itself.
    src_tiqu(input("提取多少页:"))