import os
import re
from urllib.parse import urljoin

import requests
from lxml import etree
if __name__ == '__main__':
# 如果没有JianLi文件夹存在 则创建文件夹
if not os.path.exists('./JianLi'):
os.makedirs('./JianLi')
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36 Edg/89.0.774.48'
}
url = 'https://sc.chinaz.com/jianli/free.html'
page_text = requests.get(url=url, headers=headers).text
# print(page_text)
tree = etree.HTML(page_text)
# print(tree)
all_src = tree.xpath('//div[@class="sc_warp mt20"]//p/a/@href')
# print(all_src)
for src in all_src:
all_srcs = 'https:' + src
# print(all_srcs)
jianli_content = requests.get(url=all_srcs, headers=headers).text
# print(jianli_url)
tree = etree.HTML(jianli_content)
down_url_list = tree.xpath('//div[@class="down_wrap"]//ul/li/a/@href')[0]
jianli_data = requests.get(url=down_url_list, headers=headers).content
# print(down_url_list)
# name = down_url_list.split('/')[-1]
name = tree.xpath('//div[@class="ppt_tit clearfix"]/h1/text()')[0] + '.rar'
down_name = name.encode('ISO-8859-1').decode('UTF-8')
# print(name,down_name)
JianLi_path = './JianLi/' + down_name
with open(JianLi_path, 'wb') as fp:
fp.write(jianli_data)
print(down_name, '下载成功!!!')
print('下载完成!!!')