Scraping free resume templates from 站长素材 (sc.chinaz.com)

The script below requests the free-template listing page, extracts each template's detail-page link with XPath, follows each link to its download URL, and saves the .rar archive into a local JianLi folder.

import requests
import os
from lxml import etree

if __name__ == '__main__':
    # Create the JianLi folder if it does not already exist
    if not os.path.exists('./JianLi'):
        os.makedirs('./JianLi')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36 Edg/89.0.774.48'
    }
    url = 'https://sc.chinaz.com/jianli/free.html'
    page_text = requests.get(url=url, headers=headers).text
    # print(page_text)
    tree = etree.HTML(page_text)
    # print(tree)
    all_src = tree.xpath('//div[@class="sc_warp  mt20"]//p/a/@href')
    # print(all_src)
    for src in all_src:
        # Each href is protocol-relative (starts with //), so prepend the scheme
        detail_url = 'https:' + src
        detail_text = requests.get(url=detail_url, headers=headers).text
        tree = etree.HTML(detail_text)
        # The download section lists one or more links; take the first one
        down_url = tree.xpath('//div[@class="down_wrap"]//ul/li/a/@href')[0]
        jianli_data = requests.get(url=down_url, headers=headers).content
        # The <h1> title is the template name; requests decoded the page as
        # ISO-8859-1, so re-encode and decode as UTF-8 to recover the Chinese
        # characters before using the title as a filename
        name = tree.xpath('//div[@class="ppt_tit clearfix"]/h1/text()')[0] + '.rar'
        down_name = name.encode('ISO-8859-1').decode('UTF-8')
        JianLi_path = './JianLi/' + down_name
        with open(JianLi_path, 'wb') as fp:
            fp.write(jianli_data)
        print(down_name, 'downloaded successfully!')
    print('All downloads finished!')
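A note on the encode('ISO-8859-1').decode('UTF-8') round-trip above: it is needed because requests falls back to ISO-8859-1 when the response's Content-Type header names no charset, so the UTF-8 page text comes out garbled. A minimal alternative sketch is to set the response encoding explicitly before reading .text, so the extracted title needs no fixing; the helper name fetch_html, the bare-bones User-Agent, and the 10-second timeout here are my own illustration, not part of the original script.

import requests
from lxml import etree

headers = {'User-Agent': 'Mozilla/5.0'}

def fetch_html(url):
    # Fetch a page, fail loudly on HTTP errors, and decode it as UTF-8 so
    # XPath results (including the Chinese template title) come out correct.
    resp = requests.get(url=url, headers=headers, timeout=10)
    resp.raise_for_status()
    resp.encoding = 'utf-8'  # or: resp.encoding = resp.apparent_encoding
    return etree.HTML(resp.text)

# Usage: a title extracted from this tree can be used as a filename directly.
tree = fetch_html('https://sc.chinaz.com/jianli/free.html')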