Python 自动下载文件源码

自动下载服务器各目录文件

import threading
from multiprocessing import Pool,Process
import requests
import re,os
# new_content = r.content.decode('utf-8')

# First pass: list the entries linked from a directory-index page.
def request_dirname(url, pattern, timeout=30):
    """Fetch ``url`` and return the URL of every link captured by ``pattern``.

    Args:
        url: Address of the page to fetch.
        pattern: Regular expression applied to the page text; every match
            returned by ``re.findall`` is joined onto ``url``.
        timeout: Seconds to wait on the server before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        A list of joined URLs, in the order the matches occur in the page.
    """
    # URLs always use "/" as the separator.  os.path.join follows the host
    # platform (it inserts "\" on Windows), so the POSIX flavour is used
    # explicitly.
    import posixpath

    r = requests.get(url, timeout=timeout)
    # Decode the body rather than calling str() on the bytes object: the
    # bytes repr ("b'...'") escapes non-ASCII bytes, backslashes and line
    # breaks, which corrupts the captured link text.
    page = r.content.decode('utf-8', errors='replace')
    return [posixpath.join(url, href) for href in re.findall(pattern, page)]

# Second pass: collect the links found inside each first-level entry.
def request_filename(url, pattern):
    """Return the links of every page that is itself linked from ``url``.

    ``request_dirname`` is called once on ``url`` and then once on each URL
    it returned.

    Args:
        url: Address of the top-level page.
        pattern: Regular expression handed unchanged to ``request_dirname``.

    Returns:
        A list of lists: one inner list per first-level entry, holding that
        entry's links with the first one dropped.
    """
    # NOTE(review): the first link of each page is skipped -- presumably the
    # parent-directory entry of the listing; confirm against the server.
    return [
        request_dirname(entry_url, pattern)[1:]
        for entry_url in request_dirname(url, pattern)
    ]

# Third pass: flatten the per-entry lists and record every URL in a text file.
def filename_urls(url, pattern):
    """Collect all second-level URLs under ``url`` and save them to disk.

    The URLs are written, one per line, to ``filename_url.txt`` in the
    current working directory; any previous content of that file is
    replaced.

    Args:
        url: Address of the top-level page.
        pattern: Regular expression handed unchanged to ``request_filename``.

    Returns:
        A flat list of the URLs, in the order they were written.
    """
    file_compare_list = request_filename(url, pattern)
    f_urls = [file_url for group in file_compare_list for file_url in group]
    # Open the file once and write everything in a single pass instead of
    # reopening it in append mode for every URL.  The encoding is explicit
    # so the result does not depend on the platform's locale.
    with open('filename_url.txt', 'w', encoding='utf-8') as f_txt:
        f_txt.writelines(file_url + '\n' for file_url in f_urls)
    return f_urls

# Create the target folder if necessary, then download the file into it.
def build_dir(url,ICBC,current_path):
    """Ensure the destination directory for ``url`` exists and download it.

    The destination is ``current_path/ICBC/<sub-directory>``, where the
    sub-directory name is the second-to-last "/"-separated segment of
    ``url``.

    Args:
        url: Address of the file to download.
        ICBC: Name of the folder, below ``current_path``, that receives the
            download.
        current_path: Base directory under which ``ICBC`` is created.
    """
    path = url.split('/')[-2]  # sub-directory name taken from the URL
    dir_path = os.path.join(current_path, ICBC, path)
    if not os.path.exists(dir_path):
        # exist_ok=True tolerates the directory being created by someone
        # else between the check above and this call.
        os.makedirs(dir_path, exist_ok=True)
        print('%s已创建'%path)
    # Fall through to the download directly; there is no need to re-enter
    # this function after the directory has been created.
    download_files(url, ICBC, current_path)

# Download a single file.
def download_files(url,ICBC,current_path, timeout=30):
    """Download ``url`` into ``current_path/ICBC/<sub-directory>``.

    The file name is the last "/"-separated segment of ``url`` and the
    sub-directory is the segment before it.  The destination directory must
    already exist.

    Args:
        url: Address of the file to download.
        ICBC: Name of the folder, below ``current_path``, that receives the
            download.
        current_path: Base directory that contains ``ICBC``.
        timeout: Seconds to wait on the server before ``requests`` raises a
            timeout error instead of blocking indefinitely.
    """
    filename = url.split('/')[-1]
    path = url.split('/')[-2]
    work_path = os.path.join(current_path, ICBC, path)
    print(work_path)
    r = requests.get(url, timeout=timeout)
    # Write through the full path instead of os.chdir(): changing the
    # process-wide working directory leaks into every later call and makes
    # a relative ``current_path`` resolve incorrectly from the second
    # download onwards.
    with open(os.path.join(work_path, filename), 'wb') as f:
        f.write(r.content)
    # Show what the destination directory now contains.
    print(os.listdir(work_path))

def run(ICBC,urls_results,current_path):
    """Download every URL in ``urls_results`` via ``build_dir``.

    Args:
        ICBC: Folder name passed through to ``build_dir``.
        urls_results: Iterable of file URLs to download, processed in order.
        current_path: Base directory passed through to ``build_dir``.
    """
    for file_url in urls_results:
        build_dir(file_url, ICBC, current_path)

if __name__ == '__main__':
    current_path = os.getcwd()
    pattern1 = r'<a href="(.*?)">'
    url1 = 'http://172.17.3.162:8081/'
    ABC = filename_urls(url1, pattern1)
    # NOTE(review): the first three collected URLs are skipped -- presumably
    # entries that are not downloadable files; confirm against the server.
    urls_results = ABC[3:]

    # Twenty worker processes; each task downloads the full URL list into
    # its own folder (ICBC0 .. ICBC19).
    pool = Pool(processes=20)

    pending = []
    for i in range(20):
        msg = 'ICBC%d'%i
        print(msg)
        pending.append((msg, pool.apply_async(run,(msg,urls_results,current_path))))

    pool.close()
    pool.join()

    # An exception raised inside a worker is stored in its AsyncResult and
    # only surfaces when get() is called; without this loop a failed task
    # would go completely unnoticed.
    for msg, result in pending:
        try:
            result.get()
        except Exception as exc:
            print('%s failed: %r' % (msg, exc))
    print('done')

结论:

posted @ 2021-01-25 08:59  凌波樂  阅读(498)  评论(0)    收藏  举报