ftplib获取Nasa遥感数据产品
ladsweb.nascom.nasa.gov的ftp网站提供MODIS的原始数据和产品数据的下载,通过FlashFXP软件可以实现批量传输,但这样下载存在一个弊端:无法批量下载指定区域的数据。MODIS的数据文件名中包含固定行列值,用它去匹配文件名,是完成批量下载的思路。 python中的ftplib提供ftp网站的连接以及数据的上传下载工作,借助于re模块完成文件名匹配,从而完成整个下载工作。参考Mark Lutz书中的一些代码设计ftptools类,而后派生出一个下载目录树文件的类,用以完成MODIS数据的下载。下面以温度产品下载为例叙述。
MODIS的温度产品为MOD11A1或者MYD11A1等,具体可以查询MODIS的产品说明。该产品位于FTP站点的allData目录下的5目录中,各产品在FTP上的分布可以参考FTP根目录下的README,由此可以确定远程下载目录。另外,需要确定自己研究区域对应的MODIS行列号,方便匹配下载。(代码已上传Github,目前只是能初步运行,仍有许多不足,感兴趣的同仁可以一起完善:https://github.com/zgcao/retrmodis)
具体思路:
(1)配置连接信息,即:连接网站,远程目录,用户名,密码等,将该项工作封装为一个方法来完成:
def config_transf_para(self, site=ftp_server, user=ftp_username, remote_dir=ftp_defaultdir):
    '''Store the connection settings and prepare the local target directory.

    Args:
        site: Host name or address of the FTP server.
        user: Login name handed to the FTP login later on.
        remote_dir: Remote directory to change into after login.

    Side effects:
        Sets ``nonpassive``, ``site``, ``user``, ``remotedir``, ``password``,
        ``localdir`` and ``is_cleanall`` on the instance and creates the
        local directory when it does not exist yet.
    '''
    self.nonpassive = False
    self.site = site
    self.user = user
    self.remotedir = remote_dir
    # The getters are hooks: subclasses override them to avoid prompting.
    self.password = self.getpassword()
    localdir = self.getlocaldir()
    if not os.path.exists(localdir):
        # makedirs (unlike mkdir) also creates missing parent directories.
        os.makedirs(localdir)
    self.localdir = localdir
    self.is_cleanall = self.getcleanall()
(2) FTP服务连接,登录,切换目录,使用: FTP,login, cwd等方法完成;
(3)寻找文件名,进行文件名的匹配。
# MODIS tile IDs are written hXXvYY, so both indices are zero-padded to
# two digits before they are placed in the pattern.
pattern = r'.*h' + str(self.h).zfill(2) + 'v' + str(self.v).zfill(2) + '.*'
re_compile = re.compile(pattern)
# The match has to be run on the compiled pattern; calling re.match(file)
# with a single argument raises TypeError because the pattern is missing.
match = re_compile.match(file)
(4)文件下载,鉴于文件夹中还会包含文件夹,这里借助递归的思路:首先判断当前条目是否为目录,如果是文件则直接下载,如果是目录,则进入该目录后递归调用本函数继续下载。
def is_dir(self, line):
    '''Tell whether one line of a remote directory listing is a directory.

    The listing line is expected to look like
    ``dr-xr-xr-x 1 ftp ftp 0 Jan 01 1970 AMS``; a leading ``d`` in the
    first field marks a directory.

    Side effect: the last whitespace-separated field of the line is stored
    in ``self.fname`` as the file or directory name (an empty string for a
    blank line).

    Args:
        line: One line of listing text.

    Returns:
        True if the first field starts with ``d``, otherwise False.
        Blank lines yield False instead of raising IndexError.
    '''
    parsed = line.split()
    if not parsed:
        # Nothing to parse on a blank line; report "not a directory".
        self.fname = ''
        return False
    self.fname = parsed[-1]  # file or directory name
    return parsed[0].startswith('d')
def retr_onefile(self, filename, localdir):
    '''Download ``filename`` into ``localdir`` if it belongs to the wanted tile.

    Overrides the generic download routine so that only files whose name
    contains the configured MODIS tile ID (``hXXvYY``) are fetched.

    Args:
        filename: Name of the remote file in the current remote directory.
        localdir: Local directory the file is written to.

    Returns:
        None. Files that do not match the tile ID are skipped silently.
    '''
    # Tile IDs are zero-padded to two digits on both indices (e.g. h08v05).
    pattern = r'.*h' + str(self.h).zfill(2) + 'v' + str(self.v).zfill(2) + '.*'
    if not re.match(pattern, filename):
        return
    txt_file = os.path.join(localdir, filename)
    cmd = 'RETR ' + filename
    # "with" closes the local file even when the transfer fails part-way.
    if self.is_text_filetype(filename):
        with open(txt_file, 'w', encoding=self.connection.encoding) as txt_file_write:
            # retrlines passes each line without its line ending, so it is re-added.
            self.connection.retrlines(cmd, lambda line: txt_file_write.write(line + '\n'))
    else:
        with open(txt_file, 'wb') as bin_file_write:
            self.connection.retrbinary(cmd, bin_file_write.write)
    print(cmd,' ',time.asctime())
(5)退出服务。
完整代码如下:
#!/usr/bin/env python
#-*-coding:utf-8 -*-
'''Download the files of a remote FTP directory tree that belong to one MODIS tile.'''
import os
import re
import sys
import time
from mimetypes import add_type

from ftptools import ftptools


class retrAll(ftptools):
    '''Recursive FTP downloader restricted to a single MODIS tile (hXXvYY).

    The ``get*`` hooks of the base class are overridden with fixed values so
    that no interactive prompt is shown.
    '''

    def __init__(self, h, v):
        '''Remember the horizontal (``h``) and vertical (``v``) tile indices.'''
        # Counters are initialised here; nothing in this file updates them.
        self.fcount = self.dcount = 0
        self.h = h
        self.v = v
        #add_type()

    def getlocaldir(self):
        '''Return the fixed local download directory.'''
        return r'F:\MYD11A1'

    def getpassword(self):
        '''Return None so that no password prompt is shown.'''
        return None

    def getcleanall(self):
        '''Return False: the target directory is never cleaned first.'''
        return False

    def retr_onefile(self, filename, localdir):
        '''Download ``filename`` into ``localdir`` if it belongs to the wanted tile.

        Overrides the base download routine so that only files whose name
        contains the configured tile ID are fetched; other files are
        skipped silently.

        Args:
            filename: Name of the remote file in the current remote directory.
            localdir: Local directory the file is written to.
        '''
        # Tile IDs are zero-padded to two digits on both indices (e.g. h08v05).
        pattern = r'.*h' + str(self.h).zfill(2) + 'v' + str(self.v).zfill(2) + '.*'
        if not re.match(pattern, filename):
            return
        txt_file = os.path.join(localdir, filename)
        cmd = 'RETR ' + filename
        # "with" closes the local file even when the transfer fails part-way.
        if self.is_text_filetype(filename):
            with open(txt_file, 'w', encoding=self.connection.encoding) as txt_file_write:
                # retrlines passes each line without its line ending, so it is re-added.
                self.connection.retrlines(cmd, lambda line: txt_file_write.write(line + '\n'))
        else:
            with open(txt_file, 'wb') as bin_file_write:
                self.connection.retrbinary(cmd, bin_file_write.write)
        print(cmd,' ',time.asctime())

    def is_dir(self, line):
        '''Tell whether one line of a remote directory listing is a directory.

        The listing line is expected to look like
        ``dr-xr-xr-x 1 ftp ftp 0 Jan 01 1970 AMS``; a leading ``d`` in the
        first field marks a directory.

        Side effect: the last whitespace-separated field is stored in
        ``self.fname`` (an empty string for a blank line).

        Returns:
            True if the first field starts with ``d``, otherwise False.
        '''
        parsed = line.split()
        if not parsed:
            # Blank listing line: avoid IndexError and report "not a directory".
            self.fname = ''
            return False
        self.fname = parsed[-1]  # file or directory name
        return parsed[0].startswith('d')

    def retr_allfiles(self, localdir):
        '''Mirror the current remote directory into ``localdir`` recursively.

        Files are passed to ``retr_onefile``; for each sub-directory a local
        directory of the same name is created, the connection changes into
        it, the method recurses, and the connection changes back to the
        parent afterwards.

        Args:
            localdir: Local directory that receives the current remote level.
        '''
        all_files = []
        # dir() prints to sys.stdout by default; here each listing line is
        # collected through the append callback instead.
        self.connection.dir(all_files.append)
        for remotefile in all_files:
            if not self.is_dir(remotefile):
                # Plain file: download it directly.
                self.retr_onefile(self.fname, localdir)
                continue
            # Copy the name now: the recursive call overwrites self.fname.
            dirname = self.fname
            if dirname in ('.', '..'):
                # Following these entries would recurse without end.
                continue
            currentdir = os.path.join(localdir, dirname)
            if not os.path.exists(currentdir):
                os.mkdir(currentdir)
            self.connection.cwd(dirname)  # descend into the sub-directory
            self.retr_allfiles(currentdir)
            self.connection.cwd('..')


if __name__ == '__main__':
    # hongze, h28v05
    h = 28
    v = 5
    retrall = retrAll(h, v)
    nasa_web = 'ladsweb.nascom.nasa.gov'
    remotedir = '/allData/5/MYD11A1/'  # directory of the MYD11A1 temperature product
    retrall.config_transf_para(site=nasa_web, user=None, remote_dir=remotedir)
    retrall.run(transfer_action=lambda: retrall.retr_allfiles(retrall.localdir))
ftptools类:
#!/usr/bin/env python
#-*-coding:utf-8 -*-
'''ftptools: upload or download all files of a single folder over FTP.'''
import os
import sys
import time
from ftplib import FTP
from getpass import getpass
from mimetypes import guess_type

ftp_server = '159.226.73.210'
ftp_username = 'zhigang'
# NOTE(review): the non-ASCII part looks like GBK bytes read as Latin-1;
# this may be deliberate for a Latin-1 FTP control connection - confirm
# before changing it.
ftp_defaultdir = './ÏîÄ¿×é³ÉԱ˽È˿ռä/zgcao/test-python/'


class ftptools:
    '''Small FTP helper that transfers every file of one directory.

    The ``get*`` methods are hooks that subclasses may override to supply
    settings without prompting.
    '''

    def getlocaldir(self):
        '''Return the local directory: third command-line argument or ``.``.'''
        # sys.argv[2] only exists when at least three entries are present.
        if len(sys.argv) > 2 and sys.argv[2]:
            return sys.argv[2]
        return '.'

    def getpassword(self):
        '''Prompt for the password of ``self.user`` on ``self.site``.'''
        return getpass('Password for %s on %s:' %(self.user,self.site))

    def getcleanall(self):
        '''Ask whether the target directory should be cleaned first.'''
        return input('Clean target dir first?(y/n)')[:1] in ['y','Y']

    def config_transf_para(self, site=ftp_server, user=ftp_username, remote_dir=ftp_defaultdir):
        '''Store the connection settings and prepare the local directory.

        Args:
            site: Host name or address of the FTP server.
            user: Login name passed to ``FTP.login``.
            remote_dir: Remote directory to change into after login.
        '''
        self.nonpassive = False
        self.site = site
        self.user = user
        self.remotedir = remote_dir
        # getpassword() reads self.user and self.site, so it runs after them.
        self.password = self.getpassword()
        localdir = self.getlocaldir()
        if not os.path.exists(localdir):
            # makedirs (unlike mkdir) also creates missing parent directories.
            os.makedirs(localdir)
        self.localdir = localdir
        self.is_cleanall = self.getcleanall()

    def is_text_filetype(self, localfile):
        '''Return True if the name maps to an uncompressed ``text/*`` MIME type.'''
        mimetype, encoding = guess_type(localfile)
        maintype = (mimetype or '?/?').split('/')[0]
        return maintype == 'text' and encoding is None

    def ftp_connect(self, verbose=True):
        '''Connect, log in, change to the remote directory and keep the connection.'''
        conn = FTP(self.site)
        if verbose:
            print('Connected:',self.site)
        reply = conn.login(self.user, self.password)
        if verbose:
            print(reply)
        conn.cwd(self.remotedir)
        if self.nonpassive:
            conn.set_pasv(False)
        #print(conn.getwelcome())
        self.connection = conn

    def clear_remotedir(self, verbose=True):
        '''Delete every entry returned by ``nlst()`` in the current remote directory.'''
        count = 0
        for item_file in self.connection.nlst():
            if item_file in ['.','..']:
                continue
            self.connection.delete(item_file)
            count = count+1
            if verbose:
                print('Deleted: %s' % item_file)
        if verbose:
            print('%d files have been removed at %s on %s' %(count,time.asctime(),self.site))

    def clear_localdir(self, verbose=True):
        '''Delete the regular files located directly in ``self.localdir``.'''
        count = 0
        for item_file in os.listdir(self.localdir):
            full_filename = os.path.join(self.localdir, item_file)
            if not os.path.isfile(full_filename):
                # os.remove cannot delete directories; leave them in place.
                continue
            os.remove(full_filename)
            count = count+1
            if verbose:
                print('Deleted: %s' % full_filename)
        if verbose:
            print('%d files have been removed at %s' %(count,time.asctime()))

    def retr_onefile(self, filename, localdir):
        '''Download ``filename`` from the current remote directory into ``localdir``.

        Text files (per ``is_text_filetype``) are fetched line by line and
        written with the connection's encoding; everything else is fetched
        in binary mode.
        '''
        # Honour the localdir argument instead of always using self.localdir.
        txt_file = os.path.join(localdir, filename)
        # "with" closes the local file even when the transfer fails part-way.
        if self.is_text_filetype(filename):
            with open(txt_file, 'w', encoding=self.connection.encoding) as txt_file_write:
                # retrlines passes each line without its line ending, so it is re-added.
                self.connection.retrlines('RETR '+filename,
                                          lambda line: txt_file_write.write(line+'\n'))
        else:
            with open(txt_file, 'wb') as bin_file_write:
                self.connection.retrbinary('RETR '+filename, bin_file_write.write)

    def upload_onefile(self, filename, localdir):
        '''Upload ``localdir/filename`` in binary mode under the same name.'''
        full_filename = os.path.join(localdir, filename)
        with open(full_filename, 'rb') as file_read:
            self.connection.storbinary('STOR '+filename, file_read)

    def retr_files(self, verbose=True):
        '''Download every entry listed by ``nlst()`` into ``self.localdir``.'''
        for file in self.connection.nlst():
            self.retr_onefile(file, self.localdir)
            if verbose:
                print(file,' has downloaded...')

    def upload_files(self, verbose=True):
        '''Upload every entry of ``self.localdir`` to the current remote directory.'''
        for file in os.listdir(self.localdir):
            self.upload_onefile(file, self.localdir)
            if verbose:
                print(file,' has uploaded...')

    def run(self, clear_target=lambda:None, transfer_action=lambda:None):
        '''Connect, run the optional clean-up and the transfer, then quit.

        Args:
            clear_target: Callable run before the transfer.
            transfer_action: Callable that performs the transfer itself.
        '''
        self.ftp_connect()
        try:
            clear_target()
            print('Start downloading...,%s' % time.asctime())
            transfer_action()
        finally:
            # End the session even when the clean-up or the transfer raises.
            self.connection.quit()


if __name__ == '__main__':
    ftp_tool = ftptools()
    task_mode = 'download'
    if len(sys.argv) > 1:
        task_mode = sys.argv[1]
    ftp_tool.config_transf_para(site=ftp_server, user=ftp_username, remote_dir=ftp_defaultdir)
    if task_mode.lower() == 'upload':
        ftp_tool.run(clear_target=ftp_tool.clear_remotedir, transfer_action=ftp_tool.upload_files)
    else:
        ftp_tool.run(clear_target=ftp_tool.clear_localdir, transfer_action=ftp_tool.retr_files)
    # os.chdir(r'E:\02 GitHub\learningpy\web\client')
    # os.system('python ftptools.py download D:\test-python')
运行效果截图:

浙公网安备 33010602011771号