ftplib获取Nasa遥感数据产品

  ladsweb.nascom.nasa.gov的ftp网站提供MODIS的原始数据和产品数据的下载,通过FlashFXP软件可以实现批量传输,但这样下载存在一个弊端:无法批量下载指定区域的数据。MODIS的数据文件名中包含固定行列值,用它去匹配文件名,是完成批量下载的思路。  python中的ftplib提供ftp网站的连接以及数据的上传下载工作,借助于re模块完成文件名匹配,从而完成整个下载工作。参考Mark Lutz书中的一些代码设计ftptools类,而后派生出一个下载目录树文件的类,用以完成MODIS数据的下载。下面以温度产品下载为例叙述。

MODIS的温度产品为MOD11A1或者MYD11A1等,具体可以查询MODIS的产品说明。该产品位于FTP站点的allData下的5目录下,具体的产品在FTP上的分布,可以参考FTP根目录下的README,如此可以确定远程下载目录。另外,需要确定自己研究区域对应的MODIS行列号,方便匹配下载。(代码已上传Github,目前只是能初步运行,仍有许多不足,感兴趣的同仁可以一起完善:https://github.com/zgcao/retrmodis)

具体思路:

(1)配置连接信息,即:连接网站,远程目录,用户名,密码等,将该项工作封装为一个方法来完成:

def config_transf_para(self,site=ftp_server,user = ftp_username,remote_dir = ftp_defaultdir):
        '''Collect the connection and transfer settings on the instance.

        site       -- FTP host to connect to
        user       -- login name
        remote_dir -- remote directory to change into after login

        The password, the local directory and the "clean first" flag are
        obtained from getpassword(), getlocaldir() and getcleanall().
        The local directory is created when it does not exist yet.
        '''
        self.nonpassive = False
        self.site = site
        self.user = user
        self.remotedir = remote_dir
        # Ask for the password only after site/user are stored, so that
        # getpassword() can use them.
        self.password = self.getpassword()

        localdir = self.getlocaldir()
        # makedirs also creates missing parent directories, which a plain
        # mkdir would reject.
        if not os.path.isdir(localdir):
            os.makedirs(localdir)
        self.localdir = localdir

        self.is_cleanall = self.getcleanall()

(2) FTP服务连接,登录,切换目录,使用: FTP,login, cwd等方法完成;

(3)寻找文件名,进行文件名的匹配。

# Tile tag as it appears in MODIS file names, e.g. h=28, v=5 -> 'h28v05';
# both indices are zero-padded to two digits.
pattern = r'.*h'+str(self.h).zfill(2)+'v'+str(self.v).zfill(2)+'.*'
re_compile = re.compile(pattern)
# Match with the compiled pattern: re.match() itself needs both a pattern
# and a string.
match = re_compile.match(file)

(4)文件下载。鉴于文件夹中可能还包含子文件夹,这里采用递归的思路:首先判断当前条目是否为目录,如果是文件则直接下载;如果是目录,则进入该目录后递归调用本函数继续下载。

def is_dir(self,line):
        '''Tell whether one line of a remote directory listing is a directory.

        A listing line looks like
        "dr-xr-xr-x    1 ftp      ftp             0 Jan 01  1970 AMS";
        a leading "d" in the first field marks a directory.

        Side effect: the last whitespace-separated field (the file or
        directory name) is stored in self.fname.  A name containing
        spaces is therefore truncated to its last word.

        Returns True for a directory, False otherwise (including blank
        lines, for which self.fname is set to '').
        '''
        parsed = line.split()
        if not parsed:
            # Blank line: nothing to index, so report "not a directory".
            self.fname = ''
            return False
        self.fname = parsed[-1]
        return parsed[0].startswith('d')
def retr_onefile(self,filename,localdir):
        '''Download one remote file, but only if it belongs to the wanted tile.

        filename -- name of the file in the current remote directory
        localdir -- local directory the file is written to

        The file is skipped (None is returned, nothing is transferred)
        unless its name contains the tile tag hXXvYY built from self.h
        and self.v, both zero-padded to two digits.  Text files are
        fetched line by line, everything else in binary mode.
        '''
        tile = 'h'+str(self.h).zfill(2)+'v'+str(self.v).zfill(2)
        if not re.match(r'.*'+tile+'.*',filename):
            return

        local_path = os.path.join(localdir,filename)
        cmd = 'RETR '+filename
        # "with" closes the local file even when the transfer raises.
        if self.is_text_filetype(filename):
            with open(local_path,'w',encoding = self.connection.encoding) as out:
                # retrlines hands over lines without their line ending.
                self.connection.retrlines(cmd,lambda line:out.write(line+'\n'))
        else:
            with open(local_path,'wb') as out:
                self.connection.retrbinary(cmd,out.write)
        print(cmd,'    ',time.asctime())

(5)退出服务。

完整代码如下:

#!/usr/bin/env python
#-*-coding:utf-8 -*-

'''下载目录树文件'''
import os,sys,time
from ftptools import ftptools
from mimetypes import add_type
import re

class retrAll(ftptools):
    '''Download a remote FTP directory tree, keeping only one MODIS tile.

    The remote tree is walked recursively and its directories are
    mirrored locally; a file is fetched only when its name contains the
    hXXvYY tile tag built from ``h`` and ``v``.
    '''

    def __init__(self,h,v):
        '''Remember the tile to download.

        h -- horizontal tile index (e.g. 28)
        v -- vertical tile index (e.g. 5)
        '''
        # Counters are initialised here; nothing in this class updates them.
        self.fcount = self.dcount = 0
        self.h = h
        self.v = v

    def getlocaldir(self):
        '''Return the hard-coded local download root.'''
        return r'F:\MYD11A1'

    def getpassword(self):
        '''Return None, so no password is prompted for.'''
        return None

    def getcleanall(self):
        '''Return False: the target directory is never cleaned first.'''
        return False

    def retr_onefile(self,filename,localdir):
        '''Download one remote file, but only if it belongs to the wanted tile.

        filename -- name of the file in the current remote directory
        localdir -- local directory the file is written to

        The file is skipped (None is returned, nothing is transferred)
        unless its name contains the tile tag hXXvYY built from self.h
        and self.v, both zero-padded to two digits.  Text files are
        fetched line by line, everything else in binary mode.
        '''
        tile = 'h'+str(self.h).zfill(2)+'v'+str(self.v).zfill(2)
        if not re.match(r'.*'+tile+'.*',filename):
            return

        local_path = os.path.join(localdir,filename)
        cmd = 'RETR '+filename
        # "with" closes the local file even when the transfer raises.
        if self.is_text_filetype(filename):
            with open(local_path,'w',encoding = self.connection.encoding) as out:
                # retrlines hands over lines without their line ending.
                self.connection.retrlines(cmd,lambda line:out.write(line+'\n'))
        else:
            with open(local_path,'wb') as out:
                self.connection.retrbinary(cmd,out.write)
        print(cmd,'    ',time.asctime())

    def is_dir(self,line):
        '''Tell whether one line of a remote directory listing is a directory.

        A listing line looks like
        "dr-xr-xr-x    1 ftp      ftp             0 Jan 01  1970 AMS";
        a leading "d" in the first field marks a directory.

        Side effect: the last whitespace-separated field (the file or
        directory name) is stored in self.fname.  A name containing
        spaces is therefore truncated to its last word.

        Returns True for a directory, False otherwise (including blank
        lines, for which self.fname is set to '').
        '''
        parsed = line.split()
        if not parsed:
            # Blank line: nothing to index, so report "not a directory".
            self.fname = ''
            return False
        self.fname = parsed[-1]
        return parsed[0].startswith('d')

    def retr_allfiles(self,localdir):
        '''Mirror the current remote directory, recursively, into localdir.

        Files go through retr_onefile() (which applies the tile filter);
        for each sub-directory a local directory of the same name is
        created, the connection changes into it, the method recurses,
        and the connection changes back to the parent.
        '''
        all_files = []
        # dir() would print to stdout by default; collect the listing
        # lines in a list instead.
        self.connection.dir(all_files.append)
        for remotefile in all_files:
            if not self.is_dir(remotefile):
                # Plain file: download it directly.
                self.retr_onefile(self.fname,localdir)
                continue

            # Keep a local copy: the recursive call overwrites self.fname.
            dirname = self.fname
            if dirname in ('.','..'):
                # Descending into these entries would never terminate.
                continue
            currentdir = os.path.join(localdir,dirname)
            if not os.path.exists(currentdir):os.mkdir(currentdir)
            self.connection.cwd(dirname)
            self.retr_allfiles(currentdir)
            self.connection.cwd('..')
                
            
        #
        
if __name__=='__main__':
    # Target area "hongze": MODIS tile h28v05.
    h,v = 28,5
    nasa_web = 'ladsweb.nascom.nasa.gov'
    # Remote directory holding the MYD11A1 temperature product.
    remotedir = '/allData/5/MYD11A1/'

    retrall = retrAll(h,v)
    retrall.config_transf_para(site = nasa_web,user = None,remote_dir = remotedir)

    def download_tree():
        '''Fetch the whole remote tree into the configured local directory.'''
        retrall.retr_allfiles(retrall.localdir)

    retrall.run(transfer_action=download_tree)

ftptools类:

#!/usr/bin/env python
#-*-coding:utf-8 -*-

'''ftptools:upload and download all files under an individuals of folder.'''

import os,sys,time
from mimetypes import guess_type
from ftplib import FTP
from getpass import getpass

# Default connection settings; they are the default arguments of
# ftptools.config_transf_para and are passed explicitly by the __main__ block.
ftp_server = '159.226.73.210'
ftp_username = 'zhigang'
# NOTE(review): the first path component looks like mis-decoded (mojibake)
# non-ASCII text -- confirm the real remote directory name before relying on it.
ftp_defaultdir = './ÏîÄ¿×é³ÉԱ˽È˿ռä/zgcao/test-python/'

class ftptools:
    '''Upload or download all files of one directory over FTP.

    Usage: call config_transf_para() to collect the settings, then run()
    with the clear/transfer callables to execute.  Subclasses can
    override getlocaldir(), getpassword() and getcleanall() to supply
    the settings without user interaction.
    '''

    def getlocaldir(self):
        '''Return the local directory: sys.argv[2] when given, else '.'.'''
        # sys.argv[2] only exists when at least two command-line arguments
        # were passed; an empty argument also falls back to '.'.
        if len(sys.argv)>2 and sys.argv[2]:
            return sys.argv[2]
        return '.'

    def getpassword(self):
        '''Prompt for the password of self.user on self.site.'''
        return getpass('Password for %s on %s:' %(self.user,self.site))

    def getcleanall(self):
        '''Ask whether to clean the target directory; True if the answer starts with y or Y.'''
        return input('Clean target dir first?(y/n)')[:1] in ['y','Y']

    def config_transf_para(self,site=ftp_server,user = ftp_username,remote_dir = ftp_defaultdir):
        '''Collect the connection and transfer settings on the instance.

        site       -- FTP host to connect to
        user       -- login name
        remote_dir -- remote directory to change into after login

        The password, the local directory and the "clean first" flag are
        obtained from getpassword(), getlocaldir() and getcleanall().
        The local directory is created when it does not exist yet.
        The flag is only stored in self.is_cleanall; no method of this
        class reads it.
        '''
        self.nonpassive = False
        self.site = site
        self.user = user
        self.remotedir = remote_dir
        # Ask for the password only after site/user are stored, because
        # the prompt shows them.
        self.password = self.getpassword()

        localdir = self.getlocaldir()
        # makedirs also creates missing parent directories, which a plain
        # mkdir would reject.
        if not os.path.isdir(localdir):
            os.makedirs(localdir)
        self.localdir = localdir

        self.is_cleanall = self.getcleanall()

    def is_text_filetype(self,localfile):
        '''Return True when the file name is guessed to be uncompressed text.

        The decision is based on mimetypes.guess_type(): the main MIME
        type must be "text" and no content encoding may be reported.
        Unknown types count as binary.
        '''
        mimetype,encoding = guess_type(localfile)
        maintype = (mimetype or '?/?').split('/')[0]
        return maintype=='text' and encoding is None

    def ftp_connect(self,verbose = True):
        '''Connect to self.site, log in, change to self.remotedir.

        The open connection is stored in self.connection.  With verbose
        set, the host and the server's login reply are printed.
        '''
        conn = FTP(self.site)
        if verbose:print('Connected:',self.site)
        login_reply = conn.login(self.user,self.password)
        if verbose: print(login_reply)
        conn.cwd(self.remotedir)
        if self.nonpassive: conn.set_pasv(False)
        self.connection = conn

    def clear_remotedir(self,verbose = True):
        '''Delete every entry of the current remote directory.

        The pseudo entries '.' and '..' are skipped and not counted.
        '''
        count = 0
        for item_file in self.connection.nlst():
            if item_file in ('.','..'):
                continue
            self.connection.delete(item_file)
            count += 1
            if verbose: print('Deleted: %s' % item_file)
        if verbose: print('%d files have been removed at %s on %s' %(count,time.asctime(),self.site))

    def clear_localdir(self,verbose = True):
        '''Delete the regular files directly inside self.localdir.

        Sub-directories cannot be removed with os.remove, so they are
        left in place and reported when verbose is set.
        '''
        count = 0
        for item_file in os.listdir(self.localdir):
            full_filename = os.path.join(self.localdir,item_file)
            if not os.path.isfile(full_filename):
                if verbose: print('Skipped (not a file): %s' % full_filename)
                continue
            os.remove(full_filename)
            count += 1
            if verbose: print('Deleted: %s' % full_filename)
        if verbose: print('%d files have been removed at %s' %(count,time.asctime()))

    def retr_onefile(self,filename,localdir):
        '''Download filename from the current remote directory into localdir.

        Text files (see is_text_filetype) are fetched line by line and
        written with the connection's encoding; everything else is
        fetched in binary mode.
        '''
        local_path = os.path.join(localdir,filename)
        # "with" closes the local file even when the transfer raises.
        if self.is_text_filetype(filename):
            with open(local_path,'w',encoding = self.connection.encoding) as out:
                # retrlines hands over lines without their line ending.
                self.connection.retrlines('RETR '+filename,lambda line:out.write(line+'\n'))
        else:
            with open(local_path,'wb') as out:
                self.connection.retrbinary('RETR '+filename,out.write)

    def upload_onefile(self,filename,localdir):
        '''Upload localdir/filename in binary mode to the current remote directory.'''
        full_filename = os.path.join(localdir,filename)
        with open(full_filename,'rb') as file_read:
            self.connection.storbinary('STOR '+filename,file_read)

    def retr_files(self,verbose = True):
        '''Download every name returned by nlst() into self.localdir.'''
        for remote_name in self.connection.nlst():
            self.retr_onefile(remote_name,self.localdir)
            if verbose: print(remote_name,' has downloaded...')

    def upload_files(self,verbose = True):
        '''Upload every regular file directly inside self.localdir.

        Sub-directories cannot be opened for reading and are skipped.
        '''
        for local_name in os.listdir(self.localdir):
            if not os.path.isfile(os.path.join(self.localdir,local_name)):
                continue
            self.upload_onefile(local_name,self.localdir)
            if verbose: print(local_name,' has uploaded...')

    def run(self,clear_target = lambda:None,transfer_action = lambda:None):
        '''Connect, run clear_target() and transfer_action(), then disconnect.

        clear_target    -- callable run first (default: do nothing)
        transfer_action -- callable doing the transfer (default: do nothing)

        On success the session ends with QUIT.  If either callable
        raises, the connection is closed and the exception is re-raised.
        '''
        self.ftp_connect()
        try:
            clear_target()
            print('Start downloading...,%s' % time.asctime())
            transfer_action()
        except BaseException:
            # The session state is unknown after a failure: just drop the
            # socket instead of sending QUIT, and let the error propagate.
            self.connection.close()
            raise
        else:
            self.connection.quit()
        
if __name__=='__main__':
    # Usage: python ftptools.py [upload|download] [localdir]
    # Any mode other than "upload" (case-insensitive) means download.
    task_mode = sys.argv[1] if len(sys.argv)>1 else 'download'

    ftp_tool = ftptools()
    ftp_tool.config_transf_para(site=ftp_server,user = ftp_username,remote_dir = ftp_defaultdir)

    if task_mode.lower()=='upload':
        clear_target,transfer_action = ftp_tool.clear_remotedir,ftp_tool.upload_files
    else:
        clear_target,transfer_action = ftp_tool.clear_localdir,ftp_tool.retr_files

    # Honour the answer to "Clean target dir first?": only wipe the target
    # when the user agreed.
    if not ftp_tool.is_cleanall:
        clear_target = lambda:None

    ftp_tool.run(clear_target=clear_target,transfer_action=transfer_action)
        
        
        
    # os.chdir(r'E:\02 GitHub\learningpy\web\client')
    # os.system('python ftptools.py download D:\test-python')

运行效果截图:

 

image

posted on 2016-01-22 18:31  未济的Lakers  阅读(954)  评论(0)    收藏  举报

导航