利用pygrib读写grib2文件

import sys
import pygrib
import pandas as pd
from datetime import datetime, timedelta
from functools import reduce
import pytz
import numpy as np
import math
import os.path
import pdb
from glob import glob


DT_FORMAT = "%Y%m%d%H%M"
# Timezone: Asia/Shanghai
bjt = pytz.timezone(pytz.country_timezones['cn'][0])
# for plevel
LVS = np.array([1000, 925, 850, 700, 500, 300, 200])
VARS = ['z', 't', 'u', 'v', 'r','q']
# for surface 
LVS_sfc = np.array([0])
VARS_sfc= ['10u','10v','2t','2d','sp','cape']
# INPUT_ROOT_DIR = '/liuna/ERA_Interium/plevel/plevel'
element = sys.argv[1]
year = sys.argv[2]
month = sys.argv[3]
day   = sys.argv[4]

INPUT_ROOT_DIR = sys.argv[5] #'/mnt/e/'
OUTPUT_ROOT_DIR = sys.argv[6]


def datetime_rounding(in_dt, ceiling=False, round_hour=6):
    """Round input datetime to be on the hour.

    :param in_dt: Input Datetime object
    :param ceiling: If == True, return the ceiling value, else return floor value.
    :param round_hour: The interval of hours
    :return: rounded_dt
    """
    dt_floor = datetime(in_dt.year, in_dt.month, in_dt.day)
    # Delete time zone info to be compatible with subtraction by in_dt
    in_dt = in_dt.replace(tzinfo=None)
    delta_dt = in_dt - dt_floor
    timedelta_unit = timedelta(seconds=round_hour * 3600)
    if ceiling:
        item2 = math.ceil(delta_dt / timedelta_unit)
    else:
        item2 = math.floor(delta_dt / timedelta_unit)
    return dt_floor + item2 * timedelta_unit


def get_file_list(in_dt_start, in_dt_end, is_bjt=False):
    """
    Get the DatetimeIndex within time window -24h ~ +2h.

    :param in_dt: Input datetime, with format yyyymmddhh.
    :param is_bjt: Whether input datetime is in Beijing Time Zone.
    :return: time_list: DatetimeIndex of corresponding hours.
    """
    in_dt_start = datetime.strptime(in_dt_start, DT_FORMAT)
    in_dt_end = datetime.strptime(in_dt_end, DT_FORMAT)
    if is_bjt:
        # Set time zone to aware datetime object with timezone, then convert from BJT to UTC.
        in_dt_start = in_dt_start.astimezone(tz=bjt).astimezone(tz=pytz.utc)
        in_dt_end = in_dt_end.astimezone(tz=bjt).astimezone(tz=pytz.utc)

    dt_start = in_dt_start - timedelta(hours=12)  # 24
    dt_end = in_dt_end + timedelta(hours=6)  # 2

    dt_start_rounded = datetime_rounding(dt_start)
    dt_end_rounded = datetime_rounding(dt_end, ceiling=True)
    time_list = pd.date_range(dt_start_rounded, dt_end_rounded, freq='6H')
    return time_list


def get_file_list_from_datetimes(s_start, s_end, is_bjt=False):
    """
    Get the combined Datetime list from individual `get_file_list`.

    :param in_dt_list:
    :param is_bjt: Whether input datetime is in Beijing Time Zone.
    :return:
    """
    def dt_index2list(dtindex):
        dtindex = dtindex.tolist()
        return [item.strftime(DT_FORMAT) for item in dtindex]

    assert len(s_start) == len(s_end)
    time_lists = [get_file_list(item0, item1, is_bjt) for item0, item1 in zip(s_start, s_end)]

    if len(s_start) == 1:
        dtindexall = time_lists[0]
    else:
        dtindexall = reduce(lambda x, y: x.union(y), time_lists)

    return dtindexall
    # return dt_index2list(dtindexall)


def grbmsg_resolve(msg):
    """Return a tuple of (values, lat, lon) according to the input grbmsg."""
    from numpy import linspace
    lat = linspace(msg['latitudeOfFirstGridPointInDegrees'],
                   msg['latitudeOfLastGridPointInDegrees'], msg['Nj'])
    lon = linspace(msg['longitudeOfFirstGridPointInDegrees'] % 360,
                   msg['longitudeOfLastGridPointInDegrees'] % 360, msg['Ni'])
    values = msg.values
    return values, lat, lon


def grbmsg_subset_region(msg, la0=55.5, la1=15.75, lo0=72.75, lo1=136.5):
    """Subtracting a region according to lats and lons.
    MESO: lat: 65-15, lon: 70-145
    SCMOC: lat: 0-60, lon: 70-140
    Thus - merged: lat: 15-60, lon: 70-140

    Assert latitude decreases!
    """
    from numpy import where
    values, lat, lon = grbmsg_resolve(msg)
    # Getting the indexes of the coordinates
    # the first [0] gets the numpy array from the list
    # the second [0] gets the value of the element in the numpy array
    ind_la0 = where(lat == la0)[0][0]
    ind_la1 = where(lat == la1)[0][0]
    ind_lo0 = where(lon == lo0)[0][0]
    ind_lo1 = where(lon == lo1)[0][0]
    # print(ind_la0, ind_la1, ind_lo0, ind_lo1)
    values = values[ind_la0:ind_la1 + 1, ind_lo0:ind_lo1 + 1]
    lat = lat[ind_la0:ind_la1 + 1]
    lon = lon[ind_lo0:ind_lo1 + 1]

    # Set the new values
    # Return a dict of parameters
    msg['Ni'] = lon.size
    msg['Nj'] = lat.size
    msg['latitudeOfFirstGridPointInDegrees'] = la0
    msg['latitudeOfLastGridPointInDegrees'] = la1
    msg['longitudeOfFirstGridPointInDegrees'] = lo0
    msg['longitudeOfLastGridPointInDegrees'] = lo1
    msg['values'] = values

    return msg


def grbmsg_convert(grbmsg):
    """lat lon limiting and output new data."""
    return grbmsg_subset_region(grbmsg)


def grb_file_process_plevel(dt):
    # To be modified based on folder structure on server.
    # ERA5_hgt_pressure_200501010000.grib
    filename_in = os.path.join(INPUT_ROOT_DIR, 'ERA5_'+element+'_pressure_{}.grib'.format(dt.strftime("%Y%m%d%H%M")))
    grbs = pygrib.open(filename_in) 

    # Convert dt in UTC back to BJT again.
    dt_utc = dt.tz_localize(pytz.utc)
    dt_bjt = dt_utc.tz_convert(bjt)

    #pdb.set_trace()
    for item_grb in grbs:
        # pdb.set_trace()
        #print(item_grb['level'])
        #print(item_grb['shortName'])
        if (int(item_grb['level']) in LVS) and (item_grb['shortName'] in VARS):
            grbmsg = grbmsg_convert(item_grb)
            grbmsg_bin = grbmsg.tostring()

            # pygrib write msg to file
            dir_name = '{}/ECMF_ERA5/{}/{}/{}/{}/'.format(
                OUTPUT_ROOT_DIR,
                # 2019.2.12: modified dt to dt_bjt to make directory date and file date consistent.
                dt_bjt.strftime("%Y"),
                dt_bjt.strftime("%m"),
                dt_bjt.strftime("%d"),
                item_grb['shortName'].upper()
            )
            if not os.path.exists(dir_name):
                os.makedirs(dir_name)
            filename_out = os.path.join(dir_name, 'NAFP_ECMF_{}0000_ERA5_{}_{}.grib1'.format(
                dt_bjt.strftime("%Y%m%d%H"),
                item_grb['shortName'].upper(),
                item_grb['level']))
            with open(filename_out, 'wb') as f:
                f.write(grbmsg_bin)

def grb_file_process_sfc(dt):
    # To be modified based on folder structure on server.
    # ERA5_hgt_pressure_200501010000.grib
    filename_in = os.path.join(INPUT_ROOT_DIR, 'ERA5_'+element+'_pressure_{}.grib'.format(dt.strftime("%Y%m%d%H%M")))
    grbs = pygrib.open(filename_in) 

    # Convert dt in UTC back to BJT again.
    dt_utc = dt.tz_localize(pytz.utc)
    dt_bjt = dt_utc.tz_convert(bjt)

    #pdb.set_trace()
    for item_grb in grbs:
        # pdb.set_trace()
        #print(item_grb['level'])
        #print(item_grb['shortName'])
        if (item_grb['shortName'] in VARS_sfc):
            grbmsg = grbmsg_convert(item_grb)
            grbmsg_bin = grbmsg.tostring()

            # pygrib write msg to file
            dir_name = '{}/ECMF_ERA5/{}/{}/{}/{}/'.format(
                OUTPUT_ROOT_DIR,
                # 2019.2.12: modified dt to dt_bjt to make directory date and file date consistent.
                dt_bjt.strftime("%Y"),
                dt_bjt.strftime("%m"),
                dt_bjt.strftime("%d"),
                item_grb['shortName'].upper()
            )
            if not os.path.exists(dir_name):
                os.makedirs(dir_name)
            filename_out = os.path.join(dir_name, 'NAFP_ECMF_{}0000_ERA5_{}_{}.grib1'.format(
                dt_bjt.strftime("%Y%m%d%H"),
                item_grb['shortName'].upper(),
                item_grb['level']))
            with open(filename_out, 'wb') as f:
                f.write(grbmsg_bin)


def grb_file_process_print(dt):
    # To be modified based on folder structure on server.
    filename_in = os.path.join(INPUT_ROOT_DIR, 'ERA5_'+element+'_pressure_{}.grib'.format(dt.strftime("%Y%m%d%H%M")))
    print('Open file {}'.format(filename_in))
    # grbs = pygrib.open(filename_in)

    # Convert dt in UTC back to BJT again.
    # dt_bjt = dt.astimezone(tz=bjt).astimezone(tz=pytz.utc)
    dt_utc = dt.tz_localize(pytz.utc)
    dt_bjt = dt_utc.tz_convert(bjt)
    out_filename = 'NAFP_ECMF_{}0000_ERA5.grib1'.format(dt_bjt.strftime("%Y%m%d%H"))
    print('Output file name is: {}'.format(out_filename))
 

def s_start_end_gen(filelist_in):
    for item in filelist_in:
        print('Reading file {}'.format(item))
        df = pd.read_csv(item)[['start_time', 'end_time']]
        df = df.dropna()
        df = df.astype(int)
        df = df.astype(str)
        yield df['start_time'], df['end_time']


def read_from_csv(filelist_in):
    """Read from filelist and concatenate the starting and ending time of weather events.

    :param filelist_in: File list of event records.
    :return: s_start, s_end: pandas Series of starting time and ending time of weather events.
    """

    s_series = list(s_start_end_gen(filelist_in))
    s_start = pd.concat([item[0] for item in s_series])
    s_end = pd.concat([item[1] for item in s_series])

    return s_start, s_end


def main():
    #dtlist = get_file_list_from_datetimes(pd.Series(['199001020759']), pd.Series(['199001020900']), is_bjt=True)
    #print(dtlist)
    # grb_file_process(dtlist[0])

    # 2019.02.12: No need to read date from file any more.
    #file_list = glob('./example_records_csv/*.csv')
    #s1, s2 = read_from_csv(file_list)
    
    #s1 = s1[1:10]
    #s2 = s2[1:10]
    
    # 2019.02.12:
    #print(s1)
    #print(s2)
    
    # for item in s1:
    #     try:
    #         dt1 = datetime.strptime(item, DT_FORMAT)
    #     except Exception as e:
    #         print('error occurred at {}'.format(item))
    #         print(repr(e))
    #
    # for item in s2:
    #     try:
    #         dt2 = datetime.strptime(item, DT_FORMAT)
    #     except Exception as e:
    #         print('error occurred at {}'.format(item))
    #         print(repr(e))

    # regex_pattern = r'201[2-8][01]\d[0-3]\d[0-2]\d[0-5]\d'
    # var = s1[~s1.str.match(regex_pattern)]
    # var2 = s2[~s2.str.match(regex_pattern)]

    # 2019.02.12: Generate time list from loops instead of file
    #time_list = get_file_list_from_datetimes(s1, s2, is_bjt=True)
    
    #time_list = pd.date_range("2011-12-30", "2012-01-05", freq="6H").to_pydatetime()
    # DT_FORMAT = "%Y-%m-%d"
    # in_dt_start = year + "-" + month + "-"+ day
    # in_dt_start = datetime.strptime(in_dt_start, DT_FORMAT)
    # in_dt_start_str = datetime.strftime(in_dt_start - timedelta(hours=24), DT_FORMAT)
    # in_dt_end   = datetime.strftime(in_dt_start + timedelta(hours=48), DT_FORMAT)

    # month_0 = in_dt_start_str[5:7]
    # date_0  = in_dt_start_str[-2:]

    # month_1 = in_dt_end[5:7]
    # date_1  = in_dt_end[-2:] 
    # time_list = pd.date_range(year + "-" + month_0 + "-"+ date_0, year + "-" + month_1 + "-" + date_1, freq="1H")

    year1 = int(year) + 1  
    time_list = pd.date_range(year + "-" + month + "-"+ day, str(year1) + "-" + "01" + "-" + "01", freq="1H")
    for item in time_list:
        if os.path.exists(os.path.join(INPUT_ROOT_DIR, 'ERA5_'+element+'_pressure_{}.grib'.format(item.strftime("%Y%m%d%H%M")))):
            if element == 'sfc':
                grb_file_process_sfc(item)
            else:
                grb_file_process_plevel(item)
            print('Processing {}'.format(item))
        else:
            print('WARNING: Grib file for {} does not exist!'.format(item))
            print('ERA5_'+element+'_pressure_{}.grib'.format(item.strftime("%Y%m%d%H%M")))




if __name__ == '__main__':
    main()

 

posted @ 2020-03-27 13:16  Littlefish-  阅读(0)  评论(0)    收藏  举报
Document