import sys
import pygrib
import pandas as pd
from datetime import datetime, timedelta
from functools import reduce
import pytz
import numpy as np
import math
import os.path
DT_FORMAT = "%Y%m%d%H%M"
# Timezone: Asia/Shanghai
bjt = pytz.timezone(pytz.country_timezones['cn'][0])
# Pressure-level files: levels (hPa) and GRIB short names to keep.
LVS = np.array([1000, 925, 850, 700, 500, 300, 200])
VARS = ['z', 't', 'u', 'v', 'r', 'q']
# Surface files: GRIB short names to keep (LVS_sfc is currently unused but kept for symmetry).
LVS_sfc = np.array([0])
VARS_sfc = ['10u', '10v', '2t', '2d', 'sp', 'cape']
# Command-line arguments (positional):
#   element: variable group in the input file name, e.g. 'hgt'; use 'sfc' for surface fields
#   year, month, day: start date of the period to process
#   INPUT_ROOT_DIR: directory holding the ERA5_<element>_pressure_<yyyymmddHHMM>.grib files
#   OUTPUT_ROOT_DIR: root directory for the converted GRIB1 output
element = sys.argv[1]
year = sys.argv[2]
month = sys.argv[3]
day = sys.argv[4]
INPUT_ROOT_DIR = sys.argv[5]   # e.g. '/mnt/e/'
OUTPUT_ROOT_DIR = sys.argv[6]
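# Illustrative, commented-out invocation (the script name and paths below are
# placeholders, not part of the original code):
#   python this_script.py hgt 2005 01 01 /mnt/e/ /data/out/
#   python this_script.py sfc 2005 01 01 /mnt/e/ /data/out/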
def datetime_rounding(in_dt, ceiling=False, round_hour=6):
"""Round input datetime to be on the hour.
:param in_dt: Input Datetime object
:param ceiling: If == True, return the ceiling value, else return floor value.
:param round_hour: The interval of hours
:return: rounded_dt
"""
dt_floor = datetime(in_dt.year, in_dt.month, in_dt.day)
    # Drop any time zone info so the subtraction against the naive dt_floor works.
in_dt = in_dt.replace(tzinfo=None)
delta_dt = in_dt - dt_floor
timedelta_unit = timedelta(seconds=round_hour * 3600)
    if ceiling:
        n_intervals = math.ceil(delta_dt / timedelta_unit)
    else:
        n_intervals = math.floor(delta_dt / timedelta_unit)
    return dt_floor + n_intervals * timedelta_unit
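# Illustrative, commented-out check of datetime_rounding (values verified by hand):
#   datetime_rounding(datetime(2015, 6, 1, 7, 30))               -> 2015-06-01 06:00
#   datetime_rounding(datetime(2015, 6, 1, 7, 30), ceiling=True) -> 2015-06-01 12:00
#   datetime_rounding(datetime(2015, 6, 1, 7, 30), round_hour=3) -> 2015-06-01 06:00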
def get_file_list(in_dt_start, in_dt_end, is_bjt=False):
"""
Get the DatetimeIndex within time window -24h ~ +2h.
:param in_dt: Input datetime, with format yyyymmddhh.
:param is_bjt: Whether input datetime is in Beijing Time Zone.
:return: time_list: DatetimeIndex of corresponding hours.
"""
in_dt_start = datetime.strptime(in_dt_start, DT_FORMAT)
in_dt_end = datetime.strptime(in_dt_end, DT_FORMAT)
    if is_bjt:
        # Interpret the naive input datetimes as Beijing Time, then convert to UTC.
        in_dt_start = bjt.localize(in_dt_start).astimezone(pytz.utc)
        in_dt_end = bjt.localize(in_dt_end).astimezone(pytz.utc)
    dt_start = in_dt_start - timedelta(hours=12)  # window opens 12 h before the start
    dt_end = in_dt_end + timedelta(hours=6)       # and closes 6 h after the end
dt_start_rounded = datetime_rounding(dt_start)
dt_end_rounded = datetime_rounding(dt_end, ceiling=True)
time_list = pd.date_range(dt_start_rounded, dt_end_rounded, freq='6H')
return time_list
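# Illustrative, commented-out example of get_file_list with the current
# -12 h / +6 h window (timestamps are UTC, 6-hourly):
#   get_file_list('201506010730', '201506010930')
#   -> DatetimeIndex(['2015-05-31 18:00', '2015-06-01 00:00', '2015-06-01 06:00',
#                     '2015-06-01 12:00', '2015-06-01 18:00'], freq='6H')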
def get_file_list_from_datetimes(s_start, s_end, is_bjt=False):
"""
Get the combined Datetime list from individual `get_file_list`.
:param in_dt_list:
:param is_bjt: Whether input datetime is in Beijing Time Zone.
:return:
"""
def dt_index2list(dtindex):
dtindex = dtindex.tolist()
return [item.strftime(DT_FORMAT) for item in dtindex]
assert len(s_start) == len(s_end)
time_lists = [get_file_list(item0, item1, is_bjt) for item0, item1 in zip(s_start, s_end)]
if len(s_start) == 1:
dtindexall = time_lists[0]
else:
dtindexall = reduce(lambda x, y: x.union(y), time_lists)
return dtindexall
# return dt_index2list(dtindexall)
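# Illustrative, commented-out example: the per-event 6-hourly windows are merged
# with DatetimeIndex.union, so overlapping events yield a single sorted index:
#   get_file_list_from_datetimes(pd.Series(['201506010000', '201506020000']),
#                                pd.Series(['201506011200', '201506021200']))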
def grbmsg_resolve(msg):
"""Return a tuple of (values, lat, lon) according to the input grbmsg."""
    lat = np.linspace(msg['latitudeOfFirstGridPointInDegrees'],
                      msg['latitudeOfLastGridPointInDegrees'], msg['Nj'])
    lon = np.linspace(msg['longitudeOfFirstGridPointInDegrees'] % 360,
                      msg['longitudeOfLastGridPointInDegrees'] % 360, msg['Ni'])
values = msg.values
return values, lat, lon
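# Illustrative, commented-out usage (the input file path is hypothetical):
#   grbs = pygrib.open('ERA5_hgt_pressure_200501010000.grib')
#   values, lat, lon = grbmsg_resolve(grbs[1])  # pygrib message numbers start at 1
#   # values has shape (lat.size, lon.size) == (Nj, Ni)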
def grbmsg_subset_region(msg, la0=55.5, la1=15.75, lo0=72.75, lo1=136.5):
"""Subtracting a region according to lats and lons.
MESO: lat: 65-15, lon: 70-145
SCMOC: lat: 0-60, lon: 70-140
Thus - merged: lat: 15-60, lon: 70-140
Assert latitude decreases!
"""
    values, lat, lon = grbmsg_resolve(msg)
    # Indices of the requested corner coordinates.
    # np.where returns a tuple of index arrays; [0][0] takes the first match.
    ind_la0 = np.where(lat == la0)[0][0]
    ind_la1 = np.where(lat == la1)[0][0]
    ind_lo0 = np.where(lon == lo0)[0][0]
    ind_lo1 = np.where(lon == lo1)[0][0]
values = values[ind_la0:ind_la1 + 1, ind_lo0:ind_lo1 + 1]
lat = lat[ind_la0:ind_la1 + 1]
lon = lon[ind_lo0:ind_lo1 + 1]
    # Write the subset grid description and values back into the message.
msg['Ni'] = lon.size
msg['Nj'] = lat.size
msg['latitudeOfFirstGridPointInDegrees'] = la0
msg['latitudeOfLastGridPointInDegrees'] = la1
msg['longitudeOfFirstGridPointInDegrees'] = lo0
msg['longitudeOfLastGridPointInDegrees'] = lo1
msg['values'] = values
return msg
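# Illustrative, commented-out walk-through (assumes the usual global 0.25-degree
# ERA5 grid with latitude 90..-90 decreasing and longitude 0..359.75):
#   la0=55.5  -> index (90 - 55.5) / 0.25 = 138;  la1=15.75 -> index 297
#   lo0=72.75 -> index 72.75 / 0.25 = 291;        lo1=136.5 -> index 546
#   so the subset keeps lat[138:298] (160 rows) and lon[291:547] (256 columns).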
def grbmsg_convert(grbmsg):
"""lat lon limiting and output new data."""
return grbmsg_subset_region(grbmsg)
def grb_file_process_plevel(dt):
# To be modified based on folder structure on server.
# ERA5_hgt_pressure_200501010000.grib
filename_in = os.path.join(INPUT_ROOT_DIR, 'ERA5_'+element+'_pressure_{}.grib'.format(dt.strftime("%Y%m%d%H%M")))
grbs = pygrib.open(filename_in)
    # Convert dt (UTC) to BJT; the BJT date is used in the output directory and file names.
    dt_utc = dt.tz_localize(pytz.utc)
    dt_bjt = dt_utc.tz_convert(bjt)
for item_grb in grbs:
if (int(item_grb['level']) in LVS) and (item_grb['shortName'] in VARS):
grbmsg = grbmsg_convert(item_grb)
grbmsg_bin = grbmsg.tostring()
            # Write this single message to its own GRIB1 file under a BJT-dated directory.
dir_name = '{}/ECMF_ERA5/{}/{}/{}/{}/'.format(
OUTPUT_ROOT_DIR,
# 2019.2.12: modified dt to dt_bjt to make directory date and file date consistent.
dt_bjt.strftime("%Y"),
dt_bjt.strftime("%m"),
dt_bjt.strftime("%d"),
item_grb['shortName'].upper()
)
if not os.path.exists(dir_name):
os.makedirs(dir_name)
filename_out = os.path.join(dir_name, 'NAFP_ECMF_{}0000_ERA5_{}_{}.grib1'.format(
dt_bjt.strftime("%Y%m%d%H"),
item_grb['shortName'].upper(),
item_grb['level']))
with open(filename_out, 'wb') as f:
f.write(grbmsg_bin)
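# Illustrative, commented-out example of the resulting layout (the BJT date/hour
# come from the UTC timestamp passed in; values below assume dt = 2005-01-01 00:00 UTC):
#   grb_file_process_plevel(pd.Timestamp('2005-01-01 00:00'))
#   -> <OUTPUT_ROOT_DIR>/ECMF_ERA5/2005/01/01/T/NAFP_ECMF_20050101080000_ERA5_T_500.grib1
#      (one file per kept variable and level)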
def grb_file_process_sfc(dt):
# To be modified based on folder structure on server.
    # e.g. ERA5_sfc_pressure_200501010000.grib (same naming pattern as the pressure-level files)
filename_in = os.path.join(INPUT_ROOT_DIR, 'ERA5_'+element+'_pressure_{}.grib'.format(dt.strftime("%Y%m%d%H%M")))
grbs = pygrib.open(filename_in)
    # Convert dt (UTC) to BJT; the BJT date is used in the output directory and file names.
    dt_utc = dt.tz_localize(pytz.utc)
    dt_bjt = dt_utc.tz_convert(bjt)
for item_grb in grbs:
if (item_grb['shortName'] in VARS_sfc):
grbmsg = grbmsg_convert(item_grb)
grbmsg_bin = grbmsg.tostring()
            # Write this single message to its own GRIB1 file under a BJT-dated directory.
dir_name = '{}/ECMF_ERA5/{}/{}/{}/{}/'.format(
OUTPUT_ROOT_DIR,
# 2019.2.12: modified dt to dt_bjt to make directory date and file date consistent.
dt_bjt.strftime("%Y"),
dt_bjt.strftime("%m"),
dt_bjt.strftime("%d"),
item_grb['shortName'].upper()
)
if not os.path.exists(dir_name):
os.makedirs(dir_name)
filename_out = os.path.join(dir_name, 'NAFP_ECMF_{}0000_ERA5_{}_{}.grib1'.format(
dt_bjt.strftime("%Y%m%d%H"),
item_grb['shortName'].upper(),
item_grb['level']))
with open(filename_out, 'wb') as f:
f.write(grbmsg_bin)
def grb_file_process_print(dt):
    """Dry run: print the input and output file names for dt without reading any GRIB data."""
    # To be modified based on folder structure on server.
filename_in = os.path.join(INPUT_ROOT_DIR, 'ERA5_'+element+'_pressure_{}.grib'.format(dt.strftime("%Y%m%d%H%M")))
print('Open file {}'.format(filename_in))
# Convert dt in UTC back to BJT again.
dt_utc = dt.tz_localize(pytz.utc)
dt_bjt = dt_utc.tz_convert(bjt)
out_filename = 'NAFP_ECMF_{}0000_ERA5.grib1'.format(dt_bjt.strftime("%Y%m%d%H"))
print('Output file name is: {}'.format(out_filename))
def s_start_end_gen(filelist_in):
    """Yield (start_time, end_time) pandas Series of yyyymmddHHMM strings for each event CSV in filelist_in."""
    for item in filelist_in:
print('Reading file {}'.format(item))
df = pd.read_csv(item)[['start_time', 'end_time']]
df = df.dropna()
df = df.astype(int)
df = df.astype(str)
yield df['start_time'], df['end_time']
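# Illustrative, commented-out sketch of the event CSV layout these readers expect
# (the column names come from the code; extra columns are ignored):
#   start_time,end_time
#   201506010730,201506010930
#   201507120000,201507121800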
def read_from_csv(filelist_in):
"""Read from filelist and concatenate the starting and ending time of weather events.
:param filelist_in: File list of event records.
:return: s_start, s_end: pandas Series of starting time and ending time of weather events.
"""
s_series = list(s_start_end_gen(filelist_in))
s_start = pd.concat([item[0] for item in s_series])
s_end = pd.concat([item[1] for item in s_series])
return s_start, s_end
def main():
    # Earlier revisions built the time list from event-record CSV files via
    # read_from_csv()/get_file_list_from_datetimes(); since 2019.02.12 the time
    # list is generated directly from the command-line date below.
    # Hourly timestamps from the requested start date up to 1 January of the following year.
    year1 = int(year) + 1
    time_list = pd.date_range('{}-{}-{}'.format(year, month, day), '{}-01-01'.format(year1), freq='1H')
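    # For example, with year='2005', month='01', day='01' this produces hourly
    # timestamps from 2005-01-01 00:00 through 2006-01-01 00:00 inclusive
    # (8761 timestamps for a non-leap year).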
for item in time_list:
        filename_in = os.path.join(INPUT_ROOT_DIR,
                                   'ERA5_' + element + '_pressure_{}.grib'.format(item.strftime("%Y%m%d%H%M")))
        if os.path.exists(filename_in):
            print('Processing {}'.format(item))
            if element == 'sfc':
                grb_file_process_sfc(item)
            else:
                grb_file_process_plevel(item)
        else:
            print('WARNING: Grib file for {} does not exist!'.format(item))
            print(filename_in)
if __name__ == '__main__':
main()