H5裁切
步骤如下:
- 读取h5文件:获取其中的经纬度数据;
- 判断裁切范围:判断各个经纬度坐标点是否在用户输入的wkt范围内,并生成掩膜矩阵;
- 写入变量:根据掩膜矩阵写入变量,不同维度的变量写入方式有所差异。
读取h5文件
def get_h5_content(self, input_h5, wkt_range, lon_parm, lat_parm):
def get_slope(input_h5, h5_data, lon_parm, lat_parm):
h5_type = judge_h5_type(input_h5)
# 特殊类型,特殊处理。变量 SCALE FACTOR 为 1,但是经纬度超出范围,故手动设置尺度因子
if h5_type.startswith("H2B_SMR_L2A") or h5_type.startswith("H2B_SMR_L2B"):
lon_slope, lat_slope = 1e-6, 1e-6
return lon_slope, lat_slope
else:
lon_slope, lat_slope = 1, 1
if "Slope" in h5_data[lon_parm].attrs:
lon_slope = h5_data[lon_parm].attrs["Slope"]
if "Slope" in h5_data[lat_parm].attrs:
lat_slope = h5_data[lat_parm].attrs["Slope"]
return lon_slope, lat_slope
logger.info(f"开始读取h5文件: {input_h5}")
h5_data = h5py.File(input_h5, 'r')
# 获取尺度因子
lon_slope, lat_slope = self.get_slope(input_h5, h5_data, lon_parm, lat_parm)
# 获取到h5文件中的经度
longitude = h5_data[lon_parm][:] * lon_slope
# 获取到h5文件中的纬度
latitude = h5_data[lat_parm][:] * lat_slope
# 加载wkt为多边形
polygon = wkt.loads(wkt_range)
判断裁切范围
续上述代码。读取h5文件和判断裁切范围应该拆开来写,这里合并到一起了。
# 获取边界
bounds_lon_lat = polygon.bounds
# 数据是否被包含
contain_flag = False
if len(latitude.shape) == 1: # 经纬度为一维数据
# 获取经纬度行列数
m, n = latitude.size, longitude.size
# 创建掩膜矩阵
matrix = np.full((m, n), np.nan)
logger.info(f"开始判断经纬度坐标是否在h5文件中 {m}×{n}")
for i in range(0, m):
for j in range(0, n):
# 根据客户输入的坐标来判断坐标是否在h5文件中
flag = whether_contain(longitude[j], latitude[i], bounds_lon_lat)
if flag:
matrix[i, j] = 1
contain_flag = True
elif len(latitude.shape) == 2:
# 获取经纬度行列数
m, n = latitude.shape
# 创建掩膜矩阵
matrix = np.full((m, n), np.nan)
logger.info(f"开始判断经纬度坐标是否在h5文件中: {m}×{n}")
for i in range(0, m):
for j in range(0, n):
# 根据客户输入的坐标来判断坐标是否在h5文件中
flag = whether_contain(longitude[i, j], latitude[i, j], bounds_lon_lat)
if flag:
matrix[i, j] = 1
contain_flag = True
elif len(latitude.shape) == 3:
m, n, p = latitude.shape
# 创建掩膜矩阵
matrix = np.full(latitude.shape, np.nan)
logger.info(f"开始判断经纬度坐标是否在h5文件中: {m}×{n}×{p}")
for k in range(p):
for i in range(0, m):
for j in range(0, n):
# 根据客户输入的坐标来判断坐标是否在h5文件中
flag = whether_contain(longitude[i, j, k], latitude[i, j, k], bounds_lon_lat)
if flag:
matrix[i, j, k] = 1
contain_flag = True
else:
matrix = []
logger.info("读取完毕")
return h5_data, matrix, contain_flag
写入变量
def write_new_h5(self, output_h5, h5_data, matrix, two_dimension_with_row_col, two_dimension_with_row,
two_dimension_with_col, three_dimension_with_row, unaltered_args):
"""将解析后的H5数据写入新的H5文件中"""
def write_description(new_h5_data, origin_h5_data, key):
# 写入内部变量描述信息
for inside_attr in origin_h5_data[key].attrs:
new_h5_data.attrs[inside_attr] = origin_h5_data[key].attrs[inside_attr]
def set_nan2fill_value(origin_data, write_data):
# 替换 Nan 为 fill_value or FillValue
if 'fill_value' in origin_data.attrs and not np.isnan(origin_data.attrs['fill_value']):
new_write_data = np.where(np.isnan(write_data), origin_data.attrs['fill_value'], write_data)
return new_write_data
if 'FillValue' in origin_data.attrs and not np.isnan(origin_data.attrs['FillValue']):
new_write_data = np.where(np.isnan(write_data), origin_data.attrs['FillValue'], write_data)
return new_write_data
return write_data
logger.info(f"开始写入: {output_h5}")
new_h5_file = h5py.File(output_h5, 'w')
# 判断有数据的行和列
t = np.where(matrix == 1)
min_hang, max_hang, min_lie, max_lie = min(t[0]), max(t[0]), min(t[1]), max(t[1])
# 写入数据
for key_1 in h5_data.keys():
temp_1 = h5_data[key_1]
try: # 数据深度: 2层
for key_2 in temp_1.keys():
data_1 = h5_data[f'{key_1}/{key_2}']
logger.debug(f'{key_1}/{key_2}, {data_1.shape}')
if key_2 in two_dimension_with_row_col:
data_new = data_1 * matrix
data_new_clip = data_new[min_hang:max_hang + 1, min_lie:max_lie + 1]
write_data = self.set_nan2fill_value(data_1, data_new_clip)
new_h5_data = new_h5_file.create_dataset(f'{key_1}/{key_2}', data=write_data, dtype=data_1.dtype)
self.write_description(new_h5_data, h5_data, f'{key_1}/{key_2}')
elif key_2 in two_dimension_with_row:
write_data = data_1[min_hang:max_hang + 1, :] if len(data_1.shape) == 2 else data_1[min_hang:max_hang + 1]
new_h5_data = new_h5_file.create_dataset(f'{key_1}/{key_2}', data=write_data, dtype=data_1.dtype)
self.write_description(new_h5_data, h5_data, f'{key_1}/{key_2}')
elif key_2 in two_dimension_with_col:
write_data = data_1[min_lie:max_lie + 1, :] if len(data_1.shape) == 2 else data_1[min_lie:max_lie + 1]
new_h5_data = new_h5_file.create_dataset(f'{key_1}/{key_2}', data=write_data, dtype=data_1.dtype)
self.write_description(new_h5_data, h5_data, f'{key_1}/{key_2}')
elif key_2 in three_dimension_with_row:
write_data = data_1[:, min_hang:max_hang + 1, :]
new_h5_data = new_h5_file.create_dataset(f'{key_1}/{key_2}', data=write_data, dtype=data_1.dtype)
self.write_description(new_h5_data, h5_data, f'{key_1}/{key_2}')
elif key_2 in unaltered_args:
new_h5_data = new_h5_file.create_dataset(f'{key_1}/{key_2}', data=data_1, dtype=data_1.dtype)
self.write_description(new_h5_data, h5_data, f'{key_1}/{key_2}')
else:
logger.warning(f"参数未写入: {key_2}")
except AttributeError as e: # 数据深度: 1层
data_1 = temp_1
logger.debug(f"{key_1}, {data_1.shape}")
if key_1 in two_dimension_with_row_col:
data_new = data_1 * matrix
data_new_clip = data_new[min_hang:max_hang + 1, min_lie:max_lie + 1]
write_data = self.set_nan2fill_value(data_1, data_new_clip)
new_h5_data = new_h5_file.create_dataset(key_1, data=write_data, dtype=data_1.dtype)
self.write_description(new_h5_data, h5_data, key_1)
elif key_1 in two_dimension_with_row:
write_data = data_1[min_hang:max_hang + 1, :] if len(data_1.shape) == 2 else data_1[min_hang:max_hang + 1]
new_h5_data = new_h5_file.create_dataset(key_1, data=write_data, dtype=data_1.dtype)
self.write_description(new_h5_data, h5_data, key_1)
elif key_1 in two_dimension_with_col:
write_data = data_1[min_lie:max_lie + 1, :] if len(data_1.shape) == 2 else data_1[min_lie:max_lie + 1]
new_h5_data = new_h5_file.create_dataset(key_1, data=write_data, dtype=data_1.dtype)
self.write_description(new_h5_data, h5_data, key_1)
elif key_1 in three_dimension_with_row:
write_data = np.full(data_1.shape, np.nan)
for k in range(data_1.shape[2]):
write_data[:, :, k] = data_1[:, :, k] * matrix
write_data = self.set_nan2fill_value(data_1, write_data)
new_h5_data = new_h5_file.create_dataset(key_1, data=write_data, dtype=data_1.dtype)
self.write_description(new_h5_data, h5_data, key_1)
elif key_1 in unaltered_args:
new_h5_data = new_h5_file.create_dataset(key_1, data=data_1, dtype=data_1.dtype)
self.write_description(new_h5_data, h5_data, key_1)
else:
logger.warning(f"参数未写入: {key_1}")
# 写入外部变量描述信息
for key_1 in h5_data.keys():
for outside_attr in h5_data[key_1].attrs:
new_h5_file[key_1].attrs[outside_attr] = h5_data[key_1].attrs[outside_attr]
logger.info("写入完毕")
# 关闭h5文件
h5_data.close()
new_h5_file.close()
参考文章
- HDF5 数据文件简介 - 知乎 (zhihu.com)
- Python 数据处理(二十)—— HDF5 基础 - 知乎 (zhihu.com)
- 【Python 】Python 读取HDF5文件_python读取hdf5文件-CSDN博客
- Quick Start Guide — h5py 3.10.0 documentation
- HDF5基本使用方法 - 宁静是一种习惯 - 博客园 (cnblogs.com)
- 如何使用Python和h5py读取HDF5属性(元数据-腾讯云开发者社区-腾讯云 (tencent.com)
- python - 无法为 cartopy linux 安装 Proj 8.0.0 - IT工具网 (coder.work)
- 安装 — PROJ 9.1.0 文档 (osgeo.cn)
- python gdal geopandas basemap cartopy安装_python卸载gdal-CSDN博客
- GitHub - h5py/h5py: HDF5 for Python -- The h5py package is a Pythonic interface to the HDF5 binary data format.
- 关于xarray无法提取nc文件的变量数据,出现NetCDF: HDF error的解决方案 - 知乎 (zhihu.com)

浙公网安备 33010602011771号