H5裁切

步骤如下:

  1. 读取h5文件:获取其中的经纬度数据;
  2. 判断裁切范围:判断各个经纬度坐标点是否在用户输入的wkt范围内,并生成掩膜矩阵;
  3. 写入变量:根据掩膜矩阵写入变量,不同维度的变量写入方式有所差异。

读取h5文件

def get_h5_content(self, input_h5, wkt_range, lon_parm, lat_parm):
    def get_slope(input_h5, h5_data, lon_parm, lat_parm):
        h5_type = judge_h5_type(input_h5)
        # 特殊类型,特殊处理。变量 SCALE FACTOR 为 1,但是经纬度超出范围,故手动设置尺度因子
        if h5_type.startswith("H2B_SMR_L2A") or h5_type.startswith("H2B_SMR_L2B"):
            lon_slope, lat_slope = 1e-6, 1e-6
            return lon_slope, lat_slope
        else:
            lon_slope, lat_slope = 1, 1
            if "Slope" in h5_data[lon_parm].attrs:
                lon_slope = h5_data[lon_parm].attrs["Slope"]
            if "Slope" in h5_data[lat_parm].attrs:
                lat_slope = h5_data[lat_parm].attrs["Slope"]
            return lon_slope, lat_slope
    logger.info(f"开始读取h5文件: {input_h5}")
    h5_data = h5py.File(input_h5, 'r')
    # 获取尺度因子
    lon_slope, lat_slope = self.get_slope(input_h5, h5_data, lon_parm, lat_parm)
    # 获取到h5文件中的经度
    longitude = h5_data[lon_parm][:] * lon_slope
    # 获取到h5文件中的纬度
    latitude = h5_data[lat_parm][:] * lat_slope
    # 加载wkt为多边形
    polygon = wkt.loads(wkt_range)

判断裁切范围

续上述代码。读取h5文件和判断裁切范围应该拆开来写,这里合并到一起了。

# 获取边界
bounds_lon_lat = polygon.bounds
# 数据是否被包含
contain_flag = False
if len(latitude.shape) == 1:  # 经纬度为一维数据
    # 获取经纬度行列数
    m, n = latitude.size, longitude.size
    # 创建掩膜矩阵
    matrix = np.full((m, n), np.nan)
    logger.info(f"开始判断经纬度坐标是否在h5文件中 {m}×{n}")
    for i in range(0, m):
        for j in range(0, n):
            # 根据客户输入的坐标来判断坐标是否在h5文件中
            flag = whether_contain(longitude[j], latitude[i], bounds_lon_lat)
            if flag:
                matrix[i, j] = 1
                contain_flag = True
elif len(latitude.shape) == 2:
    # 获取经纬度行列数
    m, n = latitude.shape
    # 创建掩膜矩阵
    matrix = np.full((m, n), np.nan)
    logger.info(f"开始判断经纬度坐标是否在h5文件中: {m}×{n}")
    for i in range(0, m):
        for j in range(0, n):
            # 根据客户输入的坐标来判断坐标是否在h5文件中
            flag = whether_contain(longitude[i, j], latitude[i, j], bounds_lon_lat)
            if flag:
                matrix[i, j] = 1
                contain_flag = True
elif len(latitude.shape) == 3:
    m, n, p = latitude.shape
    # 创建掩膜矩阵
    matrix = np.full(latitude.shape, np.nan)
    logger.info(f"开始判断经纬度坐标是否在h5文件中: {m}×{n}×{p}")
    for k in range(p):
        for i in range(0, m):
            for j in range(0, n):
                # 根据客户输入的坐标来判断坐标是否在h5文件中
                flag = whether_contain(longitude[i, j, k], latitude[i, j, k], bounds_lon_lat)
                if flag:
                    matrix[i, j, k] = 1
                    contain_flag = True
else:
    matrix = []
logger.info("读取完毕")
return h5_data, matrix, contain_flag

写入变量

def write_new_h5(self, output_h5, h5_data, matrix, two_dimension_with_row_col, two_dimension_with_row,
                 two_dimension_with_col, three_dimension_with_row, unaltered_args):
    """将解析后的H5数据写入新的H5文件中"""
    def write_description(new_h5_data, origin_h5_data, key):
        # 写入内部变量描述信息
        for inside_attr in origin_h5_data[key].attrs:
            new_h5_data.attrs[inside_attr] = origin_h5_data[key].attrs[inside_attr]

    def set_nan2fill_value(origin_data, write_data):
        # 替换 Nan 为 fill_value or FillValue
        if 'fill_value' in origin_data.attrs and not np.isnan(origin_data.attrs['fill_value']):
            new_write_data = np.where(np.isnan(write_data), origin_data.attrs['fill_value'], write_data)
            return new_write_data
        if 'FillValue' in origin_data.attrs and not np.isnan(origin_data.attrs['FillValue']):
            new_write_data = np.where(np.isnan(write_data), origin_data.attrs['FillValue'], write_data)
            return new_write_data
        return write_data
    
    logger.info(f"开始写入: {output_h5}")
    new_h5_file = h5py.File(output_h5, 'w')
    # 判断有数据的行和列
    t = np.where(matrix == 1)
    min_hang, max_hang, min_lie, max_lie = min(t[0]), max(t[0]), min(t[1]), max(t[1])
    # 写入数据
    for key_1 in h5_data.keys():
        temp_1 = h5_data[key_1]
        try:  # 数据深度: 2层
            for key_2 in temp_1.keys():
                data_1 = h5_data[f'{key_1}/{key_2}']
                logger.debug(f'{key_1}/{key_2}, {data_1.shape}')
                if key_2 in two_dimension_with_row_col:
                    data_new = data_1 * matrix
                    data_new_clip = data_new[min_hang:max_hang + 1, min_lie:max_lie + 1]
                    write_data = self.set_nan2fill_value(data_1, data_new_clip)
                    new_h5_data = new_h5_file.create_dataset(f'{key_1}/{key_2}', data=write_data, dtype=data_1.dtype)
                    self.write_description(new_h5_data, h5_data, f'{key_1}/{key_2}')

                elif key_2 in two_dimension_with_row:
                    write_data = data_1[min_hang:max_hang + 1, :] if len(data_1.shape) == 2 else data_1[min_hang:max_hang + 1]
                    new_h5_data = new_h5_file.create_dataset(f'{key_1}/{key_2}', data=write_data, dtype=data_1.dtype)
                    self.write_description(new_h5_data, h5_data, f'{key_1}/{key_2}')

                elif key_2 in two_dimension_with_col:
                    write_data = data_1[min_lie:max_lie + 1, :] if len(data_1.shape) == 2 else data_1[min_lie:max_lie + 1]
                    new_h5_data = new_h5_file.create_dataset(f'{key_1}/{key_2}', data=write_data, dtype=data_1.dtype)
                    self.write_description(new_h5_data, h5_data, f'{key_1}/{key_2}')

                elif key_2 in three_dimension_with_row:
                    write_data = data_1[:, min_hang:max_hang + 1, :]
                    new_h5_data = new_h5_file.create_dataset(f'{key_1}/{key_2}', data=write_data, dtype=data_1.dtype)
                    self.write_description(new_h5_data, h5_data, f'{key_1}/{key_2}')

                elif key_2 in unaltered_args:
                    new_h5_data = new_h5_file.create_dataset(f'{key_1}/{key_2}', data=data_1, dtype=data_1.dtype)
                    self.write_description(new_h5_data, h5_data, f'{key_1}/{key_2}')

                else:
                    logger.warning(f"参数未写入: {key_2}")
        except AttributeError as e:  # 数据深度: 1层
            data_1 = temp_1
            logger.debug(f"{key_1}, {data_1.shape}")
            if key_1 in two_dimension_with_row_col:
                data_new = data_1 * matrix
                data_new_clip = data_new[min_hang:max_hang + 1, min_lie:max_lie + 1]
                write_data = self.set_nan2fill_value(data_1, data_new_clip)
                new_h5_data = new_h5_file.create_dataset(key_1, data=write_data, dtype=data_1.dtype)
                self.write_description(new_h5_data, h5_data, key_1)

            elif key_1 in two_dimension_with_row:
                write_data = data_1[min_hang:max_hang + 1, :] if len(data_1.shape) == 2 else data_1[min_hang:max_hang + 1]
                new_h5_data = new_h5_file.create_dataset(key_1, data=write_data, dtype=data_1.dtype)
                self.write_description(new_h5_data, h5_data, key_1)

            elif key_1 in two_dimension_with_col:
                write_data = data_1[min_lie:max_lie + 1, :] if len(data_1.shape) == 2 else data_1[min_lie:max_lie + 1]
                new_h5_data = new_h5_file.create_dataset(key_1, data=write_data, dtype=data_1.dtype)
                self.write_description(new_h5_data, h5_data, key_1)

            elif key_1 in three_dimension_with_row:
                write_data = np.full(data_1.shape, np.nan)
                for k in range(data_1.shape[2]):
                    write_data[:, :, k] = data_1[:, :, k] * matrix
                write_data = self.set_nan2fill_value(data_1, write_data)
                new_h5_data = new_h5_file.create_dataset(key_1, data=write_data, dtype=data_1.dtype)
                self.write_description(new_h5_data, h5_data, key_1)

            elif key_1 in unaltered_args:
                new_h5_data = new_h5_file.create_dataset(key_1, data=data_1, dtype=data_1.dtype)
                self.write_description(new_h5_data, h5_data, key_1)

            else:
                logger.warning(f"参数未写入: {key_1}")

    # 写入外部变量描述信息
    for key_1 in h5_data.keys():
        for outside_attr in h5_data[key_1].attrs:
            new_h5_file[key_1].attrs[outside_attr] = h5_data[key_1].attrs[outside_attr]

    logger.info("写入完毕")
    # 关闭h5文件
    h5_data.close()
    new_h5_file.close()

参考文章

  1. HDF5 数据文件简介 - 知乎 (zhihu.com)
  2. Python 数据处理(二十)—— HDF5 基础 - 知乎 (zhihu.com)
  3. 【Python 】Python 读取HDF5文件_python读取hdf5文件-CSDN博客
  4. Quick Start Guide — h5py 3.10.0 documentation
  5. HDF5基本使用方法 - 宁静是一种习惯 - 博客园 (cnblogs.com)
  6. 如何使用Python和h5py读取HDF5属性(元数据-腾讯云开发者社区-腾讯云 (tencent.com)
  7. python - 无法为 cartopy linux 安装 Proj 8.0.0 - IT工具网 (coder.work)
  8. 安装 — PROJ 9.1.0 文档 (osgeo.cn)
  9. python gdal geopandas basemap cartopy安装_python卸载gdal-CSDN博客
  10. GitHub - h5py/h5py: HDF5 for Python -- The h5py package is a Pythonic interface to the HDF5 binary data format.
  11. 关于xarray无法提取nc文件的变量数据,出现NetCDF: HDF error的解决方案 - 知乎 (zhihu.com)
posted @ 2024-01-23 09:04  Alphapy  阅读(25)  评论(0)    收藏  举报