GDAL—栅格数据按瓦片读取、虚拟文件创建、VRT创建、进程和线程处理

1.按瓦片读取

from osgeo import gdal
import numpy as np

ds = gdal.Open(r"E:\DroughtAnalysis\data\SPEI_raster\gdal_spei12\large_raster.tif")
band = ds.GetRasterBand(1)

# 获取瓦片大小(或自定义分块大小)
block_xsize, block_ysize = band.GetBlockSize()  # 非横竖瓦片个数,是各个瓦片的行列大小
cols, rows = ds.RasterXSize, ds.RasterYSize

total_sum = 0.0
total_pixels = 0

# 按瓦片遍历,从左到右,从上到下
for y in range(0, rows, block_ysize):
    y_size = min(block_ysize, rows - y)
    for x in range(0, cols, block_xsize):
        x_size = min(block_xsize, cols - x)

        # 只读当前瓦片
        tile = band.ReadAsArray(x, y, x_size, y_size)

        # 跳过 NoData
        valid = tile[~np.isnan(tile)]
        total_sum += np.sum(valid)
        total_pixels += len(valid)

mean_value = total_sum / total_pixels
print(f"全栅格均值: {mean_value:.4f}")

2.虚拟内存文件创建

from osgeo import gdal

# 创建虚拟内存文件
tmp_raster = "/vsimem/temp_clip.tif"    # /vsimem/ 是 GDAL 的特殊前缀,代表虚拟内容

gdal.Warp(
    tmp_raster,           # 虚拟路径
    "input.tif",
    cutlineDSName="boundary.shp",
    cropToCutline=True,
    format="GTiff"
)

# 直接从内存中读取结果
ds = gdal.Open(tmp_raster)
data = ds.ReadAsArray()

# 清理
ds = None
gdal.Unlink(tmp_raster)  # 删除虚拟文件

3.创建 VRT(虚拟栅格)

VRT(虚拟栅格):不存储实际的栅格像素值,而是通过存储数据处理逻辑和源数据引用,在运行时动态生成所需的栅格视图

from osgeo import gdal

vrt_path = "/vsimem/clip.vrt"  # 也可以保存到磁盘

gdal.Warp(
    vrt_path,
    "input.tif",
    cutlineDSName="boundary.shp",
    cropToCutline=True,
    format="VRT"  # 关键:输出格式为 VRT
)

# VRT 文件很小(几 KB),但可以像普通栅格一样读取
ds = gdal.Open(vrt_path)
data = ds.ReadAsArray()
print(f"数据形状: {data.shape}")

4.进程和线程处理

from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from osgeo import gdal
import numpy as np

def compute_mean(file_path):
    """计算单个文件的均值"""
    ds = gdal.Open(file_path)
    band = ds.GetRasterBand(1)
    stats = band.GetStatistics(True, True) # 计算并返回该波段的最小值、最大值、均值、标准差四个统计量
    return stats[2]  # 均值

# 准备文件列表
files = [f"{i:02d}.tif" for i in range(1, 13)]

# 方法1:多线程(I/O 密集型)
with ThreadPoolExecutor(max_workers=4) as executor:
    results = list(executor.map(compute_mean, files))
print(f" {results}")

# 方法2:多进程(CPU 密集型)
with ProcessPoolExecutor(max_workers=4) as executor:
    results = list(executor.map(compute_mean, files))
print(f" {results}")
posted @ 2026-05-05 17:19  MyEngine  阅读(8)  评论(0)    收藏  举报