GDAL—栅格数据按瓦片读取、虚拟文件创建、VRT创建、进程和线程处理
1.按瓦片读取
from osgeo import gdal
import numpy as np
ds = gdal.Open(r"E:\DroughtAnalysis\data\SPEI_raster\gdal_spei12\large_raster.tif")
band = ds.GetRasterBand(1)
# 获取瓦片大小(或自定义分块大小)
block_xsize, block_ysize = band.GetBlockSize() # 非横竖瓦片个数,是各个瓦片的行列大小
cols, rows = ds.RasterXSize, ds.RasterYSize
total_sum = 0.0
total_pixels = 0
# 按瓦片遍历,从左到右,从上到下
for y in range(0, rows, block_ysize):
y_size = min(block_ysize, rows - y)
for x in range(0, cols, block_xsize):
x_size = min(block_xsize, cols - x)
# 只读当前瓦片
tile = band.ReadAsArray(x, y, x_size, y_size)
# 跳过 NoData
valid = tile[~np.isnan(tile)]
total_sum += np.sum(valid)
total_pixels += len(valid)
mean_value = total_sum / total_pixels
print(f"全栅格均值: {mean_value:.4f}")
2.虚拟内存文件创建
from osgeo import gdal
# 创建虚拟内存文件
tmp_raster = "/vsimem/temp_clip.tif" # /vsimem/ 是 GDAL 的特殊前缀,代表虚拟内容
gdal.Warp(
tmp_raster, # 虚拟路径
"input.tif",
cutlineDSName="boundary.shp",
cropToCutline=True,
format="GTiff"
)
# 直接从内存中读取结果
ds = gdal.Open(tmp_raster)
data = ds.ReadAsArray()
# 清理
ds = None
gdal.Unlink(tmp_raster) # 删除虚拟文件
3.创建 VRT(虚拟栅格)
VRT(虚拟栅格):不存储实际的栅格像素值,而是通过存储数据处理逻辑和源数据引用,在运行时动态生成所需的栅格视图
from osgeo import gdal
vrt_path = "/vsimem/clip.vrt" # 也可以保存到磁盘
gdal.Warp(
vrt_path,
"input.tif",
cutlineDSName="boundary.shp",
cropToCutline=True,
format="VRT" # 关键:输出格式为 VRT
)
# VRT 文件很小(几 KB),但可以像普通栅格一样读取
ds = gdal.Open(vrt_path)
data = ds.ReadAsArray()
print(f"数据形状: {data.shape}")
4.进程和线程处理
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from osgeo import gdal
import numpy as np
def compute_mean(file_path):
"""计算单个文件的均值"""
ds = gdal.Open(file_path)
band = ds.GetRasterBand(1)
stats = band.GetStatistics(True, True) # 计算并返回该波段的最小值、最大值、均值、标准差四个统计量
return stats[2] # 均值
# 准备文件列表
files = [f"{i:02d}.tif" for i in range(1, 13)]
# 方法1:多线程(I/O 密集型)
with ThreadPoolExecutor(max_workers=4) as executor:
results = list(executor.map(compute_mean, files))
print(f" {results}")
# 方法2:多进程(CPU 密集型)
with ProcessPoolExecutor(max_workers=4) as executor:
results = list(executor.map(compute_mean, files))
print(f" {results}")
浙公网安备 33010602011771号