第13章 - 数据格式转换实战
第13章:数据格式转换实战
13.1 概述
数据格式转换是 GIS 开发中最常见的任务之一。GDAL/OGR 支持超过 200 种数据格式,能够满足几乎所有的格式转换需求。
13.1.1 格式转换的重要性
| 场景 | 说明 |
|---|---|
| 系统集成 | 不同系统使用不同格式,需要转换才能交换数据 |
| 性能优化 | 某些格式更适合特定用途(如 COG 适合云端访问) |
| 数据共享 | 开放格式(如 GeoJSON)便于数据共享 |
| 存储优化 | 压缩格式可以减少存储空间 |
| 软件兼容 | 某些软件只支持特定格式 |
13.1.2 常见格式对照表
栅格格式:
| 格式 | 驱动名称 | 文件扩展名 | 特点 |
|---|---|---|---|
| GeoTIFF | GTiff | .tif/.tiff | 最通用的地理栅格格式 |
| COG | COG | .tif | 云优化 GeoTIFF |
| JPEG | JPEG | .jpg/.jpeg | 有损压缩,适合展示 |
| PNG | PNG | .png | 无损压缩,支持透明 |
| ERDAS Imagine | HFA | .img | 遥感影像常用格式 |
| ECW | ECW | .ecw | 高压缩比商业格式 |
| JPEG2000 | JP2OpenJPEG | .jp2 | 高质量压缩 |
| NetCDF | netCDF | .nc | 科学数据格式 |
| HDF5 | HDF5 | .hdf/.h5 | 层次化数据格式 |
| VRT | VRT | .vrt | 虚拟数据集 |
矢量格式:
| 格式 | 驱动名称 | 文件扩展名 | 特点 |
|---|---|---|---|
| Shapefile | ESRI Shapefile | .shp | 最通用但有限制 |
| GeoJSON | GeoJSON | .geojson/.json | Web 友好,纯文本 |
| GeoPackage | GPKG | .gpkg | 现代 OGC 标准 |
| KML/KMZ | KML/LIBKML | .kml/.kmz | Google Earth 格式 |
| FileGDB | FileGDB/OpenFileGDB | .gdb | ArcGIS 文件地理数据库 |
| PostGIS | PostgreSQL | - | 空间数据库 |
| GML | GML | .gml | OGC 标准 XML 格式 |
| FlatGeobuf | FlatGeobuf | .fgb | 高性能二进制格式 |
| CSV | CSV | .csv | 简单表格格式 |
13.2 栅格格式转换
13.2.1 基础转换
from osgeo import gdal
def convert_raster(input_path, output_path, format='GTiff', options=None):
"""
通用栅格格式转换函数
参数:
input_path: 输入文件路径
output_path: 输出文件路径
format: 输出格式驱动名称
options: 创建选项字典
"""
gdal.UseExceptions()
# 默认选项
default_options = {
'GTiff': ['COMPRESS=LZW', 'TILED=YES'],
'COG': ['COMPRESS=LZW', 'OVERVIEWS=AUTO'],
'JPEG': ['QUALITY=90'],
'PNG': ['ZLEVEL=9'],
}
if options is None:
options = default_options.get(format, [])
# 打开源数据
src_ds = gdal.Open(input_path)
if src_ds is None:
raise Exception(f"无法打开: {input_path}")
# 执行转换
translate_options = gdal.TranslateOptions(
format=format,
creationOptions=options
)
dst_ds = gdal.Translate(output_path, src_ds, options=translate_options)
if dst_ds is None:
raise Exception(f"转换失败: {output_path}")
src_ds = None
dst_ds = None
print(f"转换完成: {input_path} -> {output_path}")
# 使用示例
# convert_raster('input.tif', 'output.png', 'PNG')
# convert_raster('input.tif', 'output_cog.tif', 'COG')
13.2.2 GeoTIFF 转换详解
from osgeo import gdal
def convert_to_geotiff(input_path, output_path,
compress='LZW',
tiled=True,
overview=True,
cog=False):
"""
转换为 GeoTIFF(支持多种优化选项)
"""
gdal.UseExceptions()
# 构建创建选项
options = []
if compress:
options.append(f'COMPRESS={compress}')
if compress in ['LZW', 'DEFLATE']:
options.append('PREDICTOR=2')
if tiled:
options.append('TILED=YES')
options.append('BLOCKXSIZE=256')
options.append('BLOCKYSIZE=256')
options.append('BIGTIFF=IF_SAFER')
# COG 格式
if cog:
format_name = 'COG'
options = [f'COMPRESS={compress}' if compress else 'COMPRESS=LZW',
'OVERVIEWS=AUTO',
'BLOCKSIZE=512']
else:
format_name = 'GTiff'
translate_options = gdal.TranslateOptions(
format=format_name,
creationOptions=options
)
dst_ds = gdal.Translate(output_path, input_path, options=translate_options)
# 创建金字塔(非 COG 时)
if overview and not cog:
dst_ds.BuildOverviews('AVERAGE', [2, 4, 8, 16, 32])
dst_ds = None
print(f"GeoTIFF 创建完成: {output_path}")
def convert_to_cog(input_path, output_path, compress='LZW'):
"""转换为 Cloud Optimized GeoTIFF"""
gdal.UseExceptions()
options = [
f'COMPRESS={compress}',
'OVERVIEWS=AUTO',
'BLOCKSIZE=512',
'QUALITY=90',
]
translate_options = gdal.TranslateOptions(
format='COG',
creationOptions=options
)
dst_ds = gdal.Translate(output_path, input_path, options=translate_options)
dst_ds = None
print(f"COG 创建完成: {output_path}")
# 使用示例
# convert_to_geotiff('input.img', 'output.tif', compress='LZW')
# convert_to_cog('input.tif', 'output_cog.tif')
13.2.3 多格式批量转换
from osgeo import gdal
import os
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor
def batch_convert_rasters(input_dir, output_dir,
input_format='*.tif',
output_format='GTiff',
options=None,
workers=4):
"""
批量转换栅格文件
"""
gdal.UseExceptions()
input_path = Path(input_dir)
output_path = Path(output_dir)
output_path.mkdir(parents=True, exist_ok=True)
# 查找文件
files = list(input_path.glob(input_format))
print(f"找到 {len(files)} 个文件")
# 格式到扩展名映射
format_ext = {
'GTiff': '.tif',
'COG': '.tif',
'PNG': '.png',
'JPEG': '.jpg',
'GPKG': '.gpkg',
'VRT': '.vrt',
}
ext = format_ext.get(output_format, '.tif')
# 构建任务列表
tasks = []
for f in files:
out_name = f.stem + ext
out_path = output_path / out_name
tasks.append((str(f), str(out_path), output_format, options))
# 并行处理
def convert_file(args):
src, dst, fmt, opts = args
try:
convert_raster(src, dst, fmt, opts)
return True, src
except Exception as e:
return False, f"{src}: {e}"
results = []
with ProcessPoolExecutor(max_workers=workers) as executor:
results = list(executor.map(convert_file, tasks))
# 统计结果
success = sum(1 for r in results if r[0])
failed = sum(1 for r in results if not r[0])
print(f"\n转换完成: 成功 {success}, 失败 {failed}")
if failed > 0:
print("失败的文件:")
for r in results:
if not r[0]:
print(f" {r[1]}")
# 使用示例
# batch_convert_rasters('./input', './output', '*.img', 'COG')
13.2.4 影像格式之间的转换
from osgeo import gdal
import numpy as np
def img_to_geotiff(input_path, output_path):
"""ERDAS Imagine IMG 转 GeoTIFF"""
gdal.UseExceptions()
options = gdal.TranslateOptions(
format='GTiff',
creationOptions=[
'COMPRESS=LZW',
'TILED=YES',
'BIGTIFF=IF_SAFER'
]
)
gdal.Translate(output_path, input_path, options=options)
print(f"IMG 转换完成: {output_path}")
def ecw_to_geotiff(input_path, output_path):
"""ECW 转 GeoTIFF"""
gdal.UseExceptions()
# ECW 是压缩格式,转换时可能需要较大内存
gdal.SetConfigOption('GDAL_CACHEMAX', '1024') # 1GB 缓存
options = gdal.TranslateOptions(
format='GTiff',
creationOptions=[
'COMPRESS=JPEG',
'JPEG_QUALITY=90',
'TILED=YES',
'BLOCKXSIZE=512',
'BLOCKYSIZE=512'
]
)
gdal.Translate(output_path, input_path, options=options)
print(f"ECW 转换完成: {output_path}")
def jp2_to_geotiff(input_path, output_path):
"""JPEG2000 转 GeoTIFF"""
gdal.UseExceptions()
options = gdal.TranslateOptions(
format='GTiff',
creationOptions=[
'COMPRESS=LZW',
'TILED=YES'
]
)
gdal.Translate(output_path, input_path, options=options)
print(f"JP2 转换完成: {output_path}")
def geotiff_to_jpeg(input_path, output_path, quality=85):
"""GeoTIFF 转 JPEG(用于预览)"""
gdal.UseExceptions()
# 读取源数据
src_ds = gdal.Open(input_path)
# 确定波段映射
bands = src_ds.RasterCount
if bands >= 3:
band_list = [1, 2, 3] # RGB
else:
band_list = [1] # 灰度
options = gdal.TranslateOptions(
format='JPEG',
bandList=band_list,
scaleParams=[[0, 255]], # 标准化到 0-255
outputType=gdal.GDT_Byte,
creationOptions=[f'QUALITY={quality}']
)
gdal.Translate(output_path, src_ds, options=options)
src_ds = None
print(f"JPEG 创建完成: {output_path}")
def geotiff_to_png(input_path, output_path, scale=True):
"""GeoTIFF 转 PNG"""
gdal.UseExceptions()
src_ds = gdal.Open(input_path)
# 确定波段
bands = src_ds.RasterCount
scale_params = None
if scale:
# 计算每个波段的统计信息用于缩放
scale_params = []
for i in range(1, min(bands, 4) + 1):
band = src_ds.GetRasterBand(i)
stats = band.GetStatistics(True, True)
min_val, max_val = stats[0], stats[1]
scale_params.append([min_val, max_val, 0, 255])
options = gdal.TranslateOptions(
format='PNG',
bandList=list(range(1, min(bands, 4) + 1)),
scaleParams=scale_params,
outputType=gdal.GDT_Byte,
creationOptions=['ZLEVEL=9']
)
gdal.Translate(output_path, src_ds, options=options)
src_ds = None
print(f"PNG 创建完成: {output_path}")
13.2.5 NetCDF 和 HDF 转换
from osgeo import gdal
def netcdf_to_geotiff(nc_path, variable, output_path, time_index=0):
"""
NetCDF 变量导出为 GeoTIFF
参数:
nc_path: NetCDF 文件路径
variable: 变量名
output_path: 输出 GeoTIFF 路径
time_index: 时间索引(多时相数据)
"""
gdal.UseExceptions()
# 打开 NetCDF
ds = gdal.Open(nc_path)
if ds is None:
raise Exception(f"无法打开: {nc_path}")
# 获取子数据集
subdatasets = ds.GetSubDatasets()
if not subdatasets:
# 没有子数据集,直接转换
options = gdal.TranslateOptions(
format='GTiff',
bandList=[time_index + 1] if time_index else None,
creationOptions=['COMPRESS=LZW']
)
gdal.Translate(output_path, ds, options=options)
else:
# 查找指定变量
var_path = None
for path, desc in subdatasets:
if variable in path or variable in desc:
var_path = path
break
if var_path is None:
raise Exception(f"未找到变量: {variable}")
# 打开子数据集
sub_ds = gdal.Open(var_path)
# 转换
options = gdal.TranslateOptions(
format='GTiff',
bandList=[time_index + 1],
creationOptions=['COMPRESS=LZW']
)
gdal.Translate(output_path, sub_ds, options=options)
sub_ds = None
ds = None
print(f"NetCDF 转换完成: {output_path}")
def hdf_to_geotiff(hdf_path, subdataset_index, output_path):
"""
HDF 子数据集导出为 GeoTIFF
"""
gdal.UseExceptions()
ds = gdal.Open(hdf_path)
subdatasets = ds.GetSubDatasets()
if subdataset_index >= len(subdatasets):
raise Exception(f"子数据集索引超出范围: {subdataset_index}")
sub_path, sub_desc = subdatasets[subdataset_index]
print(f"转换子数据集: {sub_desc}")
sub_ds = gdal.Open(sub_path)
options = gdal.TranslateOptions(
format='GTiff',
creationOptions=['COMPRESS=LZW', 'TILED=YES']
)
gdal.Translate(output_path, sub_ds, options=options)
sub_ds = None
ds = None
print(f"HDF 转换完成: {output_path}")
def list_netcdf_variables(nc_path):
"""列出 NetCDF 文件中的所有变量"""
ds = gdal.Open(nc_path)
if ds is None:
raise Exception(f"无法打开: {nc_path}")
subdatasets = ds.GetSubDatasets()
print(f"文件: {nc_path}")
print(f"子数据集数量: {len(subdatasets)}")
print("-" * 60)
for i, (path, desc) in enumerate(subdatasets):
print(f"{i}: {desc}")
ds = None
return subdatasets
13.3 矢量格式转换
13.3.1 基础转换
from osgeo import ogr
def convert_vector(input_path, output_path, format='ESRI Shapefile', options=None):
"""
通用矢量格式转换
"""
ogr.UseExceptions()
# 打开源数据
src_ds = ogr.Open(input_path)
if src_ds is None:
raise Exception(f"无法打开: {input_path}")
# 获取驱动
driver = ogr.GetDriverByName(format)
if driver is None:
raise Exception(f"不支持的格式: {format}")
# 删除已存在的输出
if driver.DeleteDataSource(output_path) != 0:
pass
# 创建输出
dst_ds = driver.CreateDataSource(output_path)
# 转换每个图层
for i in range(src_ds.GetLayerCount()):
src_layer = src_ds.GetLayer(i)
# 获取图层名
layer_name = src_layer.GetName()
# 复制图层
dst_layer = dst_ds.CopyLayer(src_layer, layer_name, options if options else [])
src_ds = None
dst_ds = None
print(f"转换完成: {input_path} -> {output_path}")
# 使用示例
# convert_vector('input.shp', 'output.geojson', 'GeoJSON')
# convert_vector('input.shp', 'output.gpkg', 'GPKG')
13.3.2 Shapefile 转换
from osgeo import ogr
def shp_to_geojson(shp_path, geojson_path, pretty=True):
"""Shapefile 转 GeoJSON"""
ogr.UseExceptions()
src_ds = ogr.Open(shp_path)
src_layer = src_ds.GetLayer()
driver = ogr.GetDriverByName('GeoJSON')
# 设置格式化选项
options = ['RFC7946=YES'] # 标准 GeoJSON
if pretty:
options.append('WRITE_BBOX=YES')
dst_ds = driver.CreateDataSource(geojson_path)
dst_ds.CopyLayer(src_layer, 'features', options)
src_ds = None
dst_ds = None
print(f"GeoJSON 创建完成: {geojson_path}")
def shp_to_geopackage(shp_path, gpkg_path, layer_name=None):
"""Shapefile 转 GeoPackage"""
ogr.UseExceptions()
src_ds = ogr.Open(shp_path)
src_layer = src_ds.GetLayer()
if layer_name is None:
layer_name = src_layer.GetName()
driver = ogr.GetDriverByName('GPKG')
dst_ds = driver.CreateDataSource(gpkg_path)
options = ['FID=fid']
dst_ds.CopyLayer(src_layer, layer_name, options)
src_ds = None
dst_ds = None
print(f"GeoPackage 创建完成: {gpkg_path}")
def shp_to_kml(shp_path, kml_path):
"""Shapefile 转 KML"""
ogr.UseExceptions()
src_ds = ogr.Open(shp_path)
src_layer = src_ds.GetLayer()
# KML 需要 WGS84 坐标
src_srs = src_layer.GetSpatialRef()
driver = ogr.GetDriverByName('KML')
dst_ds = driver.CreateDataSource(kml_path)
dst_ds.CopyLayer(src_layer, src_layer.GetName())
src_ds = None
dst_ds = None
print(f"KML 创建完成: {kml_path}")
def geojson_to_shp(geojson_path, shp_path, encoding='UTF-8'):
"""GeoJSON 转 Shapefile"""
ogr.UseExceptions()
src_ds = ogr.Open(geojson_path)
src_layer = src_ds.GetLayer()
driver = ogr.GetDriverByName('ESRI Shapefile')
# 删除已存在的文件
if driver.DeleteDataSource(shp_path) != 0:
pass
dst_ds = driver.CreateDataSource(shp_path)
options = [f'ENCODING={encoding}']
dst_ds.CopyLayer(src_layer, 'layer', options)
src_ds = None
dst_ds = None
print(f"Shapefile 创建完成: {shp_path}")
13.3.3 数据库格式转换
from osgeo import ogr
def shp_to_postgis(shp_path, connection_string, table_name=None,
srid=None, overwrite=True):
"""
Shapefile 导入 PostGIS
参数:
shp_path: Shapefile 路径
connection_string: PostGIS 连接字符串
例: "PG:host=localhost dbname=gisdb user=postgres password=xxx"
table_name: 表名,默认使用图层名
srid: 目标坐标系 EPSG 代码
overwrite: 是否覆盖已存在的表
"""
ogr.UseExceptions()
src_ds = ogr.Open(shp_path)
src_layer = src_ds.GetLayer()
if table_name is None:
table_name = src_layer.GetName().lower()
# 连接 PostGIS
dst_ds = ogr.Open(connection_string, 1) # 1 表示可写
if dst_ds is None:
raise Exception(f"无法连接到数据库: {connection_string}")
# 检查表是否存在
if overwrite:
try:
dst_ds.DeleteLayer(table_name)
except:
pass
# 复制图层
options = [f'GEOMETRY_NAME=geom']
if srid:
options.append(f'SRID={srid}')
dst_ds.CopyLayer(src_layer, table_name, options)
src_ds = None
dst_ds = None
print(f"导入完成: {table_name}")
def postgis_to_shp(connection_string, table_name, output_path, where=None):
"""
从 PostGIS 导出为 Shapefile
"""
ogr.UseExceptions()
src_ds = ogr.Open(connection_string)
if src_ds is None:
raise Exception(f"无法连接到数据库: {connection_string}")
# 获取图层(可以使用 SQL)
if where:
src_layer = src_ds.ExecuteSQL(f"SELECT * FROM {table_name} WHERE {where}")
else:
src_layer = src_ds.GetLayerByName(table_name)
if src_layer is None:
raise Exception(f"未找到表: {table_name}")
# 创建 Shapefile
driver = ogr.GetDriverByName('ESRI Shapefile')
driver.DeleteDataSource(output_path)
dst_ds = driver.CreateDataSource(output_path)
dst_ds.CopyLayer(src_layer, 'layer', ['ENCODING=UTF-8'])
if where:
src_ds.ReleaseResultSet(src_layer)
src_ds = None
dst_ds = None
print(f"导出完成: {output_path}")
def shp_to_spatialite(shp_path, db_path, table_name=None):
"""Shapefile 导入 Spatialite"""
ogr.UseExceptions()
src_ds = ogr.Open(shp_path)
src_layer = src_ds.GetLayer()
if table_name is None:
table_name = src_layer.GetName()
driver = ogr.GetDriverByName('SQLite')
# 创建或打开数据库
dst_ds = driver.CreateDataSource(db_path, ['SPATIALITE=YES'])
if dst_ds is None:
# 尝试打开已存在的数据库
dst_ds = ogr.Open(db_path, 1)
dst_ds.CopyLayer(src_layer, table_name)
src_ds = None
dst_ds = None
print(f"Spatialite 导入完成: {db_path}")
13.3.4 高级转换选项
from osgeo import ogr, osr
def convert_with_reprojection(input_path, output_path,
output_format='GeoJSON',
target_srs='EPSG:4326'):
"""带投影转换的格式转换"""
ogr.UseExceptions()
src_ds = ogr.Open(input_path)
src_layer = src_ds.GetLayer()
# 创建目标空间参考
dst_srs = osr.SpatialReference()
dst_srs.SetFromUserInput(target_srs)
# 创建坐标转换
src_srs = src_layer.GetSpatialRef()
transform = osr.CoordinateTransformation(src_srs, dst_srs)
# 创建输出
driver = ogr.GetDriverByName(output_format)
dst_ds = driver.CreateDataSource(output_path)
dst_layer = dst_ds.CreateLayer(
src_layer.GetName(),
dst_srs,
src_layer.GetGeomType()
)
# 复制字段
src_defn = src_layer.GetLayerDefn()
for i in range(src_defn.GetFieldCount()):
dst_layer.CreateField(src_defn.GetFieldDefn(i))
dst_defn = dst_layer.GetLayerDefn()
# 转换要素
for feature in src_layer:
geom = feature.GetGeometryRef()
if geom:
geom.Transform(transform)
out_feat = ogr.Feature(dst_defn)
out_feat.SetGeometry(geom)
for i in range(src_defn.GetFieldCount()):
out_feat.SetField(i, feature.GetField(i))
dst_layer.CreateFeature(out_feat)
src_ds = None
dst_ds = None
print(f"转换完成(重投影到 {target_srs}): {output_path}")
def convert_with_filter(input_path, output_path,
output_format='GeoJSON',
where=None, bbox=None):
"""带过滤的格式转换"""
ogr.UseExceptions()
src_ds = ogr.Open(input_path)
src_layer = src_ds.GetLayer()
# 应用过滤
if where:
src_layer.SetAttributeFilter(where)
if bbox:
src_layer.SetSpatialFilterRect(*bbox)
# 创建输出
driver = ogr.GetDriverByName(output_format)
dst_ds = driver.CreateDataSource(output_path)
dst_ds.CopyLayer(src_layer, 'filtered')
src_ds = None
dst_ds = None
print(f"转换完成(带过滤): {output_path}")
def convert_fields_mapping(input_path, output_path,
output_format='GeoJSON',
field_mapping=None):
"""
带字段映射的格式转换
参数:
field_mapping: 字段映射字典 {'old_name': 'new_name', ...}
"""
ogr.UseExceptions()
src_ds = ogr.Open(input_path)
src_layer = src_ds.GetLayer()
driver = ogr.GetDriverByName(output_format)
dst_ds = driver.CreateDataSource(output_path)
dst_layer = dst_ds.CreateLayer(
src_layer.GetName(),
src_layer.GetSpatialRef(),
src_layer.GetGeomType()
)
# 创建映射后的字段
src_defn = src_layer.GetLayerDefn()
field_map = []
for i in range(src_defn.GetFieldCount()):
old_field = src_defn.GetFieldDefn(i)
old_name = old_field.GetName()
if field_mapping and old_name in field_mapping:
new_name = field_mapping[old_name]
if new_name is None:
field_map.append(None)
continue
else:
new_name = old_name
new_field = ogr.FieldDefn(new_name, old_field.GetType())
new_field.SetWidth(old_field.GetWidth())
new_field.SetPrecision(old_field.GetPrecision())
dst_layer.CreateField(new_field)
field_map.append(new_name)
dst_defn = dst_layer.GetLayerDefn()
# 转换要素
for feature in src_layer:
out_feat = ogr.Feature(dst_defn)
out_feat.SetGeometry(feature.GetGeometryRef())
for i, new_name in enumerate(field_map):
if new_name:
out_feat.SetField(new_name, feature.GetField(i))
dst_layer.CreateFeature(out_feat)
src_ds = None
dst_ds = None
print(f"转换完成(字段映射): {output_path}")
13.4 批量处理工具
13.4.1 批量矢量转换
from osgeo import ogr
import os
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
def batch_convert_vectors(input_dir, output_dir,
input_pattern='*.shp',
output_format='GeoJSON',
workers=4):
"""批量转换矢量文件"""
ogr.UseExceptions()
input_path = Path(input_dir)
output_path = Path(output_dir)
output_path.mkdir(parents=True, exist_ok=True)
# 格式到扩展名映射
format_ext = {
'GeoJSON': '.geojson',
'GPKG': '.gpkg',
'ESRI Shapefile': '.shp',
'KML': '.kml',
'GML': '.gml',
'FlatGeobuf': '.fgb',
'CSV': '.csv',
}
ext = format_ext.get(output_format, '.geojson')
# 查找文件
files = list(input_path.glob(input_pattern))
print(f"找到 {len(files)} 个文件")
def convert_file(filepath):
try:
out_name = filepath.stem + ext
out_path = output_path / out_name
convert_vector(str(filepath), str(out_path), output_format)
return True, filepath.name
except Exception as e:
return False, f"{filepath.name}: {e}"
results = []
with ThreadPoolExecutor(max_workers=workers) as executor:
results = list(executor.map(convert_file, files))
success = sum(1 for r in results if r[0])
failed = sum(1 for r in results if not r[0])
print(f"\n批量转换完成: 成功 {success}, 失败 {failed}")
# 使用示例
# batch_convert_vectors('./shapefiles', './geojson', '*.shp', 'GeoJSON')
13.4.2 合并多个文件
from osgeo import ogr
def merge_vectors(input_files, output_path, output_format='GeoJSON'):
"""
合并多个矢量文件
"""
ogr.UseExceptions()
driver = ogr.GetDriverByName(output_format)
dst_ds = driver.CreateDataSource(output_path)
dst_layer = None
dst_defn = None
for i, filepath in enumerate(input_files):
print(f"处理: {filepath}")
src_ds = ogr.Open(filepath)
src_layer = src_ds.GetLayer()
if dst_layer is None:
# 首次创建图层
dst_layer = dst_ds.CreateLayer(
'merged',
src_layer.GetSpatialRef(),
src_layer.GetGeomType()
)
# 复制字段
src_defn = src_layer.GetLayerDefn()
for j in range(src_defn.GetFieldCount()):
dst_layer.CreateField(src_defn.GetFieldDefn(j))
dst_defn = dst_layer.GetLayerDefn()
# 复制要素
for feature in src_layer:
out_feat = ogr.Feature(dst_defn)
out_feat.SetGeometry(feature.GetGeometryRef())
for j in range(src_defn.GetFieldCount()):
out_feat.SetField(j, feature.GetField(j))
dst_layer.CreateFeature(out_feat)
src_ds = None
dst_ds = None
print(f"合并完成: {output_path}")
def merge_rasters(input_files, output_path, nodata=None):
"""合并多个栅格文件"""
from osgeo import gdal
gdal.UseExceptions()
# 创建 VRT
vrt_options = gdal.BuildVRTOptions(
srcNodata=nodata,
VRTNodata=nodata,
resolution='average'
)
vrt = gdal.BuildVRT('', input_files, options=vrt_options)
# 转换为 GeoTIFF
translate_options = gdal.TranslateOptions(
format='GTiff',
creationOptions=[
'COMPRESS=LZW',
'TILED=YES',
'BIGTIFF=IF_SAFER'
]
)
gdal.Translate(output_path, vrt, options=translate_options)
vrt = None
print(f"栅格合并完成: {output_path}")
13.5 命令行工具脚本
13.5.1 通用转换脚本
#!/usr/bin/env python
"""
通用 GDAL/OGR 格式转换工具
用法:
python converter.py input_file output_file [--format FORMAT]
"""
import argparse
import os
from pathlib import Path
from osgeo import gdal, ogr
def detect_type(filepath):
"""检测文件类型(栅格或矢量)"""
# 尝试作为栅格打开
ds = gdal.Open(filepath)
if ds is not None:
ds = None
return 'raster'
# 尝试作为矢量打开
ds = ogr.Open(filepath)
if ds is not None:
ds = None
return 'vector'
return None
def convert(input_path, output_path, format=None, options=None):
"""执行转换"""
gdal.UseExceptions()
ogr.UseExceptions()
data_type = detect_type(input_path)
if data_type is None:
raise Exception(f"无法识别的文件格式: {input_path}")
print(f"检测到数据类型: {data_type}")
if data_type == 'raster':
if format is None:
format = 'GTiff'
translate_options = gdal.TranslateOptions(
format=format,
creationOptions=options or ['COMPRESS=LZW', 'TILED=YES']
)
gdal.Translate(output_path, input_path, options=translate_options)
else: # vector
if format is None:
# 根据扩展名推断
ext = Path(output_path).suffix.lower()
format_map = {
'.geojson': 'GeoJSON',
'.json': 'GeoJSON',
'.gpkg': 'GPKG',
'.shp': 'ESRI Shapefile',
'.kml': 'KML',
'.gml': 'GML',
'.fgb': 'FlatGeobuf',
}
format = format_map.get(ext, 'GeoJSON')
src_ds = ogr.Open(input_path)
driver = ogr.GetDriverByName(format)
if os.path.exists(output_path):
driver.DeleteDataSource(output_path)
dst_ds = driver.CreateDataSource(output_path)
for i in range(src_ds.GetLayerCount()):
dst_ds.CopyLayer(src_ds.GetLayer(i), src_ds.GetLayer(i).GetName())
src_ds = None
dst_ds = None
print(f"转换完成: {output_path}")
def main():
parser = argparse.ArgumentParser(description='GDAL/OGR 格式转换工具')
parser.add_argument('input', help='输入文件路径')
parser.add_argument('output', help='输出文件路径')
parser.add_argument('-f', '--format', help='输出格式')
parser.add_argument('-o', '--option', action='append', help='创建选项')
args = parser.parse_args()
try:
convert(args.input, args.output, args.format, args.option)
except Exception as e:
print(f"错误: {e}")
exit(1)
if __name__ == '__main__':
main()
13.5.2 批量处理脚本
#!/bin/bash
# 批量格式转换脚本
INPUT_DIR="$1"
OUTPUT_DIR="$2"
FORMAT="${3:-GTiff}"
if [ -z "$INPUT_DIR" ] || [ -z "$OUTPUT_DIR" ]; then
echo "用法: $0 <输入目录> <输出目录> [输出格式]"
exit 1
fi
mkdir -p "$OUTPUT_DIR"
# 处理栅格文件
for file in "$INPUT_DIR"/*.{tif,img,ecw,jp2}; do
if [ -f "$file" ]; then
filename=$(basename "$file")
name="${filename%.*}"
echo "转换: $filename"
gdal_translate -of "$FORMAT" -co COMPRESS=LZW "$file" "$OUTPUT_DIR/$name.tif"
fi
done
# 处理矢量文件
for file in "$INPUT_DIR"/*.shp; do
if [ -f "$file" ]; then
filename=$(basename "$file")
name="${filename%.*}"
echo "转换: $filename"
ogr2ogr -f "GeoJSON" "$OUTPUT_DIR/$name.geojson" "$file"
fi
done
echo "转换完成"
13.6 常见问题与解决方案
13.6.1 编码问题
from osgeo import ogr, gdal
def fix_encoding_issues(input_path, output_path,
input_encoding='GBK', output_encoding='UTF-8'):
"""处理编码问题"""
ogr.UseExceptions()
# 设置输入编码
gdal.SetConfigOption('SHAPE_ENCODING', input_encoding)
src_ds = ogr.Open(input_path)
src_layer = src_ds.GetLayer()
driver = ogr.GetDriverByName('GeoJSON') # GeoJSON 总是 UTF-8
dst_ds = driver.CreateDataSource(output_path)
dst_ds.CopyLayer(src_layer, 'layer')
src_ds = None
dst_ds = None
# 恢复默认
gdal.SetConfigOption('SHAPE_ENCODING', '')
print(f"编码转换完成: {output_path}")
13.6.2 大文件处理
from osgeo import gdal
def convert_large_raster(input_path, output_path, chunk_size=4096):
"""处理大型栅格文件"""
gdal.UseExceptions()
# 设置较大的缓存
gdal.SetCacheMax(2 * 1024 * 1024 * 1024) # 2GB
src_ds = gdal.Open(input_path)
# 使用 BIGTIFF
translate_options = gdal.TranslateOptions(
format='GTiff',
creationOptions=[
'COMPRESS=LZW',
'TILED=YES',
f'BLOCKXSIZE={chunk_size}',
f'BLOCKYSIZE={chunk_size}',
'BIGTIFF=YES'
]
)
gdal.Translate(output_path, src_ds, options=translate_options,
callback=gdal.TermProgress_nocb)
src_ds = None
print(f"大文件转换完成: {output_path}")
13.6.3 坐标系问题
from osgeo import ogr, osr
def convert_with_srs_fix(input_path, output_path, target_srs='EPSG:4326'):
"""处理缺少坐标系的数据"""
ogr.UseExceptions()
src_ds = ogr.Open(input_path)
src_layer = src_ds.GetLayer()
src_srs = src_layer.GetSpatialRef()
if src_srs is None:
print("警告: 源数据没有坐标系定义")
# 假设是 WGS84
src_srs = osr.SpatialReference()
src_srs.ImportFromEPSG(4326)
dst_srs = osr.SpatialReference()
dst_srs.SetFromUserInput(target_srs)
driver = ogr.GetDriverByName('GeoJSON')
dst_ds = driver.CreateDataSource(output_path)
dst_layer = dst_ds.CreateLayer('layer', dst_srs, src_layer.GetGeomType())
# ... 复制要素 ...
src_ds = None
dst_ds = None
13.7 本章小结
本章介绍了数据格式转换的各种技术:
- 栅格转换:GeoTIFF、COG、PNG、JPEG、NetCDF、HDF
- 矢量转换:Shapefile、GeoJSON、GeoPackage、KML、PostGIS
- 批量处理:并行处理、文件合并
- 高级选项:投影转换、字段映射、过滤
- 常见问题:编码、大文件、坐标系
13.8 思考与练习
- 编写一个支持所有常见格式的通用转换工具。
- 实现一个带进度显示的批量转换脚本。
- 比较不同压缩方式对 GeoTIFF 文件大小和读取速度的影响。
- 如何处理 Shapefile 的字段名长度限制(10 字符)?
- 实现从 PostGIS 批量导出多个表为 GeoPackage。

浙公网安备 33010602011771号