第13章 - 数据格式转换实战

第13章:数据格式转换实战

13.1 概述

数据格式转换是 GIS 开发中最常见的任务之一。GDAL/OGR 支持超过 200 种数据格式,能够满足几乎所有的格式转换需求。

13.1.1 格式转换的重要性

场景 说明
系统集成 不同系统使用不同格式,需要转换才能交换数据
性能优化 某些格式更适合特定用途(如 COG 适合云端访问)
数据共享 开放格式(如 GeoJSON)便于数据共享
存储优化 压缩格式可以减少存储空间
软件兼容 某些软件只支持特定格式

13.1.2 常见格式对照表

栅格格式:

格式 驱动名称 文件扩展名 特点
GeoTIFF GTiff .tif/.tiff 最通用的地理栅格格式
COG COG .tif 云优化 GeoTIFF
JPEG JPEG .jpg/.jpeg 有损压缩,适合展示
PNG PNG .png 无损压缩,支持透明
ERDAS Imagine HFA .img 遥感影像常用格式
ECW ECW .ecw 高压缩比商业格式
JPEG2000 JP2OpenJPEG .jp2 高质量压缩
NetCDF netCDF .nc 科学数据格式
HDF5 HDF5 .hdf/.h5 层次化数据格式
VRT VRT .vrt 虚拟数据集

矢量格式:

格式 驱动名称 文件扩展名 特点
Shapefile ESRI Shapefile .shp 最通用但有限制
GeoJSON GeoJSON .geojson/.json Web 友好,纯文本
GeoPackage GPKG .gpkg 现代 OGC 标准
KML/KMZ KML/LIBKML .kml/.kmz Google Earth 格式
FileGDB FileGDB/OpenFileGDB .gdb ArcGIS 文件地理数据库
PostGIS PostgreSQL - 空间数据库
GML GML .gml OGC 标准 XML 格式
FlatGeobuf FlatGeobuf .fgb 高性能二进制格式
CSV CSV .csv 简单表格格式

13.2 栅格格式转换

13.2.1 基础转换

from osgeo import gdal

def convert_raster(input_path, output_path, format='GTiff', options=None):
    """
    通用栅格格式转换函数
    
    参数:
        input_path: 输入文件路径
        output_path: 输出文件路径
        format: 输出格式驱动名称
        options: 创建选项字典
    """
    gdal.UseExceptions()
    
    # 默认选项
    default_options = {
        'GTiff': ['COMPRESS=LZW', 'TILED=YES'],
        'COG': ['COMPRESS=LZW', 'OVERVIEWS=AUTO'],
        'JPEG': ['QUALITY=90'],
        'PNG': ['ZLEVEL=9'],
    }
    
    if options is None:
        options = default_options.get(format, [])
    
    # 打开源数据
    src_ds = gdal.Open(input_path)
    if src_ds is None:
        raise Exception(f"无法打开: {input_path}")
    
    # 执行转换
    translate_options = gdal.TranslateOptions(
        format=format,
        creationOptions=options
    )
    
    dst_ds = gdal.Translate(output_path, src_ds, options=translate_options)
    
    if dst_ds is None:
        raise Exception(f"转换失败: {output_path}")
    
    src_ds = None
    dst_ds = None
    
    print(f"转换完成: {input_path} -> {output_path}")

# 使用示例
# convert_raster('input.tif', 'output.png', 'PNG')
# convert_raster('input.tif', 'output_cog.tif', 'COG')

13.2.2 GeoTIFF 转换详解

from osgeo import gdal

def convert_to_geotiff(input_path, output_path, 
                       compress='LZW', 
                       tiled=True,
                       overview=True,
                       cog=False):
    """
    转换为 GeoTIFF(支持多种优化选项)
    """
    gdal.UseExceptions()
    
    # 构建创建选项
    options = []
    
    if compress:
        options.append(f'COMPRESS={compress}')
        if compress in ['LZW', 'DEFLATE']:
            options.append('PREDICTOR=2')
    
    if tiled:
        options.append('TILED=YES')
        options.append('BLOCKXSIZE=256')
        options.append('BLOCKYSIZE=256')
    
    options.append('BIGTIFF=IF_SAFER')
    
    # COG 格式
    if cog:
        format_name = 'COG'
        options = [f'COMPRESS={compress}' if compress else 'COMPRESS=LZW',
                   'OVERVIEWS=AUTO',
                   'BLOCKSIZE=512']
    else:
        format_name = 'GTiff'
    
    translate_options = gdal.TranslateOptions(
        format=format_name,
        creationOptions=options
    )
    
    dst_ds = gdal.Translate(output_path, input_path, options=translate_options)
    
    # 创建金字塔(非 COG 时)
    if overview and not cog:
        dst_ds.BuildOverviews('AVERAGE', [2, 4, 8, 16, 32])
    
    dst_ds = None
    
    print(f"GeoTIFF 创建完成: {output_path}")

def convert_to_cog(input_path, output_path, compress='LZW'):
    """转换为 Cloud Optimized GeoTIFF"""
    
    gdal.UseExceptions()
    
    options = [
        f'COMPRESS={compress}',
        'OVERVIEWS=AUTO',
        'BLOCKSIZE=512',
        'QUALITY=90',
    ]
    
    translate_options = gdal.TranslateOptions(
        format='COG',
        creationOptions=options
    )
    
    dst_ds = gdal.Translate(output_path, input_path, options=translate_options)
    dst_ds = None
    
    print(f"COG 创建完成: {output_path}")

# 使用示例
# convert_to_geotiff('input.img', 'output.tif', compress='LZW')
# convert_to_cog('input.tif', 'output_cog.tif')

13.2.3 多格式批量转换

from osgeo import gdal
import os
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor

def batch_convert_rasters(input_dir, output_dir, 
                          input_format='*.tif', 
                          output_format='GTiff',
                          options=None,
                          workers=4):
    """
    批量转换栅格文件
    """
    gdal.UseExceptions()
    
    input_path = Path(input_dir)
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    
    # 查找文件
    files = list(input_path.glob(input_format))
    print(f"找到 {len(files)} 个文件")
    
    # 格式到扩展名映射
    format_ext = {
        'GTiff': '.tif',
        'COG': '.tif',
        'PNG': '.png',
        'JPEG': '.jpg',
        'GPKG': '.gpkg',
        'VRT': '.vrt',
    }
    
    ext = format_ext.get(output_format, '.tif')
    
    # 构建任务列表
    tasks = []
    for f in files:
        out_name = f.stem + ext
        out_path = output_path / out_name
        tasks.append((str(f), str(out_path), output_format, options))
    
    # 并行处理
    def convert_file(args):
        src, dst, fmt, opts = args
        try:
            convert_raster(src, dst, fmt, opts)
            return True, src
        except Exception as e:
            return False, f"{src}: {e}"
    
    results = []
    with ProcessPoolExecutor(max_workers=workers) as executor:
        results = list(executor.map(convert_file, tasks))
    
    # 统计结果
    success = sum(1 for r in results if r[0])
    failed = sum(1 for r in results if not r[0])
    
    print(f"\n转换完成: 成功 {success}, 失败 {failed}")
    
    if failed > 0:
        print("失败的文件:")
        for r in results:
            if not r[0]:
                print(f"  {r[1]}")

# 使用示例
# batch_convert_rasters('./input', './output', '*.img', 'COG')

13.2.4 影像格式之间的转换

from osgeo import gdal
import numpy as np

def img_to_geotiff(input_path, output_path):
    """ERDAS Imagine IMG 转 GeoTIFF"""
    
    gdal.UseExceptions()
    
    options = gdal.TranslateOptions(
        format='GTiff',
        creationOptions=[
            'COMPRESS=LZW',
            'TILED=YES',
            'BIGTIFF=IF_SAFER'
        ]
    )
    
    gdal.Translate(output_path, input_path, options=options)
    print(f"IMG 转换完成: {output_path}")

def ecw_to_geotiff(input_path, output_path):
    """ECW 转 GeoTIFF"""
    
    gdal.UseExceptions()
    
    # ECW 是压缩格式,转换时可能需要较大内存
    gdal.SetConfigOption('GDAL_CACHEMAX', '1024')  # 1GB 缓存
    
    options = gdal.TranslateOptions(
        format='GTiff',
        creationOptions=[
            'COMPRESS=JPEG',
            'JPEG_QUALITY=90',
            'TILED=YES',
            'BLOCKXSIZE=512',
            'BLOCKYSIZE=512'
        ]
    )
    
    gdal.Translate(output_path, input_path, options=options)
    print(f"ECW 转换完成: {output_path}")

def jp2_to_geotiff(input_path, output_path):
    """JPEG2000 转 GeoTIFF"""
    
    gdal.UseExceptions()
    
    options = gdal.TranslateOptions(
        format='GTiff',
        creationOptions=[
            'COMPRESS=LZW',
            'TILED=YES'
        ]
    )
    
    gdal.Translate(output_path, input_path, options=options)
    print(f"JP2 转换完成: {output_path}")

def geotiff_to_jpeg(input_path, output_path, quality=85):
    """GeoTIFF 转 JPEG(用于预览)"""
    
    gdal.UseExceptions()
    
    # 读取源数据
    src_ds = gdal.Open(input_path)
    
    # 确定波段映射
    bands = src_ds.RasterCount
    if bands >= 3:
        band_list = [1, 2, 3]  # RGB
    else:
        band_list = [1]  # 灰度
    
    options = gdal.TranslateOptions(
        format='JPEG',
        bandList=band_list,
        scaleParams=[[0, 255]],  # 标准化到 0-255
        outputType=gdal.GDT_Byte,
        creationOptions=[f'QUALITY={quality}']
    )
    
    gdal.Translate(output_path, src_ds, options=options)
    src_ds = None
    
    print(f"JPEG 创建完成: {output_path}")

def geotiff_to_png(input_path, output_path, scale=True):
    """GeoTIFF 转 PNG"""
    
    gdal.UseExceptions()
    
    src_ds = gdal.Open(input_path)
    
    # 确定波段
    bands = src_ds.RasterCount
    
    scale_params = None
    if scale:
        # 计算每个波段的统计信息用于缩放
        scale_params = []
        for i in range(1, min(bands, 4) + 1):
            band = src_ds.GetRasterBand(i)
            stats = band.GetStatistics(True, True)
            min_val, max_val = stats[0], stats[1]
            scale_params.append([min_val, max_val, 0, 255])
    
    options = gdal.TranslateOptions(
        format='PNG',
        bandList=list(range(1, min(bands, 4) + 1)),
        scaleParams=scale_params,
        outputType=gdal.GDT_Byte,
        creationOptions=['ZLEVEL=9']
    )
    
    gdal.Translate(output_path, src_ds, options=options)
    src_ds = None
    
    print(f"PNG 创建完成: {output_path}")

13.2.5 NetCDF 和 HDF 转换

from osgeo import gdal

def netcdf_to_geotiff(nc_path, variable, output_path, time_index=0):
    """
    NetCDF 变量导出为 GeoTIFF
    
    参数:
        nc_path: NetCDF 文件路径
        variable: 变量名
        output_path: 输出 GeoTIFF 路径
        time_index: 时间索引(多时相数据)
    """
    gdal.UseExceptions()
    
    # 打开 NetCDF
    ds = gdal.Open(nc_path)
    
    if ds is None:
        raise Exception(f"无法打开: {nc_path}")
    
    # 获取子数据集
    subdatasets = ds.GetSubDatasets()
    
    if not subdatasets:
        # 没有子数据集,直接转换
        options = gdal.TranslateOptions(
            format='GTiff',
            bandList=[time_index + 1] if time_index else None,
            creationOptions=['COMPRESS=LZW']
        )
        gdal.Translate(output_path, ds, options=options)
    else:
        # 查找指定变量
        var_path = None
        for path, desc in subdatasets:
            if variable in path or variable in desc:
                var_path = path
                break
        
        if var_path is None:
            raise Exception(f"未找到变量: {variable}")
        
        # 打开子数据集
        sub_ds = gdal.Open(var_path)
        
        # 转换
        options = gdal.TranslateOptions(
            format='GTiff',
            bandList=[time_index + 1],
            creationOptions=['COMPRESS=LZW']
        )
        gdal.Translate(output_path, sub_ds, options=options)
        
        sub_ds = None
    
    ds = None
    print(f"NetCDF 转换完成: {output_path}")

def hdf_to_geotiff(hdf_path, subdataset_index, output_path):
    """
    HDF 子数据集导出为 GeoTIFF
    """
    gdal.UseExceptions()
    
    ds = gdal.Open(hdf_path)
    subdatasets = ds.GetSubDatasets()
    
    if subdataset_index >= len(subdatasets):
        raise Exception(f"子数据集索引超出范围: {subdataset_index}")
    
    sub_path, sub_desc = subdatasets[subdataset_index]
    print(f"转换子数据集: {sub_desc}")
    
    sub_ds = gdal.Open(sub_path)
    
    options = gdal.TranslateOptions(
        format='GTiff',
        creationOptions=['COMPRESS=LZW', 'TILED=YES']
    )
    
    gdal.Translate(output_path, sub_ds, options=options)
    
    sub_ds = None
    ds = None
    
    print(f"HDF 转换完成: {output_path}")

def list_netcdf_variables(nc_path):
    """列出 NetCDF 文件中的所有变量"""
    
    ds = gdal.Open(nc_path)
    
    if ds is None:
        raise Exception(f"无法打开: {nc_path}")
    
    subdatasets = ds.GetSubDatasets()
    
    print(f"文件: {nc_path}")
    print(f"子数据集数量: {len(subdatasets)}")
    print("-" * 60)
    
    for i, (path, desc) in enumerate(subdatasets):
        print(f"{i}: {desc}")
    
    ds = None
    return subdatasets

13.3 矢量格式转换

13.3.1 基础转换

from osgeo import ogr

def convert_vector(input_path, output_path, format='ESRI Shapefile', options=None):
    """
    通用矢量格式转换
    """
    ogr.UseExceptions()
    
    # 打开源数据
    src_ds = ogr.Open(input_path)
    if src_ds is None:
        raise Exception(f"无法打开: {input_path}")
    
    # 获取驱动
    driver = ogr.GetDriverByName(format)
    if driver is None:
        raise Exception(f"不支持的格式: {format}")
    
    # 删除已存在的输出
    if driver.DeleteDataSource(output_path) != 0:
        pass
    
    # 创建输出
    dst_ds = driver.CreateDataSource(output_path)
    
    # 转换每个图层
    for i in range(src_ds.GetLayerCount()):
        src_layer = src_ds.GetLayer(i)
        
        # 获取图层名
        layer_name = src_layer.GetName()
        
        # 复制图层
        dst_layer = dst_ds.CopyLayer(src_layer, layer_name, options if options else [])
    
    src_ds = None
    dst_ds = None
    
    print(f"转换完成: {input_path} -> {output_path}")

# 使用示例
# convert_vector('input.shp', 'output.geojson', 'GeoJSON')
# convert_vector('input.shp', 'output.gpkg', 'GPKG')

13.3.2 Shapefile 转换

from osgeo import ogr

def shp_to_geojson(shp_path, geojson_path, pretty=True):
    """Shapefile 转 GeoJSON"""
    
    ogr.UseExceptions()
    
    src_ds = ogr.Open(shp_path)
    src_layer = src_ds.GetLayer()
    
    driver = ogr.GetDriverByName('GeoJSON')
    
    # 设置格式化选项
    options = ['RFC7946=YES']  # 标准 GeoJSON
    if pretty:
        options.append('WRITE_BBOX=YES')
    
    dst_ds = driver.CreateDataSource(geojson_path)
    dst_ds.CopyLayer(src_layer, 'features', options)
    
    src_ds = None
    dst_ds = None
    
    print(f"GeoJSON 创建完成: {geojson_path}")

def shp_to_geopackage(shp_path, gpkg_path, layer_name=None):
    """Shapefile 转 GeoPackage"""
    
    ogr.UseExceptions()
    
    src_ds = ogr.Open(shp_path)
    src_layer = src_ds.GetLayer()
    
    if layer_name is None:
        layer_name = src_layer.GetName()
    
    driver = ogr.GetDriverByName('GPKG')
    dst_ds = driver.CreateDataSource(gpkg_path)
    
    options = ['FID=fid']
    dst_ds.CopyLayer(src_layer, layer_name, options)
    
    src_ds = None
    dst_ds = None
    
    print(f"GeoPackage 创建完成: {gpkg_path}")

def shp_to_kml(shp_path, kml_path):
    """Shapefile 转 KML"""
    
    ogr.UseExceptions()
    
    src_ds = ogr.Open(shp_path)
    src_layer = src_ds.GetLayer()
    
    # KML 需要 WGS84 坐标
    src_srs = src_layer.GetSpatialRef()
    
    driver = ogr.GetDriverByName('KML')
    dst_ds = driver.CreateDataSource(kml_path)
    dst_ds.CopyLayer(src_layer, src_layer.GetName())
    
    src_ds = None
    dst_ds = None
    
    print(f"KML 创建完成: {kml_path}")

def geojson_to_shp(geojson_path, shp_path, encoding='UTF-8'):
    """GeoJSON 转 Shapefile"""
    
    ogr.UseExceptions()
    
    src_ds = ogr.Open(geojson_path)
    src_layer = src_ds.GetLayer()
    
    driver = ogr.GetDriverByName('ESRI Shapefile')
    
    # 删除已存在的文件
    if driver.DeleteDataSource(shp_path) != 0:
        pass
    
    dst_ds = driver.CreateDataSource(shp_path)
    
    options = [f'ENCODING={encoding}']
    dst_ds.CopyLayer(src_layer, 'layer', options)
    
    src_ds = None
    dst_ds = None
    
    print(f"Shapefile 创建完成: {shp_path}")

13.3.3 数据库格式转换

from osgeo import ogr

def shp_to_postgis(shp_path, connection_string, table_name=None, 
                   srid=None, overwrite=True):
    """
    Shapefile 导入 PostGIS
    
    参数:
        shp_path: Shapefile 路径
        connection_string: PostGIS 连接字符串
            例: "PG:host=localhost dbname=gisdb user=postgres password=xxx"
        table_name: 表名,默认使用图层名
        srid: 目标坐标系 EPSG 代码
        overwrite: 是否覆盖已存在的表
    """
    ogr.UseExceptions()
    
    src_ds = ogr.Open(shp_path)
    src_layer = src_ds.GetLayer()
    
    if table_name is None:
        table_name = src_layer.GetName().lower()
    
    # 连接 PostGIS
    dst_ds = ogr.Open(connection_string, 1)  # 1 表示可写
    
    if dst_ds is None:
        raise Exception(f"无法连接到数据库: {connection_string}")
    
    # 检查表是否存在
    if overwrite:
        try:
            dst_ds.DeleteLayer(table_name)
        except:
            pass
    
    # 复制图层
    options = [f'GEOMETRY_NAME=geom']
    if srid:
        options.append(f'SRID={srid}')
    
    dst_ds.CopyLayer(src_layer, table_name, options)
    
    src_ds = None
    dst_ds = None
    
    print(f"导入完成: {table_name}")

def postgis_to_shp(connection_string, table_name, output_path, where=None):
    """
    从 PostGIS 导出为 Shapefile
    """
    ogr.UseExceptions()
    
    src_ds = ogr.Open(connection_string)
    
    if src_ds is None:
        raise Exception(f"无法连接到数据库: {connection_string}")
    
    # 获取图层(可以使用 SQL)
    if where:
        src_layer = src_ds.ExecuteSQL(f"SELECT * FROM {table_name} WHERE {where}")
    else:
        src_layer = src_ds.GetLayerByName(table_name)
    
    if src_layer is None:
        raise Exception(f"未找到表: {table_name}")
    
    # 创建 Shapefile
    driver = ogr.GetDriverByName('ESRI Shapefile')
    driver.DeleteDataSource(output_path)
    dst_ds = driver.CreateDataSource(output_path)
    
    dst_ds.CopyLayer(src_layer, 'layer', ['ENCODING=UTF-8'])
    
    if where:
        src_ds.ReleaseResultSet(src_layer)
    
    src_ds = None
    dst_ds = None
    
    print(f"导出完成: {output_path}")

def shp_to_spatialite(shp_path, db_path, table_name=None):
    """Shapefile 导入 Spatialite"""
    
    ogr.UseExceptions()
    
    src_ds = ogr.Open(shp_path)
    src_layer = src_ds.GetLayer()
    
    if table_name is None:
        table_name = src_layer.GetName()
    
    driver = ogr.GetDriverByName('SQLite')
    
    # 创建或打开数据库
    dst_ds = driver.CreateDataSource(db_path, ['SPATIALITE=YES'])
    
    if dst_ds is None:
        # 尝试打开已存在的数据库
        dst_ds = ogr.Open(db_path, 1)
    
    dst_ds.CopyLayer(src_layer, table_name)
    
    src_ds = None
    dst_ds = None
    
    print(f"Spatialite 导入完成: {db_path}")

13.3.4 高级转换选项

from osgeo import ogr, osr

def convert_with_reprojection(input_path, output_path, 
                              output_format='GeoJSON',
                              target_srs='EPSG:4326'):
    """带投影转换的格式转换"""
    
    ogr.UseExceptions()
    
    src_ds = ogr.Open(input_path)
    src_layer = src_ds.GetLayer()
    
    # 创建目标空间参考
    dst_srs = osr.SpatialReference()
    dst_srs.SetFromUserInput(target_srs)
    
    # 创建坐标转换
    src_srs = src_layer.GetSpatialRef()
    transform = osr.CoordinateTransformation(src_srs, dst_srs)
    
    # 创建输出
    driver = ogr.GetDriverByName(output_format)
    dst_ds = driver.CreateDataSource(output_path)
    
    dst_layer = dst_ds.CreateLayer(
        src_layer.GetName(),
        dst_srs,
        src_layer.GetGeomType()
    )
    
    # 复制字段
    src_defn = src_layer.GetLayerDefn()
    for i in range(src_defn.GetFieldCount()):
        dst_layer.CreateField(src_defn.GetFieldDefn(i))
    
    dst_defn = dst_layer.GetLayerDefn()
    
    # 转换要素
    for feature in src_layer:
        geom = feature.GetGeometryRef()
        
        if geom:
            geom.Transform(transform)
        
        out_feat = ogr.Feature(dst_defn)
        out_feat.SetGeometry(geom)
        
        for i in range(src_defn.GetFieldCount()):
            out_feat.SetField(i, feature.GetField(i))
        
        dst_layer.CreateFeature(out_feat)
    
    src_ds = None
    dst_ds = None
    
    print(f"转换完成(重投影到 {target_srs}): {output_path}")

def convert_with_filter(input_path, output_path, 
                        output_format='GeoJSON',
                        where=None, bbox=None):
    """带过滤的格式转换"""
    
    ogr.UseExceptions()
    
    src_ds = ogr.Open(input_path)
    src_layer = src_ds.GetLayer()
    
    # 应用过滤
    if where:
        src_layer.SetAttributeFilter(where)
    
    if bbox:
        src_layer.SetSpatialFilterRect(*bbox)
    
    # 创建输出
    driver = ogr.GetDriverByName(output_format)
    dst_ds = driver.CreateDataSource(output_path)
    dst_ds.CopyLayer(src_layer, 'filtered')
    
    src_ds = None
    dst_ds = None
    
    print(f"转换完成(带过滤): {output_path}")

def convert_fields_mapping(input_path, output_path, 
                           output_format='GeoJSON',
                           field_mapping=None):
    """
    带字段映射的格式转换
    
    参数:
        field_mapping: 字段映射字典 {'old_name': 'new_name', ...}
    """
    ogr.UseExceptions()
    
    src_ds = ogr.Open(input_path)
    src_layer = src_ds.GetLayer()
    
    driver = ogr.GetDriverByName(output_format)
    dst_ds = driver.CreateDataSource(output_path)
    
    dst_layer = dst_ds.CreateLayer(
        src_layer.GetName(),
        src_layer.GetSpatialRef(),
        src_layer.GetGeomType()
    )
    
    # 创建映射后的字段
    src_defn = src_layer.GetLayerDefn()
    field_map = []
    
    for i in range(src_defn.GetFieldCount()):
        old_field = src_defn.GetFieldDefn(i)
        old_name = old_field.GetName()
        
        if field_mapping and old_name in field_mapping:
            new_name = field_mapping[old_name]
            if new_name is None:
                field_map.append(None)
                continue
        else:
            new_name = old_name
        
        new_field = ogr.FieldDefn(new_name, old_field.GetType())
        new_field.SetWidth(old_field.GetWidth())
        new_field.SetPrecision(old_field.GetPrecision())
        dst_layer.CreateField(new_field)
        field_map.append(new_name)
    
    dst_defn = dst_layer.GetLayerDefn()
    
    # 转换要素
    for feature in src_layer:
        out_feat = ogr.Feature(dst_defn)
        out_feat.SetGeometry(feature.GetGeometryRef())
        
        for i, new_name in enumerate(field_map):
            if new_name:
                out_feat.SetField(new_name, feature.GetField(i))
        
        dst_layer.CreateFeature(out_feat)
    
    src_ds = None
    dst_ds = None
    
    print(f"转换完成(字段映射): {output_path}")

13.4 批量处理工具

13.4.1 批量矢量转换

from osgeo import ogr
import os
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor

def batch_convert_vectors(input_dir, output_dir, 
                          input_pattern='*.shp',
                          output_format='GeoJSON',
                          workers=4):
    """批量转换矢量文件"""
    
    ogr.UseExceptions()
    
    input_path = Path(input_dir)
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    
    # 格式到扩展名映射
    format_ext = {
        'GeoJSON': '.geojson',
        'GPKG': '.gpkg',
        'ESRI Shapefile': '.shp',
        'KML': '.kml',
        'GML': '.gml',
        'FlatGeobuf': '.fgb',
        'CSV': '.csv',
    }
    
    ext = format_ext.get(output_format, '.geojson')
    
    # 查找文件
    files = list(input_path.glob(input_pattern))
    print(f"找到 {len(files)} 个文件")
    
    def convert_file(filepath):
        try:
            out_name = filepath.stem + ext
            out_path = output_path / out_name
            convert_vector(str(filepath), str(out_path), output_format)
            return True, filepath.name
        except Exception as e:
            return False, f"{filepath.name}: {e}"
    
    results = []
    with ThreadPoolExecutor(max_workers=workers) as executor:
        results = list(executor.map(convert_file, files))
    
    success = sum(1 for r in results if r[0])
    failed = sum(1 for r in results if not r[0])
    
    print(f"\n批量转换完成: 成功 {success}, 失败 {failed}")

# 使用示例
# batch_convert_vectors('./shapefiles', './geojson', '*.shp', 'GeoJSON')

13.4.2 合并多个文件

from osgeo import ogr

def merge_vectors(input_files, output_path, output_format='GeoJSON'):
    """
    合并多个矢量文件
    """
    ogr.UseExceptions()
    
    driver = ogr.GetDriverByName(output_format)
    dst_ds = driver.CreateDataSource(output_path)
    
    dst_layer = None
    dst_defn = None
    
    for i, filepath in enumerate(input_files):
        print(f"处理: {filepath}")
        
        src_ds = ogr.Open(filepath)
        src_layer = src_ds.GetLayer()
        
        if dst_layer is None:
            # 首次创建图层
            dst_layer = dst_ds.CreateLayer(
                'merged',
                src_layer.GetSpatialRef(),
                src_layer.GetGeomType()
            )
            
            # 复制字段
            src_defn = src_layer.GetLayerDefn()
            for j in range(src_defn.GetFieldCount()):
                dst_layer.CreateField(src_defn.GetFieldDefn(j))
            
            dst_defn = dst_layer.GetLayerDefn()
        
        # 复制要素
        for feature in src_layer:
            out_feat = ogr.Feature(dst_defn)
            out_feat.SetGeometry(feature.GetGeometryRef())
            
            for j in range(src_defn.GetFieldCount()):
                out_feat.SetField(j, feature.GetField(j))
            
            dst_layer.CreateFeature(out_feat)
        
        src_ds = None
    
    dst_ds = None
    
    print(f"合并完成: {output_path}")

def merge_rasters(input_files, output_path, nodata=None):
    """合并多个栅格文件"""
    
    from osgeo import gdal
    gdal.UseExceptions()
    
    # 创建 VRT
    vrt_options = gdal.BuildVRTOptions(
        srcNodata=nodata,
        VRTNodata=nodata,
        resolution='average'
    )
    
    vrt = gdal.BuildVRT('', input_files, options=vrt_options)
    
    # 转换为 GeoTIFF
    translate_options = gdal.TranslateOptions(
        format='GTiff',
        creationOptions=[
            'COMPRESS=LZW',
            'TILED=YES',
            'BIGTIFF=IF_SAFER'
        ]
    )
    
    gdal.Translate(output_path, vrt, options=translate_options)
    
    vrt = None
    
    print(f"栅格合并完成: {output_path}")

13.5 命令行工具脚本

13.5.1 通用转换脚本

#!/usr/bin/env python
"""
通用 GDAL/OGR 格式转换工具

用法:
    python converter.py input_file output_file [--format FORMAT]
"""

import argparse
import os
from pathlib import Path
from osgeo import gdal, ogr

def detect_type(filepath):
    """检测文件类型(栅格或矢量)"""
    
    # 尝试作为栅格打开
    ds = gdal.Open(filepath)
    if ds is not None:
        ds = None
        return 'raster'
    
    # 尝试作为矢量打开
    ds = ogr.Open(filepath)
    if ds is not None:
        ds = None
        return 'vector'
    
    return None

def convert(input_path, output_path, format=None, options=None):
    """执行转换"""
    
    gdal.UseExceptions()
    ogr.UseExceptions()
    
    data_type = detect_type(input_path)
    
    if data_type is None:
        raise Exception(f"无法识别的文件格式: {input_path}")
    
    print(f"检测到数据类型: {data_type}")
    
    if data_type == 'raster':
        if format is None:
            format = 'GTiff'
        
        translate_options = gdal.TranslateOptions(
            format=format,
            creationOptions=options or ['COMPRESS=LZW', 'TILED=YES']
        )
        gdal.Translate(output_path, input_path, options=translate_options)
    
    else:  # vector
        if format is None:
            # 根据扩展名推断
            ext = Path(output_path).suffix.lower()
            format_map = {
                '.geojson': 'GeoJSON',
                '.json': 'GeoJSON',
                '.gpkg': 'GPKG',
                '.shp': 'ESRI Shapefile',
                '.kml': 'KML',
                '.gml': 'GML',
                '.fgb': 'FlatGeobuf',
            }
            format = format_map.get(ext, 'GeoJSON')
        
        src_ds = ogr.Open(input_path)
        driver = ogr.GetDriverByName(format)
        
        if os.path.exists(output_path):
            driver.DeleteDataSource(output_path)
        
        dst_ds = driver.CreateDataSource(output_path)
        for i in range(src_ds.GetLayerCount()):
            dst_ds.CopyLayer(src_ds.GetLayer(i), src_ds.GetLayer(i).GetName())
        
        src_ds = None
        dst_ds = None
    
    print(f"转换完成: {output_path}")

def main():
    parser = argparse.ArgumentParser(description='GDAL/OGR 格式转换工具')
    parser.add_argument('input', help='输入文件路径')
    parser.add_argument('output', help='输出文件路径')
    parser.add_argument('-f', '--format', help='输出格式')
    parser.add_argument('-o', '--option', action='append', help='创建选项')
    
    args = parser.parse_args()
    
    try:
        convert(args.input, args.output, args.format, args.option)
    except Exception as e:
        print(f"错误: {e}")
        exit(1)

if __name__ == '__main__':
    main()

13.5.2 批量处理脚本

#!/bin/bash
# 批量格式转换脚本

INPUT_DIR="$1"
OUTPUT_DIR="$2"
FORMAT="${3:-GTiff}"

if [ -z "$INPUT_DIR" ] || [ -z "$OUTPUT_DIR" ]; then
    echo "用法: $0 <输入目录> <输出目录> [输出格式]"
    exit 1
fi

mkdir -p "$OUTPUT_DIR"

# 处理栅格文件
for file in "$INPUT_DIR"/*.{tif,img,ecw,jp2}; do
    if [ -f "$file" ]; then
        filename=$(basename "$file")
        name="${filename%.*}"
        echo "转换: $filename"
        gdal_translate -of "$FORMAT" -co COMPRESS=LZW "$file" "$OUTPUT_DIR/$name.tif"
    fi
done

# 处理矢量文件
for file in "$INPUT_DIR"/*.shp; do
    if [ -f "$file" ]; then
        filename=$(basename "$file")
        name="${filename%.*}"
        echo "转换: $filename"
        ogr2ogr -f "GeoJSON" "$OUTPUT_DIR/$name.geojson" "$file"
    fi
done

echo "转换完成"

13.6 常见问题与解决方案

13.6.1 编码问题

from osgeo import ogr, gdal

def fix_encoding_issues(input_path, output_path, 
                        input_encoding='GBK', output_encoding='UTF-8'):
    """处理编码问题"""
    
    ogr.UseExceptions()
    
    # 设置输入编码
    gdal.SetConfigOption('SHAPE_ENCODING', input_encoding)
    
    src_ds = ogr.Open(input_path)
    src_layer = src_ds.GetLayer()
    
    driver = ogr.GetDriverByName('GeoJSON')  # GeoJSON 总是 UTF-8
    dst_ds = driver.CreateDataSource(output_path)
    
    dst_ds.CopyLayer(src_layer, 'layer')
    
    src_ds = None
    dst_ds = None
    
    # 恢复默认
    gdal.SetConfigOption('SHAPE_ENCODING', '')
    
    print(f"编码转换完成: {output_path}")

13.6.2 大文件处理

from osgeo import gdal

def convert_large_raster(input_path, output_path, chunk_size=4096):
    """处理大型栅格文件"""
    
    gdal.UseExceptions()
    
    # 设置较大的缓存
    gdal.SetCacheMax(2 * 1024 * 1024 * 1024)  # 2GB
    
    src_ds = gdal.Open(input_path)
    
    # 使用 BIGTIFF
    translate_options = gdal.TranslateOptions(
        format='GTiff',
        creationOptions=[
            'COMPRESS=LZW',
            'TILED=YES',
            f'BLOCKXSIZE={chunk_size}',
            f'BLOCKYSIZE={chunk_size}',
            'BIGTIFF=YES'
        ]
    )
    
    gdal.Translate(output_path, src_ds, options=translate_options,
                   callback=gdal.TermProgress_nocb)
    
    src_ds = None
    
    print(f"大文件转换完成: {output_path}")

13.6.3 坐标系问题

from osgeo import ogr, osr

def convert_with_srs_fix(input_path, output_path, target_srs='EPSG:4326'):
    """处理缺少坐标系的数据"""
    
    ogr.UseExceptions()
    
    src_ds = ogr.Open(input_path)
    src_layer = src_ds.GetLayer()
    
    src_srs = src_layer.GetSpatialRef()
    
    if src_srs is None:
        print("警告: 源数据没有坐标系定义")
        # 假设是 WGS84
        src_srs = osr.SpatialReference()
        src_srs.ImportFromEPSG(4326)
    
    dst_srs = osr.SpatialReference()
    dst_srs.SetFromUserInput(target_srs)
    
    driver = ogr.GetDriverByName('GeoJSON')
    dst_ds = driver.CreateDataSource(output_path)
    
    dst_layer = dst_ds.CreateLayer('layer', dst_srs, src_layer.GetGeomType())
    
    # ... 复制要素 ...
    
    src_ds = None
    dst_ds = None

13.7 本章小结

本章介绍了数据格式转换的各种技术:

  1. 栅格转换:GeoTIFF、COG、PNG、JPEG、NetCDF、HDF
  2. 矢量转换:Shapefile、GeoJSON、GeoPackage、KML、PostGIS
  3. 批量处理:并行处理、文件合并
  4. 高级选项:投影转换、字段映射、过滤
  5. 常见问题:编码、大文件、坐标系

13.8 思考与练习

  1. 编写一个支持所有常见格式的通用转换工具。
  2. 实现一个带进度显示的批量转换脚本。
  3. 比较不同压缩方式对 GeoTIFF 文件大小和读取速度的影响。
  4. 如何处理 Shapefile 的字段名长度限制(10 字符)?
  5. 实现从 PostGIS 批量导出多个表为 GeoPackage。
posted @ 2025-12-29 10:48  我才是银古  阅读(1)  评论(0)    收藏  举报