ZBJ 文件的信息提取

.ZBJ 文件本质上是一个压缩包:把文件的前两个字节改为 `7z`(0x37 0x7A)后,即可用 7-Zip 输入密码解压;再对解压得到的 SignedBidFile.p7s 做第二次解压,就能得到 docx 等文件。
如下是 zbj_extractor.ps1 脚本。

# ZBJ converter: extracts the DOCX files packed inside .ZBJ files.
#
# Parameters:
#   -SourcePath            Folder that is searched recursively for *.ZBJ files.
#   -OutputPath            Folder that receives the extracted files (one
#                          sub-folder per source folder) when
#                          -SaveToOriginalFolder is not given.
#   -SaveToOriginalFolder  Save each DOCX next to its source ZBJ file instead
#                          of collecting everything under -OutputPath.
param(
    [string]$SourcePath = "C:\Users\geyee\Desktop\某单位框采",
    [string]$OutputPath = "D:\temp",
    # A switch is already "off" when omitted, so it needs no explicit default.
    [switch]$SaveToOriginalFolder
)

# Look up the tender project name stored in a project JSON file.
# Returns the name, or $null when the file does not exist, contains no name,
# or cannot be read/parsed (the error text is printed in yellow).
function Get-ProjectNameFromJson {
    param([string]$JsonFile)

    try {
        if (-not (Test-Path $JsonFile)) {
            return $null
        }

        $rawText = Get-Content $JsonFile -Encoding UTF8 -Raw
        $parsed = $rawText | ConvertFrom-Json
        $info = $parsed.招标项目.项目信息

        # The '@'-prefixed key is tried first, then the plain key.
        foreach ($key in '@招标项目名称', '招标项目名称') {
            $candidate = $info.$key
            if ($candidate) {
                return $candidate
            }
        }
    } catch {
        Write-Host "JSON解析错误: $($_.Exception.Message)" -ForegroundColor Yellow
    }

    return $null
}

# Look up the tender project name stored in a project XML file.
# The first <项目信息> element is located and its 招标项目名称 attribute is
# returned; when the attribute is absent, a 招标项目名称 child element is
# used instead (the same attribute-then-plain order the JSON reader uses).
# Returns $null when the file does not exist, contains no name, or cannot be
# read/parsed (the error text is printed in yellow).
function Get-ProjectNameFromXml {
    param([string]$XmlFile)

    try {
        if (Test-Path -LiteralPath $XmlFile) {
            # Load from a stream so the parser honours the encoding named in
            # the XML declaration / BOM instead of assuming UTF-8.
            $resolvedPath = (Resolve-Path -LiteralPath $XmlFile).ProviderPath
            $xmlContent = New-Object System.Xml.XmlDocument
            $stream = [System.IO.File]::OpenRead($resolvedPath)
            try {
                $xmlContent.Load($stream)
            } finally {
                $stream.Dispose()
            }

            $projectInfoNode = $xmlContent.SelectSingleNode("//项目信息")
            if ($projectInfoNode) {
                $projectName = $projectInfoNode.GetAttribute("招标项目名称")
                if ($projectName) {
                    return $projectName
                }

                # Fallback: name stored as a child element rather than an attribute.
                $nameElement = $projectInfoNode.SelectSingleNode("招标项目名称")
                if ($nameElement -and $nameElement.InnerText) {
                    return $nameElement.InnerText
                }
            }
        }
    } catch {
        Write-Host "XML解析错误: $($_.Exception.Message)" -ForegroundColor Yellow
    }

    return $null
}

# Turn a project name into a string that is safe to use as a file name.
# Returns $null for a null/empty input; otherwise the sanitized name,
# capped at 100 characters.
function Clean-ProjectName {
    param([string]$ProjectName)

    if ([string]::IsNullOrEmpty($ProjectName)) {
        return $null
    }

    # 1) characters that are illegal in file names -> '_'
    # 2) every whitespace run                      -> '_'
    # 3) runs of underscores                       -> a single '_'
    # and finally strip underscores from both ends.
    $sanitized = ($ProjectName `
        -replace '[\\/:*?"<>|]', '_' `
        -replace '\s+', '_' `
        -replace '_+', '_').Trim('_')

    # Keep at most the first 100 characters so the file name stays short.
    $keep = [Math]::Min(100, $sanitized.Length)
    return $sanitized.Substring(0, $keep)
}

# Locate 7z.exe.
# The two standard install folders are probed first (in order); if neither
# has it, the command search path is consulted. Returns the full path of
# the executable, or $null when it cannot be found.
function Find-7zExe {
    $installed = @(
        "C:\Program Files\7-Zip\7z.exe",
        "C:\Program Files (x86)\7-Zip\7z.exe"
    ) | Where-Object { Test-Path $_ } | Select-Object -First 1

    if ($installed) {
        return $installed
    }

    # Not in a standard location: fall back to whatever PATH resolves.
    try {
        $onPath = Get-Command 7z.exe -ErrorAction SilentlyContinue
        if ($onPath) {
            return $onPath.Source
        }
    } catch {}

    return $null
}

# Extract an archive with the 7z.exe command-line tool.
#
# Parameters:
#   ArchivePath      Archive to extract.
#   OutputDirectory  Destination folder (created when missing).
#   Password         Optional archive password; omitted from the command
#                    line when empty.
# Returns $true when 7z.exe exits with code 0, otherwise $false (7z.exe not
# found, non-zero exit code, or the process could not be started).
function Extract-With-7zExe {
    param(
        [string]$ArchivePath,
        [string]$OutputDirectory,
        [string]$Password = $null
    )
    
    $sevenZipExe = Find-7zExe
    if (-not $sevenZipExe) {
        Write-Host " [info] 未找到7z.exe" -ForegroundColor Red
        return $false
    }
    
    Write-Host "  使用7z.exe: $sevenZipExe" -ForegroundColor Cyan
    
    # Make sure the destination folder exists.
    if (-not (Test-Path $OutputDirectory)) {
        New-Item -ItemType Directory -Path $OutputDirectory -Force | Out-Null
    }
    
    # x = extract with full paths, -o<dir> = destination, -y = answer yes to
    # every prompt, -p<password> = archive password.
    $arguments = @('x', $ArchivePath, "-o$OutputDirectory", '-y')
    if ($Password) {
        $arguments += "-p$Password"
    }
    
    try {
        # Use the call operator rather than Start-Process -ArgumentList:
        # Start-Process joins the array with spaces and adds no quoting, so a
        # path containing a space would be split into several arguments. The
        # call operator passes every array element as one argument.
        # Out-Host keeps 7z's output on the console and out of this
        # function's return value.
        & $sevenZipExe $arguments | Out-Host
        $exitCode = $LASTEXITCODE
        
        if ($exitCode -eq 0) {
            Write-Host " [info] 7z.exe解压成功" -ForegroundColor Green
            return $true
        } else {
            Write-Host " [info] 7z.exe解压失败,退出代码: $exitCode" -ForegroundColor Red
            return $false
        }
    } catch {
        Write-Host " [info] 7z.exe调用失败: $($_.Exception.Message)" -ForegroundColor Red
        return $false
    }
}

# Fallback extractor based on System.IO.Compression.ZipFile.
#
# Parameters:
#   ArchivePath      Archive to extract.
#   OutputDirectory  Destination folder (created when missing).
# Returns $true on success, $false when loading the assembly or the
# extraction itself fails (the error text is printed in red).
function Extract-With-DotNet {
    param(
        [string]$ArchivePath,
        [string]$OutputDirectory
    )
    
    try {
        Write-Host "  尝试使用.NET Framework解压..." -ForegroundColor Yellow
        
        # Make sure the destination folder exists.
        if (-not (Test-Path $OutputDirectory)) {
            New-Item -ItemType Directory -Path $OutputDirectory -Force | Out-Null
        }
        
        # Load the assembly that provides the ZipFile class. Add-Type replaces
        # the obsolete Assembly.LoadWithPartialName and, unlike it, raises an
        # error (handled by the catch below) when the assembly is unavailable.
        Add-Type -AssemblyName System.IO.Compression.FileSystem
        
        [System.IO.Compression.ZipFile]::ExtractToDirectory($ArchivePath, $OutputDirectory)
        Write-Host " [info] .NET Framework解压成功" -ForegroundColor Green
        return $true
    } catch {
        Write-Host " [info] .NET Framework解压失败: $($_.Exception.Message)" -ForegroundColor Red
        return $false
    }
}

# Helper for Process-ZBJFile: find every *.docx below $SearchDir, work out the
# project name from the 招标项目.json / 招标项目.xml file in $SearchDir, and
# copy the documents into $OutputDir under their new names.
# Returns the result hashtable (Success / ProjectName / DocxCount), or $null
# when $SearchDir contains no DOCX file.
function Save-DocxFromDirectory {
    param(
        [string]$SearchDir,
        [string]$ZBJFile,
        [string]$OutputDir
    )

    # @() guarantees an array, so .Count is valid for zero or one match too.
    $docxFiles = @(Get-ChildItem $SearchDir -Recurse -Filter "*.docx" -ErrorAction SilentlyContinue)
    if ($docxFiles.Count -eq 0) {
        return $null
    }
    Write-Host " [info] 找到 $($docxFiles.Count) 个DOCX文件" -ForegroundColor Green

    Write-Host "步骤6: 读取项目名称..." -ForegroundColor Yellow

    # JSON metadata is preferred; XML is the fallback.
    $extractedProjectName = Get-ProjectNameFromJson (Join-Path $SearchDir "招标项目.json")
    if (-not $extractedProjectName) {
        $extractedProjectName = Get-ProjectNameFromXml (Join-Path $SearchDir "招标项目.xml")
    }

    if ($extractedProjectName) {
        $extractedProjectName = Clean-ProjectName $extractedProjectName
        Write-Host " [info] 项目名称: $extractedProjectName" -ForegroundColor Green
    } else {
        Write-Host " [info] 未获取到项目名称,使用默认命名" -ForegroundColor Yellow
    }

    Write-Host "步骤7: 复制DOCX文件..." -ForegroundColor Yellow

    # Base name: the (cleaned) project name, or "<zbj name>_extracted" when no
    # usable project name is available.
    if ($extractedProjectName) {
        $baseName = $extractedProjectName
    } else {
        $baseName = "$([System.IO.Path]::GetFileNameWithoutExtension($ZBJFile))_extracted"
    }

    $counter = 1
    foreach ($docxFile in $docxFiles) {
        # A running number is appended only when there are several documents.
        if ($docxFiles.Count -eq 1) {
            $newName = "$baseName.docx"
        } else {
            $newName = "${baseName}_$counter.docx"
        }

        $newPath = Join-Path $OutputDir $newName
        Copy-Item -LiteralPath $docxFile.FullName -Destination $newPath -Force
        Write-Host " [info] 已保存: $newName" -ForegroundColor Green
        $counter++
    }

    return @{
        Success = $true
        ProjectName = $extractedProjectName
        DocxCount = $docxFiles.Count
    }
}

# Main ZBJ processing routine.
#
# Rewrites the first two bytes of the ZBJ file to "7z", extracts the result,
# then extracts the contained SignedBidFile.p7s and copies the DOCX files it
# holds to $OutputDir. When the p7s file is missing or yields no DOCX, the
# first-level extraction folder is searched instead.
#
# Parameters:
#   ZBJFile    Path of the .ZBJ file.
#   OutputDir  Folder that receives the DOCX files (created when missing).
#   Password   Archive password used for both extraction levels.
# Returns a hashtable with Success, ProjectName and DocxCount. Errors are
# printed and reported as Success = $false rather than thrown. All work
# happens in a temporary folder that is removed before returning.
function Process-ZBJFile {
    param(
        [string]$ZBJFile,
        [string]$OutputDir,
        [string]$Password = "truelore"
    )
    
    # Declared before the try so the finally block can always test it, even
    # when a failure happens before the temporary folder is chosen.
    $tempDir = $null
    
    try {
        # Make sure the output folder exists.
        if (-not (Test-Path $OutputDir)) {
            New-Item -ItemType Directory -Path $OutputDir -Force | Out-Null
        }
        
        # Create a fresh temporary working folder.
        $tempDir = Join-Path $env:TEMP "ZBJ_Extract_$(Get-Random)"
        if (Test-Path $tempDir) {
            Remove-Item $tempDir -Recurse -Force
        }
        New-Item -ItemType Directory -Path $tempDir -Force | Out-Null
        
        Write-Host "步骤1: 转换ZBJ为7z..." -ForegroundColor Yellow
        
        # Patch the header: with the first two bytes set to "7z" the file is
        # treated as a 7z archive. ReadAllBytes already returns a private
        # buffer, so it is patched in place instead of cloning it (which would
        # double the memory needed for large files).
        $archiveBytes = [System.IO.File]::ReadAllBytes($ZBJFile)
        if ($archiveBytes.Length -lt 2) {
            throw "ZBJ文件过小,无法转换"
        }
        $archiveBytes[0] = 0x37  # '7'
        $archiveBytes[1] = 0x7A  # 'z'
        
        $tempZipFile = Join-Path $tempDir "temp.7z"
        [System.IO.File]::WriteAllBytes($tempZipFile, $archiveBytes)
        Write-Host " [info] 7z文件创建成功" -ForegroundColor Green
        
        Write-Host "步骤2: 解压文件..." -ForegroundColor Yellow
        
        # Folder for the first-level extraction.
        $extractDir = Join-Path $tempDir "extracted"
        if (Test-Path $extractDir) {
            Remove-Item $extractDir -Recurse -Force
        }
        New-Item -ItemType Directory -Path $extractDir -Force | Out-Null
        
        # Method 1: 7z.exe
        Write-Host "  尝试方法1: 7z.exe..." -ForegroundColor Yellow
        $extractSuccess = Extract-With-7zExe -ArchivePath $tempZipFile -OutputDirectory $extractDir -Password $Password
        
        # Method 2: .NET, only when 7z.exe failed
        if (-not $extractSuccess) {
            Write-Host "  尝试方法2: .NET Framework..." -ForegroundColor Yellow
            $extractSuccess = Extract-With-DotNet -ArchivePath $tempZipFile -OutputDirectory $extractDir
        }
        
        if (-not $extractSuccess) {
            throw "所有解压方法都失败了"
        }
        
        Write-Host "步骤3: 查找SignedBidFile.p7s文件..." -ForegroundColor Yellow
        
        $signedFile = Join-Path $extractDir "SignedBidFile.p7s"
        if (Test-Path $signedFile) {
            Write-Host " [info] 找到SignedBidFile.p7s文件" -ForegroundColor Green
            
            # Folder for the second-level (p7s) extraction.
            $p7sExtractDir = Join-Path $tempDir "p7s_extracted"
            if (Test-Path $p7sExtractDir) {
                Remove-Item $p7sExtractDir -Recurse -Force
            }
            New-Item -ItemType Directory -Path $p7sExtractDir -Force | Out-Null
            
            Write-Host "步骤4: 解压SignedBidFile.p7s文件..." -ForegroundColor Yellow
            
            $p7sExtractSuccess = Extract-With-7zExe -ArchivePath $signedFile -OutputDirectory $p7sExtractDir -Password $Password
            
            if (-not $p7sExtractSuccess) {
                # Retry with a copy that carries a .7z extension.
                $p7sAs7z = Join-Path $tempDir "SignedBidFile.7z"
                Copy-Item -LiteralPath $signedFile -Destination $p7sAs7z -Force
                $p7sExtractSuccess = Extract-With-7zExe -ArchivePath $p7sAs7z -OutputDirectory $p7sExtractDir -Password $Password
            }
            
            if ($p7sExtractSuccess) {
                Write-Host " [info] p7s文件解压成功" -ForegroundColor Green
            } else {
                Write-Host " [info] p7s文件解压失败,可能不是压缩文件" -ForegroundColor Yellow
            }
            
            Write-Host "步骤5: 查找DOCX文件..." -ForegroundColor Yellow
            
            $result = Save-DocxFromDirectory -SearchDir $p7sExtractDir -ZBJFile $ZBJFile -OutputDir $OutputDir
            if ($null -ne $result) {
                return $result
            }
            Write-Host " [info] 在p7s文件中未找到DOCX文件" -ForegroundColor Yellow
        } else {
            Write-Host " [info] 未找到SignedBidFile.p7s文件" -ForegroundColor Yellow
        }
        
        # Nothing usable came out of the p7s file: search the first-level
        # extraction folder instead.
        Write-Host "步骤5: 在主解压目录中查找DOCX文件..." -ForegroundColor Yellow
        $result = Save-DocxFromDirectory -SearchDir $extractDir -ZBJFile $ZBJFile -OutputDir $OutputDir
        if ($null -ne $result) {
            return $result
        }
        
        Write-Host " [info] 未找到任何DOCX文件" -ForegroundColor Red
        throw "未找到DOCX文件"
    }
    catch {
        Write-Host "处理ZBJ文件失败: $($_.Exception.Message)" -ForegroundColor Red
        return @{
            Success = $false
            ProjectName = $null
            DocxCount = 0
        }
    }
    finally {
        # Remove the temporary working folder (best effort).
        if ($tempDir -and (Test-Path $tempDir)) {
            Remove-Item $tempDir -Recurse -Force -ErrorAction SilentlyContinue
        }
    }
}

# Main program: find every ZBJ file below $SourcePath, convert each one with
# Process-ZBJFile, print a summary and write a text report.
Write-Host "=== ZBJ转换器 - 多重解压方案 ===" -ForegroundColor Green
Write-Host "源文件路径: $SourcePath" -ForegroundColor Cyan

if ($SaveToOriginalFolder) {
    Write-Host "保存模式: 保存到原始文件夹" -ForegroundColor Yellow
} else {
    Write-Host "保存模式: 集中保存到 $OutputPath" -ForegroundColor Yellow
    if (!(Test-Path $OutputPath)) {
        New-Item -ItemType Directory -Path $OutputPath -Force | Out-Null
        Write-Host "创建输出目录: $OutputPath" -ForegroundColor Yellow
    }
}

Write-Host ""

# Report up front whether 7z.exe is available.
$sevenZipExe = Find-7zExe
if ($sevenZipExe) {
    Write-Host "[info]找到7z.exe: $sevenZipExe" -ForegroundColor Green
} else {
    Write-Host "[info]未找到7z.exe,将使用.NET Framework解压" -ForegroundColor Yellow
    Write-Host "  建议安装7-Zip: https://www.7-zip.org/" -ForegroundColor White
}

Write-Host ""

# Search for ZBJ files. @() guarantees an array so .Count is always valid;
# -File skips directories whose name happens to end in .ZBJ.
Write-Host "搜索ZBJ文件..." -ForegroundColor Yellow
$zbjFiles = @(Get-ChildItem $SourcePath -Recurse -Filter "*.ZBJ" -File)
Write-Host "找到 $($zbjFiles.Count) 个ZBJ文件" -ForegroundColor Cyan

if ($zbjFiles.Count -eq 0) {
    Write-Host "未找到任何ZBJ文件,程序退出" -ForegroundColor Red
    # 'return' ends the script only; 'exit' would also close the hosting
    # session when the script is dot-sourced.
    return
}

$successCount = 0
$failCount = 0
$processedFiles = @()

foreach ($zbjFile in $zbjFiles) {
    try {
        Write-Host "`n=== 处理文件: $($zbjFile.Name) ===" -ForegroundColor Cyan
        Write-Host "文件大小: $([math]::Round($zbjFile.Length/1KB, 2)) KB" -ForegroundColor Gray
        
        # Decide where this file's output goes.
        if ($SaveToOriginalFolder) {
            $fileOutputDir = $zbjFile.Directory.FullName
        } else {
            # One sub-folder per source folder, named after it with any
            # leading digits removed. If the name consists of digits only,
            # keep it unchanged so the sub-folder name is never empty.
            $projectName = $zbjFile.Directory.Name
            $cleanProjectName = $projectName -replace '^\d+', ''
            if ([string]::IsNullOrEmpty($cleanProjectName)) {
                $cleanProjectName = $projectName
            }
            $fileOutputDir = Join-Path $OutputPath $cleanProjectName
            if (!(Test-Path $fileOutputDir)) {
                New-Item -ItemType Directory -Path $fileOutputDir -Force | Out-Null
            }
        }
        
        # Convert the ZBJ file.
        $result = Process-ZBJFile -ZBJFile $zbjFile.FullName -OutputDir $fileOutputDir
        
        if ($result.Success) {
            Write-Host "[info]文件处理成功" -ForegroundColor Green
            if ($result.ProjectName) {
                Write-Host " [info] 项目名称获取成功: $($result.ProjectName)" -ForegroundColor Green
            } else {
                Write-Host " [info] 未获取到项目名称,使用默认命名" -ForegroundColor Yellow
            }
            Write-Host " [info] 找到并保存了 $($result.DocxCount) 个DOCX文件" -ForegroundColor Green
            
            $successCount++
            $processedFiles += [PSCustomObject]@{
                FileName = $zbjFile.Name
                Status = "成功"
                OutputPath = $fileOutputDir
                ExtractedProjectName = if ($result.ProjectName) { $result.ProjectName } else { "未获取" }
            }
        } else {
            # Routed through the catch block so failures are recorded in one place.
            throw "处理ZBJ文件失败"
        }
    } catch {
        Write-Host "[info]文件处理失败: $($zbjFile.Name)" -ForegroundColor Red
        Write-Host "  错误: $($_.Exception.Message)" -ForegroundColor Red
        $failCount++
        $processedFiles += [PSCustomObject]@{
            FileName = $zbjFile.Name
            Status = "失败: $($_.Exception.Message)"
            OutputPath = "N/A"
            ExtractedProjectName = "N/A"
        }
    }
}

Write-Host "`n=== 处理完成 ===" -ForegroundColor Green
Write-Host "成功处理: $successCount 个文件" -ForegroundColor Green
Write-Host "失败数量: $failCount 个文件" -ForegroundColor Red

if ($SaveToOriginalFolder) {
    Write-Host "DOCX文件已保存到各自的原始文件夹中" -ForegroundColor Cyan
} else {
    Write-Host "DOCX文件已保存到: $OutputPath" -ForegroundColor Cyan
}

# Write the report next to the results: under $SourcePath when saving to the
# original folders, otherwise under $OutputPath.
if ($SaveToOriginalFolder) {
    $reportFile = Join-Path $SourcePath "zbj_conversion_report.txt"
} else {
    $reportFile = Join-Path $OutputPath "zbj_conversion_report.txt"
}

$report = @"
ZBJ转换器报告 - 多重解压方案
处理时间: $(Get-Date -Format "yyyy-MM-dd HH:mm:ss")
源文件路径: $SourcePath
保存模式: $(if ($SaveToOriginalFolder) { "保存到原始文件夹" } else { "集中保存到: $OutputPath" })

统计信息:
- 总文件数: $($zbjFiles.Count)
- 成功处理: $successCount
- 失败数量: $failCount
- 成功率: $([math]::Round($successCount / $zbjFiles.Count * 100, 2))%

详细处理记录:
$($processedFiles | ForEach-Object { "- $($_.FileName) | 状态: $($_.Status) | 项目名称: $($_.ExtractedProjectName) | 位置: $($_.OutputPath)" } | Out-String)

说明:
- 本脚本使用多重解压方案确保成功
- 优先使用7z.exe,备选使用.NET Framework
- 只提取DOCX文件,忽略其他文件类型
- 自动从招标项目元数据中提取项目名称并重命名文件
- 支持JSON和XML格式的元数据文件
"@

$report | Out-File $reportFile -Encoding UTF8
Write-Host "`n处理报告已保存: $reportFile" -ForegroundColor Cyan

Write-Host "`n使用说明:" -ForegroundColor Yellow
Write-Host "- 优先使用7z.exe解压,备选使用.NET Framework" -ForegroundColor White
Write-Host "- 只提取DOCX文件,忽略其他文件类型" -ForegroundColor White
Write-Host "- 自动使用项目名称重命名DOCX文件" -ForegroundColor White
Write-Host "- 临时文件会自动清理,不会占用系统空间" -ForegroundColor White
Write-Host "- 如果未安装7-Zip,建议访问: https://www.7-zip.org/" -ForegroundColor White

以 `. ".\zbj_extractor.ps1" -SaveToOriginalFolder` 的方式运行,可将生成的 docx 保存到原 ZBJ 文件所在的目录。

posted @ 2025-08-28 17:53  geyee  阅读(39)  评论(0)    收藏  举报