# PowerShell处理东萍象棋棋谱 — PowerShell script for processing Dongping (dpxq.com) Xiangqi game records

# 首先定义所有函数
# Returns the trimmed text found between [fieldName] and [/fieldName] in $content,
# or an empty string when the tag pair is not present.
#   $content   - text to search
#   $fieldName - tag name, matched literally
function ExtractField($content, $fieldName) {
    # Escape the name so regex metacharacters in it (".", "+", "(" ...) are matched
    # literally instead of being interpreted as pattern syntax.
    $tag = [regex]::Escape($fieldName)
    $pattern = "\[$tag\](.*?)\[/$tag\]"

    # Singleline lets "." match newlines, so a value may span several lines.
    $match = [regex]::Match($content, $pattern, [System.Text.RegularExpressions.RegexOptions]::Singleline)
    if ($match.Success) {
        return $match.Groups[1].Value.Trim()
    }
    return ""
}

# Locates the first [DhtmlXQ]...[/DhtmlXQ] block in $content and returns a hashtable
# holding the value of each known field inside it (empty string for absent fields).
# Returns $null when no such block exists.
function ExtractDhtmlXQData($content) {
    $blockMatch = [regex]::Match($content, '\[DhtmlXQ\](.*?)\[/DhtmlXQ\]', [System.Text.RegularExpressions.RegexOptions]::Singleline)
    if (-not $blockMatch.Success) {
        return $null
    }

    $inner = $blockMatch.Groups[1].Value

    # Result key -> tag name inside the DhtmlXQ block.
    $tagByKey = @{
        Title     = "DhtmlXQ_title"
        Event     = "DhtmlXQ_event"
        Date      = "DhtmlXQ_date"
        Result    = "DhtmlXQ_result"
        RedName   = "DhtmlXQ_redname"
        BlackName = "DhtmlXQ_blackname"
        Binit     = "DhtmlXQ_binit"
        Movelist  = "DhtmlXQ_movelist"
        Open      = "DhtmlXQ_open"
        Refer     = "DhtmlXQ_refer"
        Generator = "DhtmlXQ_generator"
        Zoom      = "DhtmlXQ_zoom"
        Ver       = "DhtmlXQ_ver"
        Init      = "DhtmlXQ_init"
    }

    $fields = @{}
    foreach ($entry in $tagByKey.GetEnumerator()) {
        $fields[$entry.Key] = ExtractField $inner $entry.Value
    }
    return $fields
}

# Turns an arbitrary string into something usable as a file name: every character
# reported by [IO.Path]::GetInvalidFileNameChars() becomes "_" and the result is
# cut to at most 100 characters.
#   $filename - proposed name (without directory)
# Returns the sanitized name.
function GetSafeFilename($filename) {
    # The invalid set must be regex-escaped before it is placed in a character class.
    # On Windows the set contains "\" immediately followed by "/", which unescaped is
    # read as the escape sequence "\/" - so backslashes would slip through untouched.
    $invalidChars = [IO.Path]::GetInvalidFileNameChars()
    $invalidClass = "[{0}]" -f [regex]::Escape(-join $invalidChars)
    $safeName = $filename -replace $invalidClass, "_"

    # Cap the length
    if ($safeName.Length -gt 100) {
        $safeName = $safeName.Substring(0, 100)
    }

    return $safeName
}

# Builds the HTML page (an iframe wrapping a [DhtmlXQ] UBB block) for one game.
#   $data      - hashtable of fields as produced by ExtractDhtmlXQData
#   $sourceUrl - URL the game was fetched from; printed at the bottom of the page
# Missing or empty Title/RedName/BlackName/Result/Date/Binit/Movelist values are
# replaced by defaults before being substituted into the template.
function GenerateUBBContent($data, $sourceUrl) {
    $title = $data.Title
    if (-not $title) { $title = "未知棋谱" }

    $redName = $data.RedName
    if (-not $redName) { $redName = "红方" }

    $blackName = $data.BlackName
    if (-not $blackName) { $blackName = "黑方" }

    $result = $data.Result
    if (-not $result) { $result = "未知" }

    # No date in the record: fall back to today's date.
    $date = $data.Date
    if (-not $date) { $date = Get-Date -Format "yyyy/MM/dd" }

    # No initial-position string in the record: use this fixed default.
    $binit = $data.Binit
    if (-not $binit) { $binit = "8979695949392919097717866646260600102030405060708012720323436383" }

    $movelist = $data.Movelist
    if (-not $movelist) { $movelist = "" }

    return @"
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
<title>$title</title>
</head>
<body>
<p>
<iframe src="http://www.dpxq.com/DhtmlXQ_www_dpxq_com/DhtmlXQ_www_dpxq_com.htm" 
frameborder="0" scrolling="no" width="800" height="600" style="width:800px;height:600px;" 
name='NoFile_[DhtmlXQiFrame]
<!--发帖时,无需粘贴此注释行之上的部分.-->

[DhtmlXQ]
[DhtmlXQ_ver]www_dpxq_com[/DhtmlXQ_ver]
[DhtmlXQ_zoom]1.60.500[/DhtmlXQ_zoom]
[DhtmlXQ_title]$title[/DhtmlXQ_title]
[DhtmlXQ_event]$($data.Event)[/DhtmlXQ_event]
[DhtmlXQ_date]$date[/DhtmlXQ_date]
[DhtmlXQ_init]800,600[/DhtmlXQ_init]
[DhtmlXQ_result]$result[/DhtmlXQ_result]
[DhtmlXQ_redname]$redName[/DhtmlXQ_redname]
[DhtmlXQ_blackname]$blackName[/DhtmlXQ_blackname]
[DhtmlXQ_binit]$binit[/DhtmlXQ_binit]
[DhtmlXQ_movelist]$movelist[/DhtmlXQ_movelist]
[DhtmlXQ_open]$($data.Open)[/DhtmlXQ_open]
[DhtmlXQ_refer]http%3A//www.dpxq.com/%0D%0Ahttp%3A//www.dpxq.com/bbs/[/DhtmlXQ_refer]
[DhtmlXQ_generator]www.dpxq.com[/DhtmlXQ_generator]
[/DhtmlXQ]
<!--发帖时,无需粘贴此注释行之下的部分.-->
[/DhtmlXQiFrame]'>
</iframe>
<p>
<p>来源: $sourceUrl</p>
</body>
</html>
"@
}

# Fallback extraction of the move list directly from the page source.
# Tries each pattern in order and returns the trimmed capture of the first one that
# matches, printing which strategy succeeded. Returns "" (after a warning) when none match.
function ExtractMovelistFromSource($content) {
    $singleLine = [System.Text.RegularExpressions.RegexOptions]::Singleline

    $strategies = @(
        # Strategy 1: a JavaScript "var DhtmlXQ_movelist = '...';" assignment
        @{
            Regex  = 'var\s+DhtmlXQ_movelist\s*=\s*[''"]\[DhtmlXQ_movelist\](.*?)\[/DhtmlXQ_movelist\][''"];'
            Notice = "  ✓ 从DhtmlXQ_movelist变量提取到movelist"
        },
        # Strategy 2: any text mentioning DhtmlXQ_movelist followed by the tagged value
        @{
            Regex  = 'DhtmlXQ_movelist.*?\[DhtmlXQ_movelist\](.*?)\[/DhtmlXQ_movelist\]'
            Notice = "  ✓ 从字符串中提取到movelist"
        }
    )

    foreach ($strategy in $strategies) {
        $hit = [regex]::Match($content, $strategy.Regex, $singleLine)
        if ($hit.Success) {
            $moves = $hit.Groups[1].Value.Trim()
            Write-Host $strategy.Notice -ForegroundColor Green
            return $moves
        }
    }

    Write-Host "  ⚠ 未能从源代码提取movelist" -ForegroundColor Yellow
    return ""
}

# ---- Main script ----
# Reads a text file whose lines look like "URL|description", downloads each page,
# extracts the DhtmlXQ game record and writes one .htm file per game into "htm".

# Work relative to the script's own folder. $PSScriptRoot is empty when the code is
# not run from a script file, so only change location when it is set.
if ($PSScriptRoot) {
    Set-Location $PSScriptRoot
}

$filePath = Read-Host "请输入文本文件名称或路径"
# Reject empty input and anything that is not an existing file (e.g. a directory).
if ([string]::IsNullOrWhiteSpace($filePath) -or -not (Test-Path -Path $filePath -PathType Leaf)) {
    throw "文件不存在"
}

# Blank lines carry no URL; drop them so they are neither requested nor counted.
# @() keeps the result an array even when only one line remains.
$fileContent = @(Get-Content -Path $filePath -Encoding Default | Where-Object { $_.Trim() })

# Create the output directory.
# NOTE: an existing "htm" directory is deleted together with everything in it.
$outputDir = "htm"
if (Test-Path $outputDir) {
    Remove-Item -Path $outputDir -Recurse -Force
}
New-Item -ItemType Directory -Path $outputDir | Out-Null

# Counters
$totalCount = $fileContent.Count
$processedCount = 0
$successCount = 0
$errorCount = 0

Write-Host "开始处理 $totalCount 个棋谱..." -ForegroundColor Cyan
# The repetition must be parenthesized: in command-argument position a bare
# "=" * 50 is passed as three separate arguments and printed as "= * 50".
Write-Host ("=" * 50) -ForegroundColor Cyan

# Process every URL
foreach ($line in $fileContent) {
    $processedCount++

    # Split the line into URL and description
    $parts = $line -split "\|"
    $url = $parts[0].Trim()
    $description = if ($parts.Count -gt 1) { $parts[1].Trim() } else { "无描述" }

    Write-Progress -Activity "正在处理棋谱" -Status "处理 $processedCount/$totalCount : $description" -PercentComplete (($processedCount / $totalCount) * 100)

    try {
        # Fetch the page. -UseBasicParsing avoids the Internet Explorer dependency
        # of Windows PowerShell; newer versions accept the switch and ignore it.
        Write-Host "正在获取: $description" -ForegroundColor Gray
        $response = Invoke-WebRequest -Uri $url -UseBasicParsing -UserAgent "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" -TimeoutSec 30 -ErrorAction Stop
        $content = $response.Content

        # Extract the DhtmlXQ data
        $dhtmlxqData = ExtractDhtmlXQData $content

        if ($dhtmlxqData) {
            # When the block has no move list, try to recover it from the page source
            if ([string]::IsNullOrEmpty($dhtmlxqData.Movelist)) {
                Write-Host "  ⚠ DhtmlXQ块中movelist为空,从源代码提取..." -ForegroundColor Yellow
                $extractedMovelist = ExtractMovelistFromSource $content
                if (![string]::IsNullOrEmpty($extractedMovelist)) {
                    $dhtmlxqData.Movelist = $extractedMovelist
                    Write-Host "  ✓ 成功提取movelist: $($extractedMovelist.Length)个字符" -ForegroundColor Green
                }
            } else {
                Write-Host "  ✓ DhtmlXQ块中已有movelist: $($dhtmlxqData.Movelist.Length)个字符" -ForegroundColor Green
            }

            # Build the file name from the game title. An empty title gets the same
            # placeholder GenerateUBBContent uses, and a numeric suffix is appended
            # when the name is already taken so earlier games are not overwritten.
            $safeFilename = GetSafeFilename $dhtmlxqData.Title
            if ([string]::IsNullOrWhiteSpace($safeFilename)) {
                $safeFilename = "未知棋谱"
            }
            $outputFile = Join-Path $outputDir "$safeFilename.htm"
            $suffix = 1
            while (Test-Path -LiteralPath $outputFile) {
                $suffix++
                $outputFile = Join-Path $outputDir "${safeFilename}_${suffix}.htm"
            }

            # Generate the UBB-format HTML
            $ubbContent = GenerateUBBContent $dhtmlxqData $url

            # Save the file. -LiteralPath keeps "[" and "]" in a title from being
            # interpreted as wildcard characters.
            # NOTE(review): the page declares charset=gb2312 while the bytes written
            # depend on what "default" means for this PowerShell version/locale - confirm.
            $ubbContent | Out-File -LiteralPath $outputFile -Encoding default
            $successCount++

            Write-Host "✓ 成功生成: $($dhtmlxqData.Title)" -ForegroundColor Green
            Write-Host ("-" * 40) -ForegroundColor DarkGray
        } else {
            Write-Host "⚠ 未找到棋谱数据: $description" -ForegroundColor Yellow
            Write-Host ("-" * 40) -ForegroundColor DarkGray
            $errorCount++
        }

        # Pause between requests to avoid hitting the site too frequently
        Start-Sleep -Milliseconds 1200

    } catch {
        $errorMessage = $_.Exception.Message

        # Rate-limit style failures: back off before moving on to the next URL.
        # The failed URL is not retried and still counts as an error.
        if ($errorMessage -like "*访问频率过高*" -or $errorMessage -like "*频率过高*" -or $errorMessage -like "*429*") {
            Write-Host "⚠ 访问频率过高: $description" -ForegroundColor Yellow
            Write-Host "   等待20秒后继续..." -ForegroundColor Yellow
            Start-Sleep -Seconds 20
        } else {
            Write-Host "✗ 错误获取: $description" -ForegroundColor Red
            Write-Host "   错误信息: $errorMessage" -ForegroundColor Red
        }
        Write-Host ("-" * 40) -ForegroundColor DarkGray
        $errorCount++
    }
}

Write-Progress -Activity "正在处理棋谱" -Completed

# Print summary statistics
Write-Host "`n处理完成!" -ForegroundColor Cyan
Write-Host "总共处理: $totalCount 个URL" -ForegroundColor White
Write-Host "成功生成: $successCount 个棋谱文件" -ForegroundColor Green
Write-Host "失败数量: $errorCount 个" -ForegroundColor Red

 

# posted @ 2025-09-03 10:24  ryueifu  阅读(32)  评论(0)    收藏  举报