正则表达式捕获并替换
正则表达式捕获并替换
const fs = require('fs');
const path = require('path');
const { spawnSync } = require('child_process');
// Path of the Pandoc executable that is placed at the front of the conversion command.
const pandocPath = 'E:\\pandoc.exe';
// Extension of the files to convert, and the extension given to the converted files.
const inputExtension = '.docx';
const outputExtension = '.md';
// Input and output directories handed to doTransfer(i, o) when the script runs.
const i = "xxx/数据标注--图像数据标注";
const o = 'xxx/label';
/**
 * Converts every file in `inputDir` whose name ends with `inputExtension` into a
 * sequentially numbered `outputExtension` file in `outputDir` (001, 002, ...),
 * using Pandoc. After each conversion the media Pandoc extracted is moved from
 * `<outputDir>/media` into a per-document `<outputDir>/media<N>` folder, and the
 * generated file is post-processed with `insertChar`.
 *
 * @param {string} inputDir - Directory scanned (non-recursively) for input files.
 * @param {string} outputDir - Directory that receives the converted files and the
 *   media folders; it is created, including missing parents, when absent.
 */
function doTransfer(inputDir, outputDir) {
  // `recursive: true` also creates missing parents and does not fail if the directory exists.
  fs.mkdirSync(outputDir, { recursive: true });

  // Quote every path so names containing spaces survive the shell.
  const quote = (value) => `"${value}"`;
  const files = fs.readdirSync(inputDir).filter((f) => f.endsWith(inputExtension));
  // The folder the extracted media is collected from after each Pandoc run.
  const extractedMediaDir = path.join(outputDir, 'media');

  for (const [position, file] of files.entries()) {
    // Documents are numbered from 1; the number names both the output file and its media folder.
    const index = position + 1;
    const inputFilePath = path.join(inputDir, file);
    const outputFileName = `${index}`.padStart(3, "0") + outputExtension;
    const outputFilePath = path.join(outputDir, outputFileName);
    const numberedMediaDir = path.join(outputDir, `media${index}`);

    // Build and run the Pandoc command.
    const convertCommand = `${quote(pandocPath)} ${quote(inputFilePath)} -f docx -t markdown_strict+pipe_tables -o ${quote(outputFilePath)} --extract-media=${quote(outputDir)}`;
    execSync(convertCommand);

    // Relocate the extracted media with fs calls instead of cmd.exe built-ins, so the
    // step does not depend on the shell or on hand-built path separators.
    // Failures are logged and the batch continues with the next document.
    try {
      fs.mkdirSync(numberedMediaDir, { recursive: true });
      if (fs.existsSync(extractedMediaDir)) {
        for (const entry of fs.readdirSync(extractedMediaDir)) {
          fs.renameSync(path.join(extractedMediaDir, entry), path.join(numberedMediaDir, entry));
        }
        fs.rmdirSync(extractedMediaDir);
      }
    } catch (err) {
      console.error(err);
    }

    insertChar(outputFilePath, index);
  }
  console.log("----------done----------")
}
/**
 * Runs a command line through the system shell and echoes whatever it printed.
 * A spawn failure is logged and the function returns early; otherwise non-empty
 * stdout is written with console.log and non-empty stderr with console.error.
 *
 * @param {string} cmd - Command line to execute.
 * @returns {undefined} Nothing is returned in any case.
 */
function execSync(cmd) {
  // `chcp 65001` is run first so that Chinese text is displayed correctly rather than garbled.
  const { error, stdout, stderr } = spawnSync(`chcp 65001 && ${cmd}`, {
    shell: true,
    encoding: 'utf-8',
  });

  if (error) {
    console.error(`执行命令时发生错误: ${error.message}`);
    return;
  }

  const standardOutput = stdout.toString();
  const errorOutput = stderr.toString();
  if (standardOutput) {
    console.log(`命令输出: ${standardOutput}`);
  }
  if (errorOutput) {
    console.error(`命令错误输出: ${errorOutput}`);
  }
}
/**
 * Post-processes a generated Markdown file, rewriting it in place:
 *   1. lines that start with a run of `#` get one more `#` prepended;
 *   2. HTML `<img>` tags whose `src` contains an `imageN.png` / `.jpg` / `.jpeg`
 *      file name become Markdown images pointing into the `media<i>` folder;
 *   3. figure captions of the form "图 <number> <text>" are wrapped in
 *      `<div class="center">…</div>`.
 * Read or write failures are logged with console.error and not rethrown.
 *
 * @param {string} filepath - Path of the Markdown file to rewrite.
 * @param {number|string} i - Document sequence number; selects the `media<i>`
 *   folder referenced by the rewritten image links.
 */
function insertChar(filepath, i) {
  try {
    const data = fs.readFileSync(filepath, 'utf8');
    // Prepend one more '#' to every line that begins with '#'.
    let modifiedData = data.replace(/^(#+?)([^#+?])/gm, '#$1$2');
    const regex = /<img[^>]*src=["'].*?(image\d+\.(?:png|jpe?g)).*?["'][^>]*>/g;
    // Convert each <img> tag to a Markdown image. Only the bare file name
    // (e.g. image1.png) is captured and reused, and the link is made relative to
    // this document's own media<i> folder. Previously the replacement was an empty
    // string, which removed the images from the output altogether.
    modifiedData = modifiedData.replace(regex, `![](./media${i}/$1)`);
    const iconMarkRegex = /(图 \d+ .*)/g;
    modifiedData = modifiedData.replace(iconMarkRegex, '<div class="center">$1</div>');
    fs.writeFileSync(filepath, modifiedData, 'utf8');
    console.log(`${filepath} # updated.`);
  } catch (err) {
    console.error(err);
  }
}
// Script entry point: run the conversion from directory `i` into directory `o`.
doTransfer(i, o);

浙公网安备 33010602011771号