/**
 * Extracts the plain text contained in an HTML string.
 *
 * The markup is parsed with `DOMParser` (so an environment that provides it,
 * such as a browser, is required), every text node is gathered by the `dfs`
 * helper, and the pieces are glued together without a separator.
 *
 * @param {string} htmlContent - HTML markup to extract text from.
 * @returns {string} Concatenated text of all text nodes, with leading and
 *   trailing whitespace removed.
 */
function extractTextFromHTML(htmlContent) {
  // Parse the markup into a document tree.
  const parsedDocument = new DOMParser().parseFromString(htmlContent, 'text/html');
  // Walk the tree depth-first to gather the text of each text node.
  const pieces = dfs(parsedDocument);
  // Glue the pieces together, then strip the surrounding whitespace.
  const combined = pieces.join('');
  return combined.trim();
}
/**
 * Collects the text of every text node at or beneath `node`, in document order.
 *
 * The traversal is depth-first and pre-order. It uses an explicit stack
 * instead of recursion so that very deep trees cannot exhaust the call stack,
 * and it appends to a single result array rather than re-copying the
 * accumulated results with `concat` at every level.
 *
 * @param {Node} node - Root of the subtree to scan; may itself be a text node.
 * @returns {string[]} The `textContent` of each text node found, in document
 *   order. Empty when `node` is null or undefined.
 */
function dfs(node) {
  // Numeric value of Node.TEXT_NODE as fixed by the DOM standard; using the
  // literal keeps this helper usable where the global `Node` is not defined.
  const TEXT_NODE = 3;
  const textNodes = [];
  if (node == null) {
    return textNodes;
  }
  const pending = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (current.nodeType === TEXT_NODE) {
      textNodes.push(current.textContent);
      continue;
    }
    const children = current.childNodes;
    // Push children last-to-first so the first child is popped (visited) first,
    // which preserves document order in the output.
    for (let i = children.length - 1; i >= 0; i--) {
      pending.push(children[i]);
    }
  }
  return textNodes;
}
// Sample HTML document used to demonstrate extractTextFromHTML.
const htmlContent = `
<!DOCTYPE html>
<html>
<head>
<title>Test Page</title>
</head>
<body>
<h1>Welcome to My Page</h1>
<p>This is a paragraph with <b>bold</b> text.</p>
<div>
<p>This is another paragraph.</p>
</div>
</body>
</html>
`;
// Run the extractor on the sample document and print the resulting text.
const extractedText = extractTextFromHTML(htmlContent);
console.log(extractedText);