文本重复率检查

<!DOCTYPE html>  
<html lang="zh">  
<head>  
    <meta charset="UTF-8">  
    <meta name="viewport" content="width=device-width, initial-scale=1.0">  
    <title>文本重复率检查</title>  
    <style>  
        body {  
            font-family: Arial, sans-serif;  
            margin: 0;  
            padding: 20px;  
            background-color: #f5f5f5;  
        }  
          
        h1 {  
            text-align: center;  
        }  
          
        .container {  
            max-width: 600px;  
            margin: 0 auto;  
            background-color: #fff;  
            padding: 20px;  
            border-radius: 5px;  
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);  
        }  
          
        label {  
            display: block;  
            margin-bottom: 5px;  
        }  
          
        textarea {  
            width: 100%;  
            padding: 10px;  
            border-radius: 5px;  
            border: 1px solid #ccc;  
            box-sizing: border-box;  
            resize: vertical;  
        }  
          
        button {  
            padding: 10px 20px;  
            background-color: #4CAF50;  
            color: #fff;  
            border: none;  
            border-radius: 5px;  
            cursor: pointer;  
            margin-top: 10px;  
        }  
          
        button:hover {  
            background-color: #45a049;  
        }  
          
        #result {  
            margin-top: 20px;  
            font-weight: bold;  
            text-align: center;  
        }  
    </style>  
</head>  
<body>  
    <!-- Tool card: two text inputs, the trigger button, and the result line. -->
    <div class="container">
        <h1>文本重复率检查</h1>
        <label for="textA">标准文本(A):</label>
        <textarea id="textA" rows="4"></textarea><br>

        <label for="textB">对比文本(B):</label>
        <textarea id="textB" rows="4"></textarea><br>

        <!-- Explicit type: a bare <button> defaults to "submit" if this markup is ever placed inside a form. -->
        <button type="button" onclick="checkDuplication()">检查重复率</button>

        <!-- Status live region: it is in the DOM before the script writes to it,
             so assistive technology announces the computed rate when it changes. -->
        <div id="result" role="status"></div>
    </div>
    <script>  
        /**
         * Strip everything that is not a Chinese (Han) ideograph from a string.
         *
         * Spaces, line breaks, punctuation, digits and Latin letters are all removed.
         *
         * @param {string} text - Raw user input.
         * @returns {string} The input with only Han ideographs kept, in original order.
         */
        function cleanText(text) {
            // Ranges kept: CJK Extension A, CJK Unified Ideographs (full block, not
            // just up to U+9FA5), CJK Compatibility Ideographs, and the supplementary
            // ideographic planes (Extension B onward). The `u` flag makes the class
            // match whole code points, so astral ideographs are not split into
            // surrogate halves and discarded.
            return text.replace(/[^\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF\u{20000}-\u{323AF}]+/gu, '');
        }
  
        /**
         * Compare the two textareas and show their duplication rate.
         *
         * Reads `#textA` and `#textB`, passes both through `cleanText`, and writes
         * the resulting percentage (two decimals) into `#result`.
         *
         * The rate is the Jaccard similarity of the two texts' distinct characters:
         * (distinct characters present in both) / (distinct characters present in
         * either) * 100. Repetition counts and character order are ignored.
         */
        function checkDuplication() {
            const textA = document.getElementById('textA').value;
            const textB = document.getElementById('textB').value;

            // Reduce each cleaned text to its set of distinct characters.
            const charsA = new Set(cleanText(textA));
            const charsB = new Set(cleanText(textB));

            // Size of the intersection, using constant-time Set lookups.
            let commonChars = 0;
            for (const char of charsA) {
                if (charsB.has(char)) {
                    commonChars++;
                }
            }

            // Size of the union by inclusion-exclusion; no third set is needed.
            const totalUniqueChars = charsA.size + charsB.size - commonChars;

            // With nothing left after cleaning in either text, 0 / 0 would be NaN
            // and the page would show "NaN%". Report 0 instead.
            const duplicationRate = totalUniqueChars === 0
                ? 0
                : (commonChars / totalUniqueChars) * 100;

            // Show the result.
            document.getElementById('result').textContent = `重复率: ${duplicationRate.toFixed(2)}%`;
        }
    </script>  
</body>  
</html>

 

posted @ 2024-06-16 12:34  joiny-  阅读(51)  评论(0)    收藏  举报