用js去除word里的多余标签

/**
 * Strips Word-specific clutter from an HTML string.
 *
 * The clean-up runs as a fixed sequence of passes: inline styles on a set of
 * tags are filtered through splitLabel, then a series of regular-expression
 * replacements removes Office attributes and namespaced elements, normalises
 * borders and finally collapses whitespace. The order of the passes matters
 * because later patterns rely on what earlier ones left behind.
 *
 * @param {string} strHtml HTML text to clean.
 * @returns {string} The cleaned HTML.
 */
function formatHtml(strHtml) {
    var html = strHtml;

    // Filter the inline style of each of these tags; the names are passed
    // to splitLabel exactly as written here (upper case).
    var styledTags = ["SPAN", "P", "B", "FONT", "TABLE", "TD", "TR"];
    for (var i = 0; i < styledTags.length; i++) {
        html = splitLabel(html, styledTags[i]);
    }

    // Remove unquoted class attributes.
    html = html.replace(/\sclass=\w+?\b/g, "");
    // Unwrap <o:p>...</o:p>, keeping the inner text.
    html = html.replace(/<o:p>(.*?)<\/o:p>/ig, "$1");
    // Remove <?xml ... /> declarations.
    html = html.replace(/<\?xml.*?\/>/ig, "");
    // Remove lang attributes of the form lang=xx-YY.
    html = html.replace(/lang=\w+-\w+?\b/g, "");
    // Remove face attributes; everything from "face=" up to the next ">" is dropped.
    html = html.replace(/face=.+?>/g, ">");
    // Remove vAlign attributes.
    html = html.replace(/vAlign=\w+?\b/ig, "");
    // Unwrap <st1:chsdate ...>...</st1:chsdate>, keeping the inner text.
    html = html.replace(/<st1:chsdate.*?>(.*?)<\/st1:chsdate>/ig, "$1");
    // Drop whitespace left dangling before ">" once attributes are gone.
    html = html.replace(/<(\w+?)\s+?>/ig, "<$1>");
    // Remove whole <o:...>...</o:...> elements such as
    // <o:lock v:ext="edit" aspectratio="t"></o:lock>.
    html = html.replace(/<o:.+?<\/o:\w+?>/ig, "");
    // Remove <v:...> and </v:...> tags.
    html = html.replace(/<v:.+?>/g, "");
    html = html.replace(/<\/v:.+?>/g, "");
    // Remove <w:...> and </w:...> tags.
    html = html.replace(/<w:.+?>/g, "");
    html = html.replace(/<\/w:.+?>/g, "");

    // Strip every attribute from span tags. The optional slash is captured
    // and re-emitted so that closing tags stay closing tags; replacing them
    // with "<span>" would leave every span unclosed.
    html = html.replace(/<(\/?)span[^>]*>/gi, "<$1span>");

    // Force a visible border: any single-digit unquoted border becomes 1 ...
    html = html.replace(/border=[0-9]/gi, "border=1");
    // ... and a quoted zero border becomes '1'.
    html = html.replace(/border="0"/gi, "border='1'");

    // When a td or table carries both style="width:.." and a width
    // attribute, drop the style and keep the attribute.
    html = html.replace(/<td(.+?)style=\"width:.+?\"(.+?)width=(.+?)>/ig, "<td$1$2width=$3>");
    html = html.replace(/<table(.+?)style=\"width:\s.+?\"(.+?)width=(.+?)>/ig, "<table$1$2width=$3>");
    // Remove relative font sizes such as size=+0.
    html = html.replace(/size=\+\d/ig, "");
    // Collapse runs of whitespace into a single space.
    html = html.replace(/\s{2,}/g, " ");
    return html;
}
/**
 * Runs splitStyle over whatever follows each `<flag ` opening marker.
 *
 * The HTML is cut at every literal occurrence of "<" + flag + " " (the
 * match is case-sensitive), each piece after a marker is handed to
 * splitStyle, and the pieces are joined again with the marker restored.
 * Text before the first marker is passed through untouched.
 *
 * @param {string} strHtml HTML text to process.
 * @param {string} flag Tag name to look for, matched exactly as given.
 * @returns {string} The reassembled HTML.
 */
function splitLabel(strHtml, flag) {
    var opener = "<" + flag + " ";
    var pieces = strHtml.split(opener);
    var head = pieces.shift();
    var rebuilt = pieces.map(function (piece) {
        return opener + splitStyle(piece);
    });
    return head + rebuilt.join("");
}
/**
 * Filters a leading double-quoted style attribute in a tag fragment.
 *
 * The fragment is the text that follows a `<TAG ` marker. If it does not
 * begin with "style=" (any letter case), or contains no double quote, it
 * is returned unchanged. Otherwise the text between the first pair of
 * double quotes is passed to splitChild:
 *   - a non-empty result replaces the original style value;
 *   - an empty result removes the style attribute, leaving only what came
 *     after its closing quote.
 *
 * @param {string} strStyle Tag fragment to inspect.
 * @returns {string} The fragment with its style value filtered or removed.
 */
function splitStyle(strStyle) {
    if (!/^style=/i.test(strStyle)) {
        return strStyle;
    }

    var parts = strStyle.split("\"");
    if (parts.length <= 1) {
        return strStyle;
    }

    var kept = splitChild(parts[1]);

    // Everything after the style value, each piece prefixed by the quote
    // that the split consumed.
    var rest = parts.slice(2);
    var tail = rest.length > 0 ? "\"" + rest.join("\"") : "";

    if (kept !== "") {
        return parts[0] + "\"" + kept + tail;
    }
    // No style left: skip the quote that used to close the style value.
    return tail.slice(1);
}
/**
 * Keeps only the color, font-size, font-weight and width declarations of
 * an inline style value.
 *
 * The value is split on ";" and each declaration is trimmed, so input with
 * or without a space after the semicolon, and with or without a trailing
 * semicolon, is handled the same way. Property names are matched
 * case-insensitively and must be followed directly by ":". If a property
 * occurs more than once, the last occurrence wins.
 *
 * Output format:
 *   - input containing ";": kept declarations in the fixed order color,
 *     font-size, font-weight, width, each followed by ";";
 *   - input without ";": the single declaration itself if it is one of the
 *     kept properties, with no trailing ";";
 *   - nothing kept: the empty string.
 *
 * @param {string} strChild Content of a style attribute.
 * @returns {string} The filtered style text.
 */
function splitChild(strChild) {
    // Order here defines the order of the output.
    var keptPatterns = [/^COLOR:/i, /^FONT-SIZE:/i, /^FONT-WEIGHT:/i, /^WIDTH:/i];
    var found = ["", "", "", ""];
    var hasSeparator = strChild.indexOf(";") !== -1;
    var declarations = strChild.split(";");
    var i;
    var k;

    for (i = 0; i < declarations.length; i++) {
        // Trimming makes "a:b;c:d" and "a:b; c:d" equivalent and turns the
        // piece after a trailing ";" into an empty string that matches nothing.
        var declaration = declarations[i].trim();
        for (k = 0; k < keptPatterns.length; k++) {
            if (keptPatterns[k].test(declaration)) {
                found[k] = declaration;
            }
        }
    }

    if (!hasSeparator) {
        // A lone declaration is returned bare, without a trailing ";".
        for (k = 0; k < found.length; k++) {
            if (found[k] !== "") {
                return found[k];
            }
        }
        return "";
    }

    var strRtn = "";
    for (k = 0; k < found.length; k++) {
        if (found[k] !== "") {
            strRtn = strRtn + found[k] + ";";
        }
    }
    return strRtn;
}

  

posted @ 2020-10-20 09:17  春夏秋冬的千山万水  阅读(326)  评论(0)    收藏  举报