用 JS 去除 Word 里的多余标签
/**
 * Cleans up HTML that carries Word-specific clutter.
 *
 * The string is first passed through splitLabel() once per tag name listed
 * below, then a fixed sequence of regular-expression replacements removes
 * Office-only attributes and elements, normalises borders and collapses
 * whitespace. The order of the replacements matters: later patterns rely on
 * the output of earlier ones.
 *
 * @param {string} strHtml - HTML markup to clean.
 * @returns {string} The cleaned markup.
 */
function formatHtml(strHtml) {
    // Run the inline-style filter over every tag that may carry a style.
    var styledTags = ["SPAN", "P", "B", "FONT", "TABLE", "TD", "TR"];
    for (var i = 0; i < styledTags.length; i++) {
        strHtml = splitLabel(strHtml, styledTags[i]);
    }
    // Remove unquoted class attributes.
    strHtml = strHtml.replace(/\sclass=\w+?\b/g, "");
    // Unwrap <o:p>...</o:p>, keeping the inner content.
    strHtml = strHtml.replace(/<o:p>(.*?)<\/o:p>/ig, "$1");
    // Remove <?xml ... /> declarations.
    strHtml = strHtml.replace(/<\?xml.*?\/>/ig, "");
    // Remove lang attributes of the form lang=xx-YY.
    strHtml = strHtml.replace(/lang=\w+-\w+?\b/g, "");
    // Remove only the face attribute itself (quoted or unquoted value).
    // The previous pattern deleted everything up to the next ">", which also
    // discarded any attributes that followed face in the same tag.
    strHtml = strHtml.replace(/\sface=(?:"[^"]*"|'[^']*'|[^\s>]+)/gi, "");
    // Remove vAlign attributes.
    strHtml = strHtml.replace(/vAlign=\w+?\b/ig, "");
    // Unwrap <st1:chsdate ...>...</st1:chsdate>, keeping the inner content.
    strHtml = strHtml.replace(/<st1:chsdate.*?>(.*?)<\/st1:chsdate>/ig, "$1");
    // Drop the whitespace left behind in tags such as "<SPAN >".
    strHtml = strHtml.replace(/<(\w+?)\s+?>/ig, "<$1>");
    // Remove elements like <o:lock v:ext="edit" aspectratio="t"></o:lock>.
    strHtml = strHtml.replace(/<o:.+?<\/o:\w+?>/ig, "");
    // Remove <v:...> and </v:...> tags.
    strHtml = strHtml.replace(/<v:.+?>/g, "");
    strHtml = strHtml.replace(/<\/v:.+?>/g, "");
    // Remove <w:...> and </w:...> tags.
    strHtml = strHtml.replace(/<w:.+?>/g, "");
    strHtml = strHtml.replace(/<\/w:.+?>/g, "");
    // Strip every attribute from span tags. The captured "/" keeps closing
    // tags as closing tags; previously "</span>" was rewritten to "<span>",
    // leaving every span unclosed.
    strHtml = strHtml.replace(/<(\/?)span[^>]*>/gi, "<$1span>");
    // Force a single-digit unquoted border to border=1.
    strHtml = strHtml.replace(/border=[0-9]/gi, "border=1");
    // Force border="0" to border='1'.
    strHtml = strHtml.replace(/border="0"/gi, "border='1'");
    // When a td or table has both style="width:.." and a width attribute,
    // drop the style attribute and keep the width attribute.
    strHtml = strHtml.replace(/<td(.+?)style=\"width:.+?\"(.+?)width=(.+?)>/ig, "<td$1$2width=$3>");
    strHtml = strHtml.replace(/<table(.+?)style=\"width:\s.+?\"(.+?)width=(.+?)>/ig, "<table$1$2width=$3>");
    // Remove size=+N (single digit).
    strHtml = strHtml.replace(/size=\+\d/ig, "");
    // Collapse runs of whitespace into one space.
    strHtml = strHtml.replace(/\s{2,}/g, " ");
    return strHtml;
}
/**
 * Cuts the HTML at every occurrence of "<" + flag + " " and runs the text
 * that follows each cut through splitStyle(), then stitches the pieces back
 * together with the same opener.
 *
 * The match is a plain, case-sensitive string match on the opener.
 *
 * @param {string} strHtml - HTML markup to process.
 * @param {string} flag - Tag name to look for, e.g. "SPAN".
 * @returns {string} The reassembled markup.
 */
function splitLabel(strHtml, flag) {
    var opener = "<" + flag + " ";
    var pieces = strHtml.split(opener);
    // Everything before the first opener is passed through untouched.
    var head = pieces[0];
    var tail = pieces.slice(1).map(function (piece) {
        return opener + splitStyle(piece);
    });
    return head + tail.join("");
}
/**
 * Filters the leading style attribute of a tag fragment.
 *
 * strStyle is the text that follows a "<TAG " opener. When it begins with a
 * double-quoted style attribute, the attribute value is passed to
 * splitChild(); the attribute is rewritten with the filtered value, or
 * removed entirely when nothing is left. The rest of the fragment is
 * returned untouched.
 *
 * Fragments that do not start with style="..." (no style, unquoted or
 * single-quoted value, missing closing quote) are returned unchanged.
 * Previously such fragments were split on the first double quote found
 * anywhere later in the text, which could silently drop attributes and
 * text content.
 *
 * @param {string} strStyle - Text following a "<TAG " opener.
 * @returns {string} The fragment with its leading style attribute filtered.
 */
function splitStyle(strStyle) {
    // Group 1 keeps the original casing of "style=", group 2 is the value.
    var match = /^(style=)"([^"]*)"/i.exec(strStyle);
    if (!match) {
        return strStyle;
    }
    var filtered = splitChild(match[2]);
    var rest = strStyle.slice(match[0].length);
    if (filtered === "") {
        // Nothing worth keeping: drop the whole attribute.
        return rest;
    }
    return match[1] + "\"" + filtered + "\"" + rest;
}
/**
 * Reduces an inline style value to the declarations that should survive:
 * color, font-size, font-weight and width (matched case-insensitively).
 *
 * When the value contains ";" the kept declarations are returned in the
 * fixed order color, font-size, font-weight, width, each followed by ";".
 * If a property occurs more than once the last occurrence wins. When the
 * value contains no ";" it is treated as a single declaration and returned
 * without a trailing ";" if it is one of the kept properties.
 *
 * Declarations are separated on ";" and trimmed, so values written without
 * a space after the semicolon or with a trailing semicolon are handled.
 * Previously the separator was the literal "; ", which kept unwanted
 * declarations glued to a kept one and produced ";;" for a trailing ";".
 *
 * @param {string} strChild - The content of a style attribute.
 * @returns {string} The filtered style value, or "" when nothing is kept.
 */
function splitChild(strChild) {
    // Kept properties, in the order they are emitted.
    var keptPatterns = [/^COLOR:/i, /^FONT-SIZE:/i, /^FONT-WEIGHT:/i, /^WIDTH:/i];
    var kept = ["", "", "", ""];
    var hasSeparator = strChild.indexOf(";") !== -1;
    var declarations = strChild.split(";");
    var i;
    var k;

    for (i = 0; i < declarations.length; i++) {
        var declaration = declarations[i].replace(/^\s+|\s+$/g, "");
        for (k = 0; k < keptPatterns.length; k++) {
            if (keptPatterns[k].test(declaration)) {
                // A later duplicate overrides an earlier one.
                kept[k] = declaration;
            }
        }
    }

    var result = "";
    for (k = 0; k < kept.length; k++) {
        if (kept[k] !== "") {
            // A single declaration without ";" is returned bare.
            result += hasSeparator ? kept[k] + ";" : kept[k];
        }
    }
    return result;
}

浙公网安备 33010602011771号