技术,思考的艺术

ASP.net ,C#,VB.NET,SQL + B2B,SCM,CRM,ERP
  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理

清除 Word 垃圾代码的函数

Posted on 2004-09-04 16:58  1werwerfer  阅读(487)  评论(0)    收藏  举报
/**
 * Strips Microsoft Word specific markup from the HTML currently held in the
 * global `TextBox_Content_editor` editing frame and writes the cleaned HTML
 * back into the frame's document body.
 *
 * Cleaning steps, in order:
 *   1. remove every <SPAN> open/close tag (content is kept);
 *   2. remove one class, style and lang attribute per tag;
 *   3. remove <?xml ...> declarations and namespaced tags such as <o:p>;
 *   4. replace every &nbsp; entity with a plain space;
 *   5. turn <P ...>...</P> pairs into <div ...>...</div>.
 *
 * Takes no arguments and returns nothing; the result is observable only
 * through `TextBox_Content_editor.document.body.innerHTML`.
 */
function WordClean()
{
    TextBox_Content_editor.focus();
    var html = TextBox_Content_editor.document.body.innerHTML;

    // Remove all SPAN tags
    html = html.replace(/<\/?SPAN[^>]*>/gi, "");

    // Remove Class, Style and Lang attributes. The value may be double-quoted,
    // single-quoted or bare; matching the quoted forms first keeps values that
    // contain spaces (class="a b") from leaving a dangling fragment behind.
    html = html.replace(/<(\w[^>]*) class=("[^"]*"|'[^']*'|[^\s>]*)([^>]*)/gi, "<$1$3");
    html = html.replace(/<(\w[^>]*) style=("[^"]*"|'[^']*'|[^\s>]*)([^>]*)/gi, "<$1$3");
    html = html.replace(/<(\w[^>]*) lang=("[^"]*"|'[^']*'|[^\s>]*)([^>]*)/gi, "<$1$3");

    // Remove XML elements and declarations
    html = html.replace(/<\\?\?xml[^>]*>/gi, "");

    // Remove Tags with XML namespace declarations: <o:p></o:p>
    html = html.replace(/<\/?\w+:[^>]*>/gi, "");

    // Replace every &nbsp; (the g flag is required, otherwise only the first
    // occurrence is replaced)
    html = html.replace(/&nbsp;/g, " ");

    // Transform <P> to <DIV>.
    // - "(\s[^>]*)?" requires the tag name to be exactly P, so <PRE>, <PARAM>
    //   and similar tags are left untouched.
    // - "[\s\S]*?" lets the paragraph body span line breaks ("." would not).
    // Built with the RegExp constructor, as the original did to work around
    // an IE 5.0 error.
    var paragraphRe = new RegExp("<P(\\s[^>]*)?>([\\s\\S]*?)<\\/P>", "gi");
    html = html.replace(paragraphRe, "<div$1>$2</div>");

    TextBox_Content_editor.document.body.innerHTML = html;
}