代码改变世界

使用SgmlReader将HTML转换为合法的XML

2007-02-06 14:51  Cat Chen  阅读(4373)  评论(3)  编辑  收藏  举报
/// <summary>
/// Converts HTML text into well-formed XML markup by parsing it with
/// <c>SgmlReader</c> (DocType "HTML") and loading the result into an
/// <c>XmlDocument</c>.
/// </summary>
/// <param name="html">The HTML text to convert. May be null, empty, or blank.</param>
/// <returns>
/// The indented content of the parsed document's root element (the root
/// element's own tag is not included), or an empty string when
/// <paramref name="html"/> is null or blank, or when parsing produces no
/// root element.
/// </returns>
public static string Convert(string html)
{
    // Check for null before trimming so that a null argument yields an empty
    // result instead of a NullReferenceException from html.Trim().
    if (html == null || html.Trim().Length == 0)
    {
        return string.Empty;
    }

    using (SgmlReader reader = new SgmlReader())
    using (StringWriter stringWriter = new StringWriter())
    using (XmlTextWriter writer = new XmlTextWriter(stringWriter))
    {
        reader.DocType = "HTML";
        reader.InputStream = new StringReader(html);
        reader.WhitespaceHandling = WhitespaceHandling.None;
        writer.Formatting = Formatting.Indented;

        XmlDocument doc = new XmlDocument();
        doc.Load(reader);

        if (doc.DocumentElement == null)
        {
            return string.Empty;
        }

        // Emit only the children of the root element, not the root tag itself.
        doc.DocumentElement.WriteContentTo(writer);

        // Push any buffered output into the StringWriter before reading it;
        // the enclosing using statements take care of closing the writers.
        writer.Flush();
        return stringWriter.ToString();
    }
}