通过WebBrowser的Navigate()方法导航到一个HTML页面或URL后,可以获取当前页面对应的HtmlDocument对象
(webBrowser.Document)。下面的程序通过递归函数解析HtmlDocument对象,并以层次关系显示到TreeView上。
1using System;
2using System.Collections.Generic;
3using System.ComponentModel;
4using System.Data;
5using System.Drawing;
6using System.Text;
7using System.Windows.Forms;
8
9namespace HtmlParserDemo
10
2using System.Collections.Generic;
3using System.ComponentModel;
4using System.Data;
5using System.Drawing;
6using System.Text;
7using System.Windows.Forms;
8
9namespace HtmlParserDemo
10
// WebBrowser used to load the selected page so its HtmlDocument can be walked; instantiated in Form1_Load.
private WebBrowser browser = null;
// "Root" node of treeView1 under which the parsed element hierarchy is attached; instantiated in Form1_Load.
private TreeNode rootNode = null;
/// <summary>
/// Handles the "Open" menu item: lets the user pick a file, loads it into the
/// WebBrowser, waits for the load to finish, and rebuilds the tree below
/// <c>rootNode</c> starting from the document's HTML element.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void ToolStripMenuItem_Open_Click(object sender, EventArgs e)
{
    if (openFileDialog1.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    this.Text = openFileDialog1.FileName;
    browser.Navigate(openFileDialog1.FileName); // a valid URL could be passed here as well
    rootNode.Nodes.Clear();

    // Navigate() returns before the page is loaded, so keep pumping window
    // messages until the browser reports that loading has completed.
    while (browser.ReadyState != WebBrowserReadyState.Complete)
    {
        Application.DoEvents();
    }

    // Document is null when no page is loaded; there is nothing to parse then.
    HtmlDocument document = browser.Document;
    if (document == null)
    {
        return;
    }

    // Start parsing at the first <HTML> element. The ordinal, case-insensitive
    // comparison avoids culture-dependent upper-casing and tolerates a null tag name.
    foreach (HtmlElement element in document.All)
    {
        if (string.Equals(element.TagName, "HTML", StringComparison.OrdinalIgnoreCase))
        {
            HtmlParser(element, rootNode);
            break;
        }
    }
}
/// <summary>
/// Form load handler: creates the WebBrowser used for parsing, installs the
/// "Root" node in the tree view, and configures the Open dialog's file filter.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void Form1_Load(object sender, EventArgs e)
{
    this.browser = new WebBrowser();

    // Add(string) creates the node, appends it to the tree view and returns it.
    this.rootNode = this.treeView1.Nodes.Add("Root");

    // File-type choices offered by the Open dialog.
    this.openFileDialog1.Filter = "Html files (*.Html)|*.HTML|Html files (*.Htm)|*.HTM|All files (*.*)|*.*";
}
/// <summary>
/// Recursively converts an HTML element and all of its descendants into tree
/// nodes and attaches the resulting subtree to <paramref name="parent"/>.
/// </summary>
/// <param name="element">HTML element to convert.</param>
/// <param name="parent">Tree node that receives the node created for <paramref name="element"/>.</param>
/// <returns>The same <paramref name="parent"/> node that was passed in.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="element"/> or <paramref name="parent"/> is null.
/// </exception>
private TreeNode HtmlParser(HtmlElement element, TreeNode parent)
{
    if (element == null)
    {
        throw new ArgumentNullException("element");
    }
    if (parent == null)
    {
        throw new ArgumentNullException("parent");
    }

    // One tree node per element, labelled with the tag name and keyed by the
    // element's name for leaf and container elements alike.
    TreeNode node = new TreeNode(element.TagName);
    node.Name = element.Name;

    HtmlElementCollection children = element.Children;
    if (children.Count == 0)
    {
        // Leaf element: the tooltip shows the text followed by the full markup.
        node.ToolTipText = element.InnerText + "\n" + element.OuterHtml;
    }
    else
    {
        // Container element: the tooltip shows the text only. It is assigned
        // once, before descending, instead of once per child.
        node.ToolTipText = element.InnerText;
        foreach (HtmlElement child in children)
        {
            HtmlParser(child, node);
        }
    }

    // Attach the finished subtree to its parent in a single step.
    parent.Nodes.Add(node);
    return parent;
}
// "Root" node of treeView1 under which the parsed element hierarchy is attached; instantiated in Form1_Load.
// NOTE(review): a field with this same name is declared earlier in this listing — the repetition looks like a paste artifact; confirm and keep only one declaration.
private TreeNode rootNode = null;
/// <summary>
/// Handles the "Open" menu item: lets the user pick a file, loads it into the
/// WebBrowser, waits for the load to finish, and rebuilds the tree below
/// <c>rootNode</c> starting from the document's HTML element.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void ToolStripMenuItem_Open_Click(object sender, EventArgs e)
{
    if (openFileDialog1.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    this.Text = openFileDialog1.FileName;
    browser.Navigate(openFileDialog1.FileName); // a valid URL could be passed here as well
    rootNode.Nodes.Clear();

    // Navigate() returns before the page is loaded, so keep pumping window
    // messages until the browser reports that loading has completed.
    while (browser.ReadyState != WebBrowserReadyState.Complete)
    {
        Application.DoEvents();
    }

    // Document is null when no page is loaded; there is nothing to parse then.
    HtmlDocument document = browser.Document;
    if (document == null)
    {
        return;
    }

    // Start parsing at the first <HTML> element. The ordinal, case-insensitive
    // comparison avoids culture-dependent upper-casing and tolerates a null tag name.
    foreach (HtmlElement element in document.All)
    {
        if (string.Equals(element.TagName, "HTML", StringComparison.OrdinalIgnoreCase))
        {
            HtmlParser(element, rootNode);
            break;
        }
    }
}
/// <summary>
/// Form load handler: creates the WebBrowser used for parsing, installs the
/// "Root" node in the tree view, and configures the Open dialog's file filter.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void Form1_Load(object sender, EventArgs e)
{
    this.browser = new WebBrowser();

    // Add(string) creates the node, appends it to the tree view and returns it.
    this.rootNode = this.treeView1.Nodes.Add("Root");

    // File-type choices offered by the Open dialog.
    this.openFileDialog1.Filter = "Html files (*.Html)|*.HTML|Html files (*.Htm)|*.HTM|All files (*.*)|*.*";
}
/// <summary>
/// Recursively converts an HTML element and all of its descendants into tree
/// nodes and attaches the resulting subtree to <paramref name="parent"/>.
/// </summary>
/// <param name="element">HTML element to convert.</param>
/// <param name="parent">Tree node that receives the node created for <paramref name="element"/>.</param>
/// <returns>The same <paramref name="parent"/> node that was passed in.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="element"/> or <paramref name="parent"/> is null.
/// </exception>
private TreeNode HtmlParser(HtmlElement element, TreeNode parent)
{
    if (element == null)
    {
        throw new ArgumentNullException("element");
    }
    if (parent == null)
    {
        throw new ArgumentNullException("parent");
    }

    // One tree node per element, labelled with the tag name and keyed by the
    // element's name for leaf and container elements alike.
    TreeNode node = new TreeNode(element.TagName);
    node.Name = element.Name;

    HtmlElementCollection children = element.Children;
    if (children.Count == 0)
    {
        // Leaf element: the tooltip shows the text followed by the full markup.
        node.ToolTipText = element.InnerText + "\n" + element.OuterHtml;
    }
    else
    {
        // Container element: the tooltip shows the text only. It is assigned
        // once, before descending, instead of once per child.
        node.ToolTipText = element.InnerText;
        foreach (HtmlElement child in children)
        {
            HtmlParser(child, node);
        }
    }

    // Attach the finished subtree to its parent in a single step.
    parent.Nodes.Add(node);
    return parent;
}
相关代码下载