using System;
using HtmlAgilityPack;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections.Generic;
namespace ConsoleApp
{
class Program
{
    // XPath of the cell that every field and table queried below is nested under.
    private const string ReportRootXPath = "/html/body/div/div/table/tr[2]/td";

    /// <summary>
    /// Returns the inner text of an HTML node with CR, LF, tab and space characters removed.
    /// </summary>
    /// <param name="_htmlnode">Node to read. May be null, e.g. when an XPath query matched nothing.</param>
    /// <param name="isSplit">
    /// When true and the cleaned text contains ':', only the part after the FIRST ':' is returned
    /// (intended for "label:value" cells).
    /// </param>
    /// <returns>The cleaned text, or an empty string when <paramref name="_htmlnode"/> is null.</returns>
    private static string goText(HtmlNode _htmlnode, bool isSplit = true)
    {
        // Handle the "no such node" case explicitly instead of relying on a catch-all
        // that would also hide every other kind of failure.
        if (_htmlnode == null)
        {
            return "";
        }

        // Strip line breaks, tabs and spaces.
        string str = Regex.Replace(_htmlnode.InnerText ?? "", "\r|\n|\t| ", "").Trim();

        if (isSplit)
        {
            int colon = str.IndexOf(':');
            if (colon >= 0)
            {
                // Keep everything after the first colon so that values which contain ':'
                // themselves (for example a time of day) are not cut off at the second colon.
                str = str.Substring(colon + 1);
            }
        }

        return str;
    }

    /// <summary>
    /// Builds a <see cref="Table"/> from one table row: the text of the first four
    /// td children is stored in a, b, c and d respectively; further cells are ignored.
    /// </summary>
    /// <param name="tr">The row node whose td children are read.</param>
    /// <returns>The populated row object; properties without a matching cell stay null.</returns>
    private static Table ReadRow(HtmlNode tr)
    {
        Table tb = new Table();
        int i = 0;
        foreach (var td in tr.Elements("td"))
        {
            if (td.NodeType == HtmlNodeType.Element)
            {
                // Cell text is taken as-is (no "label:value" splitting).
                string text = goText(td, false);
                switch (i)
                {
                    case 0:
                        tb.a = text;
                        break;
                    case 1:
                        tb.b = text;
                        break;
                    case 2:
                        tb.c = text;
                        break;
                    case 3:
                        tb.d = text;
                        break;
                }
            }
            i++;
        }
        return tb;
    }

    /// <summary>
    /// Loads index.html from the application base directory, extracts the report header
    /// fields and the data table with HtmlAgilityPack, prints the table rows and the
    /// elapsed parsing time, then waits for Enter.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    private static void Main(string[] args)
    {
        // Read index.html that sits next to the executable; a missing file is parsed as empty HTML.
        string basePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "index.html");
        string html = "";
        if (File.Exists(basePath))
        {
            // NOTE(review): Encoding.Default is platform dependent — confirm it matches the
            // encoding index.html is actually saved in.
            html = File.ReadAllText(basePath, Encoding.Default);
        }

        // Start timing. UTC is used so a DST or time-zone change cannot distort the interval.
        DateTime beforDT = DateTime.UtcNow;

        // Parse the document with HtmlAgilityPack.
        var htmlDoc = new HtmlDocument();
        htmlDoc.LoadHtml(html);
        var root = htmlDoc.DocumentNode;

        // The values below are extracted as part of the timed work; this program does not
        // consume them any further.
        string headerRow = ReportRootXPath + "/table[1]/tbody/tr[2]";
        // Report number
        var report_number = goText(root.SelectSingleNode(headerRow + "/td[1]"));
        // Query time
        var query_time = goText(root.SelectSingleNode(headerRow + "/td[2]"));
        // Report time
        var report_time = goText(root.SelectSingleNode(headerRow + "/td[3]"));

        string personRow = ReportRootXPath + "/table[2]/tbody/tr[1]";
        // Name
        var report_name = goText(root.SelectSingleNode(personRow + "/td[1]"));
        // Certificate type
        var report_type = goText(root.SelectSingleNode(personRow + "/td[2]"));
        // Certificate number
        var report_id = goText(root.SelectSingleNode(personRow + "/td[3]"));
        // Marital status
        var report_marriage = goText(root.SelectSingleNode(personRow + "/td[4]"));

        // Rows of the data table.
        var table_tr = root.SelectNodes(ReportRootXPath + "/table[4]/tr[3]/td/table/tbody/tr/td/table/tbody/tr");
        List<Table> list = new List<Table>();

        // SelectNodes yields null (not an empty collection) when nothing matches,
        // e.g. when index.html is missing or has a different layout.
        if (table_tr != null)
        {
            foreach (var node in table_tr)
            {
                // Only element nodes are turned into rows.
                if (node.NodeType == HtmlNodeType.Element)
                {
                    list.Add(ReadRow(node));
                }
            }
        }

        // Drop the first row: it is the table header, not data.
        if (list.Count > 0)
        {
            list.RemoveAt(0);
        }

        // Stop timing before any console output so printing does not inflate the figure.
        DateTime afterDT = DateTime.UtcNow;
        TimeSpan ts = afterDT.Subtract(beforDT);

        // Print the row contents (writing the list object itself would only print its type name).
        foreach (var row in list)
        {
            Console.WriteLine("{0}\t{1}\t{2}\t{3}", row.a, row.b, row.c, row.d);
        }

        Console.WriteLine("DateTime总共花费{0}ms.", ts.TotalMilliseconds);
        Console.ReadLine();
    }
}
public class Table
{
    /// <summary>
    /// Text of the first cell of a table row (filled by Program from the td at index 0).
    /// </summary>
    public string a { get; set; }

    /// <summary>
    /// Text of the second cell of a table row (td at index 1).
    /// </summary>
    public string b { get; set; }

    /// <summary>
    /// Text of the third cell of a table row (td at index 2).
    /// </summary>
    public string c { get; set; }

    /// <summary>
    /// Text of the fourth cell of a table row (td at index 3).
    /// </summary>
    public string d { get; set; }
}
}