利用com调用ie进行html解析
利用com调用ie进行html解析,部分代码参考互联网。
别的就不多说了,直接上代码,代码很简单的,不懂的留言。
![]()
Code
1
using System;
2
using System.Collections.Generic;
3
using System.Linq;
4
using System.Text;
5
using System.Runtime.InteropServices;
6
using mshtml;
7
using System.Threading;
8
using System.Runtime.InteropServices.ComTypes;
9
using System.IO;
10![]()
11
namespace Eric.Utilities.Html
12![]()
![]()
{
13
/// <summary>
/// Subset of the standard COM result codes returned by the interop
/// interfaces declared in this file. Negative values are failures;
/// zero and positive values are success codes.
/// </summary>
public enum HRESULT
{
    /// <summary>Unspecified failure (0x80004005).</summary>
    E_FAIL = unchecked((int)0x80004005),

    /// <summary>One or more arguments are invalid (0x80070057).</summary>
    E_INVALIDARG = unchecked((int)0x80070057),

    /// <summary>The requested interface is not supported (0x80004002).</summary>
    E_NOINTERFACE = unchecked((int)0x80004002),

    /// <summary>The method is not implemented (0x80004001).</summary>
    E_NOTIMPL = unchecked((int)0x80004001),

    /// <summary>Unexpected failure (0x8000FFFF).</summary>
    E_UNEXPECTED = unchecked((int)0x8000FFFF),

    /// <summary>The call succeeded but reports a "false" result.</summary>
    S_FALSE = 0x00000001,

    /// <summary>The call succeeded.</summary>
    S_OK = 0x00000000
}
23![]()
24
/// <summary>
/// Managed declaration of the COM <c>IPersist</c> interface
/// (IID 0000010c-0000-0000-C000-000000000046).
/// </summary>
[ComImport]
[Guid("0000010c-0000-0000-C000-000000000046")]
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
[ComVisible(true)]
public interface IPersist
{
    /// <summary>Retrieves the class identifier (CLSID) of the object.</summary>
    /// <param name="pClassID">Receives the CLSID.</param>
    void GetClassID(ref Guid pClassID);
}
29![]()
30
/// <summary>
/// Managed declaration of the COM <c>IPersistStreamInit</c> interface
/// (IID 7FD52380-4E07-101B-AE2D-08002B2EC713).
/// The inherited <c>GetClassID</c> slot is redeclared first and the remaining
/// members keep their declared order, because the order of members defines
/// the layout used for the COM calls.
/// </summary>
[ComImport]
[Guid("7FD52380-4E07-101B-AE2D-08002B2EC713")]
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
[ComVisible(true)]
public interface IPersistStreamInit : IPersist
{
    /// <summary>Retrieves the class identifier (CLSID) of the object.</summary>
    new void GetClassID(ref Guid pClassID);

    /// <summary>Returns the raw HRESULT telling whether the object changed since it was last saved.</summary>
    [PreserveSig]
    int IsDirty();

    /// <summary>Initializes the object from the given stream.</summary>
    [PreserveSig]
    HRESULT Load(IStream pstm);

    /// <summary>Saves the object to the given stream.</summary>
    /// <param name="pstm">Destination stream.</param>
    /// <param name="fClearDirty">Whether to clear the dirty flag after saving.</param>
    [PreserveSig]
    HRESULT Save(IStream pstm, [MarshalAs(UnmanagedType.Bool)] bool fClearDirty);

    /// <summary>Retrieves the maximum stream size needed to save the object.</summary>
    [PreserveSig]
    HRESULT GetSizeMax([In, Out, MarshalAs(UnmanagedType.U8)] ref long pcbSize);

    /// <summary>Initializes the object to a default state.</summary>
    [PreserveSig]
    HRESULT InitNew();
}
45![]()
46
/// <summary>
/// Parses HTML into an MSHTML DOM by driving the Internet Explorer
/// engine (mshtml) through COM.
/// </summary>
public class HtmlParser
{
    // Delay between readyState polls, in milliseconds.
    private const int PollIntervalMilliseconds = 100;

    /// <summary>
    /// Loads the document at <paramref name="url"/> and blocks until its
    /// <c>readyState</c> becomes "complete".
    /// </summary>
    /// <param name="url">URL or local file path of the document to load.</param>
    /// <returns>The loaded document viewed as <see cref="IHTMLDocument3"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// MSHTML did not provide the expected interface or did not create a document.
    /// </exception>
    public IHTMLDocument3 Parse(string url)
    {
        if (url == null)
        {
            throw new ArgumentNullException("url");
        }

        HTMLDocument objMSHTML = new HTMLDocument();

        // The document is initialized through IPersistStreamInit before it is
        // asked to create a document from a URL.
        IPersistStreamInit objIPS = objMSHTML as IPersistStreamInit;
        if (objIPS == null)
        {
            throw new InvalidOperationException("HTMLDocument does not expose IPersistStreamInit.");
        }

        // ThrowExceptionForHR only throws for failure codes (negative values),
        // so S_OK and S_FALSE pass through untouched.
        Marshal.ThrowExceptionForHR((int)objIPS.InitNew());

        IHTMLDocument2 objMSHTML2 = objMSHTML.createDocumentFromUrl(url, "null");
        if (objMSHTML2 == null)
        {
            throw new InvalidOperationException("MSHTML did not create a document for: " + url);
        }

        // NOTE(review): this loop has no timeout and does not pump window
        // messages; if the calling thread is an STA thread, loading may need a
        // message pump to make progress - confirm for your host.
        while (objMSHTML2.readyState != "complete")
        {
            Thread.Sleep(PollIntervalMilliseconds);
        }

        return objMSHTML2 as IHTMLDocument3;
    }

    /// <summary>
    /// Parses an in-memory HTML string by writing it to a temporary file,
    /// loading that file with <see cref="Parse"/>, and removing the file again.
    /// </summary>
    /// <param name="html">HTML markup to parse.</param>
    /// <param name="encoding">Encoding used to write the temporary file.</param>
    /// <returns>The loaded document viewed as <see cref="IHTMLDocument3"/>.</returns>
    public IHTMLDocument3 ParseHtml(string html, Encoding encoding)
    {
        string tmpFile = Path.GetTempFileName();
        try
        {
            File.WriteAllText(tmpFile, html, encoding);
            return Parse(tmpFile);
        }
        finally
        {
            // Parse returns only once the document is fully loaded, so the
            // temporary file is no longer needed. Cleanup is best effort: a
            // failed delete must not hide the parse result or its exception.
            try
            {
                File.Delete(tmpFile);
            }
            catch (IOException)
            {
            }
            catch (UnauthorizedAccessException)
            {
            }
        }
    }
}
75
}
76
说一个问题:进行 ParseHtml 的时候,是先建立了一个临时文件再解析的。我想不建立临时文件而直接解析,但一直不得其法;网上有通过 Markup Services(IMarkupServices)实现的方法,但我发现并不好用。不知道大家有没有更好的办法。
1
using System;2
using System.Collections.Generic;3
using System.Linq;4
using System.Text;5
using System.Runtime.InteropServices;6
using mshtml;7
using System.Threading;8
using System.Runtime.InteropServices.ComTypes;9
using System.IO;10

11
namespace Eric.Utilities.Html12


{13
/// <summary>
/// Subset of the standard COM result codes returned by the interop
/// interfaces declared in this file. Negative values are failures;
/// zero and positive values are success codes.
/// </summary>
public enum HRESULT
{
    /// <summary>Unspecified failure (0x80004005).</summary>
    E_FAIL = unchecked((int)0x80004005),

    /// <summary>One or more arguments are invalid (0x80070057).</summary>
    E_INVALIDARG = unchecked((int)0x80070057),

    /// <summary>The requested interface is not supported (0x80004002).</summary>
    E_NOINTERFACE = unchecked((int)0x80004002),

    /// <summary>The method is not implemented (0x80004001).</summary>
    E_NOTIMPL = unchecked((int)0x80004001),

    /// <summary>Unexpected failure (0x8000FFFF).</summary>
    E_UNEXPECTED = unchecked((int)0x8000FFFF),

    /// <summary>The call succeeded but reports a "false" result.</summary>
    S_FALSE = 0x00000001,

    /// <summary>The call succeeded.</summary>
    S_OK = 0x00000000
}

24
/// <summary>
/// Managed declaration of the COM <c>IPersist</c> interface
/// (IID 0000010c-0000-0000-C000-000000000046).
/// </summary>
[ComImport]
[Guid("0000010c-0000-0000-C000-000000000046")]
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
[ComVisible(true)]
public interface IPersist
{
    /// <summary>Retrieves the class identifier (CLSID) of the object.</summary>
    /// <param name="pClassID">Receives the CLSID.</param>
    void GetClassID(ref Guid pClassID);
}

30
/// <summary>
/// Managed declaration of the COM <c>IPersistStreamInit</c> interface
/// (IID 7FD52380-4E07-101B-AE2D-08002B2EC713).
/// The inherited <c>GetClassID</c> slot is redeclared first and the remaining
/// members keep their declared order, because the order of members defines
/// the layout used for the COM calls.
/// </summary>
[ComImport]
[Guid("7FD52380-4E07-101B-AE2D-08002B2EC713")]
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
[ComVisible(true)]
public interface IPersistStreamInit : IPersist
{
    /// <summary>Retrieves the class identifier (CLSID) of the object.</summary>
    new void GetClassID(ref Guid pClassID);

    /// <summary>Returns the raw HRESULT telling whether the object changed since it was last saved.</summary>
    [PreserveSig]
    int IsDirty();

    /// <summary>Initializes the object from the given stream.</summary>
    [PreserveSig]
    HRESULT Load(IStream pstm);

    /// <summary>Saves the object to the given stream.</summary>
    /// <param name="pstm">Destination stream.</param>
    /// <param name="fClearDirty">Whether to clear the dirty flag after saving.</param>
    [PreserveSig]
    HRESULT Save(IStream pstm, [MarshalAs(UnmanagedType.Bool)] bool fClearDirty);

    /// <summary>Retrieves the maximum stream size needed to save the object.</summary>
    [PreserveSig]
    HRESULT GetSizeMax([In, Out, MarshalAs(UnmanagedType.U8)] ref long pcbSize);

    /// <summary>Initializes the object to a default state.</summary>
    [PreserveSig]
    HRESULT InitNew();
}

46
/// <summary>
/// Parses HTML into an MSHTML DOM by driving the Internet Explorer
/// engine (mshtml) through COM.
/// </summary>
public class HtmlParser
{
    // Delay between readyState polls, in milliseconds.
    private const int PollIntervalMilliseconds = 100;

    /// <summary>
    /// Loads the document at <paramref name="url"/> and blocks until its
    /// <c>readyState</c> becomes "complete".
    /// </summary>
    /// <param name="url">URL or local file path of the document to load.</param>
    /// <returns>The loaded document viewed as <see cref="IHTMLDocument3"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// MSHTML did not provide the expected interface or did not create a document.
    /// </exception>
    public IHTMLDocument3 Parse(string url)
    {
        if (url == null)
        {
            throw new ArgumentNullException("url");
        }

        HTMLDocument objMSHTML = new HTMLDocument();

        // The document is initialized through IPersistStreamInit before it is
        // asked to create a document from a URL.
        IPersistStreamInit objIPS = objMSHTML as IPersistStreamInit;
        if (objIPS == null)
        {
            throw new InvalidOperationException("HTMLDocument does not expose IPersistStreamInit.");
        }

        // ThrowExceptionForHR only throws for failure codes (negative values),
        // so S_OK and S_FALSE pass through untouched.
        Marshal.ThrowExceptionForHR((int)objIPS.InitNew());

        IHTMLDocument2 objMSHTML2 = objMSHTML.createDocumentFromUrl(url, "null");
        if (objMSHTML2 == null)
        {
            throw new InvalidOperationException("MSHTML did not create a document for: " + url);
        }

        // NOTE(review): this loop has no timeout and does not pump window
        // messages; if the calling thread is an STA thread, loading may need a
        // message pump to make progress - confirm for your host.
        while (objMSHTML2.readyState != "complete")
        {
            Thread.Sleep(PollIntervalMilliseconds);
        }

        return objMSHTML2 as IHTMLDocument3;
    }

    /// <summary>
    /// Parses an in-memory HTML string by writing it to a temporary file,
    /// loading that file with <see cref="Parse"/>, and removing the file again.
    /// </summary>
    /// <param name="html">HTML markup to parse.</param>
    /// <param name="encoding">Encoding used to write the temporary file.</param>
    /// <returns>The loaded document viewed as <see cref="IHTMLDocument3"/>.</returns>
    public IHTMLDocument3 ParseHtml(string html, Encoding encoding)
    {
        string tmpFile = Path.GetTempFileName();
        try
        {
            File.WriteAllText(tmpFile, html, encoding);
            return Parse(tmpFile);
        }
        finally
        {
            // Parse returns only once the document is fully loaded, so the
            // temporary file is no longer needed. Cleanup is best effort: a
            // failed delete must not hide the parse result or its exception.
            try
            {
                File.Delete(tmpFile);
            }
            catch (IOException)
            {
            }
            catch (UnauthorizedAccessException)
            {
            }
        }
    }
}
}76

本文基于署名 2.5 中国大陆许可协议发布,欢迎转载,演绎或用于商业目的,但是必须保留本文的署名小橋流水(包含链接)。如您有任何疑问或者授权方面的协商,请给我发邮件。
浙公网安备 33010602011771号