C#:一个用于网页抓取的方法——按起止标记有效截取字符串
1
/// <summary>
/// Downloads the document at <paramref name="strUrl"/> and returns the text located
/// between a begin marker and an end marker, optionally applying string replacements
/// to the extracted text.
/// </summary>
/// <param name="strUrl">Address passed to <c>WebClient.DownloadString</c>.</param>
/// <param name="strBegin">Marker after which the extracted text starts. Must not be null or empty.</param>
/// <param name="strEnd">Marker before which the extracted text ends. Must not be null or empty.</param>
/// <param name="iBegin">
/// 1-based occurrence of <paramref name="strBegin"/> to start after. A value below 1, or
/// larger than the number of occurrences, selects the last occurrence.
/// </param>
/// <param name="iEnd">
/// 1-based occurrence of <paramref name="strEnd"/>, counted from the start position, to
/// stop at. A value below 1, or larger than the number of remaining occurrences, selects
/// the last occurrence in the document.
/// </param>
/// <param name="arrOld">Substrings to replace in the extracted text; may be null.</param>
/// <param name="arrNew">
/// Replacements, matched to <paramref name="arrOld"/> by index. May be null or shorter than
/// <paramref name="arrOld"/>; entries without a replacement are removed from the text.
/// </param>
/// <returns>
/// The extracted (and replaced) text. An empty string is returned when the begin marker is
/// missing or when no end marker starts after the start position.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="strBegin"/> or <paramref name="strEnd"/> is null.</exception>
/// <exception cref="ArgumentException">
/// A marker is empty, or an entry of <paramref name="arrOld"/> is null or empty
/// (raised by <c>string.Replace</c>).
/// </exception>
public static string GetWebPageContent(string strUrl, string strBegin, string strEnd, int iBegin, int iEnd, string[] arrOld, string[] arrNew)
{
    // Validate the markers before touching the network: an empty separator would
    // otherwise make the search meaningless.
    if (strBegin == null)
    {
        throw new ArgumentNullException("strBegin");
    }
    if (strEnd == null)
    {
        throw new ArgumentNullException("strEnd");
    }
    if (strBegin.Length == 0)
    {
        throw new ArgumentException("The begin marker must not be empty.", "strBegin");
    }
    if (strEnd.Length == 0)
    {
        throw new ArgumentException("The end marker must not be empty.", "strEnd");
    }

    string strOriginal;
    // WebClient is IDisposable; download errors propagate unchanged, with their
    // original stack trace, instead of being caught and re-thrown.
    using (WebClient client = new WebClient())
    {
        // NOTE: Encoding.Default depends on the platform/runtime; kept as in the original.
        client.Encoding = System.Text.Encoding.Default;
        strOriginal = client.DownloadString(strUrl);
    }

    int iStart = FindContentStart(strOriginal, strBegin, iBegin);
    int iTerminal = FindContentEnd(strOriginal, strEnd, iEnd, iStart);
    string strDestination = strOriginal.Substring(iStart, iTerminal - iStart);

    if (arrOld != null)
    {
        for (int i = 0; i < arrOld.Length; i++)
        {
            // A missing replacement (null array or shorter array) means "remove".
            string replacement = (arrNew != null && i < arrNew.Length) ? arrNew[i] : "";
            strDestination = strDestination.Replace(arrOld[i], replacement);
        }
    }

    return strDestination;
}

/// <summary>
/// Returns the index just past the requested occurrence of <paramref name="marker"/>,
/// falling back to the last occurrence, or to the end of the text when the marker is absent.
/// </summary>
private static int FindContentStart(string text, string marker, int occurrence)
{
    if (occurrence >= 1)
    {
        int searchFrom = 0;
        int found = 0;
        while (true)
        {
            int index = text.IndexOf(marker, searchFrom, StringComparison.Ordinal);
            if (index < 0)
            {
                break;
            }

            searchFrom = index + marker.Length;
            found++;
            if (found == occurrence)
            {
                return searchFrom;
            }
        }
    }

    // Out-of-range occurrence: use the last marker; no marker at all puts the start at
    // the end of the text so that the extraction yields an empty string.
    int last = text.LastIndexOf(marker, StringComparison.Ordinal);
    return last < 0 ? text.Length : last + marker.Length;
}

/// <summary>
/// Returns the index of the requested occurrence of <paramref name="marker"/> at or after
/// <paramref name="start"/>, falling back to the last occurrence in the text, or to
/// <paramref name="start"/> itself when no marker begins after it.
/// </summary>
private static int FindContentEnd(string text, string marker, int occurrence, int start)
{
    int last = text.LastIndexOf(marker, StringComparison.Ordinal);
    if (last <= start)
    {
        // No end marker begins after the start position: extract nothing.
        return start;
    }

    if (occurrence >= 1)
    {
        int searchFrom = start;
        int found = 0;
        while (true)
        {
            int index = text.IndexOf(marker, searchFrom, StringComparison.Ordinal);
            if (index < 0)
            {
                break;
            }

            found++;
            if (found == occurrence)
            {
                return index;
            }
            searchFrom = index + marker.Length;
        }
    }

    return last;
}
/// <summary>
/// Downloads the document at <paramref name="strUrl"/> and returns the text located
/// between a begin marker and an end marker, optionally applying string replacements
/// to the extracted text.
/// </summary>
/// <param name="strUrl">Address passed to <c>WebClient.DownloadString</c>.</param>
/// <param name="strBegin">Marker after which the extracted text starts. Must not be null or empty.</param>
/// <param name="strEnd">Marker before which the extracted text ends. Must not be null or empty.</param>
/// <param name="iBegin">
/// 1-based occurrence of <paramref name="strBegin"/> to start after. A value below 1, or
/// larger than the number of occurrences, selects the last occurrence.
/// </param>
/// <param name="iEnd">
/// 1-based occurrence of <paramref name="strEnd"/>, counted from the start position, to
/// stop at. A value below 1, or larger than the number of remaining occurrences, selects
/// the last occurrence in the document.
/// </param>
/// <param name="arrOld">Substrings to replace in the extracted text; may be null.</param>
/// <param name="arrNew">
/// Replacements, matched to <paramref name="arrOld"/> by index. May be null or shorter than
/// <paramref name="arrOld"/>; entries without a replacement are removed from the text.
/// </param>
/// <returns>
/// The extracted (and replaced) text. An empty string is returned when the begin marker is
/// missing or when no end marker starts after the start position.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="strBegin"/> or <paramref name="strEnd"/> is null.</exception>
/// <exception cref="ArgumentException">
/// A marker is empty, or an entry of <paramref name="arrOld"/> is null or empty
/// (raised by <c>string.Replace</c>).
/// </exception>
public static string GetWebPageContent(string strUrl, string strBegin, string strEnd, int iBegin, int iEnd, string[] arrOld, string[] arrNew)
{
    // Validate the markers before touching the network: an empty separator would
    // otherwise make the search meaningless.
    if (strBegin == null)
    {
        throw new ArgumentNullException("strBegin");
    }
    if (strEnd == null)
    {
        throw new ArgumentNullException("strEnd");
    }
    if (strBegin.Length == 0)
    {
        throw new ArgumentException("The begin marker must not be empty.", "strBegin");
    }
    if (strEnd.Length == 0)
    {
        throw new ArgumentException("The end marker must not be empty.", "strEnd");
    }

    string strOriginal;
    // WebClient is IDisposable; download errors propagate unchanged, with their
    // original stack trace, instead of being caught and re-thrown.
    using (WebClient client = new WebClient())
    {
        // NOTE: Encoding.Default depends on the platform/runtime; kept as in the original.
        client.Encoding = System.Text.Encoding.Default;
        strOriginal = client.DownloadString(strUrl);
    }

    int iStart = FindContentStart(strOriginal, strBegin, iBegin);
    int iTerminal = FindContentEnd(strOriginal, strEnd, iEnd, iStart);
    string strDestination = strOriginal.Substring(iStart, iTerminal - iStart);

    if (arrOld != null)
    {
        for (int i = 0; i < arrOld.Length; i++)
        {
            // A missing replacement (null array or shorter array) means "remove".
            string replacement = (arrNew != null && i < arrNew.Length) ? arrNew[i] : "";
            strDestination = strDestination.Replace(arrOld[i], replacement);
        }
    }

    return strDestination;
}

/// <summary>
/// Returns the index just past the requested occurrence of <paramref name="marker"/>,
/// falling back to the last occurrence, or to the end of the text when the marker is absent.
/// </summary>
private static int FindContentStart(string text, string marker, int occurrence)
{
    if (occurrence >= 1)
    {
        int searchFrom = 0;
        int found = 0;
        while (true)
        {
            int index = text.IndexOf(marker, searchFrom, StringComparison.Ordinal);
            if (index < 0)
            {
                break;
            }

            searchFrom = index + marker.Length;
            found++;
            if (found == occurrence)
            {
                return searchFrom;
            }
        }
    }

    // Out-of-range occurrence: use the last marker; no marker at all puts the start at
    // the end of the text so that the extraction yields an empty string.
    int last = text.LastIndexOf(marker, StringComparison.Ordinal);
    return last < 0 ? text.Length : last + marker.Length;
}

/// <summary>
/// Returns the index of the requested occurrence of <paramref name="marker"/> at or after
/// <paramref name="start"/>, falling back to the last occurrence in the text, or to
/// <paramref name="start"/> itself when no marker begins after it.
/// </summary>
private static int FindContentEnd(string text, string marker, int occurrence, int start)
{
    int last = text.LastIndexOf(marker, StringComparison.Ordinal);
    if (last <= start)
    {
        // No end marker begins after the start position: extract nothing.
        return start;
    }

    if (occurrence >= 1)
    {
        int searchFrom = start;
        int found = 0;
        while (true)
        {
            int index = text.IndexOf(marker, searchFrom, StringComparison.Ordinal);
            if (index < 0)
            {
                break;
            }

            found++;
            if (found == occurrence)
            {
                return index;
            }
            searchFrom = index + marker.Length;
        }
    }

    return last;
}
// Author's note: this is an improvement on code originally written by a friend; I just worked it out and am sharing it.



浙公网安备 33010602011771号