using System;
using System.Net;
using System.IO;
using System.Text;
using System.Collections;
using System.Text.RegularExpressions;

namespace ConsoleApplication1


{

/// <summary>
/// Console tool that downloads a book index page, extracts the chapter link
/// titles from it and writes a local HTML table of contents.
/// </summary>
class Class1
{
    // Page whose chapter links are scraped.
    private const string IndexUrl = "http://book.sina.com.cn/nzt/lit/dixiashi/index.shtml";

    // Output file used when no path is supplied on the command line.
    private const string DefaultOutputPath = @"C:\Documents and Settings\Administrator\桌面\index.html";

    // Number of chapter links looked up on the index page (1..ChapterCount).
    private const int ChapterCount = 84;

    // Number of characters taken from the page starting at each chapter anchor tag.
    private const int AnchorWindow = 80;

    /// <summary>
    /// Application entry point. Downloads the index page, collects one title and
    /// one local file name per chapter and writes the generated table of contents.
    /// </summary>
    /// <param name="args">
    /// Optional. When present, the first argument is used as the output file path
    /// instead of the built-in default.
    /// </param>
    [STAThread]
    static void Main(string[] args)
    {
        string outputPath = DefaultOutputPath;
        if (args != null && args.Length > 0)
        {
            outputPath = args[0];
        }

        string content = Gethtml(IndexUrl);

        // Layout expected by Getlist: arr[i,0] holds the link text and arr[0,i]
        // the link target of entry i; index 0 is an unused placeholder.
        string[,] arr = new string[ChapterCount + 1, ChapterCount + 1];
        arr[0, 0] = "";
        for (int i = 1; i <= ChapterCount; i++)
        {
            string anchor = "<a href=/nzt/lit/dixiashi/" + i + ".shtml target=_blank class=a03>";
            arr[i, 0] = GetChinese("[\u4e00-\u9fa5]", MID(content, anchor, AnchorWindow)); // link text
            arr[0, i] = "Books_zg_" + i + ".html";
        }

        Writefile(outputPath, Getlist(4, 700, arr));
    }

    /// <summary>
    /// Returns up to <paramref name="length"/> characters of
    /// <paramref name="Content"/> starting at the first ordinal occurrence of
    /// <paramref name="StartString"/>.
    /// </summary>
    /// <param name="Content">Text to search.</param>
    /// <param name="StartString">Marker at which the returned slice begins.</param>
    /// <param name="length">Maximum number of characters to return.</param>
    /// <returns>
    /// The slice beginning with the marker, shortened when fewer than
    /// <paramref name="length"/> characters remain; an empty string when the
    /// marker does not occur.
    /// </returns>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    private static string MID(string Content, string StartString, int length)
    {
        if (Content == null)
        {
            throw new ArgumentNullException("Content");
        }
        if (StartString == null)
        {
            throw new ArgumentNullException("StartString");
        }

        // Markup markers must match character for character, so search ordinally.
        int start = Content.IndexOf(StartString, StringComparison.Ordinal);
        if (start < 0)
        {
            return string.Empty;
        }

        // Clamp so a marker close to the end of the text does not overrun it.
        return Content.Substring(start, Math.Min(length, Content.Length - start));
    }

    /// <summary>
    /// Downloads the resource at <paramref name="url"/> and decodes it as GB2312 text.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>The complete response body as a string.</returns>
    private static string Gethtml(string url)
    {
        // Dispose the client, the stream and the reader even when reading fails.
        using (WebClient client = new WebClient())
        using (Stream stream = client.OpenRead(url))
        using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding("GB2312")))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Fetches the page at <paramref name="URL"/> with an HTTP GET request
    /// (keep-alive disabled) and decodes the body as GB2312 text.
    /// </summary>
    /// <param name="URL">Absolute address of the page.</param>
    /// <returns>The complete response body as a string.</returns>
    private static string GetPageSource(string URL)
    {
        Uri uri = new Uri(URL);

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);

        // Configure the request before it is sent; GetResponse() submits it.
        request.Method = "GET";
        request.KeepAlive = false;

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("GB2312")))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Builds a complete HTML page containing a table of links.
    /// </summary>
    /// <param name="col">Number of table columns; must be greater than zero.</param>
    /// <param name="ww">Width of the table.</param>
    /// <param name="arr">
    /// Link data: for entry i (starting at 1) arr[i,0] is the link text and
    /// arr[0,i] is the link target. Index 0 is not rendered.
    /// </param>
    /// <returns>The generated HTML document.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="arr"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="col"/> is not positive.</exception>
    private static string Getlist(int col, int ww, string[,] arr)
    {
        if (arr == null)
        {
            throw new ArgumentNullException("arr");
        }
        if (col <= 0)
        {
            throw new ArgumentOutOfRangeException("col", "col must be greater than zero.");
        }

        // An entry needs both arr[i,0] and arr[0,i], so the usable range is limited
        // by the shorter dimension; index 0 is a placeholder and is not counted.
        int itemCount = Math.Max(0, Math.Min(arr.GetLength(0), arr.GetLength(1)) - 1);
        int rowCount = (itemCount + col - 1) / col;                   // rows needed, rounded up
        int cellWidth = (int)Math.Floor(ww / Convert.ToDouble(col)); // width of each column

        StringBuilder sb = new StringBuilder();
        sb.Append("<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=gb2312\"><title>书的列表</title><style type=\"text/css\"><!--BODY {SCROLLBAR-FACE-COLOR: #c5c5c5; MARGIN: 0px; FONT: 12px 宋体; SCROLLBAR-HIGHLIGHT-COLOR: #c5c5c5; SCROLLBAR-SHADOW-COLOR: #c5c5c5; SCROLLBAR-3DLIGHT-COLOR: #c5c5c5; SCROLLBAR-ARROW-COLOR: #ffffff; SCROLLBAR-TRACK-COLOR: #fffffd; SCROLLBAR-DARKSHADOW-COLOR: #c5c5c5;font-size:13px;}A.a03:link {COLOR: #1E1E9C; TEXT-DECORATION: underline}A.a03:visited { COLOR: #6d6e71; TEXT-DECORATION: none}A.a03:active {COLOR: #ff0000; TEXT-DECORATION: none}A.a03:hover {COLOR: #ff0000; TEXT-DECORATION: none}td{font-size:13px;}--></style></head><body><table width=\"" + ww + "\" border=\"0\" cellspacing=\"0\" cellpadding=\"0\">");

        int index = 0;
        for (int row = 0; row < rowCount; row++)
        {
            sb.Append("<tr>");
            for (int column = 0; column < col; column++)
            {
                index++;
                sb.Append("<td width=\"" + cellWidth + "\">");

                // Cells past the last entry stay empty so the final row is padded.
                if (index <= itemCount)
                {
                    sb.Append("<a href=\"" + arr[0, index] + "\" target=\"_blank\" class=\"a03\">" + arr[index, 0] + "</a>");
                }

                sb.Append("</td>");
            }
            sb.Append("</tr>");

            // Thin spacer row between two rows of links.
            sb.Append("<tr><td colspan=\"" + col + "\" height=\"7\"></td></tr>");
        }
        sb.Append("</table></body></html>");

        return sb.ToString();
    }

    /// <summary>
    /// Writes <paramref name="text"/> to <paramref name="path"/>, replacing any
    /// existing file, using the gb2312 encoding.
    /// </summary>
    /// <param name="path">Destination file path.</param>
    /// <param name="text">Content to write.</param>
    private static void Writefile(string path, string text)
    {
        // gb2312 for the Chinese text; per the original author this is what QuickCHM supports.
        using (StreamWriter writer = new StreamWriter(path, false, Encoding.GetEncoding("gb2312")))
        {
            writer.Write(text);
        }
    }

    /// <summary>
    /// Extracts the distinct http:// addresses found in <paramref name="htmlCode"/>.
    /// </summary>
    /// <param name="htmlCode">Text to scan.</param>
    /// <returns>An <see cref="ArrayList"/> of unique URL strings in sorted order.</returns>
    private static ArrayList GetHyperLinks(string htmlCode)
    {
        ArrayList links = new ArrayList();

        // Tracks URLs already collected so each duplicate check is a hash lookup.
        Hashtable seen = new Hashtable();

        string strRegex = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";
        Regex urlPattern = new Regex(strRegex, RegexOptions.IgnoreCase);

        foreach (Match match in urlPattern.Matches(htmlCode))
        {
            string link = match.Value;

            // Filter out duplicate URLs.
            if (!seen.ContainsKey(link))
            {
                seen.Add(link, null);
                links.Add(link);
            }
        }

        links.Sort();

        return links;
    }

    /// <summary>
    /// Concatenates every match of <paramref name="reg"/> in
    /// <paramref name="str"/> and, when the text contains "(" followed by at
    /// least one character, appends that single character wrapped in parentheses.
    /// </summary>
    /// <param name="reg">Regular expression selecting the characters to keep.</param>
    /// <param name="str">Text to extract from.</param>
    /// <returns>
    /// The concatenated matches, followed by "(x)" where x is the character right
    /// after the first "(" in <paramref name="str"/>; no suffix is added when
    /// there is no such character.
    /// </returns>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    private static string GetChinese(string reg, string str)
    {
        if (reg == null)
        {
            throw new ArgumentNullException("reg");
        }
        if (str == null)
        {
            throw new ArgumentNullException("str");
        }

        StringBuilder text = new StringBuilder();
        foreach (Match match in Regex.Matches(str, reg, RegexOptions.IgnoreCase))
        {
            text.Append(match.Value);
        }

        // NOTE(review): only one character after "(" is kept, so a marker such as
        // "(12)" comes out as "(1)" - confirm whether that is intended.
        int open = str.IndexOf('(');
        if (open >= 0 && open + 1 < str.Length)
        {
            text.Append('(').Append(str[open + 1]).Append(')');
        }

        return text.ToString();
    }
}
}

// Originally posted on 2006-12-25 10:55 by 感動常在 (blog page footer; 553 views).