简易网页采集器的实现
自己写的一个扫描网址标题的小工具.
功能:遍历指定范围的IP,根据IP扫描网页的标题,并记录(支持二级重定向网页的扫描)
自动记录采集日志到D盘的net_collect.log文件中.
类型:控制台程序
实现语言:C#
需要的环境: .NET 3.5
可选的环境:Oracle数据库
相关的缺省值说明:
缺省Oracle数据库用户:scott(程序提示输入时直接按回车,即采用缺省值,下同)
缺省Oracle数据库密码:tigger
缺省Oracle数据库连接标识符:orcl (即TNSNAME名称)
缺省的HTTP连接超时时间:6秒
缺省启用数据库来记录采集到的信息
缺省不启用扫描完成后自动关机
(篇幅原因,数据库连接类这里就不贴了)
主函数代码如下:
/// <summary>
/// Console entry point: prompts for the HTTP timeout, the shutdown option and
/// the optional Oracle connection, splits the requested IP range between the
/// worker threads, waits for them to finish and reports the elapsed time.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    try
    {
        string user = "scott";
        string pwd = "tigger";
        string tns = "orcl";
        Console.WriteLine("***************简易网址扫描器V1.0*****************");
        Console.WriteLine("**************Created By Cryking*****************");
        Console.WriteLine("******************QQ:278676125********************");
        Console.WriteLine("**************************************************");
        Console.WriteLine("请设置超时时间(若网络环境较差,建议设大一点,如100秒)(单位/秒):");
        // An empty or non-numeric line keeps the current timeOut value instead of
        // throwing FormatException, so pressing Enter selects the default.
        int parsedTimeout;
        if (Int32.TryParse(Console.ReadLine(), out parsedTimeout) && parsedTimeout > 0)
        {
            timeOut = parsedTimeout;
        }
        Console.WriteLine("扫描完成后是否自动关机(Y/N)?");
        // ReadLine returns null when stdin is closed; Regex.IsMatch rejects null.
        if (Regex.IsMatch(Console.ReadLine() ?? string.Empty, "(?i)[y]")) shutDownFlag = 1;
        Console.WriteLine("是否启用数据库支持(不启用则只写日志文件),Y/N?:");
        if (Regex.IsMatch(Console.ReadLine() ?? string.Empty, "(?i)[n]")) DBFlag = 0;
        if (DBFlag == 1)
        {
            Console.WriteLine("请输入Oracle数据库连接用户名:");
            user = Console.ReadLine();
            user = string.IsNullOrEmpty(user) ? "scott" : user;
            Console.WriteLine("请输入Oracle数据库连接密码:");
            pwd = string.Empty;
            ConsoleKeyInfo info;
            // Read the password without echoing it; each accepted key prints '*'.
            do
            {
                info = Console.ReadKey(true);
                if (info.Key == ConsoleKey.Backspace)
                {
                    // Let the user correct a typo: drop the last character and its mask.
                    if (pwd.Length > 0)
                    {
                        pwd = pwd.Substring(0, pwd.Length - 1);
                        Console.Write("\b \b");
                    }
                }
                else if (info.Key != ConsoleKey.Enter && info.Key != ConsoleKey.Escape && info.Key != ConsoleKey.Tab && info.KeyChar != '\0')
                {
                    pwd += info.KeyChar;
                    Console.Write('*');
                }
            } while (info.Key != ConsoleKey.Enter);
            pwd = pwd == string.Empty ? "tigger" : pwd;
            Console.WriteLine();
            Console.WriteLine("请输入Oracle数据库连接标识符(TNSNAME):");
            tns = Console.ReadLine();
            tns = string.IsNullOrEmpty(tns) ? "orcl" : tns;
            if (!DBAccess.DBConnect(user, pwd, tns))
            {
                MessageBox.Show("数据库连接失败!", "错误001", MessageBoxButtons.OK, MessageBoxIcon.Error);
                System.Diagnostics.Process.GetCurrentProcess().Kill();
                return; // do not keep running while the process is being torn down
            }
            Console.WriteLine("数据库连接成功!");
            if (DBAccess.selectStr("select count(*) from user_objects where object_name='NET_COLLECT' ") == "0")
            {
                Console.WriteLine("开始创建表(NET_COLLECT),请等待...");
                if (0 == DBAccess.DBExecSql(@"create table NET_COLLECT(
IP VARCHAR2(30) not null,
PORT NUMBER default 80,
TITLE VARCHAR2(4000),
URL VARCHAR2(2000),
COLLECTDATE DATE default sysdate
)"))
                {
                    Console.WriteLine("表(NET_COLLECT)创建成功!");
                }
                else
                {
                    Console.WriteLine("表(NET_COLLECT)创建失败,请参照说明,先手工创建表(NET_COLLECT)!");
                    System.Diagnostics.Process.GetCurrentProcess().Kill();
                    return;
                }
            }
        }

        Console.WriteLine("请输入扫描范围(如:0.0.0.0-10.10.10.10)");
        string Scan = Console.ReadLine() ?? string.Empty;
        string[] tmpIp = Scan.Trim().Split('-');
        if (tmpIp.Length != 2)
        {
            // Reported through the outer catch instead of an opaque index error.
            throw new FormatException("扫描范围格式错误,应为:起始IP-结束IP");
        }
        // Split the range into one slice per worker thread.
        string[] ipScanScop = allocaIncreament(tmpIp[0], tmpIp[1]);

        logFile = new StreamWriter("d:\\net_collect.log", true);
        DateTime startTime = DateTime.Now;
        try
        {
            logFile.WriteLine("开始时间:" + DateTime.Now.ToString());

            // One worker thread per slice.
            Thread[] workers = new Thread[ipScanScop.Length];
            for (int n = 0; n < workers.Length; n++)
            {
                workers[n] = new Thread(new ParameterizedThreadStart(ipScan));
                workers[n].Start(ipScanScop[n]);
            }

            // Block until every worker has ended. Join replaces the former spin on the
            // shared completion counter, which burned a CPU core and never terminated
            // if a worker ended without incrementing the counter.
            foreach (Thread worker in workers)
            {
                worker.Join();
            }

            DBAccess.DBClose();
            logFile.WriteLine("结束时间:" + DateTime.Now.ToString());
        }
        finally
        {
            // All workers have been joined (or never started), so closing is safe.
            logFile.Close();
        }

        TimeSpan ts = DateTime.Now - startTime;
        Console.WriteLine("总共花费时间:" + ts.ToString());
        if (1 == shutDownFlag)
        {
            Process.Start("Shutdown.exe", " -s -t 0"); // power off once the scan is done
        }
        Console.ReadKey();
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
}
扫描功能函数:
/// <summary>
/// Worker-thread body: scans every address of one "start-end" IPv4 slice,
/// fetches the page served at each address, and records its title on the
/// console, in the log file and (when DBFlag is 1) in the NET_COLLECT table.
/// </summary>
/// <param name="obj">
/// The slice to scan, formatted as "a.b.c.d-e.f.g.h". The endpoints may be given
/// in either order; they are compared numerically.
/// </param>
/// <remarks>
/// Addresses in 10.0.0.0/8, 127.0.0.0/8 and 192.168.0.0/16 are skipped.
/// A slice whose start equals its end is treated as empty, because
/// allocaIncreament pads unused worker slots with "end-end" placeholders.
/// Exceptions are reported on the console; the completion counter <c>flag</c>
/// is incremented even when the scan fails.
/// </remarks>
static void ipScan1(object obj)
{
    try
    {
        string[] scope = obj.ToString().Split('-');
        long first = ipScanParseAddress(scope[0]);
        long last = ipScanParseAddress(scope[1]);
        if (first > last) // numeric comparison: "9.0.0.0" really is below "10.0.0.0"
        {
            long swap = first;
            first = last;
            last = swap;
        }

        if (first != last) // "x-x" is a placeholder slice: nothing to do
        {
            string logBuffer = "";
            for (long current = first; current <= last; current++)
            {
                int a = (int)((current >> 24) & 0xFF);
                int b = (int)((current >> 16) & 0xFF);
                int c = (int)((current >> 8) & 0xFF);
                int d = (int)(current & 0xFF);

                if (10 == a || 127 == a)
                {
                    // Private / loopback /8: jump to its last address; the loop
                    // increment then moves on to the next /8.
                    current |= 0x00FFFFFFL;
                    continue;
                }
                if (192 == a && 168 == b)
                {
                    // Private /16: jump to its last address.
                    current |= 0x0000FFFFL;
                    continue;
                }

                string ip = a.ToString() + "." + b.ToString() + "." + c.ToString() + "." + d.ToString();
                // The default encoding is used, so some pages may come back garbled.
                string html = GetHtmlInfo(ip, timeOut * 1000, Encoding.Default);
                string title = GetTitle(html);
                // No title found: fall back to the first 1000 characters of the page.
                title = title == string.Empty ? (html.Length > 1000 ? html.Substring(0, 1000) : html) : title;

                if (html != string.Empty && html != "无法连接到远程服务器" && DBFlag == 1)
                {
                    // The title comes from an untrusted page: double the single quotes
                    // so it cannot terminate the SQL string literal.
                    string sqlTitle = (title ?? string.Empty).Replace("'", "''");
                    DBAccess.DBExecSql("insert into net_collect values('" + ip + "',default,'" + sqlTitle + "','',default)");
                }
                Console.WriteLine(ip + " --" + title);

                // The log writer and the counters are shared between workers, so the
                // write, the flush and the increments all happen while holding the locks.
                lock (logFile)
                {
                    myMutex.WaitOne();
                    try
                    {
                        // Skip a result identical to the previous one (e.g. a repeated error).
                        if (logBuffer != html)
                        {
                            logFile.WriteLine("ip:" + ip + " [MSG:]" + title);
                            logBuffer = html;
                        }
                        logFile.Flush();
                        countPort++;
                        count++;
                    }
                    finally
                    {
                        myMutex.ReleaseMutex();
                    }
                }
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);
    }
    finally
    {
        // Always signal completion, otherwise a failed worker would leave anything
        // waiting on the counter stuck forever.
        myMutex.WaitOne();
        try
        {
            flag++;
        }
        finally
        {
            myMutex.ReleaseMutex();
        }
    }
}

/// <summary>Converts a dotted-quad IPv4 string to its 32-bit value (held in a long).</summary>
/// <param name="text">Address such as "192.0.2.1"; surrounding whitespace is ignored.</param>
/// <returns>The address as a number between 0 and 4294967295.</returns>
/// <exception cref="FormatException">The text is not four octets in the range 0-255.</exception>
static long ipScanParseAddress(string text)
{
    string[] octets = text.Trim().Split('.');
    if (octets.Length != 4)
    {
        throw new FormatException("无效的IP地址: " + text);
    }
    long value = 0;
    foreach (string octet in octets)
    {
        int part = Int32.Parse(octet);
        if (part < 0 || part > 255)
        {
            throw new FormatException("无效的IP地址: " + text);
        }
        value = value * 256 + part;
    }
    return value;
}
网页信息获取函数
/// <summary>
/// Downloads the page at <paramref name="url"/> up to and including the line
/// that contains the closing title tag, following at most one explicit redirect.
/// </summary>
/// <param name="url">Host or URL; "http://" is prepended when no scheme is given.</param>
/// <param name="timeout">Request timeout in milliseconds, applied to both the
/// first request and the redirected one.</param>
/// <param name="EnCodeType">Encoding used to decode the response body.</param>
/// <returns>
/// The text read so far when the final status is 200 OK; an empty string for any
/// other status or a redirect without a Location header; the exception message
/// when the request fails.
/// </returns>
static string GetHtmlInfo(string url, int timeout, Encoding EnCodeType)
{
    if (!url.StartsWith("http://") && !url.StartsWith("https://")) { url = "http://" + url; }
    StreamReader reader = null;
    HttpWebRequest request = null;
    HttpWebResponse response = null;
    try
    {
        // First hop: redirects are handled by hand so the target can be inspected.
        request = getHtmlInfoCreateRequest(new Uri(url), timeout, false);
        response = (HttpWebResponse)request.GetResponse();

        HttpStatusCode status = response.StatusCode;
        if (status == HttpStatusCode.MovedPermanently || status == HttpStatusCode.Found ||
            status == HttpStatusCode.SeeOther || status == HttpStatusCode.TemporaryRedirect)
        {
            string location = response.Headers["Location"];
            Uri origin = request.RequestUri;
            // Release the first connection before it is replaced.
            response.Close();
            response = null;
            request.Abort();
            if (string.IsNullOrEmpty(location))
            {
                return string.Empty;
            }
            // Location may be relative; resolve it against the requested address.
            request = getHtmlInfoCreateRequest(new Uri(origin, location), timeout, true);
            response = (HttpWebResponse)request.GetResponse();
        }

        if (response.StatusCode != HttpStatusCode.OK)
        {
            return string.Empty;
        }

        StringBuilder builder = new StringBuilder();
        reader = new StreamReader(response.GetResponseStream(), EnCodeType);
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            builder.Append(line);
            // Only the title is needed: stop at the line holding the closing tag.
            // Testing just the new line avoids re-scanning everything read so far.
            if (line.IndexOf("</title>", StringComparison.OrdinalIgnoreCase) >= 0) { break; }
            builder.Append("\r\n");
        }
        return builder.ToString();
    }
    catch (Exception ex)
    {
        // Callers receive the failure text in place of page content.
        return ex.Message;
    }
    finally
    {
        if (reader != null) { reader.Close(); }
        if (response != null) { response.Close(); }
        if (request != null) { request.Abort(); }
    }
}

/// <summary>Builds a request carrying the scanner's common headers and timeout.</summary>
/// <param name="target">Address to request.</param>
/// <param name="timeout">Timeout in milliseconds.</param>
/// <param name="followRedirects">Whether the framework may follow redirects itself.</param>
/// <returns>The configured, not yet sent request.</returns>
static HttpWebRequest getHtmlInfoCreateRequest(Uri target, int timeout, bool followRedirects)
{
    HttpWebRequest created = (HttpWebRequest)WebRequest.Create(target);
    created.Timeout = timeout;
    // Header value only: the property adds the "User-Agent" header name itself.
    created.UserAgent = "Mozilla/5.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 2.0.40607; .NET CLR 1.1.4322; .NET CLR 3.5.30729;.NET CLR 1.0.3705)";
    created.Accept = "*/*";
    created.AllowAutoRedirect = followRedirects;
    created.KeepAlive = true;
    created.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");
    return created;
}
IP范围分配函数(分配给各个线程)
/// <summary>
/// Splits an inclusive IPv4 range into eight contiguous, non-overlapping
/// "start-end" slices, one per worker thread.
/// </summary>
/// <param name="tmpIp0">One end of the range, e.g. "1.2.3.4".</param>
/// <param name="tmpIp1">The other end of the range; the two are ordered numerically.</param>
/// <returns>
/// Always eight strings. Real slices hold at least two addresses each (unless the
/// whole range is a single address); worker slots that are not needed receive the
/// placeholder "end-end", which ipScan1 treats as an empty range.
/// </returns>
/// <exception cref="FormatException">An address is not four octets in the range 0-255.</exception>
static string[] allocaIncreament(string tmpIp0, string tmpIp1)
{
    const int WorkerCount = 8;

    // Work on 32-bit values held in longs: no string ordering, no int overflow.
    long first = allocaParseIp(tmpIp0);
    long last = allocaParseIp(tmpIp1);
    if (first > last)
    {
        long swap = first;
        first = last;
        last = swap;
    }

    long total = last - first + 1;

    // Give every active worker at least two addresses, because a slice whose
    // start equals its end is indistinguishable from a placeholder.
    long active = total / 2;
    if (active < 1) active = 1;
    if (active > WorkerCount) active = WorkerCount;

    long baseSize = total / active;
    long remainder = total % active; // the first 'remainder' slices get one extra address

    string lastText = allocaFormatIp(last);
    string placeholder = lastText + "-" + lastText;

    string[] ipResult = new string[WorkerCount];
    long cursor = first;
    for (int n = 0; n < WorkerCount; n++)
    {
        if (n >= active)
        {
            ipResult[n] = placeholder;
            continue;
        }
        long size = baseSize + (n < remainder ? 1 : 0);
        long sliceEnd = cursor + size - 1;
        ipResult[n] = allocaFormatIp(cursor) + "-" + allocaFormatIp(sliceEnd);
        cursor = sliceEnd + 1;
    }
    return ipResult;
}

/// <summary>Converts a dotted-quad IPv4 string to its 32-bit value (held in a long).</summary>
/// <param name="text">Address such as "192.0.2.1"; surrounding whitespace is ignored.</param>
/// <returns>The address as a number between 0 and 4294967295.</returns>
/// <exception cref="FormatException">The text is not four octets in the range 0-255.</exception>
static long allocaParseIp(string text)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }
    string[] octets = text.Trim().Split('.');
    if (octets.Length != 4)
    {
        throw new FormatException("无效的IP地址: " + text);
    }
    long value = 0;
    foreach (string octet in octets)
    {
        int part = Int32.Parse(octet);
        if (part < 0 || part > 255)
        {
            throw new FormatException("无效的IP地址: " + text);
        }
        value = value * 256 + part;
    }
    return value;
}

/// <summary>Formats a 32-bit address value as a dotted-quad string.</summary>
/// <param name="value">Address value between 0 and 4294967295.</param>
/// <returns>The address in "a.b.c.d" form.</returns>
static string allocaFormatIp(long value)
{
    return ((value >> 24) & 0xFF).ToString() + "." +
           ((value >> 16) & 0xFF).ToString() + "." +
           ((value >> 8) & 0xFF).ToString() + "." +
           (value & 0xFF).ToString();
}
运行的界面如下:

---
工具下载地址:http://pan.baidu.com/share/link?shareid=657915&uk=2449788611
有任何问题及建议,请联系我QQ:278676125

浙公网安备 33010602011771号