该程序的设计目的是从Replays.net上找到感兴趣的replay文件并将其下载。其核心是下载并解析网页的HTML源码,从中得到待下载文件的URL。整体思路比较简单,但在涉及一些细节问题时,由于个人经验和能力所限,程序的可扩展性没有得到很好的体现。其中XML作为配置文件的作用也没有充分发挥,例如目前还不能仅通过修改XML文件来实现对另一个网站上感兴趣文件的下载。在以后的版本中将努力实现这一点。
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;
using System.Collections;
using System.Xml;
using System.Web;
namespace RepalysDownload
{
/// <summary>
/// Console entry point: reads the site root and the replay list pages from the
/// XML configuration, scrapes each list page for replay detail pages, resolves
/// the replay file link on each detail page and downloads the file.
/// </summary>
class Program
{
    /// <summary>
    /// Runs the whole scrape-and-download pass once.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        const string strTagName = "ReplaysDownload";

        string strRoolUrl = new GetUrlFromXML().GetUrlByNode(strTagName);
        ArrayList listPageUrls = new GetReplaysUrlFromXML().GetUrlByNode(strTagName);

        NavigatetoUrl navigator = new NavigatetoUrl();
        GetFileUrl fileUrlResolver = new GetFileUrl();

        foreach (string strReplaysUrl in listPageUrls)
        {
            // The list page is fully read before any detail page is requested,
            // so the request order is: list page, then (detail page, file) per item.
            ArrayList items = navigator.GetDownLoadUrl(strReplaysUrl);
            foreach (Item item in items)
            {
                // item.onclick is a site-relative path, so prefix the site root.
                string detailPageUrl = strRoolUrl + item.onclick;

                string relativeFileUrl = fileUrlResolver.Get_FileUrl(detailPageUrl);
                if (string.IsNullOrEmpty(relativeFileUrl))
                {
                    // Without this guard the code would go on to download the
                    // site root page into a file named after the host.
                    Console.WriteLine("No download link found on " + detailPageUrl);
                    continue;
                }

                string url = strRoolUrl + relativeFileUrl;

                // The local file name is whatever follows the last '/' of the
                // decoded URL.
                string decodedUrl = HttpUtility.UrlDecode(url);
                string repName = decodedUrl.Substring(decodedUrl.LastIndexOf('/') + 1);

                DownloadFile.Download(url, FilePath.repFilePath + repName);//To be modified
            }
        }
    }
}
/// <summary>
/// Hard-coded file system locations used by the program.
/// </summary>
static class FilePath
{
    /// <summary>
    /// Full path of the XML configuration file that GetUrlFromXML and
    /// GetReplaysUrlFromXML load.
    /// </summary>
    public static string xmlFilePath = @"C:\Documents and Settings\v-niwa\Desktop\RepalysDownload\RepalysDownload\RepalysDownload\RepalysDownload.xml";

    /// <summary>
    /// Folder prefix (ending in a backslash) that Main prepends to the name
    /// of every downloaded replay file.
    /// </summary>
    public static string repFilePath = @"D:\War3\Replays\";
}
//To be added
/// <summary>
/// Best-effort file downloader.
/// </summary>
static class DownloadFile
{
    /// <summary>
    /// Downloads the resource at <paramref name="url"/> and stores it in
    /// <paramref name="fileName"/>.
    /// </summary>
    /// <param name="url">Address of the resource to download.</param>
    /// <param name="fileName">Path of the local file to write.</param>
    /// <remarks>
    /// Any exception is written to the console and swallowed, so one failed
    /// download does not stop the caller.
    /// </remarks>
    public static void Download(string url, string fileName)
    {
        try
        {
            // WebClient is IDisposable; release it even when the download throws.
            using (WebClient wc = new WebClient())
            {
                wc.DownloadFile(url, fileName);
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.ToString());
        }
    }
}
/// <summary>
/// Reads the site root URL from the XML configuration file.
/// </summary>
class GetUrlFromXML
{
    /// <summary>
    /// Loads the file at FilePath.xmlFilePath and returns the inner text of
    /// the first grandchild (first child of the first child) of the first
    /// element named <paramref name="strTagName"/>.
    /// </summary>
    /// <param name="strTagName">Name of the element to look under.</param>
    /// <returns>The inner text of that node.</returns>
    public string GetUrlByNode(string strTagName)
    {
        XmlDocument config = new XmlDocument();
        config.Load(FilePath.xmlFilePath);

        // Walk down one level at a time: section -> wrapper -> value node.
        XmlNode section = config.GetElementsByTagName(strTagName)[0];
        XmlNode wrapper = section.FirstChild;
        XmlNode valueNode = wrapper.FirstChild;

        return valueNode.InnerText;
    }
}
/// <summary>
/// Reads the list of replay list-page URLs from the XML configuration file.
/// </summary>
class GetReplaysUrlFromXML
{
    /// <summary>
    /// Loads the file at FilePath.xmlFilePath, takes the second child of the
    /// first element named <paramref name="strTagName"/>, and collects the
    /// inner text of every "RUrl" element below it.
    /// </summary>
    /// <param name="strTagName">Name of the element to look under.</param>
    /// <returns>An ArrayList of strings, in document order.</returns>
    public ArrayList GetUrlByNode(string strTagName)
    {
        XmlDocument config = new XmlDocument();
        config.Load(FilePath.xmlFilePath);

        XmlNode section = config.GetElementsByTagName(strTagName)[0];
        XmlElement urlContainer = (XmlElement)section.ChildNodes[1];
        XmlNodeList urlNodes = urlContainer.GetElementsByTagName("RUrl");

        ArrayList urls = new ArrayList();
        for (int i = 0; i < urlNodes.Count; i++)
        {
            urls.Add(urlNodes[i].InnerText);
        }
        return urls;
    }
}
/// <summary>
/// Scrapes a replay list page, line by line, for the detail-page link and the
/// image cell of each replay row.
/// </summary>
class NavigatetoUrl
{
    // Number of characters of the end marker that getString keeps in its
    // result. "html" is kept whole; "</li>" loses its closing '>'.
    private const int EndMarkerCharsKept = 4;

    /// <summary>
    /// Downloads <paramref name="url"/> as UTF-8 text and builds one
    /// <c>Item</c> per non-empty image cell found.
    /// </summary>
    /// <param name="url">Address of the list page to scan.</param>
    /// <returns>
    /// An ArrayList of <c>Item</c> values. Each item carries the most recently
    /// seen row link (null if none was seen or it could not be extracted) and
    /// the image cell markup.
    /// </returns>
    public ArrayList GetDownLoadUrl(string url)
    {
        const string rowMarker = "<ul class=\"datarow3\" onmouseover=\"this.style.backgroundColor='#ffffee';\" onmouseout=\"this.style.backgroundColor='#ffffff'\" onclick=\"window.location.href='/doc/cn/";
        const string imageCellMarker = "<li class=\"c_i\">";
        const string onclickStartString = "/doc";
        const string onclickEndString = "html";
        const string imgStartString = "<li";
        const string imgEndString = "</li>";

        ArrayList myArrayList = new ArrayList();
        string tempOnClick = null;

        WebRequest myReq = WebRequest.Create(url);

        // Response, stream and reader all hold a connection or handle, so
        // they are released as soon as the page has been read.
        using (WebResponse myRes = myReq.GetResponse())
        using (Stream resStream = myRes.GetResponseStream())
        using (StreamReader sr = new StreamReader(resStream, Encoding.UTF8))
        {
            string rl;
            while ((rl = sr.ReadLine()) != null)
            {
                if (rl.Contains(rowMarker))
                {
                    // Remember the row's link; it is attached to the image
                    // cell found on this or a later line.
                    tempOnClick = getString(rl, onclickStartString, onclickEndString);
                }

                if (rl.Contains(imageCellMarker))
                {
                    string tempImage = getString(rl, imgStartString, imgEndString);

                    // An empty cell, <li class="c_i"></li>, extracts to exactly
                    // 20 characters (16 for the opening tag plus "</li"), so
                    // this presumably filters out cells without content.
                    if (tempImage != null && tempImage.Length > 20)
                    {
                        Item tempItem = new Item();
                        tempItem.onclick = tempOnClick;
                        tempItem.image = tempImage;
                        myArrayList.Add(tempItem);
                    }
                }
            }
        }

        return myArrayList;
    }

    /// <summary>
    /// Returns the part of <paramref name="wholeString"/> that starts at the
    /// first <paramref name="startString"/> and runs through the first four
    /// characters of the next <paramref name="endString"/>.
    /// </summary>
    /// <returns>
    /// The extracted text, or null when either marker is missing or the
    /// requested span does not fit in the line.
    /// </returns>
    private string getString(string wholeString, string startString, string endString)
    {
        int start = wholeString.IndexOf(startString, StringComparison.Ordinal);
        if (start < 0)
        {
            return null;
        }

        // Look for the end marker only after the start marker, so an earlier
        // occurrence cannot yield a negative or truncated span.
        int end = wholeString.IndexOf(endString, start, StringComparison.Ordinal);
        if (end < 0)
        {
            return null;
        }

        int length = end - start + EndMarkerCharsKept;
        if (start + length > wholeString.Length)
        {
            return null;
        }

        return wholeString.Substring(start, length);
    }
}
/// <summary>
/// One replay row scraped from a list page.
/// </summary>
internal struct Item
{
    /// <summary>Text extracted from the row's onclick handler by NavigatetoUrl.</summary>
    public string onclick;

    /// <summary>Markup of the row's image cell as extracted by NavigatetoUrl.</summary>
    public string image;

    /// <summary>Not assigned anywhere in this file.</summary>
    public string people1;

    /// <summary>Not assigned anywhere in this file.</summary>
    public string people2;
}
/// <summary>
/// Resolves the replay file link on a replay detail page.
/// </summary>
class GetFileUrl //This class is to get the rep file's url
{
    /// <summary>
    /// Downloads <paramref name="fileUrl"/> as UTF-8 text and returns the text
    /// of the first line fragment that starts at "/Download.aspx" and ends
    /// just before "'>Download REP".
    /// </summary>
    /// <param name="fileUrl">Address of the detail page to scan.</param>
    /// <returns>
    /// The site-relative download path, or null when the page contains no
    /// such fragment or the request fails (the exception is written to the
    /// console).
    /// </returns>
    public string Get_FileUrl(string fileUrl)
    {
        const string startMarker = "/Download.aspx";
        const string endMarker = "'>Download REP";

        try
        {
            WebRequest myRequest = WebRequest.Create(fileUrl);

            // Dispose response, stream and reader once the page has been
            // scanned so that connections are not left open.
            using (WebResponse myResponse = myRequest.GetResponse())
            using (Stream resStream = myResponse.GetResponseStream())
            using (StreamReader streamReader = new StreamReader(resStream, Encoding.UTF8))
            {
                string temStr;
                while ((temStr = streamReader.ReadLine()) != null)
                {
                    int start = temStr.IndexOf(startMarker, StringComparison.Ordinal);
                    if (start < 0)
                    {
                        continue;
                    }

                    // Only accept an end marker after the start marker; a line
                    // that merely mentions "/Download.aspx" is skipped.
                    int end = temStr.IndexOf(endMarker, start, StringComparison.Ordinal);
                    if (end < 0)
                    {
                        continue;
                    }

                    return temStr.Substring(start, end - start);
                }
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.ToString());
        }

        // Reached when no link was found or the request failed.
        return null;
    }
}
}
<?xml version="1.0" encoding="utf-8" ?>
<ReplaysDownload>
<RootUrl>
<RootUrl>http://w3g.replays.net</RootUrl>
</RootUrl>
<ReplaysUrls>
<RUrl>http://w3g.replays.net/replaylist.aspx?Gamerace=5</RUrl>
<RUrl>http://w3g.replays.net/replaylist.aspx?Gamerace=6</RUrl>
<RUrl>http://w3g.replays.net/replaylist.aspx?Gamerace=8</RUrl>
<RUrl>http://w3g.replays.net/replaylist.aspx?Gamerace=12</RUrl>
</ReplaysUrls>
</ReplaysDownload>


浙公网安备 33010602011771号