Fighting Ant

Ant can be great while elephant can be chickenshit

导航

一个从Replays.net上下载rep的程序(Alpha Release)

Posted on 2007-09-11 11:12  Nillson  阅读(516)  评论(3)    收藏  举报

该程序的设计目的是从Replays.net上找到感兴趣的replay并将其下载。其中用到的主要技术是下载并解析网页的HTML源码,从而得到待下载文件的URL。整个过程的思路比较简单,但在涉及一些细节问题时,由于个人经验和能力所限,程序的可扩展性没有得到很好的体现。其中XML作为配置文件的作用也没有发挥到最大,例如目前还不能仅通过修改XML文件来实现对另一个网站上感兴趣文件的下载。在以后的版本中将努力实现这一思想。

using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;
using System.Collections;
using System.Xml;
using System.Web;

namespace RepalysDownload
{
    
/// <summary>
/// Console entry point: reads the site root URL and the replay list pages from the
/// XML configuration, scrapes every list page for replay detail links, resolves each
/// detail page to its download link and saves the file under
/// <c>FilePath.repFilePath</c>.
/// </summary>
class Program
{
    /// <summary>
    /// Runs the whole download pass once.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        const string strTagName = "ReplaysDownload";

        // Both readers load the same XML file; the first yields the site root,
        // the second the list pages to scan.
        string strRoolUrl = new GetUrlFromXML().GetUrlByNode(strTagName);
        ArrayList strAL = new GetReplaysUrlFromXML().GetUrlByNode(strTagName);

        NavigatetoUrl navigator = new NavigatetoUrl();
        GetFileUrl strGetFileUrl = new GetFileUrl();

        foreach (string strReplaysUrl in strAL)
        {
            foreach (Item item in navigator.GetDownLoadUrl(strReplaysUrl))
            {
                // item.onclick is a site-relative path to the replay's detail page.
                string detailUrl = strRoolUrl + item.onclick;

                string relativeFileUrl = strGetFileUrl.Get_FileUrl(detailUrl);
                if (string.IsNullOrEmpty(relativeFileUrl))
                {
                    // Without this guard the bare root URL would be downloaded and
                    // stored as if it were a replay file.
                    Console.WriteLine("No download link found on " + detailUrl);
                    continue;
                }

                string url = strRoolUrl + relativeFileUrl;

                // The local file name is the last '/'-separated segment of the
                // URL-decoded download address.
                string strUrl = HttpUtility.UrlDecode(url);
                string repName = strUrl.Substring(strUrl.LastIndexOf('/') + 1);

                DownloadFile.Download(url, FilePath.repFilePath + repName);
            }
        }
    }
}

    
/// <summary>
/// Hard-coded file system locations used by the program.
/// </summary>
static class FilePath
{
    /// <summary>Full path of the XML configuration file that is loaded at start-up.</summary>
    public static string xmlFilePath = @"C:\Documents and Settings\v-niwa\Desktop\RepalysDownload\RepalysDownload\RepalysDownload\RepalysDownload.xml";

    /// <summary>
    /// Directory prefix for downloaded replays. The file name is appended by plain
    /// string concatenation, so the value has to end with a path separator.
    /// </summary>
    public static string repFilePath = @"D:\War3\Replays\";
}

    
//To be added
    static class DownloadFile
    
{
        
static public void Download(string url, string fileName)
        
{
            
try
            
{
                WebClient wc 
= new WebClient();
                wc.DownloadFile(url, fileName);
            }

            
catch (Exception ex)
            
{
                Console.WriteLine(ex.ToString());
            }

        }

    }


    
/// <summary>
/// Reads the site root URL from the XML configuration file.
/// </summary>
class GetUrlFromXML
{
    /// <summary>
    /// Loads the configuration file and returns the inner text of the first child
    /// of the first child of the first element named <paramref name="strTagName"/>.
    /// </summary>
    /// <param name="strTagName">Name of the configuration element to start from.</param>
    /// <returns>The text found at that position.</returns>
    public string GetUrlByNode(string strTagName)
    {
        XmlDocument config = new XmlDocument();
        config.Load(FilePath.xmlFilePath);

        // The lookup is purely positional: element -> child 0 -> child 0.
        XmlNode configRoot = config.GetElementsByTagName(strTagName)[0];
        XmlNode firstChild = configRoot.ChildNodes[0];
        XmlNode firstGrandchild = firstChild.ChildNodes[0];

        return firstGrandchild.InnerText;
    }
}


    
/// <summary>
/// Reads the list of replay list-page URLs from the XML configuration file.
/// </summary>
class GetReplaysUrlFromXML
{
    /// <summary>
    /// Loads the configuration file, takes the second child of the first element
    /// named <paramref name="strTagName"/> and collects the text of every
    /// <c>RUrl</c> element below it.
    /// </summary>
    /// <param name="strTagName">Name of the configuration element to start from.</param>
    /// <returns>An <see cref="ArrayList"/> of strings, in document order.</returns>
    public ArrayList GetUrlByNode(string strTagName)
    {
        ArrayList urls = new ArrayList();

        XmlDocument config = new XmlDocument();
        config.Load(FilePath.xmlFilePath);

        // Positional lookup: the URL container is expected at child index 1.
        XmlNode configRoot = config.GetElementsByTagName(strTagName)[0];
        XmlElement urlContainer = (XmlElement)configRoot.ChildNodes[1];

        foreach (XmlNode urlNode in urlContainer.GetElementsByTagName("RUrl"))
        {
            urls.Add(urlNode.InnerText);
        }

        return urls;
    }
}


    
/// <summary>
/// Scrapes a replay list page for the links to individual replay detail pages.
/// </summary>
class NavigatetoUrl
{
    /// <summary>
    /// Downloads <paramref name="url"/> and scans it line by line. A line holding the
    /// row marker supplies the detail-page link; a following line holding
    /// <c>&lt;li class="c_i"&gt;</c> with more than 20 extracted characters produces an
    /// <see cref="Item"/> carrying that link and the extracted fragment.
    /// </summary>
    /// <param name="url">Address of the list page.</param>
    /// <returns>An <see cref="ArrayList"/> of <see cref="Item"/> values, in page order.</returns>
    public ArrayList GetDownLoadUrl(string url)
    {
        const string rowMarker = "<ul class=\"datarow3\" onmouseover=\"this.style.backgroundColor='#ffffee';\" onmouseout=\"this.style.backgroundColor='#ffffff'\" onclick=\"window.location.href='/doc/cn/";
        const string imageMarker = "<li class=\"c_i\">";

        const string onclickStartString = "/doc";
        const string onclickEndString = "html";
        const string imgStartString = "<li";
        const string imgEndString = "</li>";

        ArrayList myArrayList = new ArrayList();
        string tempOnClick = null;

        WebRequest myReq = WebRequest.Create(url);

        // Response, stream and reader all hold the connection; dispose them
        // deterministically instead of leaving them to the finalizer.
        using (WebResponse myRes = myReq.GetResponse())
        using (Stream resStream = myRes.GetResponseStream())
        using (StreamReader sr = new StreamReader(resStream, Encoding.UTF8))
        {
            string rl;
            while ((rl = sr.ReadLine()) != null)
            {
                if (rl.Contains(rowMarker))
                {
                    tempOnClick = getString(rl, onclickStartString, onclickEndString);
                }

                if (rl.Contains(imageMarker))
                {
                    string tempImage = getString(rl, imgStartString, imgEndString);

                    // More than 20 characters means the list item has some content
                    // (see the note in getString). Entries without a link are
                    // skipped because the caller could not resolve them anyway.
                    if (tempImage.Length > 20 && !string.IsNullOrEmpty(tempOnClick))
                    {
                        Item tempItem = new Item();
                        tempItem.onclick = tempOnClick;
                        tempItem.image = tempImage;
                        myArrayList.Add(tempItem);
                    }
                }
            }
        }

        return myArrayList;
    }

    /// <summary>
    /// Returns the part of <paramref name="wholeString"/> that starts at
    /// <paramref name="startString"/> and runs up to the first four characters of
    /// the next <paramref name="endString"/>.
    /// </summary>
    /// <param name="wholeString">Text to search.</param>
    /// <param name="startString">Marker at which the result begins.</param>
    /// <param name="endString">Marker at which the result ends; must be at least four characters long.</param>
    /// <returns>The extracted text, or an empty string when either marker is missing.</returns>
    private string getString(string wholeString, string startString, string endString)
    {
        // Only the first four characters of the end marker are kept. That is the
        // whole of "html", but drops the '>' of "</li>". The "> 20" test in
        // GetDownLoadUrl is calibrated to this: an empty <li class="c_i"></li>
        // comes out at exactly 20 characters. Do not change one without the other.
        const int keptEndChars = 4;

        int start = wholeString.IndexOf(startString, StringComparison.Ordinal);
        if (start < 0)
        {
            return string.Empty;
        }

        // Search after the start marker so an earlier occurrence of the end
        // marker cannot yield a negative length.
        int end = wholeString.IndexOf(endString, start + startString.Length, StringComparison.Ordinal);
        if (end < 0)
        {
            return string.Empty;
        }

        return wholeString.Substring(start, end - start + keptEndChars);
    }
}

    
/// <summary>
/// One replay entry scraped from a list page.
/// </summary>
internal struct Item
{
    /// <summary>Site-relative link to the replay's detail page.</summary>
    public string onclick;

    /// <summary>HTML fragment of the entry's <c>&lt;li class="c_i"&gt;</c> element.</summary>
    public string image;

    /// <summary>Not assigned anywhere in this file; presumably reserved for the first player's name.</summary>
    public string people1;

    /// <summary>Not assigned anywhere in this file; presumably reserved for the second player's name.</summary>
    public string people2;
}

    
/// <summary>
/// Resolves a replay detail page to the site-relative URL of its replay file.
/// </summary>
class GetFileUrl
{
    /// <summary>
    /// Downloads <paramref name="fileUrl"/> and returns the text that starts at
    /// <c>/Download.aspx</c> and ends just before <c>'&gt;Download REP</c> on the
    /// first line that contains both markers in that order.
    /// </summary>
    /// <param name="fileUrl">Address of the replay detail page.</param>
    /// <returns>
    /// The site-relative download link, or <c>null</c> when no such link is found
    /// or the page cannot be retrieved (the error is written to the console).
    /// </returns>
    public string Get_FileUrl(string fileUrl)
    {
        const string linkStart = "/Download.aspx";
        const string linkEnd = "'>Download REP";

        try
        {
            WebRequest myRequest = WebRequest.Create(fileUrl);

            // Dispose response, stream and reader so the connection is released
            // even when the link is found early or reading fails.
            using (WebResponse myResponse = myRequest.GetResponse())
            using (Stream resStream = myResponse.GetResponseStream())
            using (StreamReader streamReader = new StreamReader(resStream, Encoding.UTF8))
            {
                string temStr;
                while ((temStr = streamReader.ReadLine()) != null)
                {
                    int start = temStr.IndexOf(linkStart, StringComparison.Ordinal);
                    if (start < 0)
                    {
                        continue;
                    }

                    // Look for the terminator after the start marker; a line that
                    // lacks it is skipped rather than making Substring throw.
                    int end = temStr.IndexOf(linkEnd, start, StringComparison.Ordinal);
                    if (end < 0)
                    {
                        continue;
                    }

                    return temStr.Substring(start, end - start);
                }
            }

            return null;
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.ToString());

            // Every path must return a value; null tells the caller that no
            // link is available.
            return null;
        }
    }
}

}


<?xml version="1.0" encoding="utf-8" ?>
<!--
  Configuration for the replay downloader.
  The program reads this file by child position, not only by element name:
    - the site root is the text of the first child of the first child of <ReplaysDownload>;
    - the list pages are the <RUrl> elements under the second child of <ReplaysDownload>.
  Keep the two sections in this order and do not insert other nodes (including
  comments) directly inside <ReplaysDownload> or the outer <RootUrl>.
-->
<ReplaysDownload>
  
<RootUrl>
    
<RootUrl>http://w3g.replays.net</RootUrl>
  
</RootUrl>
  
<ReplaysUrls>
    
<RUrl>http://w3g.replays.net/replaylist.aspx?Gamerace=5</RUrl>
    
<RUrl>http://w3g.replays.net/replaylist.aspx?Gamerace=6</RUrl>
    
<RUrl>http://w3g.replays.net/replaylist.aspx?Gamerace=8</RUrl>
    
<RUrl>http://w3g.replays.net/replaylist.aspx?Gamerace=12</RUrl>
  
</ReplaysUrls>
</ReplaysDownload>