最近换了份工作,很少来园子里转了。
现在做的是SDET,测试协议,感觉还是没有应用开发有意思哦~~~~~~
由于工作需要写了一个包括所有Url的正则表达式,用来验证返回的Url是否符合RFC1738规定。
有兴趣的同学可以去看RFC1738关于Url部分的介绍(http://www.ietf.org/rfc/rfc1738.txt),本文中的代码是按其规定编写的。
在没有了解RFC1738的时候,一直以为Url的正则表达式很简单,没想到Url有这么多分类,更没想到一个普通的http的正则表达式也不是那么简单。
以下是我搜到的关于http的正则表达式:
http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?
当然这已经满足大部分人的需求了,但是如果需要严格的验证的话还是要符合RFC1738了。
Url包括Http,Ftp,News,Nntpurl,Telnet,Gopher,Wais,Mailto,File,Prosperourl和Otherurl。
呵呵,废话不多说了,上代码
 

Code

 #region Http

 // Regex fragments for validating URLs against RFC 1738. Each local is named
 // after the grammar rule it encodes and is composed from earlier fragments
 // with String.Format, which is why literal regex quantifier braces are
 // written doubled ({{0,1}}).
 // None of the fragments is anchored: a caller that wants to validate a whole
 // string has to add start/end anchors around the final pattern.

 // --- Character classes ---
 string lowalpha = @"[a-z]";
 string hialpha = @"[A-Z]";
 string alpha = String.Format(@"({0}|{1})", lowalpha, hialpha);
 string digit = @"[0-9]";
 string safe = @"(\$|-|_|\.|\+)";
 string extra = @"(!|\*|'|\(|\)|,)";
 string hex = String.Format(@"({0}|A|B|C|D|E|F|a|b|c|d|e|f)", digit);
 // A percent-encoded octet: '%' followed by two hex digits.
 string escape = String.Format(@"(%{0}{0})", hex);
 string unreserved = String.Format(@"({0}|{1}|{2}|{3})", alpha, digit, safe, extra);
 string uchar = String.Format(@"({0}|{1})", unreserved, escape);
 string reserved = @"(;|/|\?|:|@|&|=)";
 string xchar = String.Format(@"({0}|{1}|{2})", unreserved, reserved, escape);
 string digits = String.Format(@"({0}+)", digit);

 // --- Host and port ---
 string alphadigit = String.Format(@"({0}|{1})", alpha, digit);
 // A label may contain '-' only in the interior, never at either end.
 string domainlabel = String.Format(@"({0}|{0}({0}|-)*{0})", alphadigit);
 // Same shape as domainlabel, but the first character must be a letter.
 string toplabel = String.Format(@"({0}|{0}({1}|-)*{1})", alpha, alphadigit);
 string hostname = String.Format(@"(({0}\.)*{1})", domainlabel, toplabel);
 // Four dot-separated digit runs; the individual values are not range-checked.
 string hostnumber = String.Format(@"{0}\.{0}\.{0}\.{0}", digits);
 string host = String.Format(@"({0}|{1})", hostname, hostnumber);
 string port = digits;
 string hostport = String.Format(@"({0}(:{1}){{0,1}})", host, port);

 // --- HTTP path and query ---
 string hsegment = String.Format(@"(({0}|;|:|@|&|=)*)", uchar);
 string search = String.Format(@"(({0}|;|:|@|&|=)*)", uchar);
 string hpath = String.Format(@"{0}(/{0})*", hsegment);
 // "http://" hostport, then an optional "/" path with an optional "?" query.
 string httpurl = String.Format(@"http://{0}(/{1}(\?{2}){{0,1}}){{0,1}}", hostport, hpath, search);

 #endregion


 #region Ftp

 // FTP URL fragments. Builds on uchar and hostport from the Http region.

 // user and password share one character set: uchar plus ';', '?', '&', '='.
 string user = String.Format(@"(({0}|;|\?|&|=)*)", uchar);
 string password = String.Format(@"(({0}|;|\?|&|=)*)", uchar);
 // Optional "user[:password]@" prefix in front of hostport.
 string login = String.Format(@"(({0}(:{1}){{0,1}}@){{0,1}}{2})", user, password, hostport);
 string fsegment = String.Format(@"(({0}|\?|:|@|&|=)*)", uchar);
 string ftptype = @"(A|I|D|a|i|d)";
 string fpath = String.Format(@"({0}(/{0})*)", fsegment);
 // "ftp://" login, then an optional "/" path with an optional ";type=<ftptype>" suffix.
 string ftpurl = String.Format(@"ftp://{0}(/{1}(;type={2}){{0,1}}){{0,1}}", login, fpath, ftptype);

 #endregion


 #region News

 // News URL fragments. Builds on alpha, digit, uchar and host from the Http region.

 // A group name starts with a letter, followed by letters, digits or - . + _
 string group = String.Format(@"({0}({0}|{1}|-|\.|\+|_)*)", alpha, digit);
 // An article reference is a non-empty id, then '@', then a host.
 string article = String.Format(@"(({0}|;|/|\?|:|&|=)+@{1})", uchar, host);
 // Either the literal "*", a group name, or an article reference.
 string grouppart = String.Format(@"(\*|{0}|{1})", group, article);
 string newsurl = String.Format(@"(news:{0})", grouppart);

 #endregion


 #region Nntpurl

 // "nntp://" hostport "/" group, optionally followed by "/" and a run of digits.
 // Builds on hostport and digits (Http region) and group (News region).
 string nntpurl = String.Format(@"nntp://{0}/{1}(/{2}){{0,1}}", hostport, group, digits);

 #endregion


 #region Telnet

 // "telnet://" login with an optional trailing "/".
 // Builds on login from the Ftp region.
 string telneturl = String.Format(@"telnet://{0}/{{0,1}}", login);

 #endregion


 #region Gopher

 // Gopher URL fragments. Builds on xchar, hostport and search from the Http region.

 // gtype is exactly one xchar; selector and the Gopher+ string are any run of xchar.
 string gtype = xchar;
 string selector = String.Format(@"({0}*)", xchar);
 string gopherplus_string = String.Format(@"({0}*)", xchar);
 // Every part after hostport is optional and nested: "/" [ gtype [ selector
 // [ "%09" search [ "%09" gopherplus_string ] ] ] ].
 string gopherurl = String.Format(@"gopher://{0}(/({1}({2}(%09{3}(%09{4}){{0,1}}){{0,1}}){{0,1}}){{0,1}}){{0,1}}", hostport, gtype, selector, search, gopherplus_string);

 #endregion


 #region Wais

 // WAIS URL fragments. Builds on uchar, hostport and search from the Http region.

 string database = String.Format(@"({0}*)", uchar);
 string wtype = String.Format(@"({0}*)", uchar);
 string wpath = String.Format(@"({0}*)", uchar);
 // The three WAIS forms: a database, an index search ("?" search) and a document.
 string waisdatabase = String.Format(@"(wais://{0}/{1})", hostport, database);
 string waisindex = String.Format(@"(wais://{0}/{1}\?{2})", hostport, database, search);
 string waisdoc = String.Format(@"(wais://{0}/{1}/{2}/{3})", hostport, database, wtype, wpath);
 // The alternation is wrapped in a group so that anchors or neighbouring
 // fragments apply to the whole choice, not just its first or last branch.
 string waisurl = String.Format(@"({0}|{1}|{2})", waisdatabase, waisindex, waisdoc);

 #endregion


 #region Mailto

 // "mailto:" followed by one or more xchar. The address itself is not checked
 // beyond its character set. Builds on xchar from the Http region.
 string encoded822addr = String.Format(@"({0}+)", xchar);
 string mailtourl = String.Format(@"mailto:{0}", encoded822addr);

 #endregion


 #region File

 // "file://" then an optional host (or the literal "localhost"), then "/" and a path.
 // The host part may be empty, so "file:///path" is accepted.
 // Builds on host (Http region) and fpath (Ftp region).
 string fileurl = String.Format(@"file://({0}{{0,1}}|localhost)/{1}", host, fpath);

 #endregion


 #region Prosperourl

 // Prospero URL fragments. Builds on uchar and hostport from the Http region.

 // RFC 1738 defines fieldname and fieldvalue as *[ uchar | "?" | ":" | "@" | "&" ],
 // i.e. zero or more characters, so the character choice carries a '*'.
 string fieldname = String.Format(@"(({0}|\?|:|@|&)*)", uchar);
 string fieldvalue = String.Format(@"(({0}|\?|:|@|&)*)", uchar);
 // One ";name=value" field.
 string fieldspec = String.Format(@"(;{0}={1})", fieldname, fieldvalue);
 string psegment = String.Format(@"(({0}|\?|:|@|&|=)*)", uchar);
 string ppath = String.Format(@"({0}(/{0})*)", psegment);
 // "prospero://" hostport "/" path, followed by any number of fields.
 string prosperourl = String.Format(@"prospero://{0}/{1}({2})*", hostport, ppath, fieldspec);

 #endregion


 #region Otherurl

 // Generic URL fragments for schemes without a dedicated pattern.
 // Builds on xchar, lowalpha and digit (Http region) and login (Ftp region).

 string urlpath = String.Format(@"(({0})*)", xchar);
 // A scheme name is one or more lowercase letters, digits, '+', '-' or '.'.
 string scheme = String.Format(@"(({0}|{1}|\+|-|\.)+)", lowalpha, digit);
 // "//" login with an optional "/" urlpath.
 string ip_schemepar = String.Format(@"(//{0}(/{1}){{0,1}})", login, urlpath);
 string schemepart = String.Format(@"(({0})*|{1})", xchar, ip_schemepar);
 string genericurl = String.Format(@"{0}:{1}", scheme, schemepart);
 // otherurl is the same pattern as genericurl.
 string otherurl = genericurl;

 #endregion
有了Pattern剩下的就简单多了,无非就是正则表达式的验证了,以Http为例:
Http的pattern为string httpurl,假设要验证的Url为url,所以验证url的代码如下:
// Anchor the pattern so the entire string has to be a URL. Unanchored, IsMatch
// returns true as soon as any substring of url matches, so "junk http://a.b junk"
// would pass. \A and \z are used instead of ^ and $ because $ also matches
// just before a trailing newline.
Regex regex = new Regex(@"\A(?:" + httpurl + @")\z");
bool isMatchHttp = regex.IsMatch(url);
呵呵,感谢大家观赏,今天就写到这里吧~~~~~~
FYI.