blackcore

本质的东西,深植于骨骼,扎根于灵魂! 淘实惠,各类电子版书籍

导航

Excel VBA:用正则表达式提取URL的协议、域名、端口号、页面路径

打开Excel,按 Alt + F11 打开VBA编辑器。注意调试前必须先保存,否则会提示无法调试外部程序。

' Splits a sample URL into protocol, host, port and page path with a
' late-bound VBScript regular expression and shows the parts in a message box.
Sub RegexURL()

    ' Late binding keeps the macro free of a project reference to the
    ' VBScript regular-expression library.
    Dim matcher As Object
    Set matcher = CreateObject("vbscript.regexp")

    Dim address As String
    address = "http://www.baidu.com/a/b/index.html"

    With matcher
        .Global = True
        ' $1 = protocol, $2 = host, $3 = optional ":port", $4 = page path.
        .Pattern = "(\w+)://([^/:]+)(:\d*)?([^# ]*)"
        MsgBox .Replace(address, "使用协议/主域名/端口号/页面:[$1],[$2],[$3],[$4]")
    End With

End Sub
            // Demo driver: run the URL pattern over a few sample addresses and pop up
            // the registrable domain computed from the host part (capture group 2).
            var urlPattern = new Regex(@"(\w+):\/\/([^/:]+)(:\d*)?([^# ]*)");

            var sampleUrls = new List<string>
            {
                "http://www.baidu.com",
                "http://www.baidu.com/index.html",
                "http://www.fengcao.baidu.cn/index/index.html",
                "ftp://www.baidu.cn/index/index.html",
                "ftp://www.baidu.com.cn",
                "http://www.net.cn",
            };

            var separators = new StringBuilder();
            foreach (var sampleUrl in sampleUrls)
            {
                MatchCollection hits = urlPattern.Matches(sampleUrl);
                foreach (Match hit in hits)
                {
                    // Group 2 is the host name without protocol, port or path.
                    string host = hit.Groups[2].Value;
                    MessageBox.Show(GetServerDomain(host));
                }

                // Only a separator line is recorded per URL; the domains themselves
                // are shown in message boxes, not collected.
                separators.AppendLine("--");
            }

            string result = separators.ToString();
       /// <summary>
       /// Reports whether <paramref name="str"/> can be parsed as a 32-bit signed integer.
       /// </summary>
       /// <param name="str">Text to test; may be null.</param>
       /// <returns>
       /// true when the text parses as an <see cref="int"/>; false for null, empty,
       /// non-numeric or out-of-range text.
       /// </returns>
       public bool IsNumeric(string str)
       {
           // TryParse avoids using exceptions for control flow and, unlike
           // Convert.ToInt32 (which maps a null string to 0), rejects null.
           int parsed;
           return int.TryParse(str, out parsed);
       }

/// <summary>
/// Reduces a host name to its registrable domain using a fixed list of known
/// suffixes, e.g. "www.eieboom.com" becomes "eieboom.com".
/// </summary>
/// <param name="url">
/// Host name to reduce (no protocol, port or path). Compared case-insensitively.
/// </param>
/// <returns>
/// The lower-cased label directly before the matched suffix plus the suffix.
/// If the last dot-separated part is an integer the input is treated as an IP
/// address and returned with its dots removed. Input without a dot after the
/// first character, or with no known suffix, is returned lower-cased as is.
/// Null or empty input yields an empty string.
/// </returns>
public string GetServerDomain(string url)
{
    if (string.IsNullOrEmpty(url))
    {
        return string.Empty;
    }

    // Host names are not linguistic text, so lower-case culture-invariantly.
    string host = url.ToLowerInvariant();
    if (host.IndexOf('.') <= 0)
    {
        return host;
    }

    // A numeric last part means the input is an IP address: emit digits only.
    string lastLabel = host.Substring(host.LastIndexOf('.') + 1);
    if (IsNumeric(lastLabel))
    {
        return host.Replace(".", "");
    }

    // Multi-part suffixes come first so ".com.cn" wins over ".cn".
    string[] domainRules = ".com.cn|.net.cn|.org.cn|.gov.cn|.com|.net|.cn|.org|.cc|.me|.tel|.mobi|.asia|.biz|.info|.name|.tv|.hk|.公司|.中国|.网络".Split('|');
    foreach (string suffix in domainRules)
    {
        if (!host.EndsWith(suffix, StringComparison.Ordinal))
        {
            continue;
        }

        // Cut off only the trailing suffix. Replacing every occurrence would
        // also eat a matching fragment inside an earlier label
        // ("www.community.com" must not turn into "wwwmunity.com").
        string prefix = host.Substring(0, host.Length - suffix.Length);

        // Keep just the label next to the suffix, dropping any sub-domains.
        int lastDot = prefix.LastIndexOf('.');
        return prefix.Substring(lastDot + 1) + suffix;
    }

    // No known suffix matched.
    return host;
}




posted on 2011-11-01 18:45  blackcore  阅读(1211)  评论(0)  编辑  收藏  举报