ASP.NET 利用正则表达式从 HTML 中提取图片路径
代码
/// <summary>
/// Extracts the <c>src</c> attribute value of the first <c>&lt;img&gt;</c> tag found in an HTML fragment.
/// </summary>
/// <param name="HTMLStr">HTML text to search. May be null or empty.</param>
/// <returns>
/// The URL exactly as written in the markup (original casing, surrounding quotes removed),
/// or an empty string when the input is null/empty or contains no <c>&lt;img&gt;</c> tag with a <c>src</c>.
/// </returns>
/// <remarks>
/// This is a regex-based best-effort scan, not an HTML parser: a <c>&gt;</c> character inside
/// another attribute value of the same tag can cut the match short.
/// </remarks>
public static string GetImgUrl(string HTMLStr)
{
    if (string.IsNullOrEmpty(HTMLStr))
    {
        return string.Empty;
    }

    // The "url" group is declared once per quoting style (double-quoted, single-quoted, unquoted);
    // only one alternative participates in a match, so Groups["url"] always holds the bare value.
    // The lazy optional prefix makes the first whitespace-preceded src attribute of the tag win.
    const string imgSrcPattern =
        @"<img\s+(?:[^>]*?\s)??src\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s""'>]+))[^>]*>";

    // Match case-insensitively instead of lower-casing the input, so the returned URL keeps its casing.
    // The static Regex.Match overload reuses the framework's pattern cache rather than rebuilding the regex per call.
    Match m = Regex.Match(HTMLStr, imgSrcPattern, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
    return m.Success ? m.Groups["url"].Value : string.Empty;
}
//返回多个路径的情况
/// <summary>
/// Collects the <c>src</c> attribute value of every <c>&lt;img&gt;</c> tag found in an HTML fragment.
/// </summary>
/// <param name="text">HTML text to search. May be null or empty.</param>
/// <returns>
/// A <see cref="StringBuilder"/> holding each URL, in document order, each followed by a comma
/// (so a non-empty result ends with a trailing comma). Empty when no image is found.
/// URLs keep their original casing and have their surrounding quotes removed.
/// </returns>
/// <remarks>
/// This is a regex-based best-effort scan, not an HTML parser: a <c>&gt;</c> character inside
/// another attribute value of the same tag can cut the match short.
/// </remarks>
public static StringBuilder MyGetImgUrl(string text)
{
    StringBuilder urls = new StringBuilder();
    if (string.IsNullOrEmpty(text))
    {
        return urls;
    }

    // The "url" group is declared once per quoting style (double-quoted, single-quoted, unquoted);
    // only one alternative participates in a match, so Groups["url"] always holds the bare value.
    const string imgSrcPattern =
        @"<img\s+(?:[^>]*?\s)??src\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s""'>]+))[^>]*>";

    // Case-insensitive matching replaces lower-casing the input, so the collected URLs keep their casing.
    MatchCollection matches = Regex.Matches(text, imgSrcPattern, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
    foreach (Match m in matches)
    {
        // Read the group by name rather than by positional index.
        urls.Append(m.Groups["url"].Value).Append(",");
    }

    return urls;
}
// NOTE(review): this brace block repeats the body of GetImgUrl, but no method signature
// precedes it in the visible source, so HTMLStr has no visible declaration here.
// It looks like a copy/paste artifact — confirm, then restore the header or remove the block.
{
string str = string.Empty;
//string sPattern = @"^<img\s+[^>]*>";
Regex r = new Regex(@"<img\s+[^>]*\s*src\s*=\s*([']?)(?<url>\S+)'?[^>]*>", // Note: (?<url>\S+) is a named regex group that is read back below; the pattern can be adapted to other HTML tags to extract their content the same way.
RegexOptions.Compiled);
// The input is lower-cased before matching, so any captured URL is lower-cased as well.
Match m = r.Match(HTMLStr.ToLower());
if (m.Success)
// Take the text captured by the "url" group of the first match.
str = m.Result("${url}");
// An empty string is returned when nothing matched.
return str;
}
//返回多个路径的情况
/// <summary>
/// Collects the <c>src</c> attribute value of every <c>&lt;img&gt;</c> tag found in an HTML fragment.
/// </summary>
/// <param name="text">HTML text to search. May be null or empty.</param>
/// <returns>
/// A <see cref="StringBuilder"/> holding each URL, in document order, each followed by a comma
/// (so a non-empty result ends with a trailing comma). Empty when no image is found.
/// URLs keep their original casing and have their surrounding quotes removed.
/// </returns>
/// <remarks>
/// This is a regex-based best-effort scan, not an HTML parser: a <c>&gt;</c> character inside
/// another attribute value of the same tag can cut the match short.
/// NOTE(review): a method with this same name and signature also appears earlier in the
/// visible source — confirm whether one of the two copies should be removed.
/// </remarks>
public static StringBuilder MyGetImgUrl(string text)
{
    StringBuilder urls = new StringBuilder();
    if (string.IsNullOrEmpty(text))
    {
        return urls;
    }

    // The "url" group is declared once per quoting style (double-quoted, single-quoted, unquoted);
    // only one alternative participates in a match, so Groups["url"] always holds the bare value.
    const string imgSrcPattern =
        @"<img\s+(?:[^>]*?\s)??src\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s""'>]+))[^>]*>";

    // Case-insensitive matching replaces lower-casing the input, so the collected URLs keep their casing.
    MatchCollection matches = Regex.Matches(text, imgSrcPattern, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
    foreach (Match m in matches)
    {
        // Read the group by name rather than by positional index.
        urls.Append(m.Groups["url"].Value).Append(",");
    }

    return urls;
}