最近遇到一个问题:需要提取 HTML 文件中的图片标签(<img ... />),并解析出标签中各个属性的值。
我的实现如下:
/// <summary>
/// Finds every <c>img</c> tag in a hard-coded sample HTML fragment and hands each
/// match to <c>ParseImageTag</c> so its attributes and inline style can be parsed.
/// </summary>
/// <remarks>
/// The matches are enumerated directly instead of going through
/// <c>Regex.Replace</c>: the replaced text was never used, and a
/// <c>MatchEvaluator</c> requires a callback that returns a string.
/// </remarks>
private void ParseImageFromHtmlConent()
{
    // Group 1 captures the whole tag (ParseImageTag reads Groups[1]).
    // Accepts <img ...>, <img ... /> and <img ...></img>; \b keeps tags such as
    // <imgfoo> from matching. Attribute values containing '>' are not supported.
    const string pattern = @"(<img\b[^>]*>(?:\s*</img>)?)";

    // HTML content to parse.
    string html = "sdfasdfs<br />sdf<br /><img src=\"http://localhost:49573/images/6575636a-9bff-4a68-a1c7-f8ef4f78a2a2.flv\" alt=\"sdf\" style=\"width:150px;height:150px;border-color:red\" ></img><br />sdf<img src=\"http://localhost:49573/images/6575636a-9bff-4a68-a1c7-f8ef4f78a2a2.flv\" alt=\"sdf\" style=\"width:150px;height:150px\" />fdfdf";

    // The pattern has no ^/$ anchors, so RegexOptions.Multiline is not needed.
    foreach (Match imageTag in Regex.Matches(html, pattern, RegexOptions.IgnoreCase))
    {
        ParseImageTag(imageTag);
    }
}

/// <summary>
/// Parses one matched <c>img</c> tag into its attributes and its inline style
/// declarations.
/// </summary>
/// <param name="m">A regex match whose group 1 holds the complete tag text.</param>
/// <returns>
/// The matched text unchanged. Returning a string gives this method the
/// <c>MatchEvaluator</c> signature, so it can be passed to <c>Regex.Replace</c>.
/// </returns>
private string ParseImageTag(Match m)
{
    string img = m.Groups[1].Value;
    Dictionary<string, string> dic = ParseImagePropertiesTages(img);

    // Not every image carries a style attribute; fall back to an empty style
    // instead of letting the dictionary indexer throw KeyNotFoundException.
    string style;
    if (!dic.TryGetValue("style", out style))
    {
        style = string.Empty;
    }
    Dictionary<string, string> dicStyle = ParseTagesStyle(style);

    // Process the parsed attributes and styles according to your needs.
    //........................

    return m.Value;
}

/// <summary>
/// Extracts the quoted attributes of a tag into a dictionary.
/// </summary>
/// <param name="img">The tag text, e.g. <c>&lt;img src="a.png" alt="" /&gt;</c>.</param>
/// <returns>
/// A dictionary from lower-cased attribute name to trimmed attribute value.
/// If a name occurs more than once the last occurrence wins. The dictionary is
/// empty when <paramref name="img"/> is null or empty. Unquoted attribute values
/// are not recognised.
/// </returns>
private Dictionary<string, string> ParseImagePropertiesTages(string img)
{
    Dictionary<string, string> dic = new Dictionary<string, string>();
    if (string.IsNullOrEmpty(img))
    {
        return dic;
    }

    // Group 1: attribute name. Hyphens, digits, '_', ':' and '.' are allowed so
    //          that "data-src" is not truncated to "src".
    // Group 2: the opening quote; \2 requires the same character to close the
    //          value, so a value may contain the other quote character.
    // Group 3: the value, possibly empty (alt="").
    const string pattern = @"([a-zA-Z_:][-a-zA-Z0-9_:.]*)\s*=\s*([""'])(.*?)\2";

    // Singleline lets '.' match line breaks inside a value.
    foreach (Match m in Regex.Matches(img, pattern, RegexOptions.Singleline))
    {
        // Invariant lower-casing keeps keys stable regardless of the current culture.
        string key = m.Groups[1].Value.ToLowerInvariant();
        string value = m.Groups[3].Value.Trim();

        // Indexer assignment overwrites an earlier duplicate in one lookup.
        dic[key] = value;
    }
    return dic;
}

/// <summary>
/// Splits an inline CSS style string into its individual declarations.
/// </summary>
/// <param name="style">
/// The content of a style attribute, e.g. <c>width:150px;height:150px</c>.
/// </param>
/// <returns>
/// A dictionary from lower-cased property name to trimmed value. If a property
/// occurs more than once the last occurrence wins. The dictionary is empty when
/// <paramref name="style"/> is null or empty.
/// </returns>
private Dictionary<string, string> ParseTagesStyle(string style)
{
    Dictionary<string, string> dic = new Dictionary<string, string>();
    if (string.IsNullOrEmpty(style))
    {
        return dic;
    }

    // Group 1: property name (letters and hyphens).
    // Group 2: everything up to the next ';' (so the value may contain ':').
    const string pattern = @"([a-zA-Z\-]+)\s*:\s*([^;]*);*";

    foreach (Match m in Regex.Matches(style, pattern))
    {
        // Invariant lower-casing keeps keys stable regardless of the current culture.
        string key = m.Groups[1].Value.ToLowerInvariant();
        string value = m.Groups[2].Value.Trim();

        // Indexer assignment overwrites an earlier duplicate in one lookup.
        dic[key] = value;
    }
    return dic;
}
如有不妥之处,希望各位不吝指出;如果有更好的解析方式,也欢迎大家一起分享。
我的实现如下:
/// <summary>
/// Finds every <c>img</c> tag in a hard-coded sample HTML fragment and hands each
/// match to <c>ParseImageTag</c> so its attributes and inline style can be parsed.
/// </summary>
/// <remarks>
/// The matches are enumerated directly instead of going through
/// <c>Regex.Replace</c>: the replaced text was never used, and a
/// <c>MatchEvaluator</c> requires a callback that returns a string.
/// </remarks>
private void ParseImageFromHtmlConent()
{
    // Group 1 captures the whole tag (ParseImageTag reads Groups[1]).
    // Accepts <img ...>, <img ... /> and <img ...></img>; \b keeps tags such as
    // <imgfoo> from matching. Attribute values containing '>' are not supported.
    const string pattern = @"(<img\b[^>]*>(?:\s*</img>)?)";

    // HTML content to parse.
    string html = "sdfasdfs<br />sdf<br /><img src=\"http://localhost:49573/images/6575636a-9bff-4a68-a1c7-f8ef4f78a2a2.flv\" alt=\"sdf\" style=\"width:150px;height:150px;border-color:red\" ></img><br />sdf<img src=\"http://localhost:49573/images/6575636a-9bff-4a68-a1c7-f8ef4f78a2a2.flv\" alt=\"sdf\" style=\"width:150px;height:150px\" />fdfdf";

    // The pattern has no ^/$ anchors, so RegexOptions.Multiline is not needed.
    foreach (Match imageTag in Regex.Matches(html, pattern, RegexOptions.IgnoreCase))
    {
        ParseImageTag(imageTag);
    }
}
/// <summary>
/// Parses one matched <c>img</c> tag into its attributes and its inline style
/// declarations.
/// </summary>
/// <param name="m">A regex match whose group 1 holds the complete tag text.</param>
/// <returns>
/// The matched text unchanged. Returning a string gives this method the
/// <c>MatchEvaluator</c> signature, so it can be passed to <c>Regex.Replace</c>.
/// </returns>
private string ParseImageTag(Match m)
{
    string img = m.Groups[1].Value;
    Dictionary<string, string> dic = ParseImagePropertiesTages(img);

    // Not every image carries a style attribute; fall back to an empty style
    // instead of letting the dictionary indexer throw KeyNotFoundException.
    string style;
    if (!dic.TryGetValue("style", out style))
    {
        style = string.Empty;
    }
    Dictionary<string, string> dicStyle = ParseTagesStyle(style);

    // Process the parsed attributes and styles according to your needs.
    //........................

    return m.Value;
}
/// <summary>
/// Extracts the quoted attributes of a tag into a dictionary.
/// </summary>
/// <param name="img">The tag text, e.g. <c>&lt;img src="a.png" alt="" /&gt;</c>.</param>
/// <returns>
/// A dictionary from lower-cased attribute name to trimmed attribute value.
/// If a name occurs more than once the last occurrence wins. The dictionary is
/// empty when <paramref name="img"/> is null or empty. Unquoted attribute values
/// are not recognised.
/// </returns>
private Dictionary<string, string> ParseImagePropertiesTages(string img)
{
    Dictionary<string, string> dic = new Dictionary<string, string>();
    if (string.IsNullOrEmpty(img))
    {
        return dic;
    }

    // Group 1: attribute name. Hyphens, digits, '_', ':' and '.' are allowed so
    //          that "data-src" is not truncated to "src".
    // Group 2: the opening quote; \2 requires the same character to close the
    //          value, so a value may contain the other quote character.
    // Group 3: the value, possibly empty (alt="").
    const string pattern = @"([a-zA-Z_:][-a-zA-Z0-9_:.]*)\s*=\s*([""'])(.*?)\2";

    // Singleline lets '.' match line breaks inside a value.
    foreach (Match m in Regex.Matches(img, pattern, RegexOptions.Singleline))
    {
        // Invariant lower-casing keeps keys stable regardless of the current culture.
        string key = m.Groups[1].Value.ToLowerInvariant();
        string value = m.Groups[3].Value.Trim();

        // Indexer assignment overwrites an earlier duplicate in one lookup.
        dic[key] = value;
    }
    return dic;
}
/// <summary>
/// Splits an inline CSS style string into its individual declarations.
/// </summary>
/// <param name="style">
/// The content of a style attribute, e.g. <c>width:150px;height:150px</c>.
/// </param>
/// <returns>
/// A dictionary from lower-cased property name to trimmed value. If a property
/// occurs more than once the last occurrence wins. The dictionary is empty when
/// <paramref name="style"/> is null or empty.
/// </returns>
private Dictionary<string, string> ParseTagesStyle(string style)
{
    Dictionary<string, string> dic = new Dictionary<string, string>();
    if (string.IsNullOrEmpty(style))
    {
        return dic;
    }

    // Group 1: property name (letters and hyphens).
    // Group 2: everything up to the next ';' (so the value may contain ':').
    const string pattern = @"([a-zA-Z\-]+)\s*:\s*([^;]*);*";

    foreach (Match m in Regex.Matches(style, pattern))
    {
        // Invariant lower-casing keeps keys stable regardless of the current culture.
        string key = m.Groups[1].Value.ToLowerInvariant();
        string value = m.Groups[2].Value.Trim();

        // Indexer assignment overwrites an earlier duplicate in one lookup.
        dic[key] = value;
    }
    return dic;
}


浙公网安备 33010602011771号