【原创】RSS开发心得小结

几经面试和简历更新,发现自己做了这么久的开发,却很少做总结;一个个项目过去了,知识的积累和沉淀却不多……

借着这次机会,把以前的skill整理一下,浓缩的才是精华。为自己也为其他初学的朋友做个参考。

RSS(全称Really Simple Syndication) 目前广泛用于网上新闻频道,blog和wiki,主要的版本有0.91, 1.0, 2.0。

另外还有Atom格式(由IETF标准化为RFC 4287,Google等公司大力推广采用),以及作为Feed集合的OPML文件。

最常见的Feed格式是Rss1.0,2.0和ATOM,解析时通过不同的命名空间来处理不同的版本,下面是解析的主要代码:

 

代码
/// <summary>
/// Parses feed XML into a <see cref="Feed"/>, choosing the format from the name of the
/// document's root element: "opml", "feed" (Atom), "rdf:RDF" (RSS 1.0) or "rss" (RSS 2.0).
/// The root name is compared case-insensitively.
/// </summary>
/// <param name="url">Address of the feed source; stored on the result and passed to the XML loader.</param>
/// <param name="xmlContent">XML content of the feed.</param>
/// <returns>
/// The parsed feed. Exceptions raised while reading the loaded document are logged through
/// <c>Log.Write</c> and the partially populated feed is returned; exceptions raised while
/// loading the XML are not caught here.
/// </returns>
public static Feed AnalyseFeedContent(string url, string xmlContent)
{
    Feed feed = new Feed();
    feed.Url = url;
    feed.ChannelInfo = new FeedChannel();

    XmlDocument doc = new XmlDocument();
    doc = ReadGlobals.LoadXml(doc, xmlContent, url);

    // Register the commonly used namespaces.
    // NOTE: the "rdf" prefix is bound to the RSS 1.0 element namespace (not the RDF syntax
    // namespace), so XPath steps such as "rdf:item" match RSS 1.0 <item> elements.
    XmlNamespaceManager mgr = new XmlNamespaceManager(doc.NameTable);
    mgr.AddNamespace("rdf", "http://purl.org/rss/1.0/");
    mgr.AddNamespace("content", "http://purl.org/rss/1.0/modules/content/");
    mgr.AddNamespace("dc", "http://purl.org/dc/elements/1.1/");

    XmlNode nodeRoot = doc.DocumentElement;
    if (nodeRoot == null)
    {
        return feed;
    }

    try
    {
        // Prefer the URIs the document itself declares for these extension modules.
        AddRootNamespace(mgr, nodeRoot, "enc", "xmlns:enc", "http://crocodile.org/ns/rss/2.0/enclosures");
        AddRootNamespace(mgr, nodeRoot, "trackback", "xmlns:trackback", "http://madskills.com/public/xml/rss/module/trackback/");

        switch (nodeRoot.Name.ToLowerInvariant())
        {
            case "opml":
            {
                // OPML documents are only identified here; no channel or items are read.
                feed.Type = FeedType.OPML;
                break;
            }

            case "feed":
            {
                feed.Type = FeedType.ATOM_0_3;

                // The namespace declared on the root element wins over the default one.
                AddRootNamespace(mgr, nodeRoot, "atom", "xmlns", "http://www.w3.org/2005/Atom");

                feed.ChannelInfo = GetChannel(doc, mgr, "atom");
                feed.Items = GetItems(doc.SelectNodes("//atom:entry", mgr), feed.Type, mgr);
                break;
            }

            case "rdf:rdf":
            {
                feed.Type = FeedType.RSS_1_0;
                feed.ChannelInfo = GetChannel(doc, mgr, "rdf");
                feed.Items = GetItems(doc.SelectNodes("//rdf:item", mgr), feed.Type, mgr);
                break;
            }

            case "rss":
            {
                feed.Type = FeedType.RSS_2_0;

                // Look the channel up relative to the root: the root name was matched
                // case-insensitively, whereas an absolute "rss/channel" path would only
                // match a root spelled exactly "rss".
                XmlNode nodeChannel = nodeRoot.SelectSingleNode("channel");

                // A document without a channel keeps the empty ChannelInfo created above
                // instead of failing with a NullReferenceException.
                if (nodeChannel != null)
                {
                    feed.ChannelInfo = GetChannelForRss20(nodeChannel);
                    feed.Items = GetItems(nodeChannel.SelectNodes("item", mgr), feed.Type, mgr);
                }
                break;
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: a feed that cannot be read completely is still returned.
        Log.Write(ex);
    }

    return feed;
}

/// <summary>
/// Binds <paramref name="prefix"/> to the URI found in the root element's
/// <paramref name="declarationAttribute"/>, or to <paramref name="fallbackUri"/> when the
/// root element does not carry that attribute.
/// </summary>
private static void AddRootNamespace(XmlNamespaceManager mgr, XmlNode nodeRoot, string prefix, string declarationAttribute, string fallbackUri)
{
    XmlAttribute declaration = nodeRoot.Attributes[declarationAttribute];
    mgr.AddNamespace(prefix, declaration != null ? declaration.Value : fallbackUri);
}

 Feed主要分为两部分,Channel和ItemList部分,分别用如下方法解析:

 

FeedChannel解析
/// <summary>
/// Reads channel-level data (logo, title, link, description and language) from a feed
/// document whose elements are addressed through a namespace prefix.
/// </summary>
/// <param name="xdtDoc">The feed document; when null an empty channel is returned.</param>
/// <param name="mgr">Namespace manager in which <paramref name="prefix"/> is registered.</param>
/// <param name="prefix">Prefix used to qualify every element name in the XPath queries.</param>
/// <returns>
/// A new <see cref="FeedChannel"/>. Logo, title, description and language are only assigned
/// when a matching node exists; the link is always assigned (empty string when absent).
/// </returns>
private static FeedChannel GetChannel(XmlDocument xdtDoc, XmlNamespaceManager mgr, string prefix)
{
    FeedChannel channel = new FeedChannel();
    if (xdtDoc == null)
    {
        return channel;
    }

    // Every query is document-wide ("//"), so the first matching element in document
    // order is used.
    string elementPath = "//" + prefix + ":";

    XmlNode logoNode = xdtDoc.SelectSingleNode(elementPath + "logo", mgr);
    if (logoNode != null)
    {
        // NOTE(review): assumes FeedChannel initialises Logo itself - confirm.
        channel.Logo.Src = logoNode.InnerText;
    }

    XmlNode titleNode = xdtDoc.SelectSingleNode(elementPath + "title", mgr);
    if (titleNode != null)
    {
        channel.Title = titleNode.InnerText;
    }

    // Prefer the href of an alternate link; otherwise use the text of the first link element.
    XmlNode linkNode = xdtDoc.SelectSingleNode(elementPath + "link[@rel='alternate']/@href", mgr);
    if (linkNode == null)
    {
        linkNode = xdtDoc.SelectSingleNode(elementPath + "link", mgr);
    }
    channel.Link = linkNode != null ? linkNode.InnerText : string.Empty;

    // "tagline" is the Atom 0.3 name; Atom 1.0 (RFC 4287) renamed it to "subtitle".
    XmlNode descriptionNode = xdtDoc.SelectSingleNode(elementPath + "tagline", mgr);
    if (descriptionNode == null)
    {
        descriptionNode = xdtDoc.SelectSingleNode(elementPath + "subtitle", mgr);
    }
    if (descriptionNode != null)
    {
        channel.Description = descriptionNode.InnerText;
    }

    // Only a "feed" element under the given prefix is consulted for the language.
    XmlNode languageNode = xdtDoc.SelectSingleNode(elementPath + "feed/@xml:lang", mgr);
    if (languageNode != null)
    {
        try
        {
            // Resolve the LCID from the declared language tag. The calling thread's
            // culture is deliberately left alone: parsing a feed must not change how
            // the rest of the thread looks up resources.
            channel.Language = CultureInfo.CreateSpecificCulture(languageNode.InnerText).LCID;
        }
        catch (ArgumentException)
        {
            // Unknown or malformed language tag.
            channel.Language = 0;
        }
    }

    return channel;
}

 

FeedItem解析
/// <summary>
/// Converts the item nodes (RSS) or entry nodes (Atom) of a feed into
/// <see cref="FeedItem"/> objects.
/// </summary>
/// <param name="xnlItems">Nodes to convert; when null an empty list is returned.</param>
/// <param name="type">Feed format, which selects the element names read from each node.</param>
/// <param name="mgr">
/// Namespace manager used by every XPath query; the queries use the prefixes "rdf", "atom",
/// "content", "dc" and "trackback".
/// </param>
/// <returns>
/// One item per node, in enumeration order. Returns null when <paramref name="xnlItems"/>
/// is not null and <paramref name="type"/> is none of RSS_1_0, RSS_2_0 or ATOM_0_3.
/// </returns>
public static List<FeedItem> GetItems(XmlNodeList xnlItems, FeedType type, XmlNamespaceManager mgr)
{
    List<FeedItem> lstItems = new List<FeedItem>();
    if (xnlItems == null)
    {
        return lstItems;
    }

    switch (type)
    {
        case FeedType.RSS_1_0:
            foreach (XmlNode itemNode in xnlItems)
            {
                // RSS 1.0 elements are namespace-qualified and addressed through "rdf:".
                lstItems.Add(BuildRssItem(itemNode, mgr, "rdf:"));
            }
            break;

        case FeedType.RSS_2_0:
            foreach (XmlNode itemNode in xnlItems)
            {
                // RSS 2.0 elements are unqualified; guid and category are read only here.
                FeedItem item = BuildRssItem(itemNode, mgr, string.Empty);
                item.Guid = ReadNodeText(itemNode, "guid", mgr);

                XmlNodeList categoryNodes = itemNode.SelectNodes("category", mgr);
                if (categoryNodes != null)
                {
                    foreach (XmlNode categoryNode in categoryNodes)
                    {
                        item.Category += categoryNode.InnerText + ",";
                    }
                }

                lstItems.Add(item);
            }
            break;

        case FeedType.ATOM_0_3:
            foreach (XmlNode entryNode in xnlItems)
            {
                lstItems.Add(BuildAtomItem(entryNode, mgr));
            }
            break;

        default:
            return null;
    }

    return lstItems;
}

/// <summary>
/// Builds an item from an RSS node. <paramref name="elementPrefix"/> ("rdf:" or empty) is
/// put in front of the core element names (title, link, description, author, pubDate).
/// </summary>
private static FeedItem BuildRssItem(XmlNode itemNode, XmlNamespaceManager mgr, string elementPrefix)
{
    FeedItem item = new FeedItem();
    item.Title = ReadNodeText(itemNode, elementPrefix + "title", mgr);
    item.Link = ReadNodeText(itemNode, elementPrefix + "link", mgr);

    // Prefer content:encoded and fall back to the description element.
    string body = ReadNodeText(itemNode, "content:encoded", mgr);
    if (body == string.Empty)
    {
        body = ReadNodeText(itemNode, elementPrefix + "description", mgr);
    }
    item.Description = StripCDataMarkers(body);

    // The author element is read relative to this item, so each item gets its own
    // author; dc:creator is the fallback when the author element is missing or empty.
    string authorName = ReadNodeText(itemNode, elementPrefix + "author", mgr);
    if (authorName == string.Empty)
    {
        authorName = ReadNodeText(itemNode, "dc:creator", mgr);
    }
    if (authorName != string.Empty)
    {
        item.Author = new FeedPerson();
        item.Author.Name = authorName;
    }

    item.PubDate = ReadFirstDate(itemNode, mgr, item.Link, elementPrefix + "pubDate", "dc:date");

    AppendSubjects(item, itemNode, mgr);

    // Enclosures stays untouched when the item has no enclosure element.
    XmlNode enclosureNode = itemNode.SelectSingleNode("enclosure", mgr);
    if (enclosureNode != null)
    {
        item.Enclosures = new List<FeedEnclosure>();
        item.Enclosures.Add(BuildEnclosure(enclosureNode, "url"));
    }

    item.Description += GetHtmlByByEnclosure(item.Enclosures);
    item.TrackbackPing = ReadNodeText(itemNode, "trackback:ping", mgr);
    return item;
}

/// <summary>
/// Builds an item from an Atom entry node.
/// </summary>
private static FeedItem BuildAtomItem(XmlNode entryNode, XmlNamespaceManager mgr)
{
    FeedItem item = new FeedItem();
    item.Title = ReadNodeText(entryNode, "atom:title", mgr);

    // A link element without an href yields an empty link instead of failing the whole feed.
    item.Link = ReadAttributeValue(entryNode.SelectSingleNode("atom:link[@rel='alternate']", mgr), "href");

    item.Summary = ReadNodeText(entryNode, "atom:summary", mgr);
    item.Description = StripCDataMarkers(ReadNodeText(entryNode, "atom:content", mgr));
    item.Guid = ReadNodeText(entryNode, "atom:id", mgr);
    item.Contributor = ReadNodeText(entryNode, "atom:contributor", mgr);

    XmlNode authorNode = entryNode.SelectSingleNode("atom:author", mgr);
    if (authorNode != null)
    {
        item.Author = new FeedPerson();
        item.Author.Name = ReadNodeText(authorNode, "atom:name", mgr);
        item.Author.Url = ReadNodeText(authorNode, "atom:uri", mgr);
        item.Author.Email = ReadNodeText(authorNode, "atom:email", mgr);
    }

    // Each date tries several element names in turn until one yields a usable value.
    item.UpdateDate = ReadFirstDate(entryNode, mgr, item.Link, "atom:updated", "atom:modified");
    item.PubDate = ReadFirstDate(entryNode, mgr, item.Link, "atom:published", "atom:issued", "atom:created");

    AppendSubjects(item, entryNode, mgr);

    XmlNodeList categoryNodes = entryNode.SelectNodes("atom:category", mgr);
    if (categoryNodes != null)
    {
        foreach (XmlNode categoryNode in categoryNodes)
        {
            item.Category += ReadAttributeValue(categoryNode, "term") + ",";
        }
    }

    XmlNodeList enclosureNodes = entryNode.SelectNodes("atom:link[@rel='enclosure']", mgr);
    if (enclosureNodes != null)
    {
        // Unlike the RSS branches, the list is created even when there are no enclosure links.
        item.Enclosures = new List<FeedEnclosure>();
        foreach (XmlNode enclosureNode in enclosureNodes)
        {
            FeedEnclosure enclosure = BuildEnclosure(enclosureNode, "href");
            enclosure.Title = ReadAttributeValue(enclosureNode, "title");
            item.Enclosures.Add(enclosure);
        }
    }

    item.Description += GetHtmlByByEnclosure(item.Enclosures);
    item.TrackbackPing = ReadNodeText(entryNode, "trackback:ping", mgr);
    item.Rights = ReadNodeText(entryNode, "atom:rights", mgr);
    return item;
}

/// <summary>
/// Returns the inner text of the first node matching <paramref name="xpath"/> below
/// <paramref name="parent"/>, or an empty string when nothing matches.
/// </summary>
private static string ReadNodeText(XmlNode parent, string xpath, XmlNamespaceManager mgr)
{
    XmlNode node = parent.SelectSingleNode(xpath, mgr);
    return node != null ? node.InnerText : string.Empty;
}

/// <summary>
/// Returns the value of the named attribute, or an empty string when the node or the
/// attribute is missing.
/// </summary>
private static string ReadAttributeValue(XmlNode node, string attributeName)
{
    if (node == null || node.Attributes == null)
    {
        return string.Empty;
    }

    XmlAttribute attribute = node.Attributes[attributeName];
    return attribute != null ? attribute.Value : string.Empty;
}

/// <summary>
/// Tries each XPath in order and returns the first date that GetDateTimeByUrl converts to
/// something other than DateTime.MinValue; returns DateTime.MinValue when none does.
/// </summary>
private static DateTime ReadFirstDate(XmlNode parent, XmlNamespaceManager mgr, string link, params string[] xpaths)
{
    foreach (string xpath in xpaths)
    {
        XmlNode dateNode = parent.SelectSingleNode(xpath, mgr);
        if (dateNode == null)
        {
            continue;
        }

        DateTime value = GetDateTimeByUrl(link, dateNode.InnerText);
        if (value != DateTime.MinValue)
        {
            return value;
        }
    }

    return DateTime.MinValue;
}

/// <summary>
/// Appends the text of every dc:subject child to the item's Subject, each followed by a
/// comma. Subject is left untouched when there are no such children.
/// </summary>
private static void AppendSubjects(FeedItem item, XmlNode itemNode, XmlNamespaceManager mgr)
{
    XmlNodeList subjectNodes = itemNode.SelectNodes("dc:subject", mgr);
    if (subjectNodes == null)
    {
        return;
    }

    foreach (XmlNode subjectNode in subjectNodes)
    {
        item.Subject += subjectNode.InnerText + ",";
    }
}

/// <summary>
/// Builds an enclosure from the node's "type" and "length" attributes and from the
/// attribute named by <paramref name="urlAttributeName"/>.
/// </summary>
private static FeedEnclosure BuildEnclosure(XmlNode enclosureNode, string urlAttributeName)
{
    FeedEnclosure enclosure = new FeedEnclosure();
    enclosure.Type = ReadAttributeValue(enclosureNode, "type");

    // A missing or non-numeric length leaves Length at its default value.
    int length;
    if (int.TryParse(ReadAttributeValue(enclosureNode, "length"), out length))
    {
        enclosure.Length = length;
    }

    enclosure.Url = ReadAttributeValue(enclosureNode, urlAttributeName);
    return enclosure;
}

/// <summary>
/// Removes literal CDATA section markers from the text.
/// </summary>
private static string StripCDataMarkers(string text)
{
    return text.Replace("<![CDATA[", string.Empty).Replace("]]>", string.Empty);
}

 

 

 

posted @ 2010-10-14 23:45  litsword  阅读(2710)  评论(1)  编辑  收藏  举报