博客园博客PDF生成器

      周末写了一个博客园博客PDF生成器,由于博客园文件上传大小的限制,我把源代码放在CSDN上了(相信大家都有帐号哈),如果没有帐号的请留下邮箱,我会尽快发给你,当然如果哪位朋友能帮忙把源代码上传到博客园上更好:博客园博客PDF生成器 

      废话不多说,直接看生成后的PDF效果哈:

 

博客中图片效果:

 

      代码比较简单,这里先简单说一下思路,先通过博客地址取得该博客的RSS信息,这是一个XML文件,把源码存在本地,然后解析这个XML文件,从中取出需要的信息,再用iTextSharp这个DLL来操作PDF,从而生成PDF文档。

      下面只贴出几个主要的类,大家有兴趣可以下载源代码看:

      实体类channel,类属性是从XML文件中取得的:

实体类:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace BlogsConvert
{
    
/// <summary>
/// Entity holding the channel-level (blog-wide) values of an RSS feed:
/// title, link, description, language, build/publish dates and TTL.
/// </summary>
public class channel
{
    /// <summary>Title of the blog.</summary>
    public string Title { get; set; }

    /// <summary>Link of the blog.</summary>
    public string Link { get; set; }

    /// <summary>Description (sub-title) of the blog.</summary>
    public string Description { get; set; }

    /// <summary>Language value of the feed.</summary>
    public string Language { get; set; }

    /// <summary>Last build date of the feed.</summary>
    public DateTime LastBuildDate { get; set; }

    /// <summary>Publication date of the feed.</summary>
    public DateTime PubDate { get; set; }

    /// <summary>The feed's <c>ttl</c> value.</summary>
    public int Ttl { get; set; }
}
}

 

 

      实体类item(属性来自XML文件):

 

实体类:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace BlogsConvert
{
    
/// <summary>
/// Entity holding the values of one RSS <c>item</c> (a single blog article).
/// </summary>
public class item
{
    /// <summary>Title of the article.</summary>
    public string Title { get; set; }

    /// <summary>Link of the article.</summary>
    public string Link { get; set; }

    /// <summary>Value of the <c>dc:creator</c> element.</summary>
    public string Dc_creator { get; set; }

    /// <summary>Value of the <c>author</c> element.</summary>
    public string Author { get; set; }

    /// <summary>Publication date of the article.</summary>
    public DateTime PubDate { get; set; }

    /// <summary>Value of the <c>guid</c> element.</summary>
    public string Guid { get; set; }

    /// <summary>Body of the article as delivered by the feed.</summary>
    public string Description { get; set; }
}
}

 

 

      从XML文件中提取博客信息类:

 

代码
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using System.Xml;

namespace BlogsConvert
{
    
/// <summary>
/// Extracts blog (channel) and article (item) information from a locally
/// saved RSS XML file.
/// </summary>
public class BlogsInfo
{
    /// <summary>
    /// Reads the blog owner's channel information from the XML file.
    /// </summary>
    /// <param name="xmlPath">Path of the XML file.</param>
    /// <returns>
    /// The populated channel, or <c>null</c> when the document has no
    /// <c>channel</c> element or the channel has no non-blank title.
    /// </returns>
    public channel GetChannel(string xmlPath)
    {
        XmlNode channelNode = FindChannelNode(xmlPath);
        if (channelNode == null)
        {
            return null;
        }

        channel cha = new channel();
        foreach (XmlNode chanode in channelNode.ChildNodes)
        {
            // Stop scanning once the first article starts.
            if (chanode.Name == "item")
            {
                break;
            }

            switch (chanode.Name)
            {
                case "title":
                    cha.Title = chanode.InnerText;
                    break;
                case "link":
                    cha.Link = chanode.InnerText;
                    break;
                case "description":
                    cha.Description = chanode.InnerText;
                    break;
                case "language":
                    cha.Language = chanode.InnerText;
                    break;
                case "lastBuildDate":
                    cha.LastBuildDate = ParseDate(chanode.InnerText);
                    break;
                case "pubDate":
                    cha.PubDate = ParseDate(chanode.InnerText);
                    break;
                case "ttl":
                    cha.Ttl = int.Parse(chanode.InnerText, System.Globalization.CultureInfo.InvariantCulture);
                    break;
            }
        }

        // Title stays null when the feed has no <title>; the previous code
        // dereferenced it unconditionally and threw NullReferenceException.
        if (cha.Title != null && cha.Title.Trim() != "")
        {
            return cha;
        }
        return null;
    }

    /// <summary>
    /// Reads all articles from the XML file.
    /// </summary>
    /// <param name="xmlPath">Path of the XML file.</param>
    /// <returns>The articles, or <c>null</c> when none were found.</returns>
    public IList<item> GetItems(string xmlPath)
    {
        return GetItems(xmlPath, "");
    }

    /// <summary>
    /// Reads the articles from the XML file, optionally keeping only those
    /// whose title contains a keyword.
    /// </summary>
    /// <param name="xmlPath">Path of the XML file.</param>
    /// <param name="keyWord">
    /// Keyword that the article title must contain; <c>null</c> or blank
    /// disables the filter.
    /// </param>
    /// <returns>
    /// The matching articles that have a link, or <c>null</c> when there are
    /// none (callers must check for null).
    /// </returns>
    public IList<item> GetItems(string xmlPath, string keyWord)
    {
        IList<item> itemList = new List<item>();
        bool filterByTitle = keyWord != null && keyWord.Trim() != "";

        XmlNode channelNode = FindChannelNode(xmlPath);
        if (channelNode != null)
        {
            foreach (XmlNode statusnode in channelNode.ChildNodes)
            {
                if (statusnode.Name != "item")
                {
                    continue;
                }

                item temp = new item();
                // The match is tracked separately so the filter no longer
                // depends on <title> appearing before <link> in the XML.
                bool matchesKeyword = true;
                foreach (XmlNode o in statusnode.ChildNodes)
                {
                    switch (o.Name)
                    {
                        case "title":
                            temp.Title = o.InnerText;
                            if (filterByTitle && !o.InnerText.Contains(keyWord))
                            {
                                matchesKeyword = false;
                            }
                            break;
                        case "link":
                            temp.Link = o.InnerText;
                            break;
                        case "dc:creator":
                            temp.Dc_creator = o.InnerText;
                            break;
                        case "author":
                            temp.Author = o.InnerText;
                            break;
                        case "pubDate":
                            temp.PubDate = ParseDate(o.InnerText);
                            break;
                        case "guid":
                            temp.Guid = o.InnerText;
                            break;
                        case "description":
                            temp.Description = o.InnerText;
                            break;
                    }
                }

                if (matchesKeyword && temp.Link != null)
                {
                    itemList.Add(temp);
                }
            }
        }

        if (itemList.Count > 0)
        {
            return itemList;
        }
        return null;
    }

    /// <summary>
    /// Loads the XML file and returns the first <c>channel</c> child element
    /// of the document element, or <c>null</c> when there is none.
    /// </summary>
    private static XmlNode FindChannelNode(string xmlPath)
    {
        XmlDocument myXml = new XmlDocument();
        myXml.Load(xmlPath);
        XmlElement root = myXml.DocumentElement;
        if (root == null)
        {
            return null;
        }
        // Look the element up by name instead of assuming it is child 0,
        // which breaks when a comment or other node precedes it.
        return root["channel"];
    }

    /// <summary>
    /// Parses a feed date independently of the machine's current culture.
    /// </summary>
    private static DateTime ParseDate(string text)
    {
        return DateTime.Parse(text, System.Globalization.CultureInfo.InvariantCulture);
    }
}
}

 

 

        PDF文件生成类,也是本软件中最重要的一个类,其实就是iTextSharp的运用(这个DLL文件在源代码中有):

 

代码
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using iTextSharp.text;
using iTextSharp.text.pdf;
using System.IO;
using System.Text.RegularExpressions;

namespace BlogsConvert
{
    
/// <summary>
/// Writes blog information and articles into a PDF document using iTextSharp.
/// </summary>
public class ToPdf:IConvert
{
    // Character inserted at paragraph breaks and around picture markers; the
    // article text is split on it afterwards.
    // NOTE(review): the published listing lost this character in the
    // Replace/Split calls; it is reconstructed from the picture marker
    // ("卍Pic...ciP卍") that survived — confirm against the original source.
    private const char SegmentDelimiter = '卍';
    private const string PicturePrefix = "Pic";
    private const string PictureSuffix = "ciP";

    // Finds image URLs in src= attributes of <img> tags and in background= attributes.
    private static readonly Regex ImageSourcePattern =
        new Regex(@"(?is)(?:<img[^>]*?src|\bbackground)=(?:(['""])(?<img>[^'"">]+)\1|(?<img>[^'""\s>]+))");

    #region IConvert members

    /// <summary>
    /// Converts the blog information and its articles into a PDF file.
    /// Does nothing when <paramref name="commonInfo"/> or
    /// <paramref name="itemList"/> is null.
    /// </summary>
    /// <param name="commonInfo">Blog owner (channel) information.</param>
    /// <param name="itemList">Articles to include.</param>
    /// <param name="path">Path of the PDF file to create.</param>
    public void Convert(channel commonInfo, IList<item> itemList,string path)
    {
        if (commonInfo == null || itemList == null)
        {
            return;
        }

        // Fonts are created before the output file so a missing font file
        // does not leave an empty PDF behind.
        BaseFont baseFont = BaseFont.CreateFont(@"Fonts\SIMHEI.TTF", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
        Font font = new Font(baseFont, 12);
        Font fontBlod = new Font(baseFont, 15);

        Rectangle pageSize = PageSize.A4;

        // The using block releases the file handle even when generation fails.
        using (FileStream output = new FileStream(path, FileMode.Create))
        {
            Document document = new Document(pageSize);
            PdfWriter.GetInstance(document, output);
            document.Open();

            AddHeader(document, commonInfo, font, fontBlod);
            AddTableOfContents(document, itemList, font, fontBlod);

            // Separator line placed after every article.
            Paragraph hr = new Paragraph("--------------------------------------------------------------------------------------------------------");
            hr.Alignment = 1;
            hr.SpacingAfter = 20;
            hr.SpacingBefore = 20;

            float maxImageWidth = pageSize.Width - document.LeftMargin - document.RightMargin;
            int index = 1;
            foreach (item o in itemList)
            {
                AddArticle(document, o, index, font, fontBlod, hr, maxImageWidth);
                index++;
            }

            AddFooter(document, font);

            document.Close();
        }
    }

    /// <summary>Adds the generation notice, the blog title and the blog description.</summary>
    private static void AddHeader(Document document, channel commonInfo, Font font, Font titleFont)
    {
        // NOTE(review): the trailing empty literal probably lost a closing
        // parenthesis in the published listing; kept as shown — confirm.
        Paragraph pToop = new Paragraph(new Chunk("本文档由程序整理生成(生成时间:" + DateTime.Now + "", titleFont));
        // Alignment: 1 = center, 0 = left, 2 = right.
        pToop.Alignment = 1;
        pToop.SpacingAfter = 20;
        document.Add(pToop);

        Paragraph pTitle = new Paragraph(new Phrase(commonInfo.Title, titleFont));
        pTitle.Alignment = 1;
        pTitle.SpacingAfter = 20;
        document.Add(pTitle);

        Paragraph pDescription = new Paragraph(commonInfo.Description, font);
        pDescription.Alignment = 0;
        // Line spacing, as a multiple of the font size.
        pDescription.MultipliedLeading = 2;
        pDescription.SpacingAfter = 20;
        document.Add(pDescription);
    }

    /// <summary>Adds the table of contents with one internal link per article.</summary>
    private static void AddTableOfContents(Document document, IList<item> itemList, Font font, Font titleFont)
    {
        Paragraph allGuid = new Paragraph("目      录", titleFont);
        allGuid.Alignment = 1;
        allGuid.SpacingBefore = 10;
        document.Add(allGuid);

        // Blank paragraph used as a line break between entries.
        Paragraph guid = new Paragraph("    ");
        guid.MultipliedLeading = 1;
        for (int i = 0; i < itemList.Count; i++)
        {
            item o = itemList[i];
            Anchor aTitle = new Anchor("" + (i + 1) + "篇: " + o.Title, font);
            // Must match the anchor name assigned in AddArticle.
            aTitle.Reference = "#link" + o.PubDate.ToString();
            document.Add(aTitle);
            document.Add(guid);
        }
        document.Add(guid);
        document.Add(guid);
        document.Add(guid);
    }

    /// <summary>Adds one article: link target, title, text paragraphs, pictures and separator.</summary>
    private static void AddArticle(Document document, item article, int number, Font font, Font titleFont, Paragraph separator, float maxImageWidth)
    {
        Anchor lTitle = new Anchor("" + number + "篇:", font);
        lTitle.Name = "link" + article.PubDate.ToString();
        document.Add(lTitle);

        Paragraph blogTitle = new Paragraph(article.Title, titleFont);
        blogTitle.Alignment = 1;
        blogTitle.MultipliedLeading = 1;
        document.Add(blogTitle);

        // An article without a description is rendered with an empty body
        // instead of failing with NullReferenceException.
        string content = article.Description ?? "";
        string delimiter = SegmentDelimiter.ToString();
        content = content.Replace("<p>", delimiter);
        content = content.Replace("<br />", delimiter);
        content = content.Replace("<br/ />", delimiter);

        // Surround every picture URL with delimiters so that, once the tags
        // are stripped, it becomes a segment of its own ("Pic<url>ciP").
        IList<string> picList = new List<string>();
        foreach (Match m in ImageSourcePattern.Matches(content))
        {
            string url = m.Groups["img"].Value;
            // string.Replace rewrites every occurrence, so a URL seen before
            // must be skipped or it would be wrapped in markers twice.
            if (url.Contains("OutliningIndicators") || picList.Contains(url))
            {
                continue;
            }
            picList.Add(url);
            content = content.Replace(url, "\" />卍Pic" + url + "ciP卍<img src=\"");
        }

        content = NoHTML(content);

        foreach (string segment in content.Split(SegmentDelimiter))
        {
            string pictureUrl = FindPictureUrl(segment, picList);
            if (pictureUrl != null)
            {
                AddPicture(document, pictureUrl, maxImageWidth);
            }
            else
            {
                Paragraph blogContent = new Paragraph(segment, font);
                blogContent.Alignment = 0;
                blogContent.MultipliedLeading = 2;
                blogContent.SpacingAfter = 10;
                document.Add(blogContent);
            }
        }

        document.Add(separator);
    }

    /// <summary>Returns the picture URL when the segment is a picture marker, otherwise null.</summary>
    private static string FindPictureUrl(string segment, IList<string> picList)
    {
        foreach (string url in picList)
        {
            if (segment == PicturePrefix + url + PictureSuffix)
            {
                return url;
            }
        }
        return null;
    }

    /// <summary>Loads a picture and adds it, scaled down to the usable page width when wider.</summary>
    private static void AddPicture(Document document, string url, float maxWidth)
    {
        Image jpeg = Image.GetInstance(url);
        if (jpeg.Width > maxWidth)
        {
            // Keep the aspect ratio: newHeight = height * newWidth / width.
            // The previous formula (width * height / pageWidth) enlarged the
            // height instead of shrinking it.
            jpeg.ScaleAbsolute(maxWidth, jpeg.Height * maxWidth / jpeg.Width);
        }
        jpeg.Alignment = Image.MIDDLE_ALIGN;
        document.Add(jpeg);
    }

    /// <summary>Adds the closing credit paragraph.</summary>
    private static void AddFooter(Document document, Font font)
    {
        Paragraph drj = new Paragraph(new Chunk("本程序由博客园——天行健(http://home.cnblogs.com/u/durongjian/)制作,如有建议请发邮件至drjchina@163.com", font));
        // Alignment: 1 = center, 0 = left, 2 = right.
        drj.Alignment = 1;
        drj.SpacingAfter = 20;
        drj.SpacingBefore = 20;
        document.Add(drj);
    }

    /// <summary>
    /// Removes HTML tags from a string and decodes a few common entities.
    /// </summary>
    /// <param name="Htmlstring">String containing HTML markup.</param>
    /// <returns>The trimmed plain text; an empty string for null or empty input.</returns>
    public static string NoHTML(string Htmlstring)
    {
        if (string.IsNullOrEmpty(Htmlstring))
        {
            return "";
        }

        // Strip scripts, tags, line breaks followed by whitespace, and comment remnants.
        Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

        // Decode the supported entities; any other numeric entity is dropped.
        Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

        // The former trailing Htmlstring.Replace("<", "") / (">", "") /
        // ("\r\n", "") calls discarded their results and so did nothing.
        // They are removed rather than assigned: applying them here would
        // delete the '<' and '>' characters just decoded from &lt; / &gt;.
        return Htmlstring.Trim();
    }

    #endregion
}
}

 

 

      最后就是调用类了,先看一下软件界面吧:

      后台代码:

 

代码
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using BlogsConvert;
using System.Net;
using System.IO;

namespace CnBlogsHelper
{
    
/// <summary>
/// Main window: downloads a blog's RSS feed, lists its articles and
/// generates a PDF from them.
/// </summary>
public partial class BlogToPdf : Form
{
    /// <summary>Blog owner (channel) information of the last extraction; may be null.</summary>
    public channel commonInfo = new channel();

    /// <summary>Articles of the last extraction, in the same order as the list box.</summary>
    public IList<item> blogInfos = new List<item>();

    public BlogToPdf()
    {
        InitializeComponent();
    }

    private void BlogToPdf_Load(object sender, EventArgs e)
    {
    }

    /// <summary>Path of the local file the downloaded RSS XML is stored in.</summary>
    private static string XmlCachePath
    {
        get { return Path.Combine(Application.StartupPath, "Blogs.xml"); }
    }

    /// <summary>
    /// Downloads the RSS source and stores it in the local XML file.
    /// </summary>
    /// <param name="PageUrl">URL of the RSS feed.</param>
    /// <exception cref="InvalidOperationException">The response body was blank.</exception>
    public void GetXML(string PageUrl)
    {
        // Send a GET request and read the XML response; the using blocks
        // release the connection and streams even when reading fails.
        string content;
        WebRequest request = WebRequest.Create(PageUrl);
        using (WebResponse response = request.GetResponse())
        using (Stream resStream = response.GetResponseStream())
        using (StreamReader sr = new StreamReader(resStream, Encoding.GetEncoding("GB2312")))
        {
            content = sr.ReadToEnd();
        }

        // StreamWriter with append = false creates or overwrites the file,
        // so no separate File.Create step is needed.
        using (StreamWriter writer = new StreamWriter(XmlCachePath, false, Encoding.GetEncoding("UTF-8")))
        {
            writer.Write(content);
        }

        if (content.Trim() == "")
        {
            throw new InvalidOperationException("用户名有误,请检查后重新输入!");
        }
    }

    /// <summary>
    /// Generates the PDF file on the current user's desktop.
    /// </summary>
    /// <param name="saveName">File name of the PDF, without extension.</param>
    /// <param name="cha">Blog owner (channel) information.</param>
    /// <param name="itemList">Articles to include.</param>
    public void CreatePDF(string saveName,channel cha,IList<item> itemList)
    {
        IConvert con = new ToPdf();
        string dir = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
        con.Convert(cha, itemList, Path.Combine(dir, saveName + ".pdf"));
    }

    // "Generate" button.
    private void btnCreate_Click(object sender, EventArgs e)
    {
        if (!CheckForm())
        {
            return;
        }

        // Convert silently does nothing for a null channel, so that case is
        // reported here instead of being announced as a success.
        if (commonInfo == null || blogInfos == null || blogInfos.Count == 0)
        {
            MessageBox.Show("博客数为0,请先提取博客信息!");
            return;
        }

        try
        {
            Wait f = new Wait();
            try
            {
                f.Show();
                Application.DoEvents();

                CreatePDF(txtFileName.Text.Trim(), commonInfo, blogInfos);
            }
            finally
            {
                // Close the wait window on failure as well.
                f.Close();
            }
            MessageBox.Show("PDF文档“" + txtFileName.Text.Trim() + ".pdf”生成成功,文档在桌面!");
        }
        catch (Exception ex)
        {
            MessageBox.Show("异常信息:" + ex.Message);
        }
    }

    // "Extract blog information" button.
    private void btnFind_Click(object sender, EventArgs e)
    {
        if (!CheckForm())
        {
            return;
        }

        // Keep the article list and the list box in sync even if the download fails.
        libBlog.Items.Clear();
        blogInfos = new List<item>();

        string pageUrl = txtBlogUrl.Text.Trim();
        if (!pageUrl.EndsWith("/", StringComparison.Ordinal))
        {
            pageUrl = pageUrl + "/";
        }
        pageUrl = pageUrl + "rss";

        try
        {
            // Show the wait window while downloading and parsing.
            Wait f = new Wait();
            try
            {
                f.Show();
                Application.DoEvents();

                GetXML(pageUrl);
                string path = XmlCachePath;
                BlogsInfo blogInfo = new BlogsInfo();
                commonInfo = blogInfo.GetChannel(path);

                // The placeholder text of the keyword box means "no filter".
                string keyWord = txtKeyWord.Text.Trim();
                if (keyWord == "请输入标题中的关键字")
                {
                    keyWord = "";
                }

                // GetItems returns null when nothing matches; keep an empty
                // list so later Count/foreach/Clear calls cannot fail.
                blogInfos = blogInfo.GetItems(path, keyWord) ?? new List<item>();

                foreach (item o in blogInfos)
                {
                    libBlog.Items.Add(o.Title);
                }
            }
            finally
            {
                f.Close();
            }
        }
        catch (Exception ex)
        {
            MessageBox.Show("异常信息:" + ex.Message);
        }
    }

    // "Clear all" button.
    private void btnClearAll_Click(object sender, EventArgs e)
    {
        libBlog.Items.Clear();
        if (blogInfos != null)
        {
            blogInfos.Clear();
        }
    }

    // "Delete selected entry" button.
    private void btnClearCurrent_Click(object sender, EventArgs e)
    {
        int index = libBlog.SelectedIndex;
        // SelectedIndex is -1 when nothing is selected.
        if (index < 0)
        {
            return;
        }

        // Remove by position: removing by value would delete the first
        // entry with the same title rather than the selected one.
        libBlog.Items.RemoveAt(index);
        if (blogInfos != null && index < blogInfos.Count)
        {
            blogInfos.RemoveAt(index);
        }
    }

    // Clears the placeholder text when the keyword box is clicked.
    private void txtKeyWord_Click(object sender, EventArgs e)
    {
        if (txtKeyWord.Text.Trim() == "请输入标题中的关键字")
        {
            txtKeyWord.Text = "";
        }
    }

    /// <summary>
    /// Checks that blog address and file name are filled in; otherwise shows
    /// a message and resets both boxes to their default values.
    /// </summary>
    /// <returns><c>true</c> when both fields are non-blank.</returns>
    private bool CheckForm()
    {
        if (txtBlogUrl.Text.Trim() == "" || txtFileName.Text.Trim() == "")
        {
            MessageBox.Show("博客地址和保存文件名不能为空!");
            txtBlogUrl.Text = "http://www.cnblogs.com/";
            txtFileName.Text = "我的博客";
            return false;
        }
        return true;
    }
}
}

 

      其中调用了一个等待窗体Wait,非常简单,这里就不说了,大家可以看源代码。

      博客园中高手如云,本人只能算个菜,只是把自己写的一点小东西拿出来跟大家分享,希望能帮到大家,欢迎各位朋友批评指正,如果使用过程中有错误请留言哦。

      本软件目的是服务博客园的朋友们,源代码完全开源,但转载或二次开发请注明出处。

posted @ 2010-10-25 16:24  artwl  阅读(2993)  评论(33)  编辑  收藏  举报

个人简介

var ME = {
	"name": "土豆/Artwl",
	"job": "coding",
	"languages": [
		"JS", "HTML",
                "CSS", "jQuery",
		"MVC",".NET",
		"设计模式"
	],
	"hobby": [
		"阅读", "旅游",
		"音乐", "电影"
	]
}
TOP