maven之读写pdf简单实例(pdfbox与itext)与pdfbox源码解析(访问者模式)

  记录学习的脚步


       本文是用pdfbox读写pdf,但是因为pdfbox在写pdf的时候,对中文的支持不好,会有乱码,我尝试着修改COSString的源码,试了UTF-8、UTF-16BE几种编码 中文输出还是乱码 接着把pdfbox parent中的pom 的 <project.build.sourceEncoding>ISO-8859-1</project.build.sourceEncoding>  属性改为UTF-8 还是不行  好吧 能力有限 还是放弃了

      所幸itext对中文的支持还不错   使用itext进行写pdf

参考

  pdfbox 官网   http://pdfbox.apache.org/cookbook/documentcreation.html

  itext 官网  http://itextpdf.com/learn

   下面  itext 中的代码 来源于 这位哥们写的    http://www.iteye.com/topic/1006313   本来是打算自己写的  但是这哥们写的不错 还有注释  就直接用了 

  itext的更多详细的操作 可参考    http://rensanning.iteye.com/blog/1538689   他写的很详细 

1、先看pdfbox的读写pdf的代码

    产生pdf的 SavePdfDocument.java类  必要的地方都加了注释

   

package com.undergrowth.pdfbox;

import java.io.IOException;



import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.edit.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;

/**
 * Creates simple PDF documents with PDFBox and exposes small factory helpers
 * for the PDFBox objects involved.
 *
 * @author Administrator
 * @date 2014-8-31
 * @version 1.0.0
 */
public class SavePdfDocument {

	/**
	 * Shared logger for this class.
	 */
	public static final Log logger=LogFactory.getLog(SavePdfDocument.class);

	/**
	 * Writes a single-page PDF containing the given text and saves it to disk.
	 * <p>
	 * The content stream and the document are closed in {@code finally} blocks so
	 * that they are released even when drawing or saving fails.
	 *
	 * @param sayWhat the text to draw on the page
	 * @param filePath the path the PDF is saved to
	 * @return {@code true} once the document has been saved; failures are
	 *         reported through the declared exceptions
	 * @throws IOException if writing the page content or the file fails
	 * @throws COSVisitorException if PDFBox fails while generating the document data
	 */
	public  boolean helloPdf(String sayWhat,String filePath) throws IOException, COSVisitorException{
		PDDocument document=getPdDocument();
		try {
			PDPage page=getPdPage();
			document.addPage(page);

			PDFont font=getFont();

			PDPageContentStream contentStream=getPdPageContentStream(document, page);
			try {
				contentStream.beginText();
				contentStream.setFont(font, 20);
				contentStream.moveTextPositionByAmount(200, 300);
				contentStream.drawString(sayWhat);
				contentStream.endText();
			} finally {
				// the page content stream must be closed before the document is saved
				contentStream.close();
			}

			document.save(filePath);
		} finally {
			document.close();
		}

		logger.info("成功创建pdf");
		return true;
	}

	/**
	 * Creates a new, empty PDF document.
	 *
	 * @return a new {@link PDDocument}; the caller is responsible for closing it
	 */
	public PDDocument getPdDocument(){
		return new PDDocument();
	}

	/**
	 * Loads an existing PDF document from a file.
	 *
	 * @param fileName path of the PDF file to load
	 * @return the loaded {@link PDDocument}; the caller is responsible for closing it
	 * @throws IOException if the file cannot be loaded
	 */
	public PDDocument getPdDocument(String fileName) throws IOException{
		return PDDocument.load(fileName);
	}

	/**
	 * Creates a new, empty page.
	 *
	 * @return a new {@link PDPage}
	 */
	public PDPage getPdPage(){
		return new PDPage();
	}

	/**
	 * Returns the font used for the generated text (Helvetica bold).
	 *
	 * @return {@link PDType1Font#HELVETICA_BOLD}
	 */
	public PDFont getFont(){
		return PDType1Font.HELVETICA_BOLD;
	}

	/**
	 * Opens a content stream used to add content to a page.
	 *
	 * @param document the document the page belongs to
	 * @param page the page to write to
	 * @return a new {@link PDPageContentStream}; the caller must close it
	 * @throws IOException if the content stream cannot be created
	 */
	public PDPageContentStream getPdPageContentStream(PDDocument document,PDPage page) throws IOException{
		return new PDPageContentStream(document, page);
	}

}

  提取pdf的 PdfTextStripperTest.java

package com.undergrowth.pdfbox;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

/**
 * Extracts the text of a PDF file with PDFBox's {@code PDFTextStripper}.
 */
public class PdfTextStripperTest {

	public static Log log=LogFactory.getLog(PdfTextStripperTest.class);

	/**
	 * Writes the text of the document to the given writer.
	 *
	 * @param document the document to extract text from
	 * @param writer the destination for the extracted text
	 * @throws IOException if extraction or writing fails
	 */
	public void getTextStripper(PDDocument document, Writer writer)
			throws IOException {
		PDFTextStripper textStripper = new PDFTextStripper();
		textStripper.writeText(document, writer);
	}

	/**
	 * Loads a PDF file and returns its text content.
	 * <p>
	 * The text is buffered as UTF-8 bytes and decoded with the same charset, so
	 * the result does not depend on the platform default encoding. The document
	 * is closed even when extraction fails.
	 *
	 * @param fileName path of the PDF file to load
	 * @return the extracted text
	 * @throws IOException if the file cannot be loaded or read
	 */
	public String getText(String fileName) throws IOException {
		SavePdfDocument pdfDocument = new SavePdfDocument();
		PDDocument document = pdfDocument.getPdDocument(fileName);
		ByteArrayOutputStream out = new ByteArrayOutputStream();
		try {
			OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8");
			getTextStripper(document, writer);
			// closing the writer flushes any buffered characters into the byte buffer
			writer.close();
		} finally {
			document.close();
		}
		String textString = out.toString("UTF-8");
		log.info("提取的文本内容为:"+textString);
		return textString;
	}
}

测试类

package com.undergrowth.pdfbox;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.exceptions.COSVisitorException;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;

/**
 * Tests writing a PDF with {@link SavePdfDocument} and extracting text with
 * {@link PdfTextStripperTest}.
 */
public class AppTest 
    extends TestCase
{

    /**
     * Create the test case
     *
     * @param testName name of the test case
     */
    public AppTest( String testName )
    {
        super( testName );
    }

    /**
     * @return the suite of tests being tested
     */
    public static Test suite()
    {
        return new TestSuite( AppTest.class );
    }

    /**
     * Writes a small PDF, then extracts the text of a second, pre-existing PDF
     * and checks that some text came back.
     * <p>
     * Only ASCII text is written here; the second file ({@code E:\test11.pdf})
     * must already exist on disk before this test runs.
     *
     * @throws IOException if reading or writing a PDF fails
     * @throws COSVisitorException if PDFBox fails while generating the document
     */
    public void testApp() throws COSVisitorException, IOException
    {
        SavePdfDocument pdfDocument=new SavePdfDocument();
        String filePath="e:\\hello.pdf";
        boolean f=pdfDocument.helloPdf(("hello world"), filePath);
        assertTrue( f );

        filePath="E:\\test11.pdf";

        PdfTextStripperTest textStripperTest=new PdfTextStripperTest();
        String stripperText = textStripperTest.getText(filePath);

        // compare by value: assertNotSame only checks reference identity and
        // therefore could never fail for a freshly built String
        assertNotNull( stripperText );
        assertFalse( "extracted text must not be empty", "".equals(stripperText) );
    }

}


   2、使用itext进行写pdf

PdfUtils.java

package com.undergrowth.pdfbox;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;

import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Chapter;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Font;
import com.itextpdf.text.FontFactory;
import com.itextpdf.text.List;
import com.itextpdf.text.ListItem;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.Phrase;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.Section;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfWriter;

/**
 * Helper for writing PDF documents with iText (chapters, sections, lists and
 * fonts that can render Chinese text) and for reading a PDF back with PDFBox.
 * <p>
 * Source: http://www.iteye.com/topic/1006313
 *
 * @author Administrator
 */
public class PdfUtils {

	// Alternative font file: "SIMFANG.TTF" (FangSong regular)
	/** Font file used for Chinese text: SimHei regular. */
	public static final String CHARACTOR_FONT_CH_FILE = "SIMHEI.TTF";

	public static final Rectangle PAGE_SIZE = PageSize.A4;
	public static final float MARGIN_LEFT = 50;
	public static final float MARGIN_RIGHT = 50;
	public static final float MARGIN_TOP = 50;
	public static final float MARGIN_BOTTOM = 50;
	public static final float SPACING = 20;

	/** The iText document currently being written, or null before createDocument. */
	private Document document = null;
	/** The stream backing the current document, or null when none is open. */
	private FileOutputStream out=null;

	/**
	 * Creates the target document and opens it for writing.
	 * <p>
	 * Failures to open the file or to attach the PDF writer are printed and
	 * otherwise ignored, as before; when attaching the writer fails, the already
	 * opened file stream is closed so it does not leak.
	 *
	 * @param fileName path of the file the PDF is written to
	 */
	public void createDocument(String fileName) {
		File file = new File(fileName);
		out = null;
		document = new Document(PAGE_SIZE, MARGIN_LEFT, MARGIN_RIGHT, MARGIN_TOP, MARGIN_BOTTOM);
		try {
			out = new FileOutputStream(file);
			PdfWriter.getInstance(document, out);
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (DocumentException e) {
			e.printStackTrace();
			// no writer owns the stream, so release it here
			closeOutputQuietly();
		}
		// open the document so content can be added
		document.open();
	}

	/**
	 * Adds a chapter to the current document, opening the document first if needed.
	 * Does nothing when no document has been created.
	 *
	 * @param chapter the chapter to add
	 */
	public void writeChapterToDoc(Chapter chapter) {
		if (document == null) {
			return;
		}
		try {
			if (!document.isOpen()) {
				document.open();
			}
			document.add(chapter);
		} catch (DocumentException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Creates a chapter.
	 *
	 * @param title chapter title
	 * @param chapterNum chapter number
	 * @param alignment title alignment: 0 means left, 1 means center
	 * @param numberDepth 1 to number the chapter (1. Chapter; 1.1 Section ...), 0 for no numbering
	 * @param font font of the title
	 * @return the new chapter
	 */
	public static Chapter createChapter(String title, int chapterNum, int alignment, int numberDepth, Font font) {
		Paragraph chapterTitle = new Paragraph(title, font);
		chapterTitle.setAlignment(alignment);
		Chapter chapter = new Chapter(chapterTitle, chapterNum);
		chapter.setNumberDepth(numberDepth);
		return chapter;
	}

	/**
	 * Appends a section to the given chapter.
	 *
	 * @param chapter the chapter to append to
	 * @param title section title
	 * @param font font of the title
	 * @param numberDepth 1 to number the section, 0 for no numbering
	 * @return the new section, or null when {@code chapter} is null
	 */
	public static Section createSection(Chapter chapter, String title, Font font, int numberDepth) {
		if (chapter == null) {
			return null;
		}
		Paragraph sectionTitle = new Paragraph(title, font);
		sectionTitle.setSpacingBefore(SPACING);
		Section section = chapter.addSection(sectionTitle);
		section.setNumberDepth(numberDepth);
		return section;
	}

	/**
	 * Creates a piece of content in the given font.
	 *
	 * @param text the content
	 * @param font the font of the content
	 * @return the content, built as a {@link Paragraph}
	 */
	public static Phrase createPhrase(String text,Font font) {
		return new Paragraph(text,font);
	}

	/**
	 * Creates a list.
	 *
	 * @param numbered true to create a numbered list
	 * @param lettered true to number with letters, false to number with digits
	 * @param symbolIndent indentation passed to the iText list constructor
	 * @return the new list
	 */
	public static List createList(boolean numbered, boolean lettered, float symbolIndent) {
		return new List(numbered, lettered, symbolIndent);
	}

	/**
	 * Creates a list item.
	 *
	 * @param content text of the item
	 * @param font font of the item
	 * @return the new list item
	 */
	public static ListItem createListItem(String content, Font font) {
		return new ListItem(content, font);
	}

	/**
	 * Creates a font through iText's {@link FontFactory}.
	 *
	 * @param fontname name of the font
	 * @param size font size
	 * @param style font style
	 * @param color font color
	 * @return the font
	 */
	public static Font createFont(String fontname, float size, int style, BaseColor color) {
		return FontFactory.getFont(fontname, size, style, color);
	}

	/**
	 * Creates a font able to render Chinese text, based on the font file named
	 * by {@link #CHARACTOR_FONT_CH_FILE} (SimHei).
	 * <p>
	 * If the font file cannot be loaded the error is printed and the returned
	 * font is built around a null base font.
	 *
	 * @param size font size
	 * @param style font style
	 * @param color font color
	 * @return the font
	 */
	public static Font createCHineseFont(float size, int style, BaseColor color) {
		BaseFont bfChinese = null;
		try {
			bfChinese = BaseFont.createFont(CHARACTOR_FONT_CH_FILE,BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
		} catch (DocumentException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return new Font(bfChinese, size, style, color);
	}

	/**
	 * Closes the current document, if any, and releases the underlying file stream.
	 */
	public void closeDocument() {
		try {
			if (document != null) {
				document.close();
			}
		} finally {
			// closing an already closed FileOutputStream is harmless
			closeOutputQuietly();
		}
	}

	/** Closes the file stream, ignoring close failures, and forgets it. */
	private void closeOutputQuietly() {
		if (out != null) {
			try {
				out.close();
			} catch (IOException ignored) {
				// nothing useful can be done if closing fails
			}
			out = null;
		}
	}

	/**
	 * Reads a PDF file with PDFBox and prints its text to standard output.
	 * Errors are printed, not thrown. The parsed document and the input stream
	 * are always closed.
	 *
	 * @param fileName path of the PDF file to read
	 */
	public static void readPDF(String fileName) {
		File file = new File(fileName);
		FileInputStream in = null;
		PDDocument pdfdocument = null;
		try {
			in = new FileInputStream(fileName);
			// parse the PDF file
			PDFParser parser = new PDFParser(in);
			parser.parse();
			// fetch the parsed document
			pdfdocument = parser.getPDDocument();
			// strip the text out of the document
			PDFTextStripper stripper = new PDFTextStripper();
			String result = stripper.getText(pdfdocument);
			System.out.println("PDF文件的文本内容如下:");
			System.out.println(result);

		} catch (Exception e) {
			System.out.println("读取PDF文件" + file.getAbsolutePath() + "生失败!" + e);
			e.printStackTrace();
		} finally {
			if (pdfdocument != null) {
				try {
					pdfdocument.close();
				} catch (IOException ignored) {
					// best effort: the text has already been printed or the error reported
				}
			}
			if (in != null) {
				try {
					in.close();
				} catch (IOException ignored) {
					// best effort
				}
			}
		}
	}

	/**
	 * Demo: writes a sample PDF with Chinese text and reads it back.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		String fileName = "E:\\test11.pdf";  // the folder of this absolute path must already exist
		PdfUtils pdfUtils = new PdfUtils();

		Font chapterFont = createCHineseFont(20, Font.BOLD, new BaseColor(0, 0, 255)); // chapter title font
		Font sectionFont = createCHineseFont(16, Font.BOLD, new BaseColor(0, 0, 255)); // section title font
		Font textFont = createCHineseFont(10, Font.NORMAL, new BaseColor(0, 0, 0));    // body text font

		pdfUtils.createDocument(fileName);
		Chapter chapter = createChapter("糖尿病病例1", 1, 1, 0, chapterFont);
		Section section1 = createSection(chapter, "病例联系人信息", sectionFont,0);
		Phrase text1 = createPhrase("如您手中有同类现成病例,在填写完以上基础信息后,传病例附件",textFont);
		section1.add(text1);

		Section section2 = createSection(chapter, "病例个人体会", sectionFont,0);
		Phrase text2 = createPhrase("1.下载病例生成PDF文档",textFont);
		// text1 is added to the second section as well, as in the original sample
		section2.add(text1);
		section2.add(text2);

		List list = createList(true, false, 20);
		String tmp = "还有什么能够文档。文档是 PDF 文档的所有元素的容器。 ";
		ListItem listItem1 = createListItem(tmp,textFont);
		ListItem listItem2 = createListItem("列表2",textFont);
		list.add(listItem1);
		list.add(listItem2);
		section2.add(list);

		pdfUtils.writeChapterToDoc(chapter);
		pdfUtils.closeDocument();

		// read it back
		readPDF(fileName);
	}
}

上面使用了黑体字体 需要将黑体字体的ttf文件放在resources目录下  即可

上面即是使用pdfbox与itext的简单实例

附pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<groupId>com.undergrowth</groupId>
	<artifactId>pdfbox</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>jar</packaging>

	<name>pdfbox</name>
	<url>http://maven.apache.org</url>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
	</properties>

	<dependencies>
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>3.8.1</version>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>org.apache.pdfbox</groupId>
			<artifactId>pdfbox</artifactId>
			<version>1.8.6</version>
		</dependency>
		<dependency>
			<groupId>com.ibm.icu</groupId>
			<artifactId>icu4j</artifactId>
			<version>3.8</version>
		</dependency>
		<dependency>
			<groupId>com.itextpdf</groupId>
			<artifactId>itextpdf</artifactId>
			<version>5.5.1</version>
			<type>jar</type>
		</dependency>
	</dependencies>

	<build>
		<plugins>
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-javadoc-plugin</artifactId>
				<version>2.9.1</version>
				<configuration>
					<tags>
						<tag>
							<name>date</name>
							<placement>a</placement>
							<head>日期:</head>
						</tag>
					</tags>
				</configuration>
			</plugin>
		</plugins>
	</build>

</project>

3、再来看看pdfbox的源码吧  说起pdfbox的源码编译  就郁闷 

因为pdfbox核心库pdfbox中测试需要用到

<dependency>
            <groupId>com.levigo.jbig2</groupId>
            <artifactId>levigo-jbig2-imageio</artifactId>
            <version>1.6.2</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>net.java.dev.jai-imageio</groupId>
            <artifactId>jai-imageio-core-standalone</artifactId>
            <version>1.2-pre-dr-b04-2011-07-04</version>
            <scope>test</scope>
        </dependency>

<repository>
            <id>jbig2.googlecode</id>
            <name>JBIG2 ImageIO-Plugin repository at googlecode.com</name>
            <url>http://jbig2-imageio.googlecode.com/svn/maven-repository/</url>
        </repository>



位于googlecode上的jar包  可 google 与我天朝的关系貌似不太友好啊  一直编译不过去  后来干脆只有将pdfbox核心库中pdfbox的测试库全删了  再把上面的依赖注释掉 

哈哈 编译成功   


好吧 还是来看看 pdfbox中用到的访问者模式吧  也正是由于这个模式中的访问者的操作  才将最终的document内容输出到输出流中去

访问者模式是什么啊   好吧  

看看这里有两篇文章 有个大致印象吧

访问者模式   http://blog.csdn.net/hfmbook/article/details/7684175

 访问者模式  http://www.2cto.com/kf/201402/278957.html


还是从头看起 先看  PDdocument的构造器

 /**
     * Constructor, creates a new PDF Document with no pages.  You need to add
     * at least one page for the document to be valid.
     * <p>
     * Builds the minimal object graph by hand: a trailer dictionary whose ROOT
     * entry is the catalog dictionary, whose PAGES entry is a pages dictionary
     * holding an empty KIDS array and a COUNT of zero.
     */
    public PDDocument()
    {
        document = new COSDocument();

        // First we need a trailer; it is registered on the underlying COSDocument
        COSDictionary trailer = new COSDictionary();
        document.setTrailer( trailer );

        // Next we need the root dictionary (TYPE = CATALOG), reachable from the trailer via ROOT.
        COSDictionary rootDictionary = new COSDictionary();
        trailer.setItem( COSName.ROOT, rootDictionary );
        rootDictionary.setItem( COSName.TYPE, COSName.CATALOG );
        rootDictionary.setItem( COSName.VERSION, COSName.getPDFName( "1.4" ) );

        // Next we need the pages tree structure: a PAGES dictionary with no kids yet
        COSDictionary pages = new COSDictionary();
        rootDictionary.setItem( COSName.PAGES, pages );
        pages.setItem( COSName.TYPE, COSName.PAGES );
        COSArray kidsArray = new COSArray();
        pages.setItem( COSName.KIDS, kidsArray );
        pages.setItem( COSName.COUNT, COSInteger.ZERO );
    }

在构建一个新的PDDocument的时候 底层使用了一个COSDocument进行替代 然后在document中有一个全局的字典记录器 trailer

大致示意图 画得太丑了 完全没有艺术细胞 哎




其实上面 如果有兴趣 你追踪看一下 发现 new COSDocument();

 /**
     * Constructor.  Uses memory to store stream.
     * <p>
     * Delegates to the two-argument constructor with a freshly created
     * {@code RandomAccessBuffer}.
     */
    public COSDocument()
    {
        // NOTE(review): the meaning of the second argument (false) is defined by the
        // two-argument constructor, which is not shown here
        this(new RandomAccessBuffer(), false);
    }

 /**
     * Default constructor. Sets up an empty buffer consisting of one chunk of
     * BUFFER_SIZE bytes, with every position and counter at zero.
     */
    public RandomAccessBuffer()
    {
        // allocate the first chunk and register it as the only entry of the chunk list
        currentBuffer = new byte[BUFFER_SIZE];
        bufferList = new ArrayList<byte[]>();
        bufferList.add(currentBuffer);

        // chunk bookkeeping: we are on chunk 0, which is also the last chunk
        bufferListIndex = 0;
        bufferListMaxIndex = 0;

        // nothing has been written or read yet
        currentBufferPointer = 0;
        pointer = 0;
        size = 0;
    }

会发现 COSDocument 实际上是初始化一个16k的内存堆块



接着看 创建了一个PDPage的构造函数 

 /**
     * Creates a new instance of PDPage with a size of 8.5x11.
     * <p>
     * The page is backed by a new dictionary whose TYPE entry is PAGE; its media
     * box is then set to {@code PAGE_SIZE_LETTER}.
     */
    public PDPage()
    {
        page = new COSDictionary();
        page.setItem( COSName.TYPE, COSName.PAGE );
        // the backing dictionary is assigned before the media box is set
        setMediaBox( PAGE_SIZE_LETTER );
    }

创建了一个page,页面的矩形大小为 LETTER(8.5x11 英寸),page拥有一个type和media_box属性

 /**
     * A page size of LETTER or 8.5x11.
     * <p>
     * Both dimensions are multiplied by {@code DEFAULT_USER_SPACE_UNIT_DPI}, so the
     * rectangle is expressed in user space units rather than inches
     * (presumably 72 units per inch — confirm against the constant's declaration).
     */
    public static final PDRectangle PAGE_SIZE_LETTER = 
        new PDRectangle( 8.5f*DEFAULT_USER_SPACE_UNIT_DPI, 11f*DEFAULT_USER_SPACE_UNIT_DPI );
page.setItem( COSName.MEDIA_BOX, mediaBoxValue.getCOSArray() );



接着看 document.addPage(page);  将创建的页面添加到文档中 估计就是和上面画的那张不太好看的图挂上钩   额 

  /**
     * This will add a page to the document.  This is a convenience method, that
     * will add the page to the root of the hierarchy and set the parent of the
     * page to the root.
     *
     * @param page The page to add to the document.
     */
    public void addPage( PDPage page )
    {
        // root node of the page tree, taken from the document catalog
        PDPageNode rootPages = getDocumentCatalog().getPages();
        // register the page as a kid of the root node ...
        rootPages.getKids().add( page );
        // ... and point the page back at the root node as its parent
        page.setParent( rootPages );
        // refresh the page count now that a kid has been added
        rootPages.updateCount();
    }

看看 第一个方法 getDocumentCatalog().getPages();  

这个方法就是返回rootDictionary中所包含的所有页面  

 /**
     * Returns the root node of the page tree, built from the PAGES entry of the
     * root dictionary.
     *
     * @return The parent page node.
     */
    public PDPageNode getPages()
    {
        COSDictionary pagesDictionary = (COSDictionary)root.getDictionaryObject( COSName.PAGES );
        return new PDPageNode( pagesDictionary );
    }

初始化的时候 rootDictionary中的page的计数是为0的


接着 第三行 page.setParent( rootPages );  将page指向root的page页


再看PDPageContentStream的构造器

/**
     * Create a new PDPage content stream.
     * <p>
     * Delegates to the four-argument constructor with {@code appendContent = false}
     * and {@code compress = true}.
     *
     * @param document The document the page is part of.
     * @param sourcePage The page to write the contents to.
     * @throws IOException If there is an error writing to the page contents.
     */
    public PDPageContentStream(PDDocument document, PDPage sourcePage) throws IOException
    {
        this(document, sourcePage, false, true);
    }

 /**
     * Create a new PDPage content stream.
     * <p>
     * Delegates to the five-argument constructor with {@code resetContext = false}.
     *
     * @param document The document the page is part of.
     * @param sourcePage The page to write the contents to.
     * @param appendContent Indicates whether content will be overwritten. If false all previous content is deleted.
     * @param compress Tell if the content stream should compress the page contents.
     * @throws IOException If there is an error writing to the page contents.
     */
    public PDPageContentStream(PDDocument document, PDPage sourcePage, boolean appendContent, boolean compress)
            throws IOException
    {
        this(document, sourcePage, appendContent, compress, false);
    }

 /**
     * Create a new PDPage content stream.
     * <p>
     * Two paths exist. When appending to a page that already has content, a new
     * stream is added next to the existing one(s) in a COSStreamArray. Otherwise a
     * fresh stream replaces whatever the page had. In both cases {@code output}
     * ends up pointing at the stream new content is written to, and the page is
     * given a resources object if it has none.
     *
     * @param document The document the page is part of.
     * @param sourcePage The page to write the contents to.
     * @param appendContent Indicates whether content will be overwritten. If false all previous content is deleted.
     * @param compress Tell if the content stream should compress the page contents.
     * @param resetContext Tell if the graphic context should be reseted.
     * @throws IOException If there is an error writing to the page contents.
     */
    public PDPageContentStream(PDDocument document, PDPage sourcePage, boolean appendContent, boolean compress,
            boolean resetContext) throws IOException
    {
        // Get the pdstream from the source page instead of creating a new one
        PDStream contents = sourcePage.getContents();
        boolean hasContent = contents != null;

        // If request specifies the need to append to the document
        if (appendContent && hasContent)
        {

            // Create a pdstream to append new content
            PDStream contentsToAppend = new PDStream(document);

            // This will be the resulting COSStreamArray after existing and new streams are merged
            COSStreamArray compoundStream = null;

            // If contents is already an array, a new stream is simply appended to it
            if (contents.getStream() instanceof COSStreamArray)
            {
                compoundStream = (COSStreamArray) contents.getStream();
                compoundStream.appendStream(contentsToAppend.getStream());
            }
            else
            {
                // Creates the COSStreamArray and adds the current stream plus a new one to it
                COSArray newArray = new COSArray();
                newArray.add(contents.getCOSObject());
                newArray.add(contentsToAppend.getCOSObject());
                compoundStream = new COSStreamArray(newArray);
            }

            // the appended stream gets a FLATE_DECODE filter when compression is requested
            if (compress)
            {
                List<COSName> filters = new ArrayList<COSName>();
                filters.add(COSName.FLATE_DECODE);
                contentsToAppend.setFilters(filters);
            }

            if (resetContext)
            {
                // create a new stream to encapsulate the existing stream
                PDStream saveGraphics = new PDStream(document);
                // output temporarily targets the helper stream so the save operator lands there
                output = saveGraphics.createOutputStream();
                // save the initial/unmodified graphics context
                saveGraphicsState();
                // close() is called here on the temporary output; output is reassigned further down
                close();
                if (compress)
                {
                    List<COSName> filters = new ArrayList<COSName>();
                    filters.add(COSName.FLATE_DECODE);
                    saveGraphics.setFilters(filters);
                }
                // insert the new stream at the beginning
                compoundStream.insertCOSStream(saveGraphics);
            }

            // Sets the compoundStream as page contents
            sourcePage.setContents(new PDStream(compoundStream));
            // from here on, writes go to the newly appended stream
            output = contentsToAppend.createOutputStream();
            if (resetContext)
            {
                // restore the initial/unmodified graphics context
                restoreGraphicsState();
            }
        }
        else
        {
            // overwrite path: either the page was empty or appending was not requested
            if (hasContent)
            {
                LOG.warn("You are overwriting an existing content, you should use the append mode");
            }
            contents = new PDStream(document);
            if (compress)
            {
                List<COSName> filters = new ArrayList<COSName>();
                filters.add(COSName.FLATE_DECODE);
                contents.setFilters(filters);
            }
            sourcePage.setContents(contents);
            output = contents.createOutputStream();
        }
        // number formatting: at most 10 fraction digits and no grouping separators
        formatDecimal.setMaximumFractionDigits(10);
        formatDecimal.setGroupingUsed(false);
        // this has to be done here, as the resources will be set to null when reseting the content stream
        resources = sourcePage.getResources();
        if (resources == null)
        {
            // give the page an empty resources object so later calls have somewhere to register entries
            resources = new PDResources();
            sourcePage.setResources(resources);
        }

    }

其实这个方法这么多  对于第一次创建PDPageContentStream的话  

contents = new PDStream(document);
用document的记录文件创建 PDStream
output = contents.createOutputStream();

并且将输出流指向PDStream  即指向document的记录文件中


至于  contentStream.beginText();
 contentStream.setFont(font, 20);
 contentStream.moveTextPositionByAmount(200, 300);  这几个方法  都比较简单  就是写一些命令 移动上面所见的page的位置


看drawString 


/**
     * Draws a string at the current text position. The text is wrapped in a
     * COSString, serialized, and emitted followed by a space and the show-text
     * operator.
     *
     * @param text The text to draw.
     * @throws IOException If an io exception occurs, or if beginText() has not been called.
     */
    public void drawString(String text) throws IOException
    {
        if (!inTextMode)
        {
            throw new IOException("Error: must call beginText() before drawString");
        }
        // serialize the string operand into a scratch buffer first
        COSString operand = new COSString(text);
        ByteArrayOutputStream serialized = new ByteArrayOutputStream();
        operand.writePDF(serialized);
        byte[] operandBytes = serialized.toByteArray();

        // operand, separator, operator
        appendRawCommands(operandBytes);
        appendRawCommands(SPACE);
        appendRawCommands(SHOW_TEXT);
    }


这里有一个COSString类 我估计就是这个类导致了中文的乱码  还是看看它的构造器吧 

  /**
     * Explicit constructor for ease of manual PDF construction.
     * <p>
     * If any character of the value is above 255 the bytes are stored as
     * UTF-16BE, prefixed with the two bytes 0xFE 0xFF; otherwise they are stored
     * as ISO-8859-1.
     *
     * @param value
     *            The string value of the object.
     */
    public COSString(String value)
    {
        try
        {
            // look for the first character that does not fit into a single byte
            boolean needsUnicode = false;
            for (char current : value.toCharArray())
            {
                if (current > 255)
                {
                    needsUnicode = true;
                    break;
                }
            }

            if (!needsUnicode)
            {
                byte[] latinBytes = value.getBytes("ISO-8859-1");
                out = new ByteArrayOutputStream(latinBytes.length);
                out.write(latinBytes);
            }
            else
            {
                byte[] utf16Bytes = value.getBytes("UTF-16BE");
                // two extra bytes for the FE FF prefix
                out = new ByteArrayOutputStream(utf16Bytes.length + 2);
                out.write(0xFE);
                out.write(0xFF);
                out.write(utf16Bytes);
            }
        }
        catch (IOException ignore)
        {
            // should never happen
            LOG.error(ignore,ignore);
        }
    }


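       从构造器可以看到  只有当字符串中所有字符的编码都不大于255的时候  才使用ISO-8859-1获取字节  只要有一个字符大于255(例如中文)  就会改用UTF-16BE 并在前面写入FE FF标记  所以中文其实走的是UTF-16BE分支  乱码的原因并不只在这个构造器  其实还有很多地方都是用的是ISO-8859-1(另外上面使用的HELVETICA_BOLD字体本身很可能也不包含中文字形)  所以目前还不清楚  到底需要改哪些地方 才能正确输出中文  貌似对中文的支持确实不太好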

    好吧  接着看 其实上面就是向PDPageContentStream的output输出流中写入字节  其实就是向document的记录文件中写入字节


接着 contentStream.close();  一定要  close()  因为 

public class PDPageContentStream implements Closeable

实现了Closeable接口


接下来的这步操作  就是执行输出操作的地方了  也是用到了访问者模式的地方  

document.save(filePath);

/**
     * Saves the document to the file with the given name by delegating to the
     * {@code File} overload.
     *
     * @param fileName The file to save as.
     *
     * @throws IOException If there is an error saving the document.
     * @throws COSVisitorException If an error occurs while generating the data.
     */
    public void save( String fileName ) throws IOException, COSVisitorException
    {
        File target = new File( fileName );
        save( target );
    }

/**
     * Saves the document to the given file by opening a file output stream on it
     * and delegating to the stream overload.
     *
     * @param file The file to save as.
     *
     * @throws IOException If there is an error saving the document.
     * @throws COSVisitorException If an error occurs while generating the data.
     */
    public void save( File file ) throws IOException, COSVisitorException
    {
        FileOutputStream fileStream = new FileOutputStream( file );
        save( fileStream );
    }

 /**
     * This will save the document to an output stream.
     * <p>
     * The page count is refreshed first, then a COSWriter writes this document to
     * the stream. The writer is closed exactly once, in the finally block,
     * whether or not writing succeeds.
     *
     * @param output The stream to write to.
     *
     * @throws IOException If there is an error writing the document.
     * @throws COSVisitorException If an error occurs while generating the data.
     */
    public void save( OutputStream output ) throws IOException, COSVisitorException
    {
        //update the count in case any pages have been added behind the scenes.
        getDocumentCatalog().getPages().updateCount();
        COSWriter writer = null;
        try
        {
            writer = new COSWriter( output );
            writer.write( this );
        }
        finally
        {
            // single close point; the former extra close() inside the try was redundant
            if( writer != null )
            {
                writer.close();
            }
        }
    }


上面有一句

writer.write( this );

看看它的源码


/**
     * This will write the pdf document.
     * <p>
     * Steps, in order: remember the document (preparing an incremental update if
     * enabled), decide whether the output will be encrypted, make sure the
     * trailer carries an ID array, and finally let the underlying COSDocument
     * accept this writer as its visitor.
     *
     * @param doc The document to write.
     *
     * @throws COSVisitorException If an error occurs while generating the data.
     */
    public void write(PDDocument doc) throws COSVisitorException
    {
        // seed for the document ID: the document's own id if it has one, else the current time
        Long idTime = doc.getDocumentId() == null ? System.currentTimeMillis() : 
                                                    doc.getDocumentId();
        
        document = doc;
        if(incrementalUpdate)
        {
            prepareIncrement(doc);
        }
        
        // if the document says we should remove encryption, then we shouldn't encrypt
        if(doc.isAllSecurityToBeRemoved())
        {
            this.willEncrypt = false;
            // also need to get rid of the "Encrypt" in the trailer so readers 
            // don't try to decrypt a document which is not encrypted
            COSDocument cosDoc = doc.getDocument();
            COSDictionary trailer = cosDoc.getTrailer();
            trailer.removeItem(COSName.ENCRYPT);
        }
        else
        {
            // encrypt only when the document has a security handler
            SecurityHandler securityHandler = document.getSecurityHandler();
            if(securityHandler != null)
            {
                try
                {
                    securityHandler.prepareDocumentForEncryption(document);
                    this.willEncrypt = true;
                }
                catch(IOException e)
                {
                    // preparation failures are rethrown wrapped in COSVisitorException
                    throw new COSVisitorException( e );
                }
                catch(CryptographyException e)
                {
                    throw new COSVisitorException( e );
                }
            }
            else
            {
                    this.willEncrypt = false;
            }        
        }

        COSDocument cosDoc = document.getDocument();
        COSDictionary trailer = cosDoc.getTrailer();
        COSArray idArray = (COSArray)trailer.getDictionaryObject( COSName.ID );
        // (re)generate the ID when the trailer has none or when doing an incremental update
        if( idArray == null || incrementalUpdate)
        {
            try
            {

                //algorithm says to use time/path/size/values in doc to generate
                //the id.  We don't have path or size, so do the best we can
                MessageDigest md = MessageDigest.getInstance( "MD5" );
                md.update( Long.toString(idTime).getBytes("ISO-8859-1") );
                COSDictionary info = (COSDictionary)trailer.getDictionaryObject( COSName.INFO );
                if( info != null )
                {
                    // mix the string form of every INFO value into the digest
                    Iterator<COSBase> values = info.getValues().iterator();
                    while( values.hasNext() )
                    {
                        md.update( values.next().toString().getBytes("ISO-8859-1") );
                    }
                }
                // the same digest is stored twice in the ID array
                idArray = new COSArray();
                COSString id = new COSString( md.digest() );
                idArray.add( id );
                idArray.add( id );
                trailer.setItem( COSName.ID, idArray );
            }
            catch( NoSuchAlgorithmException e )
            {
                throw new COSVisitorException( e );
            }
            catch( UnsupportedEncodingException e )
            {
                throw new COSVisitorException( e );
            }
        }
        // visitor entry point: the COSDocument accepts this writer as its visitor
        cosDoc.accept(this);
    }


上面那个方法  重点在   cosDoc.accept(this);  方法上  即COSDocument接受COSWriter对象的访问 

那么访问者模式必有的四要素

访问者接口

访问者实现类

目标对象接口

目标对象实现类


在这里 

访问者接口即是  ICOSVisitor

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.cos;

import org.apache.pdfbox.exceptions.COSVisitorException;

/**
 * Visitor side of the visitor pattern for the low-level (COS) PDF object model.
 *
 * <p>One callback is declared per COS object type. Every callback receives the
 * object being visited, may return any value the concrete visitor chooses
 * (including {@code null}), and reports failures via {@link COSVisitorException}.</p>
 *
 * @author Michael Traut
 * @version $Revision: 1.6 $
 */
public interface ICOSVisitor
{
    /**
     * Callback invoked when an array object is visited.
     *
     * @param obj the array being visited
     * @return a visitor-defined result, possibly {@code null}
     * @throws COSVisitorException if visiting the object fails
     */
    Object visitFromArray(COSArray obj) throws COSVisitorException;

    /**
     * Callback invoked when a boolean object is visited.
     *
     * @param obj the boolean being visited
     * @return a visitor-defined result, possibly {@code null}
     * @throws COSVisitorException if visiting the object fails
     */
    Object visitFromBoolean(COSBoolean obj) throws COSVisitorException;

    /**
     * Callback invoked when a dictionary object is visited.
     *
     * @param obj the dictionary being visited
     * @return a visitor-defined result, possibly {@code null}
     * @throws COSVisitorException if visiting the object fails
     */
    Object visitFromDictionary(COSDictionary obj) throws COSVisitorException;

    /**
     * Callback invoked when a document object is visited.
     *
     * @param obj the document being visited
     * @return a visitor-defined result, possibly {@code null}
     * @throws COSVisitorException if visiting the object fails
     */
    Object visitFromDocument(COSDocument obj) throws COSVisitorException;

    /**
     * Callback invoked when a float object is visited.
     *
     * @param obj the float being visited
     * @return a visitor-defined result, possibly {@code null}
     * @throws COSVisitorException if visiting the object fails
     */
    Object visitFromFloat(COSFloat obj) throws COSVisitorException;

    /**
     * Callback invoked when an integer object is visited.
     *
     * @param obj the integer being visited
     * @return a visitor-defined result, possibly {@code null}
     * @throws COSVisitorException if visiting the object fails
     */
    Object visitFromInt(COSInteger obj) throws COSVisitorException;

    /**
     * Callback invoked when a name object is visited.
     *
     * @param obj the name being visited
     * @return a visitor-defined result, possibly {@code null}
     * @throws COSVisitorException if visiting the object fails
     */
    Object visitFromName(COSName obj) throws COSVisitorException;

    /**
     * Callback invoked when a null object is visited.
     *
     * @param obj the null object being visited
     * @return a visitor-defined result, possibly {@code null}
     * @throws COSVisitorException if visiting the object fails
     */
    Object visitFromNull(COSNull obj) throws COSVisitorException;

    /**
     * Callback invoked when a stream object is visited.
     *
     * @param obj the stream being visited
     * @return a visitor-defined result, possibly {@code null}
     * @throws COSVisitorException if visiting the object fails
     */
    Object visitFromStream(COSStream obj) throws COSVisitorException;

    /**
     * Callback invoked when a string object is visited.
     *
     * @param obj the string being visited
     * @return a visitor-defined result, possibly {@code null}
     * @throws COSVisitorException if visiting the object fails
     */
    Object visitFromString(COSString obj) throws COSVisitorException;
}

可以看到 有很多访问的操作方法


访问者实现类 有很多 这里只列举  COSWriter  的一个实现方法 

 /**
     * Writes out a complete COS document when this writer visits it.
     *
     * <p>Steps, in order: header (skipped for incremental updates), body,
     * cross-reference data, trailer (when applicable), then the
     * {@code STARTXREF} marker, the xref start offset and the {@code EOF}
     * marker. For incremental updates {@code doWriteSignature} is invoked last.</p>
     *
     * @param doc The document that is being visited.
     *
     * @return always null
     *
     * @throws COSVisitorException wrapping any IOException or SignatureException
     *         raised while writing.
     */
    public Object visitFromDocument(COSDocument doc) throws COSVisitorException
    {
        try
        {
            // a full save starts with the header; an incremental update appends only
            if (!incrementalUpdate)
            {
                doWriteHeader(doc);
            }
            doWriteBody(doc);

            // XREF_STM value of the previous trailer; -1 when the document has no trailer
            final COSDictionary previousTrailer = doc.getTrailer();
            final long hybridPrev = (previousTrailer == null)
                    ? -1
                    : previousTrailer.getLong(COSName.XREF_STM);

            if (!incrementalUpdate)
            {
                doWriteXRef(doc);
            }
            else
            {
                doWriteXRefInc(doc, hybridPrev);
            }

            // the trailer section is only meant for xref tables, not xref streams:
            // skip it solely for an incremental update of an xref-stream document
            // that has no hybrid XREF_STM entry
            if (!(incrementalUpdate && doc.isXRefStream() && hybridPrev == -1))
            {
                doWriteTrailer(doc);
            }

            // closing section: STARTXREF marker, xref offset, EOF marker
            getStandardOutput().write(STARTXREF);
            getStandardOutput().writeEOL();
            getStandardOutput().write(String.valueOf(getStartxref()).getBytes("ISO-8859-1"));
            getStandardOutput().writeEOL();
            getStandardOutput().write(EOF);
            getStandardOutput().writeEOL();

            if (incrementalUpdate)
            {
                doWriteSignature(doc);
            }

            return null;
        }
        catch (IOException ioFailure)
        {
            throw new COSVisitorException(ioFailure);
        }
        catch (SignatureException signatureFailure)
        {
            throw new COSVisitorException(signatureFailure);
        }
    }


目标对象接口  COSBase  有一个抽象的 可接受访问的方法

 /**
     * Double-dispatch entry point of the visitor pattern.
     *
     * <p>Declared abstract here, so each concrete subclass supplies its own
     * implementation of how the given visitor is notified.</p>
     *
     * @param visitor The object to notify when visiting this object.
     * @return any object, depending on the visitor implementation, or null
     * @throws COSVisitorException If an error occurs while visiting this object.
     */
    public abstract Object accept(ICOSVisitor visitor) throws COSVisitorException;


目标对象实现类 也很多 只列举 COSDocument 的

  /**
     * Double-dispatch hook for documents: hands this instance to the visitor's
     * {@code visitFromDocument} callback and returns whatever that callback yields.
     *
     * @param visitor The visitor to notify about this document.
     * @return the value returned by the visitor's callback, which may be null
     * @throws COSVisitorException If the visitor fails while handling this document.
     */
    @Override
    public Object accept(ICOSVisitor visitor) throws COSVisitorException
    {
        final Object visitResult = visitor.visitFromDocument(this);
        return visitResult;
    }


所以  在上面的write方法中  调用  cosDoc.accept(this); 的时候  

实际上  调用了上面的accept方法  接着调用了 COSWriter的visitFromDocument方法  实现最终的文档内容的输出


对于访问者模式,感觉就是:对同一类对象,不同的访问者实现类可以做不同的事情。不过还是不太明白,主要是没有真正用过,只是学过、看过,理解还不深。记录学习的脚步,接着努力学习……



posted on 2014-09-08 17:22  liangxinzhi  阅读(1379)  评论(1)  编辑  收藏  举报