iText例子

参考:http://itextpdf.com/book/examples.php

 

daniel@daniel-mint ~/latex/linux/itext/daniel $ cat HelloWorldNarrow.java 
import java.io.FileOutputStream;
import java.io.IOException;
 
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfWriter;
 
/**
 * Hello World: document constructor.
 *
 * <p>Writes a one-paragraph PDF using a custom page size and custom margins.
 */
public class HelloWorldNarrow {

    /** Path to the resulting PDF file. */
    public static final String RESULT
        = "daniel.pdf";

    /**
     * Creates a PDF file at the path given by {@link #RESULT}.
     *
     * @param args no arguments needed
     * @throws DocumentException if iText rejects the writer setup or the added content
     * @throws IOException if the output file cannot be created, written or closed
     */
    public static void main(String[] args)
         throws DocumentException, IOException {
        // step 1
        // Using a custom page size (480 x 720) and margins passed as
        // left = 36, right = 72, top = 108, bottom = 180.
        Rectangle pagesize = new Rectangle(480f, 720f);
        Document document = new Document(pagesize, 36f, 72f, 108f, 180f);
        // The stream is managed by try-with-resources so the file handle is
        // released even when one of the steps below throws.
        try (FileOutputStream out = new FileOutputStream(RESULT)) {
            // step 2
            PdfWriter.getInstance(document, out);
            // step 3
            document.open();
            // step 4
            document.add(new Paragraph(
                "Hello World! Hello People! " +
                "Hello Sky! Hello Sun! Hello Moon! Hello Stars!"));
            // step 5
            // Closing the document finishes the PDF; a second close of the
            // FileOutputStream by try-with-resources is a harmless no-op.
            document.close();
        }
    }
}
daniel@daniel-mint ~/latex/linux/itext/daniel $ 

  

 

 

daniel@daniel-mint ~/latex/linux/itext/daniel $ 
daniel@daniel-mint ~/latex/linux/itext/daniel $ ls
HelloWorldNarrow.java  run.sh
daniel@daniel-mint ~/latex/linux/itext/daniel $ cat run.sh 
# Compile, run and clean up a single-class iText example.
# Usage: bash run.sh <ClassName>    (class name without the .java extension)
if [ -z "$1" ]; then
    echo "usage: bash run.sh <ClassName>" >&2
    exit 1
fi
CP="../itextpdf-5.5.2.jar:."
# Run only when compilation succeeded, so a stale or missing class is never executed.
javac -cp "$CP" "$1.java" && java -cp "$CP" "$1"
status=$?
# -f: do not complain when compilation failed and no class file exists.
rm -f "$1.class"
exit $status
daniel@daniel-mint ~/latex/linux/itext/daniel $ bash run.sh HelloWorldNarrow
daniel@daniel-mint ~/latex/linux/itext/daniel $ ls
daniel.pdf  HelloWorldNarrow.java  run.sh
daniel@daniel-mint ~/latex/linux/itext/daniel $ 

  

daniel@daniel-mint ~/latex/linux/itext/daniel $ 
daniel@daniel-mint ~/latex/linux/itext/daniel $ cat daniel.pdf 
%PDF-1.4
%���
2 0 obj
<</Length 96/Filter/FlateDecode>>stream
x�+�r
�26S034I�2P�5�1��
                 @Bi\�99
��E9)�
N@j~AN*��]	g�������0�fH�d
                                 4�K����\C����&
endstream
endobj
4 0 obj
<</Parent 3 0 R/Contents 2 0 R/Type/Page/Resources<</Font<</F1 1 0 R>>>>/MediaBox[0 0 480 720]>>
endobj
1 0 obj
<</BaseFont/Helvetica/Type/Font/Encoding/WinAnsiEncoding/Subtype/Type1>>
endobj
3 0 obj
<</Type/Pages/Count 1/Kids[4 0 R]>>
endobj
5 0 obj
<</Type/Catalog/Pages 3 0 R>>
endobj
6 0 obj
<</Producer(iText� 5.5.2 �2000-2014 iText Group NV \(AGPL-version\))/ModDate(D:20140819175007+08'00')/CreationDate(D:20140819175007+08'00')>>
endobj
xref
0 7
0000000000 65535 f 
0000000289 00000 n 
0000000015 00000 n 
0000000377 00000 n 
0000000177 00000 n 
0000000428 00000 n 
0000000473 00000 n 
trailer
<</Root 5 0 R/ID [<8f313f0b956c31e86d082ed4ab592eeb><8f313f0b956c31e86d082ed4ab592eeb>]/Info 6 0 R/Size 7>>
%iText-5.5.2
startxref
630
%%EOF
daniel@daniel-mint ~/latex/linux/itext/daniel $ 

  

通过 iText RUPS 解析出的 stream 内容如下:

q
BT
36 612 Td
0 -18 Td
/F1 12 Tf
(Hello World! Hello People! Hello Sky! Hello Sun! Hello Moon! Hello) Tj
0 0 Td
0 -18 Td
(Stars!) Tj
0 0 Td
ET
Q

  

daniel@daniel-mint ~/latex/linux/itext/daniel $ awk '/stream/ {p=1}; p; /endstream/ {p=0}' daniel.pdf | sed -re '{s/.*stream.*//g}' | awk 'NF!=0' | hexdump -C
00000000  78 9c 2b e4 72 0a e1 32  36 53 30 33 34 52 08 49  |x.+.r..26S034R.I|
00000010  e1 32 50 d0 35 b4 00 31  f4 dd 0c 15 40 42 69 5c  |.2P.5..1....@Bi\|
00000020  1a 1e a9 39 39 f9 0a e1  f9 45 39 29 8a 0a 10 4e  |...99....E9)...N|
00000030  40 6a 7e 41 4e 2a 8c 17  9c 5d 09 67 96 e6 c1 98  |@j~AN*...].g....|
00000040  be f9 f9 30 b6 66 48 16  d0 64 03 14 0b 34 82 4b  |...0.fH..d...4.K|
00000050  12 8b 8a 15 91 a4 5c 43  b8 02 b9 00 fc 98 26 ef  |......\C......&.|
00000060  0a                                                |.|
00000061
daniel@daniel-mint ~/latex/linux/itext/daniel $ 

  

daniel@daniel-mint ~/latex/linux/itext/daniel $ awk '/stream/ {p=1}; p; /endstream/ {p=0}' daniel.pdf | sed -re '{s/.*stream.*//g}' | awk 'NF!=0' | zlib-flate -uncompress
q
BT
36 612 Td
0 -18 Td
/F1 12 Tf
(Hello World! Hello People! Hello Sky! Hello Sun! Hello Moon! Hello)Tj
0 0 Td
0 -18 Td
(Stars!)Tj
0 0 Td
ET
Q

  

其中 zlib-flate 命令由 qpdf 软件包提供。

 

直接修改pdf文件

daniel@daniel-mint ~/latex/linux/itext/daniel $ awk '/stream/ {p=1}; p; /endstream/ {p=0}' daniel.pdf | sed -re '{s/.*stream.*//g}' | awk 'NF!=0' | zlib-flate -uncompress > de.bin
daniel@daniel-mint ~/latex/linux/itext/daniel $ cat de.bin 
q
BT
36 612 Td
0 -18 Td
/F1 12 Tf
(Hello World! Hello People! Hello Sky! Hello Sun! Hello Moon! Hello)Tj
0 0 Td
0 -18 Td
(Stars!)Tj
0 0 Td
ET
Q
daniel@daniel-mint ~/latex/linux/itext/daniel $ sed -e 's/Hello.*Hello/Daniel King is a software engineer!/g' de.bin 
q
BT
36 612 Td
0 -18 Td
/F1 12 Tf
(Daniel King is a software engineer!)Tj
0 0 Td
0 -18 Td
(Stars!)Tj
0 0 Td
ET
Q
daniel@daniel-mint ~/latex/linux/itext/daniel $ sed -e 's/Hello.*Hello/Daniel King is a software engineer!/g' de.bin > de1.bin
daniel@daniel-mint ~/latex/linux/itext/daniel $ cat daniel.pdf | awk 'BEGIN{p=1}; p; /stream/{p=0};' > reassembled.pdf
daniel@daniel-mint ~/latex/linux/itext/daniel $ cat de1.bin | zlib-flate -compress >> reassembled.pdf 
daniel@daniel-mint ~/latex/linux/itext/daniel $ echo -e '\nendstream' >> reassembled.pdf
daniel@daniel-mint ~/latex/linux/itext/daniel $ cat daniel.pdf | awk 'BEGIN{p=0}; p; /endstream/{p=1};' >> reassembled.pdf

  

结果虽然能在 PDF 阅读器中显示,但是用 pdf2txt 处理会失败,因此这种 hack 并不完美。原因很可能是:修改后压缩流的长度发生了变化,而流对象字典中的 /Length 以及 xref 表中各对象的字节偏移量都没有同步更新。

 

我们再看一下PoDoFo的实现

 802   
 803   
 804   
 805     m_oss.str("");
 806     m_oss << "BT" << std::endl << "/" << m_pFont->GetIdentifier().GetName()
 807           << " "  << m_pFont->GetFontSize() 
 808           << " Tf" << std::endl;         
 809   
 810     if (currentTextRenderingMode != ePdfTextRenderingMode_Fill) {
 811         SetCurrentTextRenderingMode(); 
 812     }
 813   
 814     //if( m_pFont->GetFontScale() != 100.0F ) - this value is kept between text blocks
 815     m_oss << m_pFont->GetFontScale() << " Tz" << std::endl;
 816   
 817     //if( m_pFont->GetFontCharSpace() != 0.0F )  - this value is kept between text blocks
 818     m_oss << m_pFont->GetFontCharSpace() * m_pFont->GetFontSize() / 100.0 << " Tc" << std::endl; 
 819   
 820     m_oss << dX << std::endl
 821           << dY << std::endl << "Td ";   
 822   
 823     m_pCanvas->Append( m_oss.str() );
 824     m_pFont->WriteStringToStream( sString, m_pCanvas );
 825   
 826     /*
 827     char* pBuffer;
 828     std::auto_ptr<PdfFilter> pFilter = PdfFilterFactory::Create( ePdfFilter_ASCIIHexDecode );
 829     pFilter->Encode( sString.GetString(), sString.GetLength(), &pBuffer, &lLen );
 830   
 831     m_pCanvas->Append( pBuffer, lLen );
 832     free( pBuffer );
 833     */
 834   
 835     m_pCanvas->Append( " Tj\nET\n" );
 836 } 

可以看到,它依次调用了

Tf:     选择字体,设置字体大小
Tz:     设置水平缩放比例
Tc:     设置字符间距
Td:     设置文本位置
Tj:     显示字符串

  

posted @ 2014-08-19 17:50  Daniel King  阅读(720)  评论(0)  编辑  收藏  举报