import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.util.PDFOperator;
/**
 * Utility that strips text watermarks from PDF files by blanking the string
 * operand of matching {@code Tj} (show-text) operators in each page's
 * content stream.
 *
 * <p>Written against Apache PDFBox 1.8.x:
 * <pre>{@code
 * <!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
 * <dependency>
 *     <groupId>org.apache.pdfbox</groupId>
 *     <artifactId>pdfbox</artifactId>
 *     <version>1.8.3</version>
 * </dependency>
 * }</pre>
 *
 * @author Cloud-Top
 */
public class PdfUtil {

    /**
     * Default watermark marker. According to the original author's note, the
     * watermark text "有效期至" shows up garbled as this sequence when the
     * content-stream string is decoded, so the garbled form is what gets matched.
     */
    private static final String DEFAULT_WATERMARK_MARKER = "eHgó";

    /** Content-stream operator that shows a single string operand. */
    private static final String SHOW_TEXT_OPERATOR = "Tj";

    public static void main(String[] args) {
        removePdfWatermark("C:\\Users\\Cloud-Top\\eclipse-workspace\\demo\\src\\main\\resources\\watermark_pdf\\1629680832866.pdf",
                "C:\\Users\\Cloud-Top\\1629680832866.pdf");
    }

    /**
     * Removes the default watermark text from a PDF and writes the result to a
     * new file.
     *
     * <p>I/O and PDF serialization failures are printed to standard error and
     * are not propagated to the caller.
     *
     * @param sourPath path of the source PDF
     * @param savePath path the cleaned PDF is written to
     */
    public static void removePdfWatermark(String sourPath, String savePath) {
        removePdfWatermark(sourPath, savePath, DEFAULT_WATERMARK_MARKER);
    }

    /**
     * Removes every {@code Tj} string containing {@code marker} from a PDF and
     * writes the result to a new file.
     *
     * <p>Only the single-string {@code Tj} operator is examined; text shown via
     * the array-based {@code TJ} operator is left untouched. The marker is
     * compared against the string as decoded by {@code COSString.getString()},
     * which may be a garbled form of the visible text; each decoded string is
     * echoed to standard output so the right marker can be looked up.
     *
     * <p>I/O and PDF serialization failures are printed to standard error and
     * are not propagated to the caller. The document is always closed.
     *
     * @param sourPath path of the source PDF
     * @param savePath path the cleaned PDF is written to
     * @param marker   non-empty substring identifying strings to blank
     * @throws IllegalArgumentException if {@code marker} is null or empty
     */
    public static void removePdfWatermark(String sourPath, String savePath, String marker) {
        if (marker == null || marker.length() == 0) {
            // An empty marker would match (and wipe) every string in the document.
            throw new IllegalArgumentException("marker must be a non-empty string");
        }

        PDDocument document = null;
        try {
            // Load the source file.
            document = PDDocument.load(new File(sourPath));

            // getAllPages() returns a raw List in PDFBox 1.8.
            @SuppressWarnings("unchecked")
            List<PDPage> allPages = document.getDocumentCatalog().getAllPages();

            for (PDPage page : allPages) {
                PDStream contents = page.getContents();
                if (contents == null) {
                    // Page without a content stream: nothing to rewrite.
                    continue;
                }

                PDFStreamParser parser = new PDFStreamParser(contents.getStream());
                parser.parse();
                List<Object> tokens = parser.getTokens();

                blankMatchingStrings(tokens, marker);

                // Serialize the (possibly modified) tokens into a fresh stream
                // and swap it in as the page's content.
                PDStream updatedStream = new PDStream(document);
                OutputStream out = updatedStream.createOutputStream();
                try {
                    ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
                    tokenWriter.writeTokens(tokens);
                } finally {
                    // Close so buffered bytes are flushed into the stream.
                    out.close();
                }
                page.setContents(updatedStream);
            }

            // Write the cleaned document to the target path.
            document.save(savePath);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (COSVisitorException e) {
            e.printStackTrace();
        } finally {
            if (document != null) {
                try {
                    document.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Clears the operand of every {@code Tj} operator whose decoded string
     * contains {@code marker}. Tokens are modified in place.
     */
    private static void blankMatchingStrings(List<Object> tokens, String marker) {
        // Start at 1: an operator at index 0 has no preceding operand.
        for (int j = 1; j < tokens.size(); j++) {
            Object token = tokens.get(j);
            if (!(token instanceof PDFOperator)) {
                continue;
            }
            PDFOperator op = (PDFOperator) token;
            if (!SHOW_TEXT_OPERATOR.equals(op.getOperation())) {
                continue;
            }

            // Tj takes one operand, the string to display, which precedes it.
            Object operand = tokens.get(j - 1);
            if (!(operand instanceof COSString)) {
                // Malformed stream; leave it alone rather than fail the whole file.
                continue;
            }
            COSString previous = (COSString) operand;
            String string = previous.getString();
            System.out.println(string);
            if (string.contains(marker)) {
                // Empty the matched string so nothing is drawn for it.
                previous.reset();
            }
        }
    }
}