C#: Extracting text and images from a PDF with iTextSharp

 

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Text;
using iTextSharp.text.pdf;
using iTextSharp;
using iTextSharp.text.pdf.parser;
using Dotnet = System.Drawing.Image;


namespace OfficeDoumentWebApp
{


    /// <summary>
    /// Web Forms page that reads a PDF with iTextSharp, writes the text of every page
    /// to the HTTP response and saves the image XObjects of every page as JPEG files.
    /// Author: geovindu, Geovin Du (Tu Juwen).
    /// </summary>
    public partial class ItexPDFForm : System.Web.UI.Page
    {
        /// <summary>
        /// Directory the extracted images are written to. Holds a placeholder until
        /// <see cref="Page_Load"/> replaces it with the per-request output directory.
        /// </summary>
        string imgPath="geovindu";

        /// <summary>
        /// On the first (non-postback) request, creates a time-stamped output directory
        /// under "OutFile/", extracts text and images from the source PDF, and writes the
        /// collected text to the response.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            StringBuilder sb = new StringBuilder();
            try
            {
                if (!IsPostBack)
                {
                    // One output directory per request, named after the current timestamp.
                    string datafile = DateTime.Now.ToString("yyyyMMddHHmmssfff");
                    string urc = Server.MapPath("OutFile/" + datafile + "/");
                    if (!Directory.Exists(urc))
                    {
                        Directory.CreateDirectory(urc);
                    }

                    imgPath = urc;
                    string sourcefile = Server.MapPath("SourceFile/珠宝RFID实施方案.pdf");

                    PdfReader pdfReader = new PdfReader(sourcefile);
                    try
                    {
                        for (int pageNumber = 1; pageNumber <= pdfReader.NumberOfPages; pageNumber++)
                        {
                            // Text first, so a page without usable resources still contributes its text.
                            sb.Append(PdfTextExtractor.GetTextFromPage(pdfReader, pageNumber));
                            ExtractPageImages(pdfReader, pageNumber);
                        }
                    }
                    finally
                    {
                        // Release the handle on the source file even when extraction fails.
                        pdfReader.Close();
                    }
                }
            }
            catch (Exception ex)
            {
                // Encoded because the message may contain characters that are markup in HTML.
                Response.Write(Server.HtmlEncode(ex.Message));
            }

            // PDF text is arbitrary content; encode it so '<' and '&' are shown literally
            // instead of being interpreted as markup by the browser.
            Response.Write(Server.HtmlEncode(sb.ToString()));
        }

        /// <summary>
        /// Saves every image XObject that is directly referenced by the resources of one page.
        /// Non-image XObjects (for example forms) are skipped.
        /// </summary>
        /// <param name="pdfReader">Open reader for the source document.</param>
        /// <param name="pageNumber">1-based page number.</param>
        private void ExtractPageImages(PdfReader pdfReader, int pageNumber)
        {
            PdfDictionary pg = pdfReader.GetPageN(pageNumber);
            if (pg == null)
            {
                return;
            }

            PdfDictionary res = PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES)) as PdfDictionary;
            if (res == null)
            {
                return;
            }

            PdfDictionary xobj = PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT)) as PdfDictionary;
            if (xobj == null)
            {
                return;
            }

            // Named colour spaces used by an image are declared in the page resources,
            // not in the image dictionary itself. May be null when the page declares none.
            PdfDictionary colorSpaces = PdfReader.GetPdfObject(res.Get(PdfName.COLORSPACE)) as PdfDictionary;

            int imageIndex = 0;
            foreach (PdfName name in xobj.Keys)
            {
                PdfObject obj = xobj.Get(name);
                if (obj == null || !obj.IsIndirect())
                {
                    continue;
                }

                PdfDictionary tg = PdfReader.GetPdfObject(obj) as PdfDictionary;
                if (tg == null)
                {
                    continue;
                }

                // Only /Subtype /Image entries carry pixel data.
                PdfObject subtype = PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE));
                if (!PdfName.IMAGE.Equals(subtype))
                {
                    continue;
                }

                ImageRenderInfo imgRI = ImageRenderInfo.CreateForXObject(new GraphicsState(), (PRIndirectReference)obj, colorSpaces);
                if (imgRI != null)
                {
                    RenderImage(imgRI, pageNumber, imageIndex);
                    imageIndex++;
                }
            }
        }

        /// <summary>
        /// Reads the non-public instance field named "gs" of a
        /// <see cref="PathPaintingRenderInfo"/> through reflection.
        /// </summary>
        /// <param name="renderInfo">Render info whose field is read.</param>
        /// <returns>The field value cast to <see cref="GraphicsState"/>.</returns>
        /// <remarks>
        /// NOTE(review): relies on the library keeping a private field called "gs";
        /// if the field is missing, <c>gsField</c> is null and this throws. Confirm
        /// against the iTextSharp version in use.
        /// </remarks>
        GraphicsState getGraphicsState(PathPaintingRenderInfo renderInfo)
        {
            System.Reflection.FieldInfo gsField = typeof(PathPaintingRenderInfo).GetField("gs", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);
            return (GraphicsState)gsField.GetValue(renderInfo);
        }


        /// <summary>
        /// Extracts one image and saves it as a JPEG file in <see cref="imgPath"/>.
        /// Author: geovindu, Geovin Du (Tu Juwen).
        /// </summary>
        /// <param name="renderInfo">Render info describing the image to save.</param>
        /// <param name="number">1-based page number; used as the file name.</param>
        /// <param name="imageIndex">
        /// 0-based index of the image within the page. Index 0 keeps the historical
        /// name "{number}.jpg"; later images are saved as "{number}_{imageIndex}.jpg"
        /// so that several images on one page no longer overwrite each other.
        /// </param>
        private void RenderImage(ImageRenderInfo renderInfo, int number, int imageIndex = 0)
        {
            if (renderInfo == null)
            {
                return;
            }

            PdfImageObject image = renderInfo.GetImage();
            if (image == null)
            {
                return;
            }

            using (Dotnet dotnetImg = image.GetDrawingImage())
            {
                if (dotnetImg == null)
                {
                    return;
                }

                string fileName = imageIndex == 0
                    ? number + ".jpg"
                    : number + "_" + imageIndex + ".jpg";

                // Pass the format explicitly: saving without one does not guarantee JPEG
                // content, whatever the file extension says.
                dotnetImg.Save(Path.Combine(imgPath, fileName), ImageFormat.Jpeg);
            }
        }


    }
}

  

 

 

ABCpdf.NET
https://www.nuget.org/packages/ABCpdf
http://test.websupergoo.com/helppdfnet/default.htm?page=source%2f3-concepts%2fg-htmlrender.htm

posted @ 2022-09-08 17:37  ®Geovin Du Dream Park™  阅读(86)  评论(0)    收藏  举报