C#: Extracting text and images from a PDF with iTextSharp
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Text;
using iTextSharp.text.pdf;
using iTextSharp;
using iTextSharp.text.pdf.parser;
using Dotnet = System.Drawing.Image;
namespace OfficeDoumentWebApp
{
    /// <summary>
    /// Web Forms page that opens a PDF with iTextSharp, writes the text of every
    /// page to the HTTP response and saves the image XObjects referenced by each
    /// page as JPEG files in a per-request output directory.
    /// Author: geovindu, Geovin Du (涂聚文)
    /// </summary>
    public partial class ItexPDFForm : System.Web.UI.Page
    {
        /// <summary>
        /// Directory that extracted images are written to. It is replaced by a
        /// freshly created, timestamp-named directory on the first (non-postback)
        /// page load, before any image is saved.
        /// </summary>
        string imgPath = "geovindu";

        /// <summary>
        /// On the initial request, extracts the text and images of every page of
        /// the source PDF. The collected text (and the message of any exception
        /// that interrupted extraction) is written to the response, HTML-encoded.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            StringBuilder sb = new StringBuilder();
            try
            {
                if (!IsPostBack)
                {
                    // One output directory per request, named after the current timestamp.
                    string datafile = DateTime.Now.ToString("yyyyMMddHHmmssfff");
                    string urc = Server.MapPath("OutFile/" + datafile + "/");
                    // CreateDirectory does nothing when the directory already exists.
                    Directory.CreateDirectory(urc);
                    imgPath = urc;

                    string sourcefile = Server.MapPath("SourceFile/珠宝RFID实施方案.pdf");
                    PdfReader pdfReader = new PdfReader(sourcefile);
                    try
                    {
                        for (int pageNumber = 1; pageNumber <= pdfReader.NumberOfPages; pageNumber++)
                        {
                            // AppendLine keeps the last word of one page from fusing
                            // with the first word of the next.
                            sb.AppendLine(PdfTextExtractor.GetTextFromPage(pdfReader, pageNumber));
                            ExtractPageImages(pdfReader, pageNumber);
                        }
                    }
                    finally
                    {
                        // Release the handle on the source file even when extraction fails.
                        pdfReader.Close();
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: report the failure and still emit the text gathered so far.
                Response.Write(HttpUtility.HtmlEncode(ex.Message));
            }
            // The text comes straight from the PDF, so encode it before placing it in HTML.
            Response.Write(HttpUtility.HtmlEncode(sb.ToString()));
        }

        /// <summary>
        /// Saves every image XObject listed directly in the resources of one page.
        /// XObjects of any other subtype (for example forms) are skipped.
        /// </summary>
        /// <param name="pdfReader">Open reader for the source document.</param>
        /// <param name="pageNumber">1-based page number.</param>
        private void ExtractPageImages(PdfReader pdfReader, int pageNumber)
        {
            PdfDictionary pg = pdfReader.GetPageN(pageNumber);
            PdfDictionary res = PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES)) as PdfDictionary;
            if (res == null)
            {
                return;
            }

            PdfDictionary xobj = PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT)) as PdfDictionary;
            if (xobj == null)
            {
                return;
            }

            // Named colour spaces used by an image are resolved against the page's
            // /ColorSpace resource dictionary, so that is what the image decoder needs.
            PdfDictionary colorSpaces = res.GetAsDict(PdfName.COLORSPACE);

            foreach (PdfName name in xobj.Keys)
            {
                PdfObject obj = xobj.Get(name);
                if (obj == null || !obj.IsIndirect())
                {
                    continue;
                }

                PdfDictionary tg = PdfReader.GetPdfObject(obj) as PdfDictionary;
                if (tg == null || !PdfName.IMAGE.Equals(tg.GetAsName(PdfName.SUBTYPE)))
                {
                    // Not an image: it has no pixel data to decode.
                    continue;
                }

                ImageRenderInfo imgRI = ImageRenderInfo.CreateForXObject(new GraphicsState(), (PRIndirectReference)obj, colorSpaces);
                if (imgRI != null)
                {
                    // NOTE(review): the original author remarked at this call that images
                    // were not coming out; verify the result against real documents.
                    RenderImage(imgRI, pageNumber);
                }
            }
        }

        /// <summary>
        /// Reads the non-public instance field named "gs" of a
        /// <see cref="PathPaintingRenderInfo"/> through reflection.
        /// </summary>
        /// <param name="renderInfo">Object whose field is read.</param>
        /// <returns>The field value cast to <see cref="GraphicsState"/>.</returns>
        /// <exception cref="InvalidOperationException">
        /// The type has no non-public instance field named "gs".
        /// </exception>
        GraphicsState getGraphicsState(PathPaintingRenderInfo renderInfo)
        {
            System.Reflection.FieldInfo gsField = typeof(PathPaintingRenderInfo).GetField(
                "gs",
                System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);
            if (gsField == null)
            {
                throw new InvalidOperationException("PathPaintingRenderInfo has no non-public instance field named 'gs'.");
            }
            return (GraphicsState)gsField.GetValue(renderInfo);
        }

        /// <summary>
        /// Decodes one PDF image and saves it as a JPEG file in <c>imgPath</c>.
        /// The first image of a page is named "&lt;number&gt;.jpg"; further images
        /// of the same page get "&lt;number&gt;_1.jpg", "&lt;number&gt;_2.jpg", and so on,
        /// so they no longer overwrite each other.
        /// </summary>
        /// <param name="renderInfo">Render information of the image to save.</param>
        /// <param name="number">Page number, used as the base of the file name.</param>
        private void RenderImage(ImageRenderInfo renderInfo, int number)
        {
            PdfImageObject image = renderInfo.GetImage();
            if (image == null)
            {
                return;
            }

            using (Dotnet dotnetImg = image.GetDrawingImage())
            {
                if (dotnetImg == null)
                {
                    return;
                }

                string target = Path.Combine(imgPath, number + ".jpg");
                for (int suffix = 1; File.Exists(target); suffix++)
                {
                    target = Path.Combine(imgPath, number + "_" + suffix + ".jpg");
                }

                // Save a Bitmap copy, as the original code did, but name the encoder
                // explicitly: without it the ".jpg" file would receive PNG data.
                using (Bitmap copy = new Bitmap(dotnetImg))
                {
                    copy.Save(target, ImageFormat.Jpeg);
                }
            }
        }
    }
}

ABCpdf.NET
https://www.nuget.org/packages/ABCpdf
http://test.websupergoo.com/helppdfnet/default.htm?page=source%2f3-concepts%2fg-htmlrender.htm
哲学管理(学)人生, 文学艺术生活, 自动(计算机学)物理(学)工作, 生物(学)化学逆境, 历史(学)测绘(学)时间, 经济(学)数学金钱(理财), 心理(学)医学情绪, 诗词美容情感, 美学建筑(学)家园, 解构建构(分析)整合学习, 智商情商(IQ、EQ)运筹(学)生存.---Geovin Du(涂聚文)
浙公网安备 33010602011771号