转:C#读取PDF、TXT内容

//读取PDF内容
/// <summary>
/// Click handler: passes the fixed sample path D:\aa.pdf to OnCreated and shows
/// the returned text in label3.
/// </summary>
private void button2_Click(object sender, EventArgs e)
        {
            // Fixed sample path used by this demo.
            string pdfPath = "D:\\aa.pdf";
            string extracted = OnCreated(pdfPath);
            label3.Text = extracted;
        }
 
        /// <summary>
        /// Extracts the text of every page of a PDF file and returns it as one string.
        /// </summary>
        /// <param name="filepath">Path of the PDF file to read.</param>
        /// <returns>
        /// The concatenated text of all pages, or <c>null</c> when any exception occurs.
        /// In that case the exception is appended to mylog.log under the application
        /// base directory.
        /// </returns>
        private string OnCreated(string filepath)
        {
            try
            {
                PdfReader pdfReader = new PdfReader(filepath);
                try
                {
                    int numberOfPages = pdfReader.NumberOfPages;
                    // StringBuilder avoids re-copying the accumulated text for every page.
                    System.Text.StringBuilder text = new System.Text.StringBuilder();

                    // Pages are addressed 1..numberOfPages.
                    for (int i = 1; i <= numberOfPages; ++i)
                    {
                        // One extraction strategy instance per page.
                        iTextSharp.text.pdf.parser.ITextExtractionStrategy strategy = new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();
                        text.Append(iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(pdfReader, i, strategy));
                    }

                    return text.ToString();
                }
                finally
                {
                    // Release the reader even when extraction fails part-way through.
                    pdfReader.Close();
                }
            }
            catch (Exception ex)
            {
                string logPath = System.AppDomain.CurrentDomain.SetupInformation.ApplicationBase + "\\mylog.log";
                // using guarantees the log file is flushed and closed even if the write throws.
                using (StreamWriter wlog = File.AppendText(logPath))
                {
                    // Include the offending file path; the message previously left it out.
                    wlog.WriteLine("出错文件:" + filepath + " 原因:" + ex.ToString());
                }
                return null;
            }
        }
 
 
//Read a TXT file
string text = System.IO.File.ReadAllText(path)   // read the whole file; path is the file path
    .Replace("\n", string.Empty)                 // strip line feeds
    .Replace("\r", string.Empty);                // strip carriage returns
 
实例:

//1. 生成一个PDF,将文本和图片添加到PDF里面。
        //2. 从PDF文档中提取所有图片。
        //3. 从PDF文档中提取所有文本。
 
       //生成一个PDF文件 里面包含文本和图片
        /// <summary>
        /// Click handler: builds a one-page PDF containing a line of text and the image
        /// ff.jpg, then saves it as sample.pdf.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            Spire.Pdf.PdfDocument doc = new Spire.Pdf.PdfDocument();
            try
            {
                PdfPageBase page = doc.Pages.Add();

                // Add the text.
                // Author's note: Chinese characters are not generated correctly here; English letters are.
                page.Canvas.DrawString("Hello!Welcome to my house!",
                    new Spire.Pdf.Graphics.PdfFont(PdfFontFamily.Helvetica, 20f),
                    new PdfSolidBrush(Color.Black), 10, 10);

                // Add the image at 75% of its size, centred horizontally on the page canvas.
                Spire.Pdf.Graphics.PdfImage image = Spire.Pdf.Graphics.PdfImage.FromFile("ff.jpg");
                float width = image.Width * 0.75f;
                float height = image.Height * 0.75f;
                float x = (page.Canvas.ClientSize.Width - width) / 2;
                page.Canvas.DrawImage(image, x, 60, width, height);

                // Optional second image, left disabled as in the original sample.
                //Spire.Pdf.Graphics.PdfImage image2 = Spire.Pdf.Graphics.PdfImage.FromFile("image.jpg");
                //width = image2.Width * 0.75f;
                //height = image2.Height * 0.75f;
                //page.Canvas.DrawImage(image2, x - 100, 220, width, height);
                doc.SaveToFile("sample.pdf");
            }
            finally
            {
                // Close the document, as the other handlers do, even if drawing or saving throws.
                doc.Close();
            }
        }
 
        //读取图片 获取图片个数 并把图片保存到本地
        /// <summary>
        /// Click handler: extracts every image from sample.pdf, shows the image count in
        /// label3 and saves each image as Image-{index}.png (index starts at 0).
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            List<Image> images = new List<Image>();
            Spire.Pdf.PdfDocument doc = new Spire.Pdf.PdfDocument();
            try
            {
                doc.LoadFromFile("sample.pdf");
                foreach (PdfPageBase page in doc.Pages)
                {
                    // Extract once per page; calling ExtractImages() twice repeats the work
                    // and throws away the first set of Image objects.
                    Image[] extracted = page.ExtractImages();
                    if (extracted != null)
                    {
                        images.AddRange(extracted);
                    }
                }
            }
            finally
            {
                // Close the document even if loading or extraction throws.
                doc.Close();
            }

            // Show the number of extracted images before writing them out.
            label3.Text = images.Count.ToString();

            for (int index = 0; index < images.Count; index++)
            {
                // Dispose each image once it has been written to disk.
                using (Image image = images[index])
                {
                    String imageFileName = String.Format("Image-{0}.png", index);
                    image.Save(imageFileName, ImageFormat.Png);
                }
            }
        }
 
        //读取文本
        /// <summary>
        /// Click handler: extracts the text of every page of sample.pdf and shows it in label1.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            StringBuilder buffer = new StringBuilder();
            Spire.Pdf.PdfDocument doc = new Spire.Pdf.PdfDocument();
            try
            {
                doc.LoadFromFile("sample.pdf");
                foreach (PdfPageBase page in doc.Pages)
                {
                    buffer.Append(page.ExtractText());
                }
            }
            finally
            {
                // Close the document even if loading or extraction throws.
                doc.Close();
            }

            label1.Text = buffer.ToString(); // show the extracted text in the UI
            // To also write the extracted text to a TXT file:
            //String fileName = "TextInPdf.txt";
            //File.WriteAllText(fileName, buffer.ToString());
        }

原文:https://blog.csdn.net/wk125570/article/details/73794257?utm_source=copy 

参考:http://www.cnblogs.com/Yesi/p/4203686.html

 

posted @ 2018-10-12 09:20  chu_叶子  阅读(4318)  评论(0)  编辑  收藏  举报