CSharp: UglyToad.PdfPig in .NET 8.0
/*
IDE: VS 2022 17.5
OS: windows 10
.net: 8.0
生成PDF文档,从PDF文档中获取文字内容 控制台下测试
*/
// See https://aka.ms/new-console-template for more information
using System;
using System.Collections.Generic;
using System.Linq;
using System.Xml.Linq;
using UglyToad.PdfPig;
using UglyToad.PdfPig.AcroForms;
using UglyToad.PdfPig.AcroForms.Fields;
using UglyToad.PdfPig.Content;
using UglyToad.PdfPig.Outline;
using System.IO;
using UglyToad.PdfPig.Core;
using UglyToad.PdfPig.Fonts.Standard14Fonts;
using UglyToad.PdfPig.Fonts.SystemFonts;
using UglyToad.PdfPig.Writer;
using System.Drawing;
using System.Drawing.Text;
// Console demo: builds a three-page PDF with PdfPig, saves it as "file.pdf",
// then opens "1.pdf" and prints every word found on each of its pages.
Console.WriteLine("Hello,CSharp World! Geovin Du,geovindu, 涂聚文\n\t");
try
{
    PdfDocumentBuilder builder = new PdfDocumentBuilder();

    // Chinese text must be drawn with a font that contains Chinese glyphs, so a
    // Chinese TrueType font is loaded from the Windows fonts folder.
    // File.ReadAllBytes is used because a single Stream.Read call is allowed to
    // return fewer bytes than requested, which would leave the buffer incomplete.
    byte[] simSunFontBytes = File.ReadAllBytes("C:\\Windows\\Fonts\\STSONG.TTF");

    // Register the Chinese-capable font and two Standard 14 fonts.
    PdfDocumentBuilder.AddedFont font = builder.AddTrueTypeFont(simSunFontBytes);
    PdfDocumentBuilder.AddedFont helvetica = builder.AddStandard14Font(Standard14Font.Helvetica);
    PdfDocumentBuilder.AddedFont helveticaBold = builder.AddStandard14Font(Standard14Font.HelveticaBold);

    // Page 1: two lines of Latin text starting 25 units below the top edge.
    PdfPageBuilder page = builder.AddPage(PageSize.A4);
    PdfPoint closeToTop = new PdfPoint(15, page.PageSize.Top - 25);
    page.AddText("My first PDF document!", 12, closeToTop, helvetica);
    page.AddText("Hello CSharp World!,Geovin Du!", 10, closeToTop.Translate(0, -15), helveticaBold);

    // Page 2: a single Latin line at the same position.
    page = builder.AddPage(PageSize.A4);
    page.AddText("geovindu!", 12, closeToTop, helvetica);

    // Page 3: Chinese text drawn with the TrueType font registered above.
    page = builder.AddPage(PageSize.A4);
    page.AddText("你好,这是一个PDF文档。涂聚文欢迎你!", 12, new PdfPoint(25, 520), font);

    // Write the finished document to disk.
    string fiel = "file.pdf";
    File.WriteAllBytes(fiel, builder.Build());
    Console.WriteLine("文档生成ok\n\t");

    // Read the text content of a PDF file, one word per output line.
    // NOTE(review): this opens "1.pdf", not the "file.pdf" written above —
    // confirm that a separate input file is intended.
    string fileout = "1.pdf";
    using (PdfDocument document = PdfDocument.Open(fileout))
    {
        foreach (UglyToad.PdfPig.Content.Page pagedu in document.GetPages())
        {
            foreach (Word word in pagedu.GetWords())
            {
                Console.WriteLine(word.Text);
            }
        }
    }
    Console.WriteLine("\n\t从PDF文件中读取文字内容ok");
}
catch (Exception ex)
{
    // Demo-level handling: report the failure message and let the program end.
    Console.WriteLine(ex.Message);
}
https://github.com/BobLd/PdfPig/tree/table-extractor-2
https://github.com/kba/hocr-spec
https://github.com/kba/hocrjs
Concurrency in .NET
https://github.com/rikace/fConcBook
https://dotnetcurry.com/dotnet/1360/concurrent-programming-dotnet-core
https://www.csharptutorial.net/csharp-concurrency/
https://www.oreilly.com/library/view/concurrency-in-net/9781617292996/
https://blog.christian-schou.dk/blog/concurrency-vs-parallelism-vs-asynchronous/
Concurrency in C++
https://www.codeproject.com/Articles/1271904/Programming-Concurrency-in-Cplusplus-Part-1
https://www.codeproject.com/Articles/1278737/Programming-Concurrency-in-Cplusplus-Part-2
https://www.modernescpp.org/wp-content/uploads/2023/04/Concurrency.pdf
https://www.codeproject.com/Tips/5376066/Solving-Fizz-Buzz-in-Csharp-and-Cplusplus
https://www.classes.cs.uchicago.edu/archive/2013/spring/12300-1/labs/lab6/
Concurrency in Java
https://github.com/RadekKoubsky/java-concurrency-in-practice-examples
https://github.com/LeonardoZ/java-concurrency-patterns
Concurrency in Python
https://stackabuse.com/concurrency-in-python/
https://github.com/ro6ley/python-concurrency-example
/*
IDE: VS 2022 17.6
OS: windows 10
.NET 8.0
FROM https://github.com/BobLd/PdfPig
https://github.com/UglyToad/PdfPig/wiki/Document-Layout-Analysis
https://github.com/UglyToad/PdfPig/issues/617
*/
namespace ConsoleAppPdfDemo
{
    using UglyToad.PdfPig.Content;
    using UglyToad.PdfPig.Core;
    using UglyToad.PdfPig.Fonts.Standard14Fonts;
    using UglyToad.PdfPig.Writer;
    using UglyToad.PdfPig;
    using UglyToad.PdfPig.DocumentLayoutAnalysis.TableExtractor;
    using System.Diagnostics;
    //using static System.Net.Mime.MediaTypeNames;
    using System.Drawing;
    using System.Net;

    /// <summary>
    /// Console demo that builds a three-page PDF (text plus a JPEG image) with PdfPig,
    /// prints the words of an existing PDF, and then opens the generated file.
    /// </summary>
    internal class Program
    {
        /// <summary>
        /// Converts a length in centimetres to PDF units (2.54 cm per inch, 72 units per inch).
        /// </summary>
        /// <param name="cm">Length in centimetres.</param>
        /// <returns>The same length expressed in PDF units.</returns>
        private static double CmToPdfUnits(double cm) => cm / 2.54 * 72;

        /// <summary>
        /// Program entry point. Generates the PDF, dumps the words of "1.pdf" to the
        /// console, and launches the generated document with the shell's default handler.
        /// Any exception is caught and its message is written to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Hello,CSharp World! Geovin Du,geovindu, 涂聚文\n\t");
            try
            {
                PdfDocumentBuilder builder = new PdfDocumentBuilder();

                // Chinese text must be drawn with a font that contains Chinese glyphs.
                // File.ReadAllBytes is used because a single Stream.Read call may return
                // fewer bytes than requested, leaving the buffer incomplete.
                byte[] simSunFontBytes = File.ReadAllBytes("C:\\Windows\\Fonts\\STSONG.TTF");

                // Register the Chinese-capable font and two Standard 14 fonts.
                PdfDocumentBuilder.AddedFont font = builder.AddTrueTypeFont(simSunFontBytes);
                PdfDocumentBuilder.AddedFont helvetica = builder.AddStandard14Font(Standard14Font.Helvetica);
                PdfDocumentBuilder.AddedFont helveticaBold = builder.AddStandard14Font(Standard14Font.HelveticaBold);

                // Page 1: two text lines near the top edge and a JPEG image.
                PdfPageBuilder page = builder.AddPage(PageSize.A4);
                PdfPoint closeToTop = new PdfPoint(15, page.PageSize.Top - 25);
                page.AddText("My first PDF document!言语成了邀功尽责的功臣,还需要行为每日值班吗?", 12, closeToTop, font);
                page.AddText("Hello CSharp World!,Geovin Du!涂聚文,geovindu", 10, closeToTop.Translate(0, -15), font);

                // The image is looked up relative to the current working directory.
                string imagePath = Path.Combine(Environment.CurrentDirectory, "images", "logo.jpg");
                double imgX = CmToPdfUnits(2.5);
                double imgY = CmToPdfUnits(14);
                double imgWidth = CmToPdfUnits(16);
                double imgHeight = CmToPdfUnits(12);

                // Open read-only and dispose the stream as soon as the image has been added,
                // so the file handle is not left open for the rest of the run.
                using (FileStream imgStream = File.OpenRead(imagePath))
                {
                    page.AddJpeg(imgStream, new PdfRectangle(imgX, imgY, imgX + imgWidth, imgY + imgHeight));
                }

                // Page 2: a heading followed by a poem, one line every 15 units downwards.
                page = builder.AddPage(PageSize.A4);
                string[] secondPageLines =
                {
                    "geovindu!",
                    "励学篇",
                    "宋 赵恒",
                    "富家不用买良田,书中自有千钟粟。",
                    "安居不用架高堂,书中自有黄金屋。",
                    "出门莫恨无人随,书中车马多如簇。",
                    "娶妻莫恨无良媒,书中自有颜如玉。",
                    "男儿欲遂平生志,五经勤向窗前读。",
                };
                for (int i = 0; i < secondPageLines.Length; i++)
                {
                    page.AddText(secondPageLines[i], 12, new PdfPoint(15, 815 - (i * 15)), font);
                }

                // Empty placeholder entries carried over unchanged from the original sample.
                page.AddText("", 12, new PdfPoint(15, 695), font);
                page.AddText("", 12, new PdfPoint(15, 780), font);
                page.AddText("", 12, new PdfPoint(15, 765), font);

                // Page 3: a single Chinese sentence.
                page = builder.AddPage(PageSize.A4);
                page.AddText("你好,这是一个PDF文档。涂聚文欢迎你!", 12, new PdfPoint(25, 520), font);

                // Timestamped output name. The format includes the day ("dd"); without it,
                // runs at the same time of day within one month produce the same file name.
                string timestamp = DateTime.Now.ToString(
                    "yyyyMMddHHmmss",
                    System.Globalization.CultureInfo.InvariantCulture);
                string fiel = "geovindu" + timestamp + ".pdf";
                File.WriteAllBytes(fiel, builder.Build());
                Console.WriteLine("文档生成ok\n\t");

                // Read the text content of a PDF file, one word per output line.
                // NOTE(review): this opens "1.pdf", not the file generated above —
                // confirm that a separate input file is intended.
                string fileout = "1.pdf";
                using (PdfDocument document = PdfDocument.Open(fileout))
                {
                    foreach (UglyToad.PdfPig.Content.Page pagedu in document.GetPages())
                    {
                        foreach (Word word in pagedu.GetWords())
                        {
                            Console.WriteLine(word.Text);
                        }
                    }
                }
                Console.WriteLine("\n\t从PDF文件中读取文字内容ok");

                // Preview the generated file with the shell's default handler.
                using (Process process = new Process())
                {
                    process.StartInfo = new ProcessStartInfo(fiel)
                    {
                        UseShellExecute = true
                    };

                    // Start returns false when no new process was started (for example an
                    // already-running viewer was reused); WaitForExit would throw in that case.
                    if (process.Start())
                    {
                        process.WaitForExit();
                    }
                }
            }
            catch (Exception ex)
            {
                // Demo-level handling: report the failure message and let the program end.
                Console.WriteLine(ex.Message);
            }
        }
    }
}
https://github.com/BobLd/PdfPig/tree/table-extractor-2
https://github.com/BobLd/PdfPig/tree/table-extractor
浙公网安备 33010602011771号