获取EMF文件内全部文字, 并按照左上到右下的顺序排序
因为工作要求, 需要对EMF文件文字内容做分析.....SO, 如下代码出现了
懒得加注释了, 反正对外接口属性就那么几个, 根据英文猜吧, 很容易的
说明一下:
这个东西结果会对所有文字内容按照左上到右下的顺序排序(EMF内数据顺序是根据画图顺序来的, 所以不一定是什么顺序, 但是数据分析就要得到行列关系)
但是图片没有行列概念, 所以我简单借鉴了纯横排版模式: 对任意2个文字元素, 只要其中一个的显示范围中心点的Y坐标落在另一个的显示范围(上下边界之间)内, 就认为它们在同一行, 同一行内再按中心点的X坐标从左到右排序
2015-10-19:
1.修改了几个排序时的BUG, 增加了一个对显示区域的处理, 最大限度减少对排版的影响
2.修改了获取SmallTextOut的处理方式
{
  EMF analysis unit.
  Reads the text elements stored in an EMF (enhanced metafile) and orders
  them top-left to bottom-right so that row/column relations can be derived.

  Last modified: 2015-10-19
  by: 刘志林
  E-Mail: lzl_17948876@hotmail.com
}
unit Comm.EMFInfo;

interface

uses
  System.Types, System.Generics.Collections, Vcl.Graphics;

type
  { One text element extracted from a metafile. }
  TEMFStrInfo = record
    DisplayRect: TRect; { display area }
    Text: string;       { displayed text (trimmed) }
    LineKey: string;    { row marker: elements sharing a key are on the same row }
  end;
  PEMFStrInfo = ^TEMFStrInfo;

  { Accumulates the text elements of one or more metafiles.
    Every appended metafile is stacked below the previous ones: its element
    rectangles are shifted down by the accumulated MaxHeight. }
  TEMFStrInfoList = Class
  private
    FList: TList<PEMFStrInfo>;         { owns the records (allocated with New) }
    FDic: TDictionary<string, UInt32>; { text -> index in FList (last one wins) }
    FMaxHeight: Integer;               { sum of the heights returned by Append }
    FJSONStrs: string;                 { JSON rows without the outer brackets;
                                         used for fuzzy text search when a
                                         positional lookup fails }
    function GetItem(Index: UInt32): TEMFStrInfo;
    function GetCount: UInt32;
    function GetJSONStrs: string;
  public
    constructor Create;
    destructor Destroy; override;
    procedure Append(AEMF: TMetafile; var AHeight: Integer);
    procedure Clear;
    property Count: UInt32 read GetCount;
    property Items[Index: UInt32]: TEMFStrInfo read GetItem;
    function TryGetInfo(AInfoName: string; var AInfo: TEMFStrInfo; var AIndex: UInt32): Boolean;
    function StrAnalyze(ALeavePattern: array of string; var AResult: string): Boolean;
    property JSONStr: string read GetJSONStrs;
    property MaxHeight: Integer read FMaxHeight;
  end;

implementation

uses
  System.SysUtils, System.Classes, System.Generics.Defaults, System.RegularExpressions,
  Winapi.Windows, Vcl.Printers, QJSON;

const
  { fuOptions bit: if set the string is 8-bit (ANSI), otherwise UTF-16 }
  SMALLTEXT_TYPE_ANSI = $200;
  { fuOptions bit: if set the record has no clip rectangle
    (EMR_SMALLTEXTOUT layout), otherwise it has one (EMR_SMALLTEXTOUTCLIP layout) }
  SMALLTEXT_TYPE_WITHOUT_CLIP = $100;

type
  { Fixed part shared by every EMR_SMALLTEXTOUT variant. }
  EMR_SMALLTEXTOUT_HEAD = record
    emr: TEMR;
    ptlReference: TPoint;
    nChars: DWORD;
    fuOptions: DWORD;     { SMALLTEXT_TYPE_* bits; also holds fuOptions like in ExtTextOut }
    iGraphicsMode: DWORD; { see iMode parameter of SetGraphicsMode }
    exScale: Single;      { X and Y scales from page units to .01mm units }
    eyScale: Single;      { if graphics mode is GM_COMPATIBLE }
  end;
  PEMRSmallTextOutHead = ^EMR_SMALLTEXTOUT_HEAD;

  { With clip rectangle, ANSI string. }
  EMR_SMALLTEXTOUTCLIPA = record
    emr: TEMR;
    ptlReference: TPoint;
    nChars: DWORD;
    fuOptions: DWORD;
    iGraphicsMode: DWORD;
    exScale: Single;
    eyScale: Single;
    rclClip: TRect;
    cString: array [0 .. 0] of AnsiChar; { followed by the string array }
  end;
  PEMRSmallTextOutClipA = ^EMR_SMALLTEXTOUTCLIPA;

  { With clip rectangle, wide string. }
  EMR_SMALLTEXTOUTCLIPW = record
    emr: TEMR;
    ptlReference: TPoint;
    nChars: DWORD;
    fuOptions: DWORD;
    iGraphicsMode: DWORD;
    exScale: Single;
    eyScale: Single;
    rclClip: TRect;
    cString: array [0 .. 0] of WideChar; { followed by the string array }
  end;
  PEMRSmallTextOutClipW = ^EMR_SMALLTEXTOUTCLIPW;

  { Without clip rectangle, ANSI string. }
  EMR_SMALLTEXTOUTA = record
    emr: TEMR;
    ptlReference: TPoint;
    nChars: DWORD;
    fuOptions: DWORD;
    iGraphicsMode: DWORD;
    exScale: Single;
    eyScale: Single;
    cString: array [0 .. 0] of AnsiChar; { followed by the string array }
  end;
  PEMRSmallTextOutA = ^EMR_SMALLTEXTOUTA;

  { Without clip rectangle, wide string. }
  EMR_SMALLTEXTOUTW = record
    emr: TEMR;
    ptlReference: TPoint;
    nChars: DWORD;
    fuOptions: DWORD;
    iGraphicsMode: DWORD;
    exScale: Single;
    eyScale: Single;
    cString: array [0 .. 0] of WideChar; { followed by the string array }
  end;
  PEMRSmallTextOutW = ^EMR_SMALLTEXTOUTW;

var
  { Off-screen bitmap whose canvas DC is used to measure text extents. }
  FReferenceDC: VCL.Graphics.TBitmap;

{ True when the rectangle is usable as a text area: not empty, Left and Right
  both positive, and more than 4 units wide and high. }
function _IsEffectiveRect(const ARect: TRect): Boolean;
begin
  Result := (not ARect.IsEmpty)
    and (ARect.Right > 0) and (ARect.Left > 0)
    and (ARect.Bottom - ARect.Top > 4)
    and (ARect.Right - ARect.Left > 4);
end;

{ Clamps the rectangle so that it is at most ASize wide/high, keeping TopLeft. }
procedure _ShrinkRect(var ARect: TRect; const ASize: TSize);
var
  nLimit: Integer;
begin
  nLimit := ARect.Left + ASize.cx;
  if ARect.Right > nLimit then
    ARect.Right := nLimit;
  nLimit := ARect.Top + ASize.cy;
  if ARect.Bottom > nLimit then
    ARect.Bottom := nLimit;
end;

{ True when AByteCount bytes starting AOffset bytes into the record still lie
  inside the record (guards against malformed or truncated records). }
function _FitsInRecord(ARecord: PENHMETARECORD; AOffset, AByteCount: UInt64): Boolean;
begin
  Result := (AOffset + AByteCount) <= ARecord^.nSize;
end;

{ Converts ACount 8-bit characters at AData to a trimmed string.
  Conversion stops at the first #0, as a null-terminated copy would. }
function _AnsiText(AData: Pointer; ACount: DWORD): string;
var
  nRaw: AnsiString;
  nText: string;
begin
  SetString(nRaw, PAnsiChar(AData), Integer(ACount));
  nText := string(PAnsiChar(nRaw));
  Result := Trim(nText);
end;

{ Converts ACount UTF-16 characters at AData to a trimmed string.
  Conversion stops at the first #0, as a null-terminated copy would. }
function _WideText(AData: Pointer; ACount: DWORD): string;
var
  nRaw: string;
  nText: string;
begin
  SetString(nRaw, PWideChar(AData), Integer(ACount));
  nText := PWideChar(nRaw);
  Result := Trim(nText);
end;

{ Adds a text element to AList if the text is not empty and its display area,
  normalized and clamped to the measured text extent, is an effective rect. }
procedure _CollectText(AList: TList<PEMFStrInfo>; const AText: string; const ABounds: TRect);
var
  nSize: TSize;
  nRect: TRect;
  nInfo: PEMFStrInfo;
begin
  if AText = '' then
    Exit;

  { stays 0x0 if the measurement fails, which makes the rect empty below }
  nSize.cx := 0;
  nSize.cy := 0;
  Winapi.Windows.GetTextExtentPoint32(FReferenceDC.Canvas.Handle, AText, Length(AText), nSize);

  nRect := ABounds;
  nRect.NormalizeRect;
  _ShrinkRect(nRect, nSize);
  if not _IsEffectiveRect(nRect) then
    Exit;

  New(nInfo);
  try
    nInfo^.Text := AText;
    nInfo^.DisplayRect := nRect;
    nInfo^.LineKey := '';
    AList.Add(nInfo);
  except
    Dispose(nInfo);
    raise;
  end;
end;

{ EnumEnhMetaFile callback. lpData carries the TList<PEMFStrInfo> that receives
  the text elements found in EMR_EXTTEXTOUTA/W and EMR_SMALLTEXTOUT records.
  Always returns 1 so that the enumeration continues. }
function EnumTextProc(DC: HDC; lpHTable: PHANDLETABLE; EMFR: PENHMETARECORD;
  nObj: Integer; lpData: LPARAM): Integer; stdcall;
var
  nList: TList<PEMFStrInfo>;
  nEMRTO: PEMRExtTextOut;
  nHead: PEMRSmallTextOutHead;
  nIsAnsi, nNoClip: Boolean;
  nChars: Pointer;
  nOffset: NativeUInt;
  nCharSize: Integer;
  nBounds: TRect;
begin
  Result := 1;
  { LPARAM is pointer sized, so the list reference survives on Win64 too }
  nList := TList<PEMFStrInfo>(Pointer(lpData));

  case EMFR^.iType of
    EMR_EXTTEXTOUTA, EMR_EXTTEXTOUTW:
      begin
        nEMRTO := PEMRExtTextOut(EMFR);
        nIsAnsi := EMFR^.iType = EMR_EXTTEXTOUTA;
        if nIsAnsi then
          nCharSize := SizeOf(AnsiChar)
        else
          nCharSize := SizeOf(WideChar);

        { offString is a byte offset from the start of the record }
        nOffset := nEMRTO^.emrtext.offString;
        if not _FitsInRecord(EMFR, nOffset, UInt64(nEMRTO^.emrtext.nChars) * UInt64(nCharSize)) then
          Exit;
        nChars := Pointer(NativeUInt(EMFR) + nOffset);

        if nIsAnsi then
          _CollectText(nList, _AnsiText(nChars, nEMRTO^.emrtext.nChars), nEMRTO^.rclBounds)
        else
          _CollectText(nList, _WideText(nChars, nEMRTO^.emrtext.nChars), nEMRTO^.rclBounds);
      end;

    EMR_SMALLTEXTOUT:
      begin
        nHead := PEMRSmallTextOutHead(EMFR);
        nIsAnsi := (nHead^.fuOptions and SMALLTEXT_TYPE_ANSI) = SMALLTEXT_TYPE_ANSI;
        nNoClip := (nHead^.fuOptions and SMALLTEXT_TYPE_WITHOUT_CLIP) = SMALLTEXT_TYPE_WITHOUT_CLIP;

        if nNoClip then
        begin
          { no clip rectangle stored: start at the reference point with an
            open-ended area; _CollectText clamps it to the text extent }
          nBounds := Rect(nHead^.ptlReference.X, nHead^.ptlReference.Y, MAXWORD, MAXWORD);
          if nIsAnsi then
            nChars := @PEMRSmallTextOutA(EMFR)^.cString
          else
            nChars := @PEMRSmallTextOutW(EMFR)^.cString;
        end
        else
        begin
          { clip rectangle present: use it, anchored at the reference point }
          if nIsAnsi then
          begin
            nBounds := PEMRSmallTextOutClipA(EMFR)^.rclClip;
            nChars := @PEMRSmallTextOutClipA(EMFR)^.cString;
          end
          else
          begin
            nBounds := PEMRSmallTextOutClipW(EMFR)^.rclClip;
            nChars := @PEMRSmallTextOutClipW(EMFR)^.cString;
          end;
          nBounds.TopLeft := nHead^.ptlReference;
        end;

        if nIsAnsi then
          nCharSize := SizeOf(AnsiChar)
        else
          nCharSize := SizeOf(WideChar);
        nOffset := NativeUInt(nChars) - NativeUInt(EMFR);
        if not _FitsInRecord(EMFR, nOffset, UInt64(nHead^.nChars) * UInt64(nCharSize)) then
          Exit;

        if nIsAnsi then
          _CollectText(nList, _AnsiText(nChars, nHead^.nChars), nBounds)
        else
          _CollectText(nList, _WideText(nChars, nHead^.nChars), nBounds);
      end;
  end;
end;

type
  { Orders text elements top-left to bottom-right and, as a side effect,
    gives elements it considers to be on the same row a common LineKey. }
  TEMFStrInfoCompare = class(TComparer<PEMFStrInfo>)
  public
    function Compare(const Left, Right: PEMFStrInfo): Integer; override;
  end;

{ TEMFStrInfoCompare }

function TEMFStrInfoCompare.Compare(const Left, Right: PEMFStrInfo): Integer;
var
  nCPLeft, nCPRight: TPoint;
  nLIR, nRIL: Int8;
begin
  nCPLeft := Left^.DisplayRect.CenterPoint;
  nCPRight := Right^.DisplayRect.CenterPoint;

  { position of Left's centre relative to Right's vertical range }
  if nCPLeft.Y <= Right^.DisplayRect.Top then
    nLIR := -1
  else if nCPLeft.Y >= Right^.DisplayRect.Bottom then
    nLIR := 1
  else
    nLIR := 0;

  { position of Right's centre relative to Left's vertical range }
  if nCPRight.Y <= Left^.DisplayRect.Top then
    nRIL := -1
  else if nCPRight.Y >= Left^.DisplayRect.Bottom then
    nRIL := 1
  else
    nRIL := 0;

  if (nLIR = 0) or (nRIL = 0) then
  begin
    { either centre lies inside the other element's vertical range:
      same row, so share the row marker ... }
    if Left^.LineKey <> '' then
      Right^.LineKey := Left^.LineKey
    else if Right^.LineKey <> '' then
      Left^.LineKey := Right^.LineKey
    else
    begin
      Left^.LineKey := TGUID.NewGuid.ToString;
      Right^.LineKey := Left^.LineKey;
    end;

    { ... and order by the centre's X, falling back to its Y }
    if nCPLeft.X < nCPRight.X then
      Result := -1
    else if nCPLeft.X > nCPRight.X then
      Result := 1
    else if nCPLeft.Y < nCPRight.Y then
      Result := -1
    else if nCPLeft.Y > nCPRight.Y then
      Result := 1
    else
      Result := 0;
  end
  else
    Result := nLIR;
end;

{ TEMFStrInfoList }

{ Extracts the text elements of AEMF, sorts them, appends them to the list
  (shifted down by the current MaxHeight) and extends the JSON row data.
  AHeight returns the largest DisplayRect.Bottom found in this metafile
  (0 when it holds no usable text); MaxHeight grows by that amount. }
procedure TEMFStrInfoList.Append(AEMF: TMetafile; var AHeight: Integer);
var
  nList: TList<PEMFStrInfo>;
  nCompare: IComparer<PEMFStrInfo>;
  nPI: PEMFStrInfo;
  i, nMoved, nIndex: Integer;
  nTmpLineKey, nTmpJSONStr: string;
  nJ, nJLine: TQJson;
begin
  nMoved := 0;
  nList := TList<PEMFStrInfo>.Create;
  try
    { read the metafile elements into the temporary list }
    EnumEnhMetaFile(0, AEMF.Handle, @EnumTextProc, Pointer(nList), Rect(0, 0, 0, 0));

    { sort; held as an interface so reference counting owns the comparer }
    nCompare := TEMFStrInfoCompare.Create;
    try
      nList.Sort(nCompare);
    except
      { deliberate best effort: a failed sort leaves every element in the
        list, only the order may be incomplete }
    end;

    { compute the height, register the elements, build the JSON rows }
    AHeight := 0;
    nJ := TQJson.Create;
    try
      nJ.DataType := jdtArray;
      nJLine := nil;
      nTmpLineKey := '';
      for i := 0 to nList.Count - 1 do
      begin
        nPI := nList[i];
        { elements that were never matched to a row get their own marker }
        if nPI^.LineKey = '' then
          nPI^.LineKey := TGUID.NewGuid.ToString;

        { a different marker starts a new row }
        if (nTmpLineKey = '') or (not SameText(nTmpLineKey, nPI^.LineKey)) then
          nJLine := nil;
        nTmpLineKey := nPI^.LineKey;

        if nPI^.DisplayRect.Bottom > AHeight then
          AHeight := nPI^.DisplayRect.Bottom;
        { stack this metafile below the ones appended before }
        OffsetRect(nPI^.DisplayRect, 0, FMaxHeight);

        nIndex := FList.Add(nPI);
        Inc(nMoved); { from here on FList owns the record }
        FDic.AddOrSetValue(nPI^.Text, nIndex);

        if nJLine = nil then
          nJLine := nJ.AddArray('');
        nJLine.Add.AsString := nPI^.Text;
      end;

      { strip the outer brackets so that rows of several metafiles can be joined }
      nTmpJSONStr := nJ.Encode(False);
      nTmpJSONStr := Copy(nTmpJSONStr, 2, Length(nTmpJSONStr) - 2);
      { skip empty output, otherwise a stray comma would break the JSON }
      if nTmpJSONStr <> '' then
      begin
        if FJSONStrs = '' then
          FJSONStrs := nTmpJSONStr
        else
          FJSONStrs := FJSONStrs + ',' + nTmpJSONStr;
      end;
    finally
      nJ.Free;
    end;

    FMaxHeight := FMaxHeight + AHeight;
  finally
    { release records that were not handed over to FList (error path only) }
    for i := nMoved to nList.Count - 1 do
      Dispose(nList[i]);
    nList.Free;
  end;
end;

{ Removes all elements and resets MaxHeight and the JSON data. }
procedure TEMFStrInfoList.Clear;
var
  i: Integer;
begin
  FMaxHeight := 0;
  FJSONStrs := '';
  for i := 0 to FList.Count - 1 do
    Dispose(FList[i]);
  FList.Clear;
  FDic.Clear;
end;

constructor TEMFStrInfoList.Create;
begin
  inherited Create;
  FList := TList<PEMFStrInfo>.Create;
  FDic := TDictionary<string, UInt32>.Create;
  FMaxHeight := 0;
  FJSONStrs := '';
end;

destructor TEMFStrInfoList.Destroy;
var
  i: Integer;
begin
  { FList is nil when the constructor failed before creating it }
  if FList <> nil then
    for i := 0 to FList.Count - 1 do
      Dispose(FList[i]);
  FList.Free;
  FDic.Free;
  inherited;
end;

function TEMFStrInfoList.GetCount: UInt32;
begin
  Result := FList.Count;
end;

function TEMFStrInfoList.GetItem(Index: UInt32): TEMFStrInfo;
begin
  Result := FList[Index]^;
end;

{ Returns the collected rows as a JSON array of arrays of strings. }
function TEMFStrInfoList.GetJSONStrs: string;
begin
  Result := '[' + FJSONStrs + ']';
end;

{ Applies the patterns in order: each pattern is matched against the first
  match of the previous one, starting from the JSON row data (without the
  outer brackets). AResult receives the final match, '' when any step finds
  nothing. Returns True when AResult is not empty.
  Raises Exception when the regular expression engine fails. }
function TEMFStrInfoList.StrAnalyze(ALeavePattern: array of string; var AResult: string): Boolean;

  { First match of APattern in AData, '' when there is none. }
  function _RegExAnalyze(AData, APattern: string): string;
  var
    nMatches: TMatchCollection;
  begin
    { a string Result is not cleared automatically and could otherwise
      carry over a previous value when nothing matches }
    Result := '';
    nMatches := TRegEx.Matches(AData, APattern, [roMultiLine]);
    if nMatches.Count > 0 then
      Result := nMatches.Item[0].Value;
  end;

var
  i: Integer;
  nTmpData: string;
begin
  AResult := '';
  try
    nTmpData := FJSONStrs;
    for i := Low(ALeavePattern) to High(ALeavePattern) do
    begin
      nTmpData := _RegExAnalyze(nTmpData, ALeavePattern[i]);
      if nTmpData = '' then
        Break;
    end;
    AResult := nTmpData;
  except
    on E: Exception do
      raise Exception.CreateFmt('正则分析失败[%s]', [E.Message]);
  end;
  Result := AResult <> '';
end;

{ Looks up an element by its exact text. On success AIndex is its index and
  AInfo a copy of the element. }
function TEMFStrInfoList.TryGetInfo(AInfoName: string; var AInfo: TEMFStrInfo; var AIndex: UInt32): Boolean;
begin
  Result := FDic.TryGetValue(AInfoName, AIndex);
  if Result then
    AInfo := FList[AIndex]^;
end;

initialization
  FReferenceDC := VCL.Graphics.TBitmap.Create;
  with FReferenceDC do
  begin
    PixelFormat := pf24bit;
    Width := 2048;
    Height := 2048;
  end;

finalization
  FreeAndNil(FReferenceDC);

end.
--------------------------------------------------------------------------------------------------
作者:黑暗煎饼果子
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接,否则保留追究法律责任的权利.
--------------------------------------------------------------------------------------------------