获取EMF文件内全部文字, 并按照左上到右下的顺序排序

因为工作要求, 需要对EMF文件文字内容做分析.....SO, 如下代码出现了

懒得加注释了, 反正对外接口属性就那么几个, 根据英文猜吧, 很容易的

 

说明一下:

  这个东西结果会对所有文字内容按照左上到右下的顺序排序(EMF内数据顺序是根据画图顺序来的, 所以不一定是什么顺序, 但是数据分析就要得到行列关系)

但是图片没有行列概念, 所以我简单借鉴了纯横排版的模式: 对于2个文字元素, 只要其中任意一方显示范围的垂直中点落在对方的显示范围(上下边界)之内, 就认为它们在同一行

 

2015-10-19:

  1.修改了几个排序时的BUG, 增加了一个对显示区域的处理, 最大限度地减少对排版的影响

  2.修改了获取SmallTextOut的处理方式

 

{
EMF file analysis unit
Reads the text elements stored in an EMF file and arranges them into rows

Last modified: 2015-10-19

by: 刘志林
E-Mail: lzl_17948876@hotmail.com
}

unit Comm.EMFInfo;

interface

uses
  System.Types, System.Generics.Collections,
  Vcl.Graphics;

type
  {One piece of text extracted from an EMF record.}
  TEMFStrInfo = record
    DisplayRect: TRect; {area in which the text is displayed}
    Text: string; {the displayed text}
    LineKey: string; {row marker: elements judged to be on the same row share one value}
  end;
  {Pointer form; the lists in this unit store heap-allocated TEMFStrInfo records.}
  PEMFStrInfo = ^TEMFStrInfo;

  {
    Collection of the text elements read from one or more EMF pages.
    Elements are appended page by page (see Append); each appended page is
    shifted down by the accumulated height of the pages added before it.
  }
  TEMFStrInfoList = Class
  private
    FList: TList<PEMFStrInfo>; {owned records, in sorted order}
    FDic: TDictionary<string, UInt32>; {element text -> index in FList}
    FMaxHeight: Integer; {accumulated height of all appended pages}
    FJSONStrs: string; {row arrays as JSON text without the outer brackets; used for a loose text search when a lookup by name fails}

    function GetItem(Index: UInt32): TEMFStrInfo;
    function GetCount: UInt32;
    function GetJSONStrs: string;
  public
    constructor Create;
    destructor Destroy; override;

    {Reads all text elements of AEMF, sorts them and adds them to the list; AHeight receives the largest Bottom found on that page.}
    procedure Append(AEMF: TMetafile; var AHeight: Integer);
    {Releases every stored element and resets the list to its empty state.}
    procedure Clear;
    property Count: UInt32 read GetCount;
    property Items[Index: UInt32]: TEMFStrInfo read GetItem;
    {Looks an element up by its exact text; returns False when no element has that text.}
    function TryGetInfo(AInfoName: string; var AInfo: TEMFStrInfo; var AIndex: UInt32): Boolean;
    {Applies the regular expressions in ALeavePattern one after another to the JSON text; AResult receives the final match.}
    function StrAnalyze(ALeavePattern: array of string; var AResult: string): Boolean;
    {All rows as a JSON array of arrays of strings.}
    property JSONStr: string read GetJSONStrs;
    property MaxHeight: Integer read FMaxHeight;
  end;

implementation

uses
  System.SysUtils, System.Classes, System.Generics.Defaults,
  System.RegularExpressions,
  Winapi.Windows,
  Vcl.Printers,
  QJSON;

const
  // Flags tested in the fuOptions field of an EMR_SMALLTEXTOUT record.
  // When set the text is stored as ANSI (one byte per character),
  // otherwise as UNICODE (two bytes per character).
  SMALLTEXT_TYPE_ANSI = $200;
  // When set the record has no clip rectangle (EMR_SMALLTEXTOUT layout),
  // otherwise a clip rectangle precedes the text (EMR_SMALLTEXTOUTCLIP layout).
  SMALLTEXT_TYPE_WITHOUT_CLIP = $100;

// Structures
type
  // Fields shared by every EMR_SMALLTEXTOUT layout declared below. The record
  // is read through this type first so that fuOptions can be inspected to
  // decide which of the four full layouts applies.
  EMR_SMALLTEXTOUT_HEAD = RECORD
    emr: emr;
    ptlReference: TPoint;
    nChars: DWORD;
    fuOptions: DWORD; // selects the record layout:
    // SMALLTEXT_TYPE_WITHOUT_CLIP set or clear -> without / with rclClip
    // SMALLTEXT_TYPE_ANSI set or clear -> AnsiChar / WideChar text
    // also holds fuOptions like in the ExtTextOut function
    iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
    exScale: Single; { X and Y scales from Page units to .01mm units }
    eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
  END;

  PEMRSmallTextOutHead = ^EMR_SMALLTEXTOUT_HEAD;

  // EMR_SMALLTEXTOUT layout with a clip rectangle and ANSI text.
  EMR_SMALLTEXTOUTCLIPA = RECORD
    emr: emr;
    ptlReference: TPoint; // might be in negative numbers, so take abs
    nChars: DWORD;
    fuOptions: DWORD; // for this layout:
    // SMALLTEXT_TYPE_WITHOUT_CLIP is not set
    // SMALLTEXT_TYPE_ANSI is set
    // also holds fuOptions like in the ExtTextOut function
    iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
    exScale: Single; { X and Y scales from Page units to .01mm units }
    eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
    rclClip: TRect;
    cString: Array [0 .. 0] of AnsiChar;
    { First character only; the remaining nChars - 1 characters follow in memory }
  END;

  PEMRSmallTextOutClipA = ^EMR_SMALLTEXTOUTCLIPA;

  // EMR_SMALLTEXTOUT layout with a clip rectangle and UNICODE text.
  EMR_SMALLTEXTOUTCLIPW = RECORD
    emr: emr;
    ptlReference: TPoint;
    nChars: DWORD;
    fuOptions: DWORD; // for this layout:
    // SMALLTEXT_TYPE_WITHOUT_CLIP is not set
    // SMALLTEXT_TYPE_ANSI is not set
    // also holds fuOptions like in the ExtTextOut function
    iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
    exScale: Single; { X and Y scales from Page units to .01mm units }
    eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
    rclClip: TRect;
    cString: Array [0 .. 0] of WideChar;
    { First character only; the remaining nChars - 1 characters follow in memory }
  END;

  PEMRSmallTextOutClipW = ^EMR_SMALLTEXTOUTCLIPW;

  // EMR_SMALLTEXTOUT layout without a clip rectangle, ANSI text.
  EMR_SMALLTEXTOUTA = RECORD
    emr: emr;
    ptlReference: TPoint;
    nChars: DWORD;
    fuOptions: DWORD; // for this layout:
    // SMALLTEXT_TYPE_WITHOUT_CLIP is set
    // SMALLTEXT_TYPE_ANSI is set
    // also holds fuOptions like in the ExtTextOut function
    iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
    exScale: Single; { X and Y scales from Page units to .01mm units }
    eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
    cString: Array [0 .. 0] of AnsiChar;
    { First character only; the remaining nChars - 1 characters follow in memory }
  END;

  PEMRSmallTextOutA = ^EMR_SMALLTEXTOUTA;

  // EMR_SMALLTEXTOUT layout without a clip rectangle, UNICODE text.
  EMR_SMALLTEXTOUTW = RECORD
    emr: emr;
    ptlReference: TPoint;
    nChars: DWORD;
    fuOptions: DWORD; // for this layout:
    // SMALLTEXT_TYPE_WITHOUT_CLIP is set
    // SMALLTEXT_TYPE_ANSI is not set
    // also holds fuOptions like in the ExtTextOut function
    iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
    exScale: Single; { X and Y scales from Page units to .01mm units }
    eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
    cString: Array [0 .. 0] of WideChar;
    { First character only; the remaining nChars - 1 characters follow in memory }
  END;

  PEMRSmallTextOutW = ^EMR_SMALLTEXTOUTW;

var
  // Off-screen bitmap created in the unit's initialization section; its canvas
  // handle is the device context used to measure text extents in EnumTextProc.
  FReferenceDC: VCL.Graphics.TBitmap;

{
  EnumEnhMetaFile callback that collects the text of a single metafile record.

  Handles EMR_EXTTEXTOUTA, EMR_EXTTEXTOUTW and EMR_SMALLTEXTOUT records; every
  other record type is ignored. For each text record a TEMFStrInfo is allocated
  and added to the TList<PEMFStrInfo> passed through lpData, provided that the
  trimmed text is not empty and the resulting display rectangle is usable.

  Parameters:
    DC, lpHTable, nObj - supplied by GDI, not used here.
    EMFR               - the record being enumerated.
    lpData             - the TList<PEMFStrInfo> that receives the elements.
                         Declared as LPARAM so the pointer is not truncated on
                         64-bit targets.
  Returns 1 (non-zero) so that the enumeration always continues.
}
function EnumTextProc(DC: HDC; lpHTable: PHANDLETABLE; EMFR: PENHMETARECORD;
  nObj: Integer; lpData: LPARAM): Integer; stdcall;
var
  nEMFElementList: TList<PEMFStrInfo>;

  {A rectangle is usable when it is not empty, lies at positive coordinates
   and is more than 4 units wide and high.}
  function _IsEffeetiveRect(const ARect: TRect): Boolean;
  begin
    Result := (not ARect.IsEmpty) and (ARect.Right > 0) and (ARect.Left > 0)
      and (ARect.Bottom - ARect.Top > 4) and (ARect.Right - ARect.Left > 4);
  end;

  {Limits ARect to at most ASize, keeping its top-left corner.}
  procedure _ShrinkRect(var ARect: TRect; ASize: TSize);
  var
    v: Integer;
  begin
    v := ARect.Left + ASize.cx;
    if ARect.Right > v then
      ARect.Right := v;
    v := ARect.Top + ASize.cy;
    if ARect.Bottom > v then
      ARect.Bottom := v;
  end;

  {True when ACharCount characters of ACharSize bytes starting AOffset bytes
   into the record lie completely inside the record (guards against reading
   past the end of a damaged record).}
  function _StringFits(AOffset, ACharCount, ACharSize: DWORD): Boolean;
  begin
    Result := (AOffset <= EMFR.nSize)
      and (ACharCount <= (EMFR.nSize - AOffset) div ACharSize);
  end;

  {Address of the byte AOffset bytes after the start of the record.}
  function _RecordPtr(AOffset: DWORD): Pointer;
  begin
    Result := Pointer(NativeUInt(EMFR) + AOffset);
  end;

  {Copies ACharCount ANSI characters and returns them as trimmed text. As in a
   zero-terminated buffer, the text ends at the first #0 character.}
  function _AnsiText(ASource: PAnsiChar; ACharCount: DWORD): string;
  var
    nRaw: AnsiString;
  begin
    SetString(nRaw, ASource, Integer(ACharCount));
    Result := Trim(string(PAnsiChar(nRaw)));
  end;

  {Copies ACharCount UTF-16 characters and returns them as trimmed text. As in
   a zero-terminated buffer, the text ends at the first #0 character.}
  function _WideText(ASource: PWideChar; ACharCount: DWORD): string;
  var
    nRaw: string;
  begin
    SetString(nRaw, ASource, Integer(ACharCount));
    Result := Trim(string(PWideChar(nRaw)));
  end;

  {Measures AText on the reference canvas, shrinks ARect to that extent and
   stores the element when text and rectangle are usable.}
  procedure _AddElement(const AText: string; const ARect: TRect);
  var
    nItem: PEMFStrInfo;
    nExtent: TSize;
    nRect: TRect;
  begin
    if AText = '' then
      Exit;

    {stays 0 x 0 when the measurement fails, which rejects the element below}
    nExtent.cx := 0;
    nExtent.cy := 0;
    Winapi.Windows.GetTextExtentPoint32(FReferenceDC.Canvas.Handle,
      AText, Length(AText), nExtent);

    nRect := ARect;
    nRect.NormalizeRect;
    _ShrinkRect(nRect, nExtent);
    if not _IsEffeetiveRect(nRect) then
      Exit;

    New(nItem);
    try
      nItem^.Text := AText;
      nItem^.DisplayRect := nRect;
      nItem^.LineKey := '';
      nEMFElementList.Add(nItem);
    except
      Dispose(nItem); {the list did not take ownership}
      raise;
    end;
  end;

var
  nEMRTO: PEMRExtTextOut;
  nEMRSTOHead: PEMRSmallTextOutHead;
  nIsAnsi, nNoClip: Boolean;
  nTextPtr: Pointer;
  nTextOffset: DWORD;
  nRect: TRect;
begin
  Result := 1;
  nEMFElementList := Pointer(lpData);

  case EMFR.iType of
    EMR_EXTTEXTOUTA:
      begin
        nEMRTO := PEMRExtTextOut(EMFR);
        {offString counts bytes from the start of the record}
        if _StringFits(nEMRTO.EMRText.offString, nEMRTO.EMRText.nChars, SizeOf(AnsiChar)) then
          _AddElement(
            _AnsiText(_RecordPtr(nEMRTO.EMRText.offString), nEMRTO.EMRText.nChars),
            nEMRTO.rclBounds);
      end;

    EMR_EXTTEXTOUTW:
      begin
        nEMRTO := PEMRExtTextOut(EMFR);
        {offString counts bytes from the start of the record}
        if _StringFits(nEMRTO.EMRText.offString, nEMRTO.EMRText.nChars, SizeOf(WideChar)) then
          _AddElement(
            _WideText(_RecordPtr(nEMRTO.EMRText.offString), nEMRTO.EMRText.nChars),
            nEMRTO.rclBounds);
      end;

    EMR_SMALLTEXTOUT:
      begin
        nEMRSTOHead := PEMRSmallTextOutHead(EMFR);
        nIsAnsi := (nEMRSTOHead.fuOptions and SMALLTEXT_TYPE_ANSI) = SMALLTEXT_TYPE_ANSI;
        nNoClip := (nEMRSTOHead.fuOptions and SMALLTEXT_TYPE_WITHOUT_CLIP) = SMALLTEXT_TYPE_WITHOUT_CLIP;

        if nNoClip then
        begin
          {no rectangle stored: start at the reference point and let the
           measured text extent decide the size}
          nRect := Rect(nEMRSTOHead.ptlReference.X, nEMRSTOHead.ptlReference.Y,
            MAXWORD, MAXWORD);
          if nIsAnsi then
            nTextPtr := @PEMRSmallTextOutA(EMFR)^.cString[0]
          else
            nTextPtr := @PEMRSmallTextOutW(EMFR)^.cString[0];
        end
        else
        begin
          if nIsAnsi then
          begin
            nRect := PEMRSmallTextOutClipA(EMFR)^.rclClip;
            nTextPtr := @PEMRSmallTextOutClipA(EMFR)^.cString[0];
          end
          else
          begin
            nRect := PEMRSmallTextOutClipW(EMFR)^.rclClip;
            nTextPtr := @PEMRSmallTextOutClipW(EMFR)^.cString[0];
          end;
          {the clip rectangle supplies right/bottom, the reference point top/left}
          nRect.TopLeft := nEMRSTOHead.ptlReference;
        end;

        nTextOffset := DWORD(NativeUInt(nTextPtr) - NativeUInt(EMFR));
        if nIsAnsi then
        begin
          if _StringFits(nTextOffset, nEMRSTOHead.nChars, SizeOf(AnsiChar)) then
            _AddElement(_AnsiText(nTextPtr, nEMRSTOHead.nChars), nRect);
        end
        else if _StringFits(nTextOffset, nEMRSTOHead.nChars, SizeOf(WideChar)) then
          _AddElement(_WideText(nTextPtr, nEMRSTOHead.nChars), nRect);
      end;
  end;
end;

type
  {
    Comparer that orders text elements from top-left to bottom-right.
    Besides returning the ordering it also writes the LineKey of elements it
    judges to be on the same row.
  }
  TEMFStrInfoCompare = class(TComparer<PEMFStrInfo>)
  public
    function Compare(const Left, Right: PEMFStrInfo): Integer; override;
  end;

{ TEMFStrInfoCompare }

{
  Compares two elements.

  Two elements are on the same row when the vertical centre of either one lies
  strictly between Top and Bottom of the other. Same-row elements receive a
  common LineKey and are ordered by the X of their centre points, then by the
  Y. Otherwise the result tells whether the centre of Left lies above (-1) or
  below (1) the rectangle of Right.
}
function TEMFStrInfoCompare.Compare(const Left, Right: PEMFStrInfo): Integer;

  {-1 when ACenterY is at or above ARange.Top, 1 when at or below
   ARange.Bottom, 0 when strictly inside.}
  function _VerticalRelation(ACenterY: Integer; const ARange: TRect): Int8;
  begin
    if ACenterY <= ARange.Top then
      Result := -1
    else if ACenterY >= ARange.Bottom then
      Result := 1
    else
      Result := 0;
  end;

var
  nLeftMid, nRightMid: TPoint;
  nLeftVsRight: Int8;
  nSharedKey: string;
begin
  nLeftMid := Left^.DisplayRect.CenterPoint;
  nRightMid := Right^.DisplayRect.CenterPoint;
  nLeftVsRight := _VerticalRelation(nLeftMid.Y, Right^.DisplayRect);

  {different rows: the vertical relation alone decides}
  if (nLeftVsRight <> 0)
    and (_VerticalRelation(nRightMid.Y, Left^.DisplayRect) <> 0) then
    Exit(nLeftVsRight);

  {same row: reuse an existing key (Left's first), otherwise create one}
  if Left^.LineKey <> '' then
    nSharedKey := Left^.LineKey
  else if Right^.LineKey <> '' then
    nSharedKey := Right^.LineKey
  else
    nSharedKey := TGUID.NewGuid.ToString;
  Left^.LineKey := nSharedKey;
  Right^.LineKey := nSharedKey;

  {order inside the row: horizontal position first, vertical as tie-break}
  if nLeftMid.X <> nRightMid.X then
  begin
    if nLeftMid.X < nRightMid.X then
      Result := -1
    else
      Result := 1;
  end
  else if nLeftMid.Y <> nRightMid.Y then
  begin
    if nLeftMid.Y < nRightMid.Y then
      Result := -1
    else
      Result := 1;
  end
  else
    Result := 0;
end;

{ TEMFStrInfoList }

{
  Reads every text element of AEMF, sorts the elements from top-left to
  bottom-right and appends them to this list.

  Each element is shifted down by the height accumulated from earlier calls
  (FMaxHeight), registered in the text -> index dictionary and written to the
  JSON text, one JSON array per row.

  Parameters:
    AEMF    - metafile to read.
    AHeight - receives the largest DisplayRect.Bottom of the elements on this
              page, before shifting (0 when the page holds no text).
}
procedure TEMFStrInfoList.Append(AEMF: TMetafile; var AHeight: Integer);
var
  nList: TList<PEMFStrInfo>;
  {held as an interface: TComparer descendants are reference counted, so the
   instance must not be freed manually once it has been passed as IComparer}
  nCompare: IComparer<PEMFStrInfo>;
  nPI: PEMFStrInfo;
  i, nTaken, nIndex: Integer;
  nTmpLineKey, nTmpJSONStr: string;
  nJ, nJLine: TQJson;
begin
  nList := TList<PEMFStrInfo>.Create;
  nTaken := 0; {number of leading nList entries now owned by FList}
  try
    {read the text elements of the file into the temporary list}
    EnumEnhMetafile(0, AEMF.Handle, @EnumTextProc, Pointer(nList), Rect(0, 0, 0, 0));

    {sort; best effort on purpose - a failure leaves the elements in whatever
     order the sort reached, and they are still appended}
    nCompare := TEMFStrInfoCompare.Create;
    try
      nList.Sort(nCompare);
    except
    end;

    {determine the page height, register the element texts in the dictionary}
    AHeight := 0;
    nJ := TQJson.Create;
    try
      nJ.DataType := jdtArray;
      nJLine := nil;
      nTmpLineKey := '';
      for i := 0 to nList.Count - 1 do
      begin
        nPI := nList[i];
        if nPI^.LineKey = '' then
          nPI^.LineKey := TGUID.NewGuid.ToString; {element alone on its row}
        {a different key starts a new row}
        if (nTmpLineKey = '') or (not SameText(nTmpLineKey, nPI^.LineKey)) then
          nJLine := nil;
        nTmpLineKey := nPI^.LineKey;

        if nPI^.DisplayRect.Bottom > AHeight then
          AHeight := nPI^.DisplayRect.Bottom;

        OffsetRect(nPI^.DisplayRect, 0, FMaxHeight);
        nIndex := FList.Add(nPI);
        nTaken := i + 1;
        FDic.AddOrSetValue(nPI^.Text, nIndex);

        if (nJLine = nil) then
          nJLine := nJ.AddArray('');

        nJLine.Add.AsString := nPI^.Text;
      end;

      {strip the outer brackets so that pages can be joined with commas}
      nTmpJSONStr := nJ.Encode(False);
      nTmpJSONStr := Copy(nTmpJSONStr, 2, Length(nTmpJSONStr) - 2);
      {a page without text contributes nothing; appending it anyway would
       leave a dangling comma and make the JSON invalid}
      if nTmpJSONStr <> '' then
      begin
        if FJSONStrs = '' then
          FJSONStrs := nTmpJSONStr
        else
          FJSONStrs := FJSONStrs + ',' + nTmpJSONStr;
      end;
    finally
      nJ.Free;
    end;
    FMaxHeight := FMaxHeight + AHeight;
  finally
    {after an exception, release the elements that never reached FList}
    for i := nTaken to nList.Count - 1 do
      Dispose(nList[i]);
    nList.Free;
  end;
end;

{Releases every stored element and resets height, JSON text and dictionary.}
procedure TEMFStrInfoList.Clear;
var
  nEntry: PEMFStrInfo;
begin
  {the list owns the records, so they are released before it is emptied}
  for nEntry in FList do
    Dispose(nEntry);
  FList.Clear;
  FDic.Clear;

  FJsonStrs := '';
  FMaxHeight := 0;
end;

{Creates an empty list: no elements, no accumulated height, no JSON text.}
constructor TEMFStrInfoList.Create;
begin
  inherited Create;
  FJsonStrs := '';
  FMaxHeight := 0;
  FDic := TDictionary<string, UInt32>.Create;
  FList := TList<PEMFStrInfo>.Create;
end;

{Releases the stored elements, then the list and the dictionary.}
destructor TEMFStrInfoList.Destroy;
var
  nEntry: PEMFStrInfo;
begin
  {the records were allocated with New and belong to this object}
  for nEntry in FList do
    Dispose(nEntry);
  FList.Free;
  FDic.Free;
  inherited;
end;

{Getter of Count: number of stored text elements.}
function TEMFStrInfoList.GetCount: UInt32;
begin
  Exit(FList.Count);
end;

{Getter of Items: returns a copy of the element stored at Index.}
function TEMFStrInfoList.GetItem(Index: UInt32): TEMFStrInfo;
var
  nEntry: PEMFStrInfo;
begin
  nEntry := FList[Index];
  Result := nEntry^;
end;

{Getter of JSONStr: wraps the collected row arrays in the outer JSON array brackets.}
function TEMFStrInfoList.GetJSONStrs: string;
begin
  Result := Format('[%s]', [FJSONStrs]);
end;

{
  Narrows the JSON text down by applying a chain of regular expressions.

  The first pattern is matched against the stored JSON text (without the outer
  brackets); each following pattern is matched against the first match of the
  previous one. The chain stops as soon as a pattern finds nothing.

  Parameters:
    ALeavePattern - patterns to apply, in order.
    AResult       - receives the final match, or '' when a pattern in the
                    chain did not match. With no patterns it receives the
                    whole stored JSON text.
  Returns True when AResult is not empty.
  Raises Exception when the regular expression engine reports an error.
}
function TEMFStrInfoList.StrAnalyze(ALeavePattern: array of string; var AResult: string): Boolean;

  {First match of APattern in AData, or '' when there is none.}
  function _RegExAnalyze(const AData, APattern: string): string;
  var
    nMatch: TMatch;
  begin
    nMatch := TRegEx.Match(AData, APattern, [roMultiLine]);
    if nMatch.Success then
      Result := nMatch.Value
    else
      Result := ''; {a string Result is not cleared automatically}
  end;

var
  i: Integer;
  nTmpData: string;
begin
  AResult := '';
  try
    nTmpData := FJSONStrs;
    for i := Low(ALeavePattern) to High(ALeavePattern) do
    begin
      nTmpData := _RegExAnalyze(nTmpData, ALeavePattern[i]);
      if nTmpData = '' then
        Break;
    end;
    AResult := nTmpData;
  except
    on E: Exception do
      raise Exception.CreateFmt('正则分析失败[%s]', [E.Message]);
  end;
  Result := AResult <> '';
end;

{
  Looks an element up by its exact text.
  On success AIndex receives its position in the list and AInfo a copy of the
  element; on failure AInfo is left untouched and False is returned.
}
function TEMFStrInfoList.TryGetInfo(AInfoName: string; var AInfo: TEMFStrInfo; var AIndex: UInt32): Boolean;
begin
  if not FDic.TryGetValue(AInfoName, AIndex) then
    Exit(False);

  AInfo := FList[AIndex]^;
  Result := True;
end;

initialization
  {shared 2048 x 2048, 24-bit off-screen bitmap whose canvas is used to
   measure text extents}
  FReferenceDC := VCL.Graphics.TBitmap.Create;
  FReferenceDC.PixelFormat := pf24bit;
  FReferenceDC.Width := 2048;
  FReferenceDC.Height := 2048;

finalization
  FreeAndNil(FReferenceDC);

end.

 

posted on 2015-07-28 17:11  黑暗煎饼果子  阅读(2407)  评论(0编辑  收藏  举报