delphi 解析提取 doc/docx/ppt/pptx 文件的文本内容
2018 年写的,有许多不足之处,仅供参考。
office 文件格式说明,参见:https://learn.microsoft.com/en-us/openspecs/office_file_formats/MS-OFFFFLP/8aea05e3-8c1e-4a9a-9614-31f71e679456
点击查看代码
unit u_WordFile;
interface
uses
u_CompoundBinaryFile, u_DirectoryEntry, Classes, u_Helper, Windows, u_Pcd,
SysUtils, StrUtils;
const
//存储于FIB中的word文档标识
WORD_IDENT = $A5EC;
type
{Word文件 TODO 或许需要处理文档语言,与编码的情况}
TWordFile = class(TCompoundBinaryFile)
private
//Word格式的版本
FNFib: Word;
//是否使用1Table
FUsed1Table: Boolean;
//标识文档使用的语言
FLidFE: Word;
//文档中正文的总字数
FCcpText: Integer;
//文档中页脚的总字数
FCcpFtn: Integer;
//文档中页眉的总字数
FCcpHdd: Integer;
//文档中批注的总字数
FCcpAtn: Integer;
//文档中尾注的总字数
FCcpEdn: Integer;
//文档中文本框的总字数
FCcpTxbx: Integer;
//文档中页眉文本框的总字数
FCcpHdrTxbx: Integer;
//Clx在Table Stream中的偏移
FFcClx: DWORD;
//Clx的大小
FLcbClx: DWORD;
//该列表存储Pcd结构关联的文本的长度
FTextLenList: TList;
//Pcd结构列表
FPcdList: TList;
//文本内容
FTextContent: WideString;
FParagraphText: WideString;
FFootnoteText: WideString;
FHeaderText: WideString;
FCommentText: WideString;
FEndnoteText: WideString;
FTextboxText: WideString;
FHeaderTextboxText: WideString;
procedure ReadWordDocument();
procedure ReadFibBase(Stream: TStream);
procedure ReadFibRgW97(Stream: TStream);
procedure ReadFibRgLw97(Stream: TStream);
procedure ReadFibRgFcLcb(Stream: TStream);
procedure ReadTableStream();
procedure ReadText();
procedure SplitText();
procedure ClearPcdList();
protected
procedure ReadContent(); override;
public
destructor Destroy(); override;
function GetTextContent(): WideString; override;
function FindText(Text: WideString): Boolean; override;
end;
implementation
{ TWordFile }
destructor TWordFile.Destroy();
begin
if FTextLenList <> nil then
FreeAndNil(FTextLenList);
if FPcdList <> nil then
begin
ClearPcdList();
FreeAndNil(FPcdList);
end;
inherited;
end;
procedure TWordFile.ReadContent;
begin
inherited;
ReadWordDocument();
end;
procedure TWordFile.ReadWordDocument;
var
entry: TDirectoryEntry;
begin
entry := FRootDirEntry.FindChild('WordDocument');
ReadDirEntryData(entry);
entry.DataStream.Seek(0, soFromBeginning);
ReadFibBase(entry.DataStream);
ReadFibRgW97(entry.DataStream);
ReadFibRgLw97(entry.DataStream);
ReadFibRgFcLcb(entry.DataStream);
ReadTableStream();
ReadText();
FilterSpecialChar(FTextContent);
//SplitText();
end;
{读取FibBase结构的数据}
procedure TWordFile.ReadFibBase(Stream: TStream);
var
wordIdent: Word;
flags: Word;
encrypted: Boolean;
begin
Stream.ReadBuffer(wordIdent, 2);
if wordIdent <> WORD_IDENT then
begin
raise CompoundFileException.Create(FFileName + ' 不是doc文件.');
end;
Stream.ReadBuffer(FNFib, 2);
Stream.Seek(6, soFromCurrent);
Stream.ReadBuffer(flags, 2);
encrypted := IsFlagSet(flags, 9);
if encrypted then
raise CompoundFileException.Create(FFileName + ' 已被加密.');
FUsed1Table := IsFlagSet(flags, 10);
Stream.Seek(20, soFromCurrent);
end;
{读取FibRgW97结构的数据}
procedure TWordFile.ReadFibRgW97(Stream: TStream);
var
//FibRgW97所包含的Word字段的个数
csw: Word;
begin
Stream.ReadBuffer(csw, 2);
Stream.Seek(26, soFromCurrent);
Stream.ReadBuffer(FLidFE, 2);
if csw > 14 then
begin
Stream.Seek((csw - 14) * 2, soFromCurrent);
end;
end;
{读取FibRgLw97结构的数据}
procedure TWordFile.ReadFibRgLw97(Stream: TStream);
var
//FibRgLw97所包含的DWORD字段的个数
cslw: Word;
begin
Stream.ReadBuffer(cslw, 2);
Stream.Seek(12, soFromCurrent);
Stream.ReadBuffer(FCcpText, 4);
Stream.ReadBuffer(FCcpFtn, 4);
Stream.ReadBuffer(FCcpHdd, 4);
Stream.Seek(4, soFromCurrent);
Stream.ReadBuffer(FCcpAtn, 4);
Stream.ReadBuffer(FCcpEdn, 4);
Stream.ReadBuffer(FCcpTxbx, 4);
Stream.ReadBuffer(FCcpHdrTxbx, 4);
Stream.Seek(cslw * 4 - 44, soFromCurrent);
end;
{读取FibRgFcLcb结构的数据}
procedure TWordFile.ReadFibRgFcLcb(Stream: TStream);
var
//FibRgFcLcb所包含的无符号64位字段的个数
cbRgFcLcb: Word;
begin
Stream.ReadBuffer(cbRgFcLcb, 2);
Stream.Seek(66 * 4, soFromCurrent);
Stream.ReadBuffer(FFcClx, 4);
Stream.ReadBuffer(FLcbClx, 4);
end;
{读取Table中的数据}
procedure TWordFile.ReadTableStream();
var
entry: TDirectoryEntry;
stream: TStream;
//clx结构位于数据流的位置
clxOffset: Int64;
//clx结构在数据流的结束位置
clxEnd: Int64;
//标识RgPrc结构或Pcdt结构
clxt: Byte;
//Prc结构包含的PrcData结构下的GrpPrl结构的长度
cbGrpprl : SmallInt;
//PlcPcd结构体列表的大小
lcb: DWORD;
pcdCount: DWORD;
i: DWORD;
textStartPoint: DWORD;
textEndPoint: DWORD;
info: Word;
fc: DWORD;
prm: Word;
pcd: TPcd;
begin
if FUsed1Table then
entry := FRootDirEntry.FindChild('1Table')
else
entry := FRootDirEntry.FindChild('0Table');
ReadDirEntryData(entry);
stream := entry.DataStream;
clxOffset := FFcClx;
clxEnd := clxOffset + FLcbClx;
stream.Seek(clxOffset, soFromBeginning);
stream.ReadBuffer(clxt, 1);
cbGrpprl := 0;
//跳过Prc结构
while (clxt = 1) and (stream.Position < clxEnd) do
begin
stream.ReadBuffer(cbGrpprl , 2);
stream.Seek(cbGrpprl , soFromCurrent);
stream.ReadBuffer(clxt, 1);
end;
stream.ReadBuffer(lcb, 4);
pcdCount := (lcb - 4) div 12;
FTextLenList := TList.Create;
for i:=1 to pcdCount do
begin
stream.ReadBuffer(textStartPoint, 4);
stream.ReadBuffer(textEndPoint, 4);
FTextLenList.Add(Pointer(textEndPoint - textStartPoint));
stream.Seek(-4, soFromCurrent);
end;
stream.Seek(4, soFromCurrent);
FPcdList := TList.Create;
for i:=1 to pcdCount do
begin
stream.ReadBuffer(info, 2);
stream.ReadBuffer(fc, 4);
stream.ReadBuffer(prm, 2);
pcd := TPcd.Create(info, fc, prm);
FPcdList.Add(pcd);
end;
end;
procedure TWordFile.ReadText();
var
entry: TDirectoryEntry;
stream: TStream;
textLen: DWORD;
i, j: DWORD;
pcd: TPcd;
len: DWORD;
wideStr: WideString;
ansiStr: AnsiString;
begin
entry := FRootDirEntry.FindChild('WordDocument');
stream := entry.DataStream;
stream.Seek(0, soFromBeginning);
textLen := 0;
j := FPcdList.Count - 1;
for i:=0 to j do
begin
pcd := TPcd(FPcdList.Items[i]);
stream.Seek(pcd.TextLocation, soFromBeginning);
len := DWORD(FTextLenList.Items[i]);
if pcd.Unicode then
begin
SetLength(wideStr, len);
stream.ReadBuffer(wideStr[1], len * 2);
end
else
begin
SetLength(ansiStr, len);
stream.ReadBuffer(ansiStr[1], len);
wideStr := ansiStr;
end;
SetLength(FTextContent, textLen + len);
Move(wideStr[1], FTextContent[textLen + 1], len * 2);
Inc(textLen, len);
end;
end;
procedure TWordFile.SplitText();
var
paragraphEnd,
footnoteEnd,
headerEnd,
commentEnd,
endnoteEnd,
textboxEnd: Integer;
begin
paragraphEnd := FCcpText;
footnoteEnd := paragraphEnd + FCcpFtn;
headerEnd := footnoteEnd + FCcpHdd;
commentEnd := headerEnd + FCcpAtn;
endnoteEnd := commentEnd + FCcpEdn;
textboxEnd := endnoteEnd + FCcpTxbx;
FParagraphText := MidStr(FTextContent, 0, paragraphEnd);
FilterSpecialChar(FParagraphText);
FFootnoteText := MidStr(FTextContent, paragraphEnd, FCcpFtn);
FilterSpecialChar(FFootnoteText);
FHeaderText := MidStr(FTextContent, footnoteEnd, FCcpHdd);
FilterSpecialChar(FHeaderText);
FCommentText := MidStr(FTextContent, headerEnd, FCcpAtn);
FilterSpecialChar(FCommentText);
FEndnoteText := MidStr(FTextContent, commentEnd, FCcpEdn);
FilterSpecialChar(FEndnoteText);
FTextboxText := MidStr(FTextContent, endnoteEnd, FCcpTxbx);
FilterSpecialChar(FTextboxText);
FHeaderTextboxText := MidStr(FTextContent, textboxEnd, FCcpHdrTxbx);
FilterSpecialChar(FHeaderTextboxText);
end;
procedure TWordFile.ClearPcdList;
var
i, j: Integer;
begin
j := FPcdList.Count - 1;
for i:=0 to j do
begin
TObject(FPcdList.Items[i]).Free;
end;
FPcdList.Clear;
end;
function TWordFile.GetTextContent: WideString;
begin
Result := FTextContent;
end;
function TWordFile.FindText(Text: WideString): Boolean;
begin
Result := Pos(Text, FTextContent) > 0;
end;
end.
点击查看代码
unit u_WordOOXmlFile;
interface
uses
u_OfficeOpenXmlFile, NativeXml, SysUtils;
type
TWordOOXmlFile = class(TOfficeOpenXmlFile)
private
FTextContent: WideString;
procedure ReadText(Node: TXmlNode);
protected
procedure ReadContent(); override;
public
function GetTextContent(): WideString; override;
function FindText(Text: WideString): Boolean; override;
end;
implementation
{ TWordOOXmlFile }
function TWordOOXmlFile.GetTextContent: WideString;
begin
Result := FTextContent;
end;
procedure TWordOOXmlFile.ReadContent;
var
xml: TNativeXml;
begin
inherited;
xml := GetXml('document.xml');
try
if (xml <> nil) and (xml.Root <> nil) then
begin
ReadText(xml.Root);
end
else
begin
raise OOXmlException.Create(FFileName + ' 文件已损坏.');
end;
finally
if xml <> nil then
xml.Free;
end;
end;
procedure TWordOOXmlFile.ReadText(Node: TXmlNode);
var
i, j: Integer;
childNode: TXmlNode;
text: WideString;
oldLen: Integer;
space: string;
begin
j := Node.NodeCount - 1;
for i:=0 to j do
begin
childNode := Node.Nodes[i];
if SameText(childNode.Name, 'w:t') then
begin
text := childNode.ValueAsWidestring;
oldLen := Length(FTextContent);
SetLength(FTextContent, oldLen + Length(text));
Move(text[1], FTextContent[oldLen + 1], Length(text) * 2);
if childNode.HasAttribute('xml:space') then
begin
space := childNode.AttributeByName['xml:space'];
if space = 'preserve' then
begin
SetLength(FTextContent, Length(FTextContent) + 1);
FTextContent[Length(FTextContent)] := ' ';
end;
end;
end
else if SameText(childNode.Name, 'w:cr') or SameText(childNode.Name, 'w:br') then
begin
SetLength(FTextContent, Length(FTextContent) + 2);
FTextContent[Length(FTextContent) - 1] := #13;
FTextContent[Length(FTextContent)] := #10;
end
else if SameText(childNode.Name, 'w:tab') then
begin
SetLength(FTextContent, Length(FTextContent) + 1);
FTextContent[Length(FTextContent)] := #9;
end
else if SameText(childNode.Name, 'w:p') then
begin
ReadText(childNode);
SetLength(FTextContent, Length(FTextContent) + 2);
FTextContent[Length(FTextContent) - 1] := #13;
FTextContent[Length(FTextContent)] := #10;
end
else
begin
ReadText(childNode);
end;
end;
end;
function TWordOOXmlFile.FindText(Text: WideString): Boolean;
begin
Result := Pos(Text, FTextContent) > 0;
end;
end.
点击查看代码
unit u_PowerPointFile;
interface
uses
u_CompoundBinaryFile, Windows, Classes, u_DirectoryEntry, u_Helper;
const
//标识记录是一个容器记录
CONTAINER = $F;
type
//记录类型
TRecordType = (Unknown = 0, SlideListWithText = $0FF0, OfficeArtRecord = $F00D,
HeadersFooters = $0FD9, TextCharsAtom = $0FA0, TextBytesAtom = $0FA8,
CString = $0FBA);
{PPT 文件}
TPowerPointFile = class(TCompoundBinaryFile)
private
//文本内容
FTextContent: WideString;
procedure ReadRecords(Stream: TStream);
protected
procedure ReadContent(); override;
public
function GetTextContent(): WideString; override;
function FindText(Text: WideString): Boolean; override;
end;
implementation
{ TPowerPointFile }
function TPowerPointFile.GetTextContent: WideString;
begin
Result := FTextContent;
end;
procedure TPowerPointFile.ReadContent;
var
entry: TDirectoryEntry;
begin
inherited;
entry := FRootDirEntry.FindChild('PowerPoint Document');
ReadDirEntryData(entry);
entry.DataStream.Seek(0, soFromBeginning);
ReadRecords(entry.DataStream);
end;
{读取PowerPoint Document数据流中的记录}
procedure TPowerPointFile.ReadRecords(Stream: TStream);
procedure ReadRecord(ParentRecordType: Word);
var
value: Word;
//record版本
version: Word;
//该值的意义取决于recordType
//instance: Word;
//记录的类型
recordType: Word;
//记录的长度
recordLen: DWORD;
recordOffset: Int64;
wideStr: WideString;
ansiStr: AnsiString;
textLen: Integer;
begin
Stream.ReadBuffer(value, 2);
version := value and $F;
//instance := value and $FFF0;
Stream.ReadBuffer(recordType, 2);
Stream.ReadBuffer(recordLen, 4);
recordOffset := Stream.Position;
if version = CONTAINER then
begin
//该记录是一个容器,递归读取其下的记录
while Stream.Position < recordOffset + recordLen do
ReadRecord(recordType);
end
else if (ParentRecordType = Ord(OfficeArtRecord)) or
(ParentRecordType = Ord(SlideListWithText)) or
(ParentRecordType = Ord(HeadersFooters)) then
begin
{OfficeArtRecord/SlideListWithText/HeadersFooters记录的
TextCharsAtom/CString/TextBytesAtom子记录中记录的数据就是我们所需要的
ppt文本内容}
if (recordType = Ord(TextCharsAtom)) or (recordType = Ord(CString)) then
begin
SetLength(wideStr, recordLen div 2);
Stream.ReadBuffer(wideStr[1], recordLen);
ConverLF(wideStr);
//一个记录中存储的是一个幻灯片的文本,需要在其后添加换行符用以分隔
wideStr := wideStr + #13#10;
textLen := Length(FTextContent);
SetLength(FTextContent, textLen + Length(wideStr));
Move(wideStr[1], FTextContent[textLen + 1], Length(wideStr) * 2);
end
else if recordType = Ord(TextBytesAtom) then
begin
SetLength(ansiStr, recordLen);
Stream.ReadBuffer(ansiStr[1], recordLen);
wideStr := ansiStr;
ConverLF(wideStr);
//一个记录中存储的是一个幻灯片的文本,需要在其后添加换行符用以分隔
wideStr := wideStr + #13#10;
textLen := Length(FTextContent);
SetLength(FTextContent, textLen + Length(wideStr));
Move(wideStr[1], FTextContent[textLen + 1], Length(wideStr) * 2);
end
else
Stream.Seek(recordLen, soFromCurrent);
end
else
begin
Stream.Seek(recordLen, soFromCurrent);
end;
end;
begin
while Stream.Position < Stream.Size do
begin
ReadRecord(0);
end;
end;
function TPowerPointFile.FindText(Text: WideString): Boolean;
begin
Result := Pos(Text, FTextContent) > 0;
end;
end.
点击查看代码
unit u_PowerPointOOXmlFile;
interface
uses
u_OfficeOpenXmlFile, Classes, NativeXml, SysUtils;
type
TPowerPointOOXmlFile = class(TOfficeOpenXmlFile)
private
FTextContent: WideString;
procedure GetSlideFileIdx(var List: TList);
procedure ReadSlideFile(FileIdx: Integer);
procedure ReadText(Xml: TNativeXml); overload;
procedure ReadText(Node: TXmlNode); overload;
protected
procedure ReadContent(); override;
public
function GetTextContent(): WideString; override;
function FindText(Text: WideString): Boolean; override;
end;
{
a:p 为段落
a:t 为文本内容
a:br 为换行
a:tbl 为一个表格
a:tr 为一行表格
a:tc 为一个单元格
}
implementation
{ TPowerPointOOXmlFile }
function TPowerPointOOXmlFile.GetTextContent: WideString;
begin
Result := FTextContent;
end;
procedure TPowerPointOOXmlFile.ReadContent;
var
slideFileIdxList: TList;
i, j: Integer;
hasNextSlide: Boolean;
slideNo: Integer;
slideName: string;
slideFileIdx: Integer;
begin
inherited;
slideFileIdxList := TList.Create;
try
GetSlideFileIdx(slideFileIdxList);
{从第一个幻灯片文件开始,向后顺序读取文件,解析文本内容}
hasNextSlide := slideFileIdxList.Count > 0;
slideNo := 1;
while hasNextSlide do
begin
hasNextSlide := False;
slideName := 'slide' + IntToStr(slideNo) + '.xml';
j := slideFileIdxList.Count - 1;
for i:=0 to j do
begin
slideFileIdx := Integer(slideFileIdxList[i]);
if SameText(FZip.Filename[slideFileIdx], slideName) then
begin
ReadSlideFile(slideFileIdx);
slideFileIdxList.Delete(i);
hasNextSlide := slideFileIdxList.Count > 0;
Inc(slideNo);
Break;
end;
end
end;
finally
slideFileIdxList.Free;
end;
end;
{获取幻灯片文件位于ZIP包中的下标,以列表的形式返回
**列表中的幻灯片文件是无序的}
procedure TPowerPointOOXmlFile.GetSlideFileIdx(var List: TList);
var
i: Integer;
begin
i := 0;
while i < FZip.Count do
begin
if Pos('ppt\slides\slide', FZip.FullName[i]) > 0 then
begin
List.Add(Pointer(i));
end;
Inc(i);
end;
end;
{读取幻灯片文件中的文本}
procedure TPowerPointOOXmlFile.ReadSlideFile(FileIdx: Integer);
var
stream: TMemoryStream;
xml: TNativeXml;
begin
xml := nil;
stream := TMemoryStream.Create;
try
FZip.UnZipToStreamByIndex(stream, FileIdx);
xml := TNativeXml.Create;
xml.LoadFromStream(stream);
ReadText(xml);
finally
if xml <> nil then
xml.Free;
if stream <> nil then
stream.free;
end;
end;
{解析xml,从中抽取去文本}
procedure TPowerPointOOXmlFile.ReadText(Xml: TNativeXml);
begin
if (xml <> nil) and (xml.Root <> nil) then
begin
ReadText(xml.Root);
end
else
begin
raise OOXmlException.Create(FFileName + ' 文件已损坏.');
end;
end;
{解析xml节点,从中抽取去文本}
procedure TPowerPointOOXmlFile.ReadText(Node: TXmlNode);
var
i, j: Integer;
childNode: TXmlNode;
text: WideString;
oldLen: Integer;
begin
j := Node.NodeCount - 1;
for i:=0 to j do
begin
childNode := Node.Nodes[i];
if SameText(childNode.Name, 'a:t') then
begin
text := childNode.ValueAsWidestring;
oldLen := Length(FTextContent);
SetLength(FTextContent, oldLen + Length(text));
Move(text[1], FTextContent[oldLen + 1], Length(text) * 2);
end
else if SameText(childNode.Name, 'a:br') then
begin
SetLength(FTextContent, Length(FTextContent) + 2);
FTextContent[Length(FTextContent) - 1] := #13;
FTextContent[Length(FTextContent)] := #10;
end
else if SameText(childNode.Name, 'a:t') then
begin
SetLength(FTextContent, Length(FTextContent) + 1);
FTextContent[Length(FTextContent)] := #9;
end
else if SameText(childNode.Name, 'a:p') then
begin
ReadText(childNode);
SetLength(FTextContent, Length(FTextContent) + 2);
FTextContent[Length(FTextContent) - 1] := #13;
FTextContent[Length(FTextContent)] := #10;
end
else
begin
ReadText(childNode);
end;
end;
end;
function TPowerPointOOXmlFile.FindText(Text: WideString): Boolean;
begin
Result := Pos(Text, FTextContent) > 0;
end;
end.
点击查看代码
unit u_CompoundBinaryFile;
interface
uses
u_IFile, Classes, SysUtils, Windows, u_DirectoryEntry, u_Helper;
const
//复合文件头大小
HEADER_SIZE = 512;
//目录条目大小
DIR_ENTRY_SIZE = 128;
//最大的常规扇区编号
MAX_REG_SECT = $FFFFFFFA;
//保留的扇区编号
RESERVE_SECT = $FFFFFFFB;
//标识FAT中的DIFAT扇区
DIF_SECT = $FFFFFFFC;
//标识FAT中的FAT扇区
FAT_SECT = $FFFFFFFD;
//标识扇区链结束
END_OF_CHAIN = $FFFFFFFE;
//标识扇区未使用
FREE_SECTOR = $FFFFFFFF;
//标识目录条目不存在
NOSTREAM = $FFFFFFFF;
//未知的目录条目
UNKNOWN_DIR_ENTRY = 0;
//存储数据的目录条目
STORAGE_DIR_ENTRY = 1;
//存储数据流的目录条目?
STREAM_DIR_ENTRY = 2;
//根目录条目
ROOT_DIR_ENTRY = 5;
type
{复合文件异常}
CompoundFileException = class(Exception);
{复合二进制文件}
TCompoundBinaryFile = class(TInterfacedObject, IFile)
private
procedure ReadHeader();
procedure ReadFATSectors();
procedure ReadFirst109FatSectors();
procedure ReadFATSectorsFromDIFAT();
procedure ReadDirectory();
function GetDirectoryEntry(EntryIdx: DWORD; out leftSiblingEntryIdx,
rightSiblingEntryIdx, childEntryIdx: DWORD): TDirectoryEntry;
procedure ReadDirectoryEntry(Parent: TDirectoryEntry; EntryIdx: DWORD);
function GetDirectoryEntryPos(EntryIdx: DWORD): Int64;
procedure ReadMiniFATSectors();
procedure ReadMiniSectors();
function GetDirEntryDataSectorChain(DirEntry: TDirectoryEntry): TList;
protected
FFileName: string;
FFileStream: TFileStream;
//扇区大小
FSectorSize: Word;
//小扇区大小
FMiniSectorSize: Word;
//FAT扇区数量
FFatSectorCount: DWORD;
//第一个目录条目所在的扇区下标
FFirstDirSectorNo: DWORD;
//小数据的截止大小(小于该值),小数据存储于小扇区中
FMiniStreamCutoffSize: DWORD;
//第一个Mini-FAT扇区编号
FFirstMiniFATSectorNo: DWORD;
//Mini-FAT扇区数量
FMiniFATCount: DWORD;
//第一个DIFAT扇区的编号
FFirstDIFATSectorNo: DWORD;
//DIFAT扇区数量
FDIFATCount: DWORD;
//文件分配表扇区列表
FFATSectorList: TList;
//目录信息的扇区链
FDirSectorChain: TList;
//根目录条目
FRootDirEntry: TDirectoryEntry;
//小文件分配表扇区列表
FMiniFatSectorList: TList;
//小扇区列表
FMiniSectorList: TList;
function GetNormalSectorOffset(SectorNo: DWORD): Int64;
function GetNextNormalSectorNo(SectorNo: DWORD): DWORD;
function GetNextMiniStreamNo(MiniStreamNo: DWORD): DWORD;
function GetMiniStreamOffset(MiniStreamNo: DWORD): Int64;
procedure ReadDirEntryData(DirEntry: TDirectoryEntry);
procedure ReadContent(); virtual; abstract;
public
constructor Create(FileName: string);
destructor Destroy(); override;
function GetObjectInstance(): TObject;
function GetTextContent(): WideString; virtual; abstract;
function FindText(Text: WideString): Boolean; virtual; abstract;
end;
implementation
{ TCompoundBinaryFile }
constructor TCompoundBinaryFile.Create(FileName: string);
begin
FFileName := FileName;
FFileStream := TFileStream.Create(FileName, fmOpenRead
or fmShareDenyWrite);
ReadHeader();
ReadFATSectors();
ReadDirectory();
ReadMiniFATSectors();
ReadMiniSectors();
ReadContent();
end;
destructor TCompoundBinaryFile.Destroy;
begin
if FFileStream <> nil then
FreeAndNil(FFileStream);
if FFATSectorList <> nil then
FreeAndNil(FFATSectorList);
if FDirSectorChain <> nil then
FreeAndNil(FDirSectorChain);
if FMiniFatSectorList <> nil then
FreeAndNil(FMiniFatSectorList);
if FMiniSectorList <> nil then
FreeAndNil(FMiniSectorList);
if FRootDirEntry <> nil then
FreeAndNil(FRootDirEntry);
inherited;
end;
{读取复合文件的头部信息}
procedure TCompoundBinaryFile.ReadHeader;
{判断文件是否为office文件}
function OfficeFile(): Boolean;
var
buf: array of Byte;
begin
SetLength(buf, 8);
FFileStream.ReadBuffer(buf[0], 8);
Result := (buf[0] = $D0) and
(buf[1] = $CF) and
(buf[2] = $11) and
(buf[3] = $E0) and
(buf[4] = $A1) and
(buf[5] = $B1) and
(buf[6] = $1A) and
(buf[7] = $E1);
end;
var
//字节顺序
byteOrder: Word;
begin
if not OfficeFile() then
raise CompoundFileException.Create(FFileName + ' 不是office文件');
FFileStream.Seek(20, soFromCurrent);
FFileStream.ReadBuffer(byteOrder, 2);
if byteOrder <> $FFFE then
raise CompoundFileException.Create(FFileName + ' 不支持该版本的office文件. 原因:无法解析文件字节顺序');
FFileStream.ReadBuffer(FSectorSize, 2);
FSectorSize := Power(2, FSectorSize);
FFileStream.ReadBuffer(FMiniSectorSize, 2);
FMiniSectorSize := Power(2, FMiniSectorSize);
FFileStream.Seek(10, soFromCurrent);
FFileStream.ReadBuffer(FFatSectorCount, 4);
FFileStream.ReadBuffer(FFirstDirSectorNo, 4);
FFileStream.Seek(4, soFromCurrent);
FFileStream.ReadBuffer(FMiniStreamCutoffSize, 4);
FFileStream.ReadBuffer(FFirstMiniFATSectorNo, 4);
FFileStream.ReadBuffer(FMiniFATCount, 4);
FFileStream.ReadBuffer(FFirstDIFATSectorNo, 4);
FFileStream.ReadBuffer(FDIFATCount, 4);
end;
{读取FAT扇区列表}
procedure TCompoundBinaryFile.ReadFATSectors;
begin
FFATSectorList := TList.Create;
if FFatSectorCount > 0 then
begin
ReadFirst109FatSectors();
ReadFATSectorsFromDIFAT();
end;
end;
{读取header区域的109个FAT}
procedure TCompoundBinaryFile.ReadFirst109FatSectors();
var
i, j: Integer;
sectorNo: DWORD;
begin
j := 109;
for i:=1 to j do
begin
FFileStream.ReadBuffer(sectorNo, 4);
if sectorNo = FREE_SECTOR then
Break;
FFATSectorList.Add(Pointer(sectorNo));
end;
end;
{从DIFAT扇区读取FAT扇区}
procedure TCompoundBinaryFile.ReadFATSectorsFromDIFAT();
var
DIFATSectorNo: DWORD;
offset: Int64;
i, j: Integer;
FATSectorNo: DWORD;
begin
//判断是否存在DIFAT
if FDIFATCount > 0 then
begin
DIFATSectorNo := FFirstDIFATSectorNo;
//判断DIFAT扇区链是否结束
while DIFATSectorNo <> END_OF_CHAIN do
begin
offset := GetNormalSectorOffset(DIFATSectorNo);
FFileStream.Seek(offset, soFromBeginning);
j := 127;
for i:=1 to j do
begin
FFileStream.ReadBuffer(FATSectorNo, 4);
if FATSectorNo = FREE_SECTOR then
Exit;
FFATSectorList.Add(Pointer(FATSectorNo));
end;
FFileStream.ReadBuffer(DIFATSectorNo, 4);
end;
end;
end;
{读取目录信息}
procedure TCompoundBinaryFile.ReadDirectory();
var
serctorNo: DWORD;
leftSiblingEntryIdx: DWORD;
rightSiblingEntryIdx: DWORD;
childEntryIdx: DWORD;
begin
FDirSectorChain := TList.Create;
if FFirstDirSectorNo >= RESERVE_SECT then
Exit;
serctorNo := FFirstDirSectorNo;
while serctorNo <> END_OF_CHAIN do
begin
FDirSectorChain.Add(Pointer(serctorNo));
serctorNo := GetNextNormalSectorNo(serctorNo);
end;
FRootDirEntry := GetDirectoryEntry(0, leftSiblingEntryIdx,
rightSiblingEntryIdx, childEntryIdx);
ReadDirectoryEntry(FRootDirEntry, childEntryIdx);
end;
{读取MiniFAT列表}
procedure TCompoundBinaryFile.ReadMiniFATSectors();
var
serctorNo: DWORD;
begin
FMiniFatSectorList := TList.Create;
if FMiniFATCount > 0 then
begin
serctorNo := FFirstMiniFATSectorNo;
while serctorNo <> END_OF_CHAIN do
begin
FMiniFatSectorList.Add(Pointer(serctorNo));
serctorNo := GetNextNormalSectorNo(serctorNo);
end;
end;
end;
{读取小扇区列表}
procedure TCompoundBinaryFile.ReadMiniSectors();
var
serctorNo: DWORD;
begin
FMiniSectorList := TList.Create;
if FMiniFATCount > 0 then
begin
serctorNo := FRootDirEntry.DataSectorNo;
while serctorNo <> END_OF_CHAIN do
begin
FMiniSectorList.Add(Pointer(serctorNo));
serctorNo := GetNextNormalSectorNo(serctorNo);
end;
end;
end;
{获取指定扇区编号位于文件中的位置}
function TCompoundBinaryFile.GetNormalSectorOffset(SectorNo: DWORD): Int64;
begin
Result := HEADER_SIZE + FSectorSize * SectorNo;
end;
{获取指定扇区的扇区链的下一个扇区}
function TCompoundBinaryFile.GetNextNormalSectorNo(SectorNo: DWORD): DWORD;
var
FATIndex: Integer;
FATSerctorNo: DWORD;
FATOffset: Int64;
begin
{计算出扇区编号所对应的FAT}
FATIndex := SectorNo div 128;
FATSerctorNo := DWORD(FFATSectorList.Items[FATIndex]);
{从FAT中查询出下一个扇区的编号}
FATOffset := GetNormalSectorOffset(FATSerctorNo);
FFileStream.Seek(FATOffset + 4 * (SectorNo mod 128), soFromBeginning);
FFileStream.ReadBuffer(Result, 4);
end;
{获取指定小扇区的扇区链的下一个小扇区}
function TCompoundBinaryFile.GetNextMiniStreamNo(MiniStreamNo: DWORD): DWORD;
var
sectorNo: DWORD;
begin
sectorNo := DWORD(FMiniFatSectorList.Items[MiniStreamNo div 128]);
FFileStream.Seek(GetNormalSectorOffset(sectorNo) + 4 * (MiniStreamNo mod 128),
soFromBeginning);
FFileStream.ReadBuffer(Result, 4);
end;
{获取指定小数据位于文件中的位置}
function TCompoundBinaryFile.GetMiniStreamOffset(MiniStreamNo: DWORD): Int64;
var
sectorNo: DWORD;
begin
sectorNo := DWORD(FMiniSectorList.Items[MiniStreamNo * FMiniSectorSize div FSectorSize]);
Result := GetNormalSectorOffset(sectorNo) + MiniStreamNo * FMiniSectorSize mod FSectorSize;
end;
{从数据流中解析出指定下标的目录条目}
function TCompoundBinaryFile.GetDirectoryEntry(EntryIdx: DWORD; out leftSiblingEntryIdx,
rightSiblingEntryIdx, childEntryIdx: DWORD): TDirectoryEntry;
var
name: WideString;
nameLen: Word;
entryType: Byte;
dataSectorNo: DWORD;
dataLen: DWORD;
begin
Result := nil;
leftSiblingEntryIdx := NOSTREAM;
rightSiblingEntryIdx := NOSTREAM;
childEntryIdx := NOSTREAM;
FFileStream.Seek(GetDirectoryEntryPos(EntryIdx), soFromBeginning);
SetLength(name, 32);
FFileStream.ReadBuffer(name[1], 64);
FFileStream.ReadBuffer(nameLen, 2);
//取出来的name是为Unicode编码,nameLen为其字节长度,需要转为字符长度(并减去#0结束字符)
nameLen := nameLen div 2 - 1;
SetLength(name, nameLen);
FFileStream.ReadBuffer(entryType, 1);
if not (entryType in [UNKNOWN_DIR_ENTRY, STORAGE_DIR_ENTRY, STREAM_DIR_ENTRY,
ROOT_DIR_ENTRY]) then
Exit;
FFileStream.Seek(1, soFromCurrent);
FFileStream.ReadBuffer(leftSiblingEntryIdx, 4);
FFileStream.ReadBuffer(rightSiblingEntryIdx, 4);
FFileStream.ReadBuffer(childEntryIdx, 4);
FFileStream.Seek(36, soFromCurrent);
FFileStream.ReadBuffer(dataSectorNo, 4);
FFileStream.ReadBuffer(dataLen, 4);
Result := TDirectoryEntry.Create(EntryIdx, name, entryType, dataSectorNo,
dataLen);
end;
{递归读取给定目录条目的所有子条目}
procedure TCompoundBinaryFile.ReadDirectoryEntry(Parent: TDirectoryEntry; EntryIdx: DWORD);
var
entry: TDirectoryEntry;
leftSiblingEntryIdx: DWORD;
rightSiblingEntryIdx: DWORD;
childEntryIdx: DWORD;
begin
entry := GetDirectoryEntry(EntryIdx, leftSiblingEntryIdx,
rightSiblingEntryIdx, childEntryIdx);
if entry = nil then
Exit;
Parent.AddChild(entry);
if leftSiblingEntryIdx <> NOSTREAM then
ReadDirectoryEntry(Parent, leftSiblingEntryIdx);
if rightSiblingEntryIdx <> NOSTREAM then
ReadDirectoryEntry(Parent, rightSiblingEntryIdx);
if childEntryIdx <> NOSTREAM then
ReadDirectoryEntry(entry, childEntryIdx);
end;
{获取指定下标的目录条目位于数据流的位置}
function TCompoundBinaryFile.GetDirectoryEntryPos(EntryIdx: DWORD): Int64;
var
idx: Integer;
serctorNo: DWORD;
begin
//计算该条目所属的扇区位于扇区链中的位置
idx := DIR_ENTRY_SIZE * EntryIdx div FSectorSize;
serctorNo := DWORD(FDirSectorChain.Items[idx]);
Result := GetNormalSectorOffset(serctorNo) + (DIR_ENTRY_SIZE * EntryIdx mod FSectorSize);
end;
{读取目录条目的数据流}
procedure TCompoundBinaryFile.ReadDirEntryData(DirEntry: TDirectoryEntry);
var
serctorChain: TList;
mStream: TMemoryStream;
i, j: Integer;
miniStreamNo: DWORD;
sectorNo: DWORD;
offset: Int64;
dataLen: DWORD;
readLen: DWORD;
buff: array of Byte;
exp: Exception;
begin
serctorChain := nil;
exp := nil;
try
try
serctorChain := GetDirEntryDataSectorChain(DirEntry);
mStream := TMemoryStream.Create;
dataLen := DirEntry.DataLen;
j := serctorChain.Count - 1;
if DirEntry.DataLen >= FMiniStreamCutoffSize then
begin
SetLength(buff, FSectorSize);
for i:=0 to j do
begin
sectorNo := DWORD(serctorChain.Items[i]);
offset := GetNormalSectorOffset(sectorNo);
FFileStream.Seek(offset, soFromBeginning);
if FSectorSize < dataLen then
readLen := FSectorSize
else
readLen := dataLen;
Dec(dataLen, readLen);
FFileStream.ReadBuffer(buff[0], readLen);
mStream.WriteBuffer(buff[0], readLen);
end;
end
else
begin
SetLength(buff, FMiniSectorSize);
for i:=0 to j do
begin
miniStreamNo := DWORD(serctorChain.Items[i]);
offset := GetMiniStreamOffset(miniStreamNo);
FFileStream.Seek(offset, soFromBeginning);
if FMiniSectorSize < dataLen then
readLen := FMiniSectorSize
else
readLen := dataLen;
Dec(dataLen, readLen);
FFileStream.ReadBuffer(buff[0], readLen);
mStream.WriteBuffer(buff[0], readLen);
end;
end;
DirEntry.DataStream := mStream;
except
on e: Exception do
begin
exp := e;
end;
end;
finally
if serctorChain <> nil then
serctorChain.Free;
end;
if exp <> nil then
raise exp;
end;
{如果目录条目关联的数据是位于扇区链中,则返回扇区链,否则返回小数据链}
function TCompoundBinaryFile.GetDirEntryDataSectorChain(DirEntry: TDirectoryEntry): TList;
var
sectorNo: DWORD;
begin
Result := TList.Create;
sectorNo := DirEntry.DataSectorNo;
if DirEntry.DataLen >= FMiniStreamCutoffSize then
begin
while sectorNo <> END_OF_CHAIN do
begin
Result.Add(Pointer(sectorNo));
sectorNo := GetNextNormalSectorNo(sectorNo);
end;
end
else
begin
while sectorNo <> END_OF_CHAIN do
begin
Result.Add(Pointer(sectorNo));
sectorNo := GetNextMiniStreamNo(sectorNo);
end;
end;
end;
function TCompoundBinaryFile.GetObjectInstance(): TObject;
begin
Result := Self;
end;
end.
点击查看代码
unit u_DirectoryEntry;
interface
uses
Windows, Classes, SysUtils;
type
{目录条目}
TDirectoryEntry = class(TObject)
private
//该条目的下标
FIdx: DWORD;
//名称
FName: WideString;
//类型
FEntryType: Byte;
//该条目所包含的数据的扇区下标
FDataSectorNo: DWORD;
//所包含数据的长度
FDataLen: DWORD;
//该条目的子条目列表
FChildren: TList;
//该条目关联的数据流
FDataStream: TStream;
function GetChild(Index: Integer): TDirectoryEntry;
function GetChildCount(): Integer;
procedure ClearChild();
public
constructor Create(Idx: DWORD; Name: WideString; EntryType: Byte;
DataSectorNo: DWORD; DataLen: DWORD);
destructor Destroy(); override;
property Idx: DWORD read FIdx write FIdx;
property Name: WideString read FName write FName;
property EntryType: Byte read FEntryType write FEntryType;
property DataSectorNo: DWORD read FDataSectorNo write FDataSectorNo;
property DataLen: DWORD read FDataLen write FDataLen;
property Children[Index: Integer]: TDirectoryEntry read GetChild;
property ChildCount: Integer read GetChildCount;
property DataStream: TStream read FDataStream write FDataStream;
function AddChild(Child: TDirectoryEntry): Integer;
function FindChild(Name: WideString): TDirectoryEntry;
end;
implementation
{ TDirectoryEntry }
constructor TDirectoryEntry.Create(Idx: DWORD; Name: WideString; EntryType: Byte;
DataSectorNo: DWORD; DataLen: DWORD);
begin
FIdx := Idx;
FName := Name;
FEntryType := EntryType;
FDataSectorNo := DataSectorNo;
FDataLen := DataLen;
FChildren := TList.Create;
end;
destructor TDirectoryEntry.Destroy();
begin
if FChildren <> nil then
begin
ClearChild();
FreeAndNil(FChildren);
end;
if FDataStream <> nil then
FreeAndNil(FDataStream);
inherited;
end;
function TDirectoryEntry.GetChild(Index: Integer): TDirectoryEntry;
begin
Result := nil;
if (Index > -1) and (Index < FChildren.Count) then
Result := FChildren[Index];
end;
function TDirectoryEntry.AddChild(Child: TDirectoryEntry): Integer;
begin
Result := FChildren.Add(Child);
end;
function TDirectoryEntry.GetChildCount: Integer;
begin
Result := FChildren.Count;
end;
function TDirectoryEntry.FindChild(Name: WideString): TDirectoryEntry;
var
i, j: Integer;
entry: TDirectoryEntry;
begin
Result := nil;
j := FChildren.Count - 1;
for i:=0 to j do
begin
entry := FChildren[i];
if WideSameStr(entry.FName, Name) then
begin
Result := entry;
Break;
end;
end;
end;
procedure TDirectoryEntry.ClearChild;
var
i, j: Integer;
begin
j := FChildren.Count - 1;
for i:=0 to j do
begin
TObject(FChildren.Items[i]).Free;
end;
FChildren.Clear;
end;
end.
点击查看代码
unit u_OfficeOpenXmlFile;
interface
uses
u_IFile, Forms, SysUtils, NativeXml, Classes, VCLZip;
type
OOXmlException = class(Exception);
TOfficeOpenXmlFile = class(TInterfacedObject, IFile)
protected
FFileName: string;
FZip: TVCLZip;
procedure ReadContent(); virtual; abstract;
function GetXml(XmlName: string): TNativeXml;
public
constructor Create(FileName: string);
destructor Destroy(); override;
function GetObjectInstance(): TObject;
function GetTextContent(): WideString; virtual; abstract;
function FindText(Text: WideString): Boolean; virtual; abstract;
end;
implementation
{ TOfficeOpenXmlFile }
constructor TOfficeOpenXmlFile.Create(FileName: string);
begin
FFileName := FileName;
FZip := TVCLZip.Create(nil);
FZip.ZipName := FileName;
FZip.ReadZip;
ReadContent();
end;
destructor TOfficeOpenXmlFile.Destroy;
begin
if FZip <> nil then
FreeAndNil(FZip);
inherited;
end;
function TOfficeOpenXmlFile.GetObjectInstance: TObject;
begin
Result := Self;
end;
{返回指定名称的XML}
function TOfficeOpenXmlFile.GetXml(XmlName: string): TNativeXml;
var
i: Integer;
stream: TMemoryStream;
begin
Result := nil;
i := 0;
while i < FZip.Count do
begin
if SameText(ExtractFileName(FZip.Filename[i]), XmlName) then
begin
stream := TMemoryStream.Create;
try
FZip.UnZipToStreamByIndex(stream, i);
Result := TNativeXml.Create;
Result.LoadFromStream(stream);
finally
if stream <> nil then
stream.free;
end;
Break;
end;
Inc(i);
end;
end;
end.
点击查看代码
unit u_IFile;
interface
type
IFile = interface
['{2A38E410-13FE-49A2-86EF-CAFE8BB60556}']
//返回该接口所在的对象(delphi中接口与其所在对象的地址是不一样的,接口不能够直接转换为对象)
function GetObjectInstance(): TObject;
//返回文本内容
function GetTextContent(): WideString;
//查找文本
function FindText(Text: WideString): Boolean;
end;
implementation
end.
点击查看代码
unit u_Pcd;
interface
uses
Windows;
type
TPcd = class(TObject)
private
FTextLocation: DWORD;
FUnicode: Boolean;
public
constructor Create(Info: Word; Fc: DWORD; Prm: Word);
property TextLocation: DWORD read FTextLocation;
property Unicode: Boolean read FUnicode;
end;
implementation
{ TPcd }
constructor TPcd.Create(Info: Word; Fc: DWORD; Prm: Word);
begin
FTextLocation := Fc and $3FFFFFFF;
FUnicode := (Fc and $40000000) = 0;
if not FUnicode then
FTextLocation := FTextLocation div 2;
end;
end.
点击查看代码
{帮助方法单元}
unit u_Helper;
interface
uses
Windows;
const
{Word文件中使用的特殊Ascii字符}
//单元格之间的分隔符,连续的两个代表下一行的单元格
CELL_MARK = #07;
//制表符
TAB = #09;
BREAK_LINE = #11;
PAGE_BREAK_OR_SECTION_MARK = #12;
//换行符
LINE_BREAK = #13;
CLUMN_BREAK = #14;
//标识特殊标记开始
FIELD_START = #19;
//FIELD_START到FIELD_SEPERATOR之间的字串是控制用的(不顯示)
FIELD_SEPERATOR = #20;
//FIELD_SEPERATOR到FIELD_END之間的字串則是顯示用的
FIELD_END = #21;
NON_BREAKING_HYPHEN = #30;
NON_REQUIRED_HYPHEN = #31;
NON_BREAKING_SPACE = #160;
{计算BaseNum的平方值,指数为Exponent}
function Power(BaseNum: Word; Exponent: Word): Word;
{判断参数Value中FlagPos所指定的位是否为1}
function IsFlagSet(Value: DWORD; FlagPos: Word): Boolean;
{过滤office文本中的特殊字符}
procedure FilterSpecialChar(var Source: WideString);
{将office文件使用的换行符转换为delphi中使用的换行符}
procedure ConverLF(var Source: WideString);
implementation
function Power(BaseNum: Word; Exponent: Word): Word;
var
i: Integer;
begin
Result := BaseNum;
for i:=2 to Exponent do
begin
Result := Result * BaseNum;
end;
end;
function IsFlagSet(Value: DWORD; FlagPos: Word): Boolean;
var
num: Word;
begin
Dec(FlagPos, 1);
num := Power(2, FlagPos);
Result := (Value and num) = num;
end;
procedure FilterSpecialChar(var Source: WideString);
var
i: DWORD;
len: DWORD;
startIgnore: Boolean;
startIgnoreIdx: DWORD;
begin
startIgnore := False;
startIgnoreIdx := 0;
i := 0;
len := Length(Source);
while i < len do
begin
Inc(i);
{过控制字符和通讯专用字符,除了Tab}
if (Source[i] < #32) and (Source[i] <> #9) then
begin
if startIgnore then
begin
if Source[i] = FIELD_SEPERATOR then
begin
startIgnore := False;
Delete(Source, startIgnoreIdx, i - startIgnoreIdx + 1);
i := startIgnoreIdx - 1;
len := Length(Source);
end;
end
else
begin
if Source[i] = FIELD_START then
begin
{遇到office的特殊标记符号,记录下下标,之后遇到的任何字符都不再过滤,
直到遇到FIELD_SEPERATOR后,将两个符号间的字符都删除}
startIgnore := True;
startIgnoreIdx := i;
end
else if Source[i] = LINE_BREAK then
begin
{office中的换行符为#13,这里需要替换为#13#10}
Inc(len);
SetLength(Source, len);
//如果i之后还有字符,则将其向后移一位
if i < len - 1 then
Move(Source[i + 1], Source[i + 2], (len - i - 1) * 2);
Source[i] := #13;
Source[i + 1] := #10;
Inc(i);
end
else if Source[i] = CELL_MARK then
begin
if (i < len) and (Source[i + 1] = CELL_MARK) then
begin
{连续两个CELL_MARK代表单元格换行,需要将这两个CELL_MARK符号替换为#13#10}
Source[i] := #13;
Source[i + 1] := #10;
Inc(i);
end
else
begin
//相邻的单元格之间使用TAB分隔
Source[i] := #9;
end;
end
else
begin
{其它字符直接删除}
Delete(Source, i, 1);
Dec(i);
Dec(len);
end;
end;
end;
end;
end;
procedure ConverLF(var Source: WideString);
var
i: DWORD;
len: DWORD;
begin
i := 0;
len := Length(Source);
while i < len do
begin
Inc(i);
if (Source[i] < #32) and (Source[i] <> #9) then
begin
if Source[i] = LINE_BREAK then
begin
{office中的换行符为#13,这里需要替换为#13#10}
SetLength(Source, len + 1);
//如果当前字符后面还有字符,则将后面的字符向后移一位
if i < len then
Move(Source[i + 1], Source[i + 2], (len - i) * 2);
Source[i] := #13;
Source[i + 1] := #10;
Inc(i);
Inc(len);
end
else
begin
Delete(Source, i, 1);
Dec(i);
Dec(len);
end;
end;
end;
end;
end.
禁止转载,如果需要你可以添加引用或链接。
理由:我认为由于恣意转载,现在国内的技术博客充斥着大量的重复内容,有些甚至是错误的,这不利于技术探索。
理由:我认为由于恣意转载,现在国内的技术博客充斥着大量的重复内容,有些甚至是错误的,这不利于技术探索。

浙公网安备 33010602011771号