灵格斯LDX,LD2等格式分析

词典格式是这样的

struct {
int Flag; //?LD2,?LDX等标记
byte[16] MD5;//估计是MD5值,LD2转换为LDX时只有前两个字段有变化
byte[4] byteorder;
short majorversion;
short minorversion;
byte[16] ID;
int version;
int flags;
byte[16] MD52;
int unknown1;
int unknown2;
int unknown3;
int unknown4;
} Header; //以上为词典头

int unknown; //总为1
int xmllength; //词典信息的xml长度
[Encrypted("DES")]
byte[xmllength] infoxml; //此xml保存词典信息,由DES加密。

struct {
int dicttype; //取值0,1,2,3,5
[Switch(dicttype)]
union {
[Case(3)] //dicttype=3则为本地词典
{
int dictlength;
[Length(dictlength)]
{
int indexLength; //索引表长度
int uncompressedwordoffset; //单词表在解压后文件的起始位置
int uncompressedxmloffset; //xml在解压后文件的起始位置
int uncompressedxmldatalength; //解压后的xml数据长度
int compresseddatalength;
[Length(indexLength)]
int[] indexes; //索引表
[Length(compresseddatalength)]
struct {
int compressedblockindexlength; //解压后每块数据大小
int uncompressedTotalLength; //解压后总大小
int[] compressedindexes; //每块数据在compresseddata中的起始位置,下一个为结束位置
byte[] compresseddata;
} compresseddata;
}
}
[Case(0)] //dicttype=0则表示词典结束
}Dictcontent;
} Dict[];

 

解压后的数据结构:
byte[] wordindexdata;
byte[] worddata;
byte[] xmldata;

每个单词的索引值乘以10就是wordindexdata中的偏移量
在这里读取以下数据结构
struct
{
int WordOffset; //单词在worddata中的偏移量
int XmlOffset; //单词在xmldata中的偏移量
byte Flag1;
byte Flag2; //若为1则表示在worddata中的数据是索引值,读取一个整型,然后用此索引重新计算word和xml
int NextWordOffset;//即下一条记录的WordOffset(每条记录前四个字段共4+4+1+1=10字节),与本条WordOffset相减可计算出word的长度,据此读取word
int NextXmlOffset;//即下一条记录的XmlOffset,与本条XmlOffset相减可计算出xml的长度,据此读取xml
} WordInfo;

读出的word即为单词
xml为单词释义,是变形的html格式,需要用xslt转换为标准html读取。

解密后词典信息的内容是这种格式的,图标是以十六进制字符串编码的PNG图片(icon属性以PNG文件签名89504E470D0A1A0A开头),并不是base64编码。

<dict id="3699E846E5BC094CA733B92FD733ACDF" version="2" byte_order="4321" type="1" name="Collins COBUILD 
Advanced Learner's English Dictionary" 
icon="89504E470D0A1A0A0000000D4948445200000010000000100803000000282D0F530000000467414D410000AFC837058AE90000001 
974455874536F6674776172650041646F626520496D616765526561647971C9653C00000114504C5445F7F7FAFCFCFD7F7FB1F9F9FBFAFA 
FCF4F4F86F6FA6F6F6F9C2C2D9000064A5A5C85E5E9DC3C3DA494991C5C5DBA1A1C50404671C1C765A5A9A111170AAAACBCACADECFCFE1F 
3F3F87070A8F5F5F9EFEFF57373A91818744B4B91EDEDF48080B2F8F8FA050569DFDFEB58589A9191BC6C6CA6BFBFD8F2F2F7C0C0D7BFBF 
D79999BFEAEAF2D3D3E4D1D1E20101663F3F8B212178141471D1D1E3A0A0C51616737979AE020267D2D2E37070A77E7EB147478E3E3E8A9 
494BD4E4E92B6B6D29E9EC40A0A6BB0B0CFD8D8E7FDFDFDDBDBE9F6F6FA4D4D94C9C9DDE1E1EC61619FBEBED7E3E3EE9797BFC1C1D98484 
B421217901016746468E6E6EA66C6CA5F0F0F5C9C9DE232379F0F0F6ECECF3000065000066FFFFFF5AB8BD910000005C74524E53FFFFFFF 
FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 
FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00836D17E0000000E94944415478DA628806817037431E3 
B4F3910132080188058495B2392934D224A8C49373A1A208080025C020E3ECA325E0C2EAEDC7C8CD10001C410CD681D1969C5C200D61AC2 
C40C104040065364A4693414B0B303041043B4B9962A3F2B4C209A03208018B8022205DDE1FC6867800062088B8A8AB4450844030410830 
15040164900208018ECA3224D789004000288215A54C54284016128400001997E919196303E332B4000011DA61F1919C8C202E6ABCB2B00 
04105085B19EA20DB7BF2FB38778B0916434400081743B796B023D276D161AA4131D0D104010E3A47885D51C8522404C800003000286447 
A9A58897A0000000049454E44AE426082" date="2007-11-21" update="2007-11-21"><from lang="en" charset="utf-8" 
sort="0" /><to lang="en" charset="utf-8" /><flag>000000B1</flag><info><item lang="en"><title>Collins COBUILD  
Advanced Learner's English Dictionary</title><edition>4</edition><description 
/><publisher><author>HarperCollins Publishers Ltd</author><email 
/><website>http://www.collins.co.uk/</website><copyright>Copyright © HarperCollins Publishers Ltd 
2004.</copyright></publisher><message /><license /></item><item lang="zh-CN"><title>柯林斯高阶英语词典 
</title><edition>4</edition><description /><publisher><author>HarperCollins Publishers Ltd</author><email 
/><website>http://www.collins.co.uk/</website><copyright>Copyright © HarperCollins Publishers Ltd 
2004.</copyright></publisher><message /><license /></item><item lang="zh-TW"><title>柯林斯高階英語詞典 
</title><edition>4</edition><description /><publisher><author>HarperCollins Publishers Ltd</author><email 
/><website>http://www.collins.co.uk/</website><copyright>Copyright © HarperCollins Publishers Ltd 
2004.</copyright></publisher><message /><license /></item></info><str /><res /><gls count="32730" 
index_count="32730" maxsize_key="45" maxsize_data="26986" /></dict>

 

解压后词典条目的xml是这样的:

<C><F><H><L><l>1</l>word</L> <h><U>n.</U></h></H><K><![CDATA[<TABLE cellpadding="3" cellspacing="0" 
border="0"><TR valign="top"><TD nowrap style="vertical- 
align:top;">Pronunciation:&nbsp;&nbsp;&nbsp;&nbsp;</TD><TD><B>'</B>wərd</TD></TR> <TR valign="top"><TD nowrap 
style="vertical- 
align:top;">Function:&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</TD><TD><I>noun</I></TD></TR> 
<TR valign="top"><TD nowrap style="vertical- 
align:top;">Etymology:&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</TD><TD><EXPANN>Middle 
English,</EXPANN> from Old English; akin to Old High German <I>wort</I> word, Latin <I>verbum,</I> Greek 
<I>eirein</I> to say, speak, Hittite <I>weriya-</I> to call, name</TD></TR> <TR valign="top"><TD nowrap 
style="vertical- 
align:top;">Date:&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</TD><TD>before 
12th century</TD></TR></TABLE><BR><B>1 a</B> <B>: </B>something that is said <B>b</B> 
<I><I>plural</I></I> <B>(1)</B> <B>: </B><a href="dict://key.[$DictID]/TALK"><u>TALK</u></a>, <a 
href="dict://key.[$DictID]/DISCOURSE"><u>DISCOURSE</u></a> &lt;putting one's feelings into 
<I>word</I><I>s</I>&gt; <B>(2)</B> <B>: </B>the text of a vocal musical composition <B>c</B> <B>: </B>a brief 
remark or conversation &lt;would like to have a <I>word</I> with you&gt; <BR><B>2 a (1)</B> <B>: </B>a speech 
sound or series of speech sounds that symbolizes and communicates a meaning usually without being divisible 
into smaller units capable of independent use <B>(2)</B> <B>: </B>the entire set of linguistic forms produced 
by combining a single base with various inflectional elements without change in the part of speech elements 
<B>b (1)</B> <B>: </B>a written or printed character or combination of characters representing a spoken word 
&lt;the number of <I>word</I><I>s</I> to a line&gt; ― sometimes used with the first letter of a real or 
pretended taboo word prefixed as an often humorous euphemism &lt;the first man to utter the f <I>word</I> on 
British TV ― <I>Time</I>&gt; &lt;we were not afraid to use the d <I>word</I> and talk about death ― Erma 
Bombeck&gt; <B>(2)</B> <B>: </B>any segment of written or printed discourse ordinarily appearing between spaces 
or between a space and a punctuation mark <B>c</B> <B>: </B>a number of bytes processed as a unit and conveying 
a quantum of information in communication and computer work <BR><B>3</B> <B>: </B><a href="dict://key. 
[$DictID]/ORDER"><u>ORDER</u></a>, <a href="dict://key.[$DictID]/COMMAND"><u>COMMAND</u></a> &lt;don't move 
till I give the <I>word</I>&gt; <BR><B>4</B> <I>often capitalized</I> <B>a</B> <B>: </B><a href="dict://key. 
[$DictID]/LOGOS"><u>LOGOS</u></a> <B>b</B> <B>: </B><a href="dict://key.[$DictID]/GOSPEL"><u>GOSPEL 1A</u></a> 
<B>c</B> <B>: </B>the expressed or manifested mind and will of God <BR><B>5 a</B> <B>: </B><a 
href="dict://key.[$DictID]/NEWS"><u>NEWS</u></a>, <a href="dict://key. 
[$DictID]/INFORMATION"><u>INFORMATION</u></a> &lt;sent <I>word</I> that he would be late&gt; <B>b</B> <B>: 
</B><a href="dict://key.[$DictID]/RUMOR"><u>RUMOR</u></a> <BR><B>6</B> <B>: </B>the act of speaking or of 
making verbal communication <BR><B>7</B> <B>: </B><a href="dict://key.[$DictID]/SAYING"><u>SAYING</u></a>, <a 
href="dict://key.[$DictID]/PROVERB"><u>PROVERB</u></a> <BR><B>8</B> <B>: </B><a href="dict://key. 
[$DictID]/PROMISE"><u>PROMISE</u></a>, <a href="dict://key.[$DictID]/DECLARATION"><u>DECLARATION</u></a> 
&lt;kept her <I>word</I>&gt; <BR><B>9</B> <B>: </B>a quarrelsome utterance or conversation ― usually used in 
plural &lt;they had <I>word</I><I>s</I> and parted&gt; <BR><B>10</B> <B>: </B>a verbal signal <B>: </B><a 
href="dict://key.[$DictID]/PASSWORD"><u>PASSWORD</u></a> <BR><B>11</B> <I>slang</I> ― used interjectionally to 
express agreement <BR>&nbsp;<B>–good word</B> <BR><B>1</B> <B>: </B>a favorable statement &lt;put in a <I>good 
word</I> for me&gt; <BR><B>2</B> <B>: </B>good news &lt;what's the <I>good word</I>&gt; <BR>&nbsp;<B>–in a 
word</B> <B>: </B>in short <BR>&nbsp;<B>–in so many words</B> <BR><B>1</B> <B>: </B>in exactly those terms 
&lt;implied that such actions were criminal but did not say so <I>in so many words</I>&gt; <BR><B>2</B> <B>: 
</B>in plain forthright language &lt;<I>in so many words,</I> she wasn't fit to be seen ― Jean Stafford&gt; 
<BR>&nbsp;<B>–of few words</B> <B>: </B>not inclined to say more than is necessary <B>: </B><a 
href="dict://key.[$DictID]/LACONIC"><u>LACONIC</u></a> &lt;a man <I>of few words</I>&gt; <BR>&nbsp;<B>–of 
one's word</B> <B>: </B>that can be relied on to keep a promise ― used only after <I>man</I> or <I>woman</I> 
&lt;a man <I>of his word</I>&gt; <BR>&nbsp;<B>–upon my word</B> <B>: </B>with my assurance <B>: </B><a 
href="dict://key.[$DictID]/INDEED"><u>INDEED</u></a>, <a href="dict://key. 
[$DictID]/ASSUREDLY"><u>ASSUREDLY</u></a> &lt;<I>upon my word,</I> I've never heard of such a thing&gt;]]>
</K></F><F><H><L><l>2</l>word</L> <h><U>n.</U></h></H><K><![CDATA[<TABLE cellpadding="3" cellspacing="0" 
border="0"><TR valign="top"><TD nowrap style="vertical- 
align:top;">Function:&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</TD><TD><I>intransitive 
verb</I></TD></TR> <TR valign="top"><TD nowrap style="vertical- 
align:top;">Date:&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</TD><TD>13th 
century</TD></TR></TABLE><BR><I>archaic</I> <B>: </B><a href="dict://key.[$DictID]/SPEAK"><u>SPEAK</u></a> 
<BR><I>transitive verb</I> <B>: </B>to express in words <B>: </B><a href="dict://key. 
[$DictID]/PHRASE"><u>PHRASE</u></a> &lt;a carefully <I>word</I><I>ed</I> reply&gt;]]></K></F><F><H><L><x 
K="#333399">word <h>(as used in expressions)</h></x></L> <h><U>n.</U></h></H><K><![CDATA[<a href="dict://key. 
[$DictID]/code word"><u>code word</u></a><BR><a href="dict://key.[$DictID]/content word"><u>content 
word</u></a><BR><a href="dict://key.[$DictID]/dirty word"><u>dirty word</u></a><BR><a href="dict://key. 
[$DictID]/entry word"><u>entry word</u></a><BR><a href="dict://key.[$DictID]/fighting word"><u>fighting 
word</u></a><BR><a href="dict://key.[$DictID]/form word"><u>form word</u></a><BR><a href="dict://key. 
[$DictID]/four letter word"><u>four letter word</u></a><BR><a href="dict://key.[$DictID]/function 
word"><u>function word</u></a><BR><a href="dict://key.[$DictID]/ghost word"><u>ghost word</u></a><BR><a 
href="dict://key.[$DictID]/guide word"><u>guide word</u></a><BR><a href="dict://key.[$DictID]/key word"><u>key 
word</u></a><BR><a href="dict://key.[$DictID]/last word"><u>last word</u></a><BR><a href="dict://key. 
[$DictID]/my word"><u>my word</u></a><BR><a href="dict://key.[$DictID]/weasel word"><u>weasel 
word</u></a><BR><a href="dict://key.[$DictID]/upon my word"><u>upon my word</u></a><BR><a href="dict://key. 
[$DictID]/of one's word"><u>of one's word</u></a><BR><a href="dict://key.[$DictID]/in a word"><u>in a 
word</u></a><BR><a href="dict://key.[$DictID]/good word"><u>good word</u></a><BR><a href="dict://key. 
[$DictID]/word association test"><u>word association test</u></a><BR><a href="dict://key.[$DictID]/word 
class"><u>word class</u></a><BR><a href="dict://key.[$DictID]/word for word"><u>word for word</u></a><BR><a 
href="dict://key.[$DictID]/word hoard"><u>word hoard</u></a><BR><a href="dict://key.[$DictID]/word 
mongering"><u>word mongering</u></a><BR><a href="dict://key.[$DictID]/word of mouth"><u>word of 
mouth</u></a><BR><a href="dict://key.[$DictID]/word order"><u>word order</u></a><BR><a href="dict://key. 
[$DictID]/word processing"><u>word processing</u></a><BR><a href="dict://key.[$DictID]/word process"><u>word 
process</u></a><BR><a href="dict://key.[$DictID]/word processor"><u>word processor</u></a><BR><a 
href="dict://key.[$DictID]/word square"><u>word square</u></a><BR><a href="dict://key.[$DictID]/word 
stress"><u>word stress</u></a><BR><a href="dict://key.[$DictID]/word accent"><u>word accent</u></a><BR><a 
href="dict://key.[$DictID]/word wrap"><u>word wrap</u></a><BR><a href="dict://key.[$DictID]/eat one's 
words"><u>eat one's words</u></a><BR><a href="dict://key.[$DictID]/of few words"><u>of few words</u></a><BR><a 
href="dict://key.[$DictID]/in so many words"><u>in so many words</u></a><BR>]]></K></F></C>
posted @ 2012-11-24 14:58  SuperBrothersTeam  阅读(8519)  评论(14)  编辑  收藏  举报