LZW Compression Algorithm

LZW (Lempel-Ziv-Welch) was the first widely used universal data compression method on computers. It typically compresses large English texts to about half of their original size. LZW is still used today in GIF and PDF.

The basic idea: a sequence of adjacent input symbols is called a phrase. Phrases are added to a table as the input stream is read, and the indices of the phrases in the table are used to form the output.

The table has two columns: the phrase and its index. Each phrase is composed of a prefix and a symbol: the prefix is an index into the table that references another phrase, and the symbol is appended to that prefix to form the new phrase.

Encode Algorithm:

initialize table;
word <- NIL;

while (there is input)
{
 symbol <- next symbol from input;
 phrase <- word + symbol;
 
 if (phrase exists in the table)
 {
  word <- phrase;
 }
 else
 {
  output (index(word));
  add phrase to the table;
  word <- symbol;
 }
}
output (index(word));

Decode Algorithm:

initialize table;
phrase <- NIL;

while (there is input)
{
 wordIndex <- next code from input;
 
 if (wordIndex exists in the table)
 {
  word <- table[wordIndex];
  phrase <- phrase + head(word);
  if (phrase.Length > 1)
  {
   add phrase to the table;
  }
 }
 else
 {
  phrase <- phrase + head(phrase);
  add phrase to the table;
  word <- phrase; // the new entry is exactly table[wordIndex]
 }
 phrase <- word;
 output (word);
}

 

I implemented the algorithm in C# according to the <PDF Reference>, which specifies additional encoding details:

 

Phrase
    /// <summary>
    
/// This class represents a Phrase.
    
/// </summary>
    public struct Phrase
    {
        
/// <summary>
        
/// Gets or sets the PrefixIndex. 
        
/// </summary>
        public int PrefixIndex;
        
/// <summary>
        
/// Gets or sets the Symbol. 
        
/// </summary>
        public int Symbol;

        
/// <summary>
        
/// Initializes a new instance of the <see cref="Phrase"/> struct.
        
/// </summary>
        
/// <param name="symbol">The symbol.</param>
        public Phrase(int symbol)
        {
            Symbol 
= symbol;
            PrefixIndex 
= -1;
        }

        
/// <summary>
        
/// Initializes a new instance of the <see cref="Phrase"/> struct.
        
/// </summary>
        
/// <param name="symbol">The symbol.</param>
        
/// <param name="refIdnex">The ref idnex.</param>
        public Phrase(int symbol, int refIdnex)
        {
            Symbol 
= symbol;
            PrefixIndex 
= refIdnex;
        }

        
/// <summary>
        
/// Indicates whether this instance and a specified object are equal.
        
/// </summary>
        
/// <param name="obj">Another object to compare to.</param>
        
/// <returns>
        
/// true if obj and this instance are the same type and represent the same value; otherwise, false.
        
/// </returns>
        public override bool Equals(object obj)
        {
            
return this.Symbol == ((Phrase)obj).Symbol
                
&& this.PrefixIndex == ((Phrase)obj).PrefixIndex;
        }

        
/// <summary>
        
/// Returns the hash code for this instance.
        
/// </summary>
        
/// <returns>
        
/// A 32-bit signed integer that is the hash code for this instance.
        
/// </returns>
        public override int GetHashCode()
        {
            
if (PrefixIndex != -1)
            {
                
return PrefixIndex + Symbol;
            }
            
else
            {
                
return -Symbol;
            }
        }

        
/// <summary>
        
/// Returns the fully qualified type name of this instance.
        
/// </summary>
        
/// <returns>
        
/// A <see cref="T:System.String"></see> containing a fully qualified type name.
        
/// </returns>
        public override string ToString()
        {
            
return String.Format("{0}+{1}", PrefixIndex, Symbol);
        }
    }

 

 

 

LZW_Encode
    /// <summary>
    
/// Lzw compress algorithm.
    
/// </summary>
    public partial class Lzw
    {
        
/// <summary>
        
/// Clear Table Marker. 
        
/// </summary>
        const int ClearTableMarker = 256;
        
/// <summary>
        
/// End Of Data Marker. 
        
/// </summary>
        const int EOD = 257;

        
/// <summary>
        
/// Encodes the specified input.
        
/// </summary>
        
/// <param name="input">The input.</param>
        
/// <param name="output">The output.</param>
        public static void Encode(Stream input, Stream output)
        {
            MemoryStream memStream 
= new MemoryStream();
            BitStream outStream 
= new BitStream(memStream);
            
// <code, numbits>
            foreach (Pair<intint> code in Analyze(input))
            {
                outStream.WriteBitsBigEndian(code.Left, code.Right);
            }
            outStream.Flush();
            memStream.Position 
= 0;
            BitOrder.Reverse(memStream, output);
        }

        
private static void InitializeTable(Dictionary<Phrase, int> Table)
        {
            Table.Clear();
            
for (int i = 0; i <= 257; i++)
            {
                Table.Add(
new Phrase(i), i);
            }
        }

        
private static IEnumerable<Pair<intint>> Analyze(Stream input)
        {
            Dictionary
<Phrase, int> Table = new Dictionary<Phrase, int>();
            InitializeTable(Table);
            
int numbits = 9;
            
yield return new Pair<intint>(ClearTableMarker, numbits);

            Phrase lastPhrase 
= new Phrase(-1);
            Phrase currentPhrase 
= new Phrase();
            
while (true)
            {
                
int symbol = input.ReadByte();

                
int wordIndex = Table.ContainsKey(lastPhrase) ? Table[lastPhrase] : -1;

                
if (symbol == -1)
                {
                    
yield return new Pair<intint>(wordIndex, numbits);
                    
break;
                }

                currentPhrase.PrefixIndex 
= wordIndex;
                currentPhrase.Symbol 
= symbol;

                
if (Table.ContainsKey(currentPhrase))
                {
                    lastPhrase 
= currentPhrase;
                }
                
else
                {
                    
yield return new Pair<intint>(wordIndex, numbits);
                    
if (Table.Count < 4096)
                    {
                        Table.Add(currentPhrase, Table.Count);
                        
if (numbits == 9 && Table.Count > 511)
                        {
                            numbits 
= 10;
                        }
                        
else if (numbits == 10 && Table.Count > 1023)
                        {
                            numbits 
= 11;
                        }
                        
else if (numbits == 11 && Table.Count > 2047)
                        {
                            numbits 
= 12;
                        }
                    }
                    
else
                    {
                        InitializeTable(Table);
                        
yield return new Pair<intint>(ClearTableMarker, numbits);
                        numbits 
= 9;
                    }
                    lastPhrase.Symbol 
= symbol;
                    lastPhrase.PrefixIndex 
= -1;
                }
            }
            
yield return new Pair<intint>(EOD, numbits);
        }
    }

 

 

 

LZW_Decode
 /// <summary>
    
/// Lzw Decompress algorithm.
    
/// </summary>
    public partial class Lzw
    {
        
/// <summary>
        
/// Decodes the specified input.
        
/// </summary>
        
/// <param name="input">The input.</param>
        
/// <param name="output">The output.</param>
        public static void Decode(Stream input, Stream output)
        {
            
long pos = input.Position;
            MemoryStream memStream 
= BitOrder.Reverse(input);
            BitStream inStream 
= new BitStream(memStream);

            Dictionary
<int, Phrase> Table = new Dictionary<int, Phrase>();
            Phrase phrase 
= new Phrase();
            
int wordIndex = -1;
            
int numbits = 9;
            
while (true)
            {
                
int code = inStream.ReadBitsBigEndian(numbits);
                
if (code == ClearTableMarker)
                {
                    InitializeTable(Table);
                    wordIndex 
= -1;
                    numbits 
= 9;
                }
                
else if (code == EOD || code == -1)
                {
                    
break;
                }
                
else
                {
                    
if (code < 256)
                    {
                        phrase.PrefixIndex 
= wordIndex;
                        phrase.Symbol 
= code;
                        
if (wordIndex != -1)
                        {
                            Table.Add(Table.Count, phrase);
                        }
                        output.WriteByte((
byte)code);
                    }
                    
else
                    {
                        
if (Table.ContainsKey(code))
                        {
                            
int head = GetHead(Table[code], Table);
                            phrase.PrefixIndex 
= wordIndex;
                            phrase.Symbol 
= head;
                            Table.Add(Table.Count, phrase);
                        }
                        
else
                        {
                            
int head = GetHead(Table[wordIndex], Table);
                            phrase.PrefixIndex 
= wordIndex;
                            phrase.Symbol 
= head;
                            
//Table.Count == code
                            Table.Add(Table.Count, phrase);
                        }
                        Output(code, Table, output);
                    }
                    
if (numbits == 9 && Table.Count > 510)
                    {
                        numbits 
= 10;
                    }
                    
else if (numbits == 10 && Table.Count > 1022)
                    {
                        numbits 
= 11;
                    }
                    
else if (numbits == 11 && Table.Count > 2046)
                    {
                        numbits 
= 12;
                    }
                    wordIndex 
= code;
                }
            }
            input.Position 
= pos + inStream.Position;
        }

        
private static int GetHead(Phrase phrase, Dictionary<int, Phrase> Table)
        {
            
if (phrase.PrefixIndex == -1)
            {
                
return phrase.Symbol;
            }
            
else
            {
                Phrase entry 
= Table[phrase.PrefixIndex];
                
while (entry.PrefixIndex != -1)
                {
                    entry 
= Table[entry.PrefixIndex];
                }
                
return entry.Symbol;
            }
        }

        
private static void Output(int code, Dictionary<int, Phrase> Table, Stream output)
        {
            List
<byte> symbols = new List<byte>();
            Phrase entry 
= Table[code];
            
while (entry.PrefixIndex != -1)
            {
                symbols.Add((
byte)entry.Symbol);
                entry 
= Table[entry.PrefixIndex];
            }
            symbols.Add((
byte)entry.Symbol);
            symbols.Reverse();
            output.Write(symbols.ToArray(), 
0, symbols.Count);
        }

        
private static void InitializeTable(Dictionary<int, Phrase> Table)
        {
            Table.Clear();
            
for (int i = 0; i <= 257; i++)
            {
                Table.Add(i, 
new Phrase(i));
            }
        }
    }

 

 

 

BitOrder
    /// <summary>
    
/// Revers BitOrder of bytes
    
/// </summary>
    public class BitOrder
    {
        
static readonly byte[] BitReverseTable = 
{
  
0x000x800x400xC00x200xA00x600xE00x100x900x500xD00x300xB00x700xF0
  
0x080x880x480xC80x280xA80x680xE80x180x980x580xD80x380xB80x780xF8
  
0x040x840x440xC40x240xA40x640xE40x140x940x540xD40x340xB40x740xF4
  
0x0C0x8C0x4C0xCC0x2C0xAC0x6C0xEC0x1C0x9C0x5C0xDC0x3C0xBC0x7C0xFC
  
0x020x820x420xC20x220xA20x620xE20x120x920x520xD20x320xB20x720xF2
  
0x0A0x8A0x4A0xCA0x2A0xAA0x6A0xEA0x1A0x9A0x5A0xDA0x3A0xBA0x7A0xFA,
  
0x060x860x460xC60x260xA60x660xE60x160x960x560xD60x360xB60x760xF6
  
0x0E0x8E0x4E0xCE0x2E0xAE0x6E0xEE0x1E0x9E0x5E0xDE0x3E0xBE0x7E0xFE,
  
0x010x810x410xC10x210xA10x610xE10x110x910x510xD10x310xB10x710xF1,
  
0x090x890x490xC90x290xA90x690xE90x190x990x590xD90x390xB90x790xF9
  
0x050x850x450xC50x250xA50x650xE50x150x950x550xD50x350xB50x750xF5,
  
0x0D0x8D0x4D0xCD0x2D0xAD0x6D0xED0x1D0x9D0x5D0xDD0x3D0xBD0x7D0xFD,
  
0x030x830x430xC30x230xA30x630xE30x130x930x530xD30x330xB30x730xF3
  
0x0B0x8B0x4B0xCB0x2B0xAB0x6B0xEB0x1B0x9B0x5B0xDB0x3B0xBB0x7B0xFB,
  
0x070x870x470xC70x270xA70x670xE70x170x970x570xD70x370xB70x770xF7
  
0x0F0x8F0x4F0xCF0x2F0xAF0x6F0xEF0x1F0x9F0x5F0xDF0x3F0xBF0x7F0xFF
};

        
/// <summary>
        
/// Reverses the specified value.
        
/// </summary>
        
/// <param name="value">The value.</param>
        
/// <returns></returns>
        public static byte Reverse(byte value)
        {
            
return BitReverseTable[value];
        }

        
/// <summary>
        
/// Reverses the specified input.
        
/// </summary>
        
/// <param name="input">The input.</param>
        
/// <param name="output">The output.</param>
        public static void Reverse(Stream input, Stream output)
        {
            
while (true)
            {
                
int value = input.ReadByte();
                
if (value == -1break;
                output.WriteByte(BitReverseTable[value]);
            }
        }

        
/// <summary>
        
/// Reverses the specified input.
        
/// </summary>
        
/// <param name="input">The input.</param>
        
/// <returns></returns>
        public static MemoryStream Reverse(Stream input)
        {
            MemoryStream memStream 
= new MemoryStream();
            BitOrder.Reverse(input, memStream);
            memStream.Position 
= 0;
            
return memStream;
        }
    }

 

 

 

BitStream
    /// <summary>
    
/// BitStream.
    
/// </summary>
    public class BitStream
    {
        
/// <summary>
        
/// BufferedStream
        
/// </summary>
        BufferedStream stream;

        
/// <summary>
        
/// Initializes a new instance of the <see cref="BitStream"/> class.
        
/// </summary>
        
/// <param name="stream">The stream.</param>
        public BitStream(Stream stream)
        {
            
//this.stream = stream;
            this.stream = new BufferedStream(stream);
        }

        
/// <summary>
        
/// Gets the length.
        
/// </summary>
        
/// <value>The length.</value>
        public long Length
        {
            
get { return stream.Length; }
        }

        
/// <summary>
        
/// Gets or sets the position.
        
/// </summary>
        
/// <value>The position.</value>
        public long Position
        {
            
get { return stream.Position; }
            
set { stream.Position = value; }
        }

        
/// <summary>
        
/// Gets a value indicating whether this instance is end reached.
        
/// </summary>
        
/// <value>
        
///     <c>true</c> if this instance is end reached; otherwise, <c>false</c>.
        
/// </value>
        public bool IsEndReached
        {
            
get { return Position == Length; }
        }

        BitArray bitBuffer;
        
/// <summary>
        
/// bitsRead
        
/// </summary>
        public int bitsRead = 0;
        
int bitsRemained = 0;

        
/// <summary>
        
/// Read num bits from stream, return as a machine integer stored with the most-significant bit first
        
/// return -1 if not enough bits remained.
        
/// </summary>
        
/// <param name="num"></param>
        
/// <returns></returns>
        public int ReadBitsBigEndian(int num)
        {
            
if (num < 1 || num > 16)
            {
                
throw new ArgumentOutOfRangeException("number of bits");
            }
            
if (num <= bitsRemained)
            {
                
int result = GetBinaryInteger(bitBuffer, bitsRead, num);
                bitsRead 
+= num;
                bitsRemained 
-= num;
                
return result;
            }
            
else
            {
                
int count = num - bitsRemained;
                
int bytesToRead = count <= 8 ? 1 : 2;
                
byte[] data = ReadBytes(bytesToRead);
                
if (data.Length == bytesToRead)
                {
                    
int result = GetBinaryInteger(bitBuffer, bitsRead, bitsRemained);
                    bitBuffer 
= new BitArray(data);
                    result 
= GetBinaryInteger(result, bitBuffer, 0, count);
                    bitsRead 
= count;
                    bitsRemained 
= bytesToRead * 8 - count;
                    
return result;
                }
                
else
                {
                    
return -1;
                }
            }
        }

        
/// <summary>
        
/// Gets the binary integer.
        
/// </summary>
        
/// <param name="array">The array.</param>
        
/// <param name="start">The start.</param>
        
/// <param name="count">The count.</param>
        
/// <returns></returns>
        private static int GetBinaryInteger(BitArray array, int start, int count)
        {
            
return GetBinaryInteger(0, array, start, count);
        }

        
/// <summary>
        
/// Gets the binary integer.
        
/// </summary>
        
/// <param name="initial">The initial.</param>
        
/// <param name="array">The array.</param>
        
/// <param name="start">The start.</param>
        
/// <param name="count">The count.</param>
        
/// <returns></returns>
        private static int GetBinaryInteger(int initial, BitArray array, int start, int count)
        {
            
int result = initial;
            
for (int n = start; n < start + count; n++)
            {
                
int bit = array[n] ? 1 : 0;
                result 
= result * 2 + bit;
            }
            
return result;
        }

        
/// <summary>
        
/// Read Bits Little Endian
        
/// </summary>
        
/// <param name="num"></param>
        
/// <returns></returns>
        public int ReadBits(int num)
        {
            
int result = ReadBitsBigEndian(num);
            BitArray bits 
= new BitArray(BitConverter.GetBytes(result));
            
return GetBinaryInteger(0, bits, 0, num);
        }

        
/// <summary>
        
/// Read one bit.
        
/// </summary>
        
/// <returns></returns>
        public int ReadBit()
        {
            
return ReadBitsBigEndian(1);
        }

        
/// <summary>
        
/// go to the next byte boundary, former unread bits are ignored.
        
/// </summary>
        public void GotoNextByte()
        {
            bitsRead 
= 0;
            bitsRemained 
= 0;
        }

        
/// <summary>
        
/// Reads the byte.
        
/// </summary>
        
/// <returns></returns>
        public byte ReadByte()
        {
            
return (byte)stream.ReadByte();
        }

        
/// <summary>
        
/// Reads the byte.
        
/// </summary>
        
/// <param name="offset">The offset.</param>
        
/// <returns></returns>
        public int ReadByte(long offset)
        {
            stream.Position 
= offset;
            
return stream.ReadByte();
        }

        
/// <summary>
        
/// Reads the bytes.
        
/// </summary>
        
/// <param name="count">The count.</param>
        
/// <returns></returns>
        public byte[] ReadBytes(int count)
        {
            
byte[] data = new byte[count];
            
int bytesRead = stream.Read(data, 0, count);
            
if (bytesRead == count)
            {
                
return data;
            }
            
else
            {
                
byte[] bytes = new byte[bytesRead];
                
if (bytesRead > 0)
                {
                    Array.Copy(data, bytes, bytesRead);
                }
                
return bytes;
            }
        }

        
/// <summary>
        
/// Reads to end.
        
/// </summary>
        
/// <returns></returns>
        public byte[] ReadToEnd()
        {
            
int bytesRemained = (int)(Length - Position);
            
return ReadBytes(bytesRemained);
        }

        
/// <summary>
        
/// Peeks the bytes.
        
/// </summary>
        
/// <param name="count">The count.</param>
        
/// <returns></returns>
        public byte[] PeekBytes(int count)
        {
            
return PeekBytes(this.Position, count);
        }

        
/// <summary>
        
/// Peeks the bytes.
        
/// </summary>
        
/// <param name="offset">The offset.</param>
        
/// <param name="length">The length.</param>
        
/// <returns></returns>
        public byte[] PeekBytes(long offset, int length)
        {
            
long pos = this.Position;
            
this.Position = offset;
            
byte[] data = ReadBytes(length);
            
this.Position = pos;
            
return data;
        }

        
/// <summary>
        
/// Reads the UInt16.
        
/// </summary>
        
/// <returns></returns>
        public ushort ReadUInt16()
        {
            
byte[] data = ReadBytes(2);
            
return BitConverter.ToUInt16(data, 0);
        }

        
uint writebuffer = 0;
        
/// <summary>
        
/// 
        
/// </summary>
        public int bitsWritten = 0;
        
/// <summary>
        
/// Writes the bits.
        
/// </summary>
        
/// <param name="value">The value.</param>
        
/// <param name="num">The number of bits.</param>
        public void WriteBits(int value, int num)
        {
            
if (num == 0return;
            BitArray bits 
= new BitArray(BitConverter.GetBytes(value));
            
for (int i = 0; i < num; i++)
            {
                
uint bit = bits[i] ? 0x80000000 : 0;
                writebuffer 
= (writebuffer >> 1| bit;

                bitsWritten
++;
                
if (bitsWritten == 32)
                {
                    WriteBytes(BitConverter.GetBytes(writebuffer));
                    ClearWriteBuffer();
                }
            }
        }

        
/// <summary>
        
/// Writes the bits big endian.
        
/// </summary>
        
/// <param name="value">The value.</param>
        
/// <param name="num">The number of bits.</param>
        public void WriteBitsBigEndian(int value, int num)
        {
            BitArray bits 
= new BitArray(BitConverter.GetBytes(value));
            
int result = GetBinaryInteger(0, bits, 0, num);
            WriteBits(result, num);
        }

        
/// <summary>
        
/// Writes the bit.
        
/// </summary>
        
/// <param name="bit">if set to <c>true</c> [bit].</param>
        public void WriteBit(bool bit)
        {
            WriteBit(bit 
? 1 : 0);
        }

        
/// <summary>
        
/// Writes the bit.
        
/// </summary>
        
/// <param name="bit">The bit.</param>
        public void WriteBit(int bit)
        {
            WriteBits(bit, 
1);
        }

        
private void ClearWriteBuffer()
        {
            writebuffer 
= 0;
            bitsWritten 
= 0;
        }

        
/// <summary>
        
/// Flush bits in buffer, zero bits are apended to form a byte border.
        
/// Or flush read when reading.
        
/// </summary>
        public void Flush()
        {
            
if (bitsWritten > 0)
            {
                writebuffer 
= writebuffer >> (32 - bitsWritten);
                
byte[] bytes = BitConverter.GetBytes(writebuffer);
                
int count = bitsWritten / 8;
                
if (bitsWritten % 8 != 0)
                {
                    count
++;
                }
                stream.Write(bytes, 
0, count);
                ClearWriteBuffer();
            }
            stream.Flush();
        }

        
/// <summary>
        
/// Writes the byte.
        
/// </summary>
        
/// <param name="value">The value.</param>
        public void WriteByte(byte value)
        {
            stream.WriteByte(value);
        }

        
/// <summary>
        
/// Writes the bytes.
        
/// </summary>
        
/// <param name="data">The data.</param>
        public void WriteBytes(byte[] data)
        {
            stream.Write(data, 
0, data.Length);
        }

        
/// <summary>
        
/// Writes the bytes.
        
/// </summary>
        
/// <param name="data">The data.</param>
        
/// <param name="offset">The offset.</param>
        
/// <param name="count">The count.</param>
        public void WriteBytes(byte[] data, int offset, int count)
        {
            stream.Write(data, offset, count);
        }
    }

 

 

 

posted @ 2010-01-13 16:08  刘俊峰  阅读(1169)  评论(3编辑  收藏  举报