Huffman编码树实现

Huffman编码树是带权路径长度最短的二叉树(最优二叉树),具体定义见:Huffman Coding 

Huffman编码树的实现过程

  1. 输入各个字符以及相应的字符权重
  2. 构建Huffman编码树节点向量
  3. 在向量中查找权重最小的两个节点,新建一个新的节点,其左右子树是两节点,权重为两个子树权重之和
  4. 将新节点添加到向量中,并从向量中删除这两个子树节点
  5. 重复3-4,直到只有一个节点

注意点:

  • 查找两个最小节点采用的是分治(Divide and Conquer)策略:将向量均分成两部分,分别求得每个部分最小的两个节点 Tuple1(Min1, Min2) 和 Tuple2(Min1, Min2),再比较这两组结果得到整体最小的两个,递归基为向量的元素个数小于4个。该方法的时间复杂度仍为 O(n),但比较次数约为 3n/2,少于直接遍历在最坏情况下约 2n 次的比较(每个元素都要与当前的两个最小值各比较一次)

/// <summary>
       /// Base case of the recursion: finds the two lightest nodes of a small range by a linear scan.
       /// </summary>
       /// <param name="huffchars">Vector of Huffman tree nodes to search.</param>
       /// <param name="lo">Inclusive start index of the range.</param>
       /// <param name="hi">Exclusive end index of the range; the range must contain at least two nodes.</param>
       /// <returns>Index of the lightest node (Item1) and of the second lightest node (Item2).</returns>
       private Tuple<Int32, Int32> TrivialTwoMin(IVector<BinNode<HuffChar>> huffchars, Int32 lo, int hi)
       {
           // Seed the pair with the first two elements of the range, lighter one first.
           int lightest;
           int runnerUp;
           if (huffchars[lo].Data.Weight < huffchars[lo + 1].Data.Weight)
           {
               lightest = lo;
               runnerUp = lo + 1;
           }
           else
           {
               lightest = lo + 1;
               runnerUp = lo;
           }

           int cursor = lo + 2;
           while (cursor < hi)
           {
               // Only a candidate lighter than the current runner-up can change the pair.
               if (huffchars[cursor].Data.Weight < huffchars[runnerUp].Data.Weight)
               {
                   if (huffchars[cursor].Data.Weight < huffchars[lightest].Data.Weight)
                   {
                       // New overall minimum: the old minimum is demoted to runner-up.
                       runnerUp = lightest;
                       lightest = cursor;
                   }
                   else
                   {
                       runnerUp = cursor;
                   }
               }
               cursor++;
           }

           return Tuple.Create(lightest, runnerUp);
       }

       /// <summary>
       /// 递归迭代版查找最小的两个要素
       /// </summary>
       /// <param name="huffchars"></param>
       /// <param name="lo"></param>
       /// <param name="hi"></param>
       /// <returns></returns>
       private  Tuple<Int32, Int32> FindTwoMin(IVector<BinNode<HuffChar>> huffchars, Int32 lo, Int32 hi)
       {
           if (hi - lo <= 3)
           {
               return TrivialTwoMin(huffchars, lo, hi);
           }
           int mi = (hi + lo) >> 1;
           Tuple<Int32, Int32> firstPart = FindTwoMin(huffchars, lo, mi);
           Tuple<Int32, Int32> secondPart = FindTwoMin(huffchars, mi, hi);
           if (huffchars[firstPart.Item1].Data.Weight < huffchars[secondPart.Item1].Data.Weight)
           {
               return huffchars[firstPart.Item2].Data.Weight < huffchars[secondPart.Item1].Data.Weight ?
                   firstPart :
                   new Tuple<int, int>(firstPart.Item1, secondPart.Item1);
           }
           return
               huffchars[secondPart.Item2].Data.Weight < huffchars[firstPart.Item1].Data.Weight ?
                   secondPart :
                   new Tuple<int, int>(secondPart.Item1, firstPart.Item1);
       }

  • 获取各个叶节点的编码。遍历整个二叉树,如果当前节点为叶节点,则沿Parent指针一直回溯到根节点,途中当前节点是左孩子记0、是右孩子记1;由于是自底向上收集,最后需要将得到的序列反转,才是该字符的编码

/// <summary>
       /// Starts the code-map construction by handing the per-node overload of
       /// BuildCodeMap to the root's TravIn traversal as the visitor callback.
       /// </summary>
       private void BuildCodeMap()
       {
           BinNode<HuffChar> root = _huffmanRoot;
           root.TravIn(BuildCodeMap);
       }

       /// <summary>
       /// 如果是叶节点
       /// </summary>
       /// <param name="node"></param>
       private void BuildCodeMap(BinNode<HuffChar> node)
       {
           if (!node.HasBothChild)
           {
               char code = node.Data.Ch;
               string s = string.Empty;
               while (node!=_huffmanRoot)
               {
                   if (node.IsLChild)
                   {
                       s += "0";
                   }
                   else
                   {
                       s += "1";
                   }
                   node = node.Parent;
               }
               _charCodeMap.Add(code,new string(s.Reverse().ToArray()));
           }
       }

完整代码 (github: github/DataStructure)

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;

namespace Sequence
{
    public class HuffTree
    {
        /// <summary>
        /// 字符和其权重
        /// </summary>
        private readonly Dictionary<char, double> _charweights;

        /// <summary>
        /// 字符及其对应的编码
        /// </summary>
        private readonly Dictionary<char, string> _charCodeMap;

        /// <summary>
        /// Huffman编码树的根节点
        /// </summary>
        private BinNode<HuffChar> _huffmanRoot;

        /// <summary>
        /// 构造函数
        /// </summary>
        /// <param name="charweights">一个字典</param>
        public HuffTree(Dictionary<char, double> charweights)
        {
            _charweights = charweights;
            _charCodeMap=new Dictionary<char, string>();
            BuildTree();
            BuildCodeMap();
        }

        /// <summary>
        /// 构造函数
        /// </summary>
        /// <param name="nodes"></param>
        /// <param name="weights"></param>
        public HuffTree(IEnumerable<char> nodes,IEnumerable<Double> weights)
        {
            if(nodes.Count()!=weights.Count())
                throw new ArgumentException("The char set count is not equal to weight's count");
            var nodesList = nodes.ToList();
            var weightList = weights.ToList();
            _charweights=new Dictionary<char, double>();
            for (int i = 0; i < nodesList.Count; i++)
            {
                if (_charweights.ContainsKey(nodesList[i]))
                {
                    throw new ArgumentException("The char set must not eqaul by each");
                }
                _charweights.Add(nodesList[i],weightList[i]);
            }
            _charCodeMap=new Dictionary<char, string>();
            BuildTree();
            BuildCodeMap();
        }


        #region 构建Huffman树
        private void BuildTree()
        {
            IVector<BinNode<HuffChar>> huffchars = InitHuffChars();
            while (huffchars.Size > 1)
            {
                Tuple<Int32, Int32> twoMin = FindTwoMin(huffchars, 0, huffchars.Size);
                int first = Math.Min(twoMin.Item1, twoMin.Item2);
                int second = Math.Max(twoMin.Item1, twoMin.Item2);
                var node1 = huffchars.Remove(second);
                var node2 = huffchars.Remove(first);

                BinNode<HuffChar> newNode = new BinNode<HuffChar>(
                    new HuffChar('^', node1.Data.Weight + node2.Data.Weight),null,node1,node2);
                node1.Parent = newNode;
                node2.Parent = newNode;
                huffchars.Insert(newNode);
            }
            _huffmanRoot = huffchars[0];
        }
        private  IVector<BinNode<HuffChar>> InitHuffChars()
        {
            IVector<BinNode<HuffChar>> huffChars =
                Vector<BinNode<HuffChar>>.VectorFactory();
            foreach (var charItem in _charweights.Keys)
            {
                huffChars.Insert(new BinNode<HuffChar>(
                    new HuffChar(charItem, _charweights[charItem])));
            }
            return huffChars;
        }

        /// <summary>
        /// 递归基,平凡情况,如果只有两个或者三个要素时
        /// </summary>
        /// <param name="huffchars"></param>
        /// <param name="lo"></param>
        /// <param name="hi"></param>
        /// <returns></returns>
        private  Tuple<Int32, Int32> TrivialTwoMin(IVector<BinNode<HuffChar>> huffchars, Int32 lo, int hi)
        {
            int first = huffchars[lo].Data.Weight < huffchars[lo + 1].Data.Weight ? lo : lo + 1;
            int second = first == lo ? lo + 1 : lo;
            for (int i = lo + 2; i < hi; i++)
            {
                if (huffchars[i].Data.Weight < huffchars[second].Data.Weight)
                {
                    if (huffchars[i].Data.Weight < huffchars[first].Data.Weight)
                    {
                        second = first;
                        first = i;
                    }
                    else
                    {
                        second = i;
                    }
                }
            }
            return new Tuple<int, int>(first, second);
        }

        /// <summary>
        /// 递归迭代版查找最小的两个要素
        /// </summary>
        /// <param name="huffchars"></param>
        /// <param name="lo"></param>
        /// <param name="hi"></param>
        /// <returns></returns>
        private  Tuple<Int32, Int32> FindTwoMin(IVector<BinNode<HuffChar>> huffchars, Int32 lo, Int32 hi)
        {
            if (hi - lo <= 3)
            {
                return TrivialTwoMin(huffchars, lo, hi);
            }
            int mi = (hi + lo) >> 1;
            Tuple<Int32, Int32> firstPart = FindTwoMin(huffchars, lo, mi);
            Tuple<Int32, Int32> secondPart = FindTwoMin(huffchars, mi, hi);
            if (huffchars[firstPart.Item1].Data.Weight < huffchars[secondPart.Item1].Data.Weight)
            {
                return huffchars[firstPart.Item2].Data.Weight < huffchars[secondPart.Item1].Data.Weight ?
                    firstPart :
                    new Tuple<int, int>(firstPart.Item1, secondPart.Item1);
            }
            return
                huffchars[secondPart.Item2].Data.Weight < huffchars[firstPart.Item1].Data.Weight ?
                    secondPart :
                    new Tuple<int, int>(secondPart.Item1, firstPart.Item1);
        }
        #endregion


        #region Build char code Map

        /// <summary>
        /// 遍历整个二叉树
        /// </summary>
        private void BuildCodeMap()
        {
            _huffmanRoot.TravIn(BuildCodeMap);
        }

        /// <summary>
        /// 如果是叶节点
        /// </summary>
        /// <param name="node"></param>
        private void BuildCodeMap(BinNode<HuffChar> node)
        {
            if (!node.HasBothChild)
            {
                char code = node.Data.Ch;
                string s = string.Empty;
                while (node!=_huffmanRoot)
                {
                    if (node.IsLChild)
                    {
                        s += "0";
                    }
                    else
                    {
                        s += "1";
                    }
                    node = node.Parent;
                }
                _charCodeMap.Add(code,new string(s.Reverse().ToArray()));
            }
        }
        #endregion

        /// <summary>
        /// 将某个文本加密
        /// </summary>
        /// <param name="text"></param>
        /// <returns></returns>
        public String Encode(string text)
        {
            StringBuilder sb=new StringBuilder();
            foreach (var item in text)
            {
                if(!_charCodeMap.ContainsKey(item))
                    throw new ArgumentException("The some one char does not exist in char set");
                sb.Append(_charCodeMap[item]);
            }
            return sb.ToString();
        }

        /// <summary>
        /// 将某个密文解码
        /// </summary>
        /// <param name="code"></param>
        /// <returns></returns>
        public string Decode(string code)
        {
            StringBuilder sb=new StringBuilder();
            BinNode<HuffChar> hot=_huffmanRoot;
            bool backToRoot = true;
            foreach (char t in code)
            {
                if (backToRoot)
                {
                    hot = _huffmanRoot;
                }
                if (hot == null)
                {
                    throw new ArgumentException("the code is not illeagl");
                }
                if (t == '0')
                {
                    hot = hot.LChild;
                    if (!hot.HasBothChild)
                    {
                        sb.Append(hot.Data.Ch);
                        backToRoot = true;
                    }
                    else
                    {
                        backToRoot = false;
                    }
                }
                else
                {
                    hot = hot.RChild;
                    if (!hot.HasBothChild)
                    {
                        sb.Append(hot.Data.Ch);
                        backToRoot = true;
                    }
                    else
                    {
                        backToRoot = false;
                    }
                }
            }
            return sb.ToString();
        }

    }
}

posted @ 2016-05-17 19:38  南瓜_waniu  阅读(323)  评论(0)    收藏  举报