# 使用F#来实现哈夫曼编码吧

F#的算法实现这种东西本身不好找，不过M\$似乎有着预见性，得来全不费功夫。。。

open System

/// A Huffman code is represented as a binary tree whose leaves are the input
/// symbols and whose internal nodes carry the combined expected frequency of
/// all the symbols beneath them.
type HuffmanTree =
    | Leaf of char * float
    | Node of float * HuffmanTree * HuffmanTree

/// Provides encoding and decoding of strings over the given symbols, using a
/// Huffman code derived from their expected frequencies.
///
/// `symbols` and `frequencies` are paired positionally with `Seq.zip`, so any
/// surplus elements of the longer sequence are ignored. Construction fails with
/// "Cannot build a Huffman Tree for no inputs" when no pairs result.
type HuffmanCoder(symbols: seq<char>, frequencies: seq<float>) =

    /// One leaf per (symbol, frequency) pair: the initial forest for the tree.
    let huffmanTreeLeafs =
        Seq.zip symbols frequencies
        |> Seq.toList
        |> List.map Leaf

    /// Returns the frequency stored in a tree node (leaf or internal).
    let frequency node =
        match node with
        | Leaf (_, p) -> p
        | Node (p, _, _) -> p

    /// Builds the Huffman tree from a list of roots by repeatedly merging the
    /// two least frequent roots until a single root remains.
    let rec buildCodeTree roots =
        match roots |> List.sortBy frequency with
        | [] -> failwith "Cannot build a Huffman Tree for no inputs"
        | [ node ] -> node
        | least :: nextLeast :: rest ->
            let combinedFrequency = frequency least + frequency nextLeast
            let newNode = Node(combinedFrequency, least, nextLeast)
            buildCodeTree (newNode :: rest)

    let tree = buildCodeTree huffmanTreeLeafs

    /// Maps every symbol to its bit code. A `true` bit means "take the left
    /// branch", a `false` bit means "take the right branch".
    let huffmanCodeTable =
        let rec huffmanCodes node =
            match node with
            | Leaf (c, _) -> [ (c, []) ]
            | Node (_, left, right) ->
                let leftCodes =
                    huffmanCodes left |> List.map (fun (c, code) -> (c, true :: code))
                let rightCodes =
                    huffmanCodes right |> List.map (fun (c, code) -> (c, false :: code))
                List.append leftCodes rightCodes

        let codes =
            match tree with
            // A one-symbol alphabet has no branches, which would give the
            // symbol a zero-length code and lose the repeat count on encoding.
            // Give it an explicit one-bit code instead.
            | Leaf (c, _) -> [ (c, [ true ]) ]
            | Node _ -> huffmanCodes tree

        codes
        |> List.map (fun (c, code) -> (c, List.toArray code))
        |> Map.ofList

    /// Encodes a string as the concatenation of the codes of its characters.
    /// Fails if the string contains a character that has no entry in the
    /// code table.
    let encode (str: string) =
        let encodeChar c =
            match huffmanCodeTable |> Map.tryFind c with
            | Some bits -> bits
            // failwithf (not failwith) is required to substitute %A.
            | None -> failwithf "No frequency information provided for character '%A'" c

        str.ToCharArray()
        |> Array.map encodeChar
        |> Array.concat

    /// Decodes an array of bits back into a string by walking the tree.
    /// An empty array decodes to the empty string. Fails with
    /// "Bits provided did not form a complete word" if the bits end in the
    /// middle of a code.
    let decode bits =
        let rec decodeInner bitsLeft treeNode result =
            match bitsLeft, treeNode with
            // Ran out of bits while still inside the tree.
            | [], Node _ -> failwith "Bits provided did not form a complete word"
            // Ran out of bits exactly on a symbol: emit it and finish.
            | [], Leaf (c, _) -> (c :: result) |> List.rev |> List.toArray
            // Reached a symbol with bits remaining: emit it, restart at the root.
            | _, Leaf (c, _) -> decodeInner bitsLeft tree (c :: result)
            | b :: rest, Node (_, l, r) -> decodeInner rest (if b then l else r) result

        match tree, Array.toList bits with
        // Mirrors `encode ""`, which produces an empty array.
        | _, [] -> String.Empty
        // One-symbol alphabet: every bit stands for that symbol. Walking the
        // tree here would restart at the same leaf forever.
        | Leaf (c, _), _ -> String(c, Array.length bits)
        | Node _, bitsList -> String(decodeInner bitsList tree [])

    /// Encodes `source` (a string) into an array of bits.
    member coder.Encode source = encode source

    /// Decodes `source` (an array of bits) into a string.
    member coder.Decode source = decode source

## 模式匹配

// Pattern match over the two HuffmanTree cases; the branch bodies are elided here.
match tree with
| Leaf (c,_) -> //...
| Node (_, left, right) -> //...

如果漏掉了某种情况（例如去掉 decodeInner 中处理 `[], Node` 的分支），编译器会给出如下警告：

Warning: Incomplete pattern matches on this expression. The value '([],Node (_, _, _))' will not be matched

## 集合

• F# 列表：不可变链表，用于在列表上进行递归的算法。
• F# 映射：不可变字典，用于存储每个符号对应的编码。
• F# 序列 = .NET "IEnumerable"：基本集合的接口，用于输入。
• .NET 数组：基本类型的数组用于输出编码。

## “这是.NET的一部分！”又曰“华而不实”

1. 把所有的内容缩进一级
2. 把代码包裹起来：

// Skeleton of the wrapper type: the symbols and frequencies become constructor arguments.
type HuffmanCoder(symbols : seq<char>, frequencies : seq<float>) =

// The code to be wrapped goes here...

// Expose the inner encode/decode functions as members of the type.
member coder.Encode source = encode source
member coder.Decode source = decode source

// NOTE(review): presumably the signature of the F# HuffmanCoder type as it appears to C# callers; confirm.
public class HuffmanCoder
{
// Constructor: takes the symbols and their frequencies.
public HuffmanCoder(IEnumerable<char> symbols, IEnumerable<double> frequencies);

// Takes an array of bools and returns a string.
public string Decode(bool[] source);
// Takes a string and returns an array of bools.
public bool[] Encode(string source);
}

## 嗯！

posted @ 2015-06-20 01:13 Johnwii 阅读(...) 评论(...) 编辑 收藏