霍夫曼编码是一种特殊类型的最优前缀编码,是一种常用于无损数据压缩的算法.
该算法为输入字符分配可变长度的代码,代码长度取决于相应字符出现的频率:
出现最频繁的字符得到最短的代码,出现最不频繁的字符得到最长的代码.
1.特点:
从输入字符构建霍夫曼树.
遍历霍夫曼树并将代码分配给字符.
2.步骤:
输入是一组唯一字符及其出现频率,输出是霍夫曼树.
(1) 为每个唯一字符创建一个叶子节点,并用所有叶子节点构建最小堆.
(2) 从最小堆中提取出现频率最低的两个节点.
(3) 创建一个新的内部节点,其出现频率等于这两个节点的频率之和.将第一个提取的节点作为其左子树,将另一个提取的节点作为其右子树,再将该内部节点加入最小堆.
(4) 重复步骤(2)和(3),直到堆中只剩一个节点.剩下的节点即为根节点,此时霍夫曼树构建完成.
3.实现:
3.1方法:
package data
import (
"container/heap"
"fmt"
)
// HuffmanTree is the common interface implemented by every node of the
// Huffman tree, leaf or internal.
type HuffmanTree interface {
	// Freq returns the frequency stored in the node.
	Freq() int
}

// HuffmanLeaf is a leaf of the tree: a single symbol and its frequency.
type HuffmanLeaf struct {
	freq  int  // frequency of value
	value rune // symbol represented by this leaf
}

// Freq returns the frequency of the leaf's symbol.
func (leaf HuffmanLeaf) Freq() int { return leaf.freq }

// HuffmanNode is an internal node of the tree with two subtrees.
type HuffmanNode struct {
	freq  int         // frequency of the node
	left  HuffmanTree // subtree reached through the '0' edge when printing codes
	right HuffmanTree // subtree reached through the '1' edge when printing codes
}

// Freq returns the frequency stored in the internal node.
func (node HuffmanNode) Freq() int { return node.freq }
// minHeap is a slice of Huffman trees ordered by ascending frequency.
// Its Len/Less/Swap/Push/Pop methods are the set container/heap operates on.
type minHeap []HuffmanTree

// Len reports how many trees the heap currently holds.
func (h minHeap) Len() int { return len(h) }

// Less reports whether the tree at index i has a strictly lower frequency
// than the tree at index j.
func (h minHeap) Less(i, j int) bool {
	return h[i].Freq() < h[j].Freq()
}

// Swap exchanges the trees at indices i and j.
func (h minHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }

// Push appends ele to the end of the slice. ele must hold a HuffmanTree;
// any other dynamic type makes the type assertion panic.
func (h *minHeap) Push(ele interface{}) {
	*h = append(*h, ele.(HuffmanTree))
}

// Pop removes the last element of the slice and returns it.
func (h *minHeap) Pop() interface{} {
	old := *h
	last := len(old) - 1
	item := old[last]
	*h = old[:last]
	return item
}
// BuildTree builds a Huffman tree from symFreqs, which maps each symbol to
// its frequency, and returns the root of the tree.
//
// Every symbol becomes a HuffmanLeaf. The two lowest-frequency trees are then
// repeatedly removed from a min-heap and merged under a new HuffmanNode whose
// frequency is their sum, until a single tree is left.
//
// BuildTree returns nil when symFreqs is empty or nil. When it contains exactly
// one symbol the result is that symbol's HuffmanLeaf.
//
// Map iteration order is unspecified in Go, so when several trees share the
// same frequency the exact shape of the result may differ between runs.
func BuildTree(symFreqs map[rune]int) HuffmanTree {
	// Popping from an empty heap panics, so handle "no symbols" up front.
	if len(symFreqs) == 0 {
		return nil
	}

	trees := make(minHeap, 0, len(symFreqs))
	for sym, freq := range symFreqs {
		trees = append(trees, HuffmanLeaf{freq: freq, value: sym})
	}
	heap.Init(&trees)

	for trees.Len() > 1 {
		// The two trees with the lowest frequency.
		a := heap.Pop(&trees).(HuffmanTree)
		b := heap.Pop(&trees).(HuffmanTree)
		// Merge them under a new internal node and put it back in the queue.
		heap.Push(&trees, HuffmanNode{freq: a.Freq() + b.Freq(), left: a, right: b})
	}
	// Exactly one tree remains: the root.
	return trees[0]
}
// PrintCodes walks tree depth-first and writes one line per leaf to standard
// output: the symbol, its frequency and its code, separated by tabs.
//
// prefix holds the code accumulated on the way down as ASCII '0' and '1'
// bytes; pass an empty slice for the root. A tree value that is neither a
// HuffmanLeaf nor a HuffmanNode (including nil) prints nothing.
func PrintCodes(tree HuffmanTree, prefix []byte) {
	switch node := tree.(type) {
	case HuffmanLeaf:
		// A leaf carries one input symbol: print it with the code collected so far.
		fmt.Printf("%c\t%d\t%s\n", node.value, node.freq, string(prefix))
	case HuffmanNode:
		// The left edge contributes '0' and the right edge '1'. prefix itself
		// keeps its length, so both calls extend the same parent code.
		PrintCodes(node.left, append(prefix, '0'))
		PrintCodes(node.right, append(prefix, '1'))
	}
}
```
```
#### ***3.2main方法:***
```
// main counts how often each rune occurs in a sample string, builds the
// Huffman tree for those counts and prints the resulting code table.
func main() {
	const text = "abcdefghijk"

	symFreqs := map[rune]int{}
	for _, r := range text {
		symFreqs[r]++
	}

	data.PrintCodes(data.BuildTree(symFreqs), []byte{})
}
```
### 4.实战:
#### 4.1方法:
```js
```
// ValueType is the type of the symbol stored in a tree node.
type ValueType int32

// HuffNode is a node of the Huffman tree.
type HuffNode struct {
	Parent *HuffNode // enclosing node; nil for the root
	Left   *HuffNode // left child; Print treats a node with a nil Left as a leaf
	Right  *HuffNode // right child
	Count  int       // weight of the node; BuildSorted sets it to the sum of the children for internal nodes
	Value  ValueType // symbol carried by the node
}

// Code walks from node up to the root and returns the node's code word
// together with its length in bits.
//
// r and bits are the starting values of the accumulator; pass 0, 0 to obtain
// the plain code of node. For every edge on the way up, bit number `bits` of r
// is set when the edge is a right edge and left clear otherwise, then bits is
// incremented. Bit 0 therefore belongs to the edge nearest to node and the
// highest bit to the edge leaving the root, which is the same layout Print
// produces.
//
// Previously the method had no results, so the values it computed were lost.
// Calls written as plain statements keep compiling unchanged. A nil receiver
// returns r and bits untouched.
func (node *HuffNode) Code(r uint64, bits byte) (uint64, byte) {
	if node == nil {
		return r, bits
	}
	for parent := node.Parent; parent != nil; node, parent = parent, parent.Parent {
		if parent.Right == node {
			// Right edge: bit 1.
			r |= 1 << bits
		}
		// Left edge: bit 0, which leaves r unchanged.
		bits++
	}
	return r, bits
}
// SortNodes is a slice of nodes that the sort package can order by
// ascending Count.
type SortNodes []*HuffNode

// Len returns the number of nodes in the slice.
func (nodes SortNodes) Len() int { return len(nodes) }

// Less reports whether the node at index i has a strictly smaller Count than
// the node at index j.
func (nodes SortNodes) Less(i, j int) bool {
	a, b := nodes[i], nodes[j]
	return a.Count < b.Count
}

// Swap exchanges the nodes at indices i and j.
func (nodes SortNodes) Swap(i, j int) {
	nodes[j], nodes[i] = nodes[i], nodes[j]
}
// Build constructs a Huffman tree from leaves given in any order and returns
// its root.
//
// The slice is stably sorted in place by ascending Count, once, and then
// passed to BuildSorted, so the caller's slice is reordered.
func Build(leaves []*HuffNode) *HuffNode {
	byCount := SortNodes(leaves) // shares the backing array with leaves
	sort.Stable(byCount)
	return BuildSorted(leaves)
}
// BuildSorted constructs a Huffman tree from leaves, which must already be
// sorted by ascending Count, and returns its root.
//
// It returns nil for an empty slice and the single element itself when there
// is only one. The contents of leaves are overwritten while the tree is built,
// and the Parent field of every merged node is set.
func BuildSorted(leaves []*HuffNode) *HuffNode {
	if len(leaves) == 0 {
		return nil
	}
	for len(leaves) > 1 {
		// The two lightest nodes sit at the front of the sorted slice.
		first, second := leaves[0], leaves[1]
		sum := first.Count + second.Count
		merged := &HuffNode{Left: first, Right: second, Count: sum}
		first.Parent, second.Parent = merged, merged

		// Binary-search the remaining nodes for the first one whose Count is
		// not below sum; pos is that node's index within leaves.
		rest := leaves[2:]
		pos := 2 + sort.Search(len(rest), func(i int) bool {
			return rest[i].Count >= sum
		})

		// Shift the lighter nodes one slot to the left and drop merged into
		// the gap, which keeps the slice sorted.
		copy(leaves[1:], leaves[2:pos])
		leaves[pos-1] = merged
		// Slot 0 is no longer needed: the slice shrinks by one per round.
		leaves = leaves[1:]
	}
	return leaves[0]
}
// Print writes the code table of the tree rooted at root to standard output,
// one line per leaf in the form 'symbol':code.
//
// The code is printed in binary, zero-padded to the depth of the leaf. Going
// to a left child appends a 0 bit and going to a right child appends a 1 bit.
// A node whose Left is nil is treated as a leaf. A nil root, which is what
// Build returns for empty input, prints nothing.
func Print(root *HuffNode) {
	if root == nil {
		return
	}
	var traverse func(n *HuffNode, code uint64, bits byte)
	traverse = func(n *HuffNode, code uint64, bits byte) {
		if n.Left == nil {
			// The width is passed as an argument instead of building a new
			// format string for every leaf.
			fmt.Printf("'%c':%0*b\n", n.Value, int(bits), code)
			return
		}
		bits++
		traverse(n.Left, code<<1, bits)
		traverse(n.Right, code<<1|1, bits)
	}
	traverse(root, 0, 0)
}
```
```
#### 4.2main方法:
```
// main builds a Huffman tree from a fixed set of weighted symbols and prints
// the code assigned to each symbol.
func main() {
	tree := data.Build([]*data.HuffNode{
		{Value: ' ', Count: 20},
		{Value: 'a', Count: 40},
		{Value: 'm', Count: 10},
		{Value: 'l', Count: 7},
		{Value: 'f', Count: 8},
		{Value: 't', Count: 15},
	})
	data.Print(tree)
}
```
小楼又东风.
如果大家喜欢我的分享的话,可以关注我的微信公众号:
念何架构之路