Go语言数据结构和算法(三十三)霍夫曼编码

11 阅读3分钟

霍夫曼编码是一种特殊类型的最佳前缀编码.它是一种通常用于无损数据压缩的算法.该算法为输入字符分配可变长度的代码.分配的代码长度取决于相应字符出现的频率.出现最频繁的字符得到最短的代码.出现最不频繁的字符得到最长的代码.

1.特点:

从输入字符构建霍夫曼树.

遍历霍夫曼树并将代码分配给字符.

2.步骤:

输入是一组唯一字符及其出现频率.输出是霍夫曼树.

为每个唯一字符创建一个叶子节点.并构建所有叶子节点的最小堆.

从最小堆中提取出现频率最低的两个节点.

创建一个新的内部节点.其出现频率等于两个节点频率之和.将第一个提取的节点作为其左子树.将另一个提取的节点作为其右子树.将该内部节点添加到最小堆.

重复上面"提取频率最低的两个节点"和"创建新的内部节点"这两个步骤.直到堆中只包含一个节点.剩下的节点就是根节点.此时霍夫曼树构建完成.

3.实现:

3.1方法:

package data

import (
    "container/heap"
    "fmt"
)

// HuffmanTree is the interface shared by every node of a Huffman tree,
// leaf or internal.
type HuffmanTree interface {
	// Freq reports the frequency stored in the node.
	Freq() int
}

// HuffmanLeaf is a terminal node that carries a single input symbol.
type HuffmanLeaf struct {
	freq  int  // frequency recorded for value
	value rune // the symbol this leaf stands for
}

// Freq returns the frequency stored in the leaf.
func (leaf HuffmanLeaf) Freq() int { return leaf.freq }

// HuffmanNode is an internal node joining two subtrees.
type HuffmanNode struct {
	freq        int         // frequency stored for this node
	left, right HuffmanTree // child subtrees
}

// Freq returns the frequency stored in the internal node.
func (node HuffmanNode) Freq() int { return node.freq }

// minHeap is a slice of trees ordered by frequency. Together its methods
// satisfy heap.Interface from container/heap.
type minHeap []HuffmanTree

// Len reports how many trees the heap currently holds.
func (h minHeap) Len() int { return len(h) }

// Less reports whether the tree at index i has a lower frequency than the
// tree at index j.
func (h minHeap) Less(i, j int) bool {
	return h[j].Freq() > h[i].Freq()
}

// Swap exchanges the trees at indexes i and j.
func (h minHeap) Swap(i, j int) {
	h[j], h[i] = h[i], h[j]
}

// Push appends ele to the end of the slice. It panics if ele is not a
// HuffmanTree.
func (h *minHeap) Push(ele interface{}) {
	tree := ele.(HuffmanTree)
	*h = append(*h, tree)
}

// Pop removes the last element of the slice and returns it.
func (h *minHeap) Pop() (popped interface{}) {
	old := *h
	last := len(old) - 1
	popped = old[last]
	*h = old[:last]
	return popped
}

// BuildTree builds a Huffman tree from symFreqs, which maps each symbol to
// its frequency.
//
// It returns nil when symFreqs is empty and a single HuffmanLeaf when it
// holds exactly one symbol. Nodes are ordered by frequency only and the
// leaves are collected by ranging over a map, so when frequencies tie the
// shape of the resulting tree may differ between runs.
func BuildTree(symFreqs map[rune]int) HuffmanTree {
	// Popping from an empty heap panics, so there is no tree to build.
	if len(symFreqs) == 0 {
		return nil
	}

	trees := make(minHeap, 0, len(symFreqs))
	for c, f := range symFreqs {
		trees = append(trees, HuffmanLeaf{f, c})
	}
	heap.Init(&trees)

	for trees.Len() > 1 {
		// Take the two trees with the lowest frequency.
		a := heap.Pop(&trees).(HuffmanTree)
		b := heap.Pop(&trees).(HuffmanTree)
		// Join them under a new internal node and put it back on the heap.
		heap.Push(&trees, HuffmanNode{a.Freq() + b.Freq(), a, b})
	}
	// The one remaining tree is the root.
	return trees[0]
}

// PrintCodes walks tree depth-first and writes one line per leaf to standard
// output in the form "symbol<TAB>frequency<TAB>code". prefix holds the code
// bits ('0' or '1' bytes) accumulated on the way down; a left edge adds '0'
// and a right edge adds '1'. A tree value that is neither a HuffmanLeaf nor
// a HuffmanNode prints nothing.
func PrintCodes(tree HuffmanTree, prefix []byte) {
	if leaf, ok := tree.(HuffmanLeaf); ok {
		// A leaf carries one input symbol: print it with the collected code.
		fmt.Printf("%c\t%d\t%s\n", leaf.value, leaf.freq, string(prefix))
		return
	}
	if node, ok := tree.(HuffmanNode); ok {
		// prefix itself is never reassigned, so both children start from
		// the same code.
		PrintCodes(node.left, append(prefix, '0'))
		PrintCodes(node.right, append(prefix, '1'))
	}
}
```
```

#### ***3.2main方法:***

```
// main counts how often each rune occurs in a sample string, builds the
// Huffman tree for those counts and prints the code of every symbol.
func main() {
	const sample = "abcdefghijk"
	freqs := map[rune]int{}
	for _, r := range sample {
		freqs[r] = freqs[r] + 1
	}
	data.PrintCodes(data.BuildTree(freqs), []byte{})
}
```
### 4.实战:

#### 4.1方法:

```js
```
// ValueType is the type of the value stored in a node.
type ValueType int32

// HuffNode is a node of a pointer-linked Huffman tree.
type HuffNode struct {
	// Parent is the node one level up; Code stops climbing at a nil Parent.
	Parent *HuffNode

	// Left and Right are the two children of the node.
	Left  *HuffNode
	Right *HuffNode

	// Count is the weight of the node.
	Count int

	// Value is the symbol carried by the node.
	Value ValueType
}

// Code computes the Huffman code of node by climbing from node to the root.
//
// r and bits are the starting code and the number of bits it already holds;
// pass 0, 0 to obtain the plain code of the node. Each edge climbed stores
// one bit at position bits (1 when the lower node is its parent's Right
// child, 0 otherwise) and then increments bits. The edge nearest to node
// therefore ends up in the lowest added bit, so reading the returned code
// from bit (bits-1) down to bit 0 gives the path from the root to node.
//
// The results are returned because r and bits are passed by value; without
// the return values the computed code would be lost. Shifts of 64 or more
// fall outside the uint64, so bits at those positions are dropped.
func (node *HuffNode) Code(r uint64, bits byte) (uint64, byte) {
	for parent := node.Parent; parent != nil; node, parent = parent, parent.Parent {
		if parent.Right == node {
			// Right edge: set the bit. A left edge leaves it at 0.
			r |= uint64(1) << bits
		}
		bits++
	}
	return r, bits
}

// SortNodes is a slice of nodes with the Len, Less and Swap methods needed
// by the sort package; nodes are ordered by ascending Count.
type SortNodes []*HuffNode

// Len reports the number of nodes in the slice.
func (nodes SortNodes) Len() int { return len(nodes) }

// Less reports whether the node at index i has a smaller Count than the
// node at index j.
func (nodes SortNodes) Less(i, j int) bool {
	a, b := nodes[i], nodes[j]
	return a.Count < b.Count
}

// Swap exchanges the nodes at indexes i and j.
func (nodes SortNodes) Swap(i, j int) {
	nodes[j], nodes[i] = nodes[i], nodes[j]
}

// Build builds a Huffman tree from the given leaves and returns its root.
// The leaves slice is stable-sorted in place by Count and then handed to
// BuildSorted, so the caller's slice is modified.
func Build(leaves []*HuffNode) *HuffNode {
	// Sort once up front; BuildSorted relies on this order.
	ordered := SortNodes(leaves)
	sort.Stable(ordered)
	return BuildSorted(leaves)
}

// BuildSorted builds a Huffman tree from leaves, which must already be
// sorted by ascending Count, and returns the root. It returns nil when
// leaves is empty.
//
// The slice is used as the work queue: its elements are overwritten while
// the tree is built, and the Parent field of every merged node is set.
func BuildSorted(leaves []*HuffNode) *HuffNode {
	if len(leaves) == 0 {
		return nil
	}
	for len(leaves) > 1 {
		// The two smallest nodes sit at the front of the sorted slice.
		first, second := leaves[0], leaves[1]
		merged := &HuffNode{Left: first, Right: second, Count: first.Count + second.Count}
		first.Parent, second.Parent = merged, merged

		// Binary-search the remaining nodes for the first one whose Count is
		// not below the merged Count; the merged node goes just before it.
		rest := leaves[2:]
		pos := 2 + sort.Search(len(rest), func(k int) bool {
			return rest[k].Count >= merged.Count
		})

		// Shift the smaller nodes one slot to the left, drop the merged node
		// into the gap, and discard the first slot.
		copy(leaves[1:], leaves[2:pos])
		leaves[pos-1] = merged
		leaves = leaves[1:]
	}
	return leaves[0]
}

// Print walks the tree below root and writes one line per leaf to standard
// output in the form 'symbol':code, where code is the binary path from the
// root (0 for a left edge, 1 for a right edge), zero-padded to the depth of
// the leaf. A node counts as a leaf when its Left child is nil.
//
// A nil root prints nothing, and a nil child is skipped, so passing the nil
// that Build returns for an empty leaf list no longer panics.
func Print(root *HuffNode) {
	var traverse func(n *HuffNode, code uint64, bits byte)

	traverse = func(n *HuffNode, code uint64, bits byte) {
		if n == nil {
			return
		}
		if n.Left == nil {
			// The width in the format is the leaf depth, so leading zero
			// bits of the code are kept.
			fmt.Printf("'%c':%0"+strconv.Itoa(int(bits))+"b\n", n.Value, code)
			return
		}
		bits++
		traverse(n.Left, code<<1, bits)
		traverse(n.Right, code<<1+1, bits)
	}
	traverse(root, 0, 0)
}
```
```

#### 4.2main方法:

```
// main builds a Huffman tree from six sample symbols with fixed counts and
// prints the resulting code of each symbol.
func main() {
	root := data.Build([]*data.HuffNode{
		{Value: ' ', Count: 20},
		{Value: 'a', Count: 40},
		{Value: 'm', Count: 10},
		{Value: 'l', Count: 7},
		{Value: 'f', Count: 8},
		{Value: 't', Count: 15},
	})
	data.Print(root)
}
```


小楼又东风.

如果大家喜欢我的分享的话.可以关注我的微信公众号

念何架构之路