前言
哈夫曼编码(Huffman Coding),又称霍夫曼编码,是一种可变长度的编码方式。该方法完全依据字符出现的概率来构造异字头(即任何一个码字都不是另一个码字的前缀)且平均长度最短的码字,因此有时也称之为最佳编码,一般就叫做Huffman编码。
哈夫曼思考
下图是学生成绩分布图:
if( sum < 60)
result = “不及格”;
else if(sum < 70)
result = “及格”;
else if(sum < 80)
result = “中等”;
else if(sum < 90)
result = “良好”;
else
result = “优秀”;
通常我们使用WPL(树的带权路径长度)来衡量一棵判定树的优劣:WPL等于每个叶子结点的权值(这里是该分数段学生所占的比例)乘以从根结点到该叶子结点的路径长度,再把所有叶子结点的结果相加。WPL越小,平均比较次数越少。
- 优化前:1x5 + 2x15 + 3x40 + 4x30 + 4x10 = 315
- 优化后:5x3 + 15x3 + 40x2 + 30x2 + 10x2 = 220
构建过程
我们以传输文字内容为例:
- 排序
- 构建树
- 左孩子的路径用0表示,右孩子用1表示
- 返回新编码
代码
#include <stdio.h>
#include "string.h"
#include "stdlib.h"
/*
 * Compile-time limits, declared as enum constants rather than `const int`
 * objects. In C a `const int` is not an integer constant expression, so it
 * cannot legally size an array member such as HaffCode.bit[MaxBit]; an enum
 * constant can.
 */
enum {
    MaxValue = 100000, /* upper bound assumed for any node weight */
    MaxBit = 4,        /* capacity, in bits, of one stored code; a tree with
                          n leaves can need up to n - 1 bits per code */
    MaxN = 10          /* largest number of leaves the program accepts */
};
/*
 * One node of the Huffman tree, stored in a flat array and linked to its
 * relatives by array index rather than by pointer.
 */
struct HaffNode {
    int weight; /* weight of this node */
    int flag;   /* 0: not yet merged into a subtree; 1: already merged */
    int parent; /* array index of the parent node */
    int lChild; /* array index of the left child */
    int rChild; /* array index of the right child */
};
typedef struct HaffNode HaffNode;
/*
 * The Huffman code produced for one leaf.
 */
struct HaffCode {
    int bit[MaxBit]; /* code digits, one 0/1 value per element */
    int length;      /* number of elements of bit[] that are in use */
    int weight;      /* weight of the leaf this code belongs to */
};
typedef struct HaffCode HaffCode;
/**
 * Build a Huffman tree over n weighted leaves.
 *
 * A Huffman tree only has nodes of degree 0 or 2, so n leaves give 2*n - 1
 * nodes in total. Slots [0, n) of haffTree receive the leaves in the order
 * of weight[]; slots [n, 2*n - 1) receive the internal nodes in the order
 * they are created, so the root is the last slot written.
 *
 * A parent index of 0 means "no parent". Slot 0 always holds a leaf, and
 * parents are only ever created at index n or above, so 0 can never be a
 * genuine parent index.
 *
 * @param weight   leaf weights; the first n elements are read
 * @param n        number of leaves
 * @param haffTree caller-provided array with room for 2*n - 1 nodes; every
 *                 one of those slots is overwritten
 */
void haffman(int weight[], int n, HaffNode *haffTree) {
    const int total = 2 * n - 1;

    /* Reset every slot: leaves take their weight, internal nodes start at 0. */
    for (int i = 0; i < total; i++) {
        haffTree[i].weight = (i < n) ? weight[i] : 0;
        haffTree[i].parent = 0;
        haffTree[i].flag = 0;
        haffTree[i].lChild = -1;
        haffTree[i].rChild = -1;
    }

    /* Each of the n - 1 rounds merges the two lightest unmerged nodes. */
    for (int i = 0; i < n - 1; i++) {
        /*
         * Track the candidates by index, with -1 meaning "none found yet",
         * instead of seeding the search with a maximum weight. That way a
         * weight of any magnitude can still be selected. Strict '<' keeps
         * the earliest index on ties.
         */
        int first = -1;  /* lightest unmerged node */
        int second = -1; /* second lightest unmerged node */

        for (int j = 0; j < n + i; j++) {
            if (haffTree[j].flag != 0) {
                continue;
            }
            if (first < 0 || haffTree[j].weight < haffTree[first].weight) {
                second = first;
                first = j;
            } else if (second < 0 || haffTree[j].weight < haffTree[second].weight) {
                second = j;
            }
        }

        /*
         * Round i sees n + i nodes of which 2 * i are already merged, which
         * leaves n - i >= 2 candidates, so both indices are valid here.
         */
        const int merged = n + i;
        haffTree[first].parent = merged;
        haffTree[second].parent = merged;
        haffTree[first].flag = 1;
        haffTree[second].flag = 1;
        haffTree[merged].weight = haffTree[first].weight + haffTree[second].weight;
        haffTree[merged].lChild = first;
        haffTree[merged].rChild = second;
    }
}
/**
 * Derive the Huffman code of every leaf from a finished Huffman tree.
 *
 * For each leaf i in [0, n) the path to the root is followed through the
 * parent indices; a step taken from a left child contributes 0 and a step
 * from a right child contributes 1. The digits are stored in haffCode[i].bit
 * in root-to-leaf order, together with their count and the leaf's weight.
 *
 * A parent index of 0 is treated as "no parent" and ends the walk.
 *
 * If a leaf lies deeper than haffCode[i].bit can hold, nothing is written to
 * bit[], the length is stored as 0 and a diagnostic is printed on stderr.
 *
 * @param haffTree tree whose first n slots are the leaves
 * @param n        number of leaves
 * @param haffCode output array with room for n codes
 */
void haffmanCode(HaffNode haffTree[], int n, HaffCode haffCode[]) {
    /* Number of digits one code can hold, taken from the array itself. */
    const int capacity = (int)(sizeof haffCode[0].bit / sizeof haffCode[0].bit[0]);

    for (int i = 0; i < n; i++) {
        /* Pass 1: count the edges between this leaf and the root. */
        int depth = 0;
        for (int p = haffTree[i].parent; p != 0; p = haffTree[p].parent) {
            depth++;
        }

        haffCode[i].weight = haffTree[i].weight;

        if (depth > capacity) {
            fprintf(stderr,
                    "haffmanCode: leaf %d needs %d bits but only %d fit\n",
                    i, depth, capacity);
            haffCode[i].length = 0;
            continue;
        }

        /*
         * Pass 2: the walk goes leaf-to-root, so fill bit[] from the back to
         * end up with the digits in root-to-leaf order. No scratch buffer is
         * needed.
         */
        int pos = depth;
        int child = i;
        for (int p = haffTree[i].parent; p != 0; p = haffTree[p].parent) {
            haffCode[i].bit[--pos] = (haffTree[p].lChild == child) ? 0 : 1;
            child = p;
        }
        haffCode[i].length = depth;
    }
}
运行
/*
 * Demo driver: builds the Huffman tree for four fixed weights, prints each
 * leaf's weight and code, and finally prints the weighted path length (the
 * sum of weight * code length over all leaves).
 *
 * Returns 0 on success and a failure status if n exceeds MaxN or memory
 * cannot be allocated.
 */
int main(int argc, const char * argv[]) {
    (void)argc;
    (void)argv;

    printf("Hello, 哈夫曼编码!\n");

    int n = 4;
    /* Leaf weights. */
    int weight[] = {2,4,5,7};

    /* Validate n before sizing any allocation from it. */
    if (n > MaxN) {
        printf("定义的n越界,修改MaxN!");
        exit(EXIT_FAILURE);
    }

    /* A Huffman tree with n leaves has 2*n - 1 nodes. */
    HaffNode *myHaffTree = malloc(sizeof *myHaffTree * (size_t)(2 * n - 1));
    HaffCode *myHaffCode = malloc(sizeof *myHaffCode * (size_t)n);
    if (myHaffTree == NULL || myHaffCode == NULL) {
        fprintf(stderr, "out of memory\n");
        free(myHaffTree);
        free(myHaffCode);
        return EXIT_FAILURE;
    }

    /* 1. Build the Huffman tree. */
    haffman(weight, n, myHaffTree);
    /* 2. Derive the code of every leaf from the tree. */
    haffmanCode(myHaffTree, n, myHaffCode);

    /* 3. Print every code and accumulate the weighted path length. */
    int m = 0;
    for (int i = 0; i < n; i++) {
        printf("Weight = %d\n", myHaffCode[i].weight);
        for (int j = 0; j < myHaffCode[i].length; j++) {
            printf("%d", myHaffCode[i].bit[j]);
        }
        m = m + myHaffCode[i].weight * myHaffCode[i].length;
        printf("\n");
    }
    printf("Huffman's WPL is:%d\n", m);

    free(myHaffCode);
    free(myHaffTree);
    return 0;
}