本文已参与「新人创作礼」活动,一起开启掘金创作之路。
前言:上文学习了赫夫曼编码,接下来我们讲解赫夫曼编码压缩数据
数据压缩
根据赫夫曼编码压缩数据的原理,需要创建 "i like like like java do you like a java" 对应的赫夫曼树
- Node{data(存放数据),weight(权重),left,right }
- 得到字符串对应的 byte[] 数组
- 将构建赫夫曼树的Node 节点放到List
- 通过List创建对应的赫夫曼树
package com.huffmancode;
import java.util.*;
/**
 * Demo of building a Huffman code table for a byte sequence: counts how often
 * each byte occurs, builds the Huffman tree from those counts, and derives a
 * bit-string code for every distinct byte.
 *
 * @author Kcs 2022/9/12
 */
public class HuffmanCode {
    public static void main(String[] args) {
        String content = "i like you and must love you and only love you";
        // Use an explicit charset so the byte sequence does not depend on the platform default.
        byte[] contentBytes = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        // Length of the input in bytes.
        System.out.println("字符串长度:" + contentBytes.length);

        // One leaf node per distinct byte, weighted by its occurrence count.
        List<HuffmanNode> nodes = getHuffmanNodes(contentBytes);
        System.out.println("nodes=" + nodes);

        // Build the tree and dump it in pre-order.
        System.out.println("前序遍历当前的赫夫曼树~~~");
        HuffmanNode huffmanTreeRoot = creatHuffmanTree(nodes);
        preOrder(huffmanTreeRoot);

        // Derive and print the code table.
        Map<Byte, String> codeTable = getCodes(huffmanTreeRoot);
        System.out.println("~生成的赫夫曼编码表~\n" + codeTable);
    }

    /**
     * Code table produced by the most recent {@link #getCodes(HuffmanNode)} call,
     * mapping each byte to its code as a string of '0' and '1' characters.
     */
    static Map<Byte, String> huffmanCodes = new HashMap<Byte, String>();

    /**
     * Retained for backward compatibility; the code-table generation below no
     * longer reads it and starts every traversal from a fresh, empty prefix.
     */
    static StringBuilder stringBuilder = new StringBuilder();

    /**
     * Generates the code table for the tree rooted at {@code root}.
     *
     * <p>Each call starts from a fresh map, so entries from a previously
     * processed tree never leak into the result. A tree consisting of a single
     * leaf (input with only one distinct byte) gets the one-bit code "0",
     * because an empty code could not represent any occurrence.
     *
     * @param root root of the Huffman tree, may be {@code null}
     * @return the code table, or {@code null} when {@code root} is {@code null}
     */
    private static Map<Byte, String> getCodes(HuffmanNode root) {
        if (root == null) {
            return null;
        }
        huffmanCodes = new HashMap<Byte, String>();
        if (root.data != null) {
            // The root itself is a leaf: there is no edge to derive a bit from.
            huffmanCodes.put(root.data, "0");
            return huffmanCodes;
        }
        // Left edges contribute "0", right edges contribute "1".
        getCodes(root.left, "0", new StringBuilder());
        getCodes(root.right, "1", new StringBuilder());
        return huffmanCodes;
    }

    /**
     * Recursively collects the codes of all leaves below {@code node} into
     * {@link #huffmanCodes}.
     *
     * @param node          node to process; {@code null} is ignored
     * @param code          bit for the edge leading to {@code node} ("0" for a left
     *                      child, "1" for a right child)
     * @param stringBuilder path accumulated down to the parent; it is copied, never modified
     */
    private static void getCodes(HuffmanNode node, String code, StringBuilder stringBuilder) {
        if (node == null) {
            return;
        }
        // Copy so that sibling branches do not see each other's suffixes.
        StringBuilder path = new StringBuilder(stringBuilder).append(code);
        if (node.data == null) {
            // Internal node: keep descending on both sides.
            getCodes(node.left, "0", path);
            getCodes(node.right, "1", path);
        } else {
            // Leaf: the accumulated path is this byte's code.
            huffmanCodes.put(node.data, path.toString());
        }
    }

    /**
     * Counts the occurrences of every byte and wraps each (byte, count) pair in
     * a leaf node.
     *
     * @param bytes input bytes
     * @return one node per distinct byte, in the iteration order of the counting map,
     *         e.g. [Node[data=97, weight=5], Node[data=32, weight=9]]
     */
    private static List<HuffmanNode> getHuffmanNodes(byte bytes[]) {
        // Tally how many times each byte value occurs.
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer seen = counts.get(b);
            counts.put(b, seen == null ? 1 : seen + 1);
        }
        // Turn every entry into a leaf node.
        List<HuffmanNode> nodes = new ArrayList<HuffmanNode>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            nodes.add(new HuffmanNode(entry.getKey(), entry.getValue()));
        }
        return nodes;
    }

    /**
     * Builds the Huffman tree by repeatedly merging the two lightest nodes.
     *
     * <p>The given list is consumed: on return it holds only the root (or is
     * still empty).
     *
     * @param nodes leaf nodes to merge; modified in place
     * @return the root of the tree, or {@code null} when {@code nodes} is
     *         {@code null} or empty
     */
    private static HuffmanNode creatHuffmanTree(List<HuffmanNode> nodes) {
        if (nodes == null || nodes.isEmpty()) {
            // No data means no tree; getCodes and preOrder already accept a null root.
            return null;
        }
        while (nodes.size() > 1) {
            // Ascending by weight, so the two lightest nodes sit at the front.
            Collections.sort(nodes);
            HuffmanNode leftNode = nodes.remove(0);
            HuffmanNode rightNode = nodes.remove(0);
            // Internal nodes carry no data, only the combined weight.
            HuffmanNode parent = new HuffmanNode(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        return nodes.get(0);
    }

    /**
     * Prints the tree in pre-order, or a placeholder message for an empty tree.
     *
     * @param root root of the tree, may be {@code null}
     */
    private static void preOrder(HuffmanNode root) {
        if (root == null) {
            System.out.println("空树");
            return;
        }
        root.preOrder();
    }
}
/**
 * A node of the Huffman tree. Nodes order themselves by ascending weight.
 */
class HuffmanNode implements Comparable<HuffmanNode> {
    /**
     * The byte stored in this node, e.g. 'a' -> 97, ' ' -> 32.
     */
    Byte data;
    /**
     * Weight of the node.
     */
    int weight;
    HuffmanNode left;
    HuffmanNode right;

    /**
     * @param data   byte value held by the node, may be {@code null}
     * @param weight weight of the node
     */
    public HuffmanNode(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    /**
     * Orders nodes by ascending weight.
     *
     * <p>Uses {@link Integer#compare(int, int)} rather than subtraction, which
     * overflows and yields the wrong sign when the weights are far apart.
     */
    @Override
    public int compareTo(HuffmanNode huffmanNode) {
        return Integer.compare(this.weight, huffmanNode.weight);
    }

    @Override
    public String toString() {
        return "HuffmanNode{" + "data=" + data + ", weight=" + weight + '}';
    }

    /**
     * Prints this node, then its left subtree, then its right subtree.
     */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preOrder();
        }
        if (this.right != null) {
            this.right.preOrder();
        }
    }
}