一、哈希表核心原理
1.1 哈希表基本概念
哈希表是一种通过哈希函数将键映射到存储位置的高效数据结构,实现平均O(1)时间复杂度的查找、插入和删除操作。
java
/**
 * Minimal demonstration of how a hash table maps keys to array slots.
 *
 * <p>The "hash function" used here is deliberately naive: the length of the key
 * modulo the table size.
 */
public class HashTableIntroduction {
    /** Number of slots in the demo table. */
    private static final int TABLE_SIZE = 10;

    /** Toy hash function: hash(key) = key.length() % TABLE_SIZE. */
    private static int slotOf(String key) {
        return key.length() % TABLE_SIZE;
    }

    public static void main(String[] args) {
        System.out.println("哈希表工作原理:");
        String[] slots = new String[TABLE_SIZE];

        // Each row is {key, value}; "apple" lands in slot 5, "banana" in slot 6.
        String[][] entries = {
            {"apple", "red fruit"},
            {"banana", "yellow fruit"}
        };

        // Store every entry first ...
        int[] positions = new int[entries.length];
        for (int i = 0; i < entries.length; i++) {
            positions[i] = slotOf(entries[i][0]);
            slots[positions[i]] = entries[i][1];
        }

        // ... then report where each one ended up.
        for (int i = 0; i < entries.length; i++) {
            System.out.println(
                entries[i][0] + " 存储在位置: " + positions[i] + ", 值: " + slots[positions[i]]);
        }
    }
}
1.2 哈希函数设计
java
/**
 * A small collection of classic hash functions used for illustration.
 */
public class HashFunctions {
    /**
     * Direct addressing: {@code hash(key) = a * key + b}.
     *
     * @param key the key to hash
     * @param a   the multiplier
     * @param b   the offset
     * @return {@code a * key + b} (standard int arithmetic, wraps on overflow)
     */
    public static int directAddress(int key, int a, int b) {
        return a * key + b;
    }

    /**
     * Division method: {@code hash(key) = |key| % prime} (prime should be a prime number).
     *
     * @param key   the key to hash
     * @param prime the modulus
     * @return a value in {@code [0, |prime|)}
     * @throws ArithmeticException if {@code prime} is zero
     */
    public static int divisionMethod(int key, int prime) {
        // Math.abs(Integer.MIN_VALUE) is still negative in int arithmetic, which would
        // produce a negative hash; take the absolute value in long space instead.
        return (int) (Math.abs((long) key) % prime);
    }

    /**
     * Mid-square method: squares the key and returns the middle three decimal digits.
     * When the square has three digits or fewer, the whole square is returned.
     *
     * @param key the key to hash
     * @return the middle three digits of {@code key * key}, or the square itself if it is
     *         shorter than four digits
     */
    public static int midSquare(int key) {
        long square = (long) key * key;
        String digits = String.valueOf(square);
        // Squares with fewer than three digits have no three-digit middle; slicing them
        // would throw StringIndexOutOfBoundsException, so use the whole value.
        if (digits.length() <= 3) {
            return (int) square;
        }
        int mid = digits.length() / 2;
        // Take the three digits centred on the middle position.
        return Integer.parseInt(digits.substring(mid - 1, mid + 2));
    }

    /**
     * String hash function - DJB2 algorithm.
     *
     * @param str the string to hash (must not be null)
     * @return the DJB2 hash; int overflow wraps around
     */
    public static int djb2Hash(String str) {
        int hash = 5381;
        for (char c : str.toCharArray()) {
            hash = ((hash << 5) + hash) + c; // hash * 33 + c
        }
        return hash;
    }

    /**
     * String hash function - Java's built-in implementation.
     *
     * @param str the string to hash (must not be null)
     * @return {@code str.hashCode()}
     */
    public static int javaStringHash(String str) {
        return str.hashCode();
    }

    public static void main(String[] args) {
        System.out.println("直接定址法(2x+1): " + directAddress(10, 2, 1));
        System.out.println("除留余数法(%13): " + divisionMethod(27, 13));
        System.out.println("平方取中法(123): " + midSquare(123));
        System.out.println("DJB2哈希(hello): " + djb2Hash("hello"));
        System.out.println("Java哈希(hello): " + javaStringHash("hello"));
    }
}
1.3 哈希冲突与负载因子
java
/**
 * Illustrates how the load factor of a hash table influences its collision rate.
 */
public class HashConflictAnalysis {
    /**
     * Computes the fraction of keys that collide with an already occupied slot when
     * hashed with {@code |key| % tableSize}.
     *
     * @param keys      the keys to place into the table
     * @param tableSize the number of slots; must be positive
     * @return collisions divided by the number of keys, or {@code 0.0} for an empty key set
     * @throws IllegalArgumentException if {@code tableSize} is not positive
     */
    public static double calculateConflictRate(int[] keys, int tableSize) {
        if (tableSize <= 0) {
            throw new IllegalArgumentException("tableSize must be positive: " + tableSize);
        }
        // Avoid 0/0 = NaN when there is nothing to insert.
        if (keys.length == 0) {
            return 0.0;
        }
        boolean[] occupied = new boolean[tableSize];
        int conflicts = 0;
        for (int key : keys) {
            // Take the absolute value in long space: Math.abs(Integer.MIN_VALUE) stays
            // negative as an int and would yield a negative array index.
            int index = (int) (Math.abs((long) key) % tableSize);
            if (occupied[index]) {
                conflicts++;
            } else {
                occupied[index] = true;
            }
        }
        return (double) conflicts / keys.length;
    }

    /**
     * Prints the collision rate observed for a fixed number of random keys across
     * several table sizes (i.e. several load factors).
     */
    public static void loadFactorTest() {
        int[] tableSizes = {10, 20, 50, 100};
        int elementCount = 75; // fixed number of elements
        System.out.println("负载因子与冲突率关系:");
        for (int size : tableSizes) {
            double loadFactor = (double) elementCount / size;
            // Generate fresh random test data in [0, 1000) for each table size.
            int[] testData = new int[elementCount];
            for (int i = 0; i < elementCount; i++) {
                testData[i] = (int) (Math.random() * 1000);
            }
            double conflictRate = calculateConflictRate(testData, size);
            System.out.printf("表大小: %d, 负载因子: %.2f, 冲突率: %.2f%%\n",
                size, loadFactor, conflictRate * 100);
        }
    }

    public static void main(String[] args) {
        loadFactorTest();
        // Recommended load factor
        System.out.println("\n推荐负载因子: 0.75");
        System.out.println("当负载因子超过0.75时,冲突率显著增加,建议扩容");
    }
}
二、哈希桶实现
2.1 完整哈希桶实现
java
/**
 * A hash table that resolves collisions by separate chaining (one singly linked list
 * per bucket). Null keys are not supported; null values are allowed.
 *
 * <p>This class is not thread-safe.
 *
 * @param <K> key type; must implement {@code hashCode} and {@code equals} consistently
 * @param <V> value type
 */
public class HashBucket<K, V> {
    /**
     * A single entry in a bucket's chain.
     */
    private static class Node<K, V> {
        K key;
        V value;
        Node<K, V> next;

        public Node(K key, V value) {
            this.key = key;
            this.value = value;
        }
    }

    private Node<K, V>[] array;
    private int size; // number of stored entries
    private static final double LOAD_FACTOR = 0.75;
    private static final int DEFAULT_CAPACITY = 10;

    /** Creates a table with the default capacity of 10 buckets. */
    public HashBucket() {
        this(DEFAULT_CAPACITY);
    }

    /**
     * Creates a table with the given number of buckets.
     *
     * @param capacity initial bucket count; must be positive
     * @throws IllegalArgumentException if {@code capacity} is not positive
     */
    @SuppressWarnings("unchecked")
    public HashBucket(int capacity) {
        // A zero-sized table would make every index computation divide by zero.
        if (capacity <= 0) {
            throw new IllegalArgumentException("Capacity must be positive: " + capacity);
        }
        array = (Node<K, V>[]) new Node[capacity];
        size = 0;
    }

    /**
     * Inserts a key/value pair, replacing the value if the key is already present.
     *
     * @param key   the key; must not be null
     * @param value the value to associate with the key
     * @throws IllegalArgumentException if {@code key} is null
     */
    public void put(K key, V value) {
        if (key == null) {
            throw new IllegalArgumentException("Key cannot be null");
        }
        Node<K, V> existing = findNode(key);
        if (existing != null) {
            existing.value = value; // update in place
            return;
        }
        // Insert the new node at the head of its chain.
        int index = getIndex(key);
        Node<K, V> newNode = new Node<>(key, value);
        newNode.next = array[index];
        array[index] = newNode;
        size++;
        // Grow once the load factor threshold is exceeded.
        if (loadFactor() > LOAD_FACTOR) {
            resize();
        }
    }

    /**
     * Looks up the value stored for a key.
     *
     * @param key the key to look up
     * @return the associated value, or {@code null} if the key is null, absent, or mapped
     *         to {@code null}
     */
    public V get(K key) {
        if (key == null) {
            return null;
        }
        Node<K, V> node = findNode(key);
        return node == null ? null : node.value;
    }

    /**
     * Removes a key and its value.
     *
     * @param key the key to remove
     * @return the value that was removed, or {@code null} if the key was null or absent
     */
    public V remove(K key) {
        if (key == null) {
            return null;
        }
        int index = getIndex(key);
        Node<K, V> current = array[index];
        Node<K, V> prev = null;
        while (current != null) {
            if (current.key.equals(key)) {
                if (prev == null) {
                    // Removing the head of the chain.
                    array[index] = current.next;
                } else {
                    prev.next = current.next;
                }
                size--;
                return current.value;
            }
            prev = current;
            current = current.next;
        }
        return null; // key not present
    }

    /**
     * Reports whether the key is present, even when it is mapped to {@code null}.
     *
     * @param key the key to test
     * @return {@code true} if an entry with this key exists
     */
    public boolean containsKey(K key) {
        // Checking get(key) != null would miss keys that are mapped to a null value.
        return key != null && findNode(key) != null;
    }

    /**
     * @return the number of stored entries
     */
    public int size() {
        return size;
    }

    /**
     * @return {@code true} if the table holds no entries
     */
    public boolean isEmpty() {
        return size == 0;
    }

    /**
     * Finds the chain node holding the given non-null key, or {@code null} if absent.
     */
    private Node<K, V> findNode(K key) {
        Node<K, V> current = array[getIndex(key)];
        while (current != null) {
            if (current.key.equals(key)) {
                return current;
            }
            current = current.next;
        }
        return null;
    }

    /**
     * Maps a key to its bucket index in {@code [0, array.length)}.
     */
    private int getIndex(K key) {
        // Math.abs(Integer.MIN_VALUE) is negative in int arithmetic; widen to long first
        // so that a hash code of Integer.MIN_VALUE cannot produce a negative index.
        return (int) (Math.abs((long) key.hashCode()) % array.length);
    }

    /**
     * @return the current load factor (entries per bucket)
     */
    private double loadFactor() {
        return (double) size / array.length;
    }

    /**
     * Doubles the bucket array and redistributes all existing nodes.
     */
    @SuppressWarnings("unchecked")
    private void resize() {
        Node<K, V>[] oldArray = array;
        array = (Node<K, V>[]) new Node[oldArray.length * 2];
        // Relink the existing nodes instead of re-inserting through put(): keys are
        // already known to be unique, so no duplicate scan or new allocation is needed.
        for (Node<K, V> head : oldArray) {
            Node<K, V> current = head;
            while (current != null) {
                Node<K, V> next = current.next; // save before relinking
                int index = getIndex(current.key);
                current.next = array[index];
                array[index] = current;
                current = next;
            }
        }
        System.out.println("哈希表已扩容: " + oldArray.length + " -> " + array.length);
    }

    /**
     * Prints every bucket and its chain, followed by the load factor (for debugging).
     */
    public void printStructure() {
        System.out.println("哈希表结构 (大小: " + size + ", 容量: " + array.length + "):");
        for (int i = 0; i < array.length; i++) {
            System.out.print("[" + i + "]: ");
            Node<K, V> current = array[i];
            while (current != null) {
                System.out.print("(" + current.key + "=" + current.value + ") -> ");
                current = current.next;
            }
            System.out.println("null");
        }
        System.out.println("负载因子: " + String.format("%.2f", loadFactor()));
    }
}
2.2 哈希桶测试
java
/**
 * Console walkthrough that exercises HashBucket: insertion, lookup, removal,
 * automatic resizing and a rough insertion timing.
 */
public class HashBucketTest {
    /** Inserts a handful of fruit entries, including one overwrite, and dumps the table. */
    private static void runInsertDemo(HashBucket<String, Integer> table) {
        System.out.println("=== 插入测试 ===");
        String[] names = {"apple", "banana", "orange", "grape"};
        for (int i = 0; i < names.length; i++) {
            table.put(names[i], i + 1);
        }
        table.put("apple", 5); // overwrite an existing key
        table.printStructure();
    }

    /** Looks up two existing keys and one missing key. */
    private static void runLookupDemo(HashBucket<String, Integer> table) {
        System.out.println("\n=== 查询测试 ===");
        for (String name : new String[] {"apple", "banana", "pear"}) { // "pear" is absent
            System.out.println(name + ": " + table.get(name));
        }
    }

    /** Removes one key and dumps the table afterwards. */
    private static void runRemoveDemo(HashBucket<String, Integer> table) {
        System.out.println("\n=== 删除测试 ===");
        Integer removed = table.remove("orange");
        System.out.println("删除orange: " + removed);
        table.printStructure();
    }

    /** Adds twenty more entries to the table and dumps it afterwards. */
    private static void runResizeDemo(HashBucket<String, Integer> table) {
        System.out.println("\n=== 扩容测试 ===");
        int n = 0;
        while (n < 20) {
            table.put("key" + n, n * 10);
            n++;
        }
        table.printStructure();
    }

    /** Times the insertion of 10000 integer keys into a fresh table. */
    private static void runTimingDemo() {
        System.out.println("\n=== 性能测试 ===");
        HashBucket<Integer, String> bigTable = new HashBucket<>();
        long begin = System.currentTimeMillis();
        for (int n = 0; n < 10000; n++) {
            bigTable.put(n, "value" + n);
        }
        long elapsed = System.currentTimeMillis() - begin;
        System.out.println("插入10000个元素耗时: " + elapsed + "ms");
    }

    public static void main(String[] args) {
        HashBucket<String, Integer> table = new HashBucket<>();
        runInsertDemo(table);
        runLookupDemo(table);
        runRemoveDemo(table);
        runResizeDemo(table);
        runTimingDemo();
    }
}
三、哈希表应用题目
3.1 随机链表的复制
java
import java.util.*;
class Node {
int val;
Node next;
Node random;
public Node(int val) {
this.val = val;
this.next = null;
this.random = null;
}
}
public class CopyRandomList {
/**
* 方法1: 使用HashMap (时间复杂度O(n), 空间复杂度O(n))
*/
public Node copyRandomListHashMap(Node head) {
if (head == null) return null;
Map<Node, Node> map = new HashMap<>();
Node current = head;
// 第一遍遍历:创建所有新节点
while (current != null) {
map.put(current, new Node(current.val));
current = current.next;
}
// 第二遍遍历:设置next和random指针
current = head;
while (current != null) {
Node newNode = map.get(current);
newNode.next = map.get(current.next);
newNode.random = map.get(current.random);
current = current.next;
}
return map.get(head);
}
/**
* 方法2: 原地修改 (时间复杂度O(n), 空间复杂度O(1))
*/
public Node copyRandomListInPlace(Node head) {
if (head == null) return null;
// 第一步:在每个原节点后面插入复制节点
Node current = head;
while (current != null) {
Node copy = new Node(current.val);
copy.next = current.next;
current.next = copy;
current = copy.next;
}
// 第二步:设置random指针
current = head;
while (current != null) {
if (current.random != null) {
current.next.random = current.random.next;
}
current = current.next.next;
}
// 第三步:分离两个链表
Node newHead = head.next;
current = head;
while (current != null) {
Node copy = current.next;
current.next = copy.next;
if (copy.next != null) {
copy.next = copy.next.next;
}
current = current.next;
}
return newHead;
}
// 测试方法
public static void printList(Node head) {
Node current = head;
while (current != null) {
System.out.print("[" + current.val + ", " +
(current.random != null ? current.random.val : "null") + "] -> ");
current = current.next;
}
System.out.println("null");
}
public static void main(String[] args) {
// 创建测试链表: [[7,null],[13,0],[11,4],[10,2],[1,0]]
Node node1 = new Node(7);
Node node2 = new Node(13);
Node node3 = new Node(11);
Node node4 = new Node(10);
Node node5 = new Node(1);
node1.next = node2;
node2.next = node3;
node3.next = node4;
node4.next = node5;
node2.random = node1; // 13 -> 7
node3.random = node5; // 11 -> 1
node4.random = node3; // 10 -> 11
node5.random = node1; // 1 -> 7
CopyRandomList solution = new CopyRandomList();
System.out.println("原链表:");
printList(node1);
Node copiedList = solution.copyRandomListHashMap(node1);
System.out.println("复制后的链表(HashMap方法):");
printList(copiedList);
Node copiedList2 = solution.copyRandomListInPlace(node1);
System.out.println("复制后的链表(原地修改方法):");
printList(copiedList2);
}
}
3.2 旧键盘问题
java
import java.util.*;
/**
 * "Broken keyboard" problem: given the text that was meant to be typed and the text
 * that actually appeared, report the keys that must be broken.
 */
public class BrokenKeyboard {
    /**
     * Finds the broken keys using hash sets.
     *
     * @param expected the text the user intended to type
     * @param actual   the text that actually appeared
     * @return the upper-cased broken keys, each once, in order of first appearance in
     *         {@code expected}
     */
    public static String findBrokenKeys(String expected, String actual) {
        Set<Character> workingKeys = new HashSet<>();
        Set<Character> brokenKeys = new LinkedHashSet<>(); // keeps first-seen order
        // Upper-case with Locale.ROOT so the result does not depend on the JVM's default
        // locale (e.g. Turkish maps 'i' to a dotted capital I).
        for (char c : actual.toUpperCase(Locale.ROOT).toCharArray()) {
            workingKeys.add(c);
        }
        // Every expected key that never showed up is broken; the set ignores repeats.
        for (char c : expected.toUpperCase(Locale.ROOT).toCharArray()) {
            if (!workingKeys.contains(c)) {
                brokenKeys.add(c);
            }
        }
        StringBuilder result = new StringBuilder();
        for (char c : brokenKeys) {
            result.append(c);
        }
        return result.toString();
    }

    /**
     * Finds the broken keys using bit sets indexed by character code instead of hash sets.
     *
     * @param expected the text the user intended to type
     * @param actual   the text that actually appeared
     * @return the upper-cased broken keys, each once, in order of first appearance in
     *         {@code expected}
     */
    public static String findBrokenKeysOptimized(String expected, String actual) {
        // BitSet grows on demand, so characters outside ASCII no longer overflow a
        // fixed 128-entry table; 128 bits are pre-sized for the common ASCII case.
        BitSet workingKeys = new BitSet(128);
        BitSet foundBroken = new BitSet(128);
        StringBuilder result = new StringBuilder();
        // Mark the keys that demonstrably work.
        for (char c : actual.toUpperCase(Locale.ROOT).toCharArray()) {
            workingKeys.set(c);
        }
        // Collect each missing key the first time it is seen.
        for (char c : expected.toUpperCase(Locale.ROOT).toCharArray()) {
            if (!workingKeys.get(c) && !foundBroken.get(c)) {
                foundBroken.set(c);
                result.append(c);
            }
        }
        return result.toString();
    }

    public static void main(String[] args) {
        String expected = "7_This_is_a_test";
        String actual = "_hs_s_a_es";
        System.out.println("期望输入: " + expected);
        System.out.println("实际输入: " + actual);
        String brokenKeys = findBrokenKeys(expected, actual);
        System.out.println("坏掉的键: " + brokenKeys);
        String brokenKeys2 = findBrokenKeysOptimized(expected, actual);
        System.out.println("坏掉的键(优化版): " + brokenKeys2);
        // More test cases: {expected input, actual input, expected broken keys}
        String[][] testCases = {
            {"7_This_is_a_test", "_hs_s_a_es", "7TI"},
            // 'R' from "another" is missing from the actual text as well, so it is broken too.
            {"This_is_another_test", "This_is_test", "ANOR"},
            {"ABCDEFG", "ABC", "DEFG"}
        };
        System.out.println("\n=== 测试用例 ===");
        for (String[] testCase : testCases) {
            String result = findBrokenKeys(testCase[0], testCase[1]);
            System.out.printf("输入: %s, %s -> 输出: %s (期望: %s) %s\n",
                testCase[0], testCase[1], result, testCase[2],
                result.equals(testCase[2]) ? "✓" : "✗");
        }
    }
}
3.3 宝石与石头
java
import java.util.*;
/**
 * "Jewels and Stones": count how many characters of {@code stones} are also listed in
 * {@code jewels}. Matching is case-sensitive.
 */
public class JewelsAndStones {
    /**
     * Approach 1: hash set of jewel characters.
     *
     * @param jewels the characters that count as jewels
     * @param stones the stones to inspect
     * @return the number of stones that are jewels
     */
    public static int numJewelsInStonesHashSet(String jewels, String stones) {
        Set<Character> jewelSet = new HashSet<>();
        // Register every jewel type.
        for (char c : jewels.toCharArray()) {
            jewelSet.add(c);
        }
        // Count the stones that are jewels.
        int count = 0;
        for (char c : stones.toCharArray()) {
            if (jewelSet.contains(c)) {
                count++;
            }
        }
        return count;
    }

    /**
     * Approach 2: boolean lookup table indexed by character code (no boxing or hashing).
     *
     * @param jewels the characters that count as jewels
     * @param stones the stones to inspect
     * @return the number of stones that are jewels
     */
    public static int numJewelsInStonesArray(String jewels, String stones) {
        // Size the table to the largest jewel code point seen, so that non-ASCII
        // characters cannot index past a fixed 128-entry array.
        int tableSize = 0;
        for (char c : jewels.toCharArray()) {
            tableSize = Math.max(tableSize, c + 1);
        }
        boolean[] isJewel = new boolean[tableSize];
        for (char c : jewels.toCharArray()) {
            isJewel[c] = true;
        }
        // A stone beyond the table cannot be a jewel.
        int count = 0;
        for (char c : stones.toCharArray()) {
            if (c < isJewel.length && isJewel[c]) {
                count++;
            }
        }
        return count;
    }

    /**
     * Approach 3: Java 8 Stream API.
     *
     * @param jewels the characters that count as jewels
     * @param stones the stones to inspect
     * @return the number of stones that are jewels
     */
    public static int numJewelsInStonesStream(String jewels, String stones) {
        Set<Character> jewelSet = new HashSet<>();
        for (char c : jewels.toCharArray()) {
            jewelSet.add(c);
        }
        return (int) stones.chars()
            .mapToObj(c -> (char) c)
            .filter(jewelSet::contains)
            .count();
    }

    public static void main(String[] args) {
        String jewels = "aA";
        String stones = "aAAbbbb";
        System.out.println("宝石类型: " + jewels);
        System.out.println("拥有的石头: " + stones);
        int count1 = numJewelsInStonesHashSet(jewels, stones);
        int count2 = numJewelsInStonesArray(jewels, stones);
        int count3 = numJewelsInStonesStream(jewels, stones);
        System.out.println("宝石数量(HashSet): " + count1);
        System.out.println("宝石数量(数组): " + count2);
        System.out.println("宝石数量(Stream): " + count3);
        // Rough timing comparison
        String largeJewels = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
        StringBuilder largeStones = new StringBuilder();
        for (int i = 0; i < 100000; i++) {
            largeStones.append((char) ('a' + i % 26));
        }
        // Materialize the input once so string building is not part of either timing.
        String largeStoneText = largeStones.toString();
        long startTime = System.currentTimeMillis();
        int result1 = numJewelsInStonesHashSet(largeJewels, largeStoneText);
        long time1 = System.currentTimeMillis() - startTime;
        startTime = System.currentTimeMillis();
        int result2 = numJewelsInStonesArray(largeJewels, largeStoneText);
        long time2 = System.currentTimeMillis() - startTime;
        System.out.println("\n性能测试 (100000个石头):");
        System.out.println("HashSet方法: " + time1 + "ms, 结果: " + result1);
        System.out.println("数组方法: " + time2 + "ms, 结果: " + result2);
    }
}
3.4 只出现一次的数字
java
import java.util.*;
/**
 * "Single Number": every value in the array appears exactly twice except one; find
 * the value that appears once. Four alternative solutions are shown.
 */
public class SingleNumber {
    /**
     * Approach 1: XOR-fold the array; paired values cancel out.
     * O(n) time, O(1) space.
     */
    public static int singleNumberXOR(int[] nums) {
        return Arrays.stream(nums).reduce(0, (acc, value) -> acc ^ value);
    }

    /**
     * Approach 2: toggle membership in a set; whatever remains is the answer.
     * O(n) time, O(n) space.
     */
    public static int singleNumberHashSet(int[] nums) {
        Set<Integer> unpaired = new HashSet<>();
        for (int value : nums) {
            // add() returns false when the value is already present: second sighting.
            if (!unpaired.add(value)) {
                unpaired.remove(value);
            }
        }
        return unpaired.iterator().next();
    }

    /**
     * Approach 3: count occurrences and return a value whose count is one.
     */
    public static int singleNumberHashMap(int[] nums) {
        Map<Integer, Integer> counts = new HashMap<>();
        for (int value : nums) {
            counts.merge(value, 1, Integer::sum);
        }
        for (Map.Entry<Integer, Integer> entry : counts.entrySet()) {
            int occurrences = entry.getValue();
            if (occurrences == 1) {
                return entry.getKey();
            }
        }
        return -1; // not reached for valid input
    }

    /**
     * Approach 4: arithmetic identity 2*(a+b+c) - (a+a+b+b+c) = c.
     */
    public static int singleNumberMath(int[] nums) {
        Set<Integer> distinct = new HashSet<>();
        int total = 0;
        int distinctTotal = 0;
        for (int value : nums) {
            total += value;
            boolean firstSighting = distinct.add(value);
            if (firstSighting) {
                distinctTotal += value;
            }
        }
        return 2 * distinctTotal - total;
    }

    public static void main(String[] args) {
        int[] test1 = {2, 2, 1};
        int[] test2 = {4, 1, 2, 1, 2};
        int[] test3 = {1};
        int[] test4 = {7, 3, 5, 3, 7, 8, 5};

        System.out.println("测试用例1: " + Arrays.toString(test1));
        System.out.println("异或方法: " + singleNumberXOR(test1));
        System.out.println("HashSet方法: " + singleNumberHashSet(test1));
        System.out.println("HashMap方法: " + singleNumberHashMap(test1));
        System.out.println("数学方法: " + singleNumberMath(test1));

        // The remaining cases are only run through the XOR solution.
        int[][] xorOnlyCases = {test2, test3, test4};
        for (int i = 0; i < xorOnlyCases.length; i++) {
            System.out.println("\n测试用例" + (i + 2) + ": " + Arrays.toString(xorOnlyCases[i]));
            System.out.println("异或方法: " + singleNumberXOR(xorOnlyCases[i]));
        }

        // Timing comparison: 0..499999 each appear twice, 999999 appears once.
        final int pairs = 500000;
        int[] largeArray = new int[2 * pairs + 1];
        for (int i = 0; i < pairs; i++) {
            largeArray[i] = i;
            largeArray[i + pairs] = i;
        }
        largeArray[2 * pairs] = 999999; // the unpaired value

        long begin = System.currentTimeMillis();
        int xorResult = singleNumberXOR(largeArray);
        long xorMillis = System.currentTimeMillis() - begin;

        begin = System.currentTimeMillis();
        int setResult = singleNumberHashSet(largeArray);
        long setMillis = System.currentTimeMillis() - begin;

        System.out.println("\n性能测试 (1000001个元素):");
        System.out.println("异或方法: " + xorMillis + "ms, 结果: " + xorResult);
        System.out.println("HashSet方法: " + setMillis + "ms, 结果: " + setResult);
    }
}
四、哈希表高级应用
4.1 LRU缓存实现
java
import java.util.*;
/**
 * Least-recently-used cache built from a hash map (O(1) lookup) plus a doubly linked
 * list that keeps entries ordered from most recently used (head side) to least
 * recently used (tail side).
 *
 * <p>This class is not thread-safe.
 *
 * @param <K> key type
 * @param <V> value type
 */
public class LRUCache<K, V> {
    /**
     * Doubly linked list node. Declared static so nodes do not carry a hidden
     * reference to the enclosing cache instance.
     */
    private static final class Node<K, V> {
        K key;
        V value;
        Node<K, V> prev;
        Node<K, V> next;

        Node(K key, V value) {
            this.key = key;
            this.value = value;
        }
    }

    private final int capacity;
    private final Map<K, Node<K, V>> cache;
    private final Node<K, V> head; // sentinel before the most recently used entry
    private final Node<K, V> tail; // sentinel after the least recently used entry

    /**
     * Creates a cache that retains at most {@code capacity} entries.
     *
     * @param capacity maximum number of entries kept after each {@code put}
     */
    public LRUCache(int capacity) {
        this.capacity = capacity;
        this.cache = new HashMap<>();
        this.head = new Node<>(null, null);
        this.tail = new Node<>(null, null);
        head.next = tail;
        tail.prev = head;
    }

    /**
     * Returns the value for a key and marks the entry as most recently used.
     *
     * @param key the key to look up
     * @return the cached value, or {@code null} if the key is not cached
     */
    public V get(K key) {
        Node<K, V> node = cache.get(key);
        if (node == null) {
            return null;
        }
        // A successful read counts as a use.
        moveToHead(node);
        return node.value;
    }

    /**
     * Inserts or updates an entry and marks it as most recently used. When the cache
     * grows beyond its capacity, the least recently used entry is evicted.
     *
     * @param key   the key
     * @param value the value to cache
     */
    public void put(K key, V value) {
        Node<K, V> node = cache.get(key);
        if (node == null) {
            Node<K, V> newNode = new Node<>(key, value);
            cache.put(key, newNode);
            addToHead(newNode);
            // Evict from the tail side once over capacity.
            if (cache.size() > capacity) {
                Node<K, V> tailNode = removeTail();
                cache.remove(tailNode.key);
            }
        } else {
            // Existing key: refresh the value and its recency.
            node.value = value;
            moveToHead(node);
        }
    }

    /** Links the node directly after the head sentinel. */
    private void addToHead(Node<K, V> node) {
        node.prev = head;
        node.next = head.next;
        head.next.prev = node;
        head.next = node;
    }

    /** Unlinks the node from the list. */
    private void removeNode(Node<K, V> node) {
        node.prev.next = node.next;
        node.next.prev = node.prev;
    }

    /** Moves an already linked node to the most-recently-used position. */
    private void moveToHead(Node<K, V> node) {
        removeNode(node);
        addToHead(node);
    }

    /** Unlinks and returns the least recently used node. */
    private Node<K, V> removeTail() {
        Node<K, V> node = tail.prev;
        removeNode(node);
        return node;
    }

    /**
     * Prints the entries on one line, most recently used first.
     */
    public void printCache() {
        Node<K, V> current = head.next;
        System.out.print("LRU Cache: ");
        while (current != tail) {
            System.out.print("[" + current.key + "=" + current.value + "] ");
            current = current.next;
        }
        System.out.println();
    }

    public static void main(String[] args) {
        LRUCache<Integer, String> cache = new LRUCache<>(3);
        cache.put(1, "A");
        cache.put(2, "B");
        cache.put(3, "C");
        cache.printCache(); // [3=C] [2=B] [1=A]  (most recently used first)
        cache.get(1); // touching 1 makes it the most recently used entry
        cache.printCache(); // [1=A] [3=C] [2=B]
        cache.put(4, "D"); // over capacity: evicts 2, the least recently used
        cache.printCache(); // [4=D] [1=A] [3=C]
        cache.put(3, "C++"); // updating an existing key also refreshes its recency
        cache.printCache(); // [3=C++] [4=D] [1=A]
    }
}
五、总结
5.1 哈希表核心要点
- 哈希函数设计:决定分布均匀性和冲突率
- 冲突解决:链地址法(哈希桶)vs 开放地址法
- 负载因子管理:及时扩容保证性能
- 时间复杂度:平均O(1),最坏O(n)
5.2 应用场景选择
| 场景 | 推荐方法 | 理由 |
|---|---|---|
| 查找只出现一次的元素(其余元素均成对出现) | 异或运算 | O(n)时间、O(1)空间 |
| 统计频率 | HashMap | 直观易用 |
| 缓存实现(LRU) | LinkedHashMap(accessOrder=true) | 按访问顺序维护条目,便于淘汰最久未使用的元素 |
| 字符串处理 | 数组映射 | 高效紧凑 |
5.3 最佳实践
- 合理选择哈希函数和冲突解决策略
- 设置合适的初始容量和负载因子
- 对于已知范围的数据,使用数组代替HashMap
- 注意线程安全性要求
哈希表是计算机科学中最重要的数据结构之一,深入理解其原理和应用对于解决实际问题至关重要。