知乎:zhuanlan.zhihu.com/p/77386081
IdentityHashMap解析
JDK版本号:1.8
一、概述
IdentityHashMap实现了Map接口,用法与HashMap差不多,都是用Hash表实现数据的存储,
区别在于它用 == 比较key的引用(而不是像HashMap那样用equals比较key的值),如果是同一个对象就替换原有的值.
二、与HashMap的区别:
hash算法
首先来看HashMap的hash()方法:
/**
 * Computes the hash HashMap uses for a key.
 *
 * @param key the key to hash, may be null
 * @return 0 when key is null; otherwise key.hashCode() XORed with its own
 *         value shifted right (unsigned) by 16 bits
 */
static final int hash(Object key) {
int h;
// h >>> 16 moves the upper 16 bits into the lower half, so the XOR mixes them into the low bits.
return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
}首先通过传入的key获取hashcode 记为h1 然后在h1基础上无符号右移16位,int类型四个字节,共32位,也就是取高16位,记为h2 最后h1 ^ h2 做异或操作 得到最终hash值返回
再来看看IdentityHashMap的hash()方法:
/**
 * Maps an object to a table index using its identity hash code.
 *
 * @param x      the object to hash
 * @param length the table length used for masking; the {@code & (length - 1)}
 *               mask only behaves like a modulo when length is a power of two
 * @return (h * 2 - h * 256) masked with (length - 1), where h is
 *         System.identityHashCode(x)
 */
private static int hash(Object x, int length) {
int h = System.identityHashCode(x);
// Multiply by -127, and left-shift to use least bit as part of hash
// Both shifted terms are even, so the masked result is always an even index.
return ((h << 1) - (h << 8)) & (length - 1);
}首先通过传入的key获得hashcode记为h
(h*2 - h*256) & (length - 1);
看上去很简单,其实比较有玄机,capacity方法保证了这里的length一定是2的幂次方(而不只是2的倍数)。
/**
 * Constructor that takes an expected maximum size.
 * The size is converted by capacity(...) and the result is passed to init(...).
 *
 * @param expectedMaxSize the expected maximum number of entries
 * @throws IllegalArgumentException if expectedMaxSize is negative
 */
public IdentityHashMap(int expectedMaxSize) {
if (expectedMaxSize < 0)
throw new IllegalArgumentException("expectedMaxSize is negative: "
+ expectedMaxSize);
init(capacity(expectedMaxSize));
}
/**
 * Derives the capacity from the expected maximum size.
 * <ul>
 *   <li>expectedMaxSize greater than MAXIMUM_CAPACITY / 3 yields MAXIMUM_CAPACITY;</li>
 *   <li>expectedMaxSize at most 2 * MINIMUM_CAPACITY / 3 yields MINIMUM_CAPACITY;</li>
 *   <li>otherwise the result is the largest power of two not exceeding
 *       3 * expectedMaxSize (Integer.highestOneBit of that product).</li>
 * </ul>
 * The constructor passes this value to init(...); it is not itself the array length.
 *
 * @param expectedMaxSize the expected maximum number of entries
 * @return the capacity chosen by the rules above
 */
private static int capacity(int expectedMaxSize) {
// assert expectedMaxSize >= 0;
return
(expectedMaxSize > MAXIMUM_CAPACITY / 3) ? MAXIMUM_CAPACITY :
(expectedMaxSize <= 2 * MINIMUM_CAPACITY / 3) ? MINIMUM_CAPACITY :
// expectedMaxSize + (expectedMaxSize << 1) is 3 * expectedMaxSize without a multiplication.
Integer.highestOneBit(expectedMaxSize + (expectedMaxSize << 1));
}比如数组的长度是16,那么hashcode就会和15做“与”运算,其实就等同于 hashcode%length出来的值,且 & 操作 比 % 操作 占用的cpu周期更少所以其效率更高。
put方法判断key是否相等
IdentityHashMap put方法:
/**
 * Stores value under key, treating two keys as the same only when they are
 * the same object reference (==).
 *
 * @param key   the key; passed through maskNull before being stored
 * @param value the value to associate with the key
 * @return the value previously stored next to the same key object, or null
 *         when no such key was found (a previously stored null value is also
 *         returned as null)
 */
public V put(K key, V value) {
final Object k = maskNull(key);
retryAfterResize: for (;;) {
final Object[] tab = table;
final int len = tab.length;
// Compute the hash to locate the starting array index.
int i = hash(k, len);
// Probe from index i until an empty (null) slot is reached; if the same key is met, replace its value.
for (Object item; (item = tab[i]) != null;
i = nextKeyIndex(i, len)) {
// Keys are compared with ==, i.e. by reference.
if (item == k) {
@SuppressWarnings("unchecked")
V oldValue = (V) tab[i + 1];
tab[i + 1] = value;
return oldValue;
}
}
final int s = size + 1;
// Use optimized form of 3 * s.
// Next capacity is len, 2 * current capacity.
// When resize(len) returns true the outer loop restarts and re-reads table.
if (s + (s << 1) > len && resize(len))
continue retryAfterResize;
modCount++;
// The key goes into slot i and its value into the adjacent slot i + 1.
tab[i] = k;
tab[i + 1] = value;
size = s;
return null;
}
}可以看到IdentityHashMap在判断key是否冲突的时候用的是 == ,也就是对象的地址比较
if (item == k) {
@SuppressWarnings("unchecked")
V oldValue = (V) tab[i + 1];
tab[i + 1] = value;
return oldValue;
}HashMap put方法:
/**
 * Inserts or updates the mapping for key in HashMap's table.
 *
 * @param hash         the hash of the key, used to select the bucket
 * @param key          the key
 * @param value        the value to store
 * @param onlyIfAbsent when true, an existing non-null value is left unchanged
 * @param evict        forwarded to afterNodeInsertion
 * @return the previous value of an existing mapping, or null when a new
 *         node was inserted
 */
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
boolean evict) {
Node<K,V>[] tab; Node<K,V> p; int n, i;
// An absent or empty table is created through resize() first.
if ((tab = table) == null || (n = tab.length) == 0)
n = (tab = resize()).length;
// The bucket for this hash is empty: insert the new node directly.
if ((p = tab[i = (n - 1) & hash]) == null)
tab[i] = newNode(hash, key, value, null);
else {
Node<K,V> e; K k;
// Same hash and same key (== or equals): this existing node gets its value replaced below.
if (p.hash == hash &&
((k = p.key) == key || (key != null && key.equals(k))))
e = p;
else if (p instanceof TreeNode)
// The head node is a different key and the bin is a TreeNode: delegate to putTreeVal.
e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
else {
for (int binCount = 0; ; ++binCount) {
if ((e = p.next) == null) {
// End of the linked list reached without a matching key: append a new node.
p.next = newNode(hash, key, value, null);
// Once the list length reaches the threshold, treeifyBin is called for this bucket.
if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
treeifyBin(tab, hash);
break;
}
// A node with the same hash and an equal key was found in the list; stop searching.
if (e.hash == hash &&
((k = e.key) == key || (key != null && key.equals(k))))
break;
p = e;
}
}
if (e != null) { // existing mapping for key
V oldValue = e.value;
if (!onlyIfAbsent || oldValue == null)
e.value = value;
afterNodeAccess(e);
return oldValue;
}
}
++modCount;
// Grow the table once the new size exceeds the threshold.
if (++size > threshold)
resize();
afterNodeInsertion(evict);
return null;
}可以看到HashMap比较了hashcode方法和equals方法出来的结果。
if (p.hash == hash &&
((k = p.key) == key || (key != null && key.equals(k))))
e = p;三、IdentityHashMap的用法
定义实体类:
/**
 * Demo entity used as a map key to contrast HashMap with IdentityHashMap.
 *
 * <p>Two students are equal when both their name and age are equal; the
 * hash code is derived from the age only, so equal students always share a
 * hash code.
 */
public class Student {
    private String name;
    private Integer age;

    /**
     * @param name the student's name, may be null
     * @param age  the student's age, may be null
     */
    public Student(String name, Integer age) {
        this.name = name;
        this.age = age;
    }

    // getter setter

    /**
     * Returns the age as the hash code, or 0 when the age is null
     * (the previous implementation threw NullPointerException on unboxing).
     */
    @Override
    public int hashCode() {
        return java.util.Objects.hashCode(age);
    }

    /**
     * Compares name and age by value.
     *
     * <p>The age is a boxed Integer, so it must be compared with equals
     * semantics: {@code ==} compares references and only happens to work for
     * values inside the Integer cache (-128..127).
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Student)) {
            return false;
        }
        Student student = (Student) obj;
        // Objects.equals is null-safe for both fields.
        return java.util.Objects.equals(student.age, age)
                && java.util.Objects.equals(student.name, name);
    }
}定义主函数:
/**
 * Demo entry point: puts the same four Student keys into a HashMap and an
 * IdentityHashMap, then prints each entry as
 * "identityHashCode(key) , value" so the two key-matching rules can be compared.
 */
public static void main(String[] args) {
    // HashMap: keys are matched through hashCode()/equals().
    HashMap<Student, String> byEquality = new HashMap<>();
    byEquality.put(new Student("yxy", 1), "A");
    byEquality.put(new Student("yxy", 1), "B");
    byEquality.put(new Student("yxy", 1), "C");
    byEquality.put(new Student("yxy2", 2), "D");
    System.out.println("in HashMap ---------------------------");
    for (Entry<Student, String> mapping : byEquality.entrySet()) {
        System.out.println(System.identityHashCode(mapping.getKey()) + " , " + mapping.getValue());
    }

    // IdentityHashMap: keys are matched by reference.
    IdentityHashMap<Student, String> byReference = new IdentityHashMap<>();
    byReference.put(new Student("yxy", 1), "A");
    byReference.put(new Student("yxy", 1), "B");
    byReference.put(new Student("yxy", 1), "C");
    byReference.put(new Student("yxy2", 2), "D");
    System.out.println("in IdentityHashMap ---------------------------");
    for (Entry<Student, String> mapping : byReference.entrySet()) {
        System.out.println(System.identityHashCode(mapping.getKey()) + " , " + mapping.getValue());
    }
}可以看到输出结果:
in HashMap ---------------------------
403424356 , C
321142942 , D
in IdentityHashMap ---------------------------
1644443712 , B
610984013 , A
1393931310 , C
788117692 , D证明IdentityHashMap比较的不是hashcode(),那么IdentityHashMap比的是什么呢?
四、hash code的生成策略
查看synchronizer.cpp文件看这里
intptr_t ObjectSynchronizer::FastHashCode (Thread * Self, oop obj)
FastHashCode()方法生成了HashCode的值。然后看这里:这里有很多生成策略,具体使用哪一种由hashCode这个参数决定,在JDK8中hashCode = 5(这个值在这里被设置),对应的就是下面这一段:
// Marsaglia's xor-shift scheme with thread-specific state
// This is probably the best overall implementation -- we'll
// likely make this the default in future releases.
unsigned t = Self->_hashStateX ;
t ^= (t << 11) ;
Self->_hashStateX = Self->_hashStateY ;
Self->_hashStateY = Self->_hashStateZ ;
Self->_hashStateZ = Self->_hashStateW ;
unsigned v = Self->_hashStateW ;
v = (v ^ (v >> 19)) ^ (t ^ (t >> 8)) ;
Self->_hashStateW = v ;
value = v ;用Xorshift算法生成随机数作为hash值。
关于Xorshift的论文在这里
五、线程安全问题:
IdentityHashMap不是线程安全的,多个线程并发修改同一个实例时存在线程安全问题
所以使用时最好包裹在同步代码块中,或者用Collections.synchronizedMap(new IdentityHashMap<>())包装之后再使用
六、IdentityHashMap QA环节
/**
 * Allocates the backing array for the given capacity.
 * The array holds 2 * initCapacity slots; put stores each key at index i
 * and its value at index i + 1, so every entry occupies two slots.
 *
 * @param initCapacity the capacity; per the commented-out assertions below it
 *                     is expected to be a power of two between
 *                     MINIMUM_CAPACITY and MAXIMUM_CAPACITY
 */
private void init(int initCapacity) {
// assert (initCapacity & -initCapacity) == initCapacity; // power of 2
// assert initCapacity >= MINIMUM_CAPACITY;
// assert initCapacity <= MAXIMUM_CAPACITY;
table = new Object[2 * initCapacity];
}MacKong Q:
init函数里面为什么还要 2 * capacity? 因为capacity函数已经保证了2的倍数和预分配了,按道理init里面不用乘2了。
A:
capacity函数确实已经保证了容量是2的幂次方并且做了预分配,但init里面仍然需要乘2。 这里乘2是因为capacity指的是key的个数, 而IdentityHashMap的存储方式是在一个Object[]中key和value相邻存储(key在下标i,value在下标i + 1), 所以要两倍的空间大小
/**
 * Value representing null keys inside tables.
 * A single shared sentinel object, so every null key maps to the same reference.
 */
static final Object NULL_KEY = new Object();
/**
 * Use NULL_KEY for key if it is null.
 *
 * @param key the caller-supplied key, possibly null
 * @return NULL_KEY when key is null, otherwise key itself
 */
private static Object maskNull(Object key) {
return (key == null ? NULL_KEY : key);
}MacKong Q:
IdentityHashMap怎么和我们传入的null的key作区分的?
A:
如果传入是null,会用NULL_KEY代表它,而NULL_KEY就是一个全局的Object对象这样null所代表的hash code其实就是同一个了
MacKong Q:
identityHashCode如果是不同的对象,会不会返回相同的值。如果不会的话,是不是identityHashCode不需要进行冲突处理,进而性能更好?
A:
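是会有Hash冲突的。首先,System.identityHashCode()并不保证不同的对象一定返回不同的值(它只是一个int,不同对象有可能相同);其次,即使identityHashCode不同,经过 & (length - 1) 映射到数组下标之后也肯定会有碰撞。发生碰撞之后IdentityHashMap会用线性探测继续找 index = i + 2的下标(到了数组末尾就回到0),看是否有空间,对应nextKeyIndex里的 return (i + 2 < len ? i + 2 : 0);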