HashMap实现原理-1

HashMap结构图

JDK1.8 HashMap的数据结构为：数组+链表+红黑树

变量介绍

DEFAULT_INITIAL_CAPACITY Table数组的初始化长度

// The default initial capacity (length of the table array) - MUST be a power of two.
static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16

MAXIMUM_CAPACITY Table数组的最大长度

// The maximum capacity, used if a higher value is implicitly specified by either
// of the constructors with arguments. MUST be a power of two <= 1<<30.
// (1 << 30 is the largest power of two representable as a positive int.)
static final int MAXIMUM_CAPACITY = 1 << 30;

DEFAULT_LOAD_FACTOR 负载因子：默认值为0.75。当元素的总个数 >（当前数组的长度 * 负载因子）时，数组会进行扩容，扩容为原来的两倍

// The load factor used when none specified in constructor.
// resize() multiplies the capacity by the load factor to obtain the resize threshold.
static final float DEFAULT_LOAD_FACTOR = 0.75f;

TREEIFY_THRESHOLD 链表树化阈值：默认值为 8 。表示在一个node（Table）节点下的值的个数大于8时候，会将链表转换成为红黑树（前提是Table数组的长度不小于MIN_TREEIFY_CAPACITY，否则只进行扩容而不树化）

// The bin count threshold for using a tree rather than list for a bin.
// Bins are converted to trees when adding an element to a bin with at least
// this many nodes. The value must be greater than 2 and should be at
// least 8 to mesh with assumptions in tree removal about conversion back
// to plain bins upon shrinkage.
// putVal() calls treeifyBin() once its traversal counter reaches TREEIFY_THRESHOLD - 1.
static final int TREEIFY_THRESHOLD = 8;

UNTREEIFY_THRESHOLD 红黑树链化阈值：默认值为 6 。表示在进行扩容期间，单个Node节点下的红黑树节点的个数小于等于6时候，会将红黑树转化成为链表

// The bin count threshold for untreeifying a (split) bin during a resize
// operation. Should be less than TREEIFY_THRESHOLD, and at most 6
// to mesh with shrinkage detection under removal.
static final int UNTREEIFY_THRESHOLD = 6;

MIN_TREEIFY_CAPACITY = 64 最小树化容量：只有当Table数组的长度（容量）不小于该值时，才会进行树化；否则桶内节点过多时只会进行扩容（为了防止前期阶段频繁扩容和树化过程冲突）

// The smallest table capacity for which bins may be treeified.
// (Otherwise the table is resized if too many nodes in a bin.)
// Should be at least 4 * TREEIFY_THRESHOLD to avoid conflicts between
// resizing and treeification thresholds.
// Note: this bounds the table length (capacity), not the number of stored entries.
static final int MIN_TREEIFY_CAPACITY = 64;

HashMap.put(K,V)流程及分析

HashMap在put(K,V）流程如下：

首先数组的查询效率高，可以通过下标直接查找，使用链表是为了解决hash冲突问题，根据上述流程图可知，在put元素时，首先通过hash(key)算法取到key的hash值，然后通过hash(key)&(n-1)可以计算出此key对应在Node[]的位置，那么就有可能两个不同的key落在同一下标位置上，存放这一类下标相同的结构为链表。链表结构如下：

// Node: the element stored in a bucket's linked list (JDK 1.7 used the Entry class instead).
static class Node<K,V> implements Map.Entry<K,V> {
    // hash value stored for this node
    final int hash;
    final K key;
    V value;
    // link to the next node in the same bucket
    Node<K,V> next;

    // Creates a node holding the given hash, key, value and successor.
    Node(int hash, K key, V value, Node<K,V> next) {
        this.hash = hash;
        this.key = key;
        this.value = value;
        this.next = next;
    }

    /** Returns the key of this entry. */
    public final K getKey() {
        return key;
    }

    /** Returns the value of this entry. */
    public final V getValue() {
        return value;
    }

    /** Renders the entry as {@code key=value}. */
    public final String toString() {
        return key + "=" + value;
    }

    /** Replaces the value and hands back the one that was stored before. */
    public final V setValue(V newValue) {
        V previous = value;
        value = newValue;
        return previous;
    }

    /** Combines the key's and the value's hash codes with XOR (null-safe). */
    public final int hashCode() {
        int keyHash = Objects.hashCode(key);
        int valueHash = Objects.hashCode(value);
        return keyHash ^ valueHash;
    }

    /**
     * Two entries are equal only when both their keys and their values are equal.
     */
    public final boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (!(o instanceof Map.Entry)) {
            return false;
        }
        Map.Entry<?,?> other = (Map.Entry<?,?>) o;
        return Objects.equals(key, other.getKey())
            && Objects.equals(value, other.getValue());
    }
}

分析1:hash(key)

源码如下：

// Turns a key into the hash used for bucket indexing:
// 1. take the key's hashCode()
// 2. XOR it with itself shifted right by 16 bits, so the high bits take part in the low bits
static final int hash(Object key) {
    // a. a null key always hashes to 0, which is why HashMap can hold a null key
    if (key == null) {
        return 0;
    }
    // b. otherwise start from the key's own hash code ...
    int code = key.hashCode();
    // ... and perturb it: bitwise XOR (^) with the same value unsigned-shifted right by 16
    return code ^ (code >>> 16);
}

计算示意图 hashCode为int类型，在Java中固定占4个字节即32位（与操作系统位数无关），所以采用右移16位按位异或运算的方式，让高16位也参与到低16位的运算中，这样加大了hash值的随机性，从而使在后续的计算下标时，能够使下标更加均匀，充分利用空间

分析2：计算存储位置-(length - 1) & hash

根据hash计算位置，可以用hash % length 取余数的方式来计算，但是HashMap中使用的是 & 运算来计算，在数组长度为2的n次幂的前提下，可以达到同样的效果并且计算更快，举个例子：

数组长度设置为2的n次幂，例如16，
二进制为：0000 0000 0000 0000 0000 0000 0001 0000
则&运算用到的值为长度-1，即15，
二进制为：0000 0000 0000 0000 0000 0000 0000 1111
通过这样的值和其他数做&运算操作时，能保证所得数字一定小于等于这个值，并且能保证小于等于这个数的每个值都有可能得到，保证了数组下标的使用，并且通过和上面处理过的hash值操作，能减少碰撞，尽量把数据分配均匀，每个链表长度大致相同。

分析3: putVal(hash(key), key, value, false, true)

// Insertion routine: stores value under key using the precomputed hash.
// Returns the old value when the key was already present, otherwise null.
// When onlyIfAbsent is true an existing non-null value is left untouched;
// evict is simply forwarded to afterNodeInsertion().
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
                   boolean evict) {
    // n is the table length; i is the bucket index obtained from hash via the & operation
    Node<K,V>[] tab; Node<K,V> p; int n, i;
    // 1. If the table is null or empty, create it through resize().
    // In other words the table is initialised lazily, on the first insertion.
    if ((tab = table) == null || (n = tab.length) == 0)
        n = (tab = resize()).length;
    // 2. If bucket i is empty, create the node directly in that slot and the insertion is done.
    // p points at the current head of tab[i]
    if ((p = tab[i = (n - 1) & hash]) == null)
        tab[i] = newNode(hash, key, value, null);
    // Otherwise there is a collision: the bucket already holds nodes, so check the cases in turn
    else {
        Node<K,V> e; K k;
        // a. Does the head node already hold the key being inserted?
        if (p.hash == hash &&
            ((k = p.key) == key || (key != null && key.equals(k))))
            e = p;
        // b. Is the bucket organised as a red-black tree?
        else if (p instanceof TreeNode)
            e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
        // c. Otherwise the bucket is a linked list
        else {
            // The new node is appended at the tail.
            // Author's opinion on why head insertion is not used: JDK 1.8 added red-black trees,
            // so the list length has to be compared against the treeify threshold, which means
            // walking the whole list anyway - so the node may as well be appended at the end.
            for (int binCount = 0; ; ++binCount) {
                // e is the node after p; null means p is the tail
                if ((e = p.next) == null) {
                    // p is the tail: create the node and link it after p (tail insertion)
                    p.next = newNode(hash, key, value, null);
                    // Treeify once the counter reaches TREEIFY_THRESHOLD - 1
                    // (the counter starts at 0 while p is the head, hence the -1)
                    if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                        treeifyBin(tab, hash);
                    break;
                }
                // A node with an equal key already exists in the list: stop scanning,
                // e now refers to that node
                if (e.hash == hash &&
                    ((k = e.key) == key || (key != null && key.equals(k))))
                    break;
                // advance p to the node just examined
                p = e;
            }
        }
        // A node with the same key already exists: replace its value
        if (e != null) { // existing mapping for key
            V oldValue = e.value;
            // overwrite unless onlyIfAbsent is set and a non-null value is already stored
            if (!onlyIfAbsent || oldValue == null)
                e.value = value;
            // callback hook; per the original note it is used by LinkedHashMap
            afterNodeAccess(e);
            return oldValue;
        }
    }
    // Count one more structural modification.
    // NOTE(review): the original note tied modCount to ConcurrentHashMap; in java.util.HashMap
    // it is the counter iterators compare against to fail fast - confirm against the JDK source.
    ++modCount;
    // Grow the table when the new size exceeds the threshold
    if (++size > threshold)
        resize();
    afterNodeInsertion(evict);
    return null;
}

分析4: resize()

    /**
     * Initialises the table or doubles its size, redistributing the existing nodes.
     * Each node of an old bucket j ends up either in bucket j or in bucket j + oldCap
     * of the new table.
     * @return the table in use after the call (the old one if it cannot grow any further)
     */
    final Node<K,V>[] resize() {
        // old table
        Node<K,V>[] oldTab = table;
        // old table length (0 when the table has not been allocated yet)
        int oldCap = (oldTab == null) ? 0 : oldTab.length;
        // old threshold
        int oldThr = threshold;
        // new table length and new threshold
        int newCap, newThr = 0;
        // the old table already exists
        if (oldCap > 0) {
            // already at (or above) the maximum capacity: stop growing, lift the threshold to
            // Integer.MAX_VALUE and keep the old table; rarely reached in practice
            if (oldCap >= MAXIMUM_CAPACITY) {
                threshold = Integer.MAX_VALUE;
                return oldTab;
            }
            // otherwise double the capacity; the threshold is doubled here only when the new
            // capacity is below the maximum and the old capacity is at least 16
            else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
                    oldCap >= DEFAULT_INITIAL_CAPACITY) {
                // double the threshold, e.g. 12 (16*0.75) becomes 24 (32*0.75)
                newThr = oldThr << 1;
            }
        } else if (oldThr > 0) {
            // no table yet but a positive threshold: use the threshold as the new capacity
            // NOTE(review): presumably a constructor stored the requested initial capacity in
            // threshold - not visible in this snippet
            newCap = oldThr;
        } else {
            // nothing configured: fall back to the default capacity and default threshold
            newCap = DEFAULT_INITIAL_CAPACITY;
            newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
        }
        // newThr is still 0 when no branch above assigned it (e.g. old threshold > 0 with an
        // empty table, or the doubling condition was not met): derive it from capacity * loadFactor
        if (newThr == 0) {
            float ft = (float)newCap * loadFactor;
            newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
                    (int)ft : Integer.MAX_VALUE);
        }
        threshold = newThr;
        Node<K,V>[] newTab = new Node[newCap];
        // publish the new table
        table = newTab;
        if (oldTab != null) {
            // walk every bucket of the old table and move its nodes into the new table
            for (int j = 0; j < oldCap; ++j) {
                Node<K,V> e;
                if ((e = oldTab[j]) != null) {
                    // drop the old table's reference to this bucket
                    oldTab[j] = null;
                    // e.next == null: the bucket holds a single node, neither a list nor a tree
                    if (e.next == null) {
                        // e.hash & (newCap - 1) selects the bucket in the new table
                        // (equivalent to taking the hash modulo the new length);
                        // store the node there
                        newTab[e.hash & (newCap - 1)] = e;
                    } else if (e instanceof TreeNode) {
                        // red-black tree bucket: delegate to TreeNode.split
                        ((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
                    } else {
                        // "lo" list: nodes that stay at index j
                        Node<K,V> loHead = null, loTail = null;
                        // "hi" list: nodes that move to index j + oldCap
                        Node<K,V> hiHead = null, hiTail = null;
                        Node<K,V> next;
                        do {
                            next = e.next;
                            // After doubling, a node can only land in one of two buckets:
                            // No1. the original index j
                            // No2. the original index + oldCap
                            // The bit (e.hash & oldCap) decides which one.
                            if ((e.hash & oldCap) == 0) {
                                // append to the tail of the lo list (relative order preserved)
                                if (loTail == null) {
                                    loHead = e;
                                } else {
                                    loTail.next = e;
                                }
                                loTail = e;
                            }
                            else {
                                // append to the tail of the hi list (relative order preserved)
                                if (hiTail == null) {
                                    hiHead = e;
                                } else {
                                    hiTail.next = e;
                                }
                                hiTail = e;
                            }
                        } while ((e = next) != null);
                        // terminate each list and hang it on its bucket in the new table
                        if (loTail != null) {
                            loTail.next = null;
                            newTab[j] = loHead;
                        }
                        if (hiTail != null) {
                            hiTail.next = null;
                            newTab[j + oldCap] = hiHead;
                        }
                    }
                }
            }
        }
        return newTab;
    }

扩容过程中，高低链表操作详解：

分析5：尾插法+高低链表解决JDK1.7扩容环形链表问题

JDK1.7扩容源码：

/**
* Analysis 1: resize(2 * table.length)  (JDK 1.7)
* Purpose: allocate a table of newCapacity, move all entries into it and recompute the
* threshold. Per the heading it is invoked with twice the current length once the map
* has outgrown its threshold.
*/ 
   void resize(int newCapacity) {  
    
    // 1. Keep a reference to the old table
    Entry[] oldTable = table;  

    // 2. Remember the old capacity, i.e. the array length
    int oldCapacity = oldTable.length; 

    // 3. If the old capacity is already the maximum, set the threshold to Integer.MAX_VALUE and return
    if (oldCapacity == MAXIMUM_CAPACITY) {  
        threshold = Integer.MAX_VALUE;  
        return;  
    }  
  
    // 4. Allocate the new table with the requested capacity
    Entry[] newTable = new Entry[newCapacity];  

    // 5. Move the entries of the old table into the new one (done by transfer)
    transfer(newTable); 

    // 6. Make the map's table field refer to the new array
    table = newTable;  

    // 7. Recompute the threshold from the new capacity and the load factor
    threshold = (int)(newCapacity * loadFactor); 
} 

 /**
   * Analysis 1.1: transfer(newTable);  (JDK 1.7)
   * Purpose: move the entries (key/value pairs) of the current table into newTable.
   * How: each old list is walked front to back and every entry is inserted at the HEAD
   * of its new list, so entries that share a new bucket end up in reverse order.
   */ 
void transfer(Entry[] newTable) {
      // 1. src refers to the old table
      Entry[] src = table; 

      // 2. The new capacity is the length of the new table
      int newCapacity = newTable.length;

      // 3. Walk the old table and move its entries into the new table
      for (int j = 0; j < src.length; j++) { 
          // 3.1 Head entry of old bucket j
          Entry<K,V> e = src[j];           
          if (e != null) {
              // 3.2 Clear the old slot (after the loop the old table references nothing)
              src[j] = null; 
              do { 
                  // 3.3 Walk the list headed by this slot.
                  // Save the successor first: the list is singly linked and e.next is
                  // overwritten below, so the rest of the list would otherwise be lost
                  Entry<K,V> next = e.next; 
                 // 3.4 Recompute the bucket index for the new capacity
                 int i = indexFor(e.hash, newCapacity); 
                 // 3.5 Head insertion: the entry currently in the slot becomes e's successor,
                 // then e itself is stored in the slot.
                 // This is why the order may be reversed after a resize.
                 e.next = newTable[i]; 
                 newTable[i] = e;  
                 // 3.6 Continue with the saved successor until the whole list has been moved
                 e = next;             
             } while (e != null);
             // the outer loop repeats this for every slot of the old table
         }
     }
 }

在JDK1.7中，链表的创建使用的是头插法，并且没有用高低链表，在高并发情况下，有可能有两个线程同时操作同一个HashMap中数组下标相同的链表。两个线程之间，通过CPU调度轮换执行，容易形成环形链表，如下图所示：