AQS源码解析

150 阅读14分钟

AQS整体设计

image.png

AQS是用来构建锁和其他同步组件的基础框架,主要用来实现并发工具和阻塞队列。并发工具包括CountDownLatch、CyclicBarrier、Semaphore以及我们的同步锁ReentrantLock。BlockingQueue的实现实际上也是基于ReentrantLock,底层还是我们的AQS。

AQS实现的接口

public abstract class AbstractQueuedSynchronizer
    extends AbstractOwnableSynchronizer

/**
 * Parent class of AbstractQueuedSynchronizer (an abstract class, not an interface).
 * Its only job is to remember which thread is recorded as the exclusive owner.
 */
public abstract class AbstractOwnableSynchronizer
    implements java.io.Serializable {

    private static final long serialVersionUID = 3737899427754241961L;

    /** Thread currently recorded as the exclusive owner; excluded from serialization. */
    private transient Thread exclusiveOwnerThread;

    /** No-argument constructor, reachable only from subclasses and this package. */
    protected AbstractOwnableSynchronizer() {
    }

    /**
     * Returns the thread most recently stored by {@code setExclusiveOwnerThread},
     * or {@code null} if none has been stored.
     */
    protected final Thread getExclusiveOwnerThread() {
        return this.exclusiveOwnerThread;
    }

    /**
     * Records {@code thread} as the exclusive owner.
     *
     * @param thread the owner thread; {@code null} clears the record
     */
    protected final void setExclusiveOwnerThread(Thread thread) {
        this.exclusiveOwnerThread = thread;
    }
}

AQS 工作模式

  • Exclusive Mode 独占模式,可以理解为互斥锁。
  • Share Mode 共享模式,用于控制一定量的线程的并发执行,用于控制对共享资源的同步访问线程的个数。

AQS的重要成员

由于AQS是一个抽象类,只有一个protected的空构造方法,这里对构造方法就只提一下。下面先看内部类Node的waitStatus的几种取值:

  • CANCELLED 作废状态,该节点的线程由于超时,中断等原因而处于作废状态。是不可逆的,一旦处于这个状态,说明应该将该节点移除了。
  • SIGNAL 待唤醒后继状态,当前节点的线程处于此状态,后继节点会被挂起,当前节点释放锁或取消之后必须唤醒它的后继节点。
  • CONDITION 等待状态,表明节点对应的线程因为不满足一个条件(Condition)而被阻塞。
  • PROPAGATE 传播,表示下一个线程获取共享锁后,自己的共享状态会被无条件地传播下去,因为共享锁可能出现同时有N个锁可以使用的情况。
/**
 * Queue node. Each instance carries a thread, links to its neighbours and a
 * waitStatus value; the same class is used for the sync queue (prev/next) and
 * for condition queues (nextWaiter).
 */
static final class Node {
    /**
     * Marker to indicate a node is waiting in shared mode.
     * isShared() compares nextWaiter against this instance by identity.
     */
    static final Node SHARED = new Node();
    /** Marker to indicate a node is waiting in exclusive mode */
    static final Node EXCLUSIVE = null;

    // The four int constants below are the named values stored in waitStatus.
    /** waitStatus value to indicate thread has cancelled (the only positive value) */
    static final int CANCELLED =  1;
    /** waitStatus value to indicate successor's thread needs unparking */
    static final int SIGNAL    = -1;
    /** waitStatus value to indicate thread is waiting on condition */
    static final int CONDITION = -2;
    /**
     * waitStatus value to indicate the next acquireShared should
     * unconditionally propagate
     */
    static final int PROPAGATE = -3;
    // Holds one of the constants above, or 0 when none has been set.
    volatile int waitStatus;
    // Previous node in the sync queue.
    volatile Node prev;
    // Next node in the sync queue.
    volatile Node next;
    // Thread associated with this node.
    volatile Thread thread;
    // Next node in a condition queue (used by ConditionObject), or the
    // SHARED/EXCLUSIVE mode marker for nodes created through addWaiter.
    Node nextWaiter;

    /**
     * Returns true if node is waiting in shared mode.
     * The identity test works because the Node(Thread, Node) constructor stores
     * the mode marker passed by addWaiter directly in nextWaiter.
     */
    final boolean isShared() {
        return nextWaiter == SHARED;
    }

    /**
     * Returns the previous node.
     *
     * @throws NullPointerException if prev is null
     */
    final Node predecessor() throws NullPointerException {
        Node p = prev;
        if (p == null)
            throw new NullPointerException();
        else
            return p;
    }

    Node() {    // Used to establish initial head or SHARED marker
    }

    Node(Thread thread, Node mode) {     // Used by addWaiter
        this.nextWaiter = mode;
        this.thread = thread;
    }

    Node(Thread thread, int waitStatus) { // Used by Condition
        this.waitStatus = waitStatus;
        this.thread = thread;
    }
}

image.png

AQS里面使用到SHARED变量的地方就这四个位置,我们可以跟进去看使用到SHARED的addWaiter方法

addWaiter方法解析

/**
 * Creates a node for the current thread in the given mode and appends it to
 * the sync queue.
 *
 * @param mode Node.EXCLUSIVE or Node.SHARED; the Node constructor stores it in
 *             nextWaiter, which is why Node.isShared() can test it with ==
 * @return the newly created node
 */
private Node addWaiter(Node mode) {
    Node node = new Node(Thread.currentThread(), mode);
    // Try the fast path of enq; backup to full enq on failure
    Node pred = tail;
    if (pred != null) {
        // Link prev first, then publish the node by CAS-ing tail.
        node.prev = pred;
        if (compareAndSetTail(pred, node)) {
            pred.next = node;
            return node;
        }
    }
    // Fast path not taken: either the queue is not initialized yet (tail == null)
    // or the CAS on tail failed. enq() retries in a loop.
    enq(node);
    return node;
}
/**
 * Atomically replaces tail with update if tail currently equals expect.
 *
 * @return true if the swap happened
 */
private final boolean compareAndSetTail(Node expect, Node update) {
    return unsafe.compareAndSwapObject(this, tailOffset, expect, update);
}

enq方法在下面介绍。

成员变量介绍

/**
 * Head of the wait queue (also called the CLH queue or sync queue).
 * enq() installs an empty node here on first use; acquireQueued() replaces it
 * through setHead(node) after a successful acquire.
 */
private transient volatile Node head;

/** Tail of the wait queue; new nodes are appended here by CAS. */
private transient volatile Node tail;

// Synchronization state. Its meaning is defined by the subclass hooks; for
// example nonfairTryAcquire in this file treats 0 as "unlocked" and a positive
// value as the owner's hold count.
private volatile int state;

/** Returns the current value of the volatile state field. */
protected final int getState() {
    return state;
}

/**
 * Unconditionally writes the volatile state field.
 *
 * @param newState the value to store
 */
protected final void setState(int newState) {
    state = newState;
}

/**
 * Atomically sets state to update if it currently equals expect.
 *
 * @return true if the swap happened
 */
protected final boolean compareAndSetState(int expect, int update) {
    // See below for intrinsics setup to support this
    return unsafe.compareAndSwapInt(this, stateOffset, expect, update);
}
// Threshold used by the timed-wait code paths (not shown in this file) to
// choose spinning over a timed park.
// NOTE(review): unit is presumably nanoseconds — confirm against doAcquireNanos.
static final long spinForTimeoutThreshold = 1000L;

/**
 * Appends node to the tail of the queue, looping until the CAS on tail
 * succeeds. If the queue is empty, an empty head node is created first.
 *
 * @param node the node to insert
 * @return the previous tail, i.e. node's predecessor (not node itself);
 *         transferForSignal() relies on this
 */
private Node enq(final Node node) {
    for (;;) {
        Node t = tail;
        if (t == null) { // Must initialize
            // Empty queue: install a fresh empty node as head, then point tail
            // at it and loop again to append the real node.
            if (compareAndSetHead(new Node()))
                tail = head;
        } else {
            // Link prev before publishing the node through the tail CAS.
            node.prev = t;
            if (compareAndSetTail(t, node)) {
                t.next = node;
                return t;
            }
        }
    }
}
// CAS support: the Unsafe instance and the field offsets handed to its
// compareAndSwap* methods. The offsets are resolved once, reflectively, in the
// static initializer below.
private static final Unsafe unsafe = Unsafe.getUnsafe();
private static final long stateOffset;
private static final long headOffset;
private static final long tailOffset;
// waitStatusOffset and nextOffset refer to fields of Node; presumably used by
// compareAndSetWaitStatus / compareAndSetNext, which are not shown in this file.
private static final long waitStatusOffset;
private static final long nextOffset;

static {
    try {
        stateOffset = unsafe.objectFieldOffset
            (AbstractQueuedSynchronizer.class.getDeclaredField("state"));
        headOffset = unsafe.objectFieldOffset
            (AbstractQueuedSynchronizer.class.getDeclaredField("head"));
        tailOffset = unsafe.objectFieldOffset
            (AbstractQueuedSynchronizer.class.getDeclaredField("tail"));
        waitStatusOffset = unsafe.objectFieldOffset
            (Node.class.getDeclaredField("waitStatus"));
        nextOffset = unsafe.objectFieldOffset
            (Node.class.getDeclaredField("next"));

    // A failed field lookup is rethrown as an Error during class initialization.
    } catch (Exception ex) { throw new Error(ex); }
}
/**
 * CAS on the head field, expecting null: it only succeeds while the queue is
 * uninitialized. Called from enq().
 *
 * @return true if head was null and has been set to update
 */
private final boolean compareAndSetHead(Node update) {
    return unsafe.compareAndSwapObject(this, headOffset, null, update);
}
/**
 * CAS on the tail field. In this file it is called from addWaiter(), enq() and
 * cancelAcquire().
 *
 * @return true if tail equalled expect and has been set to update
 */
private final boolean compareAndSetTail(Node expect, Node update) {
    return unsafe.compareAndSwapObject(this, tailOffset, expect, update);
}

AQS内部的队列为CLH队列的变种,以自旋的方式获取资源,是可阻塞的先进先出的双向队列。通过自旋和CAS操作保证节点插入和移除的原子性。当有线程获取锁失败,就被添加到队列末尾

一个包含三个同步节点的AQS队列如下图所示。注意head指向的结点要么是初始化时创建的空结点,要么是已经获取到执行权限的结点,它本身并不处于等待之中

image.png

重要方法

获取资源

1. acquire方法: 尝试获取资源的方法

/**
 * Acquires in exclusive mode. The subclass hook tryAcquire is tried once; if it
 * fails, the current thread is enqueued with addWaiter(Node.EXCLUSIVE) and
 * waits in acquireQueued. acquireQueued returning true means an interrupt was
 * observed while waiting, so the interrupt flag is restored via selfInterrupt().
 *
 * @param arg passed through to tryAcquire
 */
public final void acquire(int arg) {
    if (!tryAcquire(arg) &&
        acquireQueued(addWaiter(Node.EXCLUSIVE), arg))
        selfInterrupt();
}
/**
 * Convenience method to interrupt current thread.
 */
static void selfInterrupt() {
    Thread.currentThread().interrupt();
}

/**
 * Interruptible form of acquire. Throws at once if the calling thread is
 * already interrupted (the flag is cleared by the check); otherwise tries the
 * tryAcquire hook and, on failure, delegates to doAcquireInterruptibly.
 *
 * @param arg passed through to tryAcquire and doAcquireInterruptibly
 * @throws InterruptedException if the thread was interrupted on entry, or as
 *         thrown by doAcquireInterruptibly
 */
public final void acquireInterruptibly(int arg)
        throws InterruptedException {
    if (Thread.interrupted()) {
        throw new InterruptedException();
    }
    final boolean acquired = tryAcquire(arg);
    if (acquired) {
        return;
    }
    doAcquireInterruptibly(arg);
}
/**
 * Timed, interruptible form of acquire. Throws at once if the calling thread
 * is already interrupted; otherwise returns true when the tryAcquire hook
 * succeeds, and falls back to doAcquireNanos when it does not.
 *
 * @param arg          passed through to tryAcquire and doAcquireNanos
 * @param nanosTimeout passed through to doAcquireNanos
 * @return true if tryAcquire succeeded, else the result of doAcquireNanos
 * @throws InterruptedException if the thread was interrupted on entry, or as
 *         thrown by doAcquireNanos
 */
public final boolean tryAcquireNanos(int arg, long nanosTimeout)
        throws InterruptedException {
    if (Thread.interrupted()) {
        throw new InterruptedException();
    }
    if (tryAcquire(arg)) {
        return true;
    }
    return doAcquireNanos(arg, nanosTimeout);
}
  • 如果调用tryAcquire(arg) 尝试成功,则acquire()将直接返回,表示已经抢到锁;若不成功,则 将线程加入等待队列。其中的addWaiter方法在上面已经介绍了,可以再看下。

2. acquireQueued方法: 自旋抢占资源的方法

/**
 * Loops until the already-enqueued node acquires: each iteration tries
 * tryAcquire if the node's predecessor is head, and otherwise parks once the
 * predecessor has been marked SIGNAL.
 *
 * @param node the node of the current thread, already on the sync queue
 * @param arg  passed through to tryAcquire
 * @return true if an interrupt was observed while waiting
 */
final boolean acquireQueued(final Node node, int arg) {
    boolean failed = true;
    try {
        boolean interrupted = false;
        for (;;) {
            final Node p = node.predecessor();
            // Only the node directly behind head attempts to acquire.
            if (p == head && tryAcquire(arg)) {
                // Acquired: this node becomes the new head and the old head is unlinked.
                setHead(node);
                p.next = null; // help GC
                failed = false;
                return interrupted;
            }
            // Acquire failed or not at the front. shouldParkAfterFailedAcquire
            // returns true only when the predecessor's waitStatus is already
            // SIGNAL; otherwise it adjusts the queue/status and this loop retries.
            // parkAndCheckInterrupt then blocks and reports whether the thread
            // was interrupted while parked.
            if (shouldParkAfterFailedAcquire(p, node) &&
                parkAndCheckInterrupt())
                interrupted = true;
        }
    } finally {
        // failed is still true only when the loop was left by an exception
        // (the normal return path clears it); cancel the node in that case.
        if (failed)
            cancelAcquire(node);
    }
}

为什么只有当前驱结点是head的时候才去获取锁?

  1. 头节点head是成功获取同步状态(锁)的节点,而头节点的线程释放了同步状态以后,将会唤醒其后继节点,后继节点的线程被唤醒后要检查自己的前驱节点是否为头节点。这样也可以避免空自旋。每个阻塞的结点都是在其前驱结点上进行自旋的操作
  2. 维护同步队列的FIFO原则,节点进入同步队列之后,就进入 了自旋的过程,每个节点都在不断地执行for死循环。

注意:调用acquireQueued()方法的线程一定是node所绑定的线程(由它 的thread属性所引用),该线程也是最开始调用lock()方法抢锁的那 个线程,在acquireQueued()的死循环中,该线程可能重复进行阻塞和 被唤醒。

3.shouldParkAfterFailedAcquire(p, node)

/**
 * Decides, after a failed acquire attempt, whether the current thread may park.
 *
 * @param pred node's predecessor
 * @param node the node of the current thread
 * @return true only when pred.waitStatus is already SIGNAL; false after
 *         skipping cancelled predecessors or after attempting to set SIGNAL,
 *         so that the caller loops and retries before parking
 */
private static boolean shouldParkAfterFailedAcquire(Node pred, Node node) {
    // Status of the predecessor, not of node itself.
    int ws = pred.waitStatus;
    if (ws == Node.SIGNAL)
        /*
         * This node has already set status asking a release
         * to signal it, so it can safely park.
         */
        return true;
    if (ws > 0) {
        /*
         * Predecessor was cancelled. Skip over predecessors and
         * indicate retry.
         * The only positive waitStatus is CANCELLED (= 1). cancelAcquire()
         * overwrites a cancelled node's next link (node.next = node), so the
         * search walks backwards over prev links until it reaches a
         * non-cancelled node, then re-links that node to this one.
         */
        do {
            node.prev = pred = pred.prev;
        } while (pred.waitStatus > 0);
        pred.next = node;
    } else {
        /*
         * waitStatus must be 0 or PROPAGATE.  Indicate that we
         * need a signal, but don't park yet.  Caller will need to
         * retry to make sure it cannot acquire before parking.
         * The CAS result is ignored: the caller's next loop iteration
         * re-reads the status.
         */
        compareAndSetWaitStatus(pred, ws, Node.SIGNAL);
    }
    return false;
}
  • 注意:shouldParkAfterFailedAcquire至少是要执行两次的。为什么?
  • 第一次会去设置pred的ws值为SIGNAL,然后判断条件
  • if (shouldParkAfterFailedAcquire(p, node) &&parkAndCheckInterrupt())
  • shouldParkAfterFailedAcquire返回false。acquireQueued方法会在自旋里面再次走到当前的条件。然后返回true,执行parkAndCheckInterrupt()方法

这里会存在三种情况

  1. 如果前驱节点的状态为-1(SIGNAL),说明前驱的等待标志 已设好,返回true表示设置完毕。
  2. 如果前驱节点的状态为1(CANCELLED),说明前驱节点本身 不再等待了,需要跨越这些节点,然后找到一个有效节点,再把当前 节点和这个有效节点的唤醒关系建立好:调整前驱节点的next指针为 自己。
  3. 如果是其他情况:0(初始状态)或-3(PROPAGATE,共享锁等待)(源码注释指明此处waitStatus只可能是0或PROPAGATE;-2(CONDITION,条件等待)状态的节点位于条件队列中,不会作为同步队列里的前驱出现),那么通过CAS尝试设置前驱节点为SIGNAL,表示只要前驱节点释放锁,当前节点就可以抢占锁了。

4. parkAndCheckInterrupt方法

shouldParkAfterFailedAcquire返回true的话,此方法会挂起线程并且返回中断标记。

/**
 * Convenience method to park and then check if interrupted.
 * Thread.interrupted() clears the interrupt flag as it reads it, which is why
 * acquire() later restores it through selfInterrupt().
 *
 * @return {@code true} if interrupted
 */
private final boolean parkAndCheckInterrupt() {
    LockSupport.park(this);
    return Thread.interrupted();
}

5. cancelAcquire方法分析

  /**
   * Cancels an ongoing acquire attempt: marks node CANCELLED and either unlinks
   * it from the queue or wakes its successor so that the successor can repair
   * the links itself.
   *
   * @param node the node to cancel; ignored if null
   */
  private void cancelAcquire(Node node) {
        if (node == null)
            return;

        node.thread = null;

        // Skip cancelled predecessors
        Node pred = node.prev;
        while (pred.waitStatus > 0)
            node.prev = pred = pred.prev;


        // Snapshot of pred.next, used as the expected value in the CASes below.
        Node predNext = pred.next;
        node.waitStatus = Node.CANCELLED;

        // If node is the tail, move tail back to pred and clear pred.next.
        if (node == tail && compareAndSetTail(node, pred)) {
            compareAndSetNext(pred, predNext, null);
        } else {
            // If pred is not head, is (or can be CAS-ed to) SIGNAL and still has
            // a thread, splice pred.next to node's successor so that pred will
            // signal it. Otherwise wake the successor directly.
            int ws;
            if (pred != head &&
                ((ws = pred.waitStatus) == Node.SIGNAL ||
                 (ws <= 0 && compareAndSetWaitStatus(pred, ws, Node.SIGNAL))) &&
                pred.thread != null) {
                Node next = node.next;
                if (next != null && next.waitStatus <= 0)
                    compareAndSetNext(pred, predNext, next);
            } else {
                // Wake node's successor (this is a wake-up, not a dequeue).
                unparkSuccessor(node);
            }

            node.next = node; // help GC
        }
    }

入队和出队操作

入队其实就是上面介绍过的enq方法。

1. 出队操作-- 其实acquire和release也就是获取和释放锁的时候会用到的真正的方法

/**
 * Releases in exclusive mode. Calls the subclass hook tryRelease; when it
 * reports success, the successor of the current head is woken if one may be
 * waiting.
 *
 * The parameter is an int, matching tryRelease(int) and the
 * release(savedState) call in fullyRelease; a long parameter belongs to
 * AbstractQueuedLongSynchronizer and does not compile against tryRelease(int).
 *
 * @param arg passed through to tryRelease
 * @return true if tryRelease returned true, false otherwise
 */
public final boolean release(int arg) {
    if (tryRelease(arg)) {
        Node h = head;
        // waitStatus 0 on head means no successor has asked for a signal:
        // shouldParkAfterFailedAcquire sets the predecessor to SIGNAL before a
        // waiter is allowed to park.
        if (h != null && h.waitStatus != 0)
            unparkSuccessor(h);
        return true;
    }
    return false;
}

2. unparkSuccessor方法

 /**
  * Wakes up node's successor, if one exists.
  *
  * @param node the node whose successor should be unparked
  */
 private void unparkSuccessor(Node node) {
    int ws = node.waitStatus;
    // Best-effort reset of a negative status to 0; the CAS result is not checked.
    if (ws < 0)
        compareAndSetWaitStatus(node, ws, 0);

    // Normally the successor is node.next. If that is null or cancelled, scan
    // backwards from tail and keep the non-cancelled node closest to `node`.
    Node s = node.next;
    if (s == null || s.waitStatus > 0) {
        s = null;
        for (Node t = tail; t != null && t != node; t = t.prev)
            if (t.waitStatus <= 0)
                s = t;
    }
    // Unpark the chosen successor's thread.
    if (s != null)
        LockSupport.unpark(s.thread);
}
    /**
     * Hook called by acquire(), acquireInterruptibly(), tryAcquireNanos() and
     * acquireQueued() to attempt an exclusive acquire. Subclasses override it;
     * this default always throws.
     *
     * @throws UnsupportedOperationException always, unless overridden
     */
    protected boolean tryAcquire(int arg) {
        throw new UnsupportedOperationException();
    }


    /**
     * Hook called by release() to attempt an exclusive release. Subclasses
     * override it; this default always throws.
     *
     * @throws UnsupportedOperationException always, unless overridden
     */
    protected boolean tryRelease(int arg) {
        throw new UnsupportedOperationException();
    }


    /**
     * Hook for shared-mode acquisition (its callers are not shown in this
     * file). Subclasses override it; this default always throws.
     *
     * @throws UnsupportedOperationException always, unless overridden
     */
    protected int tryAcquireShared(int arg) {
        throw new UnsupportedOperationException();
    }


    /**
     * Hook for shared-mode release (its callers are not shown in this file).
     * Subclasses override it; this default always throws.
     *
     * @throws UnsupportedOperationException always, unless overridden
     */
    protected boolean tryReleaseShared(int arg) {
        throw new UnsupportedOperationException();
    }

这些方法是模板方法的钩子方法,在对应的方法里面进行了调用,提供给子类实现。比如ReentrantLock的tryAcquire方法的实现,真正被调用是在AQS的acquire()方法

/**
 * Subclass implementation of the tryAcquire hook; delegates to
 * nonfairTryAcquire.
 */
protected final boolean tryAcquire(int acquires) {
    return nonfairTryAcquire(acquires);
}
/**
 * Single non-blocking attempt to take the lock, without consulting the queue.
 * When state is 0 the lock is claimed by CAS-ing state from 0 to
 * {@code acquires}; when the caller is already the recorded owner, the hold
 * count is increased by {@code acquires}.
 *
 * @param acquires amount to add to state
 * @return true if the lock was taken or re-entered, false otherwise
 * @throws Error if the incremented hold count overflows to a negative value
 */
final boolean nonfairTryAcquire(int acquires) {
    final Thread caller = Thread.currentThread();
    final int held = getState();

    if (held == 0) {
        // Unlocked: a single CAS decides the race; the loser simply reports failure.
        if (!compareAndSetState(0, acquires)) {
            return false;
        }
        setExclusiveOwnerThread(caller);
        return true;
    }

    if (caller != getExclusiveOwnerThread()) {
        return false;
    }

    // Re-entrant acquire by the owner: a plain write suffices, as only the owner reaches here.
    final int total = held + acquires;
    if (total < 0) { // overflow
        throw new Error("Maximum lock count exceeded");
    }
    setState(total);
    return true;
}

AQS的获取state资源总结

上述核心代码基本就是AQS的对state以及结点入队出队的核心操作了。当然我们还剩下一个部分没有分析,也就是Condition条件部分。

Condition条件队列分析

实际上AQS里面的条件对象是ConditionObject,它实现了Condition接口

/**
 * Plays a role similar to Object's wait()/notify(): a thread waits for some
 * condition and is woken only when signal() or signalAll() is called, after
 * which it competes for the lock again.
 */
public interface Condition {
    /**
     * Waits until signalled.
     *
     * @throws InterruptedException if the waiting thread is interrupted
     */
    void await() throws InterruptedException;

    /** Variant of await() that does not declare InterruptedException. */
    void awaitUninterruptibly();

    /** Timed variant of await(); the limit is given as nanosTimeout. */
    long awaitNanos(long nanosTimeout) throws InterruptedException;

    /** Timed variant of await(); the limit is given as time in the given unit. */
    boolean await(long time, TimeUnit unit) throws InterruptedException;

    /** Variant of await() bounded by the given deadline. */
    boolean awaitUntil(Date deadline) throws InterruptedException;

    /** Wakes one waiting thread. */
    void signal();

    /** Wakes all waiting threads. */
    void signalAll();
}

应用: 当需要进行线程间的通信时,建议结合使用ReentrantLock与Condition,通过Condition的await()和signal()方法进行线程间的阻塞与唤醒。

ConditionObject

/**
 * Condition implementation backed by its own queue of Node objects linked
 * through Node.nextWaiter. Only the fields and constructor are listed here;
 * its methods (await, signal, ...) are shown separately further down.
 */
public class ConditionObject implements Condition, java.io.Serializable {
    private static final long serialVersionUID = 1173984872572414699L;
    /** First node of condition queue. */
    private transient Node firstWaiter;
    /** Last node of condition queue. */
    private transient Node lastWaiter;
   
    // interruptMode value: an interrupt occurred, but no exception is thrown
    // afterwards; the interrupt is re-asserted instead.
    private static final int REINTERRUPT =  1;
    // interruptMode value: an interrupt occurred and InterruptedException must
    // be thrown afterwards.
    private static final int THROW_IE    = -1;

    /**
     * Creates a new {@code ConditionObject} instance.
     */
    public ConditionObject() { }
}

image.png

直接看图 nextWaiter是Node结点里面的属性

条件队列到AQS队列之间的转换关系图如下:

image.png

  • 一个AQS可以有多个条件队列,每个条件队列的关系是独立的。

ConditionObject里面的方法大多是对await方法进行的重载方法以及获取waiters等不太核心的方法。

1. 核心方法-await()

/**
 * Interruptible condition wait: enqueues the current thread on the condition
 * queue, fully releases the lock, parks until the node has been moved to the
 * sync queue (or an interrupt is detected), then re-acquires with the saved
 * state.
 *
 * @throws InterruptedException if the thread is interrupted on entry;
 *         interrupts detected while waiting are handed to
 *         reportInterruptAfterWait
 */
public final void await() throws InterruptedException {
    if (Thread.interrupted())
        throw new InterruptedException();
    // Step 1: append a new CONDITION node for this thread to the condition queue.
    Node node = addConditionWaiter();
    // Step 2: release the lock completely, remembering the state to restore later.
    int savedState = fullyRelease(node);
    int interruptMode = 0;
    // Step 3: the loop re-checks isOnSyncQueue after every return from park, so
    // a wake-up that was not caused by a transfer to the sync queue simply
    // parks again.
    while (!isOnSyncQueue(node)) {
        LockSupport.park(this);
        // A non-zero mode means an interrupt was seen while waiting: stop waiting.
        if ((interruptMode = checkInterruptWhileWaiting(node)) != 0)
            break;
    }
    // Step 4: compete for the lock again, restoring the saved state. An interrupt
    // seen during re-acquisition is recorded as REINTERRUPT unless THROW_IE is
    // already pending.
    if (acquireQueued(node, savedState) && interruptMode != THROW_IE)
        interruptMode = REINTERRUPT;
    // Step 5: still linked into the condition queue: purge cancelled waiters.
    if (node.nextWaiter != null) // clean up if cancelled
        unlinkCancelledWaiters();
    if (interruptMode != 0)
        reportInterruptAfterWait(interruptMode);
}

执行流程:

  1. 新创建一个结点并放在条件队列的末尾。
  2. 释放锁并且唤醒AQS同步队列头结点的下一个结点。
  3. 然后执行while循环,将该节点的线程阻塞,直到该节点离开等待队列,重新回到同步队列成为同步节点后,线程才退出while循环。
  4. 退出循环后,开始调用acquireQueued()不断尝试拿锁。
  5. 拿到锁后,会清空Condition队列中被取消的节点。

下面解析1 2 3 5 步骤

第一步addConditionWaiter方法

/**
 * Appends a new CONDITION node for the current thread to the end of the
 * condition queue. If the current last waiter is no longer in CONDITION
 * status, cancelled waiters are purged first.
 *
 * @return the newly appended node
 */
private Node addConditionWaiter() {
    Node last = lastWaiter;
    // If lastWaiter is cancelled, clean out and re-read the (possibly new) tail.
    if (last != null && last.waitStatus != Node.CONDITION) {
        unlinkCancelledWaiters();
        last = lastWaiter;
    }

    final Node fresh = new Node(Thread.currentThread(), Node.CONDITION);
    if (last != null) {
        last.nextWaiter = fresh;
    } else {
        // Empty queue: the new node is also the first waiter.
        firstWaiter = fresh;
    }
    lastWaiter = fresh;
    return fresh;
}

第二步- fullyRelease

  /**
   * Invokes release with current state value; returns saved state.
   * Cancels node and throws exception on failure.
   * @param node the condition node for this wait
   * @return previous sync state
   * @throws IllegalMonitorStateException if release returns false
   */
  final int fullyRelease(Node node) {
      boolean failed = true;
      try {
          int savedState = getState();
          // Release the whole current state in a single call to release().
          if (release(savedState)) {
              failed = false;
              return savedState;
          } else {
              throw new IllegalMonitorStateException();
          }
      } finally {
          // Reached with failed == true when release returned false or threw:
          // mark the condition node CANCELLED.
          if (failed)
              node.waitStatus = Node.CANCELLED;
      }
  }

第三步 isOnSyncQueue方法

/**
 * Reports whether a node that started on a condition queue is now on the
 * sync queue.
 *
 * @param node the node to check
 * @return true if the node is on the sync queue
 */
final boolean isOnSyncQueue(Node node) {
    // Still marked CONDITION, or no prev link yet: not on the sync queue
    // (enq() assigns prev before it links a node in).
    if (node.waitStatus == Node.CONDITION || node.prev == null)
        return false;
    if (node.next != null) // If has successor, it must be on queue
        return true;
    /*
     * node.prev can be non-null, but not yet on queue because
     * the CAS to place it on queue can fail. So we have to
     * traverse from tail to make sure it actually made it.  It
     * will always be near the tail in calls to this method, and
     * unless the CAS failed (which is unlikely), it will be
     * there, so we hardly ever traverse much.
     */
    return findNodeFromTail(node);
}
/**
 * Walks the sync queue backwards from tail over prev links, looking for node.
 *
 * @param node the node to look for
 * @return true if node is met during the walk, false once the walk runs off
 *         the front of the queue
 */
private boolean findNodeFromTail(Node node) {
    for (Node cursor = tail; ; cursor = cursor.prev) {
        if (cursor == node) {
            return true;
        }
        if (cursor == null) {
            return false;
        }
    }
}

第五步会清空Condition队列中被取消的节点。

/**
 * Removes every node whose waitStatus is not CONDITION from the condition
 * queue, fixing up firstWaiter and lastWaiter as needed.
 */
private void unlinkCancelledWaiters() {
    Node lastKept = null;          // most recent node that stays in the queue
    Node cursor = firstWaiter;
    while (cursor != null) {
        final Node following = cursor.nextWaiter;
        if (cursor.waitStatus == Node.CONDITION) {
            lastKept = cursor;
        } else {
            // Detach the stale node and bridge over it.
            cursor.nextWaiter = null;
            if (lastKept != null) {
                lastKept.nextWaiter = following;
            } else {
                firstWaiter = following;
            }
            if (following == null) {
                lastWaiter = lastKept;
            }
        }
        cursor = following;
    }
}

2. 核心方法-signal

/**
 * Hands the first waiter of the condition queue, if any, to doSignal.
 *
 * @throws IllegalMonitorStateException if isHeldExclusively() returns false
 */
public final void signal() {
    final boolean owner = isHeldExclusively();
    if (!owner) {
        throw new IllegalMonitorStateException();
    }
    final Node oldest = firstWaiter;
    if (oldest == null) {
        return;
    }
    doSignal(oldest);
}
/**
 * Removes waiters from the front of the condition queue and hands them to
 * transferForSignal until one transfer succeeds or the queue is empty.
 *
 * @param first (non-null) the first node on condition queue
 */
private void doSignal(Node first) {
    do {
        // Advance firstWaiter past `first`; if the queue is now empty, clear
        // lastWaiter as well.
        if ( (firstWaiter = first.nextWaiter) == null)
            lastWaiter = null;
        first.nextWaiter = null;
    // transferForSignal returns false when the node's waitStatus was no longer
    // CONDITION; in that case try the next waiter.
    } while (!transferForSignal(first) &&
             (first = firstWaiter) != null);
}

/**
 * Removes and transfers all nodes.
 * @param first (non-null) the first node on condition queue
 */
private void doSignalAll(Node first) {
    // Empty the condition queue up front; the old chain is then walked through
    // the local `first`/`next` references.
    lastWaiter = firstWaiter = null;
    do {
        Node next = first.nextWaiter;
        first.nextWaiter = null;
        // The result is ignored: nodes that cannot be transferred are skipped.
        transferForSignal(first);
        first = next;
    } while (first != null);
}
/**
 * Moves a node from a condition queue onto the sync queue: CASes its
 * waitStatus from CONDITION to 0 and, if that succeeds, appends it with enq().
 *
 * @param node the node to transfer
 * @return true if the node was transferred, false if its waitStatus was no
 *         longer CONDITION
 */
final boolean transferForSignal(Node node) {
      /*
       * If cannot change waitStatus, the node has been cancelled.
       */
      if (!compareAndSetWaitStatus(node, Node.CONDITION, 0))
          return false;

      /*
       * Splice onto queue and try to set waitStatus of predecessor to
       * indicate that thread is (probably) waiting. If cancelled or
       * attempt to set waitStatus fails, wake up to resync (in which
       * case the waitStatus can be transiently and harmlessly wrong).
       */
      // enq() returns the node's predecessor on the sync queue.
      Node p = enq(node);
      int ws = p.waitStatus;
      // If the predecessor is cancelled, or marking it SIGNAL fails, unpark the
      // node's thread right away; otherwise the node just waits in the sync
      // queue for its turn.
      if (ws > 0 || !compareAndSetWaitStatus(p, ws, Node.SIGNAL))
          LockSupport.unpark(node.thread);
      return true;
  }