【RocketMQ】代码片段之 ServiceThread

357 阅读9分钟

01、ServiceThread 源码

package org.apache.rocketmq.common;
    
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * Base class for a service that runs its {@link #run()} body on one dedicated thread.
 *
 * <p>Subclasses implement {@link #run()} (typically a loop over {@link #isStopped()} that calls
 * {@link #waitForRunning(long)}) and {@link #getServiceName()}. The worker thread is created by
 * {@link #start()} and torn down by {@link #shutdown(boolean)}; other threads can trigger an early
 * iteration with {@link #wakeup()}.
 */
public abstract class ServiceThread implements Runnable {
    private static final Logger log = LoggerFactory.getLogger(ServiceThread.class);

    /** Default time, in milliseconds, that {@link #shutdown(boolean)} waits for the worker to exit. */
    private static final long JOIN_TIME = 90 * 1000;

    /** Worker thread created by the most recent successful {@link #start()}; {@code null} before that. */
    private Thread thread;

    // Latch the worker waits on inside waitForRunning(). It lets the task run periodically (the await
    // times out) and also lets other code trigger the task early by counting the latch down.
    private CountDownLatch2 waitPoint = new CountDownLatch2(1);

    // Guards the window inside wakeup(): wakeup() is not atomic, it first flips hasNotified to true
    // ("I am about to release the latch") and only then counts the latch down. If waitForRunning()
    // sees hasNotified == true it returns immediately instead of waiting on a latch that is about to
    // be released anyway.
    protected volatile AtomicBoolean hasNotified = new AtomicBoolean(false);

    // A plain volatile flag is enough here: setting stopped does not prevent waitForRunning() from
    // being called and waitForRunning() never reads it. It is a cooperative signal only; the subclass
    // loop is expected to check isStopped() and stop calling waitForRunning() once it is true.
    protected volatile boolean stopped = false;

    /** Daemon setting applied to the worker thread when it is created in {@link #start()}. */
    protected boolean isDaemon = false;

    //Make it able to restart the thread
    private final AtomicBoolean started = new AtomicBoolean(false);

    public ServiceThread() {}

    /**
     * Returns the name of this service; it is used as the worker thread's name and in log messages.
     *
     * @return the service name
     */
    public abstract String getServiceName();

    /**
     * Starts the worker thread if the service is not already started.
     *
     * <p>Only the caller that flips {@code started} from {@code false} to {@code true} creates and
     * starts a thread; every other call returns after logging.
     */
    public void start() {
        log.info("Try to start service thread:{} started:{} lastThread:{}", getServiceName(), started.get(), thread);
        if (!started.compareAndSet(false, true)) {
            return;
        }
        stopped = false;
        this.thread = new Thread(this, getServiceName());
        this.thread.setDaemon(isDaemon);
        this.thread.start();
    }

    /** Shuts the service down without interrupting the worker thread. */
    public void shutdown() {
        this.shutdown(false);
    }

    /**
     * Marks the service as stopped, wakes the worker, and waits for it to exit.
     *
     * <p>Does nothing beyond logging when the service is not started. A non-daemon worker is joined
     * for at most {@link #getJointime()} milliseconds; a daemon worker is not joined.
     *
     * @param interrupt whether to interrupt the worker thread before joining it
     */
    public void shutdown(final boolean interrupt) {
        log.info("Try to shutdown service thread:{} started:{} lastThread:{}", getServiceName(), started.get(), thread);
        if (!started.compareAndSet(true, false)) {
            return;
        }
        this.stopped = true;
        log.info("shutdown thread {} interrupt {}", this.getServiceName(), interrupt);

        // Release a worker that is currently parked in waitForRunning().
        if (hasNotified.compareAndSet(false, true)) {
            waitPoint.countDown(); // notify
        }

        try {
            if (interrupt) {
                this.thread.interrupt();
            }

            long beginTime = System.currentTimeMillis();
            if (!this.thread.isDaemon()) {
                // Wait for the worker to finish, bounded by getJointime(); after the timeout the
                // code below runs regardless of whether the worker has exited.
                this.thread.join(this.getJointime());
            }
            long elapsedTime = System.currentTimeMillis() - beginTime;
            log.info("join thread {} elapsed time(ms) {} {}", this.getServiceName(), elapsedTime, this.getJointime());
        } catch (InterruptedException e) {
            log.error("Interrupted", e);
            // join() cleared the calling thread's interrupt status when it threw; set it again so
            // the caller can still observe that it was interrupted.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Returns how long {@link #shutdown(boolean)} waits for the worker thread to exit.
     *
     * @return the join timeout in milliseconds
     */
    public long getJointime() {
        return JOIN_TIME;
    }

    /** Signals the worker to stop without interrupting it; see {@link #stop(boolean)}. */
    @Deprecated
    public void stop() {
        this.stop(false);
    }

    /**
     * Marks the service as stopped and wakes the worker, without joining it and without clearing
     * the {@code started} flag.
     *
     * @param interrupt whether to interrupt the worker thread
     */
    @Deprecated
    public void stop(final boolean interrupt) {
        if (!started.get()) {
            return;
        }
        this.stopped = true;
        log.info("stop thread {} interrupt {}", this.getServiceName(), interrupt);

        if (hasNotified.compareAndSet(false, true)) {
            waitPoint.countDown(); // notify
        }

        if (interrupt) {
            this.thread.interrupt();
        }
    }

    /** Sets the stopped flag on a started service; the worker is neither woken nor joined. */
    public void makeStop() {
        if (!started.get()) {
            return;
        }
        this.stopped = true;
        log.info("makestop thread {}", this.getServiceName());
    }

    /**
     * Requests that the worker leave (or skip) its current wait in {@link #waitForRunning(long)}.
     *
     * <p>The flag update and the latch release are two separate steps, not one atomic action.
     */
    public void wakeup() {
        if (hasNotified.compareAndSet(false, true)) {
            waitPoint.countDown(); // notify
        }
    }

    /**
     * Waits until a notification arrives or {@code interval} elapses, then calls {@link #onWaitEnd()}.
     *
     * @param interval maximum time to wait, in milliseconds
     */
    protected void waitForRunning(long interval) {
        // if hasNotified == True, means either wakeup() or stop()
        // has been executed by another thread
        if (hasNotified.compareAndSet(true, false)) {
            this.onWaitEnd();
            return;
        }

        //entry to wait
        waitPoint.reset();

        try {
            waitPoint.await(interval, TimeUnit.MILLISECONDS);
        } catch (InterruptedException e) {
            // NOTE(review): unlike shutdown(), the interrupt status is not restored here. Callers
            // normally invoke this method in a loop, and a set interrupt flag would make every
            // following await throw immediately - confirm before changing.
            log.error("Interrupted", e);
        } finally {
            hasNotified.set(false);
            this.onWaitEnd();
        }
    }

    /** Hook invoked at the end of every {@link #waitForRunning(long)} call; empty by default. */
    protected void onWaitEnd() {
    }

    /**
     * Reports whether a stop has been requested.
     *
     * @return {@code true} once stop, makeStop or shutdown has set the stopped flag
     */
    public boolean isStopped() {
        return stopped;
    }

    /**
     * Returns the daemon setting used for the worker thread.
     *
     * @return {@code true} if the worker thread is created as a daemon thread
     */
    public boolean isDaemon() {
        return isDaemon;
    }

    /**
     * Sets the daemon flag that {@link #start()} applies to the worker thread it creates.
     *
     * @param daemon {@code true} to create the worker as a daemon thread
     */
    public void setDaemon(boolean daemon) {
        isDaemon = daemon;
    }
}

ServiceThread中出现了CountDownLatch2,我们先来看下这个类。

02、CountDownLatch2 源码

package org.apache.rocketmq.common;

import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.AbstractQueuedSynchronizer;

/**
 * A count-down latch in the style of {@link java.util.concurrent.CountDownLatch} with one
 * addition: {@link #reset()} puts the count back to the value passed to the constructor, so a
 * single instance can be waited on again and again.
 */
public class CountDownLatch2 {
    private final Sync sync;

    /**
     * Creates a latch whose count starts at {@code count}.
     *
     * @param count how many {@link #countDown} calls are needed before {@link #await} lets
     * threads through
     * @throws IllegalArgumentException if {@code count} is negative
     */
    public CountDownLatch2(int count) {
        if (count < 0) {
            throw new IllegalArgumentException("count < 0");
        }
        this.sync = new Sync(count);
    }

    /**
     * Blocks the calling thread until the count is zero or the thread is interrupted.
     *
     * <p>Returns at once when the count is already zero. Otherwise the thread stays parked until
     * {@link #countDown} calls bring the count to zero, or until another thread
     * {@linkplain Thread#interrupt interrupts} it.
     *
     * <p>When the thread is interrupted on entry or while waiting, {@link InterruptedException}
     * is thrown and the thread's interrupted status is cleared.
     *
     * @throws InterruptedException if the current thread is interrupted while waiting
     */
    public void await() throws InterruptedException {
        sync.acquireSharedInterruptibly(1);
    }

    /**
     * Blocks the calling thread until the count is zero, the thread is interrupted, or the given
     * time has passed, whichever happens first.
     *
     * <p>Returns {@code true} at once when the count is already zero, and {@code true} when the
     * count reaches zero during the wait. Returns {@code false} when the waiting time runs out
     * first; a timeout of zero or less means no waiting at all.
     *
     * <p>When the thread is interrupted on entry or while waiting, {@link InterruptedException}
     * is thrown and the thread's interrupted status is cleared.
     *
     * @param timeout the maximum time to wait
     * @param unit the time unit of the {@code timeout} argument
     * @return {@code true} if the count reached zero, {@code false} if the waiting time elapsed
     * before that
     * @throws InterruptedException if the current thread is interrupted while waiting
     */
    public boolean await(long timeout, TimeUnit unit)
            throws InterruptedException {
        final long timeoutNanos = unit.toNanos(timeout);
        return sync.tryAcquireSharedNanos(1, timeoutNanos);
    }

    /**
     * Lowers the count by one and releases all waiting threads when it hits zero.
     *
     * <p>A count that is already zero is left untouched.
     */
    public void countDown() {
        sync.releaseShared(1);
    }

    /**
     * Reports the current count; mainly useful for debugging and tests.
     *
     * @return the current count
     */
    public long getCount() {
        return sync.getCount();
    }

    /** Restores the count to the value this latch was constructed with. */
    public void reset() {
        sync.reset();
    }

    /**
     * Describes this latch and its state; the state part is {@code "[Count = N]"} with the
     * current count.
     *
     * @return a string identifying this latch, as well as its state
     */
    @Override
    public String toString() {
        return super.toString() + "[Count = " + sync.getCount() + "]";
    }

    /**
     * Synchronizer behind the latch; the AQS state holds the current count.
     *
     * <p>Compared with a plain count-down latch it additionally keeps the initial count in
     * {@code startCount} and offers {@link #reset()} to restore it.
     */
    private static final class Sync extends AbstractQueuedSynchronizer {
        private static final long serialVersionUID = 4982264981922014374L;

        // The count supplied at construction; reset() writes it back into the state.
        private final int startCount;

        Sync(int count) {
            // Remember the initial count before publishing it as the state.
            startCount = count;
            setState(count);
        }

        int getCount() {
            return getState();
        }

        @Override
        protected int tryAcquireShared(int acquires) {
            // Acquisition succeeds only once the count has reached zero.
            if (getState() == 0) {
                return 1;
            }
            return -1;
        }

        @Override
        protected boolean tryReleaseShared(int releases) {
            // Decrement with a CAS retry loop; report true only on the transition to zero.
            while (true) {
                final int current = getState();
                if (current == 0) {
                    return false;
                }
                final int remaining = current - 1;
                if (compareAndSetState(current, remaining)) {
                    return remaining == 0;
                }
            }
        }

        // Puts the state back to the count the latch was created with.
        protected void reset() {
            setState(startCount);
        }
    }
}

注意上面的关键点,添加了startCount成员变量以及reset方法。

03、ServiceThread 中的方法剖析

start() 方法

下面我们来分析下ServiceThread,这是一个抽象类,实现了Runnable接口,Runnable接口用来指定线程要执行的任务。分析到此,假设我们有一个子类XxxServiceThread extends ServiceThread,且XxxServiceThread中实现了run()方法,要执行这样的任务,我们一般会这样操作(伪代码):

public static void main(String[] args) {
    // Manual approach: create the task, wrap it in a Thread ourselves, then start that thread.
    final XxxServiceThread xxx = new XxxServiceThread();
    final Thread runner = new Thread(xxx);
    runner.start();
}

继续看ServiceThread的源码,我们发现一个start()方法,源码如下:

/**
 * Starts the worker thread, unless the service is already marked as started.
 *
 * <p>The attempt is always logged; only the caller that flips {@code started} from
 * {@code false} to {@code true} goes on to create and start a thread.
 */
public void start() {
    log.info("Try to start service thread:{} started:{} lastThread:{}", getServiceName(), started.get(), thread);

    final boolean wonStart = started.compareAndSet(false, true);
    if (wonStart) {
        stopped = false;
        // Run this instance on a new thread named after the service.
        final Thread worker = new Thread(this, getServiceName());
        this.thread = worker;
        worker.setDaemon(isDaemon);
        worker.start();
    }
}

我们似乎看到了熟悉的代码:this.thread = new Thread(this, getServiceName()); this.thread.start(); 这样我们就不需要在外部创建线程,直接调用XxxServiceThread实例的start()方法即可。ServiceThread的start()方法内部会创建一个线程,并把当前实例(this)作为任务交给它运行。但需要注意的是,通过ServiceThread.start()方法启动任务,在调用shutdown()之前,不管调用多少次start()方法,只会有一个线程在运行,这是通过对AtomicBoolean类型的started成员变量执行CAS操作(compareAndSet(false, true))保证的。

现在我们知道了,ServiceThread的start()方法用来保证:子类实例无论调用该方法多少次(在调用shutdown()之前),都只会创建一个线程来执行该任务。

waitForRunning(long interval) 方法

该方法用protected关键字修饰,因此不同包下的外部代码无法通过子类实例调用它,只能在子类自身的方法实现中调用(同一个包下的类仍然可以访问)。这样就防止了外部代码通过XxxServiceThread xxx = new XxxServiceThread()创建的xxx实例调用waitForRunning(..)方法,通常只在XxxServiceThread类实现的run方法中调用。为什么要这样做?我们后面慢慢说。

我们先来看下这个方法的源码:

/**
 * Waits until a notification arrives or {@code interval} elapses, then calls onWaitEnd().
 *
 * @param interval maximum time to wait, in milliseconds
 */
protected void waitForRunning(long interval) {
    // if hasNotified == True, means either wakeup() or stop()
    // has been executed by another thread
    //
    // Reading of the note above: when hasNotified is true, some other thread must have called
    // wakeup() or stop() on this instance.
    //
    // Thought experiment: one instance, XxxServiceThread xxx = new XxxServiceThread(), is shared
    // by thread A and thread B.
    //
    // Scenario 1:
    //   t1: thread A calls xxx.wakeup(), which sets hasNotified = true and returns.
    //   t2: thread B calls xxx.waitForRunning(..), takes the if branch below and returns.
    //
    // Scenario 2:
    //   t1: thread A calls xxx.shutdown(), which sets the stopped flag and hasNotified = true,
    //       then returns.
    //   t2: thread B calls xxx.waitForRunning(..), takes the if branch below and returns.
    //
    // Scenario 3 (a third thread C also holds xxx):
    //   t1: thread A calls xxx.wakeup() and sets hasNotified = true, but has not yet reached
    //       waitPoint.countDown().
    //   t2: thread B calls xxx.waitForRunning(..), resets hasNotified to false in the if branch
    //       and returns, going straight on to its business code.
    //   t3: thread C calls xxx.waitForRunning(..), runs waitPoint.reset() and
    //       waitPoint.await(..), and starts waiting.
    //   t4: thread A reaches waitPoint.countDown(); thread C is released after a very short
    //       wait, so the business code of B and of C may overlap in time.
    //
    // These are hypotheticals only. waitForRunning() is protected and is normally called from
    // the subclass's run() method, and start() creates only one worker thread, so run() - and
    // with it any waitForRunning() call inside run() - executes on a single thread.
    if (hasNotified.compareAndSet(true, false)) {
        this.onWaitEnd();
        return;
    }

    //entry to wait
    waitPoint.reset();

    try {
        waitPoint.await(interval, TimeUnit.MILLISECONDS);
    } catch (InterruptedException e) {
        log.error("Interrupted", e);
    } finally {
        hasNotified.set(false);
        this.onWaitEnd();
    }
}

wakeup()

/**
 * Asks the worker to leave (or skip) its current wait.
 *
 * <p>The two steps below - flipping {@code hasNotified} and counting the latch down - are not
 * performed as one atomic operation.
 */
public void wakeup() {
    final boolean firstNotifier = hasNotified.compareAndSet(false, true);
    if (!firstNotifier) {
        // A notification is already pending; nothing more to do.
        return;
    }
    waitPoint.countDown(); // notify
}

04、RocketMQ 中自己的运用

DefaultMessageStore

/**
 * Service thread that periodically flushes every consume queue of the enclosing
 * {@code DefaultMessageStore} and performs one final flush when the service stops.
 */
class FlushConsumeQueueService extends ServiceThread {
    /** Attempt count used for the final flush in {@link #run()}; also forces the page threshold to 0. */
    private static final int RETRY_TIMES_OVER = 3;
    /** Wall-clock time, in milliseconds, at which the thorough interval last elapsed. */
    private long lastFlushTimestamp = 0;

    /**
     * Flushes all consume queues, and the store checkpoint when the page threshold is 0.
     *
     * @param retryTimes maximum number of flush attempts per consume queue; a value equal to
     * {@code RETRY_TIMES_OVER} also sets the page threshold to 0
     */
    private void doFlush(int retryTimes) {
        // NOTE(review): a threshold of 0 presumably means "flush regardless of how many pages are
        // dirty" - confirm against ConsumeQueue.flush.
        int leastPages = DefaultMessageStore.this.getMessageStoreConfig().getFlushConsumeQueueLeastPages();
        if (retryTimes == RETRY_TIMES_OVER) {
            leastPages = 0;
        }

        long checkpointTimestamp = 0;

        int thoroughInterval = DefaultMessageStore.this.getMessageStoreConfig().getFlushConsumeQueueThoroughInterval();
        long now = System.currentTimeMillis();
        boolean thoroughFlushDue = now >= (this.lastFlushTimestamp + thoroughInterval);
        if (thoroughFlushDue) {
            this.lastFlushTimestamp = now;
            leastPages = 0;
            checkpointTimestamp = DefaultMessageStore.this.getStoreCheckpoint().getLogicsMsgTimestamp();
        }

        for (ConcurrentMap<Integer, ConsumeQueue> queuesOfTopic : DefaultMessageStore.this.consumeQueueTable.values()) {
            for (ConsumeQueue cq : queuesOfTopic.values()) {
                // Keep flushing this queue until it reports success or the attempts run out.
                int attempt = 0;
                while (attempt < retryTimes && !cq.flush(leastPages)) {
                    attempt++;
                }
            }
        }

        if (leastPages != 0) {
            return;
        }
        if (checkpointTimestamp > 0) {
            DefaultMessageStore.this.getStoreCheckpoint().setLogicsMsgTimestamp(checkpointTimestamp);
        }
        DefaultMessageStore.this.getStoreCheckpoint().flush();
    }

    /**
     * Worker loop: waits for the configured interval (or a wakeup), flushes once, and repeats
     * until the service is stopped; then flushes a last time with {@code RETRY_TIMES_OVER}.
     */
    @Override
    public void run() {
        DefaultMessageStore.log.info(this.getServiceName() + " service started");

        while (!this.isStopped()) {
            try {
                this.waitForRunning(DefaultMessageStore.this.getMessageStoreConfig().getFlushIntervalConsumeQueue());
                this.doFlush(1);
            } catch (Exception e) {
                DefaultMessageStore.log.warn(this.getServiceName() + " service has exception. ", e);
            }
        }

        // Final flush after the stop request, with the larger attempt count.
        this.doFlush(RETRY_TIMES_OVER);

        DefaultMessageStore.log.info(this.getServiceName() + " service end");
    }

    @Override
    public String getServiceName() {
        return FlushConsumeQueueService.class.getSimpleName();
    }

    /** Join timeout for this service: 60 seconds, expressed in milliseconds. */
    @Override
    public long getJointime() {
        return 60 * 1000;
    }
}

05、学习和使用

/**
 * Small demo of {@code ServiceThread}: a service that works every 5 seconds and is additionally
 * woken up once, 12 seconds after start, by a helper thread.
 */
public class ServiceThreadTestCase {

    public static void main(String[] args) {
        MyServiceThread myServiceThread = new MyServiceThread();
        myServiceThread.start();

        // Helper thread: sleep 12 seconds, then make the service do one round of work right away.
        Thread waker = new Thread(() -> {
            try {
                Thread.sleep(12000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            // Explicit wake-up.
            myServiceThread.wakeup();
        });
        waker.start();
    }

    /** Demo service that prints a timestamped line on every round of work. */
    private static final class MyServiceThread extends ServiceThread {
        private final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

        @Override
        public String getServiceName() {
            return "my-service";
        }

        @Override
        public void run() {
            while (!this.isStopped()) {
                // Wait up to 5 seconds (or until woken), then do one round of work.
                this.waitForRunning(5000);
                this.printWorkLine();
            }
        }

        // Prints the current time and the service name to standard output.
        private void printWorkLine() {
            String now = sdf.format(new Date());
            System.out.println(now + " \t" + this.getServiceName() + " working... ");
        }
    }

}

运行结果:

10:48:34.329 [main] INFO org.apache.rocketmq.common.ServiceThread - Try to start service thread:my-service started:false lastThread:null
2022-11-07 10:48:39 	my-service working... 
2022-11-07 10:48:44 	my-service working... 
### 这是任务启动第12秒的时候,主动做了一次工作...
2022-11-07 10:48:46 	my-service working... 
2022-11-07 10:48:51 	my-service working... 
2022-11-07 10:48:56 	my-service working... 
2022-11-07 10:49:01 	my-service working... 
2022-11-07 10:49:06 	my-service working... 
2022-11-07 10:49:11 	my-service working... 
2022-11-07 10:49:16 	my-service working... 
2022-11-07 10:49:21 	my-service working... 
2022-11-07 10:49:26 	my-service working... 
2022-11-07 10:49:31 	my-service working...