01、ServiceThread 源码
package org.apache.rocketmq.common;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
/**
 * Skeleton for a background service whose {@link #run()} loop executes on a thread that
 * {@link #start()} creates and names after {@link #getServiceName()}.
 *
 * <p>A subclass loop can pause in {@link #waitForRunning(long)}: the pause ends when the
 * interval elapses or earlier, as soon as another thread calls {@link #wakeup()},
 * {@link #stop()} or {@link #shutdown()}. This gives "run periodically, but also run early
 * on demand" behaviour.
 */
public abstract class ServiceThread implements Runnable {
    private static final Logger log = LoggerFactory.getLogger(ServiceThread.class);

    /** Default upper bound, in milliseconds, that shutdown waits for the thread to end. */
    private static final long JOIN_TIME = 90 * 1000;

    private Thread thread;

    // Latch the service thread parks on: it supports a timed wait (periodic execution)
    // and can be released explicitly by other threads (early execution).
    private CountDownLatch2 waitPoint = new CountDownLatch2(1);

    // Lets waitForRunning() return at once when a notifier has already announced a wake-up
    // but has not released waitPoint yet. Notifying is two separate, non-atomic steps:
    // first this flag is flipped to true, then the latch is counted down.
    protected volatile AtomicBoolean hasNotified = new AtomicBoolean(false);

    // Advisory flag only. It is set by stop()/shutdown()/makeStop() and exposed through
    // isStopped(); waitForRunning() never reads it, so the subclass loop is expected to
    // check it and decide for itself whether to keep calling waitForRunning().
    protected volatile boolean stopped = false;

    protected boolean isDaemon = false;

    //Make it able to restart the thread
    private final AtomicBoolean started = new AtomicBoolean(false);

    public ServiceThread() {
    }

    /**
     * @return the name used for the service thread and in log messages
     */
    public abstract String getServiceName();

    /**
     * Creates and starts the service thread. Does nothing beyond logging when the service
     * is already marked as started.
     */
    public void start() {
        log.info("Try to start service thread:{} started:{} lastThread:{}", getServiceName(), started.get(), thread);
        final boolean wonStart = started.compareAndSet(false, true);
        if (!wonStart) {
            return;
        }
        stopped = false;
        this.thread = new Thread(this, getServiceName());
        this.thread.setDaemon(isDaemon);
        this.thread.start();
    }

    /**
     * Shuts the service down without interrupting its thread.
     */
    public void shutdown() {
        this.shutdown(false);
    }

    /**
     * Marks the service as stopped, releases the wait point and, for a non-daemon thread,
     * waits up to {@link #getJointime()} milliseconds for it to finish.
     *
     * @param interrupt whether to interrupt the service thread before joining it
     */
    public void shutdown(final boolean interrupt) {
        log.info("Try to shutdown service thread:{} started:{} lastThread:{}", getServiceName(), started.get(), thread);
        final boolean wasStarted = started.compareAndSet(true, false);
        if (!wasStarted) {
            return;
        }
        this.stopped = true;
        log.info("shutdown thread " + this.getServiceName() + " interrupt " + interrupt);
        this.releaseWaitPoint();
        try {
            if (interrupt) {
                this.thread.interrupt();
            }
            final long joinBegin = System.currentTimeMillis();
            if (!this.thread.isDaemon()) {
                // Bounded wait: once the join time is used up, execution continues below.
                this.thread.join(this.getJointime());
            }
            final long elapsedTime = System.currentTimeMillis() - joinBegin;
            log.info("join thread " + this.getServiceName() + " elapsed time(ms) " + elapsedTime + " "
                + this.getJointime());
        } catch (InterruptedException e) {
            log.error("Interrupted", e);
        }
    }

    /**
     * @return the maximum time, in milliseconds, that shutdown waits for the thread
     */
    public long getJointime() {
        return JOIN_TIME;
    }

    /**
     * Requests a stop without interrupting the service thread.
     */
    @Deprecated
    public void stop() {
        this.stop(false);
    }

    /**
     * Sets the stopped flag and releases the wait point; unlike shutdown, it neither resets
     * the started flag nor joins the thread.
     *
     * @param interrupt whether to interrupt the service thread
     */
    @Deprecated
    public void stop(final boolean interrupt) {
        if (!started.get()) {
            return;
        }
        this.stopped = true;
        log.info("stop thread " + this.getServiceName() + " interrupt " + interrupt);
        this.releaseWaitPoint();
        if (interrupt) {
            this.thread.interrupt();
        }
    }

    /**
     * Sets the stopped flag only; the wait point is left untouched.
     */
    public void makeStop() {
        if (!started.get()) {
            return;
        }
        this.stopped = true;
        log.info("makestop thread " + this.getServiceName());
    }

    /**
     * Releases the wait point unless a notification is already pending.
     */
    public void wakeup() {
        this.releaseWaitPoint();
    }

    /**
     * Blocks for at most {@code interval} milliseconds, returning earlier when the wait
     * point is released. Returns immediately if a notification is already pending.
     * {@link #onWaitEnd()} is invoked before returning.
     *
     * @param interval maximum time to wait, in milliseconds
     */
    protected void waitForRunning(long interval) {
        // A pending notification means wakeup()/stop()/shutdown() already ran on another
        // thread: consume it and skip the wait.
        final boolean notificationPending = hasNotified.compareAndSet(true, false);
        if (notificationPending) {
            this.onWaitEnd();
            return;
        }

        // Re-arm the latch, then park on it.
        waitPoint.reset();
        try {
            waitPoint.await(interval, TimeUnit.MILLISECONDS);
        } catch (InterruptedException e) {
            log.error("Interrupted", e);
        } finally {
            hasNotified.set(false);
            this.onWaitEnd();
        }
    }

    /**
     * Hook invoked at the end of every {@link #waitForRunning(long)} call; empty by default.
     */
    protected void onWaitEnd() {
    }

    public boolean isStopped() {
        return stopped;
    }

    public boolean isDaemon() {
        return isDaemon;
    }

    public void setDaemon(boolean daemon) {
        isDaemon = daemon;
    }

    // Flags the notification first and counts the latch down only if this caller was the
    // one that flipped the flag; the two steps are not atomic.
    private void releaseWaitPoint() {
        if (hasNotified.compareAndSet(false, true)) {
            waitPoint.countDown(); // notify
        }
    }
}
ServiceThread中出现了CountDownLatch2,我们先来看下这个类。
02、CountDownLatch2 源码
package org.apache.rocketmq.common;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.AbstractQueuedSynchronizer;
/**
 * A count-down latch modelled on {@link java.util.concurrent.CountDownLatch} with one extra
 * operation, {@link #reset()}, which makes the latch reusable.
 *
 * <p>Note on the related {@link java.util.concurrent.CyclicBarrier}, which is reusable too:
 * a latch lets threads wait until <em>other</em> threads have counted it down to zero,
 * whereas a barrier lets a fixed party of threads wait for <em>each other</em> at a common
 * point.
 */
public class CountDownLatch2 {
    private final Sync sync;

    /**
     * Creates a latch with the given initial count.
     *
     * @param count how many {@link #countDown} calls are needed before {@link #await} lets
     *              threads through
     * @throws IllegalArgumentException if {@code count} is negative
     */
    public CountDownLatch2(int count) {
        if (count < 0) {
            throw new IllegalArgumentException("count < 0");
        }
        this.sync = new Sync(count);
    }

    /**
     * Blocks the calling thread until the count is zero or the thread is interrupted.
     * Returns immediately when the count is already zero.
     *
     * @throws InterruptedException if the current thread is interrupted on entry or while
     *                              waiting
     */
    public void await() throws InterruptedException {
        sync.acquireSharedInterruptibly(1);
    }

    /**
     * Blocks the calling thread until the count is zero, the thread is interrupted, or the
     * timeout elapses. A timeout less than or equal to zero means no waiting at all.
     *
     * @param timeout the maximum time to wait
     * @param unit    the time unit of {@code timeout}
     * @return {@code true} if the count reached zero, {@code false} if the waiting time
     *         elapsed first
     * @throws InterruptedException if the current thread is interrupted on entry or while
     *                              waiting
     */
    public boolean await(long timeout, TimeUnit unit) throws InterruptedException {
        final long timeoutNanos = unit.toNanos(timeout);
        return sync.tryAcquireSharedNanos(1, timeoutNanos);
    }

    /**
     * Decrements the count if it is greater than zero; when the new count is zero all
     * waiting threads are released. Has no effect when the count is already zero.
     */
    public void countDown() {
        sync.releaseShared(1);
    }

    /**
     * Returns the current count (mainly useful for debugging and tests).
     *
     * @return the current count
     */
    public long getCount() {
        return sync.getCount();
    }

    /**
     * Sets the count back to the value this latch was constructed with.
     */
    public void reset() {
        sync.reset();
    }

    /**
     * Returns the default object description followed by {@code "[Count = n]"}, where
     * {@code n} is the current count.
     *
     * @return a string identifying this latch and its state
     */
    @Override
    public String toString() {
        return super.toString() + "[Count = " + sync.getCount() + "]";
    }

    /**
     * Synchronizer whose state is the remaining count. Compared with the one inside
     * {@code CountDownLatch} it additionally remembers the initial count so that
     * {@link #reset()} can restore it.
     */
    private static final class Sync extends AbstractQueuedSynchronizer {
        private static final long serialVersionUID = 4982264981922014374L;

        // Initial count, kept so the latch can be re-armed.
        private final int startCount;

        Sync(int count) {
            this.startCount = count;
            setState(count);
        }

        int getCount() {
            return getState();
        }

        @Override
        protected int tryAcquireShared(int acquires) {
            // Acquisition succeeds only once the count has dropped to zero.
            if (getState() == 0) {
                return 1;
            }
            return -1;
        }

        @Override
        protected boolean tryReleaseShared(int releases) {
            // Decrement via CAS, retrying on contention; report true only on the
            // transition to zero so waiters are signalled exactly then.
            while (true) {
                final int current = getState();
                if (current == 0) {
                    return false;
                }
                final int next = current - 1;
                if (compareAndSetState(current, next)) {
                    return next == 0;
                }
            }
        }

        // Restores the state to the count given at construction time.
        protected void reset() {
            setState(startCount);
        }
    }
}
注意上面的关键点,添加了startCount成员变量以及reset方法。
03、ServiceThread 中的方法剖析
start() 方法
下面我们来分析下ServiceThread,这是一个抽象类,实现了Runnable接口,Runnable接口用来指定线程要执行的任务。分析到此,假设我们有一个子类XxxServiceThread extends ServiceThread,且XxxServiceThread中实现了run()方法,要执行这样的任务,我们一般会这样操作(伪代码):
// Pseudo-code: the caller creates the thread itself and hands the service instance to it.
public static void main(String[] args) {
    XxxServiceThread xxx = new XxxServiceThread();
    Thread runner = new Thread(xxx);
    runner.start();
}
继续看ServiceThread的源码,我们发现一个start()方法,源码如下:
/**
 * Creates and starts the thread that runs this service, named after
 * {@code getServiceName()}. A call made while the service is already marked as started
 * only writes the log line and returns.
 */
public void start() {
    log.info("Try to start service thread:{} started:{} lastThread:{}", getServiceName(), started.get(), thread);
    // Only the caller that flips started from false to true goes on to create the thread.
    final boolean wonStart = started.compareAndSet(false, true);
    if (wonStart) {
        stopped = false;
        this.thread = new Thread(this, getServiceName());
        this.thread.setDaemon(isDaemon);
        this.thread.start();
    }
}
我们似乎看到了熟悉的代码:this.thread = new Thread(this, getServiceName()); this.thread.start();,这样我们就不需要在外部创建线程,把XxxServiceThread创建的实例传入(this)执行即可。ServiceThread的start()方法内部会创建一个线程来运行这个XxxServiceThread实例。但需要注意的是,通过ServiceThread.start()方法启动任务,在调用shutdown()之前不管调用多少次start()方法,只会有一个线程在运行,这是通过对AtomicBoolean类型的started成员变量执行CAS操作(started.compareAndSet(false, true))保证的;started并没有用volatile修饰,用volatile修饰的是stopped。
现在我们知道了,ServiceThread的start方法用来保证子类实例调用该方法N次,只会创建一个线程在执行该任务。
waitForRunning(long interval) 方法
该方法用protected关键字修饰,在不同包名下子类的实例是无法调用的,而在子类方法实现中可以调用。这样就防止了XxxServiceThread xxx = new XxxServiceThread()创建的xxx实例调用waitForRunning(..)方法,只允许XxxServiceThread类中实现的run方法中调用。为什么要这样做?我们后面慢慢说。
我们先来看下这个方法的源码:
/**
 * Waits for at most {@code interval} milliseconds, or returns immediately when a
 * notification is already pending; {@code onWaitEnd()} is called before returning.
 *
 * @param interval maximum time to wait, in milliseconds
 */
protected void waitForRunning(long interval) {
// if hasNotified == True, means either wakeup() or stop()
// has been executed by another thread
// Reading of the note above: if hasNotified is true, some other thread must have called
// wakeup() or stop() on this instance.
// Suppose we created XxxServiceThread xxx = new XxxServiceThread(), and
// thread A holds the xxx instance,
// thread B holds the xxx instance.
// Hypothetical scenario 1:
// time slice 1: thread A calls xxx.wakeup(), which sets hasNotified = true; the method ends.
// time slice 2: thread B calls xxx.waitForRunning(..), takes the if branch and returns.
//
// Hypothetical scenario 2:
// time slice 1: thread A calls xxx.shutdown(), which sets hasNotified = true and sets the
//               stopped flag; the method ends.
// time slice 2: thread B calls xxx.waitForRunning(...), takes the if branch and returns.
//
// Hypothetical scenario 3:
// thread C also holds the xxx instance.
// time slice 1: thread A calls xxx.wakeup() and has set hasNotified = true, but has not yet
//               reached waitPoint.countDown().
// time slice 2: thread B calls xxx.waitForRunning(...): the CAS resets hasNotified to false,
//               the if branch is taken and the method returns, so B goes straight on to its
//               business code.
// time slice 3: thread C calls xxx.waitForRunning(...): waitPoint.reset(), then
//               waitPoint.await(...), and C starts waiting.
// time slice 4: thread A reaches waitPoint.countDown(); C has barely waited and already
//               continues with its business code, so the business code of thread B and of
//               thread C may overlap in time.
// These are only thought experiments. waitForRunning is protected, so it can only be invoked
// from within a ServiceThread subclass, normally inside run(); and ServiceThread.start()
// ensures that only one thread is running, so run() of a given subclass instance executes on
// a single thread and the waitForRunning call inside it is triggered by that one thread only.
//
//
if (hasNotified.compareAndSet(true, false)) {
this.onWaitEnd();
return;
}
//entry to wait
waitPoint.reset();
try {
waitPoint.await(interval, TimeUnit.MILLISECONDS);
} catch (InterruptedException e) {
log.error("Interrupted", e);
} finally {
hasNotified.set(false);
this.onWaitEnd();
}
}
wakeup()
/**
 * Releases the wait point unless a notification is already pending.
 * The two steps below (raising the flag, then counting the latch down) do not form one
 * atomic operation.
 */
public void wakeup() {
    final boolean firstNotifier = hasNotified.compareAndSet(false, true);
    if (firstNotifier) {
        waitPoint.countDown(); // notify
    }
}
04、RocketMQ 中自己的运用
DefaultMessageStore
/**
 * Service thread that flushes every consume queue of the enclosing store at a configured
 * interval, and once more with extra retries when its loop ends.
 */
class FlushConsumeQueueService extends ServiceThread {
    /** Retry count used for the final flush; it also forces a "flush everything" pass. */
    private static final int RETRY_TIMES_OVER = 3;

    /** Time of the last thorough flush, in epoch milliseconds. */
    private long lastFlushTimestamp = 0;

    /**
     * Flushes all consume queues, retrying each one up to {@code retryTimes} times until
     * its flush reports success.
     *
     * @param retryTimes maximum flush attempts per queue
     */
    private void doFlush(int retryTimes) {
        int leastPages = DefaultMessageStore.this.getMessageStoreConfig().getFlushConsumeQueueLeastPages();
        if (RETRY_TIMES_OVER == retryTimes) {
            leastPages = 0;
        }

        long logicsMsgTimestamp = 0;
        final int thoroughInterval = DefaultMessageStore.this.getMessageStoreConfig().getFlushConsumeQueueThoroughInterval();
        final long now = System.currentTimeMillis();
        final boolean thoroughFlushDue = now >= (this.lastFlushTimestamp + thoroughInterval);
        if (thoroughFlushDue) {
            // Thorough interval elapsed: drop the page threshold and remember the
            // checkpoint timestamp to write back afterwards.
            this.lastFlushTimestamp = now;
            leastPages = 0;
            logicsMsgTimestamp = DefaultMessageStore.this.getStoreCheckpoint().getLogicsMsgTimestamp();
        }

        final ConcurrentMap<String, ConcurrentMap<Integer, ConsumeQueue>> queueTable = DefaultMessageStore.this.consumeQueueTable;
        for (ConcurrentMap<Integer, ConsumeQueue> queuesOfTopic : queueTable.values()) {
            for (ConsumeQueue queue : queuesOfTopic.values()) {
                boolean flushed = false;
                int attempt = 0;
                while (attempt < retryTimes && !flushed) {
                    flushed = queue.flush(leastPages);
                    attempt++;
                }
            }
        }

        // The checkpoint is only touched after a pass with a zero page threshold.
        if (leastPages != 0) {
            return;
        }
        if (logicsMsgTimestamp > 0) {
            DefaultMessageStore.this.getStoreCheckpoint().setLogicsMsgTimestamp(logicsMsgTimestamp);
        }
        DefaultMessageStore.this.getStoreCheckpoint().flush();
    }

    /**
     * Loops until the service is stopped: wait for the configured interval (or an early
     * wake-up), then flush once. After the loop a final flush runs with
     * {@code RETRY_TIMES_OVER} retries.
     */
    public void run() {
        DefaultMessageStore.log.info(this.getServiceName() + " service started");

        // Periodically call doFlush in a loop (per the article author's note: to persist
        // the data to disk; details are covered in the RocketMQ chapter).
        while (!this.isStopped()) {
            try {
                this.waitForRunning(DefaultMessageStore.this.getMessageStoreConfig().getFlushIntervalConsumeQueue());
                this.doFlush(1);
            } catch (Exception e) {
                DefaultMessageStore.log.warn(this.getServiceName() + " service has exception. ", e);
            }
        }

        this.doFlush(RETRY_TIMES_OVER);
        DefaultMessageStore.log.info(this.getServiceName() + " service end");
    }

    @Override
    public String getServiceName() {
        return FlushConsumeQueueService.class.getSimpleName();
    }

    /**
     * @return 60 seconds, expressed in milliseconds
     */
    @Override
    public long getJointime() {
        return 60 * 1000;
    }
}
05、学习和使用
/**
 * Demo: a service that works after every wait of up to 5 seconds, plus a second thread
 * that wakes it up early about 12 seconds after start.
 */
public class ServiceThreadTestCase {

    public static void main(String[] args) {
        MyServiceThread myServiceThread = new MyServiceThread();
        myServiceThread.start();

        // Roughly 12 seconds after start, make the service do one round of work on demand.
        Thread waker = new Thread(() -> {
            try {
                Thread.sleep(12000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            // Explicit wake-up.
            myServiceThread.wakeup();
        });
        waker.start();
    }

    /** Sample service that prints a timestamped line each time it works. */
    private static final class MyServiceThread extends ServiceThread {
        private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

        @Override
        public String getServiceName() {
            return "my-service";
        }

        @Override
        public void run() {
            while (!this.isStopped()) {
                // Wait up to 5 seconds (less if woken up), then work once.
                this.waitForRunning(5000);
                this.work();
            }
        }

        private void work() {
            String line = sdf.format(new Date()) + " \t" + this.getServiceName() + " working... ";
            System.out.println(line);
        }
    }
}
运行结果:
10:48:34.329 [main] INFO org.apache.rocketmq.common.ServiceThread - Try to start service thread:my-service started:false lastThread:null
2022-11-07 10:48:39 my-service working...
2022-11-07 10:48:44 my-service working...
### 这是任务启动第12秒的时候,主动做了一次工作...
2022-11-07 10:48:46 my-service working...
2022-11-07 10:48:51 my-service working...
2022-11-07 10:48:56 my-service working...
2022-11-07 10:49:01 my-service working...
2022-11-07 10:49:06 my-service working...
2022-11-07 10:49:11 my-service working...
2022-11-07 10:49:16 my-service working...
2022-11-07 10:49:21 my-service working...
2022-11-07 10:49:26 my-service working...
2022-11-07 10:49:31 my-service working...