GC Mechanism: HeapTaskDaemon

Every Android process has a HeapTaskDaemon thread, on which GC work is performed. This article analyzes how HeapTaskDaemon is created and how it works.
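You can confirm this in any app process by enumerating the live Java threads; the snippet below is only an illustration and not part of the AOSP sources discussed in this article:

// Check whether a thread named "HeapTaskDaemon" is alive in the current process.
class HeapTaskDaemonCheck {
    static boolean heapTaskDaemonAlive() {
        for (Thread t : Thread.getAllStackTraces().keySet()) {
            if ("HeapTaskDaemon".equals(t.getName())) {
                return true;
            }
        }
        return false;
    }
}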

HeapTaskDaemon is a daemon thread that is created in the zygote process. When a child process is created via fork, it gets its own HeapTaskDaemon thread as well. Let's start with the process-creation code:

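//Zygote.java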
/**
 * Forks a new VM instance.  The current VM must have been started
 * with the -Xzygote flag. <b>NOTE: new instance keeps all
 * root capabilities. The new process is expected to call capset()</b>.
 ...
 * @return 0 if this is the child, pid of the child
 * if this is the parent, or -1 on error.
 */
static int forkAndSpecialize(int uid, int gid, int[] gids, int runtimeFlags,
        int[][] rlimits, int mountExternal, String seInfo, String niceName, int[] fdsToClose,
        int[] fdsToIgnore, boolean startChildZygote, String instructionSet, String appDataDir,
        boolean isTopApp, String[] pkgDataInfoList, String[] allowlistedDataInfoList,
        boolean bindMountAppDataDirs, boolean bindMountAppStorageDirs) {
    ZygoteHooks.preFork();

    int pid = nativeForkAndSpecialize(
            uid, gid, gids, runtimeFlags, rlimits, mountExternal, seInfo, niceName, fdsToClose,
            fdsToIgnore, startChildZygote, instructionSet, appDataDir, isTopApp,
            pkgDataInfoList, allowlistedDataInfoList, bindMountAppDataDirs,
            bindMountAppStorageDirs);
    if (pid == 0) {
        // Note that this event ends at the end of handleChildProc,
        Trace.traceBegin(Trace.TRACE_TAG_ACTIVITY_MANAGER, "PostFork");

        // If no GIDs were specified, don't make any permissions changes based on groups.
        if (gids != null && gids.length > 0) {
            NetworkUtilsInternal.setAllowNetworkingForProcess(containsInetGid(gids));
        }
    }

    // Set the Java Language thread priority to the default value for new apps.
    Thread.currentThread().setPriority(Thread.NORM_PRIORITY);

    ZygoteHooks.postForkCommon();
    return pid;
}

From the javadoc we can see that preFork must be called before every fork; after the fork, the child process calls postForkChild followed by postForkCommon, while the parent calls only postForkCommon.

/**
 * Called by the zygote prior to every fork. Each call to {@code preFork}
 * is followed by a matching call to {@link #postForkChild(int, boolean, boolean, String)} on
 * the child process and {@link #postForkCommon()} on both the parent and the child
 * process. {@code postForkCommon} is called after {@code postForkChild} in
 * the child process.
 *
 * @hide
 */
@SystemApi(client = MODULE_LIBRARIES)
public static void preFork() {
    Daemons.stop();
    token = nativePreFork();
    waitUntilAllThreadsStopped();
}
/**
 * Called by the zygote in both the parent and child processes after
 * every fork. In the child process, this method is called after
 * {@code postForkChild}.
 *
 * @hide
 */
@SystemApi(client = MODULE_LIBRARIES)
public static void postForkCommon() {
    // Notify the runtime before creating new threads.
    nativePostZygoteFork();
    Daemons.startPostZygoteFork();
}

So Daemons#stop is called before the fork, and after the fork both the parent and the child call Daemons#startPostZygoteFork. Since Daemons#stop stops the daemon threads, let's look at startPostZygoteFork first.

//Daemons.java

private static final Daemon[] DAEMONS = new Daemon[] {
        HeapTaskDaemon.INSTANCE,
        ReferenceQueueDaemon.INSTANCE,
        FinalizerDaemon.INSTANCE,
        FinalizerWatchdogDaemon.INSTANCE,
};

public static void startPostZygoteFork() {
    postZygoteFork = true;
    for (Daemon daemon : DAEMONS) {
        daemon.startPostZygoteFork();
    }
}

It simply calls startPostZygoteFork on each element of DAEMONS in turn. Each element is the singleton instance of a daemon thread, one of which is HeapTaskDaemon. Every daemon extends Daemons' inner class Daemon, so let's first look at the base class's startPostZygoteFork method.

private static abstract class Daemon implements Runnable {
    @UnsupportedAppUsage
    private Thread thread;
    private String name;
    private boolean postZygoteFork;

    protected Daemon(String name) {
        this.name = name;
    }
    
    public synchronized void startPostZygoteFork() {
        postZygoteFork = true;
        startInternal();
    }

    public void startInternal() {
        if (thread != null) {
            throw new IllegalStateException("already running");
        }
        thread = new Thread(ThreadGroup.systemThreadGroup, this, name);
        thread.setDaemon(true);
        thread.setSystemDaemon(true);
        thread.start();
    }
    // ...
}

Daemon implements Runnable; when startPostZygoteFork is called, a new thread is created and started with the Daemon instance as its task. Next, Daemon's run method:

// Daemons.java
private static final CountDownLatch POST_ZYGOTE_START_LATCH = new CountDownLatch(DAEMONS.length);
public final void run() {
    if (postZygoteFork) {
        // We don't set the priority before the Thread.start() call above because
        // Thread.start() will call SetNativePriority and overwrite the desired native
        // priority. We (may) use a native priority that doesn't have a corresponding
        // java.lang.Thread-level priority (native priorities are more coarse-grained.)
        VMRuntime.getRuntime().setSystemDaemonThreadPriority();
        POST_ZYGOTE_START_LATCH.countDown();
    } else {
        PRE_ZYGOTE_START_LATCH.countDown();
    }
    try {
        runInternal();
    } catch (Throwable ex) {
        // Should never happen, but may not o.w. get reported, e.g. in zygote.
        // Risk logging redundantly, rather than losing it.
        System.logE("Uncaught exception in system thread " + name, ex);
        throw ex;
    }
}

Here postZygoteFork is true, so the daemon thread first sets its own priority and then counts down POST_ZYGOTE_START_LATCH, which allows the zygote to wait until all four daemon threads have reached run. After that, runInternal is executed; it is an abstract method that every subclass overrides. Before getting to runInternal, let's glance at the latch handshake below and then at how the thread priority is set, in dalvik_system_VMRuntime.cc.
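The countDown/await pairing works like the minimal sketch below; everything except POST_ZYGOTE_START_LATCH is a hypothetical name, and the waiting counterpart is not shown in this article:

// Sketch of the latch handshake: each daemon counts down once it reaches run(),
// and whoever needs all daemons up simply awaits the latch (illustration only).
import java.util.concurrent.CountDownLatch;

class DaemonStartLatchSketch {
    static final int DAEMON_COUNT = 4;
    static final CountDownLatch POST_ZYGOTE_START_LATCH = new CountDownLatch(DAEMON_COUNT);

    // Called at the top of each daemon thread's run().
    static void onDaemonStarted() {
        POST_ZYGOTE_START_LATCH.countDown();
    }

    // Called by code that must not proceed until all daemons are running.
    static void awaitAllDaemonsStarted() throws InterruptedException {
        POST_ZYGOTE_START_LATCH.await();
    }
}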

//dalvik_system_VMRuntime.cc
static void VMRuntime_setSystemDaemonThreadPriority(JNIEnv* env ATTRIBUTE_UNUSED,
                                                    jclass klass ATTRIBUTE_UNUSED) {
#ifdef ART_TARGET_ANDROID
  Thread* self = Thread::Current();
  DCHECK(self != nullptr);
  pid_t tid = self->GetTid();
  // We use a priority lower than the default for the system daemon threads (eg HeapTaskDaemon) to
  // avoid jank due to CPU contentions between GC and other UI-related threads. b/36631902.
  // We may use a native priority that doesn't have a corresponding java.lang.Thread-level priority.
  static constexpr int kSystemDaemonNiceValue = 4;  // priority 124
  if (setpriority(PRIO_PROCESS, tid, kSystemDaemonNiceValue) != 0) {
    PLOG(INFO) << *self << " setpriority(PRIO_PROCESS, " << tid << ", "
               << kSystemDaemonNiceValue << ") failed";
  }
#endif
}

This method calls setpriority to give the current thread a nice value of 4. Nice values range from -20 to 19, and a larger value means a lower priority; nice 4 corresponds to kernel priority 124 (120 + 4). For details, see the setpriority documentation and the article 杂谈Android线程优先级 on Android thread priorities.
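ART sets this from native code, but for comparison, app-level Java code can read and set a thread's nice value through android.os.Process; a small illustrative sketch:

// Illustrative only: adjusting a Java thread's nice value on Android.
// android.os.Process.setThreadPriority takes a Linux nice value (-20..19),
// the same scale used by setpriority(); 4 mirrors kSystemDaemonNiceValue.
import android.os.Process;

class NiceValueSketch {
    static void demoteCurrentThread() {
        int tid = Process.myTid();
        Process.setThreadPriority(tid, 4);          // nice 4 -> kernel priority 120 + 4 = 124
        int nice = Process.getThreadPriority(tid);  // read the value back
        System.out.println("tid=" + tid + " nice=" + nice);
    }
}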

Next, let's look at HeapTaskDaemon's runInternal implementation.

@UnsupportedAppUsage
protected synchronized boolean isRunning() {
    return thread != null;
}

private static class HeapTaskDaemon extends Daemon {
    private static final HeapTaskDaemon INSTANCE = new HeapTaskDaemon();

    HeapTaskDaemon() {
        super("HeapTaskDaemon");
    }

    // Overrides the Daemon.interupt method which is called from Daemons.stop.
    public synchronized void interrupt(Thread thread) {
        VMRuntime.getRuntime().stopHeapTaskProcessor();
    }

    @Override public void runInternal() {
        synchronized (this) {
            // The thread exists (isRunning() is true), so we enter the if branch.
            if (isRunning()) {
              // Needs to be synchronized or else we there is a race condition where we start
              // the thread, call stopHeapTaskProcessor before we start the heap task
              // processor, resulting in a deadlock since startHeapTaskProcessor restarts it
              // while the other thread is waiting in Daemons.stop().
              VMRuntime.getRuntime().startHeapTaskProcessor();
            }
        }
        // This runs tasks until we are stopped and there is no more pending task.
        VMRuntime.getRuntime().runHeapTasks();
    }
}

runInternal calls VMRuntime's startHeapTaskProcessor and runHeapTasks methods. Looking at the first one: it obtains the TaskProcessor of the current process's heap and calls its Start method.

//dalvik_system_VMRuntime.cc
static void VMRuntime_startHeapTaskProcessor(JNIEnv* env, jobject) {
  Runtime::Current()->GetHeap()->GetTaskProcessor()->Start(ThreadForEnv(env));
}
//art/runtime/gc/task_processor.cc
void TaskProcessor::Start(Thread* self) {
  MutexLock mu(self, lock_);
  is_running_ = true;
  running_thread_ = self;
}

So startHeapTaskProcessor mainly sets a couple of flags. Next, the runHeapTasks method.

//dalvik_system_VMRuntime.cc
static void VMRuntime_runHeapTasks(JNIEnv* env, jobject) {
  Runtime::Current()->GetHeap()->GetTaskProcessor()->RunAllTasks(ThreadForEnv(env));
}
void TaskProcessor::RunAllTasks(Thread* self) {
  while (true) {
    // Wait and get a task, may be interrupted.
    HeapTask* task = GetTask(self);
    if (task != nullptr) {
      task->Run(self);
      task->Finalize();
    } else if (!IsRunning()) {
      break;
    }
  }
}

RunAllTasks runs an infinite loop: each iteration fetches a task via GetTask and, if one is returned, calls the HeapTask's Run and Finalize methods. Looking at GetTask below, it takes elements from the tasks_ multiset; if the set is empty it waits. In the else branch it checks whether the earliest task is already due; if its target run time is still in the future, it does a timed wait until then. Overall this is a producer-consumer model (a Java sketch of the pattern follows the AddTask/UpdateTargetRunTime listing below).

std::multiset<HeapTask*, CompareByTargetRunTime> tasks_ GUARDED_BY(lock_);

HeapTask* TaskProcessor::GetTask(Thread* self) {
  ScopedThreadStateChange tsc(self, ThreadState::kWaitingForTaskProcessor);
  MutexLock mu(self, lock_);
  while (true) {
    if (tasks_.empty()) {
      if (!is_running_) {
        return nullptr;
      }
      cond_.Wait(self);  // Empty queue, wait until we are signalled.
    } else {
      // Non empty queue, look at the top element and see if we are ready to run it.
      const uint64_t current_time = NanoTime();
      HeapTask* task = *tasks_.begin();
      // If we are shutting down, return the task right away without waiting. Otherwise return the
      // task if it is late enough.
      uint64_t target_time = task->GetTargetRunTime();
      if (!is_running_ || target_time <= current_time) {
        tasks_.erase(tasks_.begin());
        return task;
      }
      DCHECK_GT(target_time, current_time);
      // Wait until we hit the target run time.
      const uint64_t delta_time = target_time - current_time;
      const uint64_t ms_delta = NsToMs(delta_time);
      const uint64_t ns_delta = delta_time - MsToNs(ms_delta);
      cond_.TimedWait(self, static_cast<int64_t>(ms_delta), static_cast<int32_t>(ns_delta));
    }
  }
  UNREACHABLE();
}

TaskProcessor's AddTask method inserts a task into tasks_ and wakes up the waiting consumer via cond_.Signal; UpdateTargetRunTime re-inserts a task whose target time changed and signals if it became the new head of the set.

void TaskProcessor::AddTask(Thread* self, HeapTask* task) {
  ScopedThreadStateChange tsc(self, ThreadState::kWaitingForTaskProcessor);
  MutexLock mu(self, lock_);
  tasks_.insert(task);
  cond_.Signal(self);
}

void TaskProcessor::UpdateTargetRunTime(Thread* self, HeapTask* task, uint64_t new_target_time) {
  MutexLock mu(self, lock_);
  // Find the task.
  auto range = tasks_.equal_range(task);
  for (auto it = range.first; it != range.second; ++it) {
    if (*it == task) {
      // Check if the target time was updated, if so re-insert then wait.
      if (new_target_time != task->GetTargetRunTime()) {
        tasks_.erase(it);
        task->SetTargetRunTime(new_target_time);
        tasks_.insert(task);
        // If we became the first task then we may need to signal since we changed the task that we
        // are sleeping on.
        if (*tasks_.begin() == task) {
          cond_.Signal(self);
        }
        return;
      }
    }
  }
}
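Putting GetTask, AddTask and RunAllTasks together, TaskProcessor is a small delay-queue style producer-consumer: mutator threads produce HeapTasks (each with a target run time) and the HeapTaskDaemon thread consumes them. The Java sketch below mirrors that structure; all names are hypothetical and it only illustrates the pattern, it is not ART code.

// Minimal Java sketch of the TaskProcessor pattern above (hypothetical names).
import java.util.Comparator;
import java.util.PriorityQueue;

class SimpleTaskProcessor {
    interface HeapTask extends Runnable {
        long targetRunTimeNanos();   // earliest time the task should run
    }

    private final PriorityQueue<HeapTask> tasks =
            new PriorityQueue<>(Comparator.comparingLong(HeapTask::targetRunTimeNanos));
    private boolean running = false;

    synchronized void start() { running = true; }              // ~ TaskProcessor::Start

    synchronized void stop() {                                  // ~ TaskProcessor::Stop
        running = false;
        notifyAll();                 // wake the consumer so it can observe !running
    }

    synchronized void addTask(HeapTask task) {                  // ~ TaskProcessor::AddTask
        tasks.add(task);
        notifyAll();                 // corresponds to cond_.Signal
    }

    // ~ TaskProcessor::GetTask: block until a task is due; null once stopped and drained.
    private synchronized HeapTask getTask() throws InterruptedException {
        while (true) {
            if (tasks.isEmpty()) {
                if (!running) {
                    return null;
                }
                wait();              // empty queue, wait for addTask/stop to signal
            } else {
                long now = System.nanoTime();
                long target = tasks.peek().targetRunTimeNanos();
                if (!running || target <= now) {
                    return tasks.poll();   // due, or shutting down: hand it out immediately
                }
                long delta = target - now;
                wait(delta / 1_000_000L, (int) (delta % 1_000_000L));  // timed wait until due
            }
        }
    }

    // ~ TaskProcessor::RunAllTasks: run tasks (outside the lock) until stopped and drained.
    void runAllTasks() throws InterruptedException {
        for (HeapTask task = getTask(); task != null; task = getTask()) {
            task.run();
        }
    }
}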

Now let's walk through an example of a Background young concurrent copying GC. When a Java object is created, Heap::AllocObjectWithAllocator is entered.

inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self,
                                                      ObjPtr<mirror::Class> klass,
                                                      size_t byte_count,
                                                      AllocatorType allocator,
                                                      const PreFenceVisitor& pre_fence_visitor) {
  ...
  if (need_gc) {
    // Do this only once thread suspension is allowed again, and we're done with kInstrumented.
    RequestConcurrentGCAndSaveObject(self, /*force_full=*/ false, starting_gc_num, &obj);
  }
  VerifyObject(obj);
  self->VerifyStack();
  return obj.Ptr();
}

When the allocation pushes the heap past its GC-trigger threshold (need_gc is true), a concurrent GC is requested:

void Heap::RequestConcurrentGCAndSaveObject(Thread* self,
                                            bool force_full,
                                            uint32_t observed_gc_num,
                                            ObjPtr<mirror::Object>* obj) {
  StackHandleScope<1> hs(self);
  HandleWrapperObjPtr<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
  RequestConcurrentGC(self, kGcCauseBackground, force_full, observed_gc_num);
}
bool Heap::RequestConcurrentGC(Thread* self,
                               GcCause cause,
                               bool force_full,
                               uint32_t observed_gc_num) {
  uint32_t max_gc_requested = max_gc_requested_.load(std::memory_order_relaxed);
  if (!GCNumberLt(observed_gc_num, max_gc_requested)) {
    // observed_gc_num >= max_gc_requested: Nobody beat us to requesting the next gc.
    if (CanAddHeapTask(self)) {
      // Since observed_gc_num >= max_gc_requested, this increases max_gc_requested_, if successful.
      if (max_gc_requested_.CompareAndSetStrongRelaxed(max_gc_requested, observed_gc_num + 1)) {
        task_processor_->AddTask(self, new ConcurrentGCTask(NanoTime(),  // Start straight away.
                                                            cause,
                                                            force_full,
                                                            observed_gc_num + 1));
      }
      DCHECK(GCNumberLt(observed_gc_num, max_gc_requested_.load(std::memory_order_relaxed)));
      // If we increased max_gc_requested_, then we added a task that will eventually cause
      // gcs_completed_ to be incremented (to at least observed_gc_num + 1).
      // If the CAS failed, somebody else did.
      return true;
    }
    return false;
  }
  return true;  // Vacuously.
}
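The interesting part of RequestConcurrentGC is how it avoids piling up redundant requests: a task is only enqueued if the caller can advance max_gc_requested_ past the GC number it observed. A minimal Java sketch of that dedup idea (hypothetical names, illustration only, not ART code):

// Sketch of the "request the next GC at most once" idea.
import java.util.concurrent.atomic.AtomicInteger;

class GcRequestDedup {
    private final AtomicInteger maxGcRequested = new AtomicInteger(0);

    void requestConcurrentGc(int observedGcNum, Runnable enqueueGcTask) {
        int max = maxGcRequested.get();
        // observedGcNum >= max means nobody has requested the *next* GC yet.
        if (observedGcNum >= max
                && maxGcRequested.compareAndSet(max, observedGcNum + 1)) {
            enqueueGcTask.run();   // only the CAS winner enqueues a ConcurrentGCTask
        }
        // If the CAS failed (or observedGcNum < max), another thread already requested it.
    }
}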

As RequestConcurrentGC shows, the request ultimately calls task_processor_->AddTask to enqueue a ConcurrentGCTask. ConcurrentGCTask's Run method, below, simply calls Heap::ConcurrentGC.

class Heap::ConcurrentGCTask : public HeapTask {
 public:
  ConcurrentGCTask(uint64_t target_time, GcCause cause, bool force_full, uint32_t gc_num)
      : HeapTask(target_time), cause_(cause), force_full_(force_full), my_gc_num_(gc_num) {}
  void Run(Thread* self) override {
    Runtime* runtime = Runtime::Current();
    gc::Heap* heap = runtime->GetHeap();
    DCHECK(GCNumberLt(my_gc_num_, heap->GetCurrentGcNum() + 2));  // <= current_gc_num + 1
    heap->ConcurrentGC(self, cause_, force_full_, my_gc_num_);
    CHECK_IMPLIES(GCNumberLt(heap->GetCurrentGcNum(), my_gc_num_), runtime->IsShuttingDown(self));
  }

 private:
  const GcCause cause_;
  const bool force_full_;  // If true, force full (or partial) collection.
  const uint32_t my_gc_num_;  // Sequence number of requested GC.
};
void Heap::ConcurrentGC(Thread* self, GcCause cause, bool force_full, uint32_t requested_gc_num) {
  if (!Runtime::Current()->IsShuttingDown(self)) {
    // Wait for any GCs currently running to finish. If this incremented GC number, we're done.
    WaitForGcToComplete(cause, self);
    if (GCNumberLt(GetCurrentGcNum(), requested_gc_num)) {
      collector::GcType next_gc_type = next_gc_type_;
      // If forcing full and next gc type is sticky, override with a non-sticky type.
      if (force_full && next_gc_type == collector::kGcTypeSticky) {
        next_gc_type = NonStickyGcType();
      }
      // If we can't run the GC type we wanted to run, find the next appropriate one and try
      // that instead. E.g. can't do partial, so do full instead.
      // We must ensure that we run something that ends up incrementing gcs_completed_.
      // In the kGcTypePartial case, the initial CollectGarbageInternal call may not have that
      // effect, but the subsequent KGcTypeFull call will.
      if (CollectGarbageInternal(next_gc_type, cause, false, requested_gc_num)
          == collector::kGcTypeNone) {
        for (collector::GcType gc_type : gc_plan_) {
          if (!GCNumberLt(GetCurrentGcNum(), requested_gc_num)) {
            // Somebody did it for us.
            break;
          }
          // Attempt to run the collector, if we succeed, we are done.
          if (gc_type > next_gc_type &&
              CollectGarbageInternal(gc_type, cause, false, requested_gc_num)
              != collector::kGcTypeNone) {
            break;
          }
        }
      }
    }
  }
}

CollectGarbageInternal performs the actual collection:

collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type,
                                               GcCause gc_cause,
                                               bool clear_soft_references,
                                               uint32_t requested_gc_num) {
  Thread* self = Thread::Current();
  Runtime* runtime = Runtime::Current();
  ...
  // Run the collector.
  collector->Run(gc_cause, clear_soft_references || runtime->IsZygote());
  IncrementFreedEver();
  RequestTrim(self);
  // Collect cleared references.
  SelfDeletingTask* clear = reference_processor_->CollectClearedReferences(self);
  // Adjust the heap's target size:
  // Grow the heap so that we know when to perform the next GC.
  GrowForUtilization(collector, bytes_allocated_before_gc);
  old_native_bytes_allocated_.store(GetNativeBytes());
  // Print the GC log line.
  LogGC(gc_cause, collector);
  FinishGC(self, gc_type);
  // Actually enqueue all cleared references. Do this after the GC has officially finished since
  // otherwise we can deadlock.
  clear->Run(self);
  clear->Finalize();
  // Inform DDMS that a GC completed.
  Dbg::GcDidFinish();

  // Unload native libraries for class unloading. We do this after calling FinishGC to prevent
  // deadlocks in case the JNI_OnUnload function does allocations.
  {
    ScopedObjectAccess soa(self);
    soa.Vm()->UnloadNativeLibraries();
  }
  return gc_type;
}

Having analyzed how HeapTaskDaemon starts and how it works, let's look at how ZygoteHooks#preFork stops the daemon threads.

public static void preFork() {
    Daemons.stop();
    token = nativePreFork();
    waitUntilAllThreadsStopped();
}
@UnsupportedAppUsage
public static void stop() {
    for (Daemon daemon : DAEMONS) {
        daemon.stop();
    }
}

public void stop() {
    Thread threadToStop;
    synchronized (this) {
        threadToStop = thread;
        thread = null;
    }
    if (threadToStop == null) {
        throw new IllegalStateException("not running");
    }
    interrupt (threadToStop);
    while (true) {
        try {
            threadToStop.join();
            return;
        } catch (InterruptedException ignored) {
        } catch (OutOfMemoryError ignored) {
            // An OOME may be thrown if allocating the InterruptedException failed.
        }
    }
}

By setting thread to null, the if check in HeapTaskDaemon#runInternal (isRunning()) no longer passes, so the heap task processor will not be started again. HeapTaskDaemon also overrides the interrupt method:

public synchronized void interrupt(Thread thread) {
    VMRuntime.getRuntime().stopHeapTaskProcessor();
}
static void VMRuntime_stopHeapTaskProcessor(JNIEnv* env, jobject) {
  Runtime::Current()->GetHeap()->GetTaskProcessor()->Stop(Thread::ForEnv(env));
}
//art/runtime/gc/task_processor.cc
void TaskProcessor::Stop(Thread* self) {
  MutexLock mu(self, lock_);
  is_running_ = false;
  running_thread_ = nullptr;
  cond_.Broadcast(self);
}

is_running_ is set to false and cond_.Broadcast wakes up the thread waiting on the condition. Once the queue is drained, GetTask returns null, which finally makes the loop in RunAllTasks exit (both methods are shown again below, abridged).

HeapTask* TaskProcessor::GetTask(Thread* self) {
  ScopedThreadStateChange tsc(self, ThreadState::kWaitingForTaskProcessor);
  MutexLock mu(self, lock_);
  while (true) {
    if (tasks_.empty()) {
      if (!is_running_) {
        return nullptr;
      }
      cond_.Wait(self);  // Empty queue, wait until we are signalled.
    } else {
      // Non empty queue: if we are shutting down, the head task is returned
      // right away without waiting (see the full listing above).
      ...
    }
  }
  UNREACHABLE();
}

void TaskProcessor::RunAllTasks(Thread* self) {
  while (true) {
    // Wait and get a task, may be interrupted.
    HeapTask* task = GetTask(self);
    if (task != nullptr) {
      task->Run(self);
      task->Finalize();
    } else if (!IsRunning()) {
      break;
    }
  }
}