Spark Source Code Reading: RPC Communication - Master


The previous section walked through how the driver submits an application in standalone mode. In this section we look at how the Master then schedules resources and launches executors to run the job. Along the way the Master may send messages to the driver or to a worker; jump to the corresponding page when that happens (the previous section covers the driver, the next one the worker). We read the code line by line, stepping into each function as we meet it, so we sometimes descend several calls deep and then suddenly pop back to where we started; it is best to follow this walkthrough with the source open beside it. Reading a file from its first line down gives you no thread to hold on to and quickly gets boring, so instead we start from the driver submitting an application and read forward from there, which keeps the train of thought much clearer.

Reading the Master source

package org.apache.spark.deploy.master

import java.text.SimpleDateFormat
import java.util.{Date, Locale}
import java.util.concurrent.{ScheduledFuture, TimeUnit}

import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet}
import scala.util.Random

import org.apache.spark.{SecurityManager, SparkConf, SparkException}
import org.apache.spark.deploy.{ApplicationDescription, DriverDescription, ExecutorState, SparkHadoopUtil}
import org.apache.spark.deploy.DeployMessages._
import org.apache.spark.deploy.master.DriverState.DriverState
import org.apache.spark.deploy.master.MasterMessages._
import org.apache.spark.deploy.master.ui.MasterWebUI
import org.apache.spark.deploy.rest.StandaloneRestServer
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._
import org.apache.spark.internal.config.Deploy._
import org.apache.spark.internal.config.UI._
import org.apache.spark.internal.config.Worker._
import org.apache.spark.metrics.{MetricsSystem, MetricsSystemInstances}
import org.apache.spark.resource.{ResourceRequirement, ResourceUtils}
import org.apache.spark.rpc._
import org.apache.spark.serializer.{JavaSerializer, Serializer}
import org.apache.spark.util.{SparkUncaughtExceptionHandler, ThreadUtils, Utils}

private[deploy] class Master(
    override val rpcEnv: RpcEnv,
    address: RpcAddress,
    webUiPort: Int,
    val securityMgr: SecurityManager,
    val conf: SparkConf)
  extends ThreadSafeRpcEndpoint with Logging with LeaderElectable {
  
  // Thread used to forward messages
  private val forwardMessageThread =
    ThreadUtils.newDaemonSingleThreadScheduledExecutor("master-forward-message-thread")
  
  // Load the Hadoop configuration
  private val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf)

  // For application IDs
  // Date format used when generating application IDs
  private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US)
  
  // Worker timeout in milliseconds
  private val workerTimeoutMs = conf.get(WORKER_TIMEOUT) * 1000

  // Number of completed applications to retain
  private val retainedApplications = conf.get(RETAINED_APPLICATIONS)
  
  // Number of completed drivers to retain
  private val retainedDrivers = conf.get(RETAINED_DRIVERS)
  
  // Number of timeout-check rounds a dead worker is retained before being dropped
  private val reaperIterations = conf.get(REAPER_ITERATIONS)
  
  // Recovery mode
  private val recoveryMode = conf.get(RECOVERY_MODE)

  // Maximum number of executor retries
  private val maxExecutorRetries = conf.get(MAX_EXECUTOR_RETRIES)
  
  // All known workers
  val workers = new HashSet[WorkerInfo]
  
  // Mapping from application ID to ApplicationInfo
  val idToApp = new HashMap[String, ApplicationInfo]
  
  // Applications waiting to be scheduled
  private val waitingApps = new ArrayBuffer[ApplicationInfo]
  
  // All applications known to this Master
  val apps = new HashSet[ApplicationInfo]
 
  // Mapping from worker ID to WorkerInfo
  private val idToWorker = new HashMap[String, WorkerInfo]
  
  // Mapping from RPC address to WorkerInfo
  private val addressToWorker = new HashMap[RpcAddress, WorkerInfo]
  
  // Mapping from endpoint (usually the driver) to application
  private val endpointToApp = new HashMap[RpcEndpointRef, ApplicationInfo]
  
  // Mapping from RPC address to application
  private val addressToApp = new HashMap[RpcAddress, ApplicationInfo]
  
  // Completed applications
  private val completedApps = new ArrayBuffer[ApplicationInfo]
  
  // Sequence number for the next application
  private var nextAppNumber = 0
  
  // Information on all drivers
  private val drivers = new HashSet[DriverInfo]
  
  // Completed drivers
  private val completedDrivers = new ArrayBuffer[DriverInfo]
  // Drivers currently spooled for scheduling
  private val waitingDrivers = new ArrayBuffer[DriverInfo]
  
  // Sequence number for the next driver
  private var nextDriverNumber = 0
  
  // Check that the host is a valid hostname/IP without a port
  Utils.checkHost(address.host)
  
  // MetricsSystem -- created by a particular instance (Master/client/worker, etc.); it consists of sources and sinks: sources collect metrics, sinks deliver them to their destination
  // Create the Master's metrics system
  private val masterMetricsSystem =
    MetricsSystem.createMetricsSystem(MetricsSystemInstances.MASTER, conf, securityMgr)
  
  // Create the applications' metrics system
  private val applicationMetricsSystem =
    MetricsSystem.createMetricsSystem(MetricsSystemInstances.APPLICATIONS, conf, securityMgr)
  
  // Metrics source backed by this Master instance
  private val masterSource = new MasterSource(this)

  // After onStart, webUi will be set
  private var webUi: MasterWebUI = null
  
  // Public address of the master
  private val masterPublicAddress = {
    // Read the public DNS name from the environment
    val envVar = conf.getenv("SPARK_PUBLIC_DNS")
    // Use it if set, otherwise fall back to the master host
    if (envVar != null) envVar else address.host
  }
   
  // The master's URL
  private val masterUrl = address.toSparkURL
  
  // URL of the master's web UI
  private var masterWebUiUrl: String = _
  
  // Recovery state; starts out as STANDBY
  private var state = RecoveryState.STANDBY
  
  // PersistenceEngine -- allows the Master to persist whatever state is needed to recover from failure
  // The persistence engine
  private var persistenceEngine: PersistenceEngine = _
  
  // LeaderElectionAgent -- tracks the current master; the common interface for all election agents
  // The leader election agent
  private var leaderElectionAgent: LeaderElectionAgent = _
  
  // ScheduledFuture -- a cancellable, delayed, result-bearing action, usually created by scheduling a task with a ScheduledExecutorService
  // Task that marks recovery as complete
  private var recoveryCompletionTask: ScheduledFuture[_] = _
  
  // Task that checks for worker timeouts
  private var checkForWorkerTimeOutTask: ScheduledFuture[_] = _

  // As a temporary workaround before better ways of configuring memory, we allow users to set
  // a flag that will perform round-robin scheduling across the nodes (spreading out each app
  // among all the nodes) instead of trying to consolidate each app onto a small # of nodes.
  // Whether to spread applications out across workers
  private val spreadOutApps = conf.get(SPREAD_OUT_APPS)

  // Default maxCores for applications that don't specify it (i.e. pass Int.MaxValue)
  private val defaultCores = conf.get(DEFAULT_CORES)
  
  // UI reverse proxy; disabled by default
  val reverseProxy = conf.get(UI_REVERSE_PROXY)
  
  // Throw if the configured default number of cores is less than 1
  if (defaultCores < 1) {
    throw new SparkException(s"${DEFAULT_CORES.key} must be positive")
  }

  // Alternative application submission gateway that is stable across Spark versions
  // Whether the REST submission server is enabled
  private val restServerEnabled = conf.get(MASTER_REST_SERVER_ENABLED)
  // The REST submission server
  private var restServer: Option[StandaloneRestServer] = None
  // Port the REST server is bound to
  private var restServerBoundPort: Option[Int] = None

  {
    val authKey = SecurityManager.SPARK_AUTH_SECRET_CONF
    require(conf.getOption(authKey).isEmpty || !restServerEnabled,
      s"The RestSubmissionServer does not support authentication via ${authKey}.  Either turn " +
        "off the RestSubmissionServer with spark.master.rest.enabled=false, or do not use " +
        "authentication.")
  }
  
  // Start the Master
  override def onStart(): Unit = {
    logInfo("Starting Spark master at " + masterUrl)
    logInfo(s"Running Spark version ${org.apache.spark.SPARK_VERSION}")
    // Set up the web UI
    webUi = new MasterWebUI(this, webUiPort)
    webUi.bind()
    masterWebUiUrl = webUi.webUrl
    // If reverse proxying is enabled
    if (reverseProxy) {
      // Read the reverse-proxy URL for the web UI
      val uiReverseProxyUrl = conf.get(UI_REVERSE_PROXY_URL).map(_.stripSuffix("/"))
      // If the URL is set
      if (uiReverseProxyUrl.nonEmpty) {
        System.setProperty("spark.ui.proxyBase", uiReverseProxyUrl.get)
        // If the master URL has a path component, it must end with a slash.
        // Otherwise the browser generates incorrect relative links
        masterWebUiUrl = uiReverseProxyUrl.get + "/"
      }
      // Register the proxy
      webUi.addProxy()
      // The Spark Master acts as a reverse proxy; the Master, Worker and Application UIs are reachable at masterWebUiUrl
      logInfo(s"Spark Master is acting as a reverse proxy. Master, Workers and " +
       s"Applications UIs are available at $masterWebUiUrl")
    }

    // Schedule the periodic worker-timeout check
    checkForWorkerTimeOutTask = forwardMessageThread.scheduleAtFixedRate(
      () => Utils.tryLogNonFatalError { self.send(CheckForWorkerTimeOut) },
      0, workerTimeoutMs, TimeUnit.MILLISECONDS)
    
    // If the REST server is enabled
    if (restServerEnabled) {
      // Read the REST server port
      val port = conf.get(MASTER_REST_SERVER_PORT)
      // Create the REST server
      restServer = Some(new StandaloneRestServer(address.host, port, conf, self, masterUrl))
    }
    // Start the REST server and record its bound port
    restServerBoundPort = restServer.map(_.start())
    
    // Register the Master's own metrics source
    masterMetricsSystem.registerSource(masterSource)
    // Start the Master metrics system
    masterMetricsSystem.start()
    // Start the applications metrics system
    applicationMetricsSystem.start()
    // Attach the master and app metrics servlet handler to the web ui after the metrics systems are
    // started.
    masterMetricsSystem.getServletHandlers.foreach(webUi.attachHandler)
    applicationMetricsSystem.getServletHandlers.foreach(webUi.attachHandler)

    val serializer = new JavaSerializer(conf)
    // Choose how the Master recovers after a failure
    val (persistenceEngine_, leaderElectionAgent_) = recoveryMode match {
      case "ZOOKEEPER" =>
        // Persist recovery state to ZooKeeper
        logInfo("Persisting recovery state to ZooKeeper")
        val zkFactory =
          new ZooKeeperRecoveryModeFactory(conf, serializer)
          // The Master registers itself as a leader-election candidate
        (zkFactory.createPersistenceEngine(), zkFactory.createLeaderElectionAgent(this))
      // Filesystem mode
      case "FILESYSTEM" =>
        val fsFactory =
          new FileSystemRecoveryModeFactory(conf, serializer)
        // The Master registers itself as a leader-election candidate
        (fsFactory.createPersistenceEngine(), fsFactory.createLeaderElectionAgent(this))
      // Custom mode
      case "CUSTOM" =>
        val clazz = Utils.classForName(conf.get(RECOVERY_MODE_FACTORY))
        val factory = clazz.getConstructor(classOf[SparkConf], classOf[Serializer])
          .newInstance(conf, serializer)
          .asInstanceOf[StandaloneRecoveryModeFactory]
        (factory.createPersistenceEngine(), factory.createLeaderElectionAgent(this))
      // Single-node mode: this Master is always the leader
      case _ =>
        // MonarchyLeaderAgent -- single-node implementation of LeaderElectionAgent: we are the leader from the start
        (new BlackHolePersistenceEngine(), new MonarchyLeaderAgent(this))
    }

    // Install the persistence engine
    persistenceEngine = persistenceEngine_
    // Install the leader-election agent
    leaderElectionAgent = leaderElectionAgent_
  }
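
  // A configuration sketch (illustrative values, not part of the Master source) for the recovery
  // modes matched above. spark.deploy.recoveryMode, spark.deploy.zookeeper.url and
  // spark.deploy.zookeeper.dir are the standard standalone settings behind RECOVERY_MODE;
  // the addresses and paths here are assumptions for the example only.
  private def recoveryConfSketch(): SparkConf = {
    new SparkConf()
      .set("spark.deploy.recoveryMode", "ZOOKEEPER")          // or FILESYSTEM / CUSTOM / NONE
      .set("spark.deploy.zookeeper.url", "zk1:2181,zk2:2181") // assumed quorum address
      .set("spark.deploy.zookeeper.dir", "/spark")            // znode used by the persistence engine
  }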

  override def onStop(): Unit = {
    // Flush Master metrics to the sinks
    masterMetricsSystem.report()
    // Flush application metrics to the sinks
    applicationMetricsSystem.report()
    // prevent the CompleteRecovery message sending to restarted master
    if (recoveryCompletionTask != null) {
      // Cancel the recovery-completion task
      recoveryCompletionTask.cancel(true)
    }
    if (checkForWorkerTimeOutTask != null) {
      // Stop the worker-timeout check task
      checkForWorkerTimeOutTask.cancel(true)
    }
    // Shut down the message-forwarding thread
    forwardMessageThread.shutdownNow()
    // Stop the web UI
    webUi.stop()
    // Stop the REST server
    restServer.foreach(_.stop())
    // Stop the Master metrics system
    masterMetricsSystem.stop()
    // Stop the applications metrics system
    applicationMetricsSystem.stop()
    // Close the persistence engine
    persistenceEngine.close()
    // Stop the election agent
    leaderElectionAgent.stop()
  }
  
  // Callback invoked when this master is elected leader
  override def electedLeader(): Unit = {
    // Notify ourselves that we were elected
    self.send(ElectedLeader)
  }
  
  // Callback invoked when leadership is revoked
  override def revokedLeadership(): Unit = {
    self.send(RevokedLeadership)
  }

  override def receive: PartialFunction[Any, Unit] = {
    // We have been elected leader
    case ElectedLeader =>
      // The persistence engine reads back the stored application, driver and worker data
      val (storedApps, storedDrivers, storedWorkers) = persistenceEngine.readPersistedData(rpcEnv)
      // With no stored apps, drivers or workers we go straight to ALIVE; otherwise we start RECOVERING
      state = if (storedApps.isEmpty && storedDrivers.isEmpty && storedWorkers.isEmpty) {
        RecoveryState.ALIVE
      } else {
        RecoveryState.RECOVERING
      }
      // Log that we were elected leader
      logInfo("I have been elected leader! New state: " + state)
      // If we are recovering
      if (state == RecoveryState.RECOVERING) {
        // Begin recovery: re-register the stored apps, drivers and workers
        beginRecovery(storedApps, storedDrivers, storedWorkers)
        // Schedule the recovery-completion task
        recoveryCompletionTask = forwardMessageThread.schedule(new Runnable {
          override def run(): Unit = Utils.tryLogNonFatalError {
            // Tell ourselves that recovery is complete
            self.send(CompleteRecovery)
          }
        }, workerTimeoutMs, TimeUnit.MILLISECONDS)
      }
    
    // Recovery finished
    case CompleteRecovery => completeRecovery()
    
    // Leadership was revoked
    case RevokedLeadership =>
      logError("Leadership has been revoked -- master shutting down.")
      System.exit(0)
    
    // A worker is decommissioning
    case WorkerDecommissioning(id, workerRef) =>
      // If this master is standby
      if (state == RecoveryState.STANDBY) {
        // Tell the worker that this master is standby
        workerRef.send(MasterInStandby)
      } else {
        // We use foreach since get gives us an option and we can skip the failures.
        // Look up the worker by id and decommission it
        idToWorker.get(id).foreach(decommissionWorker)
      }
    
    // Decommission the given workers
    case DecommissionWorkers(ids) =>
      // The caller has already checked the state when handling DecommissionWorkersOnHosts,
      // so it should not be the STANDBY
      // Assert: the caller guarantees the state is not STANDBY
      assert(state != RecoveryState.STANDBY)
      ids.foreach ( id =>
        // We use foreach since get gives us an option and we can skip the failures.
        idToWorker.get(id).foreach { w =>
          // Decommission the worker
          decommissionWorker(w)
          // Also send a message to the worker node to notify.
          w.endpoint.send(DecommissionWorker)
        }
      )
    
    // Register a worker
    case RegisterWorker(
      id, workerHost, workerPort, workerRef, cores, memory, workerWebUiUrl,
      masterAddress, resources) =>
      // Log that the worker is registering
      logInfo("Registering worker %s:%d with %d cores, %s RAM".format(
        workerHost, workerPort, cores, Utils.megabytesToString(memory)))
      // If this master is standby
      if (state == RecoveryState.STANDBY) {
        // Tell the worker that this master is standby
        workerRef.send(MasterInStandby)
      // If a worker with this id is already registered
      } else if (idToWorker.contains(id)) {
        // Tell the worker that it is already registered
        workerRef.send(RegisteredWorker(self, masterWebUiUrl, masterAddress, true))
      } else {
        // The worker's resources
        val workerResources = resources.map(r => r._1 -> WorkerResourceInfo(r._1, r._2.addresses))
        // Build the worker's info
        val worker = new WorkerInfo(id, workerHost, workerPort, cores, memory,
          workerRef, workerWebUiUrl, workerResources)
        // If the worker registered successfully
        if (registerWorker(worker)) {
          // Add the worker to the persistence engine
          persistenceEngine.addWorker(worker)
          // Tell the worker that registration succeeded, along with the master's URL and address
          workerRef.send(RegisteredWorker(self, masterWebUiUrl, masterAddress, false))
          // Re-run the scheduler
          schedule()
        } else {
          // The worker's address
          val workerAddress = worker.endpoint.address
          // Warn that registration failed: an attempt to re-register at the same address
          logWarning("Worker registration failed. Attempted to re-register worker at same " +
            "address: " + workerAddress)
          // Tell the worker that registration failed
          workerRef.send(RegisterWorkerFailed("Attempted to re-register worker at same address: "
            + workerAddress))
        }
      }
    
    // Register an application
    case RegisterApplication(description, driver) =>
      // TODO Prevent repeated registrations from some driver
      // If this is a standby Master, do nothing
      if (state == RecoveryState.STANDBY) {
        // ignore, don't send response
      } else {
        logInfo("Registering app " + description.name)
        // Create the application
        val app = createApplication(description, driver)
        // Register it
        registerApplication(app)
        logInfo("Registered app " + description.name + " with ID " + app.id)
        // Add the application to the persistence engine
        persistenceEngine.addApplication(app)
        // Tell the submitting driver that the application is registered
        driver.send(RegisteredApplication(app.id, self))
        // Start scheduling
        schedule()
      }
    
    // An executor's state changed
    case ExecutorStateChanged(appId, execId, state, message, exitStatus) =>
      // Look up the executor through its application
      val execOption = idToApp.get(appId).flatMap(app => app.executors.get(execId))
      // Match on the executor
      execOption match {
        case Some(exec) =>
          // Look up the application by id
          val appInfo = idToApp(appId)
          // The previous executor state
          val oldState = exec.state
          // Record the new executor state
          exec.state = state
          
          // If the executor is now RUNNING
          if (state == ExecutorState.RUNNING) {
            // Assert: the only legal transition into RUNNING is from LAUNCHING
            assert(oldState == ExecutorState.LAUNCHING,
              s"executor $execId state transfer from $oldState to RUNNING is illegal")
            // Reset the application's retry count
            appInfo.resetRetryCount()
          }
          
          // Tell the driver of the owning application that the executor's state changed
          exec.application.driver.send(ExecutorUpdated(execId, state, message, exitStatus, None))
          
          // If the executor has finished
          if (ExecutorState.isFinished(state)) {
            // Remove this executor from the worker and app
            logInfo(s"Removing executor ${exec.fullId} because it is $state")
            // If an application has already finished, preserve its
            // state to display its information properly on the UI
            if (!appInfo.isFinished) {
              // Remove the executor from the application
              appInfo.removeExecutor(exec)
            }
            // Remove the executor from the worker it ran on
            exec.worker.removeExecutor(exec)
            
            // Exit status 0 counts as a normal exit
            val normalExit = exitStatus == Some(0)
            // Only retry certain number of times so we don't go into an infinite loop.
            // Important note: this code path is not exercised by tests, so be very careful when
            // changing this `if` condition.
            // We also don't count failures from decommissioned workers since they are "expected."
            // If the exit was abnormal, the previous state was not DECOMMISSIONED, and the
            // application's retry count has reached maxExecutorRetries (negative disables this)
            if (!normalExit
                && oldState != ExecutorState.DECOMMISSIONED
                && appInfo.incrementRetryCount() >= maxExecutorRetries
                && maxExecutorRetries >= 0) { // < 0 disables this application-killing path
              // All executors of the application
              val execs = appInfo.executors.values
              // If none of them is still RUNNING, the application is considered failed
              if (!execs.exists(_.state == ExecutorState.RUNNING)) {
                // Log that the application failed too many times and remove it
                logError(s"Application ${appInfo.desc.name} with ID ${appInfo.id} failed " +
                  s"${appInfo.retryCount} times; removing it")
                // Remove the application
                removeApplication(appInfo, ApplicationState.FAILED)
              }
            }
          }
          // Re-run the scheduler
          schedule()
        case None =>
          // Status update for an unknown executor
          logWarning(s"Got status update for unknown executor $appId/$execId")
      }
    
    // A driver's state changed
    case DriverStateChanged(driverId, state, exception) =>
      // Match on the new state
      state match {
        // ERROR / FINISHED / KILLED / FAILED
        case DriverState.ERROR | DriverState.FINISHED | DriverState.KILLED | DriverState.FAILED =>
          // Remove the driver
          removeDriver(driverId, state, exception)
        // Any other state is unexpected
        case _ =>
          throw new Exception(s"Received unexpected state update for driver $driverId: $state")
      }
    
    // Heartbeat received from a worker
    case Heartbeat(workerId, worker) =>
      // Look up the worker
      idToWorker.get(workerId) match {
        // A registered worker
        case Some(workerInfo) =>
          // Record the current system time as the worker's last heartbeat
          workerInfo.lastHeartbeat = System.currentTimeMillis()
        // An unregistered worker
        case None =>
          // If this worker id is still known
          if (workers.map(_.id).contains(workerId)) {
            // Warn: heartbeat from an unregistered worker; ask it to re-register
            logWarning(s"Got heartbeat from unregistered worker $workerId." +
              " Asking it to re-register.")
            // Tell the worker to reconnect to the master
            worker.send(ReconnectWorker(masterUrl))
          } else {
            // Warn: heartbeat from a worker that was never registered; ignore it
            logWarning(s"Got heartbeat from unregistered worker $workerId." +
              " This worker was never registered, so ignoring the heartbeat.")
          }
      }
    
    // An application acknowledged the master change
    case MasterChangeAcknowledged(appId) =>
      // Look up the application by id
      idToApp.get(appId) match {
        // Application found
        case Some(app) =>
          // Log that the application re-registered
          logInfo("Application has been re-registered: " + appId)
          // Mark the application as WAITING
          app.state = ApplicationState.WAITING
        case None =>
          // Master-change ack from an unknown application
          logWarning("Master change ack from unknown app: " + appId)
      }
      
      // If every worker and application has checked in, finish recovery
      if (canCompleteRecovery) { completeRecovery() }
    
    // A worker responded with its scheduler state
    case WorkerSchedulerStateResponse(workerId, execResponses, driverResponses) =>
      idToWorker.get(workerId) match {
        // Worker found
        case Some(worker) =>
          logInfo("Worker has been re-registered: " + workerId)
          // Mark the worker ALIVE
          worker.state = WorkerState.ALIVE
          
          // Keep only executors whose application is still known
          val validExecutors = execResponses.filter(
            exec => idToApp.get(exec.desc.appId).isDefined)
          // For each valid executor
          for (exec <- validExecutors) {
            // The executor's description and resources
            val (execDesc, execResources) = (exec.desc, exec.resources)
            // The owning application
            val app = idToApp(execDesc.appId)
            // Re-attach the executor to the application
            val execInfo = app.addExecutor(
              worker, execDesc.cores, execResources, Some(execDesc.execId))
            // Add the executor to the worker
            worker.addExecutor(execInfo)
            // Restore the executor's resources on the worker
            worker.recoverResources(execResources)
            // Copy the executor's state
            execInfo.copyState(execDesc)
          }
          
          // For each reported driver
          for (driver <- driverResponses) {
            // The driver's id and resources
            val (driverId, driverResource) = (driver.driverId, driver.resources)
            // Find the driver with this id in the drivers list
            drivers.find(_.id == driverId).foreach { driver =>
              // Attach the worker to the driver
              driver.worker = Some(worker)
              // Mark the driver RUNNING
              driver.state = DriverState.RUNNING
              // Restore the driver's resources
              driver.withResources(driverResource)
              // Restore the driver's resources on the worker
              worker.recoverResources(driverResource)
              // Add the driver to the worker
              worker.addDriver(driver)
            }
          }
        case None =>
          // Scheduler state from an unknown worker
          logWarning("Scheduler state from unknown worker: " + workerId)
      }
      
      // If every worker and application has checked in, finish recovery
      if (canCompleteRecovery) { completeRecovery() }

    // A worker reported its latest state
    case WorkerLatestState(workerId, executors, driverIds) =>
      // Look up the worker by id
      idToWorker.get(workerId) match {
        // A registered worker
        case Some(worker) =>
          // For each reported executor
          for (exec <- executors) {
            // Check whether the master knows this executor
            val executorMatches = worker.executors.exists {
              // It matches if both the application id and the executor id line up
              case (_, e) => e.application.id == exec.appId && e.id == exec.execId
            }
            // If it does not match
            if (!executorMatches) {
              // master doesn't recognize this executor. So just tell worker to kill it.
              // Tell the worker to kill the executor
              worker.endpoint.send(KillExecutor(masterUrl, exec.appId, exec.execId))
            }
          }
          
          // For each reported driver
          for (driverId <- driverIds) {
            // Check whether the master knows this driver
            val driverMatches = worker.drivers.exists { case (id, _) => id == driverId }
            // If not
            if (!driverMatches) {
              // master doesn't recognize this driver. So just tell worker to kill it.
              // Tell the worker to kill the driver
              worker.endpoint.send(KillDriver(driverId))
            }
          }
        case None =>
          // State report from an unknown worker
          logWarning("Worker state from unknown worker: " + workerId)
      }
    
    // Unregister an application
    case UnregisterApplication(applicationId) =>
      // Log the unregister request
      logInfo(s"Received unregister request from application $applicationId")
      // Look up the application by id and finish it
      idToApp.get(applicationId).foreach(finishApplication)
    
    // Periodic worker-timeout check
    case CheckForWorkerTimeOut =>
      // Check for and remove any timed-out workers
      timeOutDeadWorkers()

  }
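
  // A one-line sketch (hypothetical caller, not in this file) of the sending side of the
  // Heartbeat case above: each Worker periodically fire-and-forgets its id and endpoint
  // reference to the Master; no reply is expected, which is why it arrives via `receive`.
  private def heartbeatSketch(masterRef: RpcEndpointRef, workerId: String,
      workerRef: RpcEndpointRef): Unit = {
    masterRef.send(Heartbeat(workerId, workerRef)) // one-way message, matched by `case Heartbeat`
  }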

  override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
    // Request to submit a driver
    case RequestSubmitDriver(description) =>
      // If this master is not ALIVE
      if (state != RecoveryState.ALIVE) {
        // Driver submissions are only accepted in the ALIVE state
        val msg = s"${Utils.BACKUP_STANDALONE_MASTER_PREFIX}: $state. " +
          "Can only accept driver submissions in ALIVE state."
        // Reply that the submission failed
        context.reply(SubmitDriverResponse(self, false, None, msg))
      } else {
        logInfo("Driver submitted " + description.command.mainClass)
        // Create the driver
        val driver = createDriver(description)
        // Add the driver to the persistence engine
        persistenceEngine.addDriver(driver)
        // Queue the driver for scheduling
        waitingDrivers += driver
        // Add it to the drivers list
        drivers.add(driver)
        // Schedule resources
        schedule()

        // TODO: It might be good to instead have the submission client poll the master to determine
        //       the current status of the driver. For now it's simply "fire and forget".
        // Reply that the driver was submitted successfully
        context.reply(SubmitDriverResponse(self, true, Some(driver.id),
          s"Driver successfully submitted as ${driver.id}"))
      }
    
    // Request to kill a driver
    case RequestKillDriver(driverId) =>
      // If this master is not ALIVE
      if (state != RecoveryState.ALIVE) {
        // Only an ALIVE master can kill drivers
        val msg = s"${Utils.BACKUP_STANDALONE_MASTER_PREFIX}: $state. " +
          s"Can only kill drivers in ALIVE state."
        // Reply that the kill failed
        context.reply(KillDriverResponse(self, driverId, success = false, msg))
      } else {
        // Log the kill request
        logInfo("Asked to kill driver " + driverId)
        // Find the driver by id
        val driver = drivers.find(_.id == driverId)
        // Match on the driver
        driver match {
          // Driver found
          case Some(d) =>
            // If the driver is still waiting to be scheduled
            if (waitingDrivers.contains(d)) {
              // Drop it from the waiting list
              waitingDrivers -= d
              // Tell ourselves that the driver's state changed to KILLED
              self.send(DriverStateChanged(driverId, DriverState.KILLED, None))
            } else {
              // We just notify the worker to kill the driver here. The final bookkeeping occurs
              // on the return path when the worker submits a state change back to the master
              // to notify it that the driver was successfully killed.
              d.worker.foreach { w =>
                // Tell the worker to kill the driver
                w.endpoint.send(KillDriver(driverId))
              }
            }
            // TODO: It would be nice for this to be a synchronous response
            // The kill request has been submitted
            val msg = s"Kill request for $driverId submitted"
            logInfo(msg)
            // Reply that the kill request was submitted
            context.reply(KillDriverResponse(self, driverId, success = true, msg))
          // No matching driver
          case None =>
            // The driver already finished or does not exist
            val msg = s"Driver $driverId has already finished or does not exist"
            logWarning(msg)
            // Reply that the kill failed
            context.reply(KillDriverResponse(self, driverId, success = false, msg))
        }
      }
    
    // Request a driver's status
    case RequestDriverStatus(driverId) =>
      // If this master is not ALIVE
      if (state != RecoveryState.ALIVE) {
        // Only an ALIVE master can report driver status
        val msg = s"${Utils.BACKUP_STANDALONE_MASTER_PREFIX}: $state. " +
          "Can only request driver status in ALIVE state."
        // Reply that the status lookup failed
        context.reply(
          DriverStatusResponse(found = false, None, None, None, Some(new Exception(msg))))
      } else {
        // Search the running and the completed drivers
        (drivers ++ completedDrivers).find(_.id == driverId) match {
          // Driver found
          case Some(driver) =>
            // Reply with the driver's status
            context.reply(DriverStatusResponse(found = true, Some(driver.state),
              driver.worker.map(_.id), driver.worker.map(_.hostPort), driver.exception))
          // Driver not found
          case None =>
            context.reply(DriverStatusResponse(found = false, None, None, None, None))
        }
      }
    
    // Request the master's state
    case RequestMasterState =>
      // Reply with the master's state
      context.reply(MasterStateResponse(
        address.host, address.port, restServerBoundPort,
        workers.toArray, apps.toArray, completedApps.toArray,
        drivers.toArray, completedDrivers.toArray, state))
    
    // Request for the bound ports
    case BoundPortsRequest =>
      // Reply with the bound ports
      context.reply(BoundPortsResponse(address.port, webUi.boundPort, restServerBoundPort))
    
    // An application requests a new total number of executors
    case RequestExecutors(appId, requestedTotal) =>
      context.reply(handleRequestExecutors(appId, requestedTotal))
    
    // Kill executors
    case KillExecutors(appId, executorIds) =>
      // Parse the executor ids into integers
      val formattedExecutorIds = formatExecutorIds(executorIds)
      // Reply whether the executors were killed
      context.reply(handleKillExecutors(appId, formattedExecutorIds))
    
    // Decommission the workers on the given hosts
    case DecommissionWorkersOnHosts(hostnames) =>
      // If this master is not STANDBY
      if (state != RecoveryState.STANDBY) {
        // Reply with the number of workers decommissioned
        context.reply(decommissionWorkersOnHosts(hostnames))
      } else {
        // Otherwise reply 0
        context.reply(0)
      }
  }
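
  // A brief sketch (not part of this file) of how a client consumes the replies above: the
  // caller holds an RpcEndpointRef to this Master and uses a blocking ask, so each
  // `context.reply(...)` becomes the typed return value of the ask. `masterRef` is an assumed
  // reference obtained elsewhere via the RpcEnv.
  private def askSketch(masterRef: RpcEndpointRef, driverId: String): Unit = {
    val status = masterRef.askSync[DriverStatusResponse](RequestDriverStatus(driverId))
    if (status.found) logInfo(s"Driver $driverId is in state ${status.state}")
  }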
  
  // A remote endpoint disconnected
  override def onDisconnected(address: RpcAddress): Unit = {
    // The disconnected client could've been either a worker or an app; remove whichever it was
    // Log that the address disassociated
    logInfo(s"$address got disassociated, removing it.")
    // Find the worker at this address, if any, and remove it
    addressToWorker.get(address).foreach(removeWorker(_, s"${address} got disassociated"))
    // Find the application at this address, if any, and finish it
    addressToApp.get(address).foreach(finishApplication)
    // If we are recovering and everything has checked in, finish recovery
    if (state == RecoveryState.RECOVERING && canCompleteRecovery) { completeRecovery() }
  }
  
  // Recovery can complete once no worker and no application is left in the UNKNOWN state
  private def canCompleteRecovery =
    workers.count(_.state == WorkerState.UNKNOWN) == 0 &&
      apps.count(_.state == ApplicationState.UNKNOWN) == 0
  
  // Begin recovery
  private def beginRecovery(storedApps: Seq[ApplicationInfo], storedDrivers: Seq[DriverInfo],
      storedWorkers: Seq[WorkerInfo]): Unit = {
    // Re-register every stored application
    for (app <- storedApps) {
      logInfo("Trying to recover app: " + app.id)
      try {
        // Re-register the application
        registerApplication(app)
        // Mark it UNKNOWN until it responds
        app.state = ApplicationState.UNKNOWN
        // Tell the application's driver that the Master has changed to this one
        app.driver.send(MasterChanged(self, masterWebUiUrl))
      } catch {
      // Log the failure if the application cannot be reached
        case e: Exception => logInfo("App " + app.id + " had exception on reconnect")
      }
    }
    
    // Take over the stored drivers
    for (driver <- storedDrivers) {
      // Here we just read in the list of drivers. Any drivers associated with now-lost workers
      // will be re-launched when we detect that the worker is missing.
      drivers += driver
    }
    
    // Re-register every stored worker
    for (worker <- storedWorkers) {
      // Trying to recover the worker
      logInfo("Trying to recover worker: " + worker.id)
      try {
        // Register the worker
        registerWorker(worker)
        // Mark it UNKNOWN until it responds
        worker.state = WorkerState.UNKNOWN
        // Tell the worker's endpoint that the Master has changed
        worker.endpoint.send(MasterChanged(self, masterWebUiUrl))
      } catch {
      // Log the failure if the worker cannot be reached
        case e: Exception => logInfo("Worker " + worker.id + " had exception on reconnect")
      }
    }
  }
  
  // Complete recovery
  private def completeRecovery(): Unit = {
    // Ensure "only-once" recovery semantics using a short synchronization period.
    // Return unless we are currently recovering
    if (state != RecoveryState.RECOVERING) { return }
    
    // Mark recovery as completing
    state = RecoveryState.COMPLETING_RECOVERY

    // Kill off any workers and apps that didn't respond to us.
    workers.filter(_.state == WorkerState.UNKNOWN).foreach(
      // Remove every worker still in the UNKNOWN state
      removeWorker(_, "Not responding for recovery"))
    // Finish every application still in the UNKNOWN state
    apps.filter(_.state == ApplicationState.UNKNOWN).foreach(finishApplication)

    // Update the state of recovered apps to RUNNING
    apps.filter(_.state == ApplicationState.WAITING).foreach(_.state = ApplicationState.RUNNING)

    // Reschedule drivers which were not claimed by any workers
    drivers.filter(_.worker.isEmpty).foreach { d =>
      // The driver was not found after master recovery
      logWarning(s"Driver ${d.id} was not found after master recovery")
      // If the driver is supervised
      if (d.desc.supervise) {
        logWarning(s"Re-launching ${d.id}")
        // Relaunch it
        relaunchDriver(d)
      } else {
        // Otherwise remove it
        removeDriver(d.id, DriverState.ERROR, None)
        // It was not relaunched because it was not supervised
        logWarning(s"Did not re-launch ${d.id} because it was not supervised")
      }
    }
    
    // Mark the master ALIVE
    state = RecoveryState.ALIVE
    // Schedule resources
    schedule()
    // Recovery complete
    logInfo("Recovery complete - resuming operations!")
  }

  /**
   * Schedule executors to be launched on the workers.
   * Returns an array containing number of cores assigned to each worker.
   *
   * There are two modes of launching executors. The first attempts to spread out an application's
   * executors on as many workers as possible, while the second does the opposite (i.e. launch them
   * on as few workers as possible). The former is usually better for data locality purposes and is
   * the default.
   *
   * The number of cores assigned to each executor is configurable. When this is explicitly set,
   * multiple executors from the same application may be launched on the same worker if the worker
   * has enough cores and memory. Otherwise, each executor grabs all the cores available on the
   * worker by default, in which case only one executor per application may be launched on each
   * worker during one single schedule iteration.
   * Note that when `spark.executor.cores` is not set, we may still launch multiple executors from
   * the same application on the same worker. Consider appA and appB both have one executor running
   * on worker1, and appA.coresLeft > 0, then appB is finished and release all its cores on worker1,
   * thus for the next schedule iteration, appA launches a new executor that grabs all the free
   * cores on worker1, therefore we get multiple executors from appA running on worker1.
   *
   * It is important to allocate coresPerExecutor on each worker at a time (instead of 1 core
   * at a time). Consider the following example: cluster has 4 workers with 16 cores each.
   * User requests 3 executors (spark.cores.max = 48, spark.executor.cores = 16). If 1 core is
   * allocated at a time, 12 cores from each worker would be assigned to each executor.
   * Since 12 < 16, no executors would launch [SPARK-8881].
   */
  
  // Schedule executors on workers
  private def scheduleExecutorsOnWorkers(
      app: ApplicationInfo,
      // The usable workers
      usableWorkers: Array[WorkerInfo],
      // Whether to spread the application out
      spreadOutApps: Boolean): Array[Int] = {
    // Cores requested per executor
    val coresPerExecutor = app.desc.coresPerExecutor
    // Minimum cores each executor needs
    val minCoresPerExecutor = coresPerExecutor.getOrElse(1)
    // One executor per worker when coresPerExecutor is not set
    val oneExecutorPerWorker = coresPerExecutor.isEmpty
    // Memory required per executor
    val memoryPerExecutor = app.desc.memoryPerExecutorMB
    // Resources required per executor
    val resourceReqsPerExecutor = app.desc.resourceReqsPerExecutor
    // Number of usable workers
    val numUsable = usableWorkers.length
    // Cores assigned so far on each worker
    val assignedCores = new Array[Int](numUsable) // Number of cores to give to each worker
    // Executors assigned so far on each worker
    val assignedExecutors = new Array[Int](numUsable) // Number of new executors on each worker
    // Cores still to assign: the smaller of what the app still needs and what the usable workers have free
    var coresToAssign = math.min(app.coresLeft, usableWorkers.map(_.coresFree).sum)

    /** Return whether the specified worker can launch an executor for this app. */
    // Decide whether the worker at position pos can launch an executor for this app
    def canLaunchExecutorForApp(pos: Int): Boolean = {
      // Keep scheduling while the cores left to assign still cover at least one executor
      val keepScheduling = coresToAssign >= minCoresPerExecutor
      // Enough cores: the worker's free cores minus those already assigned still cover one executor
      val enoughCores = usableWorkers(pos).coresFree - assignedCores(pos) >= minCoresPerExecutor
      // Executors already assigned on this worker
      val assignedExecutorNum = assignedExecutors(pos)

      // If we allow multiple executors per worker, then we can always launch new executors.
      // Otherwise, if there is already an executor on this worker, just give it more cores.
      // True if multiple executors per worker are allowed, or if no executor has been assigned here yet
      val launchingNewExecutor = !oneExecutorPerWorker || assignedExecutorNum == 0
      if (launchingNewExecutor) {
        // Memory assigned so far: executors assigned times memory per executor
        val assignedMemory = assignedExecutorNum * memoryPerExecutor
        // Enough memory: the worker's free memory minus what is assigned still covers one executor
        val enoughMemory = usableWorkers(pos).memoryFree - assignedMemory >= memoryPerExecutor
        // Resources assigned so far
        val assignedResources = resourceReqsPerExecutor.map {
          req => req.resourceName -> req.amount * assignedExecutorNum
        }.toMap
        // Resources still free
        val resourcesFree = usableWorkers(pos).resourcesAmountFree.map {

          case (rName, free) => rName -> (free - assignedResources.getOrElse(rName, 0))
        }
        // Enough resources: the free resources cover one more executor's requirements
        val enoughResources = ResourceUtils.resourcesMeetRequirements(
          resourcesFree, resourceReqsPerExecutor)
        // Under the limit: executors assigned plus the app's existing executors stay below its executor limit
        val underLimit = assignedExecutors.sum + app.executors.size < app.executorLimit
        // Launch only if every requirement is satisfied
        keepScheduling && enoughCores && enoughMemory && enoughResources && underLimit
      } else {
        // We're adding cores to an existing executor, so no need
        // to check memory and executor limits
        // Only the scheduling flag and core availability matter here
        keepScheduling && enoughCores
      }
    }

    // Keep launching executors until no more workers can accommodate any
    // more executors, or if we have reached this application's limits
    // Workers currently able to take an executor for this app
    var freeWorkers = (0 until numUsable).filter(canLaunchExecutorForApp)
    // While there are still free workers
    while (freeWorkers.nonEmpty) {
      // Visit each free worker
      freeWorkers.foreach { pos =>
        // By default keep scheduling on this worker
        var keepScheduling = true
        // While scheduling may continue and the worker can still launch an executor for the app
        while (keepScheduling && canLaunchExecutorForApp(pos)) {
          // Take one executor's worth of cores out of the pool
          coresToAssign -= minCoresPerExecutor
          // And add them to this worker's assignment
          assignedCores(pos) += minCoresPerExecutor

          // If we are launching one executor per worker, then every iteration assigns 1 core
          // to the executor. Otherwise, every iteration assigns cores to a new executor.
          // Check whether we run a single executor per worker
          if (oneExecutorPerWorker) {
            // A single executor: mark one assigned
            assignedExecutors(pos) = 1
          } else {
            // Otherwise each pass assigns a new executor
            assignedExecutors(pos) += 1
          }

          // Spreading out an application means spreading out its executors across as
          // many workers as possible. If we are not spreading out, then we should keep
          // scheduling executors on this worker until we use all of its resources.
          // Otherwise, just move on to the next worker.
          // When spreading out, stop scheduling on this worker and move on to the next one
          if (spreadOutApps) {
            // Move on instead of packing this worker
            keepScheduling = false
          }
        }
      }
      // Recompute the free workers, dropping any that are now full
      freeWorkers = freeWorkers.filter(canLaunchExecutorForApp)
    }
    // Return the cores assigned to each worker
    assignedCores
  }
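
  // A minimal worked sketch (not part of the Master source) of the SPARK-8881 example from the
  // doc comment above: 4 workers with 16 free cores each, spark.cores.max = 48 and
  // spark.executor.cores = 16. Allocating a whole executor (16 cores) at a time round-robin
  // launches 3 executors; allocating 1 core at a time would strand 12 cores on every worker
  // (12 < 16) and launch none.
  private def spark8881Sketch(): Array[Int] = {
    val coresFree = Array(16, 16, 16, 16) // free cores on each worker
    val coresPerExecutor = 16             // spark.executor.cores
    var coresToAssign = 48                // spark.cores.max
    val assignedCores = new Array[Int](coresFree.length)
    var pos = 0
    while (coresToAssign >= coresPerExecutor &&
        coresFree.indices.exists(i => coresFree(i) - assignedCores(i) >= coresPerExecutor)) {
      if (coresFree(pos) - assignedCores(pos) >= coresPerExecutor) {
        assignedCores(pos) += coresPerExecutor // one whole executor lands on this worker
        coresToAssign -= coresPerExecutor
      }
      pos = (pos + 1) % coresFree.length       // spread out across the workers
    }
    assignedCores                              // => Array(16, 16, 16, 0): three executors
  }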

  /**
   * Schedule and launch executors on workers
   */
  private def startExecutorsOnWorkers(): Unit = {
    // Right now this is a very simple FIFO scheduler. We keep trying to fit in the first app
    // in the queue, then the second app, etc.
    // Walk the waiting applications in FIFO order
    for (app <- waitingApps) {
      // Cores per executor, defaulting to 1
      val coresPerExecutor = app.desc.coresPerExecutor.getOrElse(1)
      // If the cores left is less than the coresPerExecutor,the cores left will not be allocated
      if (app.coresLeft >= coresPerExecutor) {
        // Filter out workers that don't have enough resources to launch an executor
        // Keep only ALIVE workers with enough resources, sorted by free cores in descending order
        val usableWorkers = workers.toArray.filter(_.state == WorkerState.ALIVE)
          .filter(canLaunchExecutor(_, app.desc))
          .sortBy(_.coresFree).reverse
        // The app may hang if it is the only waiting app, has no executors yet, and no worker can host it
        val appMayHang = waitingApps.length == 1 &&
          waitingApps.head.executors.isEmpty && usableWorkers.isEmpty
        // If so
        if (appMayHang) {
          // Warn that the app requires more resources than any single worker has
          logWarning(s"App ${app.id} requires more resource than any of Workers could have.")
        }

        // Cores each worker assigns to this application
        val assignedCores = scheduleExecutorsOnWorkers(app, usableWorkers, spreadOutApps)

        // Now that we've decided how many cores to allocate on each worker, let's allocate them
        for (pos <- 0 until usableWorkers.length if assignedCores(pos) > 0) {
          // Hand the worker's resources to the executors
          allocateWorkerResourceToExecutors(
            app, assignedCores(pos), app.desc.coresPerExecutor, usableWorkers(pos))
        }
      }
    }
  }

  /**
   * Allocate a worker's resources to one or more executors.
   * @param app the info of the application which the executors belong to
   * @param assignedCores number of cores on this worker for this application
   * @param coresPerExecutor number of cores per executor
   * @param worker the worker info
   */
  private def allocateWorkerResourceToExecutors(
      app: ApplicationInfo,
      assignedCores: Int,
      coresPerExecutor: Option[Int],
      worker: WorkerInfo): Unit = {
    // If the number of cores per executor is specified, we divide the cores assigned
    // to this worker evenly among the executors with no remainder.
    // Otherwise, we launch a single executor that grabs all the assignedCores on this worker.
    // Split the assigned cores evenly among the executors, with no remainder
    val numExecutors = coresPerExecutor.map { assignedCores / _ }.getOrElse(1)
    // Cores per executor: the explicit setting if present, otherwise all the assigned cores
    val coresToAssign = coresPerExecutor.getOrElse(assignedCores)
    // For each executor to launch
    for (i <- 1 to numExecutors) {
      // Acquire the executor's resources from the worker
      val allocated = worker.acquireResources(app.desc.resourceReqsPerExecutor)
      // Add the executor to the application
      val exec = app.addExecutor(worker, coresToAssign, allocated)
      // Launch the executor on the worker
      launchExecutor(worker, exec)
      // Mark the application RUNNING
      app.state = ApplicationState.RUNNING
    }
  }
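
  // A small sketch (hypothetical helper, not in the source) of the split performed above: with
  // assignedCores = 12 and coresPerExecutor = Some(4) the worker launches 12 / 4 = 3 executors
  // of 4 cores each; with coresPerExecutor = None it launches one executor taking all 12 cores.
  private def splitSketch(assignedCores: Int, coresPerExecutor: Option[Int]): (Int, Int) = {
    val numExecutors = coresPerExecutor.map(assignedCores / _).getOrElse(1)
    val coresEach = coresPerExecutor.getOrElse(assignedCores)
    (numExecutors, coresEach) // (3, 4) for (12, Some(4)); (1, 12) for (12, None)
  }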
  
  // Whether a worker can launch something with the given memory, cores and resources
  private def canLaunch(
      worker: WorkerInfo,
      memoryReq: Int,
      coresReq: Int,
      resourceRequirements: Seq[ResourceRequirement])
    : Boolean = {
    // Enough free memory on the worker
    val enoughMem = worker.memoryFree >= memoryReq
    // Enough free cores on the worker
    val enoughCores = worker.coresFree >= coresReq
    // Enough free resources on the worker
    val enoughResources = ResourceUtils.resourcesMeetRequirements(
      worker.resourcesAmountFree, resourceRequirements)
    // Launch only if all three conditions hold
    enoughMem && enoughCores && enoughResources
  }

  /**
   * @return whether the worker could launch the driver represented by DriverDescription
   */
  private def canLaunchDriver(worker: WorkerInfo, desc: DriverDescription): Boolean = {
    canLaunch(worker, desc.mem, desc.cores, desc.resourceReqs)
  }

  /**
   * @return whether the worker could launch the executor according to application's requirement
   */
  private def canLaunchExecutor(worker: WorkerInfo, desc: ApplicationDescription): Boolean = {
    canLaunch(
      worker,
      desc.memoryPerExecutorMB,
      desc.coresPerExecutor.getOrElse(1),
      desc.resourceReqsPerExecutor)
  }

  /**
   * Schedule the currently available resources among waiting apps. This method will be called
   * every time a new app joins or resource availability changes.
   */
  private def schedule(): Unit = {
    // Return unless the master is ALIVE
    if (state != RecoveryState.ALIVE) {
      return
    }
    // Drivers take strict precedence over executors
    // Random.shuffle randomizes the order of a collection
    // Shuffle the ALIVE workers
    val shuffledAliveWorkers = Random.shuffle(workers.toSeq.filter(_.state == WorkerState.ALIVE))
    // Number of alive workers
    val numWorkersAlive = shuffledAliveWorkers.size
    // Start from position 0
    var curPos = 0
    // Iterate over the waiting drivers
    for (driver <- waitingDrivers.toList) { // iterate over a copy of waitingDrivers
      // We assign workers to each waiting driver in a round-robin fashion. For each driver, we
      // start from the last worker that was assigned a driver, and continue onwards until we have
      // explored all alive workers.

      // Not launched yet
      var launched = false

      // Assume the cluster is idle until a busy worker is seen
      var isClusterIdle = true

      // Number of workers visited
      var numWorkersVisited = 0

      // While there are unvisited alive workers and the driver has not been launched
      while (numWorkersVisited < numWorkersAlive && !launched) {
        // Take the worker at curPos in the shuffled list
        val worker = shuffledAliveWorkers(curPos)
        // The cluster counts as idle only if this worker runs no drivers and no executors
        isClusterIdle = worker.drivers.isEmpty && worker.executors.isEmpty
        // One more worker visited
        numWorkersVisited += 1
        // If the worker can launch the driver
        if (canLaunchDriver(worker, driver.desc)) {
          // Acquire the driver's resources from the worker
          val allocated = worker.acquireResources(driver.desc.resourceReqs)
          // Attach the resources to the driver
          driver.withResources(allocated)
          // Launch the driver on the worker
          launchDriver(worker, driver)
          // Drop the driver from the waiting list
          waitingDrivers -= driver
          // Mark it launched
          launched = true
        }
        // Advance the position, wrapping around
        curPos = (curPos + 1) % numWorkersAlive
      }
      // If the driver did not launch and the cluster looks idle
      if (!launched && isClusterIdle) {
        // Warn that the driver needs more resources than any worker has
        logWarning(s"Driver ${driver.id} requires more resource than any of Workers could have.")
      }
    }
    // Launch executors on the workers
    startExecutorsOnWorkers()
  }
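
  // A minimal sketch (hypothetical, simplified from the loop above) of the round-robin driver
  // placement: starting at startPos, probe each alive worker once and stop at the first one
  // whose free memory, cores and resources can satisfy the driver's demand.
  private def pickWorkerSketch(
      aliveWorkers: IndexedSeq[WorkerInfo],
      startPos: Int,
      canLaunch: WorkerInfo => Boolean): Option[WorkerInfo] = {
    (0 until aliveWorkers.length)
      .map(i => aliveWorkers((startPos + i) % aliveWorkers.length)) // wrap around the ring
      .find(canLaunch)                                              // first worker that fits
  }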
  
  // Launch an executor on a worker
  private def launchExecutor(worker: WorkerInfo, exec: ExecutorDesc): Unit = {
    // Log the launch
    logInfo("Launching executor " + exec.fullId + " on worker " + worker.id)
    // Record the executor on the worker
    worker.addExecutor(exec)
    // Tell the worker to launch the executor
    worker.endpoint.send(LaunchExecutor(masterUrl, exec.application.id, exec.id,
      exec.application.desc, exec.cores, exec.memory, exec.resources))
    // Tell the application's driver that the executor was added
    exec.application.driver.send(
      ExecutorAdded(exec.id, worker.id, worker.hostPort, exec.cores, exec.memory))
  }
  
  // Register a worker
  private def registerWorker(worker: WorkerInfo): Boolean = {
    // There may be one or more refs to dead workers on this same node (w/ different ID's),
    // remove them.
    // Drop stale references to dead workers on the same host and port
    workers.filter { w =>
      (w.host == worker.host && w.port == worker.port) && (w.state == WorkerState.DEAD)
    }.foreach { w =>
      workers -= w
    }
    // The worker's address
    val workerAddress = worker.endpoint.address
    // If a worker is already registered at this address
    if (addressToWorker.contains(workerAddress)) {
      // The previously registered worker
      val oldWorker = addressToWorker(workerAddress)
      // If the old worker's state is UNKNOWN
      if (oldWorker.state == WorkerState.UNKNOWN) {
        // A worker registering from UNKNOWN implies that the worker was restarted during recovery.
        // The old worker must thus be dead, so we will remove it and accept the new worker.
        // Remove the old worker
        removeWorker(oldWorker, "Worker replaced by a new worker with same address")
      } else {
        logInfo("Attempted to re-register worker at same address: " + workerAddress)
        return false
      }
    }
    // Add the worker to the workers set
    workers += worker
    // Map its id to the worker
    idToWorker(worker.id) = worker
    // Map its address to the worker
    addressToWorker(workerAddress) = worker
    true
  }

  /**
   * Decommission all workers that are active on any of the given hostnames. The decommissioning is
   * asynchronously done by enqueueing WorkerDecommission messages to self. No checks are done about
   * the prior state of the worker. So an already decommissioned worker will match as well.
   *
   * @param hostnames: A list of hostnames without the ports. Like "localhost", "foo.bar.com" etc
   *
   * Returns the number of workers that matched the hostnames.
   */
  private def decommissionWorkersOnHosts(hostnames: Seq[String]): Integer = {
    // Lower-case the hostnames
    val hostnamesSet = hostnames.map(_.toLowerCase(Locale.ROOT)).toSet
    // Find the workers to remove
    val workersToRemove = addressToWorker
      .filterKeys(addr => hostnamesSet.contains(addr.host.toLowerCase(Locale.ROOT)))
      .values
    
    // Their host:port pairs
    val workersToRemoveHostPorts = workersToRemove.map(_.hostPort)
    // Log which host:ports are being decommissioned
    logInfo(s"Decommissioning the workers with host:ports ${workersToRemoveHostPorts}")

    // The workers are removed async to avoid blocking the receive loop for the entire batch
    // Tell ourselves to decommission these workers
    self.send(DecommissionWorkers(workersToRemove.map(_.id).toSeq))

    // Return the count of workers actually removed
    workersToRemove.size
  }
  

  // Decommission a single worker
  private def decommissionWorker(worker: WorkerInfo): Unit = {
    // If the worker is not already decommissioned
    if (worker.state != WorkerState.DECOMMISSIONED) {
      // Log the decommissioning
      logInfo("Decommissioning worker %s on %s:%d".format(worker.id, worker.host, worker.port))
      // Mark the worker DECOMMISSIONED
      worker.setState(WorkerState.DECOMMISSIONED)
      // For every executor on this worker
      for (exec <- worker.executors.values) {
        logInfo("Telling app of decommission executors")
        // Tell the owning application's driver that the executor is being decommissioned
        exec.application.driver.send(ExecutorUpdated(
          exec.id, ExecutorState.DECOMMISSIONED,
          Some("worker decommissioned"), None,
          // worker host is being set here to let the driver know that the host (aka. worker)
          // is also being decommissioned. So the driver can unregister all the shuffle map
          // statues located at this host when it receives the executor lost event.
          Some(worker.host)))
        // Mark the executor DECOMMISSIONED
        exec.state = ExecutorState.DECOMMISSIONED
        // Remove the executor from the application
        exec.application.removeExecutor(exec)
      }
      // On recovery do not add a decommissioned executor
      persistenceEngine.removeWorker(worker)
    } else {
      // The worker is already decommissioned
      logWarning("Skipping decommissioning worker %s on %s:%d as worker is already decommissioned".
        format(worker.id, worker.host, worker.port))
    }
  }
  
  // Remove a worker
  private def removeWorker(worker: WorkerInfo, msg: String): Unit = {
    logInfo("Removing worker " + worker.id + " on " + worker.host + ":" + worker.port)
    // Mark the worker DEAD
    worker.setState(WorkerState.DEAD)
    // Drop its id mapping
    idToWorker -= worker.id
    // Drop its address mapping
    addressToWorker -= worker.endpoint.address
    
    // For every executor on this worker
    for (exec <- worker.executors.values) {
      logInfo("Telling app of lost executor: " + exec.id)
      // Tell the driver of the owning application that the executor was lost
      exec.application.driver.send(ExecutorUpdated(
        exec.id, ExecutorState.LOST, Some("worker lost"), None, Some(worker.host)))
      // Mark the executor LOST
      exec.state = ExecutorState.LOST
      // Remove the executor from the application
      exec.application.removeExecutor(exec)
    }

    // For every driver on this worker
    for (driver <- worker.drivers.values) {
      // If the driver is supervised
      if (driver.desc.supervise) {
        logInfo(s"Re-launching ${driver.id}")
        // Relaunch it
        relaunchDriver(driver)
      } else {
        // It is not relaunched because it was not supervised
        logInfo(s"Not re-launching ${driver.id} because it was not supervised")
        // Remove the failed driver
        removeDriver(driver.id, DriverState.ERROR, None)
      }
    }
    logInfo(s"Telling app of lost worker: " + worker.id)
    // For every application that has not completed
    apps.filterNot(completedApps.contains(_)).foreach { app =>
      // Tell its driver that the worker was removed
      app.driver.send(WorkerRemoved(worker.id, worker.host, msg))
    }
    // Remove the worker from the persistence engine
    persistenceEngine.removeWorker(worker)
    // Re-run the scheduler
    schedule()
  }
  

  // Relaunch a driver
  private def relaunchDriver(driver: DriverInfo): Unit = {
    // We must setup a new driver with a new driver id here, because the original driver may
    // be still running. Consider this scenario: a worker is network partitioned with master,
    // the master then relaunches driver driverID1 with a driver id driverID2, then the worker
    // reconnects to master. From this point on, if driverID2 is equal to driverID1, then master
    // can not distinguish the statusUpdate of the original driver and the newly relaunched one,
    // for example, when DriverStateChanged(driverID1, KILLED) arrives at master, master will
    // remove driverID1, so the newly relaunched driver disappears too. See SPARK-19900 for details.
    //我们必须在此处设置一个具有新driver id的新driver,因为原始driver可能仍在运行。
    //考虑这个场景:工作进程与主进程进行网络分区,然后主进程使用驱动程序id driverID2重新启动驱动程序driverID1,
    //然后工作进程重新连接到主进程。从这一点开始,如果driverID2等于driverID1,
    //则master无法区分原始驱动程序和新重新启动的驱动程序的状态Update,
    //例如,当DriverStateChanged(driverID 1,KILLED)到达master时,master会删除driverIDl,
    //因此新重新启动的驱动程序也会消失。有关详细信息,请参阅SPARK-19900。
    //移除driver
    removeDriver(driver.id, DriverState.RELAUNCHING, None)
    //新建driver
    val newDriver = createDriver(driver.desc)
    //在持久化引擎中添加该driver
    persistenceEngine.addDriver(newDriver)
    //在drivers列表中添加driver
    drivers.add(newDriver)
    //将该driver添加到等待列表中
    waitingDrivers += newDriver
    //重新调度driver
    schedule()
  }

  // Create an application
  private def createApplication(desc: ApplicationDescription, driver: RpcEndpointRef):
      ApplicationInfo = {
    val now = System.currentTimeMillis()
    val date = new Date(now)
    val appId = newApplicationId(date)
    new ApplicationInfo(now, appId, desc, date, driver, defaultCores)
  }
  
  // Register an application
  private def registerApplication(app: ApplicationInfo): Unit = {
    // The address of the driver that submitted the application
    val appAddress = app.driver.address
    // If an application is already registered at this address, skip
    if (addressToApp.contains(appAddress)) {
      logInfo("Attempted to re-register application at same address: " + appAddress)
      return
    }
    // Register the application with the application metrics system
    applicationMetricsSystem.registerSource(app.appSource)
    // Add the application to the apps set
    apps += app
    // Record the application id mapping
    idToApp(app.id) = app
    // Record the driver endpoint mapping
    endpointToApp(app.driver) = app
    // Record the driver address mapping
    addressToApp(appAddress) = app
    // Add the application to the waiting list
    waitingApps += app
  }
  
  // Finish an application by removing it in the FINISHED state
  private def finishApplication(app: ApplicationInfo): Unit = {
    removeApplication(app, ApplicationState.FINISHED)
  }
  
  // Remove an application
  def removeApplication(app: ApplicationInfo, state: ApplicationState.Value): Unit = {
    // Only act if the application is still known
    if (apps.contains(app)) {
      logInfo("Removing app " + app.id)
      // Drop the application from the apps set
      apps -= app
      // Drop the id mapping
      idToApp -= app.id
      // Drop the driver endpoint mapping
      endpointToApp -= app.driver
      // Drop the driver address mapping
      addressToApp -= app.driver.address

      // If the completed-apps list has reached the retention limit, trim the oldest entries
      if (completedApps.size >= retainedApplications) {
        // Number of applications to trim: 10% of the limit, at least 1
        val toRemove = math.max(retainedApplications / 10, 1)
        // Unregister the oldest completed applications from the metrics system
        completedApps.take(toRemove).foreach { a =>
          applicationMetricsSystem.removeSource(a.appSource)
        }
        // Drop the first toRemove entries from the completed list
        completedApps.trimStart(toRemove)
      }
      completedApps += app // Remember it in our history
      // Drop the application from the waiting list
      waitingApps -= app

      // Kill every executor belonging to the application
      for (exec <- app.executors.values) {
        killExecutor(exec)
      }
      // Record the application's final state
      app.markFinished(state)
      // If the application did not finish normally, tell its driver it has been removed
      if (state != ApplicationState.FINISHED) {
        app.driver.send(ApplicationRemoved(state.toString))
      }
      // Remove the application from the persistence engine
      persistenceEngine.removeApplication(app)
      // Reschedule
      schedule()

      // Tell all workers that the application has finished, so they can clean up any app state.
      workers.foreach { w =>
        w.endpoint.send(ApplicationFinished(app.id))
      }
    }
  }
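
To make the trimming arithmetic concrete, here is a tiny sketch, assuming the default of 200 for spark.deploy.retainedApplications (the sizes here are made up): once the completed list reaches the limit, the oldest max(200 / 10, 1) = 20 entries are dropped in a single batch.

object RetentionDemo {
  def main(args: Array[String]): Unit = {
    // Assumed default: spark.deploy.retainedApplications = 200
    val retainedApplications = 200
    val completedAppsSize = 200
    if (completedAppsSize >= retainedApplications) {
      // Trim 10% of the limit (at least one) in one batch
      val toRemove = math.max(retainedApplications / 10, 1)
      println(s"trimming the oldest $toRemove completed apps") // 20
    }
  }
}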

  /**
   * Handle a request to set the target number of executors for this application.
   *
   * If the executor limit is adjusted upwards, new executors will be launched provided
   * that there are workers with sufficient resources. If it is adjusted downwards, however,
   * we do not kill existing executors until we explicitly receive a kill request.
   *
   * @return whether the application has previously registered with this Master.
   */
  private def handleRequestExecutors(appId: String, requestedTotal: Int): Boolean = {
    // Look the application up by id
    idToApp.get(appId) match {
      case Some(appInfo) =>
        logInfo(s"Application $appId requested to set total executors to $requestedTotal.")
        // Update the application's executor limit
        appInfo.executorLimit = requestedTotal
        // Reschedule
        schedule()
        true
      // Unknown application
      case None =>
        logWarning(s"Unknown application $appId requested $requestedTotal total executors.")
        false
    }
  }

  /**
   * Handle a kill request from the given application.
   *
   * This method assumes the executor limit has already been adjusted downwards through
   * a separate [[RequestExecutors]] message, such that we do not launch new executors
   * immediately after the old ones are removed.
   *
   * @return whether the application has previously registered with this Master.
   */
  private def handleKillExecutors(appId: String, executorIds: Seq[Int]): Boolean = {
    // Look the application up by id
    idToApp.get(appId) match {
      case Some(appInfo) =>
        logInfo(s"Application $appId requests to kill executors: " + executorIds.mkString(", "))
        // Split the requested ids into ones the application knows about and ones it does not
        val (known, unknown) = executorIds.partition(appInfo.executors.contains)
        // Kill each known executor
        known.foreach { executorId =>
          // Fetch the executor's description
          val desc = appInfo.executors(executorId)
          // Remove the executor from the application
          appInfo.removeExecutor(desc)
          // Kill it
          killExecutor(desc)
        }
        // Warn about any unknown executor ids
        if (unknown.nonEmpty) {
          logWarning(s"Application $appId attempted to kill non-existent executors: "
            + unknown.mkString(", "))
        }
        // Reschedule
        schedule()
        true
      // Unregistered application
      case None =>
        logWarning(s"Unregistered application $appId requested us to kill executors!")
        false
    }
  }

  /**
   * Cast the given executor IDs to integers and filter out the ones that fail.
   *
   * All executors IDs should be integers since we launched these executors. However,
   * the kill interface on the driver side accepts arbitrary strings, so we need to
   * handle non-integer executor IDs just to be safe.
   */
  private def formatExecutorIds(executorIds: Seq[String]): Seq[Int] = {
    executorIds.flatMap { executorId =>
      try {
        // Keep ids that parse as integers
        Some(executorId.toInt)
      } catch {
        case e: NumberFormatException =>
          // Drop ids that do not parse
          logError(s"Encountered executor with a non-integer ID: $executorId. Ignoring")
          None
      }
    }
  }
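
The flatMap + Option combination above is what silently drops the unparsable ids. A standalone sketch of the same idea (the id strings are made up for illustration):

object FormatIdsDemo {
  def formatExecutorIds(executorIds: Seq[String]): Seq[Int] =
    executorIds.flatMap(id => scala.util.Try(id.toInt).toOption)

  def main(args: Array[String]): Unit = {
    // "abc" fails to parse and is filtered out
    println(formatExecutorIds(Seq("1", "2", "abc"))) // List(1, 2)
  }
}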

  /**
   * Ask the worker on which the specified executor is launched to kill the executor.
   */
  private def killExecutor(exec: ExecutorDesc): Unit = {
    // Remove the executor from its worker
    exec.worker.removeExecutor(exec)
    // Tell the worker to kill the executor
    exec.worker.endpoint.send(KillExecutor(masterUrl, exec.application.id, exec.id))
    // Mark the executor as KILLED
    exec.state = ExecutorState.KILLED
  }

  /** Generate a new app ID given an app's submission date */
  private def newApplicationId(submitDate: Date): String = {
    val appId = "app-%s-%04d".format(createDateFormat.format(submitDate), nextAppNumber)
    nextAppNumber += 1
    appId
  }
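
With the createDateFormat pattern yyyyMMddHHmmss defined earlier, the generated ids look like app-20231105143020-0000 (the timestamp here is illustrative). A minimal sketch of the same formatting:

import java.text.SimpleDateFormat
import java.util.{Date, Locale}

object AppIdDemo {
  private val fmt = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US)
  private var nextAppNumber = 0

  def newApplicationId(submitDate: Date): String = {
    val appId = "app-%s-%04d".format(fmt.format(submitDate), nextAppNumber)
    nextAppNumber += 1
    appId
  }

  def main(args: Array[String]): Unit = {
    println(newApplicationId(new Date())) // e.g. app-20231105143020-0000
    println(newApplicationId(new Date())) // e.g. app-20231105143020-0001
  }
}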

  /** Check for, and remove, any timed-out workers */
  private def timeOutDeadWorkers(): Unit = {
    // Copy the workers into an array so we don't modify the hashset while iterating through it
    val currentTime = System.currentTimeMillis()
    // Select workers whose last heartbeat is older than the timeout window
    val toRemove = workers.filter(_.lastHeartbeat < currentTime - workerTimeoutMs).toArray
    for (worker <- toRemove) {
      // If the worker is not yet marked DEAD, remove it now
      if (worker.state != WorkerState.DEAD) {
        val workerTimeoutSecs = TimeUnit.MILLISECONDS.toSeconds(workerTimeoutMs)
        // No heartbeat was received from this worker within the timeout
        logWarning("Removing %s because we got no heartbeat in %d seconds".format(
          worker.id, workerTimeoutSecs))
        removeWorker(worker, s"Not receiving heartbeat for $workerTimeoutSecs seconds")
      } else {
        if (worker.lastHeartbeat < currentTime - ((reaperIterations + 1) * workerTimeoutMs)) {
          workers -= worker // we've seen this DEAD worker in the UI, etc. for long enough; cull it
        }
      }
    }
  }
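
Two different thresholds are at work here. A quick sketch of the arithmetic, assuming the default worker timeout of 60 seconds and the default of 15 for reaperIterations (both values are assumptions; check your config): a silent worker is removed after 60 seconds, while an already-DEAD worker is only culled from the workers set after (15 + 1) × 60 = 960 seconds.

object WorkerTimeoutDemo {
  // Assumed defaults: spark.worker.timeout = 60s, reaperIterations = 15
  val workerTimeoutMs = 60 * 1000L
  val reaperIterations = 15

  def main(args: Array[String]): Unit = {
    // A live worker is removed once its heartbeat is older than this
    println(s"remove after ${workerTimeoutMs / 1000} s of silence")
    // A DEAD worker is kept for the UI, then culled after this
    println(s"cull after ${(reaperIterations + 1) * workerTimeoutMs / 1000} s") // 960 s
  }
}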

  // Generate a new driver id
  private def newDriverId(submitDate: Date): String = {
    val appId = "driver-%s-%04d".format(createDateFormat.format(submitDate), nextDriverNumber)
    // Bump the sequence number for the next driver
    nextDriverNumber += 1
    appId
  }
  
  // Create a new driver
  private def createDriver(desc: DriverDescription): DriverInfo = {
    // Current time and date
    val now = System.currentTimeMillis()
    val date = new Date(now)
    // Build the driver's info object
    new DriverInfo(now, newDriverId(date), desc, date)
  }
  
  // Launch a driver on a worker
  private def launchDriver(worker: WorkerInfo, driver: DriverInfo): Unit = {
    logInfo("Launching driver " + driver.id + " on worker " + worker.id)
    // Record the driver on the worker
    worker.addDriver(driver)
    // Record the worker on the driver
    driver.worker = Some(worker)
    // Tell the worker to launch the driver
    worker.endpoint.send(LaunchDriver(driver.id, driver.desc, driver.resources))
    // Mark the driver as RUNNING
    driver.state = DriverState.RUNNING
  }
  
  // Remove a driver
  private def removeDriver(
      driverId: String,
      finalState: DriverState,
      exception: Option[Exception]): Unit = {
    // Find the driver to remove in the drivers set
    drivers.find(d => d.id == driverId) match {
      case Some(driver) =>
        logInfo(s"Removing driver: $driverId")
        // Drop the driver from the set of known drivers
        drivers -= driver
        // If the completed-drivers list has reached the retention limit, trim the oldest entries
        if (completedDrivers.size >= retainedDrivers) {
          // Number of drivers to trim: 10% of the limit, at least 1
          val toRemove = math.max(retainedDrivers / 10, 1)
          // Drop the first toRemove entries from the completed list
          completedDrivers.trimStart(toRemove)
        }
        // Remember the driver in the completed list
        completedDrivers += driver
        // Remove the driver from the persistence engine
        persistenceEngine.removeDriver(driver)
        // Record the driver's final state and any exception
        driver.state = finalState
        driver.exception = exception
        // Remove the driver from its worker, if it had one
        driver.worker.foreach(w => w.removeDriver(driver))
        // Reschedule
        schedule()
      case None =>
        logWarning(s"Asked to remove unknown driver: $driverId")
    }
  }
}

// The Master companion object
private[deploy] object Master extends Logging {
  val SYSTEM_NAME = "sparkMaster"
  val ENDPOINT_NAME = "Master"

  def main(argStrings: Array[String]): Unit = {
    Thread.setDefaultUncaughtExceptionHandler(new SparkUncaughtExceptionHandler(
      exitOnUncaughtException = false))
    Utils.initDaemon(log)
    val conf = new SparkConf
    val args = new MasterArguments(argStrings, conf)
    // Start the RpcEnv and the Master endpoint
    val (rpcEnv, _, _) = startRpcEnvAndEndpoint(args.host, args.port, args.webUiPort, conf)
    rpcEnv.awaitTermination()
  }

  /**
   * Start the Master and return a three tuple of:
   *   (1) The Master RpcEnv
   *   (2) The web UI bound port
   *   (3) The REST server bound port, if any
   */
  def startRpcEnvAndEndpoint(
      host: String,
      port: Int,
      webUiPort: Int,
      conf: SparkConf): (RpcEnv, Int, Option[Int]) = {
    // Create the security manager
    val securityMgr = new SecurityManager(conf)
    // Create the RpcEnv
    val rpcEnv = RpcEnv.create(SYSTEM_NAME, host, port, conf, securityMgr)
    // Register the Master endpoint
    val masterEndpoint = rpcEnv.setupEndpoint(ENDPOINT_NAME,
      new Master(rpcEnv, rpcEnv.address, webUiPort, securityMgr, conf))
    // Synchronously ask the Master for its bound ports
    val portsResponse = masterEndpoint.askSync[BoundPortsResponse](BoundPortsRequest)
    (rpcEnv, portsResponse.webUIPort, portsResponse.restPort)
  }
}
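
As an aside, this entry point also makes it possible to start a Master programmatically, much as Spark's own tests do. A minimal sketch with an arbitrary host and ports; note that Master is private[deploy], so this only compiles inside the org.apache.spark.deploy package:

package org.apache.spark.deploy // Master is private[deploy], so we must live in this package

import org.apache.spark.SparkConf
import org.apache.spark.deploy.master.Master

object EmbeddedMasterDemo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    // Host and ports here are arbitrary choices for the sketch
    val (rpcEnv, webUiPort, restPort) =
      Master.startRpcEnvAndEndpoint("127.0.0.1", 7077, 8080, conf)
    println(s"web UI port: $webUiPort, REST port: $restPort")
    rpcEnv.awaitTermination()
  }
}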

In the previous section we read up to the point where the Master replies to the driver that the application was registered successfully; next it starts scheduling resources with schedule():

    // Register an application
    case RegisterApplication(description, driver) =>
      // TODO Prevent repeated registrations from some driver
      // A standby Master ignores the message entirely
      if (state == RecoveryState.STANDBY) {
        // ignore, don't send response
      } else {
        logInfo("Registering app " + description.name)
        // Create the application
        val app = createApplication(description, driver)
        // Register it
        registerApplication(app)
        logInfo("Registered app " + description.name + " with ID " + app.id)
        // Add the application to the persistence engine
        persistenceEngine.addApplication(app)
        // Tell the driver that submitted the application that it is registered
        driver.send(RegisteredApplication(app.id, self))
        // Start scheduling
        schedule()
      }

Let's look more closely at how schedule() allocates resources:

/**
   * Schedule the currently available resources among waiting apps. This method will be called
   * every time a new app joins or resource availability changes.
   */
  private def schedule(): Unit = {
    // Only an ALIVE Master schedules anything
    if (state != RecoveryState.ALIVE) {
      return
    }
    // Drivers take strict precedence over executors
    // Random.shuffle randomizes the order of the ALIVE workers
    val shuffledAliveWorkers = Random.shuffle(workers.toSeq.filter(_.state == WorkerState.ALIVE))
    // Number of ALIVE workers
    val numWorkersAlive = shuffledAliveWorkers.size
    // Cursor into the shuffled worker list
    var curPos = 0
    for (driver <- waitingDrivers.toList) { // iterate over a copy of waitingDrivers
      // We assign workers to each waiting driver in a round-robin fashion. For each driver, we
      // start from the last worker that was assigned a driver, and continue onwards until we have
      // explored all alive workers.

      // Whether this driver has been launched yet
      var launched = false

      // Whether the cluster looks completely idle
      var isClusterIdle = true

      // How many workers we have visited for this driver
      var numWorkersVisited = 0

      // Keep visiting workers until the driver launches or we have seen them all
      while (numWorkersVisited < numWorkersAlive && !launched) {
        // The worker at the current cursor position
        val worker = shuffledAliveWorkers(curPos)
        // The cluster counts as idle only if this worker runs no drivers and no executors
        isClusterIdle = worker.drivers.isEmpty && worker.executors.isEmpty
        numWorkersVisited += 1
        // Can this worker launch the driver?
        if (canLaunchDriver(worker, driver.desc)) {
          // Reserve the worker's resources for the driver
          val allocated = worker.acquireResources(driver.desc.resourceReqs)
          driver.withResources(allocated)
          // Launch the driver on the worker
          launchDriver(worker, driver)
          // Drop the driver from the waiting list and mark it launched
          waitingDrivers -= driver
          launched = true
        }
        // Advance the cursor, wrapping around
        curPos = (curPos + 1) % numWorkersAlive
      }
      // If the driver could not launch even on an idle cluster,
      // no single worker can ever satisfy it
      if (!launched && isClusterIdle) {
        logWarning(s"Driver ${driver.id} requires more resource than any of Workers could have.")
      }
    }
    // Then launch executors on the workers
    startExecutorsOnWorkers()
  }

schedule() is called whenever resource availability changes or a new application is submitted. It first checks that this Master's state is ALIVE and returns immediately otherwise. It then randomly shuffles the ALIVE workers recorded in workers (the shuffle balances load, so drivers are not always piled onto the same few workers), counts them, and walks waitingDrivers, the list of drivers waiting to run submitted applications, assigning workers in round-robin order: for each driver it scans the shuffled worker list, and the first worker whose resources can satisfy the driver gets to launch it. Finally it launches executors on the workers with startExecutorsOnWorkers.
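
To see the round-robin placement in isolation, here is a self-contained sketch with workers reduced to a free-core count (all data and the fits-check are made up for illustration):

import scala.util.Random

object RoundRobinDemo {
  case class Worker(id: String, var coresFree: Int)
  case class Driver(id: String, coresNeeded: Int)

  def main(args: Array[String]): Unit = {
    val workers = Random.shuffle(Seq(Worker("w1", 2), Worker("w2", 8), Worker("w3", 4)))
    val waiting = Seq(Driver("d1", 4), Driver("d2", 4))
    var curPos = 0
    for (driver <- waiting) {
      var launched = false
      var visited = 0
      // Visit workers round-robin until the driver fits or we have seen them all
      while (visited < workers.size && !launched) {
        val w = workers(curPos)
        if (w.coresFree >= driver.coresNeeded) {
          w.coresFree -= driver.coresNeeded
          println(s"launch ${driver.id} on ${w.id}")
          launched = true
        }
        visited += 1
        curPos = (curPos + 1) % workers.size
      }
      if (!launched) println(s"${driver.id} does not fit on any worker")
    }
  }
}

Now on to startExecutorsOnWorkers: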

/**
   * Schedule and launch executors on workers
   */
  private def startExecutorsOnWorkers(): Unit = {
    // Right now this is a very simple FIFO scheduler. We keep trying to fit in the first app
    // in the queue, then the second app, etc.
    for (app <- waitingApps) {
      // Cores per executor, defaulting to 1
      val coresPerExecutor = app.desc.coresPerExecutor.getOrElse(1)
      // If the cores left is less than the coresPerExecutor, the cores left will not be allocated
      if (app.coresLeft >= coresPerExecutor) {
        // Filter out workers that don't have enough resources to launch an executor,
        // and sort the usable ones by free cores, descending
        val usableWorkers = workers.toArray.filter(_.state == WorkerState.ALIVE)
          .filter(canLaunchExecutor(_, app.desc))
          .sortBy(_.coresFree).reverse
        // The app may hang if it is the only waiting app, has no executors yet,
        // and no worker can host one
        val appMayHang = waitingApps.length == 1 &&
          waitingApps.head.executors.isEmpty && usableWorkers.isEmpty
        if (appMayHang) {
          logWarning(s"App ${app.id} requires more resource than any of Workers could have.")
        }

        // Number of cores each usable worker should give to this application
        val assignedCores = scheduleExecutorsOnWorkers(app, usableWorkers, spreadOutApps)

        // Now that we've decided how many cores to allocate on each worker, let's allocate them
        for (pos <- 0 until usableWorkers.length if assignedCores(pos) > 0) {
          // Hand the worker's cores over to executors
          allocateWorkerResourceToExecutors(
            app, assignedCores(pos), app.desc.coresPerExecutor, usableWorkers(pos))
        }
      }
    }
  }

For submitted applications the order of execution is first come, first served. The method walks the waiting list waitingApps and reads the configured executor core count; if the application's remaining cores are fewer than one executor's worth, nothing is allocated, since a remainder smaller than coresPerExecutor is never handed out. It then picks the qualifying workers (ALIVE, with enough resources, sorted by free cores in descending order), computes how many cores each such worker should give the application, and finally pushes the workers' resources down to executors. scheduleExecutorsOnWorkers is responsible for computing the per-worker core counts.
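
The worker-selection step is nothing more than a filter plus a descending sort on free cores. An isolated sketch with made-up worker data and a stand-in for canLaunchExecutor:

object UsableWorkersDemo {
  case class Worker(id: String, alive: Boolean, coresFree: Int, memoryFree: Int)

  def main(args: Array[String]): Unit = {
    val workers = Seq(
      Worker("w1", alive = true, coresFree = 4, memoryFree = 4096),
      Worker("w2", alive = false, coresFree = 16, memoryFree = 8192), // dead: filtered out
      Worker("w3", alive = true, coresFree = 8, memoryFree = 2048))
    // Stand-in for canLaunchExecutor: require at least 1 core and 1 GB free
    val usable = workers.filter(_.alive)
      .filter(w => w.coresFree >= 1 && w.memoryFree >= 1024)
      .sortBy(_.coresFree).reverse
    println(usable.map(_.id)) // List(w3, w1)
  }
}

And here is scheduleExecutorsOnWorkers itself: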

  /**
   * Schedule executors to be launched on the workers.
   * Returns an array containing number of cores assigned to each worker.
   *
   * There are two modes of launching executors. The first attempts to spread out an application's
   * executors on as many workers as possible, while the second does the opposite (i.e. launch
   * them on as few workers as possible). The former is usually better for data locality purposes
   * and is the default.
   *
   * The number of cores assigned to each executor is configurable. When this is explicitly set,
   * multiple executors from the same application may be launched on the same worker if the worker
   * has enough cores and memory. Otherwise, each executor grabs all the cores available on the
   * worker by default, in which case only one executor per application may be launched on each
   * worker during one single schedule iteration.
   * Note that when `spark.executor.cores` is not set, we may still launch multiple executors from
   * the same application on the same worker. Consider appA and appB both have one executor running
   * on worker1, and appA.coresLeft > 0, then appB is finished and releases all the cores on
   * worker1, thus for the next schedule iteration, appA launches a new executor that grabs all
   * the free cores on worker1, therefore we get multiple executors from appA running on worker1.
   *
   * It is important to allocate coresPerExecutor on each worker at a time (instead of 1 core
   * at a time). Consider the following example: cluster has 4 workers with 16 cores each.
   * User requests 3 executors (spark.cores.max = 48, spark.executor.cores = 16). If 1 core is
   * allocated at a time, 12 cores from each worker would be assigned to each executor.
   * Since 12 < 16, no executors would launch [SPARK-8881].
   */
  private def scheduleExecutorsOnWorkers(
      app: ApplicationInfo,
      // The workers that can host this app's executors
      usableWorkers: Array[WorkerInfo],
      // Whether to spread the app's executors across workers
      spreadOutApps: Boolean): Array[Int] = {
    // Cores requested per executor, if explicitly configured
    val coresPerExecutor = app.desc.coresPerExecutor
    // Minimum cores an executor needs, defaulting to 1
    val minCoresPerExecutor = coresPerExecutor.getOrElse(1)
    // If no per-executor core count is set, run one executor per worker
    val oneExecutorPerWorker = coresPerExecutor.isEmpty
    // Memory each executor needs
    val memoryPerExecutor = app.desc.memoryPerExecutorMB
    // Custom resources each executor needs
    val resourceReqsPerExecutor = app.desc.resourceReqsPerExecutor
    // Number of usable workers
    val numUsable = usableWorkers.length
    val assignedCores = new Array[Int](numUsable) // Number of cores to give to each worker
    val assignedExecutors = new Array[Int](numUsable) // Number of new executors on each worker
    // Cores still to assign: the smaller of what the app needs and what the workers have free
    var coresToAssign = math.min(app.coresLeft, usableWorkers.map(_.coresFree).sum)

    /** Return whether the specified worker can launch an executor for this app. */
    def canLaunchExecutorForApp(pos: Int): Boolean = {
      // Keep scheduling while at least one executor's worth of cores remains unassigned
      val keepScheduling = coresToAssign >= minCoresPerExecutor
      // The worker at pos has enough free cores left beyond what we already assigned it
      val enoughCores = usableWorkers(pos).coresFree - assignedCores(pos) >= minCoresPerExecutor
      // How many executors we have assigned to this worker so far
      val assignedExecutorNum = assignedExecutors(pos)

      // If we allow multiple executors per worker, then we can always launch new executors.
      // Otherwise, if there is already an executor on this worker, just give it more cores.
      val launchingNewExecutor = !oneExecutorPerWorker || assignedExecutorNum == 0
      if (launchingNewExecutor) {
        // Memory already promised to executors assigned to this worker
        val assignedMemory = assignedExecutorNum * memoryPerExecutor
        // Enough free memory remains for one more executor
        val enoughMemory = usableWorkers(pos).memoryFree - assignedMemory >= memoryPerExecutor
        // Custom resources already promised to this worker
        val assignedResources = resourceReqsPerExecutor.map {
          req => req.resourceName -> req.amount * assignedExecutorNum
        }.toMap
        // Custom resources still free on this worker
        val resourcesFree = usableWorkers(pos).resourcesAmountFree.map {
          case (rName, free) => rName -> (free - assignedResources.getOrElse(rName, 0))
        }
        // The free resources satisfy one more executor's requirements
        val enoughResources = ResourceUtils.resourcesMeetRequirements(
          resourcesFree, resourceReqsPerExecutor)
        // We stay under the application's executor limit
        val underLimit = assignedExecutors.sum + app.executors.size < app.executorLimit
        // Launch only when every kind of resource is satisfied
        keepScheduling && enoughCores && enoughMemory && enoughResources && underLimit
      } else {
        // We're adding cores to an existing executor, so no need
        // to check memory and executor limits
        keepScheduling && enoughCores
      }
    }

    // Keep launching executors until no more workers can accommodate any
    // more executors, or if we have reached this application's limits
    var freeWorkers = (0 until numUsable).filter(canLaunchExecutorForApp)
    while (freeWorkers.nonEmpty) {
      freeWorkers.foreach { pos =>
        var keepScheduling = true
        // Keep assigning cores on this worker while it can still host the app
        while (keepScheduling && canLaunchExecutorForApp(pos)) {
          // Move one executor's worth of cores from the pool to this worker
          coresToAssign -= minCoresPerExecutor
          assignedCores(pos) += minCoresPerExecutor

          // If we are launching one executor per worker, then every iteration assigns 1 core
          // to the executor. Otherwise, every iteration assigns cores to a new executor.
          if (oneExecutorPerWorker) {
            assignedExecutors(pos) = 1
          } else {
            assignedExecutors(pos) += 1
          }

          // Spreading out an application means spreading out its executors across as
          // many workers as possible. If we are not spreading out, then we should keep
          // scheduling executors on this worker until we use all of its resources.
          // Otherwise, just move on to the next worker.
          if (spreadOutApps) {
            keepScheduling = false
          }
        }
      }
      // Re-filter the workers that can still take more executors
      freeWorkers = freeWorkers.filter(canLaunchExecutorForApp)
    }
    // Return the cores assigned to each worker
    assignedCores
  }

Note the spreadOutApps parameter: when it is true the application's executors are spread across as many workers as possible; otherwise as many executors as possible are packed onto a single worker. The nested helper canLaunchExecutorForApp decides whether a given worker can still host an executor for this app. The outer loop then keeps walking the usable workers: as long as scheduling can continue and the worker can host an executor, it grants the app one executor's worth of cores (minCoresPerExecutor) per step, moving to the next worker after each grant when spreading out, or staying on the same worker until it is exhausted otherwise, until the app's demand is met or no worker qualifies.
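
Here is a self-contained sketch of the two modes, stripped down to cores only (three workers with 8 free cores each; the app wants 8 cores at 2 per executor; the memory, custom-resource and executor-limit checks are omitted):

object SpreadOutDemo {
  // Simplified scheduleExecutorsOnWorkers: cores only, no memory/resource checks
  def assign(coresFree: Array[Int], coresLeft: Int, minCores: Int,
             spreadOut: Boolean): Array[Int] = {
    val assigned = new Array[Int](coresFree.length)
    var toAssign = coresLeft
    def canTake(pos: Int) = toAssign >= minCores && coresFree(pos) - assigned(pos) >= minCores
    var free = coresFree.indices.filter(canTake)
    while (free.nonEmpty) {
      free.foreach { pos =>
        var keep = true
        while (keep && canTake(pos)) {
          toAssign -= minCores
          assigned(pos) += minCores
          if (spreadOut) keep = false // one slice per worker per round
        }
      }
      free = free.filter(canTake)
    }
    assigned
  }

  def main(args: Array[String]): Unit = {
    val workers = Array(8, 8, 8)
    println(assign(workers, 8, 2, spreadOut = true).mkString(","))  // 4,2,2: spread across workers
    println(assign(workers, 8, 2, spreadOut = false).mkString(",")) // 8,0,0: packed onto one worker
  }
}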

allocateWorkerResourceToExecutors is responsible for handing the resources to the executors:

  // Allocate a worker's resources to one or more executors
  // app: the application the executors belong to
  // assignedCores: number of cores on this worker reserved for this application
  // coresPerExecutor: cores per executor, if configured
  // worker: the worker to allocate from
  private def allocateWorkerResourceToExecutors(
      app: ApplicationInfo,
      assignedCores: Int,
      coresPerExecutor: Option[Int],
      worker: WorkerInfo): Unit = {
    // If the number of cores per executor is specified, we divide the cores assigned
    // to this worker evenly among the executors with no remainder.
    // Otherwise, we launch a single executor that grabs all the assignedCores on this worker.
    val numExecutors = coresPerExecutor.map { assignedCores / _ }.getOrElse(1)
    // Cores per executor: the configured value, or all assigned cores for the single executor
    val coresToAssign = coresPerExecutor.getOrElse(assignedCores)
    for (i <- 1 to numExecutors) {
      // Reserve the worker's custom resources for this executor
      val allocated = worker.acquireResources(app.desc.resourceReqsPerExecutor)
      // Register the executor with the application
      val exec = app.addExecutor(worker, coresToAssign, allocated)
      // Launch the executor on the worker
      launchExecutor(worker, exec)
      // Mark the application as RUNNING
      app.state = ApplicationState.RUNNING
    }
  }

When a worker hands cores to executors it divides them evenly: if a per-executor core count was specified, each executor gets exactly that many cores and assignedCores / coresPerExecutor executors are created; otherwise one executor grabs all the assigned cores. Each iteration registers the executor with the application, launches it, and marks the application as RUNNING.
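
A quick worked example of that division (the numbers are illustrative): with assignedCores = 12 and coresPerExecutor = Some(4) we get 12 / 4 = 3 executors of 4 cores each; with coresPerExecutor = None a single executor takes all 12 cores.

object ExecutorSplitDemo {
  def split(assignedCores: Int, coresPerExecutor: Option[Int]): (Int, Int) = {
    val numExecutors = coresPerExecutor.map(assignedCores / _).getOrElse(1)
    val coresToAssign = coresPerExecutor.getOrElse(assignedCores)
    (numExecutors, coresToAssign)
  }

  def main(args: Array[String]): Unit = {
    println(split(12, Some(4))) // (3,4): three 4-core executors
    println(split(12, None))    // (1,12): one executor takes everything
  }
}

Next let's see how the executor is actually launched: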

  // Launch an executor on a worker
  private def launchExecutor(worker: WorkerInfo, exec: ExecutorDesc): Unit = {
    logInfo("Launching executor " + exec.fullId + " on worker " + worker.id)
    // Register the executor on the worker
    worker.addExecutor(exec)
    // Tell the worker to launch the new executor
    worker.endpoint.send(LaunchExecutor(masterUrl, exec.application.id, exec.id,
      exec.application.desc, exec.cores, exec.memory, exec.resources))
    // Tell the driver that submitted the application that the executor has been added
    exec.application.driver.send(
      ExecutorAdded(exec.id, worker.id, worker.hostPort, exec.cores, exec.memory))
  }

The executor is first registered on the worker; the Master then sends the worker a LaunchExecutor message telling it to start the executor, and sends the driver an ExecutorAdded message saying the executor has joined. In the next section we will look at how the worker handles the message it receives.
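
For reference, a simplified sketch of the shape of these two messages; the real definitions live in org.apache.spark.deploy.DeployMessages and carry richer types (an ApplicationDescription, per-resource address information), so the field types below are stand-ins and the values are illustrative:

// Simplified stand-ins for the real messages; field lists follow the two send() calls above
object ExecutorMessagesDemo {
  case class LaunchExecutor(masterUrl: String, appId: String, execId: Int,
      appDesc: String, cores: Int, memory: Int)
  case class ExecutorAdded(id: Int, workerId: String, hostPort: String, cores: Int, memory: Int)

  def main(args: Array[String]): Unit = {
    // The worker receives this...
    println(LaunchExecutor("spark://master:7077", "app-20231105143020-0000", 0, "MyApp", 4, 4096))
    // ...and the driver receives this
    println(ExecutorAdded(0, "worker-20231105143000-10.0.0.5-35000", "10.0.0.5:35000", 4, 4096))
  }
}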