开启掘金成长之旅!这是我参与「掘金日新计划 · 12 月更文挑战」的第32天,点击查看活动详情
5.2 YARN源码解析
job.waitForCompletion submit() submitter.submitJobInternal writeSplits writeConf submitJob createApplicationSubmissionContext submitApplication setupAMCommand /bin/java + 一堆参数+ APPLICATION_MASTER_CLASS( org.apache.hadoop.mapreduce.v2.ap p.MRAppMaster) rmClient.submitApplica tion ClientRMService.java submitApplication 将任务提交到任务队列,开 始执行MRAppMaster任务 启动MRAppMaster.java 任务main() initAndStartAppMaster appMaster.init appMaster.start serviceInit dispatcher = createDispatcher() serviceStart startJobs dispatcher.getEven tHandler().handle( startJobEvent) GenericEventHandler eventQueue.put(event) YarnChild.java main taskFinal.run maptask.run runNewMapper mapper.run reducetask.run runNewReducer reducer.run
5.3 Yarn客户端向RM提交作业
1)在wordcount程序的驱动类中点击 Job.java
boolean result = job.waitForCompletion(true);
public boolean waitForCompletion(boolean verbose
) throws IOException, InterruptedException,
ClassNotFoundException {
if (state == JobState.DEFINE) {
submit();
}
if (verbose) {
monitorAndPrintJob();
} else {
// get the completion poll interval from the client.
int completionPollIntervalMillis =
Job.getCompletionPollInterval(cluster.getConf());
while (!isComplete()) {
try {
Thread.sleep(completionPollIntervalMillis);
} catch (InterruptedException ie) {
}
}
}
return isSuccessful();
}
public void submit()
throws IOException, InterruptedException, ClassNotFoundException {
ensureState(JobState.DEFINE);
setUseNewAPI();
connect();
final JobSubmitter submitter =
getJobSubmitter(cluster.getFileSystem(), cluster.getClient());
status = ugi.doAs(new PrivilegedExceptionAction<JobStatus>() {
public JobStatus run() throws IOException, InterruptedException,
ClassNotFoundException {
return submitter.submitJobInternal(Job.this, cluster);
}
});
state = JobState.RUNNING;
LOG.info("The url to track the job: " + getTrackingURL());
}
点击submitJobInternal() JobSubmitter.java
JobStatus submitJobInternal(Job job, Cluster cluster)
throws ClassNotFoundException, InterruptedException, IOException {
... ...
status = submitClient.submitJob(
jobId, submitJobDir.toString(), job.getCredentials());
... ...
}
public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts) throws IOException, InterruptedException;
2)创建提交环境 ctrl + alt +B 查找submitJob实现类,YARNRunner.java
public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts)
throws IOException, InterruptedException {
addHistoryToken(ts);
// 创建提交环境:
ApplicationSubmissionContext appContext =
createApplicationSubmissionContext(conf, jobSubmitDir, ts);
// Submit to ResourceManager
try {
// 向RM提交一个应用程序,appContext里面封装了启动mrappMaster和运行container的命令
ApplicationId applicationId =
resMgrDelegate.submitApplication(appContext);
// 获取提交响应
ApplicationReport appMaster = resMgrDelegate
.getApplicationReport(applicationId);
String diagnostics =
(appMaster == null ?
"application report is null" : appMaster.getDiagnostics());
if (appMaster == null
|| appMaster.getYarnApplicationState() == YarnApplicationState.FAILED
|| appMaster.getYarnApplicationState() == YarnApplicationState.KILLED) {
throw new IOException("Failed to run job : " +
diagnostics);
}
return clientCache.getClient(jobId).getJobStatus(jobId);
} catch (YarnException e) {
throw new IOException(e);
}
}
public ApplicationSubmissionContext createApplicationSubmissionContext(
Configuration jobConf, String jobSubmitDir, Credentials ts)
throws IOException {
ApplicationId applicationId = resMgrDelegate.getApplicationId();
// Setup LocalResources
// 封装了本地资源相关路径
Map<String, LocalResource> localResources =
setupLocalResources(jobConf, jobSubmitDir);
// Setup security tokens
DataOutputBuffer dob = new DataOutputBuffer();
ts.writeTokenStorageToStream(dob);
ByteBuffer securityTokens =
ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
// Setup ContainerLaunchContext for AM container
// 封装了启动mrappMaster和运行container的命令
List<String> vargs = setupAMCommand(jobConf);
ContainerLaunchContext amContainer = setupContainerLaunchContextForAM(
jobConf, localResources, securityTokens, vargs);
... ...
return appContext;
}
private List<String> setupAMCommand(Configuration jobConf) {
List<String> vargs = new ArrayList<>(8);
// Java进程启动命令开始
vargs.add(MRApps.crossPlatformifyMREnv(jobConf, Environment.JAVA_HOME)
+ "/bin/java");
Path amTmpDir =
new Path(MRApps.crossPlatformifyMREnv(conf, Environment.PWD),
YarnConfiguration.DEFAULT_CONTAINER_TEMP_DIR);
vargs.add("-Djava.io.tmpdir=" + amTmpDir);
MRApps.addLog4jSystemProperties(null, vargs, conf);
// Check for Java Lib Path usage in MAP and REDUCE configs
warnForJavaLibPath(conf.get(MRJobConfig.MAP_JAVA_OPTS, ""),
"map",
MRJobConfig.MAP_JAVA_OPTS,
MRJobConfig.MAP_ENV);
warnForJavaLibPath(conf.get(MRJobConfig.MAPRED_MAP_ADMIN_JAVA_OPTS, ""),
"map",
MRJobConfig.MAPRED_MAP_ADMIN_JAVA_OPTS,
MRJobConfig.MAPRED_ADMIN_USER_ENV);
warnForJavaLibPath(conf.get(MRJobConfig.REDUCE_JAVA_OPTS, ""),
"reduce",
MRJobConfig.REDUCE_JAVA_OPTS,
MRJobConfig.REDUCE_ENV);
warnForJavaLibPath(conf.get(MRJobConfig.MAPRED_REDUCE_ADMIN_JAVA_OPTS, ""),
"reduce",
MRJobConfig.MAPRED_REDUCE_ADMIN_JAVA_OPTS,
MRJobConfig.MAPRED_ADMIN_USER_ENV);
// Add AM admin command opts before user command opts
// so that it can be overridden by user
String mrAppMasterAdminOptions = conf.get(MRJobConfig.MR_AM_ADMIN_COMMAND_OPTS,
MRJobConfig.DEFAULT_MR_AM_ADMIN_COMMAND_OPTS);
warnForJavaLibPath(mrAppMasterAdminOptions, "app master",
MRJobConfig.MR_AM_ADMIN_COMMAND_OPTS, MRJobConfig.MR_AM_ADMIN_USER_ENV);
vargs.add(mrAppMasterAdminOptions);
// Add AM user command opts 用户命令参数
String mrAppMasterUserOptions = conf.get(MRJobConfig.MR_AM_COMMAND_OPTS,
MRJobConfig.DEFAULT_MR_AM_COMMAND_OPTS);
warnForJavaLibPath(mrAppMasterUserOptions, "app master",
MRJobConfig.MR_AM_COMMAND_OPTS, MRJobConfig.MR_AM_ENV);
vargs.add(mrAppMasterUserOptions);
if (jobConf.getBoolean(MRJobConfig.MR_AM_PROFILE,
MRJobConfig.DEFAULT_MR_AM_PROFILE)) {
final String profileParams = jobConf.get(MRJobConfig.MR_AM_PROFILE_PARAMS,
MRJobConfig.DEFAULT_TASK_PROFILE_PARAMS);
if (profileParams != null) {
vargs.add(String.format(profileParams,
ApplicationConstants.LOG_DIR_EXPANSION_VAR + Path.SEPARATOR
+ TaskLog.LogName.PROFILE));
}
}
// 封装了要启动的mrappmaster全类名
// org.apache.hadoop.mapreduce.v2.app.MRAppMaster
vargs.add(MRJobConfig.APPLICATION_MASTER_CLASS);
vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR +
Path.SEPARATOR + ApplicationConstants.STDOUT);
vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR +
Path.SEPARATOR + ApplicationConstants.STDERR);
return vargs;
}
3)向Yarn提交 点击submitJob方法中的submitApplication() YARNRunner.java
ApplicationId applicationId =
resMgrDelegate.submitApplication(appContext);
public ApplicationId
submitApplication(ApplicationSubmissionContext appContext)
throws YarnException, IOException {
return client.submitApplication(appContext);
}
ctrl + alt +B 查找submitApplication实现类,YarnClientImpl.java
public ApplicationId
submitApplication(ApplicationSubmissionContext appContext)
throws YarnException, IOException {
ApplicationId applicationId = appContext.getApplicationId();
if (applicationId == null) {
throw new ApplicationIdNotProvidedException(
"ApplicationId is not provided in ApplicationSubmissionContext");
}
// 创建一个提交请求
SubmitApplicationRequest request =
Records.newRecord(SubmitApplicationRequest.class);
request.setApplicationSubmissionContext(appContext);
... ...
//TODO: YARN-1763:Handle RM failovers during the submitApplication call.
// 继续提交,实现类是ApplicationClientProtocolPBClientImpl
rmClient.submitApplication(request);
int pollCount = 0;
long startTime = System.currentTimeMillis();
EnumSet<YarnApplicationState> waitingStates =
EnumSet.of(YarnApplicationState.NEW,
YarnApplicationState.NEW_SAVING,
YarnApplicationState.SUBMITTED);
EnumSet<YarnApplicationState> failToSubmitStates =
EnumSet.of(YarnApplicationState.FAILED,
YarnApplicationState.KILLED);
while (true) {
try {
// 获取提交给Yarn的反馈
ApplicationReport appReport = getApplicationReport(applicationId);
YarnApplicationState state = appReport.getYarnApplicationState();
... ...
} catch (ApplicationNotFoundException ex) {
// FailOver or RM restart happens before RMStateStore saves
// ApplicationState
LOG.info("Re-submit application " + applicationId + "with the " +
"same ApplicationSubmissionContext");
// 如果提交失败,则再次提交
rmClient.submitApplication(request);
}
}
return applicationId;
}
ctrl + alt +B 查找submitApplication实现类,ClientRMService.java
public SubmitApplicationResponse submitApplication(
SubmitApplicationRequest request) throws YarnException, IOException {
ApplicationSubmissionContext submissionContext = request
.getApplicationSubmissionContext();
ApplicationId applicationId = submissionContext.getApplicationId();
CallerContext callerContext = CallerContext.getCurrent();
... ...
try {
// call RMAppManager to submit application directly
rmAppManager.submitApplication(submissionContext,
System.currentTimeMillis(), user);
LOG.info("Application with id " + applicationId.getId() +
" submitted by user " + user);
RMAuditLogger.logSuccess(user, AuditConstants.SUBMIT_APP_REQUEST,
"ClientRMService", applicationId, callerContext,
submissionContext.getQueue());
} catch (YarnException e) {
LOG.info("Exception in submitting " + applicationId, e);
RMAuditLogger.logFailure(user, AuditConstants.SUBMIT_APP_REQUEST,
e.getMessage(), "ClientRMService",
"Exception in submitting application", applicationId, callerContext,
submissionContext.getQueue());
throw e;
}
return recordFactory
.newRecordInstance(SubmitApplicationResponse.class);
}
5.4 RM启动MRAppMaster
0)在pom.xml中增加如下依赖
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-app</artifactId>
<version>3.1.3</version>
</dependency>
ctrl +n 查找MRAppMaster,搜索main方法
public static void main(String[] args) {
try {
... ...
// 初始化一个container
ContainerId containerId = ContainerId.fromString(containerIdStr);
ApplicationAttemptId applicationAttemptId =
containerId.getApplicationAttemptId();
if (applicationAttemptId != null) {
CallerContext.setCurrent(new CallerContext.Builder(
"mr_appmaster_" + applicationAttemptId.toString()).build());
}
long appSubmitTime = Long.parseLong(appSubmitTimeStr);
// 创建appMaster对象
MRAppMaster appMaster =
new MRAppMaster(applicationAttemptId, containerId, nodeHostString,
Integer.parseInt(nodePortString),
Integer.parseInt(nodeHttpPortString), appSubmitTime);
... ...
// 初始化并启动AppMaster
initAndStartAppMaster(appMaster, conf, jobUserName);
} catch (Throwable t) {
LOG.error("Error starting MRAppMaster", t);
ExitUtil.terminate(1, t);
}
}
protected static void initAndStartAppMaster(final MRAppMaster appMaster,
final JobConf conf, String jobUserName) throws IOException,
InterruptedException {
... ...
conf.getCredentials().addAll(credentials);
appMasterUgi.doAs(new PrivilegedExceptionAction<Object>() {
@Override
public Object run() throws Exception {
// 初始化
appMaster.init(conf);
// 启动
appMaster.start();
if(appMaster.errorHappenedShutDown) {
throw new IOException("Was asked to shut down.");
}
return null;
}
});
}
public void init(Configuration conf) {
... ...
synchronized (stateChangeLock) {
if (enterState(STATE.INITED) != STATE.INITED) {
setConfig(conf);
try {
// 调用MRAppMaster中的serviceInit()方法
serviceInit(config);
if (isInState(STATE.INITED)) {
//if the service ended up here during init,
//notify the listeners
// 如果初始化完成,通知监听器
notifyListeners();
}
} catch (Exception e) {
noteFailure(e);
ServiceOperations.stopQuietly(LOG, this);
throw ServiceStateException.convert(e);
}
}
}
}
ctrl + alt +B 查找serviceInit实现类,MRAppMaster.java
protected void serviceInit(final Configuration conf) throws Exception {
... ...
// 创建提交路径
clientService = createClientService(context);
// 创建调度器
clientService.init(conf);
// 创建job提交RPC客户端
containerAllocator = createContainerAllocator(clientService, context);
... ...
}