接上篇。上一篇说到用 Java 提交 Flink 作业有四种方式,其中第三种方式最适合做大数据平台:作业提交完毕后可以立刻拿到 ClusterClient 对象,通过该对象可以立刻获取 ApplicationId 以及 JobManager 的 host 和 port。有了这些信息,就可以很容易地通过 Flink REST API 或 YarnClient 获取作业的状态信息,从而实现监控作业的目的。话不多说,今天就来实现第三种提交任务的方式:在上一篇 FlinkClient 的基础上新增一个重载的方法:
/**
 * Submits a Flink job to YARN by driving {@link YarnClusterDescriptor} directly, so that the
 * {@code ClusterClient} (and through it the YARN application id and the JobManager web URL)
 * is available as soon as the cluster has been deployed.
 *
 * <p>Steps: load the Flink configuration from {@code <flinkHome>/conf}, apply the YARN specific
 * settings, parse the arguments produced by {@code toArgs(app)} with the CLI "run" options,
 * build the effective configuration, deploy an application cluster and finally log the
 * application id together with the JobManager URL.
 *
 * <p>Any failure is logged and swallowed; nothing is propagated to the caller.
 *
 * @param app description of the job to submit; supplies the Flink home directory and the
 *            command line arguments
 */
public void startJobV1(Application app) {
    String flinkHome = app.getFlinkHome();
    String flinkConfDir = flinkHome + "/conf";
    Configuration flinkConfig = GlobalConfiguration.loadConfiguration(flinkConfDir);
    List<CustomCommandLine> customCommandLines =
            CliFrontend.loadCustomCommandLines(flinkConfig, flinkConfDir);
    setYarnConfig(flinkConfig);

    String flinkDistJar = getFlinkDistJar(flinkHome);
    if (flinkDistJar != null && !flinkDistJar.isEmpty()) {
        flinkConfig.setString(YarnConfigOptions.FLINK_DIST_JAR.key(), flinkDistJar);
    }
    // If the FLINK_HOME environment variable is configured on the system this setting is not
    // needed; otherwise the jars under Flink's lib directory must be uploaded to HDFS and this
    // setting must point at that HDFS directory.
    // NOTE(review): the path is hard-coded to one specific cluster — consider making it configurable.
    flinkConfig.setString(YarnConfigOptions.PROVIDED_LIB_DIRS.key(), "hdfs://hadoop202:8020/flink-1.14.2/lib");

    log.info("-----------------------------flink config--------------------------");
    flinkConfig.toMap().forEach((key, value) -> log.info("{}:{}", key, value));

    try {
        CliFrontend cli = new CliFrontend(flinkConfig, customCommandLines);
        SecurityUtils.install(new SecurityConfiguration(flinkConfig));
        String[] customArgs = toArgs(app);
        SecurityUtils.getInstalledContext().runSecured(() -> {
            // Third submission style: go in through the YARN cluster descriptor.
            // The first argument is skipped; the rest is parsed with the "run" options.
            String[] params = Arrays.copyOfRange(customArgs, 1, customArgs.length);
            Options commandOptions = CliFrontendParser.getRunCommandOptions();
            CommandLine commandLine = cli.getCommandLine(commandOptions, params, true);
            CustomCommandLine activeCommandLine =
                    cli.validateAndGetActiveCommandLine(Preconditions.checkNotNull(commandLine));
            ProgramOptions programOptions = ProgramOptions.create(commandLine);
            programOptions.validate();
            URI jarUri = PackagedProgramUtils.resolveURI(programOptions.getJarFilePath());

            // Build the effective configuration from the command line and the base config.
            Configuration effectiveConfiguration = this.getEffectiveConfiguration(
                    activeCommandLine,
                    commandLine,
                    programOptions,
                    Collections.singletonList(jarUri.toString()),
                    flinkConfig);
            ApplicationConfiguration applicationConfiguration = new ApplicationConfiguration(
                    programOptions.getProgramArgs(), programOptions.getEntryPointClassName());

            // Instantiate the YARN cluster client factory directly.
            YarnClusterClientFactory clusterFactory = new YarnClusterClientFactory();
            // Descriptor and client are AutoCloseable; close them so their resources do not leak.
            try (YarnClusterDescriptor clusterDescriptor =
                    clusterFactory.createClusterDescriptor(effectiveConfiguration)) {
                ClusterSpecification clusterSpecification =
                        clusterFactory.getClusterSpecification(effectiveConfiguration);
                // Deploy the cluster.
                ClusterClientProvider<ApplicationId> clientProvider =
                        clusterDescriptor.deployApplicationCluster(clusterSpecification, applicationConfiguration);
                try (ClusterClient<ApplicationId> clusterClient = clientProvider.getClusterClient()) {
                    logClusterEndpoint(clusterClient);
                }
            }
            // The deployment above already submitted the job. The former trailing
            // cli.parseAndRun(customArgs) call pushed the same arguments through the CLI
            // a second time and has been removed to avoid a duplicate submission.
            return 0;
        });
    } catch (Throwable t) {
        Throwable strippedThrowable = ExceptionUtils.stripException(t, UndeclaredThrowableException.class);
        if (strippedThrowable instanceof InterruptedException) {
            // Preserve the interrupt for whoever owns this thread.
            Thread.currentThread().interrupt();
        }
        log.error("Fatal error while running command line interface.", strippedThrowable);
    }
}

/**
 * Polls the client until it reports a non-empty web interface URL and logs it together with
 * the cluster id. Gives up with a warning after a fixed number of attempts.
 *
 * @param clusterClient client of the freshly deployed cluster
 * @throws InterruptedException if the thread is interrupted while waiting between attempts
 */
private void logClusterEndpoint(ClusterClient<ApplicationId> clusterClient) throws InterruptedException {
    final int maxAttempts = 10;
    final long retryIntervalMillis = 5000L;
    for (int attempt = 1; attempt <= maxAttempts; attempt++) {
        ApplicationId appId = clusterClient.getClusterId();
        String jmUrl = clusterClient.getWebInterfaceURL();
        if (jmUrl != null && !jmUrl.isEmpty()) {
            log.info("appId:{},jm url:{}", appId, jmUrl);
            return;
        }
        // No point in waiting after the final attempt.
        if (attempt < maxAttempts) {
            Thread.sleep(retryIntervalMillis);
        }
    }
    log.warn("JobManager web interface URL not available after {} attempts, appId:{}",
            maxAttempts, clusterClient.getClusterId());
}
使用这种方式提交作业,就可以很容易地获取 ApplicationId 和 JobManager 的 URL。下面是提交完成后打印的日志信息: