hadoop 3.2.4集群安装
地址规划
| hostname | ip |
|---|---|
| node01 | 192.168.40.128 |
| node02 | 192.168.40.129 |
| node03 | 192.168.40.130 |
组件规划
| 组件名称 | 组件版本 | node01 | node02 | node03 |
|---|---|---|---|---|
| hadoop | 3.2.4 | √ | √ | √ |
| journalnode | - | √ | √ | √ |
| namenode | - | √ | √ | √ |
| datanode | - | √ | √ | √ |
| nodemanager | - | √ | √ | √ |
| resourcemanager | - | √ | √ | |
| ZKFC | - | √ | √ | √ |
安装
创建普通用户
useradd hadoop
passwd hadoop
这里不赘述文件解压问题,直接丢配置文件,hdfs的安装主要是配置文件的修改
修改core-site.xml文件
<configuration>
  <!-- Default filesystem URI: the logical HA nameservice (hdfscluster01), not a single NameNode host -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hdfscluster01</value>
  </property>
  <!-- Hadoop temporary directory.
       NOTE(review): this path lives under /tmp; confirm the OS does not purge it in a way that matters for your setup. -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/tmp/hadoop/data/tmp/</value>
  </property>
  <!-- ZooKeeper quorum used for automatic failover (three instances on the same host, different ports) -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>192.168.40.128:2181,192.168.40.128:2182,192.168.40.128:2183</value>
  </property>
  <!-- Session timeout for the Hadoop-to-ZooKeeper connection.
       Raised from 1000 to Hadoop's default of 10000: a one-second timeout makes the
       failover controller lose its session on any short pause and trigger needless failovers. -->
  <property>
    <name>ha.zookeeper.session-timeout.ms</name>
    <value>10000</value>
    <description>ms</description>
  </property>
</configuration>
修改 hdfs-site.xml文件
<configuration>
  <!-- Default block replication factor -->
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <!-- Local storage directories for NameNode metadata and DataNode blocks -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/data/dfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/data/dfs/data</value>
  </property>
  <!-- Enable WebHDFS -->
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
  <!-- Logical nameservice ID of this HDFS cluster; it must match the authority
       used in fs.defaultFS in core-site.xml (hdfs://hdfscluster01). -->
  <property>
    <name>dfs.nameservices</name>
    <value>hdfscluster01</value>
  </property>
  <!-- dfs.ha.namenodes.[nameservice id]: comma-separated list of unique NameNode IDs
       in the nameservice. Here the nameservice has three NameNodes: nn1, nn2, nn3. -->
  <property>
    <name>dfs.ha.namenodes.hdfscluster01</name>
    <value>nn1,nn2,nn3</value>
  </property>
  <!-- nn1 RPC address -->
  <property>
    <name>dfs.namenode.rpc-address.hdfscluster01.nn1</name>
    <value>node01:8020</value>
  </property>
  <!-- nn1 HTTP (web UI) address -->
  <property>
    <name>dfs.namenode.http-address.hdfscluster01.nn1</name>
    <value>node01:50070</value>
  </property>
  <!-- nn2 RPC address -->
  <property>
    <name>dfs.namenode.rpc-address.hdfscluster01.nn2</name>
    <value>node02:8020</value>
  </property>
  <!-- nn2 HTTP (web UI) address -->
  <property>
    <name>dfs.namenode.http-address.hdfscluster01.nn2</name>
    <value>node02:50070</value>
  </property>
  <!-- nn3 RPC address -->
  <property>
    <name>dfs.namenode.rpc-address.hdfscluster01.nn3</name>
    <value>node03:8020</value>
  </property>
  <!-- nn3 HTTP (web UI) address -->
  <property>
    <name>dfs.namenode.http-address.hdfscluster01.nn3</name>
    <value>node03:50070</value>
  </property>
  <!-- Shared edits storage for the NameNodes, i.e. the JournalNode list.
       URI format: qjournal://host1:port1;host2:port2;host3:port3/journalId
       The journalId here is the nameservice ID; 8485 is the JournalNode port used. -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://node01:8485;node02:8485;node03:8485/hdfscluster01</value>
  </property>
  <!-- Local disk directory where each JournalNode stores its edits -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/data/Hadoop-HA/journaldata</value>
  </property>
  <!-- Enable automatic NameNode failover -->
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <!-- Client-side class used to locate the active NameNode -->
  <property>
    <name>dfs.client.failover.proxy.provider.hdfscluster01</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <!-- Fencing methods, one per line, tried in order -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>
      sshfence
      shell(/bin/true)
    </value>
  </property>
  <!-- sshfence needs passwordless SSH; this is the private key it uses -->
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/hadoop/.ssh/id_rsa</value>
  </property>
  <!-- sshfence connection timeout, in milliseconds -->
  <property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>30000</value>
  </property>
  <property>
    <name>ha.failover-controller.cli-check.rpc-timeout.ms</name>
    <value>60000</value>
  </property>
</configuration>
修改mapred-site.xml文件
<configuration>
  <!-- Run MapReduce on YARN -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <!-- MapReduce JobHistory server address -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>node03:10020</value>
  </property>
  <!-- JobHistory server web UI address -->
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>node03:19888</value>
  </property>
  <!-- Map task container size, concurrency limit and JVM heap -->
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>4096</value>
    <description>map的最大可使用资源</description>
  </property>
  <property>
    <name>mapreduce.job.running.map.limit</name>
    <value>5</value>
    <description>map最大并发数</description>
  </property>
  <property>
    <name>mapreduce.map.java.opts</name>
    <value>-Xmx1024M</value>
    <description>map的堆内存</description>
  </property>
  <!-- Reduce task container size, concurrency limit and JVM heap -->
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>4096</value>
    <description>reduce的最大可使用资源</description>
  </property>
  <property>
    <name>mapreduce.job.running.reduce.limit</name>
    <value>4</value>
    <description>reduce的最大并发数</description>
  </property>
  <property>
    <name>mapreduce.reduce.java.opts</name>
    <value>-Xmx1024M</value>
    <description>reduce堆内存</description>
  </property>
  <!-- Sort and shuffle tuning -->
  <property>
    <name>mapreduce.task.io.sort.mb</name>
    <value>512</value>
    <description>数据排序时的内存大小</description>
  </property>
  <property>
    <name>mapreduce.task.io.sort.factor</name>
    <value>100</value>
    <description>数据排序时合并多个数据流</description>
  </property>
  <property>
    <name>mapreduce.reduce.shuffle.parallelcopies</name>
    <value>50</value>
    <description>当map数量多于reduce数量时,增加reduce并行副本数量。</description>
  </property>
</configuration>
修改yarn-site.xml文件
<configuration>
  <!-- Site specific YARN configuration properties -->
  <!-- Enable ResourceManager high availability -->
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <!-- Cluster ID of the ResourceManager ensemble -->
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yrc</value>
  </property>
  <!-- Logical IDs of the ResourceManagers -->
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <!-- yarn.resourcemanager.ha.id is intentionally not set: this file is copied to every
       node, and a fixed value of rm1 would make the ResourceManager on node02 identify
       itself as rm1. Without it each ResourceManager works out its own ID from the
       per-ID addresses below. -->
  <!-- Host of each ResourceManager -->
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>node01</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>node02</value>
  </property>
  <!-- Addresses of the first ResourceManager (rm1) -->
  <!-- Clients submit and manage applications through this address -->
  <property>
    <name>yarn.resourcemanager.address.rm1</name>
    <value>node01:8032</value>
  </property>
  <!-- Scheduler address used to request resources from the RM -->
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm1</name>
    <value>node01:8030</value>
  </property>
  <!-- NodeManagers exchange information with the RM through this address -->
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
    <value>node01:8031</value>
  </property>
  <!-- Administrators send admin commands to the RM through this address -->
  <property>
    <name>yarn.resourcemanager.admin.address.rm1</name>
    <value>node01:8033</value>
  </property>
  <!-- RM web UI address for viewing cluster information -->
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>node01:8088</value>
  </property>
  <!-- Addresses of the second ResourceManager (rm2) -->
  <property>
    <name>yarn.resourcemanager.address.rm2</name>
    <value>node02:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm2</name>
    <value>node02:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
    <value>node02:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm2</name>
    <value>node02:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>node02:8088</value>
  </property>
  <!-- ZooKeeper quorum; same ensemble as ha.zookeeper.quorum in core-site.xml.
       Defined once only: a second definition with other addresses would override this one. -->
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>192.168.40.128:2181,192.168.40.128:2182,192.168.40.128:2183</value>
  </property>
  <!-- Comma-separated auxiliary service list; names may only contain a-zA-Z0-9_ and must not start with a digit -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!-- Enable log aggregation -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <!-- How long aggregated logs are kept before deletion -->
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>2592000</value><!-- 30 days -->
  </property>
  <!-- How long user logs are kept on the node, in seconds; only applies when log aggregation is disabled -->
  <property>
    <name>yarn.nodemanager.log.retain-seconds</name>
    <value>604800</value><!-- 7 days -->
  </property>
  <!-- Enable ResourceManager state recovery -->
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <!-- Store ResourceManager state in the ZooKeeper ensemble -->
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
  <!-- Minimum memory a single container request can be allocated (Hadoop default is 1024 MB) -->
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>512</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>16384</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>65536</value>
  </property>
  <!-- Property name carries the yarn. prefix; without it the setting is ignored -->
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>30</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-pmem-ratio</name>
    <value>15</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.log.server.url</name>
    <value>http://node03:19888/jobhistory/logs</value>
  </property>
  <!-- Aggregated logs go to the HA nameservice hdfscluster01 (the value of fs.defaultFS) -->
  <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>hdfs://hdfscluster01/user/hpe/yarn-logs/</value>
  </property>
  <!-- Obtain this value from the output of the "hadoop classpath" command -->
  <property>
    <name>yarn.application.classpath</name>
    <value>/usr/local/package/hadoop-3.2.4/etc/hadoop:/usr/local/package/hadoop-3.2.4/share/hadoop/common/lib/*:/usr/local/package/hadoop-3.2.4/share/hadoop/common/*:/usr/local/package/hadoop-3.2.4/share/hadoop/hdfs:/usr/local/package/hadoop-3.2.4/share/hadoop/hdfs/lib/*:/usr/local/package/hadoop-3.2.4/share/hadoop/hdfs/*:/usr/local/package/hadoop-3.2.4/share/hadoop/mapreduce/lib/*:/usr/local/package/hadoop-3.2.4/share/hadoop/mapreduce/*:/usr/local/package/hadoop-3.2.4/share/hadoop/yarn:/usr/local/package/hadoop-3.2.4/share/hadoop/yarn/lib/*:/usr/local/package/hadoop-3.2.4/share/hadoop/yarn/*</value>
  </property>
</configuration>
编辑workers
将datanode节点加进去
集群启动
# Passwordless SSH must be set up between all nodes before starting
# Start the JournalNode daemon
# Must be run on each of the three nodes
# node01 node02 node03
hdfs --daemon start journalnode
# Format the NameNode (use the hdfs entry point; "hadoop namenode" is the deprecated form)
# node01
hdfs namenode -format
# Start the NameNode on its own, without --daemon
# node01
hdfs namenode
此时 node01会显示Serving checkpoints as ... 这样的日志 (忘记截图)
在另外两个namenode节点执行
# Run on the other two NameNode hosts: node02 node03
hdfs namenode -bootstrapStandby
执行后 node02、node03 会从 node01 同步元数据,很快就会完成;同步完成后,需要回到 node01 手动结束(Ctrl+C)前面单独启动的 namenode 进程
# Format the ZKFC state in ZooKeeper
# node01
hdfs zkfc -formatZK
启动服务
# Prerequisite: the environment variables must have been added to /etc/profile
start-all.sh
# Start the JobHistory server (mr-jobhistory-daemon.sh is deprecated in Hadoop 3)
# node03
mapred --daemon start historyserver
查看进程 我这里缺少zookeeper进程,因为zk是用容器部署的,可以查看 swarm安装常用组件
node01
node02
node03
测试
hdfs web http://node01:50070
yarn web http://node02:8088
mr 测试
# Create a small file and upload it to the HDFS root
echo "hellow" > word.xml
hdfs dfs -put word.xml /
# Run the wordcount example job
hadoop jar /usr/local/soft/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.4.jar wordcount /word.xml /output
# Show the result
hdfs dfs -cat /output/part-r-00000