Step 1
1. Spark download address:
Here we use spark-3.1.2-bin-hadoop3.2.tgz.
2. Install Scala
[root@hadoop10 software]# tar -zxvf scala-2.12.14.tgz
[root@hadoop10 software]# mv scala-2.12.14 scala
3. Configure the environment variables (append the following to /etc/profile):
export SCALA_HOME=/software/scala
export PATH=.:$PATH:$SCALA_HOME/bin
4. Verify
[root@hadoop10 software]# source /etc/profile
[root@hadoop10 software]# scala -version
Scala code runner version 2.12.14 -- Copyright 2002-2021, LAMP/EPFL and Lightbend, Inc.
[root@hadoop10 software]# scala
Welcome to Scala 2.12.14 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_202).
Type in expressions for evaluation. Or try :help.
scala> 6+6
res0: Int = 12
scala>
Distribute Scala to the other nodes, and append the same environment variables to /etc/profile on hadoop11 and hadoop12:
scp -r /software/scala hadoop11:/software/
scp -r /software/scala hadoop12:/software/
export SCALA_HOME=/software/scala
export PATH=.:$PATH:$SCALA_HOME/bin
Then reload the profile on each node:
[root@hadoop12 software]# source /etc/profile
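If root SSH access between the nodes is already in place (it is needed for the scp commands above anyway), the profile edit on the workers can also be pushed from hadoop10. This is only a minimal sketch; the layout of /etc/profile on each node is an assumption:
for host in hadoop11 hadoop12; do
  ssh "$host" "cat >> /etc/profile" <<'EOF'
export SCALA_HOME=/software/scala
export PATH=.:$PATH:$SCALA_HOME/bin
EOF
done
Each worker still needs a source /etc/profile in its own shell afterwards, as shown above.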
Cluster layout:
hadoop10: master
hadoop11: worker/slave
hadoop12: worker/slave
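For these hostnames to resolve, every node should carry matching entries in /etc/hosts. The IP addresses below are placeholders for illustration; substitute the real ones for your network:
192.168.1.10   hadoop10
192.168.1.11   hadoop11
192.168.1.12   hadoop12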
Step 2: Install Spark. Download address:
archive.apache.org/dist/spark/…
[root@hadoop10 software]# tar -zxvf spark-3.1.2-bin-hadoop3.2.tgz
[root@hadoop10 software]# mv spark-3.1.2-bin-hadoop3.2 spark
Configuration files
Enter the configuration directory:
cd /software/spark/conf
Rename the workers template and edit it:
[root@hadoop10 conf]# mv workers.template workers
vim workers
Contents:
hadoop11
hadoop12
Rename the environment template:
mv spark-env.sh.template spark-env.sh
Edit the configuration file:
vim spark-env.sh
Add the following:
## JAVA installation directory
JAVA_HOME=/software/jdk
## Hadoop configuration directory; needed to read files on HDFS and to run Spark on YARN, so set it up in advance
HADOOP_CONF_DIR=/software/hadoop/etc/hadoop
YARN_CONF_DIR=/software/hadoop/etc/hadoop
## Host of the Spark master and the communication port used when submitting jobs
SPARK_MASTER_HOST=hadoop10
SPARK_MASTER_PORT=7077
SPARK_MASTER_WEBUI_PORT=8080
SPARK_WORKER_CORES=1
SPARK_WORKER_MEMORY=1g
Distribute Spark to the other nodes:
scp -r /software/spark hadoop11:/software/
scp -r /software/spark hadoop12:/software/
Start the cluster:
cd /software/spark/sbin
./start-all.sh
Or use the full path, which avoids accidentally invoking Hadoop's start-all.sh if that is on the PATH:
/software/spark/sbin/start-all.sh
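Once start-all.sh finishes, jps on each node should show the standalone daemons (the process IDs below are examples and will differ; any Hadoop daemons already running will also be listed):
[root@hadoop10 sbin]# jps | grep Master
2101 Master
[root@hadoop11 ~]# jps | grep Worker
1983 Worker
[root@hadoop12 ~]# jps | grep Worker
1976 Worker
The master web UI configured above should also be reachable at http://hadoop10:8080 and list both workers as ALIVE.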
Run an example that computes Pi:
[root@hadoop10 bin]# cd /software/spark/bin/
[root@hadoop10 bin]# pwd
/software/spark/bin
[root@hadoop10 bin]# run-example SparkPi 10
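With no master specified, run-example runs locally (the spark-shell session below likewise reports master = local[*]). To run the same computation on the standalone cluster started above, spark-submit can be pointed at the master; the examples jar name below is the one shipped in spark-3.1.2-bin-hadoop3.2, so adjust it if your build differs:
[root@hadoop10 bin]# ./spark-submit \
  --master spark://hadoop10:7077 \
  --class org.apache.spark.examples.SparkPi \
  /software/spark/examples/jars/spark-examples_2.12-3.1.2.jar 10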
Run a WordCount example
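The shell session below reads file:///home/data/wordcount.txt. Since the shell runs with master = local[*], the file only needs to exist on hadoop10. A sample input consistent with the word counts printed below (the actual file contents are an assumption):
[root@hadoop10 bin]# mkdir -p /home/data
[root@hadoop10 bin]# cat > /home/data/wordcount.txt <<'EOF'
hello hadoop
hello hbase
hello world
EOF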
[root@hadoop10 bin]# cd /software/spark/bin/
[root@hadoop10 bin]# pwd
/software/spark/bin
[root@hadoop10 bin]# spark-shell
2021-11-09 16:57:03,855 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Spark context Web UI available at http://hadoop10:4040
Spark context available as 'sc' (master = local[*], app id = local-1636448230277).
Spark session available as 'spark'.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 3.1.2
      /_/
Using Scala version 2.12.10 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_202)
Type in expressions to have them evaluated.
Type :help for more information.
scala> sc.textFile("file:///home/data/wordcount.txt").flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_).foreach(println)
(hadoop,1)
(hbase,1)
(hello,3)
(world,1)

scala>

2.3.7.2 Spark cluster mode
This cluster mode requires the Spark cluster to be started first.
Run the following command under the /software/spark/bin directory.
The complete process is as follows:
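A plausible form of that command, assuming spark-shell is simply attached to the standalone master configured earlier (a sketch, not the original transcript):
[root@hadoop10 bin]# ./spark-shell --master spark://hadoop10:7077
The startup banner should then report master = spark://hadoop10:7077 instead of local[*], and the running application will appear on the master web UI at http://hadoop10:8080.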