【AWS】安装 Kudu 和 Impala 社区版本

141 阅读2分钟

时间服务器

# Install chrony (chronyd, the ntpd replacement), enable it at boot and
# start it now. chronyc talks to the running daemon, so chronyd must be up
# before the manual sync below.
sudo yum install chrony -y
sudo systemctl enable chronyd
sudo systemctl start chronyd

# Step the clock once, immediately.
sudo chronyc makestep

# Set the time zone (requires root).
sudo timedatectl set-timezone Asia/Shanghai
# Alternative, if timedatectl is not available:
# sudo ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime

# Restart chronyd and check that it is running.
sudo systemctl restart chronyd
sudo systemctl status chronyd

主机名映射

# Host name mapping: the three lines after the command are the expected
# contents of /etc/hosts, not commands to run.
cat /etc/hosts
172.16.0.47  emr-kudu1
172.16.0.48  emr-kudu2
172.16.0.49  emr-kudu3

Kudu

# Install Kudu from the local RPM.
sudo yum install kudu-1.15.0-1.x86_64.rpm -y

# Create the Kudu data/log storage directories and give them to the kudu user.
# NOTE(review): the original mkdir line was truncated to a single character;
# the sub-directory layout below is a reconstruction - adapt it to your disks.
sudo mkdir -p /data/kudu/master/{wal,data,log} /data/kudu/tserver/{wal,data,log}
sudo chown -R kudu:kudu /data/kudu

# Configure the Kudu master.
# `sudo cat > file` performs the redirection as the unprivileged caller, so
# the files are written through `sudo tee` instead.
# NOTE(review): the original heredoc bodies were lost; the flags below are a
# reconstruction from the Kudu configuration reference, using the host names
# mapped in /etc/hosts and the default master RPC port 7051 - confirm them
# against your deployment before use.
sudo tee /etc/kudu/conf/master.gflagfile > /dev/null <<'EOF'
--fs_wal_dir=/data/kudu/master/wal
--fs_data_dirs=/data/kudu/master/data
--log_dir=/data/kudu/master/log
--master_addresses=emr-kudu1:7051,emr-kudu2:7051,emr-kudu3:7051
EOF

# Configure the Kudu tablet server.
sudo tee /etc/kudu/conf/tserver.gflagfile > /dev/null <<'EOF'
--fs_wal_dir=/data/kudu/tserver/wal
--fs_data_dirs=/data/kudu/tserver/data
--log_dir=/data/kudu/tserver/log
--tserver_master_addrs=emr-kudu1:7051,emr-kudu2:7051,emr-kudu3:7051
EOF

Impala

# Download the Impala build and unpack it into /opt/apps/impala.
sudo wget https://cos-1252090120.cos.ap-shanghai.myqcloud.com/public/pkg/impala.tar.gz
sudo mkdir -p /opt/apps/impala
sudo tar -zxvf impala.tar.gz -C /opt/apps/impala

# Copy the Hadoop/Hive client configuration into Impala's config directory.
# The directory is created by root, so the copies need sudo as well.
sudo mkdir -p /etc/impala/conf
sudo cp core-site.xml hdfs-site.xml hive-site.xml /etc/impala/conf/

# Fetch the MySQL JDBC driver (presumably for the Hive metastore connection -
# confirm). -f makes curl fail on an HTTP error instead of saving the error
# page as the jar.
sudo mkdir -p /usr/share/java
sudo curl -fSsL https://cos-1252090120.cos.ap-shanghai.myqcloud.com/public/pkg/mysql-connector-java.jar -o /usr/share/java/mysql-connector-java.jar

# Symlink impala-shell into /usr/bin; -f replaces an existing link so the
# step can be re-run.
sudo ln -sf /opt/apps/impala/bin/impala-shell /usr/bin/

# Single node: on the first machine (e.g. 172.16.0.47) start statestored,
# catalogd and impalad.
nohup /opt/apps/impala/bin/statestored &
nohup /opt/apps/impala/bin/catalogd &
nohup /opt/apps/impala/bin/impalad \
  -kudu_master_hosts=172.16.0.47,172.16.0.48,172.16.0.49 \
  -fe_service_threads=256 &

# Multi-node: only one statestored and one catalogd process may be started,
# so on the second and third machines start impalad alone and point it at
# the statestored / catalogd address.
# On 172.16.0.48:
nohup /opt/apps/impala/bin/impalad \
  -kudu_master_hosts=172.16.0.47,172.16.0.48,172.16.0.49 \
  -fe_service_threads=256 \
  -state_store_host=172.16.0.47 \
  -catalog_service_host=172.16.0.47 &

# On 172.16.0.49:
nohup /opt/apps/impala/bin/impalad \
  -kudu_master_hosts=172.16.0.47,172.16.0.48,172.16.0.49 \
  -fe_service_threads=256 \
  -state_store_host=172.16.0.47 \
  -catalog_service_host=172.16.0.47 &

# Impala involves a few important environment variables; setting them is
# recommended. A non-empty value already present in the environment is kept,
# otherwise the default shown here is assigned.
: "${IMPALA_HOME:=/opt/apps/impala}"
: "${IMPALA_BIN:=$IMPALA_HOME/bin}"
: "${IMPALA_CONF_DIR:=/etc/impala/conf}"
export IMPALA_HOME IMPALA_BIN IMPALA_CONF_DIR

Hive 创建 Hive 表, Impala 查询

-- Run in Hive: create a comma-delimited text table.
-- NOTE(review): DOUBLE is acceptable for a demo; prefer DECIMAL for real
-- monetary amounts.
CREATE TABLE employee (
  emp_id INT,
  emp_name STRING,
  emp_salary DOUBLE
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';

-- Insert sample rows. The explicit column list keeps the statement correct
-- even if the table's column order changes.
INSERT INTO employee (emp_id, emp_name, emp_salary) VALUES
  (1, 'John Doe', 50000),
  (2, 'Jane Smith', 60000),
  (3, 'Bob Johnson', 75000);

-- Run in Impala: the table was created through Hive, so Impala has to load
-- its metadata first. Naming the table limits the invalidation to that one
-- table; a bare INVALIDATE METADATA discards the cached metadata of every
-- table in the catalog.
INVALIDATE METADATA employee;

SELECT
  emp_id,
  emp_name,
  emp_salary
FROM employee;

Impala 创建 Hive 表, Impala/Hive 查询

-- Run in Impala: create the table. The statement terminator is required;
-- without it the INSERT below is parsed as part of the CREATE TABLE and the
-- script fails.
CREATE TABLE my_table (
  id INT,
  name STRING,
  age INT
);

-- Insert sample rows with an explicit column list.
INSERT INTO my_table (id, name, age) VALUES
  (1, 'John Doe', 30),
  (2, 'Jane Smith', 25),
  (3, 'Bob Johnson', 40);

-- Query the table from Hive.
SELECT
  id,
  name,
  age
FROM my_table;

Impala 创建 Kudu 表, Impala 查询

-- Run in Impala: create a Kudu-backed table, hash-partitioned on the primary
-- key into 16 partitions. The statement terminator is required; without it
-- the INSERT below is parsed as part of the CREATE TABLE and the script fails.
CREATE TABLE my_first_table
(
  id BIGINT,
  name STRING,
  PRIMARY KEY (id)
)
PARTITION BY HASH (id) PARTITIONS 16
STORED AS KUDU;

-- Insert sample rows with an explicit column list.
INSERT INTO my_first_table (id, name) VALUES
  (1, 'John Doe'),
  (2, 'Jane Smith'),
  (3, 'Bob Johnson'),
  (4, 'Alice Williams');

-- Query the table from Impala.
SELECT
  id,
  name
FROM my_first_table;