手动安装Kafka依赖 下载jar包
通过网址下载:http://packages.confluent.io/archive/5.3/(下载 confluent-5.3.4 压缩包),解压后找到以下jar包,上传服务器hadoop102
common-config-5.3.4.jar common-utils-5.3.4.jar kafka-avro-serializer-5.3.4.jar kafka-schema-registry-client-5.3.4.jar install到maven本地仓库
# Install the four Confluent 5.3.4 jars into the local Maven repository.
# Run from the directory containing the downloaded jar files.
# Fix: the original repeated the same mvn invocation four times; loop over
# the artifact ids instead so version bumps only touch one place.
for artifact in common-config common-utils kafka-avro-serializer kafka-schema-registry-client; do
  mvn install:install-file \
    -DgroupId=io.confluent \
    -DartifactId="${artifact}" \
    -Dversion=5.3.4 \
    -Dpackaging=jar \
    -Dfile="./${artifact}-5.3.4.jar"
done
安装juicefs
# Resolve the latest JuiceFS release tag from the GitHub API and download
# the matching linux-amd64 tarball.
# Fix: `tr -d 'v'` deletes EVERY "v" in the tag, not just the leading one;
# use sed to strip only the "v" prefix. Also dropped the "$ " prompt
# markers so the lines can be copy-pasted directly.
# NOTE(review): grep/cut over JSON is fragile; `jq -r .tag_name` is the
# robust alternative if jq is available.
JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest \
  | grep 'tag_name' | cut -d '"' -f 4 | sed 's/^v//')
wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz"
[Mon May 29 2023 1:11PM (CST+0800)] [3 days, 4:12] :~/workspace/bigdata/hudi 🍣 master 📦 📝 ×3🛤️ ×2 via 🌟 v1.8.0 took 11s
❯ ls |grep juicefs
juicefs-1.0.4-linux-amd64.tar.gz
[Mon May 29 2023 1:12PM (CST+0800)] [3 days, 4:12] :~/workspace/bigdata/hudi 🍣 master 📦 📝 ×3🛤️ ×2 via 🌟 v1.8.0
❯
# Unpack the JuiceFS tarball and install the binary into /usr/local/bin.
# Fix: `mkdir -p` so re-running these notes does not fail when the
# directory already exists.
mkdir -p juice && tar -zxvf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" -C juice
sudo install juice/juicefs /usr/local/bin
安装minio
# Pull the official MinIO image (floating "latest" tag).
docker pull minio/minio
启动命令
# Start MinIO: S3 API on 9000 (published), web console on 9090 (published).
# Fix: the original passed --address ":9090" and --console-address ":9000",
# so the single published port 9000 pointed at the web console while the
# S3 API (which `juicefs format --bucket http://localhost:9000/...` needs)
# was unreachable. Addresses are swapped and both ports published.
# NOTE(review): MINIO_ACCESS_KEY/MINIO_SECRET_KEY are deprecated upstream
# in favor of MINIO_ROOT_USER/MINIO_ROOT_PASSWORD; kept here so existing
# credentials in these notes keep working.
docker run -d --restart=always --name minio \
  -p 9000:9000 \
  -p 9090:9090 \
  -e MINIO_ACCESS_KEY=minio \
  -e MINIO_SECRET_KEY=minio@123 \
  -v /usr/local/minio/data:/data \
  -v /usr/local/minio/config:/root/.minio \
  minio/minio server /data --address ":9000" --console-address ":9090"
# Pull pinned bitnami MinIO server and client images (reproducible tag).
docker pull bitnami/minio:2022.8.11
docker pull bitnami/minio-client:2022.8.11
# Start a MinIO server from the bitnami image: S3 API on 9000, web console
# on 9001, with root credentials supplied via environment variables.
docker run -d --name minio-server \
  -p 9000:9000 \
  -p 9001:9001 \
  -e MINIO_ROOT_USER="minio-root-user" \
  -e MINIO_ROOT_PASSWORD="minio-root-password" \
  bitnami/minio:2022.8.11
# One-shot MinIO client container: create bucket "my-bucket" on the server
# started above, then remove the container (--rm).
# NOTE(review): --link is a legacy Docker feature; a user-defined network
# is the modern equivalent — confirm before modernizing.
docker run --rm -it --name minio-client \
  --link minio-server:minio-server \
  -e MINIO_SERVER_HOST="minio-server" \
  -e MINIO_SERVER_ACCESS_KEY="minio-root-user" \
  -e MINIO_SERVER_SECRET_KEY="minio-root-password" \
  bitnami/minio-client:2022.8.11 \
  mb minio/my-bucket
启动一个元数据存储redis
# Quick throwaway Redis instance on the default port (official image).
docker run -d -p 6379:6379 --name redis redis
docker pull bitnami/redis:7.0
# 运行redis server
# Run a passwordless Redis server (bitnami image) and persist its data
# under the given host directory.
# Fix: the original passed -d twice (once on each of the first two lines);
# redundant, removed.
docker run -d --name redis-server \
  -p 6379:6379 \
  -e ALLOW_EMPTY_PASSWORD=yes \
  -v /home/xfhuang/workspace/bigdata/flink/flink-dist/target/flink-1.17-SNAPSHOT-bin/flink-1.17-SNAPSHOT/redis:/bitnami/redis/data \
  bitnami/redis:7.0
# redis client连接
# Connect to the Redis server above using redis-cli from a throwaway
# client container (removed on exit via --rm).
docker run -it --link redis-server:redis-server --rm bitnami/redis:7.0 redis-cli -h redis-server
启动juicefs
启动flink
# Copy the Hudi Flink bundle into Flink's lib directory, export the Hadoop
# classpath, then start the standalone Flink cluster.
# Fix: backticks replaced with $(...) and ${HADOOP_HOME} quoted so a path
# containing spaces does not break the command substitution.
cp ~/workspace/bigdata/hudi/packaging/hudi-flink-bundle/target/hudi-flink1.17-bundle-0.14.0-SNAPSHOT.jar lib
export HADOOP_CLASSPATH=$("${HADOOP_HOME}/bin/hadoop" classpath)
bin/start-cluster.sh
# Format a JuiceFS volume named "hudi": object storage is the MinIO bucket
# at localhost:9000, metadata lives in Redis DB 1 on localhost.
# NOTE(review): the access/secret keys are hardcoded in these notes —
# rotate them and prefer passing credentials via environment variables
# (ACCESS_KEY/SECRET_KEY) instead of command-line flags.
juicefs format \
--storage minio \
--bucket http://localhost:9000/hudi \
--access-key tZcPkJgG1Bt98X6i61O9 \
--secret-key Pr1UTa6QTeIBqzlZSEgKZAteKnViinLoWGdmffrO \
"redis://localhost:6379/1" \
hudi
# Format a JuiceFS volume named "hudi" against the bitnami MinIO server:
# object storage is the bucket at 127.0.0.1:9000, metadata lives in
# Redis DB 1 on 192.168.120.181.
juicefs format \
  --storage=minio \
  --bucket=http://127.0.0.1:9000/hudi \
  --access-key=minio-root-user \
  --secret-key=minio-root-password \
  "redis://192.168.120.181:6379/1" \
  hudi
启动flink sql
./bin/sql-client.sh embedded
set sql-client.execution.result-mode=tableau;
-- Create a Hudi MERGE_ON_READ table stored on JuiceFS.
-- 'hoodie.fs.atomic_creation.support' = 'jfs' works around:
--   Caused by: org.apache.hudi.exception.HoodieLockException: Unsupported
--   scheme :jfs, since this fs can not support atomic creation
-- Fix: the original used a '#' inline comment inside the WITH clause;
-- Flink SQL only supports '--' and '/* ... */' comments, so pasting the
-- statement into sql-client failed to parse.
CREATE TABLE t1(
  uuid VARCHAR(20) PRIMARY KEY NOT ENFORCED,
  name VARCHAR(10),
  age INT,
  ts TIMESTAMP(3),
  `partition` VARCHAR(20)
)
PARTITIONED BY (`partition`)
WITH (
  'connector' = 'hudi',
  'path' = 'jfs://hudi/warehouse',
  'table.type' = 'MERGE_ON_READ',
  'hoodie.fs.atomic_creation.support' = 'jfs'
);
导入数据
-- Seed t1 with sample rows spread across partitions par1..par4.
INSERT INTO t1 VALUES
('id1','Danny',23,TIMESTAMP '1970-01-01 00:00:01','par1'),
('id2','Stephen',33,TIMESTAMP '1970-01-01 00:00:02','par1'),
('id3','Julian',53,TIMESTAMP '1970-01-01 00:00:03','par2'),
('id4','Fabian',31,TIMESTAMP '1970-01-01 00:00:04','par2'),
('id5','Sophia',18,TIMESTAMP '1970-01-01 00:00:05','par3'),
('id6','Emma',20,TIMESTAMP '1970-01-01 00:00:06','par3'),
('id7','Bob',44,TIMESTAMP '1970-01-01 00:00:07','par4'),
('id8','Han',56,TIMESTAMP '1970-01-01 00:00:08','par4');
注意,如果使用的是jdk11编译,就用jdk11运行,否则flink会报错
java.lang.NoSuchMethodError: java.nio.ByteBuffer.flip()Ljava/nio/ByteBuffer; 解决办法
原因
编译使用的jdk版本(如12)高于运行环境的jre版本(如1.8)
解决办法
指定maven编译版本,用release代替source/target