1.准备容器环境
1.1 容器网络桥接器
新建容器网络桥接器,构建一个容器的子网
docker network create --driver=bridge clickhouse_test
# 查看本机容器环境的所有网络
docker network ls
#查看桥接器下面有哪些容器
docker network inspect clickhouse_test
1.2 准备zookeeper容器集群
参考 cloud.tencent.com/developer/a…
3台zookeeper容器,zookeeper01、zookeeper02、zookeeper03 通过docker-compose方式部署 docker-compose.yml文件如下;
version: '3.1'
services:
zookeeper01:
image: zookeeper:3.5.8
restart: always
hostname: zookeeper01
environment:
ZOO_MY_ID: 1
ZOO_SERVERS: server.1=0.0.0.0:2888:3888;2181 server.2=zookeeper02:2888:3888;2181 server.3=zookeeper03:2888:3888;2181
zookeeper02:
image: zookeeper:3.5.8
restart: always
hostname: zookeeper02
environment:
ZOO_MY_ID: 2
ZOO_SERVERS: server.1=zookeeper01:2888:3888;2181 server.2=0.0.0.0:2888:3888;2181 server.3=zookeeper03:2888:3888;2181
zookeeper03:
image: zookeeper:3.5.8
restart: always
hostname: zookeeper03
environment:
ZOO_MY_ID: 3
ZOO_SERVERS: server.1=zookeeper01:2888:3888;2181 server.2=zookeeper02:2888:3888;2181 server.3=0.0.0.0:2888:3888;2181
#指定网络
networks:
default: # default意思是上面service没有配置网络,使用此网络
external:
name: clickhouse_test #使用自定义的桥接网络
每一台zookeeper指定hostname,networks为配置的桥接器clickhouse_test
接着我们在 docker-compose.yml 当前目录下运行如下命令:
#启动zookeeper容器
docker-compose up -d
#不需要使用的时候使用以下命令关闭并删除容器
docker-compose down
1.3 准备clickhouse容器
# 下载clickhouse镜像
docker pull yandex/clickhouse-server:latest
# 启动3个clickhouse容器,指定host 和name,network为配置的桥接器clickhouse_test
docker run -itd --network clickhouse_test -h "ch01" --name "ch01" yandex/clickhouse-server:latest /bin/bash
docker run -itd --network clickhouse_test -h "ch02" --name "ch02" yandex/clickhouse-server:latest /bin/bash
docker run -itd --network clickhouse_test -h "ch03" --name "ch03" yandex/clickhouse-server:latest /bin/bash
进入每一台clickhouse容器,安装ping和vim命令
apt-get update
apt-get install -y inetutils-ping
apt install -y vim
2 clickhouse副本配置( zookeeper+ReplicatedMergeTree副本方案)
构建1分片、3副本(3台机器纯副本模式)
2.1 配置文件
/etc/clickhouse-server/config.d目录中新建一个metrika.xml文件
touch metrika.xml
<?xml version="1.0"?>
<yandex>
<zookeeper-server>
<node index="1">
<host>zookeeper01</host>
<port>2181</port>
</node>
<node index="2">
<host>zookeeper02</host>
<port>2181</port>
</node>
<node index="3">
<host>zookeeper03</host>
<port>2181</port>
</node>
</zookeeper-server>
<macros>
<shard>01</shard>
<replica>ch01</replica>
</macros>
</yandex>
ch01值,每一台机器配置不同,建议以机器名称 ch01 ch02 ch03
config.xml 修改配置引入metrika.xml文件
<include_from>/etc/clickhouse-server/config.d/metrika.xml</include_from>
<zookeeper incl="zookeeper-server" optional="false" />
<macros incl="macros" optional="false" />
配置完成重启ch服务
clickhouse restart
# 启动clickhouse客户端
clickhouse-client
查看zookeeper信息表
SELECT * FROM system.zookeeper where path = '/';
2.2建库、建表测试
- 建库
每一个副本上都要执行create语句
CREATE DATABASE IF NOT EXISTS test_db ;
- 建表
每一个副本上都要执行create语句
--物理表
CREATE TABLE IF NOT EXISTS test_db.t1
(
`uid` UInt64 CODEC(T64,LZ4),
`timestamp` Datetime64(6) CODEC(DoubleDelta,LZ4),
`psSize` Float64 CODEC(Gorilla,LZ4)
)ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/t1', '{replica}')
PARTITION BY toYYYYMMDD(timestamp)
ORDER BY (uid, timestamp)
{shard}和{replica} 为上面配置文件中定义的macros宏
- 插入数据测试 --- 任意一台副本插入数据,其他副本是否会同步
# ch01插入
insert into test_db.t1 values(1,1640966400000000,1)
# ch02查询、 ch03查询
select * from test_db.t1
# ch02插入
insert into test_db.t1 values(2,1640966400000000,1)
# ch01查询、 ch03查询
select * from test_db.t1
# ch03插入
insert into test_db.t1 values(3,1640966400000000,1)
# ch01查询、 ch02查询
select * from test_db.t1
测试结果和预期一致,任意一台物理表插入数据,另外两个副本都会同步数据
2.3 更多副本测试案例
- 插入数据测试 --- ch02容器中clickhouse服务挂掉、 ch01或ch03插入数据, 然后ch02服务重新启动,是否会同步挂掉期间插入副本的数据
#案例1
#关闭ch02容器上的clickhouse服务
clickhouse stop
# ch01插入
insert into test_db.t1 values(21,1640966400000000,1)
# ch03查询
select * from test_db.t1
#启动ch02容器上的clickhouse服务
clickhouse restart
# ch02查询
select * from test_db.t1
#关闭ch02容器上的clickhouse服务
clickhouse stop
# 案例2
# ch03插入
insert into test_db.t1 values(23,1640966400000000,1)
# ch01查询
select * from test_db.t1
#启动ch02容器上的clickhouse服务
clickhouse restart
# ch02查询
select * from test_db.t1
测试结果:ch02容器关掉服务期间,其他副本插入的数据,ch02服务重启以后,可以同步服务挂掉期间插入的数据
- 插入数据测试 --- ch02容器挂掉、 ch01或ch03插入数据, 然后ch02容器重启,是否会同步挂掉期间插入副本的数据
# 关闭容器ch02
docker stop ch02
# ch01插入
insert into test_db.t1 values(31,1640966400000000,1)
# ch03查询
select * from test_db.t1
# 启动容器ch02
docker start ch02
# 进入容器
docker exec -it ch02 bash
# 启动clickhouse服务
clickhouse restart
# ch02查询
select * from test_db.t1
测试结果:ch02容器挂掉期间,其他容器clickhouse副本插入的数据,ch02启动以后会自动同步
- 测试 --- 只在两个副本上创建副本表,并插入数据,第三台读本再新建副本表,是否会同步数据
# ch01建副本表
CREATE TABLE IF NOT EXISTS test_db.t2
(
`uid` UInt64 CODEC(T64,LZ4),
`timestamp` Datetime64(6) CODEC(DoubleDelta,LZ4),
`psSize` Float64 CODEC(Gorilla,LZ4)
)ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/t2', '{replica}')
PARTITION BY toYYYYMMDD(timestamp)
ORDER BY (uid, timestamp)
# ch02建副本表
CREATE TABLE IF NOT EXISTS test_db.t2
(
`uid` UInt64 CODEC(T64,LZ4),
`timestamp` Datetime64(6) CODEC(DoubleDelta,LZ4),
`psSize` Float64 CODEC(Gorilla,LZ4)
)ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/t2', '{replica}')
PARTITION BY toYYYYMMDD(timestamp)
ORDER BY (uid, timestamp)
# ch01插入
insert into test_db.t2 values(1,1640966400000000,1)
# ch02查询
select * from test_db.t2
#ch03建副本表
CREATE TABLE IF NOT EXISTS test_db.t2
(
`uid` UInt64 CODEC(T64,LZ4),
`timestamp` Datetime64(6) CODEC(DoubleDelta,LZ4),
`psSize` Float64 CODEC(Gorilla,LZ4)
)ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/t2', '{replica}')
PARTITION BY toYYYYMMDD(timestamp)
ORDER BY (uid, timestamp)
# ch03查询
select * from test_db.t2
后建的副本表可以自动同步其他副本表之前插入的数据
- 测试 --- 增加一台clickhouse服务(3副本配置), 新增的服务创建副本表,是否会同步原副本的数据
#增加一台clickhouse服务
docker run -itd --network clickhouse_test -h "ch04" --name "ch04" yandex/clickhouse-server:latest /bin/bash
#进入容器
#安装ping和vim
apt-get update
apt-get install -y inetutils-ping
apt install -y vim
配置 metrika.xml
<?xml version="1.0"?>
<yandex>
<zookeeper-server>
<node index="1">
<host>zookeeper01</host>
<port>2181</port>
</node>
<node index="2">
<host>zookeeper02</host>
<port>2181</port>
</node>
<node index="3">
<host>zookeeper03</host>
<port>2181</port>
</node>
</zookeeper-server>
<macros>
<shard>01</shard>
<replica>ch04</replica>
</macros>
</yandex>
注意 ch04 配置
config.xml 修改配置引入metrika.xml文件
<include_from>/etc/clickhouse-server/config.d/metrika.xml</include_from>
<zookeeper incl="zookeeper-server" optional="false" />
<macros incl="macros" optional="false" />
配置完成重启ch服务
clickhouse restart
# 启动clickhouse客户端
clickhouse-client
建库、建表
CREATE DATABASE IF NOT EXISTS test_db ;
#建表1
CREATE TABLE IF NOT EXISTS test_db.t1
(
`uid` UInt64 CODEC(T64,LZ4),
`timestamp` Datetime64(6) CODEC(DoubleDelta,LZ4),
`psSize` Float64 CODEC(Gorilla,LZ4)
)ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/t1', '{replica}')
PARTITION BY toYYYYMMDD(timestamp)
ORDER BY (uid, timestamp)
#ch02查询
select * from test_db.t1;
#建表2
CREATE TABLE IF NOT EXISTS test_db.t2
(
`uid` UInt64 CODEC(T64,LZ4),
`timestamp` Datetime64(6) CODEC(DoubleDelta,LZ4),
`psSize` Float64 CODEC(Gorilla,LZ4)
)ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/t2', '{replica}')
PARTITION BY toYYYYMMDD(timestamp)
ORDER BY (uid, timestamp)
#ch02查询
select * from test_db.t2;
测试结果,新增的副本,可以同步原副本插入的数据
参考博客
cloud.tencent.com/developer/a…
3、集群+3副本(1分片、3副本)
创建本地表只需要在一台clickhouse服务上,集群会自动在另外两个副本上同步表 metrika.xml文件中加入如下内容
<yandex>
<clickhouse_remote_servers>
<ck_cluster>
<shard>
<!--分片0-->
<replica>
<host>ch01</host>
<port>9000</port>
</replica>
<replica>
<host>ch02</host>
<port>9000</port>
</replica>
<replica>
<host>ch03</host>
<port>9000</port>
</replica>
</shard>
</ck_cluster>
</clickhouse_remote_servers>
</yandex>
config.xml中引入配置
<remote_servers incl="clickhouse_remote_servers" optional="false" />
查询集群配置表信息
select * from system.clusters;
建表测试
建表测试,在其中一台机器建表,其他副本会自动构建副本表和分区表
--物理表DDL建表语句
CREATE TABLE IF NOT EXISTS test_db.t2 on cluster ck_cluster
(
`uid` UInt64 CODEC(T64,LZ4),
`timestamp` Datetime64(6) CODEC(DoubleDelta,LZ4),
`psSize` Float64 CODEC(Gorilla,LZ4)
)ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/test_db/t2', '{replica}')
PARTITION BY toYYYYMMDD(timestamp)
ORDER BY (uid, timestamp)
--{shard}同一分片下相同,{replica}必须保证唯一
--分布式表DDL建表语句(只是查询用分布式表)
create table test_db.t2_dist on cluster ck_cluster
(
`uid` UInt64 CODEC(T64,LZ4),
`timestamp` Datetime64(6) CODEC(DoubleDelta,LZ4),
`psSize` Float64 CODEC(Gorilla,LZ4)
)engine = Distributed('ck_cluster','test_db', 't2',rand());
在ch01上插入一条数据到副本表t2
insert into test_db.t2 values(1,1640966400000000,1)