redis脚本
#!/bin/bash
#
# 优化版Redis 7 Sentinel集群远程部署脚本 (CentOS 7)
# 从单一部署机器上部署整个集群,根据hosts文件确定节点角色和配置
#
# 使用方法:
# ./deploy_redis_sentinel.sh [hosts文件路径] [SSH用户名] [SSH密码] [Redis密码(可选)]
#
# 示例:
# ./deploy_redis_sentinel.sh hosts.txt root password123
#
# hosts文件格式示例:
# -----------------------
# # IP地址 主机名 角色 备注
# 192.168.1.101 redis-node1 master cluster_name=mycluster,password=mypassword
# 192.168.1.102 redis-node2 replica cluster_name=mycluster,password=mypassword
# 192.168.1.103 redis-node3 replica cluster_name=mycluster,password=mypassword
# -----------------------
#
# ANSI colour escape sequences consumed by log().
NC='\033[0m' # reset / no colour
BLUE='\033[0;34m'
YELLOW='\033[0;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'

# Redis release to build and the tarball it is fetched from.
REDIS_VERSION="7.0.15"
REDIS_DOWNLOAD_URL="https://download.redis.io/releases/redis-${REDIS_VERSION}.tar.gz"

# Directory layout used on the target hosts.
REDIS_RUN_DIR="/var/run/redis"
REDIS_LOG_DIR="/var/log/redis"
REDIS_DATA_DIR="/var/lib/redis"
REDIS_CONFIG_DIR="/etc/redis"

# Local, per-run log file; the directory is created relative to the
# current working directory.
LOG_DIR="logs"
LOG_FILE="${LOG_DIR}/redis_deploy_$(date +%Y%m%d_%H%M%S).log"
mkdir -p "${LOG_DIR}"
#######################################
# Print a timestamped, colour-coded message and append it to the log file.
# Globals:   RED, GREEN, YELLOW, BLUE, NC (read), LOG_FILE (read)
# Arguments: $1 - level: INFO | WARN | ERROR | CMD (anything else: no colour)
#            $2 - message text, emitted verbatim
# Outputs:   the formatted line on stdout and appended to $LOG_FILE
# Returns:   exit status of tee (last pipeline stage)
#######################################
log() {
  local level=$1
  local message=$2
  local timestamp color
  timestamp=$(date +"%Y-%m-%d %H:%M:%S")
  case "$level" in
    "INFO") color=$GREEN ;;
    "WARN") color=$YELLOW ;;
    "ERROR") color=$RED ;;
    "CMD") color=$BLUE ;;
    *) color=$NC ;;
  esac
  # Only the colour codes go through %b. The message uses %s so that
  # backslashes in it (e.g. captured remote output, Windows-style paths)
  # are logged as-is instead of being interpreted as escape sequences.
  printf '%b[%s] [%s] %s%b\n' "$color" "$timestamp" "$level" "$message" "$NC" \
    | tee -a "$LOG_FILE"
}
#######################################
# Log the outcome of a command according to its exit status.
# Arguments: $1 - exit status of the command
#            $2 - description of the command (used in the log message)
#            $3 - host the command ran on
# Returns:   0 when $1 is 0, otherwise 1
#######################################
check_status() {
  local rc=$1 what=$2 where=$3
  # Success path first; anything else (including a failed numeric test)
  # falls through to the error report.
  if [ "$rc" -eq 0 ]; then
    log "INFO" "在 $where 上执行命令成功: $what"
    return 0
  fi
  log "ERROR" "在 $where 上执行命令失败: $what"
  return 1
}
#######################################
# Print the usage text on stdout and terminate the script.
# Outputs:   usage, parameter description, hosts-file example
# Returns:   never returns; exits with status 1
#######################################
show_help() {
  # Unquoted delimiter so that $0 expands to the script name.
  cat << USAGE
用法: $0 [hosts文件路径] [SSH用户名] [SSH密码] [Redis密码(可选)]

参数说明:
 hosts文件路径 - 包含节点信息的hosts文件
 SSH用户名 - 用于SSH连接的用户名
 SSH密码 - 用于SSH连接的密码
 Redis密码 - Redis的认证密码(可选,也可以在hosts文件中指定)

hosts文件格式示例:
# IP地址 主机名 角色 备注
192.168.1.101 redis-node1 master cluster_name=mycluster,password=mypassword
192.168.1.102 redis-node2 replica cluster_name=mycluster,password=mypassword
192.168.1.103 redis-node3 replica cluster_name=mycluster,password=mypassword

示例:
 $0 hosts.txt root password123
USAGE
  exit 1
}
# Exactly three or four positional arguments are accepted; show_help exits.
if (( $# < 3 || $# > 4 )); then
  log "ERROR" "参数不足或过多"
  show_help
fi

# Positional parameters.
HOSTS_FILE=$1
SSH_USER=$2
SSH_PASS=$3
CMD_REDIS_PASS=$4 # Redis password from the command line; used only when the hosts file gives none

# The hosts file has to be an existing regular file.
[ -f "$HOSTS_FILE" ] || {
  log "ERROR" "hosts文件不存在: $HOSTS_FILE"
  exit 1
}

# sshpass is required for the password-based SSH sessions used below.
if ! command -v sshpass > /dev/null 2>&1; then
  log "ERROR" "sshpass 未安装,请先安装sshpass"
  log "INFO" "对于CentOS/RHEL: yum install -y sshpass"
  log "INFO" "对于Ubuntu/Debian: apt-get install -y sshpass"
  exit 1
fi
# Parse the hosts file into the global node tables.
#   NODE_ROLES / NODE_NAMES / NODE_NOTES : associative arrays keyed by IP
#   MASTER_IP   : IP of the (last) line whose role is "master"
#   REPLICAS    : IPs of all lines whose role is "replica", in file order
#   CLUSTER_NAME, REDIS_PASS : first cluster_name= / password= value found
#                              in the notes column
declare -A NODE_ROLES
declare -A NODE_NAMES
declare -A NODE_NOTES
MASTER_IP=""
REPLICAS=()
CLUSTER_NAME=""
REDIS_PASS=""
log "INFO" "解析hosts文件: $HOSTS_FILE"
# `|| [[ -n $line ]]` keeps the final line when the file does not end with a
# newline; without it the last node would be silently ignored.
while IFS= read -r line || [[ -n $line ]]; do
  # Strip a trailing CR so files saved with CRLF line endings parse cleanly.
  line=${line%$'\r'}
  # Skip comment lines and empty lines.
  if [[ $line =~ ^# ]] || [[ -z $line ]]; then
    continue
  fi
  # Split the line into: IP, hostname, role, notes. Anything after the
  # fourth field is discarded. Splitting with read (instead of an unquoted
  # echo piped to awk) avoids pathname expansion of the line's contents.
  # NOTE: HOSTNAME shadows the shell's own HOSTNAME variable; the name is
  # kept for compatibility with the original script.
  read -r IP HOSTNAME ROLE NOTES extra_fields <<< "$line"
  # Validate the IPv4 dotted-quad shape (octet ranges are not checked).
  if [[ ! $IP =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
    log "ERROR" "IP地址格式无效: $IP"
    continue
  fi
  # Record the node.
  NODE_ROLES[$IP]=$ROLE
  NODE_NAMES[$IP]=$HOSTNAME
  NODE_NOTES[$IP]=$NOTES
  # Extract cluster name and password from the notes column, if present.
  if [[ -n "$NOTES" ]]; then
    # key=value must start the notes or follow a comma/whitespace.
    if [[ $NOTES =~ (^|[,[:space:]])cluster_name=([^,[:space:]]+) ]]; then
      node_cluster_name="${BASH_REMATCH[2]}"
      if [[ -z "$CLUSTER_NAME" ]]; then
        CLUSTER_NAME="$node_cluster_name"
        log "INFO" "从hosts文件中提取集群名称: $CLUSTER_NAME"
      elif [[ "$CLUSTER_NAME" != "$node_cluster_name" ]]; then
        log "WARN" "节点 $IP 的集群名称 ($node_cluster_name) 与之前提取的集群名称 ($CLUSTER_NAME) 不一致"
      fi
    fi
    if [[ $NOTES =~ (^|[,[:space:]])password=([^,[:space:]]+) ]]; then
      node_password="${BASH_REMATCH[2]}"
      if [[ -z "$REDIS_PASS" ]]; then
        REDIS_PASS="$node_password"
        log "INFO" "从hosts文件中提取Redis密码"
      elif [[ "$REDIS_PASS" != "$node_password" ]]; then
        log "WARN" "节点 $IP 的Redis密码与之前提取的密码不一致"
      fi
    fi
  fi
  # Classify by role; a later "master" line replaces an earlier one.
  if [[ $ROLE == "master" ]]; then
    if [[ -n $MASTER_IP ]]; then
      log "WARN" "检测到多个主节点,将使用最后一个: $IP"
    fi
    MASTER_IP=$IP
  elif [[ $ROLE == "replica" ]]; then
    REPLICAS+=("$IP")
  else
    log "WARN" "未知的节点角色: $ROLE, IP: $IP"
  fi
  log "INFO" "发现节点: $IP, 主机名: $HOSTNAME, 角色: $ROLE, 备注: $NOTES"
done < "$HOSTS_FILE"
# Debug summary of the parse result. The password is masked so that it never
# reaches the terminal or captured CI output in clear text.
echo "DEBUG: CLUSTER_NAME=$CLUSTER_NAME, REDIS_PASS=${REDIS_PASS:+******}"
# Fall back to the default cluster name when the hosts file gives none.
if [[ -z "$CLUSTER_NAME" ]]; then
  CLUSTER_NAME="mymaster"
  log "INFO" "未在hosts文件中指定集群名称,使用默认值: $CLUSTER_NAME"
fi
# Redis password precedence: hosts file > command-line argument.
if [[ -z "$REDIS_PASS" ]]; then
  if [[ -n "$CMD_REDIS_PASS" ]]; then
    REDIS_PASS="$CMD_REDIS_PASS"
    log "INFO" "使用命令行参数中的Redis密码"
  else
    log "ERROR" "未指定Redis密码,请在hosts文件或命令行参数中指定"
    exit 1
  fi
fi
# A master node is mandatory; replicas are optional.
if [[ -z $MASTER_IP ]]; then
  log "ERROR" "未找到主节点,请检查hosts文件"
  exit 1
fi
if [[ ${#REPLICAS[@]} -eq 0 ]]; then
  log "WARN" "未找到从节点,将只部署主节点"
fi
# Show the effective configuration.
log "INFO" "======================================"
log "INFO" "Redis 7 Sentinel集群远程部署脚本"
log "INFO" "======================================"
log "INFO" "Redis版本: $REDIS_VERSION"
log "INFO" "集群名称: $CLUSTER_NAME"
log "INFO" "主节点IP: $MASTER_IP (${NODE_NAMES[$MASTER_IP]})"
for i in "${!REPLICAS[@]}"; do
  log "INFO" "从节点$((i+1)) IP: ${REPLICAS[$i]} (${NODE_NAMES[${REPLICAS[$i]}]})"
done
log "INFO" "SSH用户名: $SSH_USER"
log "INFO" "Redis密码已配置"
log "INFO" "日志文件: $LOG_FILE"
log "INFO" "======================================"
# Wait for operator confirmation (Enter continues, Ctrl+C aborts).
read -r -p "以上配置正确吗? 按[Enter]继续或Ctrl+C取消..."
#######################################
# Run a single command on a remote host over SSH and log the result.
# Globals:   SSH_USER, SSH_PASS, REDIS_PASS (read)
# Arguments: $1 - target host
#            $2 - command line to execute remotely
#            $3 - human-readable description for the log
# Outputs:   log lines (via log); captured remote stdout+stderr is logged
# Returns:   0 if the remote command succeeded, 1 otherwise
#######################################
remote_exec() {
  local host=$1
  local cmd=$2
  local desc=$3
  local output status
  # Many callers embed "-a <redis password>" in the command. Mask it before
  # logging so the secret is not written to the log file.
  local shown_cmd=$cmd
  if [[ -n "${REDIS_PASS:-}" ]]; then
    shown_cmd=${cmd//"$REDIS_PASS"/******}
  fi
  log "CMD" "[$host] $desc"
  log "CMD" "[$host] 执行: $shown_cmd"
  # The SSH password is handed to sshpass through the SSHPASS environment
  # variable (-e) rather than -p, which would expose it in the process list.
  output=$(SSHPASS="$SSH_PASS" sshpass -e ssh -o StrictHostKeyChecking=no "$SSH_USER@$host" "$cmd" 2>&1)
  status=$?
  if check_status "$status" "$desc" "$host"; then
    if [ -n "$output" ]; then
      log "INFO" "[$host] 输出: $output"
    fi
    return 0
  else
    if [ -n "$output" ]; then
      log "ERROR" "[$host] 错误输出: $output"
    fi
    return 1
  fi
}
#######################################
# Stream a script to a remote host and execute it there with `bash -s`.
# Globals:   SSH_USER, SSH_PASS, LOG_FILE (read)
# Arguments: $1 - target host
#            $2 - full text of the script to run
#            $3 - human-readable description for the log
# Outputs:   remote stdout+stderr on stdout, also appended to $LOG_FILE
# Returns:   0 if the remote script exited 0, 1 otherwise
#######################################
remote_exec_script() {
  local host=$1
  local script=$2
  local desc=$3
  local status
  log "CMD" "[$host] $desc"
  # The password goes through the SSHPASS environment variable (-e) so it
  # does not appear in the process list.
  printf '%s\n' "$script" \
    | SSHPASS="$SSH_PASS" sshpass -e ssh -o StrictHostKeyChecking=no "$SSH_USER@$host" "bash -s" 2>&1 \
    | tee -a "$LOG_FILE"
  # Stage 1 of the pipeline is sshpass/ssh, i.e. the remote script's status.
  status=${PIPESTATUS[1]}
  check_status "$status" "$desc" "$host"
}
#######################################
# Emit, on stdout, the bash script that installs and configures Redis and
# Sentinel on one node. The output has three parts: a static prologue
# (helpers, trap), the per-node variable assignments, and the install steps.
# Globals:   REDIS_PASS, REDIS_VERSION, REDIS_DOWNLOAD_URL, REDIS_CONFIG_DIR,
#            REDIS_DATA_DIR, REDIS_LOG_DIR, REDIS_RUN_DIR (read)
# Arguments: $1 - node type ("master"; anything else is treated as replica)
#            $2 - IP of the node being configured
#            $3 - IP of the master node
#            $4 - Sentinel cluster (master group) name
# Outputs:   the generated script text
#######################################
prepare_redis_script() {
  local node_type=$1
  local local_ip=$2
  local master_ip=$3
  local cluster_name=$4
  # Part 1: static prologue. The quoted delimiter keeps it literal.
  cat << 'EOT'
#!/bin/bash
# Redis安装和配置脚本
# 颜色定义
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
NC='\033[0m'
# 错误处理增强 - 设置严格的错误处理模式
set -e
# 日志函数
log_info() {
echo -e "${GREEN}[INFO] $1${NC}"
}
log_warn() {
echo -e "${YELLOW}[WARN] $1${NC}"
}
log_error() {
echo -e "${RED}[ERROR] $1${NC}" >&2
}
# 改进的错误处理函数
error_exit() {
log_error "$1"
exit 1
}
# 检查非零退出状态
check_exit_status() {
local rc=$?
if [ $rc -ne 0 ]; then
error_exit "上一个命令失败,退出代码: $rc"
fi
}
# 尝试执行命令,如果失败则记录但继续
try_command() {
if ! "$@"; then
log_warn "命令失败但继续执行: $*"
fi
}
# 清理函数 - 在脚本退出时执行
cleanup() {
log_info "执行清理操作..."
# 删除临时文件
rm -f /tmp/redis-*.tar.gz 2>/dev/null || true
log_info "清理完成"
}
# 设置退出陷阱
trap cleanup EXIT
EOT
  # Part 2: per-node settings. %q shell-quotes every value so that special
  # characters (quotes, $, backticks, spaces) in e.g. the password cannot
  # break or inject code into the generated script.
  printf '\n# 获取参数\n'
  printf 'NODE_TYPE=%q\n' "$node_type"
  printf 'LOCAL_IP=%q\n' "$local_ip"
  printf 'MASTER_IP=%q\n' "$master_ip"
  printf 'CLUSTER_NAME=%q\n' "$cluster_name"
  printf 'REDIS_PASSWORD=%q\n' "$REDIS_PASS"
  printf 'REDIS_VERSION=%q\n' "$REDIS_VERSION"
  printf 'REDIS_DOWNLOAD_URL=%q\n' "$REDIS_DOWNLOAD_URL"
  printf 'REDIS_CONFIG_DIR=%q\n' "$REDIS_CONFIG_DIR"
  printf 'REDIS_DATA_DIR=%q\n' "$REDIS_DATA_DIR"
  printf 'REDIS_LOG_DIR=%q\n' "$REDIS_LOG_DIR"
  printf 'REDIS_RUN_DIR=%q\n' "$REDIS_RUN_DIR"
  # Part 3: install/configure steps. Every \$ below is expanded on the
  # remote host at run time, not here. The text is kept at column 0 because
  # it contains nested heredocs whose terminators must start the line.
  echo "# 调试信息
log_info \"开始部署 Redis [\$NODE_TYPE] 节点\"
log_info \"集群名称: \$CLUSTER_NAME\"
log_info \"本机IP: \$LOCAL_IP\"
log_info \"主节点IP: \$MASTER_IP\"
# 计算Redis内存配置 - 使用系统内存的75%,但不使用百分比表示
TOTAL_MEM_KB=\$(free | grep Mem | awk '{print \$2}')
REDIS_MEM_KB=\$((\$TOTAL_MEM_KB * 75 / 100))
# 如果内存大于1GB,则以GB为单位,否则以MB为单位
if [ \$REDIS_MEM_KB -gt 1048576 ]; then
REDIS_MEM=\$((\$REDIS_MEM_KB / 1048576))\"gb\"
else
REDIS_MEM=\$((\$REDIS_MEM_KB / 1024))\"mb\"
fi
# 检查并优化文件描述符限制
MAX_OPEN_FILES=\$(ulimit -n)
if [ \$MAX_OPEN_FILES -lt 65535 ]; then
log_warn \"系统文件描述符限制小于推荐值65535,当前值: \$MAX_OPEN_FILES\"
# 尝试增加当前会话的限制
ulimit -n 65535 2>/dev/null || log_warn \"无法增加文件描述符限制,将在稍后尝试系统级别配置\"
fi
log_info \"Redis内存配置: \$REDIS_MEM (系统内存的75%)\"
log_info \"系统文件描述符限制: \$(ulimit -n)\"
echo -e \"\${GREEN}[1/7] 安装前提条件...\${NC}\"
# 已移除系统更新步骤以加快部署速度
yum install -y epel-release
check_exit_status
yum groupinstall -y \"Development Tools\"
check_exit_status
yum install -y tcl wget systemd-devel
check_exit_status
echo -e \"\${GREEN}[2/7] 从源码安装Redis \${REDIS_VERSION}...\${NC}\"
cd /tmp
if [ ! -f redis-\${REDIS_VERSION}.tar.gz ]; then
wget \${REDIS_DOWNLOAD_URL}
check_exit_status
fi
tar xzf redis-\${REDIS_VERSION}.tar.gz
check_exit_status
cd redis-\${REDIS_VERSION}
make
check_exit_status
make install
check_exit_status
echo -e \"\${GREEN}[3/7] 创建Redis用户和目录...\${NC}\"
useradd -r -s /bin/false redis 2>/dev/null || echo \"用户已存在\"
mkdir -p \${REDIS_CONFIG_DIR}
mkdir -p \${REDIS_DATA_DIR}
mkdir -p \${REDIS_LOG_DIR}
mkdir -p \${REDIS_RUN_DIR}
chown -R redis:redis \${REDIS_DATA_DIR}
chown -R redis:redis \${REDIS_LOG_DIR}
chown -R redis:redis \${REDIS_RUN_DIR}
chown -R redis:redis \${REDIS_CONFIG_DIR}
echo -e \"\${GREEN}[4/7] 创建Redis配置...\${NC}\"
# 主节点Redis配置 - 添加masterauth配置
if [ \"\$NODE_TYPE\" = \"master\" ]; then
cat > \${REDIS_CONFIG_DIR}/redis.conf << EOF
# 基本设置
bind \$LOCAL_IP
port 6379
protected-mode yes
daemonize yes
supervised systemd
pidfile \${REDIS_RUN_DIR}/redis.pid
loglevel notice
logfile \${REDIS_LOG_DIR}/redis.log
dir \${REDIS_DATA_DIR}
# 性能优化设置
databases 16
maxmemory \$REDIS_MEM
maxmemory-policy volatile-lru
tcp-backlog 511
tcp-keepalive 60
timeout 0
io-threads 4
io-threads-do-reads yes
# 连接池优化
maxclients 10000
min-replicas-to-write 1
min-replicas-max-lag 10
# 持久化设置
save 900 1
save 300 10
save 60 10000
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes
dbfilename dump.rdb
appendonly yes
appendfilename \"appendonly.aof\"
appendfsync everysec
no-appendfsync-on-rewrite yes
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
# 慢查询日志
slowlog-log-slower-than 10000
slowlog-max-len 128
# 安全设置
requirepass \$REDIS_PASSWORD
# 添加主节点认证密码配置
masterauth \$REDIS_PASSWORD
EOF
# 从节点Redis配置
else
cat > \${REDIS_CONFIG_DIR}/redis.conf << EOF
# 基本设置
bind \$LOCAL_IP
port 6379
protected-mode yes
daemonize yes
supervised systemd
pidfile \${REDIS_RUN_DIR}/redis.pid
loglevel notice
logfile \${REDIS_LOG_DIR}/redis.log
dir \${REDIS_DATA_DIR}
# 复制设置
replicaof \$MASTER_IP 6379
masterauth \$REDIS_PASSWORD
replica-serve-stale-data yes
replica-read-only yes
replica-priority 100
# 性能优化设置
databases 16
maxmemory \$REDIS_MEM
maxmemory-policy volatile-lru
tcp-backlog 511
tcp-keepalive 60
timeout 0
io-threads 4
io-threads-do-reads yes
# 连接池优化
maxclients 10000
# 持久化设置
save 900 1
save 300 10
save 60 10000
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes
dbfilename dump.rdb
appendonly yes
appendfilename \"appendonly.aof\"
appendfsync everysec
no-appendfsync-on-rewrite yes
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
# 慢查询日志
slowlog-log-slower-than 10000
slowlog-max-len 128
# 安全设置
requirepass \$REDIS_PASSWORD
EOF
fi
echo -e \"\${GREEN}[5/7] 创建Sentinel配置...\${NC}\"
echo \"使用集群名称: \$CLUSTER_NAME 和主节点IP: \$MASTER_IP\"
cat > \${REDIS_CONFIG_DIR}/sentinel.conf << EOF
# 基本设置
bind \$LOCAL_IP
port 26379
daemonize yes
supervised systemd
pidfile \${REDIS_RUN_DIR}/sentinel.pid
logfile \${REDIS_LOG_DIR}/sentinel.log
dir \${REDIS_DATA_DIR}
# Sentinel优化配置
sentinel monitor \$CLUSTER_NAME \$MASTER_IP 6379 2
sentinel auth-pass \$CLUSTER_NAME \$REDIS_PASSWORD
sentinel down-after-milliseconds \$CLUSTER_NAME 3000
sentinel failover-timeout \$CLUSTER_NAME 30000
sentinel parallel-syncs \$CLUSTER_NAME 1
# 性能调优
sentinel client-reconfig-script \$CLUSTER_NAME /usr/local/bin/redis-cli
sentinel deny-scripts-reconfig yes
sentinel resolve-hostnames no
# 连接池和并发设置
sentinel announce-ip \$LOCAL_IP
sentinel announce-port 26379
# Sentinel安全设置 (Redis 6.2+支持)
# 为Sentinel之间的通信添加安全认证
sentinel sentinel-pass \$REDIS_PASSWORD
EOF
# 确保权限正确
chown redis:redis \${REDIS_CONFIG_DIR}/redis.conf
chown redis:redis \${REDIS_CONFIG_DIR}/sentinel.conf
chmod 640 \${REDIS_CONFIG_DIR}/redis.conf
chmod 640 \${REDIS_CONFIG_DIR}/sentinel.conf
echo -e \"\${GREEN}[6/7] 创建Systemd服务...\${NC}\"
# Redis服务
cat > /etc/systemd/system/redis.service << EOF
[Unit]
Description=Redis In-Memory Data Store
After=network.target
[Service]
User=redis
Group=redis
Type=notify
ExecStart=/usr/local/bin/redis-server \${REDIS_CONFIG_DIR}/redis.conf
ExecStop=/usr/local/bin/redis-cli -h \$LOCAL_IP -p 6379 -a \$REDIS_PASSWORD shutdown
Restart=always
LimitNOFILE=65535
TimeoutStartSec=60
TimeoutStopSec=60
[Install]
WantedBy=multi-user.target
EOF
# Sentinel服务
cat > /etc/systemd/system/redis-sentinel.service << EOF
[Unit]
Description=Redis Sentinel
After=network.target redis.service
[Service]
User=redis
Group=redis
Type=notify
ExecStart=/usr/local/bin/redis-sentinel \${REDIS_CONFIG_DIR}/sentinel.conf
ExecStop=/usr/local/bin/redis-cli -h \$LOCAL_IP -p 26379 shutdown
Restart=always
LimitNOFILE=65535
TimeoutStartSec=60
TimeoutStopSec=60
[Install]
WantedBy=multi-user.target
EOF
echo -e \"\${GREEN}[7/7] 设置系统参数并启动服务...\${NC}\"
cat > /etc/sysctl.d/60-redis.conf << EOF
# Redis推荐系统设置
vm.overcommit_memory = 1
net.core.somaxconn = 65535
vm.swappiness = 0
net.ipv4.tcp_sack = 1
net.ipv4.tcp_timestamps = 1
net.ipv4.tcp_window_scaling = 1
net.ipv4.tcp_congestion_control = cubic
net.ipv4.tcp_syncookies = 1
# 文件描述符优化
fs.file-max = 500000
EOF
# 应用sysctl设置
sysctl -p /etc/sysctl.d/60-redis.conf
# 创建文件描述符限制配置
cat > /etc/security/limits.d/redis.conf << EOF
redis soft nofile 65535
redis hard nofile 65535
EOF
# 禁用透明大页面
echo never > /sys/kernel/mm/transparent_hugepage/enabled
echo never > /sys/kernel/mm/transparent_hugepage/defrag
# 确保重启后保持设置
cat > /etc/rc.local << EOF
#!/bin/bash
echo never > /sys/kernel/mm/transparent_hugepage/enabled
echo never > /sys/kernel/mm/transparent_hugepage/defrag
exit 0
EOF
chmod +x /etc/rc.local
# 启动服务
systemctl daemon-reload
systemctl enable redis
systemctl enable redis-sentinel
systemctl start redis
# 如果是主节点,等待一段时间再启动Sentinel,确保主节点完全启动
if [ \"\$NODE_TYPE\" = \"master\" ]; then
echo -e \"\${YELLOW}等待主节点Redis启动完成...\${NC}\"
sleep 5
fi
# 检查Redis服务状态
echo -e \"\${YELLOW}检查Redis服务状态...\${NC}\"
systemctl status redis
if [ \$? -ne 0 ]; then
log_error \"Redis服务未能正常启动,请检查日志\"
exit 1
fi
# 启动Sentinel服务
systemctl start redis-sentinel
# 检查Sentinel服务状态
echo -e \"\${YELLOW}检查Sentinel服务状态...\${NC}\"
systemctl status redis-sentinel
if [ \$? -ne 0 ]; then
log_error \"Sentinel服务未能正常启动,请检查日志\"
exit 1
fi
# 验证部署
echo -e \"\${GREEN}验证部署...\${NC}\"
# 等待服务完全启动
sleep 3
# 检查Redis状态
echo -e \"\${YELLOW}Redis状态:\${NC}\"
redis-cli -h \$LOCAL_IP -a \$REDIS_PASSWORD info replication | grep -E \"role:|connected_slaves:|master_host:|master_port:\"
redis-cli -h \$LOCAL_IP -a \$REDIS_PASSWORD info memory | grep -E \"used_memory_human:|maxmemory_human:\"
# 检查Sentinel状态
echo -e \"\${YELLOW}Sentinel状态:\${NC}\"
redis-cli -h \$LOCAL_IP -p 26379 sentinel master \$CLUSTER_NAME | grep -E \"name|ip|port|flags|num-slaves|num-other-sentinels\"
echo -e \"\${GREEN}========================================\${NC}\"
echo -e \"\${GREEN}Redis 7 Sentinel节点部署完成!\${NC}\"
echo -e \"\${YELLOW}集群名称: \$CLUSTER_NAME\${NC}\"
if [ \"\$NODE_TYPE\" = \"master\" ]; then
echo -e \"\${YELLOW}此节点已配置为主节点\${NC}\"
else
echo -e \"\${YELLOW}此节点已配置为从节点,复制来自 \$MASTER_IP\${NC}\"
fi
echo -e \"\${GREEN}========================================\${NC}\"
"
}
#######################################
# Check that a password-based SSH login to a host works.
# Globals:   SSH_USER, SSH_PASS (read)
# Arguments: $1 - host to probe
# Outputs:   log lines only; output of the probe itself is discarded
# Returns:   0 if the SSH session could be established, 1 otherwise
#######################################
test_ssh_connection() {
  local host=$1
  log "INFO" "测试到 $host 的SSH连接"
  # The password is supplied through the SSHPASS environment variable (-e)
  # so that it does not show up in the process list.
  if SSHPASS="$SSH_PASS" sshpass -e ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 \
    "$SSH_USER@$host" "echo 连接成功" &> /dev/null; then
    log "INFO" "SSH连接到 $host 成功"
    return 0
  fi
  log "ERROR" "无法连接到 $host,请检查SSH设置或主机可达性"
  return 1
}
# Pre-flight: probe SSH reachability of every node before deploying anything.
log "INFO" "检查所有节点的SSH连接"
failed_hosts=()
# Master first, then the replicas in hosts-file order.
for node_ip in "$MASTER_IP" "${REPLICAS[@]}"; do
  test_ssh_connection "$node_ip" || failed_hosts+=("$node_ip")
done
# When any probe failed, list the hosts and let the operator decide.
if (( ${#failed_hosts[@]} > 0 )); then
  log "ERROR" "以下节点的SSH连接测试失败:"
  for host in "${failed_hosts[@]}"; do
    printf ' - %s (%s)\n' "$host" "${NODE_NAMES[$host]}"
  done
  read -p "是否继续部署过程? [y/N] " continue_deploy
  case "$continue_deploy" in
    [Yy]) ;;
    *)
      log "INFO" "部署被用户取消"
      exit 1
      ;;
  esac
  log "WARN" "用户选择继续部署,尽管有SSH连接问题"
fi
#######################################
# Log basic resource figures of a host: memory, disk, load, open-file limit.
# Arguments: $1 - host to inspect
# Returns:   status of the last probe (the open-file-limit check)
#######################################
check_system_resources() {
  local host=$1
  log "INFO" "检查 $host 的系统资源"
  # Probe commands and their log descriptions, index-aligned.
  local -a probe_cmds=(
    "free -m | awk '/Mem:/ {print \$2}'"
    "df -h | grep '/'"
    "uptime | awk '{print \$(NF-2),\$(NF-1),\$NF}'"
    "ulimit -n"
  )
  local -a probe_descs=(
    "检查可用内存"
    "检查磁盘空间"
    "检查CPU负载"
    "检查打开文件限制"
  )
  local k
  for k in "${!probe_cmds[@]}"; do
    remote_exec "$host" "${probe_cmds[$k]}" "${probe_descs[$k]}"
  done
}
#######################################
# Deploy Redis + Sentinel on the master node, then pause for start-up.
# Globals:   MASTER_IP, NODE_NAMES, CLUSTER_NAME (read); master_script (written)
# Returns:   0 on success; exits the whole script with 1 if deployment fails
#######################################
deploy_master() {
  log "INFO" "开始部署主节点 $MASTER_IP (${NODE_NAMES[$MASTER_IP]})"
  check_system_resources "$MASTER_IP"
  # Generate the node script with the master acting as its own master IP.
  master_script=$(prepare_redis_script "master" "$MASTER_IP" "$MASTER_IP" "$CLUSTER_NAME")
  # A failed master is fatal: replicas would have nothing to follow.
  remote_exec_script "$MASTER_IP" "$master_script" "部署Redis主节点" || {
    log "ERROR" "主节点部署失败,终止进程"
    exit 1
  }
  log "INFO" "主节点部署完成"
  # Grace period before replicas start connecting.
  log "INFO" "等待主节点完全启动 (10秒)..."
  sleep 10
}
#######################################
# Deploy Redis + Sentinel on one replica node. A failure is logged but does
# not abort the script.
# Globals:   MASTER_IP, NODE_NAMES, CLUSTER_NAME (read); replica_script (written)
# Arguments: $1 - replica IP
#            $2 - 1-based replica number (used in log messages only)
#######################################
deploy_replica() {
  local replica_ip=$1
  local replica_num=$2
  log "INFO" "开始部署从节点${replica_num} ${replica_ip} (${NODE_NAMES[$replica_ip]})"
  check_system_resources "$replica_ip"
  replica_script=$(prepare_redis_script "replica" "$replica_ip" "$MASTER_IP" "$CLUSTER_NAME")
  if remote_exec_script "$replica_ip" "$replica_script" "部署Redis从节点${replica_num}"; then
    log "INFO" "从节点${replica_num}部署完成"
  else
    log "ERROR" "从节点${replica_num}部署失败,但继续处理其他节点"
  fi
}
#######################################
# Query the master's replication role and memory figures via redis-cli.
# Globals:   MASTER_IP, REDIS_PASS (read)
# Returns:   1 if the role query fails, otherwise 0 (the memory query's
#            result does not affect the return value)
#######################################
check_master_status() {
  log "INFO" "检查主节点状态"
  # Common redis-cli prefix shared by both queries.
  local cli="redis-cli -h $MASTER_IP -a $REDIS_PASS"
  remote_exec "$MASTER_IP" "$cli info replication | grep role" "检查主节点角色" || {
    log "ERROR" "无法检查主节点状态"
    return 1
  }
  remote_exec "$MASTER_IP" \
    "$cli info memory | grep -E 'used_memory_human:|maxmemory_human:'" \
    "检查主节点内存使用情况"
  return 0
}
#######################################
# Query a replica's replication role/master host and memory figures.
# Globals:   REDIS_PASS (read)
# Arguments: $1 - replica IP
#            $2 - 1-based replica number (used in log messages only)
# Returns:   1 if the role query fails, otherwise 0
#######################################
check_replica_status() {
  local replica_ip=$1
  local replica_num=$2
  log "INFO" "检查从节点${replica_num}状态"
  local cli="redis-cli -h $replica_ip -a $REDIS_PASS"
  remote_exec "$replica_ip" \
    "$cli info replication | grep -E 'role:|master_host:'" \
    "检查从节点${replica_num}角色和主节点" || {
    log "ERROR" "无法检查从节点${replica_num}状态"
    return 1
  }
  remote_exec "$replica_ip" \
    "$cli info memory | grep -E 'used_memory_human:|maxmemory_human:'" \
    "检查从节点${replica_num}内存使用情况"
  return 0
}
#######################################
# Query the Sentinel on a host for its view of the master and its quorum.
# Globals:   CLUSTER_NAME (read)
# Arguments: $1 - host running the Sentinel (port 26379)
#            $2 - label for that host, used in log messages
# Returns:   1 if the "sentinel master" query fails, otherwise 0
#######################################
check_sentinel_status() {
  local host=$1
  local desc=$2
  log "INFO" "检查${desc}上的Sentinel状态"
  local sentinel_cli="redis-cli -h $host -p 26379 sentinel"
  remote_exec "$host" \
    "$sentinel_cli master $CLUSTER_NAME | grep -E 'name|ip|port|num-slaves|num-other-sentinels|quorum'" \
    "检查${desc}上的Sentinel状态" || {
    log "ERROR" "无法检查${desc}上的Sentinel状态"
    return 1
  }
  # Quorum/health check; its outcome is logged but not returned.
  remote_exec "$host" "$sentinel_cli ckquorum $CLUSTER_NAME" "检查${desc}上的Sentinel健康状态"
  return 0
}
#######################################
# Smoke-test the cluster: write a key on the master, read it back from each
# replica, and optionally restart the master's Redis to observe Sentinel.
# Globals:   MASTER_IP, REDIS_PASS, REPLICAS (read)
# Inputs:    reads one line from stdin; "y" enables the failover test
# Returns:   0 when the failover test is skipped, otherwise the status of
#            the final Sentinel check
#######################################
test_cluster_functionality() {
  local replica_ip test_failover
  log "INFO" "测试集群功能"
  # Write a test value on the master.
  remote_exec "$MASTER_IP" "redis-cli -h $MASTER_IP -a $REDIS_PASS set test_key test_value" "写入测试"
  # Read the test value back from every replica.
  for replica_ip in "${REPLICAS[@]}"; do
    remote_exec "$replica_ip" "redis-cli -h $replica_ip -a $REDIS_PASS get test_key" "读取测试"
  done
  # Optional failover test.
  log "WARN" "是否测试故障转移?这将重启主节点Redis实例。[y/N]"
  read -r -p "按[Enter]跳过或输入'y'继续: " test_failover
  if [[ "$test_failover" != "y" ]]; then
    return 0
  fi
  # Without replicas there is nothing to fail over to, and the follow-up
  # check would SSH to an empty host name; skip instead of restarting.
  if (( ${#REPLICAS[@]} == 0 )); then
    log "WARN" "没有从节点,无法进行故障转移测试,已跳过"
    return 0
  fi
  remote_exec "$MASTER_IP" "systemctl restart redis" "测试故障转移"
  sleep 10
  check_sentinel_status "${REPLICAS[0]}" "故障转移后的Sentinel"
}
#######################################
# Write hosts_sample.txt into the current directory and stop the script so
# the operator can edit it. Does nothing if the file already exists.
# Returns:   0 if the sample already exists; otherwise exits with status 0
#######################################
create_sample_hosts_file() {
  local sample_file="hosts_sample.txt"
  # Never overwrite an existing sample.
  [ ! -f "$sample_file" ] || return 0
  log "INFO" "创建示例hosts文件: $sample_file"
  cat > "$sample_file" << EOF
# IP地址 主机名 角色 备注
# 请根据实际情况修改以下内容
192.168.1.101 redis-node1 master cluster_name=mycluster,password=mypassword
192.168.1.102 redis-node2 replica cluster_name=mycluster,password=mypassword
192.168.1.103 redis-node3 replica cluster_name=mycluster,password=mypassword
EOF
  log "INFO" "请编辑示例文件 $sample_file 然后重新运行脚本"
  exit 0
}
# ---- Deployment driver ----
# A missing or zero-length hosts file triggers generation of a sample file.
[ -s "$HOSTS_FILE" ] || create_sample_hosts_file
log "INFO" "开始Redis Sentinel集群部署"

# Step 1: the master is deployed first (deploy_master exits on failure).
deploy_master

# Step 2: verify the master; a failed verification is logged, not fatal.
check_master_status || log "ERROR" "主节点验证失败,但继续部署过程"

# Step 3: deploy all replicas in parallel, one background job each.
log "INFO" "开始并行部署从节点"
pids=()
for idx in "${!REPLICAS[@]}"; do
  log "INFO" "启动从节点${REPLICAS[$idx]}的部署进程"
  (deploy_replica "${REPLICAS[$idx]}" "$((idx+1))") &
  pids+=($!)
done
# Barrier: wait for every replica job in launch order.
for job in "${pids[@]}"; do
  wait $job
  log "INFO" "从节点部署进程 $job 已完成"
done

# Step 4: post-deployment status checks.
log "INFO" "所有节点已部署,开始检查集群状态"
check_master_status
for idx in "${!REPLICAS[@]}"; do
  check_replica_status "${REPLICAS[$idx]}" "$((idx+1))"
done
# Sentinel view from the master, then from each replica.
check_sentinel_status "$MASTER_IP" "主节点 (${NODE_NAMES[$MASTER_IP]})"
for idx in "${!REPLICAS[@]}"; do
  replica_addr=${REPLICAS[$idx]}
  check_sentinel_status "$replica_addr" "从节点$((idx+1)) (${NODE_NAMES[$replica_addr]})"
done

# Step 5: functional smoke test (interactive).
test_cluster_functionality

# Final summary.
log "INFO" "==========================================="
log "INFO" "Redis 7 Sentinel集群部署已完成"
log "INFO" "集群名称: $CLUSTER_NAME"
log "INFO" "主节点: $MASTER_IP (${NODE_NAMES[$MASTER_IP]})"
for idx in "${!REPLICAS[@]}"; do
  replica_addr=${REPLICAS[$idx]}
  log "INFO" "从节点$((idx+1)): $replica_addr (${NODE_NAMES[$replica_addr]})"
done
log "INFO" "Redis端口: 6379"
log "INFO" "Sentinel端口: 26379"
log "INFO" "详细日志请查看: $LOG_FILE"
log "INFO" "==========================================="
使用教程
[root@vms76 ~]# chmod +x depoy-redis.sh
[root@vms76 ~]# ./depoy-redis.sh
[2025-04-28 21:19:26] [ERROR] 参数不足或过多
用法: ./depoy-redis.sh [hosts文件路径] [SSH用户名] [SSH密码] [Redis密码(可选)]
参数说明:
hosts文件路径 - 包含节点信息的hosts文件
SSH用户名 - 用于SSH连接的用户名
SSH密码 - 用于SSH连接的密码
Redis密码 - Redis的认证密码(可选,也可以在hosts文件中指定)
hosts文件格式示例:
# IP地址 主机名 角色 备注
192.168.1.101 redis-node1 master cluster_name=mycluster,password=mypassword
192.168.1.102 redis-node2 replica cluster_name=mycluster,password=mypassword
192.168.1.103 redis-node3 replica cluster_name=mycluster,password=mypassword
示例:
./depoy-redis.sh hosts.txt root password123
测试
import redis
from redis.sentinel import Sentinel
import time
import sys
def main():
    """Continuously write keys through Sentinel to observe failover.

    Connects to the Sentinel nodes listed below, then writes one key per
    second to whichever node Sentinel currently reports as master, printing
    the master address after every write. Runs until interrupted (Ctrl+C).
    """
    # Configuration; replace with the values of your own deployment.
    master_name = "mycluster"
    password = "mypassword"
    # Sentinel endpoints.
    sentinel_hosts = [
        ('192.168.26.76', 26379),
        ('192.168.26.77', 26379),
        ('192.168.26.78', 26379)
    ]
    print("正在连接到Redis Sentinel集群...")
    # Create the Sentinel connection.
    sentinel = Sentinel(sentinel_hosts, socket_timeout=1.0, password=password)
    counter = 0
    try:
        # Create the master client once instead of once per iteration, so a
        # new client and connection pool are not built every second.
        # Assumes the redis-py client re-resolves the master through Sentinel
        # when it reconnects after a failover - confirm against redis-py docs.
        master = sentinel.master_for(master_name, socket_timeout=1.0, password=password)
        while True:
            counter += 1
            key = f"test_key_{counter}"
            value = f"test_value_{int(time.time())}"
            try:
                # Ask Sentinel which node is the master right now (for display).
                master_info = sentinel.discover_master(master_name)
                current_master = f"{master_info[0]}:{master_info[1]}"
                # Write the data.
                result = master.set(key, value)
                print(f"[{counter}] 成功写入到 {current_master}, 结果: {result}, key: {key}")
                # Pause one second between writes so the output is readable.
                time.sleep(1)
            except Exception as e:
                print(f"[{counter}] 写入失败: {e}", file=sys.stderr)
                # Back off briefly so a dead master does not cause a busy loop.
                time.sleep(3)
            # Print a status summary every 10 operations.
            if counter % 10 == 0:
                try:
                    master_info = sentinel.discover_master(master_name)
                    current_master = f"{master_info[0]}:{master_info[1]}"
                    print("\n====== 当前状态 ======")
                    print(f"已执行操作: {counter}")
                    print(f"当前主节点: {current_master}")
                    print("======================\n")
                except Exception as e:
                    print(f"获取主节点信息失败: {e}", file=sys.stderr)
    except KeyboardInterrupt:
        print("\n程序被用户中断")
    except Exception as e:
        print(f"发生异常: {e}", file=sys.stderr)


if __name__ == "__main__":
    main()
测试脚本运行期间,停止 master 节点上的 Redis 服务,观察写入是否自动切换到新的主节点,即可验证故障转移。