shell脚本部署redis哨兵

161 阅读11分钟

redis脚本

#!/bin/bash
#
# 优化版Redis 7 Sentinel集群远程部署脚本 (CentOS 7)
# 从单一部署机器上部署整个集群,根据hosts文件确定节点角色和配置
#
# 使用方法:
#   ./deploy_redis_sentinel.sh [hosts文件路径] [SSH用户名] [SSH密码] [Redis密码(可选)]
#
# 示例:
#   ./deploy_redis_sentinel.sh hosts.txt root password123
#
# hosts文件格式示例:
# -----------------------
# # IP地址        主机名        角色        备注
# 192.168.1.101  redis-node1  master     cluster_name=mycluster,password=mypassword
# 192.168.1.102  redis-node2  replica    cluster_name=mycluster,password=mypassword
# 192.168.1.103  redis-node3  replica    cluster_name=mycluster,password=mypassword
# -----------------------
#

# ANSI color escape sequences used by log(); they are stored as literal
# backslash sequences and expanded when the message is printed.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
NC='\033[0m' # reset / no color

# Local log location: one timestamped file per run under ./logs.
LOG_DIR="logs"
LOG_FILE="${LOG_DIR}/redis_deploy_$(date +%Y%m%d_%H%M%S).log"

# Redis release to build and the directories used on every target node.
REDIS_VERSION="7.0.15"
REDIS_DOWNLOAD_URL="https://download.redis.io/releases/redis-${REDIS_VERSION}.tar.gz"
REDIS_CONFIG_DIR="/etc/redis"
REDIS_DATA_DIR="/var/lib/redis"
REDIS_LOG_DIR="/var/log/redis"
REDIS_RUN_DIR="/var/run/redis"

# Create the log directory up front. Every log() call appends to LOG_FILE,
# so stop with a clear message if the directory cannot be created instead of
# failing on each later write.
if ! mkdir -p -- "$LOG_DIR"; then
    echo "无法创建日志目录: $LOG_DIR" >&2
    exit 1
fi

#######################################
# Print a timestamped, colorized message and append it to the run log.
# Globals:   GREEN, YELLOW, RED, BLUE, NC (read); LOG_FILE (appended to)
# Arguments: $1 - level: INFO, WARN, ERROR or CMD (anything else: no color)
#            $2 - message text, printed verbatim
# Outputs:   "[timestamp] [level] message" on stdout and in LOG_FILE
# Returns:   exit status of the print/tee pipeline
#######################################
log() {
    local level=$1
    local message=$2
    local timestamp color
    timestamp=$(date +"%Y-%m-%d %H:%M:%S")

    case "$level" in
        INFO)  color=$GREEN ;;
        WARN)  color=$YELLOW ;;
        ERROR) color=$RED ;;
        CMD)   color=$BLUE ;;
        *)     color=$NC ;;
    esac

    # %b expands the escape sequences of the color codes only; the message is
    # passed through %s so backslashes in it (e.g. in captured remote output)
    # are logged literally. LOG_FILE is quoted so paths with spaces work.
    printf '%b[%s] [%s] %s%b\n' "$color" "$timestamp" "$level" "$message" "$NC" \
        | tee -a "${LOG_FILE:-/dev/null}"
}

#######################################
# Log the outcome of a command based on its exit status.
# Arguments: $1 - exit status of the command
#            $2 - command (or description) to mention in the log line
#            $3 - host the command ran on
# Returns:   0 when the status is zero, 1 otherwise
#######################################
check_status() {
    local rc=$1
    local what=$2
    local where=$3

    # Guard clause: handle the failure case first.
    if ! [ $rc -eq 0 ]; then
        log "ERROR" "在 $where 上执行命令失败: $what"
        return 1
    fi

    log "INFO" "在 $where 上执行命令成功: $what"
    return 0
}

#######################################
# Print the usage text (synopsis, argument descriptions, hosts-file format
# example and a sample invocation) to stdout, then terminate the script.
# Arguments: none ($0 is used as the program name in the text)
# Returns:   never returns; exits with status 1
#######################################
show_help() {
    # Unquoted heredoc delimiter so that $0 is expanded in the text.
    cat << EOF
用法: $0 [hosts文件路径] [SSH用户名] [SSH密码] [Redis密码(可选)]

参数说明:
  hosts文件路径    - 包含节点信息的hosts文件
  SSH用户名        - 用于SSH连接的用户名
  SSH密码          - 用于SSH连接的密码
  Redis密码        - Redis的认证密码(可选,也可以在hosts文件中指定)

hosts文件格式示例:
# IP地址        主机名        角色        备注
192.168.1.101  redis-node1  master     cluster_name=mycluster,password=mypassword
192.168.1.102  redis-node2  replica    cluster_name=mycluster,password=mypassword
192.168.1.103  redis-node3  replica    cluster_name=mycluster,password=mypassword

示例:
  $0 hosts.txt root password123
EOF
    exit 1
}

# Require three or four positional arguments; anything else prints the usage
# text (show_help exits the script).
if (( $# < 3 || $# > 4 )); then
    log "ERROR" "参数不足或过多"
    show_help
fi

# Positional arguments.
HOSTS_FILE=$1
SSH_USER=$2
SSH_PASS=$3
CMD_REDIS_PASS=$4  # Redis password from the command line; used only when the hosts file does not specify one

# The hosts file must be an existing regular file.
[[ -f $HOSTS_FILE ]] || {
    log "ERROR" "hosts文件不存在: $HOSTS_FILE"
    exit 1
}

# sshpass is required for the password-based SSH sessions below.
if ! command -v sshpass > /dev/null 2>&1; then
    log "ERROR" "sshpass 未安装,请先安装sshpass"
    log "INFO" "对于CentOS/RHEL: yum install -y sshpass"
    log "INFO" "对于Ubuntu/Debian: apt-get install -y sshpass"
    exit 1
fi

# Cluster topology parsed from the hosts file (filled by parse_hosts_file).
declare -A NODE_ROLES
declare -A NODE_NAMES
declare -A NODE_NOTES
MASTER_IP=""
REPLICAS=()
CLUSTER_NAME=""
REDIS_PASS=""

#######################################
# Parse the hosts file. Each data line has the form
#   IP  hostname  role  [notes]
# where notes may carry "cluster_name=..." and/or "password=..." separated by
# commas. Blank lines and lines whose first non-blank character is '#' are
# skipped; a trailing carriage return (CRLF files) is removed; the last line
# is processed even when the file does not end with a newline.
# Globals:   NODE_ROLES, NODE_NAMES, NODE_NOTES, MASTER_IP, REPLICAS,
#            CLUSTER_NAME, REDIS_PASS (written)
# Arguments: $1 - path of the hosts file
# Returns:   non-zero when the file cannot be read
#######################################
parse_hosts_file() {
    local hosts_file=$1
    local line node_ip node_name node_role node_notes
    local node_cluster_name node_password
    # Regexes are kept in variables so bash does not re-tokenize them.
    local skip_re='^[[:space:]]*(#.*)?$'
    local ip_re='^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$'
    local cluster_re='(^|[,[:space:]])cluster_name=([^,[:space:]]+)'
    local password_re='(^|[,[:space:]])password=([^,[:space:]]+)'

    # "|| [[ -n $line ]]" keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        line=${line%$'\r'}

        # Skip comments (also indented ones) and blank/whitespace-only lines.
        if [[ $line =~ $skip_re ]]; then
            continue
        fi

        # Split into the first four whitespace-separated fields; any further
        # fields are discarded.
        read -r node_ip node_name node_role node_notes _ <<< "$line"

        # Validate the IP address format.
        if [[ ! $node_ip =~ $ip_re ]]; then
            log "ERROR" "IP地址格式无效: $node_ip"
            continue
        fi

        # Remember the node.
        NODE_ROLES[$node_ip]=$node_role
        NODE_NAMES[$node_ip]=$node_name
        NODE_NOTES[$node_ip]=$node_notes

        # Extract cluster name and password from the notes field, if present.
        # The first value seen wins; later differing values only warn.
        if [[ -n "$node_notes" ]]; then
            if [[ $node_notes =~ $cluster_re ]]; then
                node_cluster_name="${BASH_REMATCH[2]}"
                if [[ -z "$CLUSTER_NAME" ]]; then
                    CLUSTER_NAME="$node_cluster_name"
                    log "INFO" "从hosts文件中提取集群名称: $CLUSTER_NAME"
                elif [[ "$CLUSTER_NAME" != "$node_cluster_name" ]]; then
                    log "WARN" "节点 $node_ip 的集群名称 ($node_cluster_name) 与之前提取的集群名称 ($CLUSTER_NAME) 不一致"
                fi
            fi

            if [[ $node_notes =~ $password_re ]]; then
                node_password="${BASH_REMATCH[2]}"
                if [[ -z "$REDIS_PASS" ]]; then
                    REDIS_PASS="$node_password"
                    log "INFO" "从hosts文件中提取Redis密码"
                elif [[ "$REDIS_PASS" != "$node_password" ]]; then
                    log "WARN" "节点 $node_ip 的Redis密码与之前提取的密码不一致"
                fi
            fi
        fi

        # Classify by role; with several masters the last one wins.
        if [[ $node_role == "master" ]]; then
            if [[ -n $MASTER_IP ]]; then
                log "WARN" "检测到多个主节点,将使用最后一个: $node_ip"
            fi
            MASTER_IP=$node_ip
        elif [[ $node_role == "replica" ]]; then
            REPLICAS+=("$node_ip")
        else
            log "WARN" "未知的节点角色: $node_role, IP: $node_ip"
        fi

        log "INFO" "发现节点: $node_ip, 主机名: $node_name, 角色: $node_role, 备注: $node_notes"
    done < "$hosts_file"
}

log "INFO" "解析hosts文件: $HOSTS_FILE"
parse_hosts_file "$HOSTS_FILE"

# Debug aid: show what was parsed. The password itself is never printed;
# a fixed mask is shown when one is set and nothing when it is empty.
echo "DEBUG: CLUSTER_NAME=$CLUSTER_NAME, REDIS_PASS=${REDIS_PASS:+******}"

# Fall back to a default cluster name when the hosts file gave none.
if [[ -z "$CLUSTER_NAME" ]]; then
    CLUSTER_NAME="mymaster"
    log "INFO" "未在hosts文件中指定集群名称,使用默认值: $CLUSTER_NAME"
fi

# Redis password precedence: hosts file first, then the command-line argument.
if [[ -z "$REDIS_PASS" ]]; then
    if [[ -n "$CMD_REDIS_PASS" ]]; then
        REDIS_PASS="$CMD_REDIS_PASS"
        log "INFO" "使用命令行参数中的Redis密码"
    else
        log "ERROR" "未指定Redis密码,请在hosts文件或命令行参数中指定"
        exit 1
    fi
fi

# Validate the parse result: a master is mandatory, replicas are optional.
if [[ -z $MASTER_IP ]]; then
    log "ERROR" "未找到主节点,请检查hosts文件"
    exit 1
fi

if [[ ${#REPLICAS[@]} -eq 0 ]]; then
    log "WARN" "未找到从节点,将只部署主节点"
fi

# Show the effective configuration before anything is changed remotely.
log "INFO" "======================================"
log "INFO" "Redis 7 Sentinel集群远程部署脚本"
log "INFO" "======================================"
log "INFO" "Redis版本: $REDIS_VERSION"
log "INFO" "集群名称: $CLUSTER_NAME"
log "INFO" "主节点IP: $MASTER_IP (${NODE_NAMES[$MASTER_IP]})"
for i in "${!REPLICAS[@]}"; do
    log "INFO" "从节点$((i+1)) IP: ${REPLICAS[$i]} (${NODE_NAMES[${REPLICAS[$i]}]})"
done
log "INFO" "SSH用户名: $SSH_USER"
log "INFO" "Redis密码已配置"
log "INFO" "日志文件: $LOG_FILE"
log "INFO" "======================================"

# Wait for the operator to confirm (Enter) or abort (Ctrl+C).
read -r -p "以上配置正确吗? 按[Enter]继续或Ctrl+C取消..."

#######################################
# Run a single command on a remote host over SSH and log the result.
# The SSH password is handed to sshpass through the SSHPASS environment
# variable (sshpass -e) instead of the command line, and the Redis password
# is masked in the logged command text.
# Globals:   SSH_PASS, SSH_USER, REDIS_PASS (read)
# Arguments: $1 - host to connect to
#            $2 - command line to execute remotely
#            $3 - human-readable description for the log
# Outputs:   log lines with the description, the (masked) command and the
#            combined stdout/stderr of the remote command
# Returns:   0 when the remote command succeeded, 1 otherwise
#######################################
remote_exec() {
    local host=$1
    local cmd=$2
    local desc=$3
    local output status
    local shown_cmd=$cmd
    local quoted_pass

    # Keep the Redis password out of the console and the log file. Both the
    # raw value and its shell-quoted form are masked.
    if [[ -n "${REDIS_PASS:-}" ]]; then
        printf -v quoted_pass '%q' "$REDIS_PASS"
        shown_cmd=${shown_cmd//"$quoted_pass"/******}
        shown_cmd=${shown_cmd//"$REDIS_PASS"/******}
    fi

    log "CMD" "[$host] $desc"
    log "CMD" "[$host] 执行: $shown_cmd"

    # Declaration and assignment are separate so $? is the ssh status.
    output=$(SSHPASS="$SSH_PASS" sshpass -e ssh -o StrictHostKeyChecking=no "$SSH_USER@$host" "$cmd" 2>&1)
    status=$?

    if check_status "$status" "$desc" "$host"; then
        if [ -n "$output" ]; then
            log "INFO" "[$host] 输出: $output"
        fi
        return 0
    else
        if [ -n "$output" ]; then
            log "ERROR" "[$host] 错误输出: $output"
        fi
        return 1
    fi
}

#######################################
# Stream a script to "bash -s" on a remote host over SSH and run it there.
# The remote output is shown live and appended to the run log. The SSH
# password is passed to sshpass via the SSHPASS environment variable.
# Globals:   SSH_PASS, SSH_USER (read); LOG_FILE (appended to)
# Arguments: $1 - host to connect to
#            $2 - script text to execute remotely
#            $3 - human-readable description for the log
# Returns:   0 when the remote script succeeded, 1 otherwise
#######################################
remote_exec_script() {
    local host=$1
    local script=$2
    local desc=$3
    local status

    log "CMD" "[$host] $desc"

    printf '%s\n' "$script" \
        | SSHPASS="$SSH_PASS" sshpass -e ssh -o StrictHostKeyChecking=no "$SSH_USER@$host" "bash -s" 2>&1 \
        | tee -a "${LOG_FILE:-/dev/null}"
    # Index 1 is the sshpass/ssh stage, i.e. the status of the remote script.
    status=${PIPESTATUS[1]}

    check_status "$status" "$desc" "$host"
}

#######################################
# Emit, on stdout, the bash script that installs and configures Redis and
# Sentinel on one node. The output has three parts: a fixed prelude (helper
# functions, "set -e", EXIT trap), the node-specific variable assignments,
# and the installation/configuration steps.
#
# Fixes relative to the previous generator:
#   * check_exit_status reports the real exit code (it used to read $? after
#     the "[" test and therefore always printed 0);
#   * embedded values are shell-quoted with printf %q, so a password or name
#     containing quotes, "$" or backticks reaches the remote shell verbatim;
#   * the service checks use "if ! systemctl status ...", because under
#     "set -e" a separate "$?" test is never reached on failure;
#   * maxmemory is always expressed in MB (GB rounding dropped up to 1 GB).
#
# Globals:   REDIS_PASS, REDIS_VERSION, REDIS_DOWNLOAD_URL, REDIS_CONFIG_DIR,
#            REDIS_DATA_DIR, REDIS_LOG_DIR, REDIS_RUN_DIR (read)
# Arguments: $1 - node type: "master" selects the master redis.conf, any
#                 other value selects the replica redis.conf
#            $2 - IP address of the node being configured
#            $3 - IP address of the master node
#            $4 - Sentinel master-group (cluster) name
# Outputs:   the complete remote script
#######################################
prepare_redis_script() {
    local node_type=$1
    local local_ip=$2
    local master_ip=$3
    local cluster_name=$4

# Fixed prelude: quoted delimiter, so nothing in it is expanded locally.
cat << 'EOT'
#!/bin/bash
# Redis安装和配置脚本

# 颜色定义
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
NC='\033[0m'

# 错误处理增强 - 设置严格的错误处理模式
set -e

# 日志函数
log_info() {
    echo -e "${GREEN}[INFO] $1${NC}"
}

log_warn() {
    echo -e "${YELLOW}[WARN] $1${NC}"
}

log_error() {
    echo -e "${RED}[ERROR] $1${NC}" >&2
}

# 改进的错误处理函数
error_exit() {
    log_error "$1"
    exit 1
}

# 检查非零退出状态
check_exit_status() {
    # Capture the status first; any later command would overwrite $?.
    local rc=$?
    if [ "$rc" -ne 0 ]; then
        error_exit "上一个命令失败,退出代码: $rc"
    fi
}

# 尝试执行命令,如果失败则记录但继续
try_command() {
    if ! "$@"; then
        log_warn "命令失败但继续执行: $*"
    fi
}

# 清理函数 - 在脚本退出时执行
cleanup() {
    log_info "执行清理操作..."
    # 删除临时文件
    rm -f /tmp/redis-*.tar.gz 2>/dev/null || true
    log_info "清理完成"
}

# 设置退出陷阱
trap cleanup EXIT
EOT

# Node-specific settings. %q shell-quotes each value so the remote shell
# reads back exactly the local value, whatever characters it contains.
printf '\n# 获取参数\n'
printf 'NODE_TYPE=%q\n' "$node_type"
printf 'LOCAL_IP=%q\n' "$local_ip"
printf 'MASTER_IP=%q\n' "$master_ip"
printf 'CLUSTER_NAME=%q\n' "$cluster_name"
printf 'REDIS_PASSWORD=%q\n' "$REDIS_PASS"
printf 'REDIS_VERSION=%q\n' "$REDIS_VERSION"
printf 'REDIS_DOWNLOAD_URL=%q\n' "$REDIS_DOWNLOAD_URL"
printf 'REDIS_CONFIG_DIR=%q\n' "$REDIS_CONFIG_DIR"
printf 'REDIS_DATA_DIR=%q\n' "$REDIS_DATA_DIR"
printf 'REDIS_LOG_DIR=%q\n' "$REDIS_LOG_DIR"
printf 'REDIS_RUN_DIR=%q\n' "$REDIS_RUN_DIR"

# Installation and configuration steps. Every "$" and '"' meant for the
# remote shell is backslash-escaped so it survives this local double-quoted
# string unchanged.
echo "
# 调试信息
log_info \"开始部署 Redis [\$NODE_TYPE] 节点\"
log_info \"集群名称: \$CLUSTER_NAME\"
log_info \"本机IP: \$LOCAL_IP\"
log_info \"主节点IP: \$MASTER_IP\"

# 计算Redis内存配置 - 使用系统内存的75%,但不使用百分比表示
# Always expressed in MB: integer division by GB would drop up to 1 GB.
TOTAL_MEM_KB=\$(free | grep Mem | awk '{print \$2}')
REDIS_MEM_KB=\$((\$TOTAL_MEM_KB * 75 / 100))
REDIS_MEM=\$((\$REDIS_MEM_KB / 1024))\"mb\"

# 检查并优化文件描述符限制
MAX_OPEN_FILES=\$(ulimit -n)
if [ \$MAX_OPEN_FILES -lt 65535 ]; then
    log_warn \"系统文件描述符限制小于推荐值65535,当前值: \$MAX_OPEN_FILES\"
    # 尝试增加当前会话的限制
    ulimit -n 65535 2>/dev/null || log_warn \"无法增加文件描述符限制,将在稍后尝试系统级别配置\"
fi

log_info \"Redis内存配置: \$REDIS_MEM (系统内存的75%)\"
log_info \"系统文件描述符限制: \$(ulimit -n)\"

echo -e \"\${GREEN}[1/7] 安装前提条件...\${NC}\"
# 已移除系统更新步骤以加快部署速度

yum install -y epel-release
check_exit_status

yum groupinstall -y \"Development Tools\"
check_exit_status

yum install -y tcl wget systemd-devel
check_exit_status

echo -e \"\${GREEN}[2/7] 从源码安装Redis \${REDIS_VERSION}...\${NC}\"
cd /tmp
if [ ! -f redis-\${REDIS_VERSION}.tar.gz ]; then
    wget \${REDIS_DOWNLOAD_URL}
    check_exit_status
fi

tar xzf redis-\${REDIS_VERSION}.tar.gz
check_exit_status

cd redis-\${REDIS_VERSION}
make
check_exit_status

make install
check_exit_status

echo -e \"\${GREEN}[3/7] 创建Redis用户和目录...\${NC}\"
useradd -r -s /bin/false redis 2>/dev/null || echo \"用户已存在\"

mkdir -p \${REDIS_CONFIG_DIR}
mkdir -p \${REDIS_DATA_DIR}
mkdir -p \${REDIS_LOG_DIR}
mkdir -p \${REDIS_RUN_DIR}

chown -R redis:redis \${REDIS_DATA_DIR}
chown -R redis:redis \${REDIS_LOG_DIR}
chown -R redis:redis \${REDIS_RUN_DIR}
chown -R redis:redis \${REDIS_CONFIG_DIR}

echo -e \"\${GREEN}[4/7] 创建Redis配置...\${NC}\"

# 主节点Redis配置 - 添加masterauth配置
if [ \"\$NODE_TYPE\" = \"master\" ]; then
    cat > \${REDIS_CONFIG_DIR}/redis.conf << EOF
# 基本设置
bind \$LOCAL_IP
port 6379
protected-mode yes
daemonize yes
supervised systemd
pidfile \${REDIS_RUN_DIR}/redis.pid
loglevel notice
logfile \${REDIS_LOG_DIR}/redis.log
dir \${REDIS_DATA_DIR}

# 性能优化设置
databases 16
maxmemory \$REDIS_MEM
maxmemory-policy volatile-lru
tcp-backlog 511
tcp-keepalive 60
timeout 0
io-threads 4
io-threads-do-reads yes

# 连接池优化
maxclients 10000
min-replicas-to-write 1
min-replicas-max-lag 10

# 持久化设置
save 900 1
save 300 10
save 60 10000
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes
dbfilename dump.rdb
appendonly yes
appendfilename \"appendonly.aof\"
appendfsync everysec
no-appendfsync-on-rewrite yes
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb

# 慢查询日志
slowlog-log-slower-than 10000
slowlog-max-len 128

# 安全设置
requirepass \$REDIS_PASSWORD
# 添加主节点认证密码配置
masterauth \$REDIS_PASSWORD
EOF

# 从节点Redis配置
else
    cat > \${REDIS_CONFIG_DIR}/redis.conf << EOF
# 基本设置
bind \$LOCAL_IP
port 6379
protected-mode yes
daemonize yes
supervised systemd
pidfile \${REDIS_RUN_DIR}/redis.pid
loglevel notice
logfile \${REDIS_LOG_DIR}/redis.log
dir \${REDIS_DATA_DIR}

# 复制设置
replicaof \$MASTER_IP 6379
masterauth \$REDIS_PASSWORD
replica-serve-stale-data yes
replica-read-only yes
replica-priority 100

# 性能优化设置
databases 16
maxmemory \$REDIS_MEM
maxmemory-policy volatile-lru
tcp-backlog 511
tcp-keepalive 60
timeout 0
io-threads 4
io-threads-do-reads yes

# 连接池优化
maxclients 10000

# 持久化设置
save 900 1
save 300 10
save 60 10000
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes
dbfilename dump.rdb
appendonly yes
appendfilename \"appendonly.aof\"
appendfsync everysec
no-appendfsync-on-rewrite yes
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb

# 慢查询日志
slowlog-log-slower-than 10000
slowlog-max-len 128

# 安全设置
requirepass \$REDIS_PASSWORD
EOF
fi

echo -e \"\${GREEN}[5/7] 创建Sentinel配置...\${NC}\"
echo \"使用集群名称: \$CLUSTER_NAME 和主节点IP: \$MASTER_IP\"

cat > \${REDIS_CONFIG_DIR}/sentinel.conf << EOF
# 基本设置
bind \$LOCAL_IP
port 26379
daemonize yes
supervised systemd
pidfile \${REDIS_RUN_DIR}/sentinel.pid
logfile \${REDIS_LOG_DIR}/sentinel.log
dir \${REDIS_DATA_DIR}

# Sentinel优化配置
sentinel monitor \$CLUSTER_NAME \$MASTER_IP 6379 2
sentinel auth-pass \$CLUSTER_NAME \$REDIS_PASSWORD
sentinel down-after-milliseconds \$CLUSTER_NAME 3000
sentinel failover-timeout \$CLUSTER_NAME 30000
sentinel parallel-syncs \$CLUSTER_NAME 1

# 性能调优
sentinel client-reconfig-script \$CLUSTER_NAME /usr/local/bin/redis-cli
sentinel deny-scripts-reconfig yes
sentinel resolve-hostnames no

# 连接池和并发设置
sentinel announce-ip \$LOCAL_IP
sentinel announce-port 26379

# Sentinel安全设置 (Redis 6.2+支持)
# 为Sentinel之间的通信添加安全认证
sentinel sentinel-pass \$REDIS_PASSWORD
EOF

# 确保权限正确
chown redis:redis \${REDIS_CONFIG_DIR}/redis.conf
chown redis:redis \${REDIS_CONFIG_DIR}/sentinel.conf
chmod 640 \${REDIS_CONFIG_DIR}/redis.conf
chmod 640 \${REDIS_CONFIG_DIR}/sentinel.conf

echo -e \"\${GREEN}[6/7] 创建Systemd服务...\${NC}\"

# Redis服务
cat > /etc/systemd/system/redis.service << EOF
[Unit]
Description=Redis In-Memory Data Store
After=network.target

[Service]
User=redis
Group=redis
Type=notify
ExecStart=/usr/local/bin/redis-server \${REDIS_CONFIG_DIR}/redis.conf
ExecStop=/usr/local/bin/redis-cli -h \$LOCAL_IP -p 6379 -a \$REDIS_PASSWORD shutdown
Restart=always
LimitNOFILE=65535
TimeoutStartSec=60
TimeoutStopSec=60

[Install]
WantedBy=multi-user.target
EOF

# Sentinel服务
cat > /etc/systemd/system/redis-sentinel.service << EOF
[Unit]
Description=Redis Sentinel
After=network.target redis.service

[Service]
User=redis
Group=redis
Type=notify
ExecStart=/usr/local/bin/redis-sentinel \${REDIS_CONFIG_DIR}/sentinel.conf
ExecStop=/usr/local/bin/redis-cli -h \$LOCAL_IP -p 26379 shutdown
Restart=always
LimitNOFILE=65535
TimeoutStartSec=60
TimeoutStopSec=60

[Install]
WantedBy=multi-user.target
EOF

echo -e \"\${GREEN}[7/7] 设置系统参数并启动服务...\${NC}\"
cat > /etc/sysctl.d/60-redis.conf << EOF
# Redis推荐系统设置
vm.overcommit_memory = 1
net.core.somaxconn = 65535
vm.swappiness = 0
net.ipv4.tcp_sack = 1
net.ipv4.tcp_timestamps = 1
net.ipv4.tcp_window_scaling = 1
net.ipv4.tcp_congestion_control = cubic
net.ipv4.tcp_syncookies = 1
# 文件描述符优化
fs.file-max = 500000
EOF

# 应用sysctl设置
sysctl -p /etc/sysctl.d/60-redis.conf

# 创建文件描述符限制配置
cat > /etc/security/limits.d/redis.conf << EOF
redis soft nofile 65535
redis hard nofile 65535
EOF

# 禁用透明大页面
echo never > /sys/kernel/mm/transparent_hugepage/enabled
echo never > /sys/kernel/mm/transparent_hugepage/defrag

# 确保重启后保持设置
cat > /etc/rc.local << EOF
#!/bin/bash
echo never > /sys/kernel/mm/transparent_hugepage/enabled
echo never > /sys/kernel/mm/transparent_hugepage/defrag
exit 0
EOF
chmod +x /etc/rc.local

# 启动服务
systemctl daemon-reload
systemctl enable redis
systemctl enable redis-sentinel
systemctl start redis

# 如果是主节点,等待一段时间再启动Sentinel,确保主节点完全启动
if [ \"\$NODE_TYPE\" = \"master\" ]; then
    echo -e \"\${YELLOW}等待主节点Redis启动完成...\${NC}\"
    sleep 5
fi

# 检查Redis服务状态
# Tested inside 'if' so that 'set -e' does not abort before the message.
echo -e \"\${YELLOW}检查Redis服务状态...\${NC}\"
if ! systemctl status redis; then
    log_error \"Redis服务未能正常启动,请检查日志\"
    exit 1
fi

# 启动Sentinel服务
systemctl start redis-sentinel

# 检查Sentinel服务状态
echo -e \"\${YELLOW}检查Sentinel服务状态...\${NC}\"
if ! systemctl status redis-sentinel; then
    log_error \"Sentinel服务未能正常启动,请检查日志\"
    exit 1
fi

# 验证部署
echo -e \"\${GREEN}验证部署...\${NC}\"

# 等待服务完全启动
sleep 3

# 检查Redis状态
echo -e \"\${YELLOW}Redis状态:\${NC}\"
redis-cli -h \$LOCAL_IP -a \"\$REDIS_PASSWORD\" info replication | grep -E \"role:|connected_slaves:|master_host:|master_port:\"
redis-cli -h \$LOCAL_IP -a \"\$REDIS_PASSWORD\" info memory | grep -E \"used_memory_human:|maxmemory_human:\"

# 检查Sentinel状态
echo -e \"\${YELLOW}Sentinel状态:\${NC}\"
redis-cli -h \$LOCAL_IP -p 26379 sentinel master \$CLUSTER_NAME | grep -E \"name|ip|port|flags|num-slaves|num-other-sentinels\"

echo -e \"\${GREEN}========================================\${NC}\"
echo -e \"\${GREEN}Redis 7 Sentinel节点部署完成!\${NC}\"
echo -e \"\${YELLOW}集群名称: \$CLUSTER_NAME\${NC}\"
if [ \"\$NODE_TYPE\" = \"master\" ]; then
    echo -e \"\${YELLOW}此节点已配置为主节点\${NC}\"
else
    echo -e \"\${YELLOW}此节点已配置为从节点,复制来自 \$MASTER_IP\${NC}\"
fi
echo -e \"\${GREEN}========================================\${NC}\"
"
}

#######################################
# Check that a password-based SSH login to a host works.
# The password is passed to sshpass via the SSHPASS environment variable
# (sshpass -e) so it does not appear in the local process list.
# Globals:   SSH_PASS, SSH_USER (read)
# Arguments: $1 - host to test
# Returns:   0 when the login and a trivial remote command succeed,
#            1 otherwise
#######################################
test_ssh_connection() {
    local host=$1

    log "INFO" "测试到 $host 的SSH连接"

    # A 5-second connect timeout keeps unreachable hosts from stalling the run.
    if SSHPASS="$SSH_PASS" sshpass -e ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 \
        "$SSH_USER@$host" "echo 连接成功" &> /dev/null; then
        log "INFO" "SSH连接到 $host 成功"
        return 0
    fi

    log "ERROR" "无法连接到 $host,请检查SSH设置或主机可达性"
    return 1
}

# Pre-flight: verify SSH access to every node before deploying anything.
log "INFO" "检查所有节点的SSH连接"
failed_hosts=()

# Master first, then the replicas in hosts-file order.
for node_ip in "$MASTER_IP" "${REPLICAS[@]}"; do
    test_ssh_connection "$node_ip" || failed_hosts+=("$node_ip")
done

# When at least one node is unreachable, list the failures and let the
# operator decide whether to continue.
if (( ${#failed_hosts[@]} > 0 )); then
    log "ERROR" "以下节点的SSH连接测试失败:"
    for host in "${failed_hosts[@]}"; do
        printf '  - %s (%s)\n' "$host" "${NODE_NAMES[$host]}"
    done

    read -p "是否继续部署过程? [y/N] " continue_deploy
    case "$continue_deploy" in
        [Yy])
            ;;
        *)
            log "INFO" "部署被用户取消"
            exit 1
            ;;
    esac

    log "WARN" "用户选择继续部署,尽管有SSH连接问题"
fi

#######################################
# Run four informational probes on a host through remote_exec: memory,
# disk space, load average and the open-file limit.
# Arguments: $1 - host to inspect
# Returns:   status of the last probe (the open-file limit check)
#######################################
check_system_resources() {
    local target=$1
    # Parallel arrays: probe command and the description logged with it.
    local -a probe_cmds=(
        "free -m | awk '/Mem:/ {print \$2}'"
        "df -h | grep '/'"
        "uptime | awk '{print \$(NF-2),\$(NF-1),\$NF}'"
        "ulimit -n"
    )
    local -a probe_descs=(
        "检查可用内存"
        "检查磁盘空间"
        "检查CPU负载"
        "检查打开文件限制"
    )
    local idx rc=0

    log "INFO" "检查 $target 的系统资源"

    for idx in "${!probe_cmds[@]}"; do
        remote_exec "$target" "${probe_cmds[$idx]}" "${probe_descs[$idx]}"
        rc=$?
    done

    return "$rc"
}

#######################################
# Deploy Redis and Sentinel on the master node.
# Generates the node script with prepare_redis_script and runs it through
# remote_exec_script. The generated script (which embeds the Redis password)
# is kept in a local variable so it does not linger in global scope.
# Globals:   MASTER_IP, NODE_NAMES, CLUSTER_NAME (read)
# Arguments: none
# Returns:   0 on success; exits the whole script with status 1 when the
#            master deployment fails
#######################################
deploy_master() {
    local master_script

    log "INFO" "开始部署主节点 $MASTER_IP (${NODE_NAMES[$MASTER_IP]})"

    # Informational resource probes; their result does not stop the deploy.
    check_system_resources "$MASTER_IP"

    # For the master, the local IP and the master IP are the same address.
    master_script=$(prepare_redis_script "master" "$MASTER_IP" "$MASTER_IP" "$CLUSTER_NAME")

    if ! remote_exec_script "$MASTER_IP" "$master_script" "部署Redis主节点"; then
        log "ERROR" "主节点部署失败,终止进程"
        exit 1
    fi

    log "INFO" "主节点部署完成"

    # Give the master a moment to settle before it is checked and before
    # replicas are pointed at it.
    log "INFO" "等待主节点完全启动 (10秒)..."
    sleep 10
}

#######################################
# Deploy Redis and Sentinel on one replica node.
# A failure is logged and reported through the return status; it does not
# terminate the script, so other replicas can still be processed.
# Globals:   MASTER_IP, NODE_NAMES, CLUSTER_NAME (read)
# Arguments: $1 - replica IP address
#            $2 - replica ordinal, used in log messages only
# Returns:   0 when the deployment succeeded, 1 when it failed
#######################################
deploy_replica() {
    local replica_ip=$1
    local replica_num=$2
    local replica_script

    log "INFO" "开始部署从节点${replica_num} $replica_ip (${NODE_NAMES[$replica_ip]})"

    # Informational resource probes; their result does not stop the deploy.
    check_system_resources "$replica_ip"

    replica_script=$(prepare_redis_script "replica" "$replica_ip" "$MASTER_IP" "$CLUSTER_NAME")

    if ! remote_exec_script "$replica_ip" "$replica_script" "部署Redis从节点${replica_num}"; then
        log "ERROR" "从节点${replica_num}部署失败,但继续处理其他节点"
        # Propagate the failure so a caller waiting on this job can see it.
        return 1
    fi

    log "INFO" "从节点${replica_num}部署完成"
    return 0
}

#######################################
# Query the master's replication role and memory usage via redis-cli.
# The password is supplied to redis-cli through the REDISCLI_AUTH environment
# variable, shell-quoted with %q, instead of an unquoted "-a" argument. This
# keeps it out of redis-cli's argument list and lets passwords with special
# characters pass through intact.
# Globals:   MASTER_IP, REDIS_PASS (read)
# Arguments: none
# Returns:   0 when the role query succeeded, 1 otherwise (the memory query
#            is informational and does not affect the result)
#######################################
check_master_status() {
    local quoted_pass cmd cmd_mem
    printf -v quoted_pass '%q' "$REDIS_PASS"

    log "INFO" "检查主节点状态"

    cmd="REDISCLI_AUTH=$quoted_pass redis-cli -h $MASTER_IP info replication | grep role"
    if ! remote_exec "$MASTER_IP" "$cmd" "检查主节点角色"; then
        log "ERROR" "无法检查主节点状态"
        return 1
    fi

    # Memory usage.
    cmd_mem="REDISCLI_AUTH=$quoted_pass redis-cli -h $MASTER_IP info memory | grep -E 'used_memory_human:|maxmemory_human:'"
    remote_exec "$MASTER_IP" "$cmd_mem" "检查主节点内存使用情况"

    return 0
}

#######################################
# Query a replica's replication role/master and memory usage via redis-cli.
# The password is supplied to redis-cli through the REDISCLI_AUTH environment
# variable, shell-quoted with %q, instead of an unquoted "-a" argument.
# Globals:   REDIS_PASS (read)
# Arguments: $1 - replica IP address
#            $2 - replica ordinal, used in log messages only
# Returns:   0 when the role query succeeded, 1 otherwise (the memory query
#            is informational and does not affect the result)
#######################################
check_replica_status() {
    local replica_ip=$1
    local replica_num=$2
    local quoted_pass cmd cmd_mem
    printf -v quoted_pass '%q' "$REDIS_PASS"

    log "INFO" "检查从节点${replica_num}状态"

    cmd="REDISCLI_AUTH=$quoted_pass redis-cli -h $replica_ip info replication | grep -E 'role:|master_host:'"
    if ! remote_exec "$replica_ip" "$cmd" "检查从节点${replica_num}角色和主节点"; then
        log "ERROR" "无法检查从节点${replica_num}状态"
        return 1
    fi

    # Memory usage.
    cmd_mem="REDISCLI_AUTH=$quoted_pass redis-cli -h $replica_ip info memory | grep -E 'used_memory_human:|maxmemory_human:'"
    remote_exec "$replica_ip" "$cmd_mem" "检查从节点${replica_num}内存使用情况"

    return 0
}

#######################################
# Ask the Sentinel on a host about the monitored master and its quorum.
# Globals:   CLUSTER_NAME (read)
# Arguments: $1 - host whose Sentinel (port 26379) is queried
#            $2 - label for the host, used in log messages
# Returns:   1 when the "sentinel master" query fails, 0 otherwise (the
#            ckquorum result is logged but does not affect the status)
#######################################
check_sentinel_status() {
    local host=$1
    local desc=$2
    # Common prefix of both Sentinel queries.
    local sentinel_cli="redis-cli -h $host -p 26379 sentinel"
    local wanted_fields='name|ip|port|num-slaves|num-other-sentinels|quorum'

    log "INFO" "检查${desc}上的Sentinel状态"

    remote_exec "$host" "$sentinel_cli master $CLUSTER_NAME | grep -E '$wanted_fields'" "检查${desc}上的Sentinel状态" || {
        log "ERROR" "无法检查${desc}上的Sentinel状态"
        return 1
    }

    # Quorum / health check.
    remote_exec "$host" "$sentinel_cli ckquorum $CLUSTER_NAME" "检查${desc}上的Sentinel健康状态"

    return 0
}

#######################################
# Smoke-test the cluster: write a key on the master, read it from every
# replica and, if the operator agrees, restart Redis on the master and
# query Sentinel on the first replica afterwards.
# The password is supplied to redis-cli through the REDISCLI_AUTH environment
# variable (shell-quoted) instead of an unquoted "-a" argument. The failover
# step is skipped when there is no replica to query.
# Globals:   MASTER_IP, REPLICAS, REDIS_PASS (read)
# Arguments: none (reads the operator's answer from stdin)
#######################################
test_cluster_functionality() {
    local replica_ip test_failover quoted_pass
    printf -v quoted_pass '%q' "$REDIS_PASS"

    log "INFO" "测试集群功能"

    # Write a test value on the master.
    remote_exec "$MASTER_IP" "REDISCLI_AUTH=$quoted_pass redis-cli -h $MASTER_IP set test_key test_value" "写入测试"

    # Read the test value back from every replica.
    for replica_ip in "${REPLICAS[@]}"; do
        remote_exec "$replica_ip" "REDISCLI_AUTH=$quoted_pass redis-cli -h $replica_ip get test_key" "读取测试"
    done

    # Optional failover test.
    log "WARN" "是否测试故障转移?这将重启主节点Redis实例。[y/N]"
    read -r -p "按[Enter]跳过或输入'y'继续: " test_failover
    if [[ "$test_failover" == "y" ]]; then
        # Without replicas there is no Sentinel to query after the restart.
        if (( ${#REPLICAS[@]} == 0 )); then
            log "WARN" "没有从节点,跳过故障转移测试"
            return 0
        fi
        remote_exec "$MASTER_IP" "systemctl restart redis" "测试故障转移"
        sleep 10
        check_sentinel_status "${REPLICAS[0]}" "故障转移后的Sentinel"
    fi
}

#######################################
# Write a sample hosts file (hosts_sample.txt in the current directory) and
# exit, unless that file already exists.
# Arguments: none
# Returns:   returns normally (doing nothing) when the sample file already
#            exists; otherwise writes the file and exits with status 0
#######################################
create_sample_hosts_file() {
    local sample_file="hosts_sample.txt"

    # An existing sample file is left untouched.
    if [ -f "$sample_file" ]; then
        return 0
    fi

    log "INFO" "创建示例hosts文件: $sample_file"
    printf '%s\n' \
        "# IP地址        主机名        角色        备注" \
        "# 请根据实际情况修改以下内容" \
        "192.168.1.101  redis-node1  master     cluster_name=mycluster,password=mypassword" \
        "192.168.1.102  redis-node2  replica    cluster_name=mycluster,password=mypassword" \
        "192.168.1.103  redis-node3  replica    cluster_name=mycluster,password=mypassword" \
        > "$sample_file"
    log "INFO" "请编辑示例文件 $sample_file 然后重新运行脚本"
    exit 0
}

# Start of the deployment flow.
# If the hosts file is missing or empty, write a sample file and exit.
if [ ! -s "$HOSTS_FILE" ]; then
    create_sample_hosts_file
fi

log "INFO" "开始Redis Sentinel集群部署"

# 1. Deploy the master (deploy_master exits the script on failure).
deploy_master

# 2. Verify the master; a failed check is reported but does not stop the run.
if ! check_master_status; then
    log "ERROR" "主节点验证失败,但继续部署过程"
fi

# 3. Deploy all replicas in parallel, one background subshell per replica.
log "INFO" "开始并行部署从节点"
pids=()
for i in "${!REPLICAS[@]}"; do
    log "INFO" "启动从节点${REPLICAS[$i]}的部署进程"
    (deploy_replica "${REPLICAS[$i]}" "$((i+1))") &
    pids+=("$!")
done

# Wait for every replica job and record which ones failed. pids[] and
# REPLICAS[] were filled in the same order, so index i refers to the same
# replica in both arrays.
failed_replicas=0
for i in "${!pids[@]}"; do
    if wait "${pids[$i]}"; then
        log "INFO" "从节点部署进程 ${pids[$i]} 已完成"
    else
        log "ERROR" "从节点 ${REPLICAS[$i]} 的部署进程 ${pids[$i]} 失败"
        failed_replicas=$((failed_replicas + 1))
    fi
done

# 4. Check the state of every node.
log "INFO" "所有节点已部署,开始检查集群状态"

# Master.
check_master_status

# Replicas.
for i in "${!REPLICAS[@]}"; do
    check_replica_status "${REPLICAS[$i]}" "$((i+1))"
done

# Sentinels on all nodes.
check_sentinel_status "$MASTER_IP" "主节点 (${NODE_NAMES[$MASTER_IP]})"
for i in "${!REPLICAS[@]}"; do
    check_sentinel_status "${REPLICAS[$i]}" "从节点$((i+1)) (${NODE_NAMES[${REPLICAS[$i]}]})"
done

# Functional smoke test (write/read, optional failover).
test_cluster_functionality

log "INFO" "==========================================="
log "INFO" "Redis 7 Sentinel集群部署已完成"
log "INFO" "集群名称: $CLUSTER_NAME"
log "INFO" "主节点: $MASTER_IP (${NODE_NAMES[$MASTER_IP]})"
for i in "${!REPLICAS[@]}"; do
    log "INFO" "从节点$((i+1)): ${REPLICAS[$i]} (${NODE_NAMES[${REPLICAS[$i]}]})"
done
log "INFO" "Redis端口: 6379"
log "INFO" "Sentinel端口: 26379"
if (( failed_replicas > 0 )); then
    log "WARN" "有 ${failed_replicas} 个从节点部署失败,请检查日志"
fi
log "INFO" "详细日志请查看: $LOG_FILE"
log "INFO" "==========================================="

使用教程

[root@vms76 ~]# chmod +x depoy-redis.sh
[root@vms76 ~]# ./depoy-redis.sh
[2025-04-28 21:19:26] [ERROR] 参数不足或过多
用法: ./depoy-redis.sh [hosts文件路径] [SSH用户名] [SSH密码] [Redis密码(可选)]

参数说明:
  hosts文件路径    - 包含节点信息的hosts文件
  SSH用户名        - 用于SSH连接的用户名
  SSH密码          - 用于SSH连接的密码
  Redis密码        - Redis的认证密码(可选,也可以在hosts文件中指定)

hosts文件格式示例:
# IP地址        主机名        角色        备注
192.168.1.101  redis-node1  master     cluster_name=mycluster,password=mypassword
192.168.1.102  redis-node2  replica    cluster_name=mycluster,password=mypassword
192.168.1.103  redis-node3  replica    cluster_name=mycluster,password=mypassword

示例:
  ./depoy-redis.sh hosts.txt root password123

测试

import redis
from redis.sentinel import Sentinel
import time
import sys


def main():
    """Continuously write test keys through Redis Sentinel to observe failover.

    Connects to the configured Sentinel nodes, then loops forever: each
    iteration writes one ``test_key_<n>`` to the current master and prints
    which address Sentinel reports as master. Every tenth iteration a short
    status block is printed. Errors during a write are reported on stderr and
    the loop continues after a pause. The loop ends on Ctrl+C.

    The master client is created once, before the loop, instead of on every
    iteration: each ``master_for`` call builds a new client object, so calling
    it per write only accumulates clients. NOTE(review): this relies on the
    client returned by ``master_for`` re-resolving the master through Sentinel
    when it reconnects, as described in the redis-py Sentinel documentation.
    """
    # Connection settings; replace with the values of your own cluster.
    master_name = "mycluster"
    password = "mypassword"
    # Sentinel endpoints.
    sentinel_hosts = [
        ('192.168.26.76', 26379),
        ('192.168.26.77', 26379),
        ('192.168.26.78', 26379)
    ]

    print("正在连接到Redis Sentinel集群...")

    # Create the Sentinel handle and a single master client that is reused
    # for all writes.
    sentinel = Sentinel(sentinel_hosts, socket_timeout=1.0, password=password)
    master = sentinel.master_for(master_name, socket_timeout=1.0, password=password)

    counter = 0

    try:
        while True:
            counter += 1
            key = f"test_key_{counter}"
            value = f"test_value_{int(time.time())}"

            try:
                # Ask Sentinel which address is currently the master.
                master_info = sentinel.discover_master(master_name)
                current_master = f"{master_info[0]}:{master_info[1]}"

                # Write the value.
                result = master.set(key, value)

                # Report the outcome.
                print(f"[{counter}] 成功写入到 {current_master}, 结果: {result}, key: {key}")

                # Pause one second between writes so the output is readable.
                time.sleep(1)

            except Exception as e:
                print(f"[{counter}] 写入失败: {e}", file=sys.stderr)

                # Back off briefly after an error to avoid a tight retry loop.
                time.sleep(3)

            # Print a status summary every 10 operations.
            if counter % 10 == 0:
                try:
                    master_info = sentinel.discover_master(master_name)
                    current_master = f"{master_info[0]}:{master_info[1]}"

                    print("\n====== 当前状态 ======")
                    print(f"已执行操作: {counter}")
                    print(f"当前主节点: {current_master}")
                    print("======================\n")
                except Exception as e:
                    print(f"获取主节点信息失败: {e}", file=sys.stderr)

    except KeyboardInterrupt:
        print("\n程序被用户中断")
    except Exception as e:
        print(f"发生异常: {e}", file=sys.stderr)


# Run the write loop only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

在测试脚本运行期间,停止主节点(master)上的 Redis 服务,观察输出中的“当前主节点”是否切换到某个从节点;如果发生切换且写入恢复正常,说明故障转移成功。

image.png