1. Ansible 基础概念与架构
1.1 什么是 Ansible?
Ansible 是一款开源的自动化运维工具,基于 Python 开发,实现了批量系统配置、批量程序部署、批量运行命令等功能。它的主要特点包括:
- 无代理架构:不需要在目标服务器上安装客户端
- 基于 SSH:使用标准的 SSH 协议进行通信
- YAML 语法:使用易读的 YAML 格式编写自动化脚本
- 幂等性:同一任务无论执行多少次,系统都会收敛到相同的目标状态,不会产生重复变更或意外结果
1.2 Ansible 核心架构
graph TB
A[控制节点] --> B[Inventory<br/>主机清单]
A --> C[Playbooks<br/>剧本文件]
A --> D[Modules<br/>功能模块]
A --> E[Plugins<br/>插件系统]
A --> F[SSH连接]
F --> G[被管理节点1]
F --> H[被管理节点2]
F --> I[被管理节点N]
C --> J[Roles<br/>角色]
J --> K[Tasks<br/>任务]
J --> L[Handlers<br/>触发器]
J --> M[Templates<br/>模板]
J --> N[Variables<br/>变量]
style A fill:#1e3a5f,color:#ffffff
style B fill:#1e5f3a,color:#ffffff
style C fill:#1e5f3a,color:#ffffff
style D fill:#1e5f3a,color:#ffffff
style E fill:#1e5f3a,color:#ffffff
style F fill:#5f3a1e,color:#ffffff
style G fill:#5f1e1e,color:#ffffff
style H fill:#5f1e1e,color:#ffffff
style I fill:#5f1e1e,color:#ffffff
style J fill:#4a1e5f,color:#ffffff
style K fill:#4a1e5f,color:#ffffff
style L fill:#4a1e5f,color:#ffffff
style M fill:#4a1e5f,color:#ffffff
style N fill:#4a1e5f,color:#ffffff
2. 环境准备与安装
2.1 系统要求
创建环境检查脚本:check_environment.sh
#!/bin/bash
# Ansible environment check.
#
# Prints the operating system, Python 3, SSH client, root-filesystem usage and
# memory details of the local machine. Read-only: nothing is installed or
# modified.

printf '%s\n' "=== Ansible 环境检查 ==="

# 1. Operating system, taken from os-release when that file is present.
printf '\n%s\n' "1. 操作系统信息:"
if [[ ! -f /etc/os-release ]]; then
  printf '%s\n' " 无法确定操作系统"
else
  # Sourced into the current shell, so NAME and VERSION become shell variables.
  . /etc/os-release
  printf '%s\n' " 发行版: $NAME" " 版本: $VERSION"
fi

# 2. Python 3 interpreter.
printf '\n%s\n' "2. Python 环境:"
if ! command -v python3 > /dev/null 2>&1; then
  printf '%s\n' " Python3 未安装"
else
  python3 --version
fi

# 3. SSH client.
printf '\n%s\n' "3. SSH 客户端:"
if ! command -v ssh > /dev/null 2>&1; then
  printf '%s\n' " SSH 客户端未安装"
else
  ssh -V
fi

# 4. Usage of the root filesystem (last line of df drops the header).
printf '\n%s\n' "4. 磁盘空间:"
df -h / | tail -1

# 5. Memory summary.
printf '\n%s\n' "5. 内存信息:"
free -h

printf '\n%s\n' "环境检查完成"
2.2 安装 Ansible
创建安装脚本:install_ansible.sh
#!/bin/bash
# Ansible installation script.
#
# Installs Ansible through the system package manager on RHEL-family (yum)
# and Debian-family (apt) hosts, verifies the installation, then installs
# pip and sshpass as helper tools. Exits non-zero on the first failing step
# or on an unsupported distribution.
set -euo pipefail

echo "=== 开始安装 Ansible ==="

if [[ -f /etc/redhat-release ]]; then
  # CentOS / RHEL: Ansible is shipped through EPEL.
  echo "检测到 CentOS/RHEL 系统"
  sudo yum install -y epel-release
  sudo yum update -y
  sudo yum install -y ansible
elif [[ -f /etc/debian_version ]]; then
  # Debian / Ubuntu
  echo "检测到 Debian/Ubuntu 系统"
  sudo apt-get update
  sudo apt-get install -y software-properties-common

  # The ansible/ansible PPA is published for Ubuntu only. Adding it on plain
  # Debian fails and, under `set -e`, would abort the whole installation, so
  # it is added only when os-release identifies the host as Ubuntu. The file
  # is sourced in a subshell so its variables do not leak into this script.
  distro_id=$(. /etc/os-release 2> /dev/null && printf '%s' "${ID:-}") || distro_id=""
  if [[ "$distro_id" == "ubuntu" ]]; then
    sudo apt-add-repository --yes --update ppa:ansible/ansible
  fi

  sudo apt-get install -y ansible
else
  echo "不支持的 Linux 发行版"
  exit 1
fi

# Verify the installation.
echo -e "\n验证安装:"
ansible --version

# Helper tools: pip for Python dependencies, sshpass for password-based SSH.
echo -e "\n安装额外工具:"
if [[ -f /etc/redhat-release ]]; then
  sudo yum install -y python3-pip sshpass
elif [[ -f /etc/debian_version ]]; then
  sudo apt-get install -y python3-pip sshpass
fi

# NOTE: ansible-core is intentionally NOT installed again with
# `sudo pip3 install`. The distribution package above already provides it;
# a second, root-owned pip copy shadows or overwrites package-managed files
# and is rejected outright on PEP 668 ("externally managed") systems.

echo -e "\n=== Ansible 安装完成 ==="
2.3 配置 SSH 密钥认证
创建 SSH 配置脚本:setup_ssh_keys.sh
#!/bin/bash
# SSH key setup script.
#
# Generates an RSA key pair for the current user when none exists, prints
# the public key, and writes a client configuration to ~/.ssh/config. An
# existing configuration file is backed up before it is replaced.
set -e

ssh_dir="$HOME/.ssh"
ssh_key="$ssh_dir/id_rsa"
ssh_config="$ssh_dir/config"

echo "=== 配置 SSH 密钥认证 ==="

# Create the directory first so every later step can rely on it.
mkdir -p "$ssh_dir"
chmod 700 "$ssh_dir"

# Generate the key pair only when no private key is present.
if [[ ! -f "$ssh_key" ]]; then
  echo "生成 SSH 密钥对..."
  ssh-keygen -t rsa -b 4096 -N "" -f "$ssh_key"
  echo "SSH 密钥对生成完成"
else
  echo "SSH 密钥对已存在"
fi

# Show the public key.
echo -e "\n公钥内容:"
cat "${ssh_key}.pub"

# Configure the SSH client.
echo -e "\n配置 SSH 客户端..."

# Keep a timestamped copy of any existing configuration instead of silently
# destroying the user's settings.
if [[ -f "$ssh_config" ]]; then
  backup="${ssh_config}.bak.$(date +%Y%m%d%H%M%S)"
  cp -p -- "$ssh_config" "$backup"
  echo "已备份现有配置: $backup"
fi

# NOTE(review): `StrictHostKeyChecking no` together with
# `UserKnownHostsFile /dev/null` disables host-key verification for EVERY
# host this user connects to, which allows man-in-the-middle attacks.
# Acceptable for a disposable lab only; restrict it to specific hosts or
# remove it before using this configuration in production.
cat > "$ssh_config" << 'EOF'
Host *
    ServerAliveInterval 60
    ServerAliveCountMax 5
    TCPKeepAlive yes
    Compression yes
    ControlMaster auto
    ControlPath ~/.ssh/control:%h:%p:%r
    ControlPersist 4h
    StrictHostKeyChecking no
    UserKnownHostsFile /dev/null
EOF
chmod 600 "$ssh_config"

echo -e "\n=== SSH 配置完成 ==="
echo "请将公钥内容添加到目标服务器的 ~/.ssh/authorized_keys 文件中"
3. Ansible 基础配置
3.1 配置 Ansible
创建主配置文件:ansible.cfg
# Ansible main configuration file
[defaults]
# Inventory file path
inventory = ./inventory/hosts
# Remote login user
remote_user = root
# Private key used for SSH authentication
private_key_file = ~/.ssh/id_rsa
# Host key checking (disabled here; see ssh_args below as well)
host_key_checking = False
# Temporary directory on managed nodes
remote_tmp = ~/.ansible/tmp
# Temporary directory on the control node
local_tmp = ~/.ansible/tmp
# Default connection transport
transport = smart
# Connection timeout in seconds
timeout = 30
# Number of parallel worker processes
forks = 5
# Show hosts/tasks that were skipped
display_skipped_hosts = True
# Do not abort the whole run when a single host fails
any_errors_fatal = False
# Extra module search path
library = /usr/share/ansible
# Role search path
roles_path = ./roles:/usr/share/ansible/roles
# Log file
log_path = ./ansible.log
# Fact caching
fact_caching = memory
fact_caching_timeout = 7200
# Disable colour output (may be needed in some terminals)
# nocolor = 1
# Disable cowsay (if installed)
nocows = 1

[ssh_connection]
# SSH pipelining
pipelining = True
# Control socket path
control_path = ~/.ssh/control%%h%%p%%r
# Prefer SCP for file transfer when possible
scp_if_ssh = True
# Extra SSH arguments
ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no
# Connection retry count. This key is read from [ssh_connection]; under
# [defaults] it is not a recognised setting and is silently ignored.
retries = 3

[inventory]
# Enable inventory caching
cache = True
cache_plugin = memory

[privilege_escalation]
# Privilege escalation settings
become = True
become_method = sudo
become_user = root
become_ask_pass = False

[persistent_connection]
# Persistent connection settings
connect_timeout = 30
connect_retries = 3
command_timeout = 30
3.2 配置主机清单
创建主机清单文件:inventory/hosts
# Ansible inventory file

# Group containing every server. Declared as a parent group; as a bare
# [all_servers] header with no members it would stay empty.
[all_servers:children]
web_servers
db_servers
cache_servers

# Web servers
[web_servers]
web1.example.com ansible_host=192.168.1.10 ansible_user=root
web2.example.com ansible_host=192.168.1.11 ansible_user=root
web3.example.com ansible_host=192.168.1.12 ansible_user=root

# Database servers
[db_servers]
db1.example.com ansible_host=192.168.1.20 ansible_user=root
db2.example.com ansible_host=192.168.1.21 ansible_user=root

# Cache servers
[cache_servers]
cache1.example.com ansible_host=192.168.1.30 ansible_user=root
cache2.example.com ansible_host=192.168.1.31 ansible_user=root

# Grouping by environment
[production:children]
web_servers
db_servers
cache_servers

# Grouping by region
[beijing:children]
web_servers
db_servers

[shanghai:children]
cache_servers

# Group variables
[web_servers:vars]
http_port=80
https_port=443
max_clients=200

[db_servers:vars]
db_port=3306
db_name=app_db

[cache_servers:vars]
cache_port=6379
cache_maxmemory=1gb

# Connection variables applied to every host (uncomment to use)
# [all:vars]
# ansible_connection=ssh
# ansible_ssh_common_args='-o StrictHostKeyChecking=no'
3.3 创建动态库存脚本
创建动态库存脚本:inventory/dynamic_inventory.py
#!/usr/bin/env python3
"""
Ansible 动态库存脚本
从外部数据源获取主机信息
"""
import json
import argparse
import requests
class DynamicInventory:
    """Build an Ansible dynamic-inventory structure from a host source.

    The inventory is a dict of group name -> {'hosts': [...], 'vars': {...}}
    plus the special '_meta' entry carrying per-host variables, which is the
    layout printed for ``--list``.
    """

    def __init__(self):
        # Pre-declared groups with their group-level variables; further
        # groups are created on demand by build_inventory().
        self.inventory = {
            'web_servers': {
                'hosts': [],
                'vars': {
                    'http_port': 80,
                    'https_port': 443
                }
            },
            'db_servers': {
                'hosts': [],
                'vars': {
                    'db_port': 3306
                }
            },
            '_meta': {
                'hostvars': {}
            }
        }

    def get_hosts_from_api(self):
        """Return the list of host records.

        Each record is a dict with the keys 'name', 'ip', 'groups' and
        'vars'. Currently returns built-in sample data; the commented lines
        show where a real CMDB request would go. Returns an empty list if
        fetching fails.
        """
        try:
            # Example: fetch hosts from a configuration management database
            # response = requests.get('http://cmdb.example.com/api/hosts')
            # hosts = response.json()
            # Sample data
            hosts = [
                {
                    'name': 'web1.example.com',
                    'ip': '192.168.1.10',
                    'groups': ['web_servers', 'production'],
                    'vars': {
                        'ansible_user': 'root',
                        'server_role': 'frontend'
                    }
                },
                {
                    'name': 'db1.example.com',
                    'ip': '192.168.1.20',
                    'groups': ['db_servers', 'production'],
                    'vars': {
                        'ansible_user': 'root',
                        'server_role': 'database'
                    }
                }
            ]
            return hosts
        except Exception as e:
            # Imported here because the module header does not import sys;
            # without it this error path itself raised NameError.
            import sys
            print(f"Error fetching hosts from API: {e}", file=sys.stderr)
            return []

    def build_inventory(self):
        """Populate groups and host variables from the host source.

        Safe to call repeatedly: a host is added to a group only once, so
        successive get_inventory()/get_host() calls do not duplicate entries.
        """
        for host in self.get_hosts_from_api():
            hostname = host['name']
            ip_address = host['ip']
            # Add the host to each of its groups, creating unknown groups.
            for group in host['groups']:
                members = self.inventory.setdefault(group, {'hosts': []})['hosts']
                if hostname not in members:
                    members.append(hostname)
            # Host variables; explicit vars may override ansible_host.
            self.inventory['_meta']['hostvars'][hostname] = {
                'ansible_host': ip_address,
                **host['vars']
            }

    def get_inventory(self):
        """Return the complete inventory dict (the ``--list`` payload)."""
        self.build_inventory()
        return self.inventory

    def get_host(self, hostname):
        """Return the variables of one host, or {} when it is unknown."""
        self.build_inventory()
        return self.inventory['_meta']['hostvars'].get(hostname, {})
def main():
    """Command-line entry point for the dynamic inventory.

    ``--list`` prints the whole inventory as JSON, ``--host NAME`` prints
    that host's variables as JSON, and with neither option the usage help
    is printed.
    """
    source = DynamicInventory()
    cli = argparse.ArgumentParser(description='Ansible Dynamic Inventory')
    cli.add_argument('--list', action='store_true', help='List all hosts')
    cli.add_argument('--host', help='Get host variables')
    options = cli.parse_args()

    # --list takes precedence over --host when both are given.
    if options.list:
        payload = source.get_inventory()
    elif options.host:
        payload = source.get_host(options.host)
    else:
        cli.print_help()
        return
    print(json.dumps(payload, indent=2))


if __name__ == '__main__':
    main()
给脚本添加执行权限:
chmod +x inventory/dynamic_inventory.py
4. Ansible Ad-Hoc 命令
4.1 基础 Ad-Hoc 命令示例
创建 Ad-Hoc 命令参考:ad_hoc_commands.sh
#!/bin/bash
# Ansible ad-hoc command examples.
#
# Runs a sequence of representative `ansible` ad-hoc invocations against the
# configured inventory. Steps labelled as checks run in --check mode so they
# only report state; the remaining steps do change the managed hosts.

echo "=== Ansible Ad-Hoc 命令示例 ==="

# 1. Connectivity test
echo -e "\n1. 测试到所有主机的连接:"
ansible all -m ping

# 2. Gather host facts (first 20 lines only)
echo -e "\n2. 收集主机事实:"
ansible web_servers -m setup | head -20

# 3. Run a shell command
echo -e "\n3. 在所有主机上执行命令:"
ansible all -m shell -a "uptime"

# 4. File status
echo -e "\n4. 检查文件状态:"
ansible web_servers -m stat -a "path=/etc/nginx/nginx.conf"

# 5. Package check. --check reports whether nginx is installed; without it
#    `state=present` would install the package instead of checking.
echo -e "\n5. 检查 Nginx 是否安装:"
ansible web_servers -m package -a "name=nginx state=present" --become --check

# 6. Service check. --check reports whether the service is running; without
#    it `state=started` would start the service instead of checking.
echo -e "\n6. 检查服务状态:"
ansible web_servers -m service -a "name=nginx state=started" --become --check

# 7. Copy a file to the remote hosts
echo -e "\n7. 传输文件到远程主机:"
ansible web_servers -m copy -a "src=./files/nginx.conf dest=/tmp/nginx.conf mode=0644" --become

# 8. Create a directory
echo -e "\n8. 创建目录:"
ansible all -m file -a "path=/tmp/ansible_test state=directory mode=0755" --become

# 9. User management
echo -e "\n9. 创建用户:"
ansible all -m user -a "name=ansible_user state=present" --become

# 10. Disk information
echo -e "\n10. 检查磁盘使用情况:"
ansible all -m shell -a "df -h"

# 11. Parallel execution with 5 forks
echo -e "\n11. 并行执行命令:"
ansible all -m shell -a "sleep 5 && echo 'Done'" -f 5

# 12. Limit to a single host
echo -e "\n12. 在特定主机上执行:"
ansible 'web1.example.com' -m shell -a "hostname"

# 13. Host pattern matching
echo -e "\n13. 使用模式匹配主机:"
ansible 'web*' -m shell -a "date"

# 14. Filtered facts
echo -e "\n14. 获取特定事实:"
ansible all -m setup -a "filter=ansible_distribution*"

# 15. Asynchronous execution: up to 3600 s in the background, no polling
echo -e "\n15. 异步执行长时间任务:"
ansible all -B 3600 -P 0 -m shell -a "yum update -y" --become

echo -e "\n=== Ad-Hoc 命令示例完成 ==="
5. Ansible Playbook 基础
5.1 第一个 Playbook
创建基础 Playbook:playbooks/first_playbook.yml
---
# First Ansible playbook: baseline configuration for every host.
# Refreshes the package cache, installs common tools, sets the time zone and
# creates an administrator account with key-based login and passwordless sudo.
- name: 基础服务器配置
  hosts: all
  become: yes
  gather_facts: yes

  vars:
    # Baseline settings
    timezone: "Asia/Shanghai"
    admin_user: "ansible-admin"
    # The administrative group differs per family: RedHat has `wheel`,
    # Debian has `sudo`. A fixed "sudo" fails user creation on RedHat.
    admin_groups: "{{ 'wheel' if ansible_os_family == 'RedHat' else 'sudo' }}"

  tasks:
    - name: 显示开始信息
      debug:
        msg: "开始配置服务器 {{ inventory_hostname }}"

    - name: 更新 apt 缓存 (Ubuntu/Debian)
      apt:
        update_cache: yes
        cache_valid_time: 3600
      when: ansible_os_family == "Debian"

    # Only refreshes metadata, matching the task name. The previous
    # `name: "*"` + `state: latest` upgraded every installed package.
    - name: 更新 yum 缓存 (CentOS/RHEL)
      yum:
        update_cache: yes
      when: ansible_os_family == "RedHat"

    - name: 安装基础软件包
      package:
        name:
          - curl
          - wget
          - vim
          - htop
          - git
          - unzip
        state: present

    - name: 配置时区
      timezone:
        name: "{{ timezone }}"

    - name: 创建管理员用户
      user:
        name: "{{ admin_user }}"
        groups: "{{ admin_groups }}"
        shell: /bin/bash
        state: present
        create_home: yes

    - name: 配置 SSH 密钥认证
      authorized_key:
        user: "{{ admin_user }}"
        state: present
        key: "{{ lookup('file', '~/.ssh/id_rsa.pub') }}"

    # A broken sudoers drop-in can lock everyone out of sudo, so the file is
    # validated with visudo before it is installed. The trailing newline is
    # required for a well-formed sudoers entry.
    - name: 配置 sudo 权限
      copy:
        content: "{{ admin_user }} ALL=(ALL) NOPASSWD:ALL\n"
        dest: "/etc/sudoers.d/{{ admin_user }}"
        owner: root
        group: root
        mode: "0440"
        validate: "visudo -cf %s"

    - name: 显示完成信息
      debug:
        msg: "服务器 {{ inventory_hostname }} 配置完成"

  handlers:
    # Not notified by any task above; available through its listen topic.
    - name: 重启 sshd
      service:
        name: sshd
        state: restarted
      listen: "restart ssh service"
执行第一个 Playbook:
ansible-playbook playbooks/first_playbook.yml
5.2 多主机环境 Playbook
创建复杂 Playbook:playbooks/multi_host_setup.yml
---
# Multi-host environment playbook: one play each for web, database and cache
# servers. Handlers are scoped to a play, so each play that notifies a
# handler defines it itself.

- name: 配置 Web 服务器
  hosts: web_servers
  become: yes

  vars:
    nginx_version: "1.18"
    web_root: "/var/www/html"
    server_name: "example.com"

  tasks:
    - name: 安装 Nginx
      package:
        name: nginx
        state: present

    - name: 创建网站目录
      file:
        path: "{{ web_root }}"
        state: directory
        mode: "0755"
        owner: www-data
        group: www-data

    - name: 部署网站文件
      copy:
        src: files/index.html
        dest: "{{ web_root }}/index.html"
        mode: "0644"
        owner: www-data
        group: www-data

    - name: 配置 Nginx
      template:
        src: templates/nginx.conf.j2
        dest: /etc/nginx/sites-available/default
        mode: "0644"
      notify: restart nginx

    - name: 启用 Nginx 服务
      service:
        name: nginx
        state: started
        enabled: yes

    - name: 配置防火墙
      ufw:
        rule: allow
        port: "80"
        proto: tcp
      when: ansible_os_family == "Debian"

    - name: 显示 Web 服务器信息
      debug:
        msg: "Web 服务器 {{ inventory_hostname }} 配置完成,访问 http://{{ ansible_host }}"

  handlers:
    # Must live in this play: a handler defined in another play is not
    # visible to `notify` here.
    - name: restart nginx
      service:
        name: nginx
        state: restarted

- name: 配置数据库服务器
  hosts: db_servers
  become: yes

  vars:
    # NOTE(review): plaintext placeholder credentials. Move real values into
    # an Ansible Vault encrypted file before using this outside a lab.
    mysql_root_password: "secure_password"
    mysql_database: "app_db"
    mysql_user: "app_user"
    mysql_user_password: "user_password"

  tasks:
    - name: 安装 MySQL 服务器
      package:
        name: mysql-server
        state: present

    - name: 启动 MySQL 服务
      service:
        name: mysql
        state: started
        enabled: yes

    - name: 设置 root 密码
      mysql_user:
        login_user: root
        login_password: ""
        name: root
        password: "{{ mysql_root_password }}"
        host: localhost
        check_implicit_admin: yes

    - name: 创建应用数据库
      mysql_db:
        login_user: root
        login_password: "{{ mysql_root_password }}"
        name: "{{ mysql_database }}"
        state: present

    - name: 创建应用用户
      mysql_user:
        login_user: root
        login_password: "{{ mysql_root_password }}"
        name: "{{ mysql_user }}"
        password: "{{ mysql_user_password }}"
        host: "%"
        priv: "{{ mysql_database }}.*:ALL"
        state: present

    - name: 配置防火墙
      ufw:
        rule: allow
        port: "3306"
        proto: tcp
      when: ansible_os_family == "Debian"

    - name: 显示数据库信息
      debug:
        msg: |
          数据库服务器 {{ inventory_hostname }} 配置完成
          数据库: {{ mysql_database }}
          用户: {{ mysql_user }}

- name: 配置缓存服务器
  hosts: cache_servers
  become: yes

  vars:
    redis_port: 6379
    redis_bind: "0.0.0.0"
    redis_maxmemory: "1gb"

  tasks:
    - name: 安装 Redis
      package:
        name: redis-server
        state: present

    - name: 配置 Redis
      template:
        src: templates/redis.conf.j2
        dest: /etc/redis/redis.conf
        mode: "0644"
      notify: restart redis

    - name: 启动 Redis 服务
      service:
        name: redis-server
        state: started
        enabled: yes

    - name: 配置防火墙
      ufw:
        rule: allow
        port: "{{ redis_port }}"
        proto: tcp
      when: ansible_os_family == "Debian"

    - name: 显示缓存服务器信息
      debug:
        msg: "Redis 服务器 {{ inventory_hostname }} 配置完成,端口 {{ redis_port }}"

  handlers:
    - name: restart redis
      service:
        name: redis-server
        state: restarted
6. Ansible Roles 角色系统
6.1 创建基础角色结构
创建角色初始化脚本:scripts/create_roles.sh
#!/bin/bash
# Scaffold a standard Ansible role skeleton under roles/<role-name>.
#
# Usage: create_roles.sh <role-name>
#
# Creates the role directory tree and writes starter tasks, handlers,
# defaults and meta files. Existing generated files are overwritten.
# Exits 1 when no role name is given.
set -euo pipefail

ROLE_NAME=${1:-}
if [[ -z "$ROLE_NAME" ]]; then
  echo "使用方法: $0 <角色名>"
  exit 1
fi

ROLE_PATH="roles/$ROLE_NAME"
echo "创建角色: $ROLE_NAME"
echo "路径: $ROLE_PATH"

# Standard directory layout. The prefix is quoted so role names containing
# spaces or glob characters cannot split or expand.
mkdir -p -- "$ROLE_PATH"/{tasks,handlers,templates,files,vars,defaults,meta,library,module_utils,lookup_plugins}

# vars/main.yml gets no generated content; the other main.yml files are
# written in full below.
touch -- "$ROLE_PATH/vars/main.yml"

# Starter tasks. The header names the concrete role; the body keeps the
# Jinja expressions literal so Ansible evaluates them at run time.
{
  printf -- '---\n# tasks file for %s\n' "$ROLE_NAME"
  cat << 'EOF'
- name: Include OS specific variables
  include_vars: "{{ ansible_os_family }}.yml"
  ignore_errors: yes

- name: Debug role start
  debug:
    msg: "Starting {{ role_name }} role execution"

# 在这里添加具体任务
EOF
} > "$ROLE_PATH/tasks/main.yml"

# Handlers
{
  printf -- '---\n# handlers file for %s\n' "$ROLE_NAME"
  cat << 'EOF'
- name: restart service
  service:
    name: "{{ service_name }}"
    state: restarted
EOF
} > "$ROLE_PATH/handlers/main.yml"

# Default variables
{
  printf -- '---\n# defaults file for %s\n' "$ROLE_NAME"
  cat << 'EOF'
# 服务配置
service_name: "{{ role_name }}"
service_port: 8080
service_user: "{{ role_name }}"
service_group: "{{ role_name }}"
# 安装配置
install_path: "/opt/{{ role_name }}"
config_path: "/etc/{{ role_name }}"
log_path: "/var/log/{{ role_name }}"
EOF
} > "$ROLE_PATH/defaults/main.yml"

# Role metadata. The role name is substituted by the shell here: an unquoted
# `description: {{ role_name }} role` starts a YAML flow mapping and makes
# the file unparseable. Version numbers are quoted so they stay strings.
cat > "$ROLE_PATH/meta/main.yml" << EOF
---
galaxy_info:
  author: your_name
  description: "${ROLE_NAME} role"
  company: your_company
  license: MIT
  min_ansible_version: "2.9"
  platforms:
    - name: Ubuntu
      versions:
        - focal
        - bionic
    - name: EL
      versions:
        - "7"
        - "8"
  galaxy_tags:
    - system
    - deployment
dependencies: []
EOF

echo "角色 $ROLE_NAME 创建完成"
echo "目录结构:"
# `tree` is often not installed; fall back to a sorted find listing.
if command -v tree > /dev/null 2>&1; then
  tree "$ROLE_PATH"
else
  find "$ROLE_PATH" -print | sort
fi
6.2 创建 Nginx 角色
创建 Nginx 角色:roles/nginx/tasks/main.yml
---
# Nginx role tasks: install nginx, create its account and directories,
# render the main and virtual-host configuration, publish a default page,
# start the service and open the HTTP port in the host firewall.

- name: 包含操作系统特定变量
  include_vars: "{{ ansible_os_family }}.yml"

- name: 安装 Nginx
  package:
    name: "{{ nginx_package }}"
    state: present
    update_cache: yes

# The user task below assigns this group as primary group, which fails when
# the group does not exist yet (the distribution package does not
# necessarily create a group with this name).
- name: 创建 Nginx 组
  group:
    name: "{{ nginx_group }}"
    system: yes
    state: present

- name: 创建 Nginx 用户和组
  user:
    name: "{{ nginx_user }}"
    group: "{{ nginx_group }}"
    system: yes
    create_home: no
    state: present

- name: 创建网站根目录
  file:
    path: "{{ web_root }}"
    state: directory
    owner: "{{ nginx_user }}"
    group: "{{ nginx_group }}"
    mode: "0755"

- name: 创建日志目录
  file:
    path: "{{ nginx_log_dir }}"
    state: directory
    owner: "{{ nginx_user }}"
    group: "{{ nginx_group }}"
    mode: "0755"

# RedHat-family packages do not ship sites-available/sites-enabled, so the
# virtual-host tasks below would fail there without these directories.
- name: 创建虚拟主机目录
  file:
    path: "{{ item }}"
    state: directory
    owner: root
    group: root
    mode: "0755"
  loop:
    - "{{ nginx_sites_available }}"
    - "{{ nginx_sites_enabled }}"

- name: 配置 Nginx
  template:
    src: "nginx.conf.j2"
    dest: "{{ nginx_conf_path }}/nginx.conf"
    owner: root
    group: root
    mode: "0644"
  notify: reload nginx

- name: 配置虚拟主机
  template:
    src: "vhost.conf.j2"
    dest: "{{ nginx_sites_available }}/{{ server_name }}.conf"
    owner: root
    group: root
    mode: "0644"
  notify: reload nginx

- name: 启用虚拟主机
  file:
    src: "{{ nginx_sites_available }}/{{ server_name }}.conf"
    dest: "{{ nginx_sites_enabled }}/{{ server_name }}.conf"
    state: link
  notify: reload nginx

- name: 部署默认页面
  copy:
    content: |
      <!DOCTYPE html>
      <html>
      <head>
      <title>Welcome to {{ server_name }}</title>
      </head>
      <body>
      <h1>Welcome to {{ server_name }}</h1>
      <p>Server: {{ inventory_hostname }}</p>
      <p>This is the default page.</p>
      </body>
      </html>
    dest: "{{ web_root }}/index.html"
    owner: "{{ nginx_user }}"
    group: "{{ nginx_group }}"
    mode: "0644"

- name: 启动并启用 Nginx 服务
  service:
    name: "{{ nginx_service }}"
    state: started
    enabled: yes

- name: 配置防火墙
  firewalld:
    port: "{{ http_port }}/tcp"
    permanent: yes
    state: enabled
    immediate: yes
  when: ansible_os_family == "RedHat"

- name: 配置 UFW (Ubuntu)
  ufw:
    rule: allow
    port: "{{ http_port }}"
    proto: tcp
  when: ansible_os_family == "Debian"
创建 Nginx 角色默认变量:roles/nginx/defaults/main.yml
---
# Default variables for the nginx role. Every value here can be overridden
# from the inventory, the play, or the role invocation.
# Service settings: package, service unit, and the account nginx runs as
nginx_package: nginx
nginx_service: nginx
nginx_user: nginx
nginx_group: nginx
# Filesystem paths
web_root: "/var/www/html"
nginx_conf_path: "/etc/nginx"
nginx_log_dir: "/var/log/nginx"
nginx_sites_available: "/etc/nginx/sites-available"
nginx_sites_enabled: "/etc/nginx/sites-enabled"
# Server settings
server_name: "{{ inventory_hostname }}"
http_port: 80
https_port: 443
# Falls back to 2 when the ansible_processor_vcpus fact is not defined
worker_processes: "{{ ansible_processor_vcpus | default(2) }}"
worker_connections: 1024
# Performance tuning
keepalive_timeout: 65
client_max_body_size: "100m"
创建 Nginx 模板:roles/nginx/templates/nginx.conf.j2
# Nginx main configuration file (rendered by the nginx role).
# Include paths are derived from the role variables nginx_conf_path and
# nginx_sites_enabled so that overriding them also moves the includes.
user {{ nginx_user }} {{ nginx_group }};
worker_processes {{ worker_processes }};
pid /run/nginx.pid;

events {
    worker_connections {{ worker_connections }};
    multi_accept on;
    use epoll;
}

http {
    # Basic settings
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout {{ keepalive_timeout }};
    types_hash_max_size 2048;
    client_max_body_size {{ client_max_body_size }};

    # MIME types
    include {{ nginx_conf_path }}/mime.types;
    default_type application/octet-stream;

    # Log format
    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for"';
    access_log {{ nginx_log_dir }}/access.log main;
    error_log {{ nginx_log_dir }}/error.log warn;

    # Gzip compression
    gzip on;
    gzip_vary on;
    gzip_min_length 1024;
    gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;

    # Virtual hosts
    include {{ nginx_conf_path }}/conf.d/*.conf;
    include {{ nginx_sites_enabled }}/*;
}
6.3 使用角色的 Playbook
创建使用角色的 Playbook:playbooks/with_roles.yml
---
# Playbook built from roles: web cluster, database cluster, load balancer.
# Within a play Ansible runs pre_tasks, then roles, then post_tasks,
# regardless of the order in which the keys are written.

- name: 配置 Web 服务器集群
  hosts: web_servers
  become: yes

  roles:
    - role: nginx
      vars:
        # inventory_hostname is already a FQDN (web1.example.com); the short
        # name avoids "web-web1.example.com.example.com".
        server_name: "web-{{ inventory_hostname_short }}.example.com"
        http_port: 80
    - role: common
      tags: common
    - role: security
      tags: security

- name: 配置数据库集群
  hosts: db_servers
  become: yes

  pre_tasks:
    # Runs before the mysql role, when /var/lib/mysql may not exist yet, so
    # the parent directory is inspected instead.
    - name: 检查磁盘空间
      command: df -h /var/lib
      register: disk_space
      changed_when: false

    - name: 显示磁盘空间
      debug:
        var: disk_space.stdout_lines

  roles:
    - role: mysql
      vars:
        mysql_root_password: "{{ vault_mysql_root_password }}"
        mysql_databases:
          - name: app_db
            encoding: utf8mb4
        mysql_users:
          - name: app_user
            password: "{{ vault_mysql_app_password }}"
            priv: "app_db.*:ALL"
            host: '%'
    - role: common
    - role: backup

  post_tasks:
    # Registered under its own name: `mysql_databases` is the role's input
    # variable and must not be overwritten by a task result.
    - name: 验证 MySQL 连接
      mysql_query:
        login_user: root
        login_password: "{{ vault_mysql_root_password }}"
        query: "SHOW DATABASES;"
      register: mysql_show_databases

    # mysql_query returns its rows under `query_result`.
    - name: 显示数据库列表
      debug:
        var: mysql_show_databases.query_result

- name: 配置负载均衡器
  hosts: lb_servers
  become: yes

  roles:
    - role: haproxy
      vars:
        haproxy_backend_servers: "{{ groups['web_servers'] }}"

  handlers:
    - name: restart haproxy
      service:
        name: haproxy
        state: restarted
7. Ansible Vault 加密管理
7.1 创建加密管理脚本
创建 Vault 管理脚本:scripts/vault_manager.sh
#!/bin/bash
# Ansible Vault 管理脚本
# Locations shared by every function in this script: the vault password file
# and the directory holding the per-environment variable files.
VAULT_PASSWORD_FILE='.vault_pass'
SECRETS_DIR='secrets'

# Make sure the secrets directory exists before any command runs.
mkdir -p -- "${SECRETS_DIR}"
#######################################
# Create the vault password file with random content if it is missing.
# Globals:   VAULT_PASSWORD_FILE (read)
# Outputs:   progress messages on stdout, failure message on stderr
# Returns:   0 when the file exists afterwards, 1 when it could not be
#            generated (no partial file is left behind)
#######################################
create_vault_password() {
  if [[ -f "$VAULT_PASSWORD_FILE" ]]; then
    echo "Vault 密码文件已存在"
    return 0
  fi

  echo "创建 Vault 密码文件..."
  # The restrictive umask (in a subshell, so it does not leak) makes the
  # file private from the moment it is created, instead of leaving it
  # readable by others until a later chmod.
  if ! (umask 077 && openssl rand -base64 32 > "$VAULT_PASSWORD_FILE"); then
    # Never keep an empty or truncated password file.
    rm -f -- "$VAULT_PASSWORD_FILE"
    echo "无法生成 Vault 密码文件: $VAULT_PASSWORD_FILE" >&2
    return 1
  fi
  chmod 600 "$VAULT_PASSWORD_FILE"
  echo "Vault 密码文件已创建: $VAULT_PASSWORD_FILE"
}
#######################################
# Create and encrypt the secrets file of one environment.
# Writes a plaintext template (unless one already exists), encrypts it with
# ansible-vault into <env>_vars.yml.encrypted and removes the plaintext.
# Does nothing when the encrypted file already exists.
# Globals:   SECRETS_DIR, VAULT_PASSWORD_FILE (read)
# Arguments: $1 - environment name, e.g. production
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   0 on success or when already encrypted, 1 on error
#######################################
create_encrypted_vars() {
  local env=${1:-}
  if [[ -z "$env" ]]; then
    echo "缺少环境名称" >&2
    return 1
  fi

  local var_file="$SECRETS_DIR/${env}_vars.yml"
  local encrypted_file="$SECRETS_DIR/${env}_vars.yml.encrypted"

  # Checked first: the plaintext is deleted after encryption, so its absence
  # alone must not trigger a fresh template.
  if [[ -f "$encrypted_file" ]]; then
    echo "加密文件已存在: $encrypted_file"
    return 0
  fi

  if [[ ! -f "$var_file" ]]; then
    echo "创建 $env 环境变量文件..."
    mkdir -p -- "$SECRETS_DIR"
    # Variable names carry the vault_ prefix used by the playbooks that load
    # this file. Block scalars are indented so the template is valid YAML.
    # The subshell umask keeps the plaintext private while it exists.
    (
      umask 077
      cat > "$var_file" << EOF
---
# $env 环境敏感变量
# 数据库密码
vault_mysql_root_password: "change_me_123"
vault_mysql_app_password: "change_me_456"
# API 密钥
api_secret_key: "your_api_secret_here"
api_access_token: "your_access_token_here"
# SSH 密钥
vault_ssh_public_key: "ssh-rsa Your public key here"
ssh_private_key: |
  -----BEGIN PRIVATE KEY-----
  Your private key here
  -----END PRIVATE KEY-----
# 证书文件
vault_ssl_certificate: |
  -----BEGIN CERTIFICATE-----
  Your certificate here
  -----END CERTIFICATE-----
vault_ssl_private_key: |
  -----BEGIN PRIVATE KEY-----
  Your SSL private key here
  -----END PRIVATE KEY-----
EOF
    ) || return 1
    echo "变量文件已创建: $var_file"
  fi

  if ! ansible-vault encrypt --vault-password-file "$VAULT_PASSWORD_FILE" \
    --output "$encrypted_file" "$var_file"; then
    # Do not leave a partial output that would look "already encrypted".
    rm -f -- "$encrypted_file"
    echo "加密失败: $var_file" >&2
    return 1
  fi

  # The encrypted copy is now authoritative; keeping the plaintext next to
  # it would defeat the purpose of the vault.
  rm -f -- "$var_file"
  echo "加密文件已创建: $encrypted_file"
}
#######################################
# Open the encrypted secrets file of one environment in ansible-vault edit.
# Globals:   SECRETS_DIR, VAULT_PASSWORD_FILE (read)
# Arguments: $1 - environment name
# Outputs:   error message on stderr when the file is missing
# Returns:   ansible-vault's status, or 1 when the file does not exist
#######################################
edit_encrypted_file() {
  local env=${1:-}
  local encrypted_file="$SECRETS_DIR/${env}_vars.yml.encrypted"

  if [[ ! -f "$encrypted_file" ]]; then
    # Report failure through the exit status too, so callers can detect it.
    echo "加密文件不存在: $encrypted_file" >&2
    return 1
  fi
  ansible-vault edit --vault-password-file "$VAULT_PASSWORD_FILE" "$encrypted_file"
}
#######################################
# Print the decrypted secrets file of one environment via ansible-vault view.
# Globals:   SECRETS_DIR, VAULT_PASSWORD_FILE (read)
# Arguments: $1 - environment name
# Outputs:   decrypted content on stdout; error message on stderr when the
#            file is missing
# Returns:   ansible-vault's status, or 1 when the file does not exist
#######################################
view_encrypted_file() {
  local env=${1:-}
  local encrypted_file="$SECRETS_DIR/${env}_vars.yml.encrypted"

  if [[ ! -f "$encrypted_file" ]]; then
    # Report failure through the exit status too, so callers can detect it.
    echo "加密文件不存在: $encrypted_file" >&2
    return 1
  fi
  ansible-vault view --vault-password-file "$VAULT_PASSWORD_FILE" "$encrypted_file"
}
# Print the usage text on stderr and exit with status 1.
usage() {
  {
    echo "使用方法: $0 {init|edit <env>|view <env>|create <env>}"
    echo " init - 初始化 Vault 环境"
    echo " edit - 编辑加密文件"
    echo " view - 查看加密文件"
    echo " create - 创建新的环境变量文件"
  } >&2
  exit 1
}

# Command dispatch. edit/view/create require an environment name; without
# this check an empty name would address "secrets/_vars.yml*".
case "${1:-}" in
  init)
    create_vault_password
    create_encrypted_vars "production"
    create_encrypted_vars "staging"
    create_encrypted_vars "development"
    ;;
  edit)
    [[ -n "${2:-}" ]] || usage
    edit_encrypted_file "$2"
    ;;
  view)
    [[ -n "${2:-}" ]] || usage
    view_encrypted_file "$2"
    ;;
  create)
    [[ -n "${2:-}" ]] || usage
    create_encrypted_vars "$2"
    ;;
  *)
    usage
    ;;
esac
7.2 使用加密变量的 Playbook
创建使用 Vault 的 Playbook:playbooks/with_vault.yml
---
# Playbook using Ansible Vault: deploys an application together with
# credentials, SSH key and TLS material taken from an encrypted vars file.
- name: 安全部署应用
  hosts: all
  become: yes

  vars_files:
    # Relative vars_files paths are resolved from the playbook's own
    # directory (playbooks/), while the secrets live at the project root.
    - "{{ playbook_dir }}/../secrets/{{ env }}_vars.yml.encrypted"

  vars:
    env: "production"
    app_name: "my_secure_app"
    deploy_user: "deployer"

  tasks:
    - name: 创建部署用户
      user:
        name: "{{ deploy_user }}"
        state: present
        shell: /bin/bash
        groups: sudo
        append: yes

    - name: 部署 SSH 密钥
      authorized_key:
        user: "{{ deploy_user }}"
        state: present
        key: "{{ vault_ssh_public_key }}"

    - name: 创建应用目录
      file:
        path: "/opt/{{ app_name }}"
        state: directory
        owner: "{{ deploy_user }}"
        group: "{{ deploy_user }}"
        mode: "0755"

    # The template task below writes into config/, which the task above
    # does not create.
    - name: 创建应用配置目录
      file:
        path: "/opt/{{ app_name }}/config"
        state: directory
        owner: "{{ deploy_user }}"
        group: "{{ deploy_user }}"
        mode: "0750"

    # no_log keeps rendered secrets out of console output and log files.
    - name: 配置数据库连接
      template:
        src: "templates/database.yml.j2"
        dest: "/opt/{{ app_name }}/config/database.yml"
        owner: "{{ deploy_user }}"
        group: "{{ deploy_user }}"
        mode: "0600"
      no_log: true

    - name: 配置环境变量
      template:
        src: "templates/.env.j2"
        dest: "/opt/{{ app_name }}/.env"
        owner: "{{ deploy_user }}"
        group: "{{ deploy_user }}"
        mode: "0600"
      no_log: true

    - name: 部署 SSL 证书
      copy:
        content: "{{ vault_ssl_certificate }}"
        dest: "/etc/ssl/certs/{{ app_name }}.crt"
        mode: "0644"
      notify: reload nginx

    - name: 部署 SSL 私钥
      copy:
        content: "{{ vault_ssl_private_key }}"
        dest: "/etc/ssl/private/{{ app_name }}.key"
        mode: "0600"
      no_log: true
      notify: reload nginx

    # A plain echo changes nothing on the host; do not report "changed".
    - name: 验证部署
      command: "echo 'Deployment completed successfully'"
      register: deployment_result
      changed_when: false

    - name: 显示部署结果
      debug:
        msg: "应用 {{ app_name }} 在 {{ env }} 环境部署完成"

  handlers:
    - name: reload nginx
      service:
        name: nginx
        state: reloaded
8. 高级功能和最佳实践
8.1 创建自定义模块
创建自定义模块:library/disk_usage.py
#!/usr/bin/env python3
from ansible.module_utils.basic import AnsibleModule
import shutil
def _collect_usage(path, threshold):
    """Measure disk usage of ``path`` and build the module result dict.

    Sizes are reported in GiB rounded to two decimals. ``alert`` is True
    when the used percentage is strictly greater than ``threshold``.
    Raises whatever ``shutil.disk_usage`` raises (e.g. for a missing path).
    """
    usage = shutil.disk_usage(path)
    gib = 1024 ** 3
    # Guard against a reported total of 0 bytes, which would otherwise raise
    # ZeroDivisionError; such a filesystem counts as 0% used.
    usage_percent = (usage.used / usage.total) * 100 if usage.total else 0.0
    return dict(
        changed=False,
        path=path,
        total_gb=round(usage.total / gib, 2),
        used_gb=round(usage.used / gib, 2),
        free_gb=round(usage.free / gib, 2),
        usage_percent=round(usage_percent, 2),
        threshold=threshold,
        alert=usage_percent > threshold,
        message=f"Disk usage: {usage_percent:.2f}%"
    )


def main():
    """Ansible module entry point: report disk usage for a path.

    Parameters:
        path (str, required): filesystem path to inspect.
        threshold (int, default 80): usage percentage above which ``alert``
            is set in the result.

    The module only reads state, so it never reports ``changed`` and
    declares check-mode support. Any error while measuring is reported
    through ``fail_json``.
    """
    module_args = dict(
        path=dict(type='str', required=True),
        threshold=dict(type='int', default=80)
    )
    module = AnsibleModule(
        argument_spec=module_args,
        supports_check_mode=True
    )

    try:
        result = _collect_usage(module.params['path'], module.params['threshold'])
    except Exception as e:
        module.fail_json(msg=f"Failed to get disk usage: {str(e)}")
    else:
        module.exit_json(**result)


if __name__ == '__main__':
    main()
8.2 创建自定义过滤器
创建自定义过滤器:filter_plugins/custom_filters.py
#!/usr/bin/env python3
class FilterModule(object):
    """Custom Ansible (Jinja2) filters.

    Ansible discovers the filters through :meth:`filters`, which maps the
    filter name used in templates to the implementing method.
    """

    def filters(self):
        """Return the mapping of filter name -> callable."""
        return {
            'to_upper': self.to_upper,
            'to_lower': self.to_lower,
            'format_size': self.format_size,
            'mysql_connection_string': self.mysql_connection_string,
        }

    def to_upper(self, value):
        """Return ``value`` converted to upper case."""
        return value.upper()

    def to_lower(self, value):
        """Return ``value`` converted to lower case."""
        return value.lower()

    def format_size(self, value, unit='MB'):
        """Format a byte count in the given binary unit.

        ``unit`` is one of KB, MB, GB, TB (powers of 1024); the result has
        two decimals, e.g. ``"1.00 MB"``. Any other unit returns the raw
        value followed by ``bytes``.
        """
        units = {
            'KB': 1024,
            'MB': 1024**2,
            'GB': 1024**3,
            'TB': 1024**4
        }
        if unit in units:
            return f"{value / units[unit]:.2f} {unit}"
        else:
            return f"{value} bytes"

    def mysql_connection_string(self, host, port, user, password, database):
        """Build a ``mysql://user:password@host:port/database`` URL.

        User and password are percent-encoded: characters such as ``@``,
        ``:`` or ``/`` in a password would otherwise be read as URL
        delimiters and produce a wrong or unparseable URL. Values made only
        of unreserved characters are emitted unchanged.
        """
        from urllib.parse import quote

        safe_user = quote(str(user), safe='')
        safe_password = quote(str(password), safe='')
        return f"mysql://{safe_user}:{safe_password}@{host}:{port}/{database}"
8.3 完整的项目结构
创建项目结构文档:PROJECT_STRUCTURE.md
ansible-project/
├── ansible.cfg # Ansible 配置文件
├── inventory/ # 主机清单目录
│ ├── hosts # 静态主机清单
│ ├── group_vars/ # 组变量
│ │ ├── all.yml # 所有主机变量
│ │ ├── web_servers.yml # Web 服务器变量
│ │ └── db_servers.yml # 数据库服务器变量
│ ├── host_vars/ # 主机变量
│ │ ├── web1.example.com.yml
│ │ └── db1.example.com.yml
│ └── dynamic_inventory.py # 动态库存脚本
├── playbooks/ # Playbook 目录
│ ├── site.yml # 主 Playbook
│ ├── web.yml # Web 服务器 Playbook
│ ├── db.yml # 数据库 Playbook
│ └── deploy.yml # 部署 Playbook
├── roles/ # 角色目录
│ ├── common/ # 通用角色
│ ├── nginx/ # Nginx 角色
│ ├── mysql/ # MySQL 角色
│ └── redis/ # Redis 角色
├── files/ # 文件目录
│ ├── configs/ # 配置文件
│ └── scripts/ # 部署脚本
├── templates/ # 模板目录
│ ├── nginx/ # Nginx 模板
│ └── mysql/ # MySQL 模板
├── secrets/ # 加密文件目录
│ ├── production_vars.yml.encrypted
│ └── staging_vars.yml.encrypted
├── library/ # 自定义模块
│ └── disk_usage.py
├── filter_plugins/ # 自定义过滤器
│ └── custom_filters.py
├── scripts/ # 工具脚本
│ ├── create_roles.sh
│ └── vault_manager.sh
└── README.md # 项目文档
8.4 完整的部署 Playbook
创建完整部署 Playbook:playbooks/site.yml
---
# Full infrastructure deployment playbook: base system, web, database,
# cache, load balancer and monitoring plays, followed by a validation play.
# The `env` variable is expected as an extra var (-e env=<name>).

- name: 基础系统配置
  hosts: all
  become: yes
  gather_facts: yes
  roles:
    - role: common
    - role: security
    - role: monitoring
  tags: base

- name: 部署 Web 服务器
  hosts: web_servers
  become: yes

  pre_tasks:
    - name: 验证 Web 服务器连接
      wait_for_connection:
        timeout: 30

    - name: 检查系统资源
      shell: |
        free -h
        df -h
      register: system_resources
      changed_when: false

  roles:
    - role: nginx
      vars:
        server_name: "{{ web_domain }}"
        http_port: 80
        https_port: 443
    - role: php
      when: install_php | default(true)
    - role: nodejs
      when: install_nodejs | default(false)

  post_tasks:
    - name: 验证 Web 服务
      uri:
        url: "http://{{ ansible_host }}"
        status_code: 200
      register: web_validation

    - name: 显示验证结果
      debug:
        msg: "Web 服务验证 {{ '成功' if web_validation.status == 200 else '失败' }}"

  tags: web

- name: 部署数据库服务器
  hosts: db_servers
  become: yes

  vars_files:
    # Relative vars_files paths are resolved from the playbook's own
    # directory (playbooks/), while the secrets live at the project root.
    - "{{ playbook_dir }}/../secrets/{{ env }}_vars.yml.encrypted"

  roles:
    - role: mysql
      vars:
        mysql_root_password: "{{ vault_mysql_root_password }}"
        mysql_bind_address: "0.0.0.0"
        mysql_databases:
          - name: "{{ app_database }}"
            encoding: utf8mb4
          - name: "{{ app_database }}_test"
            encoding: utf8mb4
        mysql_users:
          - name: "{{ app_database_user }}"
            password: "{{ vault_mysql_app_password }}"
            priv: "{{ app_database }}.*:ALL,{{ app_database }}_test.*:ALL"
            host: '%'
    - role: backup
      vars:
        backup_destination: "/backup/mysql"
        backup_retention_days: 7

  tags: database

- name: 部署缓存服务器
  hosts: cache_servers
  become: yes
  roles:
    - role: redis
      vars:
        redis_bind: "0.0.0.0"
        redis_port: 6379
        redis_maxmemory: "1gb"
    - role: memcached
      when: install_memcached | default(false)
  tags: cache

- name: 部署负载均衡器
  hosts: lb_servers
  become: yes
  roles:
    - role: haproxy
      vars:
        haproxy_frontend_port: 80
        haproxy_backend_servers: "{{ groups['web_servers'] }}"
  tags: loadbalancer

- name: 部署监控系统
  hosts: monitoring_servers
  become: yes
  roles:
    - role: prometheus
    - role: grafana
    - role: alertmanager
  tags: monitoring

- name: 最终验证和测试
  hosts: all
  become: no
  gather_facts: no

  tasks:
    - name: 测试网络连通性
      wait_for:
        host: "{{ ansible_host }}"
        port: 22
        timeout: 10
      delegate_to: localhost

    # Facts are not gathered in this play, so ansible_date_time would be
    # undefined here; now() provides the timestamp without facts.
    - name: 收集部署摘要
      set_fact:
        deployment_summary: |
          部署完成时间: {{ now(utc=true).isoformat() }}
          部署环境: {{ env }}
          部署服务器数量: {{ groups['all'] | length }}
          Web 服务器: {{ groups['web_servers'] | length }}
          数据库服务器: {{ groups['db_servers'] | length }}
          缓存服务器: {{ groups['cache_servers'] | length }}

    - name: 显示部署摘要
      debug:
        msg: "{{ deployment_summary }}"

  tags: validation
9. 执行和监控
9.1 创建部署执行脚本
创建部署脚本:scripts/deploy.sh
#!/bin/bash
# Ansible 部署执行脚本
set -e
# ANSI colour escape sequences used by the log helpers (NC resets colour).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Log helpers. Each prints "[LEVEL] message" with a coloured level tag.
# Only the colour codes are escape-expanded (%b); the message is printed
# verbatim (%s), so backslashes in paths or arguments are not interpreted.
# Informational output goes to stdout; warnings and errors go to stderr so
# they stay visible when stdout is redirected or captured.
# Arguments: $1 - message text
log_info() {
  printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "${1:-}"
}

log_success() {
  printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "${1:-}"
}

log_warning() {
  printf '%b[WARNING]%b %s\n' "$YELLOW" "$NC" "${1:-}" >&2
}

log_error() {
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "${1:-}" >&2
}
# 检查依赖
#######################################
# Verify that the ansible and ansible-playbook commands are available.
# Outputs:   progress and error messages through the log helpers
# Returns:   0 when both are found; exits the script with status 1 otherwise
#######################################
check_dependencies() {
  log_info "检查依赖..."

  command -v ansible > /dev/null 2>&1 || {
    log_error "Ansible 未安装"
    exit 1
  }

  command -v ansible-playbook > /dev/null 2>&1 || {
    log_error "ansible-playbook 未找到"
    exit 1
  }

  log_success "依赖检查完成"
}
# 语法检查
#######################################
# Run ansible-playbook --syntax-check on a playbook.
# Arguments: $1 - path of the playbook
# Outputs:   progress and result messages through the log helpers
# Returns:   0 when the syntax is valid; exits the script with status 1
#            otherwise
#######################################
syntax_check() {
  local target=$1

  log_info "检查 Playbook 语法: $target"
  if ! ansible-playbook --syntax-check "$target"; then
    log_error "语法检查失败"
    exit 1
  fi
  log_success "语法检查通过"
}
# 干运行
#######################################
# Run a playbook in check mode with diff output (no changes are applied).
# Arguments: $1 - path of the playbook
#            $2 - optional environment name, passed as extra var `env`
# Returns:   exit status of ansible-playbook
#######################################
dry_run() {
  local playbook=$1
  local env=${2:-}
  local -a cmd=(ansible-playbook --check --diff)

  log_info "执行干运行: $playbook"

  # Playbooks that load vault-encrypted vars files cannot even be parsed
  # without the extra var and the vault password, so both are supplied
  # when available; otherwise the invocation is unchanged.
  if [[ -n "$env" ]]; then
    cmd+=(-e "env=$env")
  fi
  if [[ -f .vault_pass ]]; then
    cmd+=(--vault-password-file .vault_pass)
  fi
  cmd+=("$playbook")

  "${cmd[@]}"
}
# 完整部署
#######################################
# Shared ansible-playbook invocation for the deploy functions below.
# Arguments: $1 - playbook path
#            $2 - environment name (passed as extra var `env`)
#            $3.. - additional ansible-playbook arguments, appended as-is
# Returns:   1 when playbook or environment is empty, otherwise the exit
#            status of ansible-playbook
#######################################
_run_playbook() {
  local playbook=$1
  local env=$2
  shift 2

  # An empty environment would be passed on as "env=" and only fail later
  # with a confusing missing-file error; reject it up front.
  if [[ -z "$playbook" || -z "$env" ]]; then
    log_error "缺少 playbook 或环境参数"
    return 1
  fi

  ANSIBLE_CONFIG=./ansible.cfg \
    ansible-playbook \
    -i inventory/hosts \
    "$playbook" \
    -e "env=$env" \
    "$@"
}

# Full deployment of a playbook.
# Arguments: $1 - playbook path, $2 - environment name
full_deploy() {
  local playbook=$1
  local env=$2

  log_info "开始完整部署: $playbook (环境: $env)"
  _run_playbook "$playbook" "$env" \
    --vault-password-file .vault_pass \
    --private-key ~/.ssh/id_rsa
}

# Deployment restricted to the given tags.
# Arguments: $1 - playbook path, $2 - comma-separated tags, $3 - environment
tag_deploy() {
  local playbook=$1
  local tags=$2
  local env=$3

  log_info "执行标签部署: $tags"
  _run_playbook "$playbook" "$env" \
    --tags "$tags" \
    --vault-password-file .vault_pass
}

# Deployment restricted to the given host pattern.
# Arguments: $1 - playbook path, $2 - host/group limit, $3 - environment
limit_deploy() {
  local playbook=$1
  local limit=$2
  local env=$3

  log_info "限制主机部署: $limit"
  _run_playbook "$playbook" "$env" \
    --limit "$limit" \
    --vault-password-file .vault_pass
}
# 显示帮助
#######################################
# Print the usage text: available commands followed by example
# invocations. $0 is expanded to the name the script was started with.
# Outputs:   usage text on stdout
#######################################
show_help() {
  cat << EOF
Ansible 部署脚本

使用方法: $0 [选项]

选项:
 check <playbook> 语法检查
 dry-run <playbook> 干运行
 deploy <playbook> <env> 完整部署
 tags <playbook> <tags> <env> 标签部署
 limit <playbook> <limit> <env> 限制主机部署
 help 显示此帮助信息

示例:
 $0 check playbooks/site.yml
 $0 dry-run playbooks/site.yml
 $0 deploy playbooks/site.yml production
 $0 tags playbooks/site.yml web,db production
 $0 limit playbooks/site.yml web_servers production
EOF
}
# 主函数
#######################################
# Script entry point: dispatch the requested command.
# Arguments: $1 - command (check|dry-run|deploy|tags|limit|help)
#            $2 - playbook path (required by every command except help)
#            $3, $4 - command-specific arguments (see show_help)
# Returns:   status of the dispatched function; exits 1 on an unknown
#            command or a missing playbook argument
#######################################
main() {
  local command=${1:-}
  local playbook=${2:-}
  local arg1=${3:-}
  local arg2=${4:-}

  # Help must work on a machine without Ansible, and invalid usage should be
  # reported before anything else, so both are settled ahead of the
  # dependency check.
  case "$command" in
    help | "")
      show_help
      return 0
      ;;
    check | dry-run | deploy | tags | limit) ;;
    *)
      log_error "未知命令: $command"
      show_help
      exit 1
      ;;
  esac

  if [[ -z "$playbook" ]]; then
    log_error "缺少 playbook 参数"
    show_help
    exit 1
  fi

  check_dependencies

  case "$command" in
    check)
      syntax_check "$playbook"
      ;;
    dry-run)
      dry_run "$playbook"
      ;;
    deploy)
      full_deploy "$playbook" "$arg1"
      ;;
    tags)
      tag_deploy "$playbook" "$arg1" "$arg2"
      ;;
    limit)
      limit_deploy "$playbook" "$arg1" "$arg2"
      ;;
  esac
}
main "$@"
10. 总结
通过本教程,您已经学习了 Ansible 的完整知识体系:
10.1 Ansible 工作流程
graph LR
A[编写 Playbook] --> B[定义 Inventory]
B --> C[配置变量]
C --> D[执行部署]
D --> E[结果验证]
E --> F[监控维护]
style A fill:#1e3a5f,color:#ffffff
style B fill:#1e5f3a,color:#ffffff
style C fill:#1e5f3a,color:#ffffff
style D fill:#5f3a1e,color:#ffffff
style E fill:#1e5f3a,color:#ffffff
style F fill:#1e3a5f,color:#ffffff
10.2 最佳实践总结
- 版本控制:所有 Ansible 代码纳入版本控制
- 角色化设计:使用角色组织复杂配置
- 环境分离:开发、测试、生产环境分离
- 安全优先:使用 Ansible Vault 管理敏感数据
- 文档完整:为每个角色和 Playbook 编写文档
- 测试验证:部署前后进行充分测试
- 监控告警:建立完善的监控体系
10.3 下一步学习方向
- Ansible Automation Platform(原 Ansible Tower)/AWX:Web UI 和 API 管理
- Ansible Galaxy:社区角色共享
- 动态 Inventory:集成云平台和 CMDB
- 自定义模块:扩展 Ansible 功能
- 性能优化:大规模环境优化技巧
现在您已经具备了使用 Ansible 管理服务器舰队的能力,可以开始在实际环境中应用这些知识,构建自己的自动化运维体系。