1/上下合并(UNION / UNION ALL)
-- 基本结构
SELECT 列1, 列2, 列3 FROM 表1 WHERE 条件
UNION ALL
SELECT 列1, 列2, 列3 FROM 表2 WHERE 条件
UNION ALL
SELECT 列1, 列2, 列3 FROM 表3 WHERE 条件;
-- 具体示例:合并不同年份的订单
SELECT
order_id,
customer_id,
order_date,
amount,
'2023' as year
FROM orders_2023
WHERE amount > 1000
UNION ALL
SELECT
order_id,
customer_id,
order_date,
amount,
'2024' as year
FROM orders_2024
WHERE status = 'completed'
ORDER BY order_date DESC; -- 可以对合并结果排序
区别:
UNION:合并后去重UNION ALL:合并后不去重(性能更好)
2/左右合并(JOIN操作)
-- INNER JOIN:取交集
SELECT
a.order_id,
a.customer_id,
b.customer_name,
a.order_date,
c.product_name
FROM orders a
INNER JOIN customers b ON a.customer_id = b.customer_id
INNER JOIN products c ON a.product_id = c.product_id
WHERE a.status = 'active'
AND b.country = 'USA';
-- LEFT JOIN:以左表为主
SELECT
e.employee_id,
e.name,
d.department_name,
s.salary
FROM employees e
LEFT JOIN departments d ON e.dept_id = d.dept_id
LEFT JOIN salaries s ON e.employee_id = s.employee_id
WHERE e.hire_date > '2023-01-01';
-- FULL OUTER JOIN:取所有数据(MySQL不支持,可用UNION模拟)
3. CTE(公用表表达式)+ UNION
WITH
-- 第一个筛选结果
sales_2023 AS (
SELECT
product_id,
SUM(quantity) as total_qty,
'2023' as year
FROM sales
WHERE YEAR(sale_date) = 2023
GROUP BY product_id
),
-- 第二个筛选结果
sales_2024 AS (
SELECT
product_id,
SUM(quantity) as total_qty,
'2024' as year
FROM sales
WHERE YEAR(sale_date) = 2024
GROUP BY product_id
),
-- 产品信息
products_info AS (
SELECT product_id, product_name, category
FROM products
)
-- 合并结果
SELECT
p.product_name,
p.category,
s23.total_qty as qty_2023,
s24.total_qty as qty_2024
FROM products_info p
LEFT JOIN sales_2023 s23 ON p.product_id = s23.product_id
LEFT JOIN sales_2024 s24 ON p.product_id = s24.product_id
WHERE s23.total_qty > 0 OR s24.total_qty > 0;
4. 根据需求选择合适的方法
场景选择指南:
| 需求场景 | 推荐方法 |
|---|---|
| 相同结构表按时间分表 | UNION ALL |
| 需要关联不同表的字段 | JOIN |
| 逻辑复杂需要分步处理 | CTE + UNION/JOIN |
| 需要同时水平和垂直合并 | 混合使用 |
5. 完整示例
-- 合并三个数据源,包含汇总计算
WITH
source_a AS (
SELECT
user_id,
'Web' as source,
COUNT(*) as sessions,
SUM(duration) as total_time
FROM web_logs
WHERE date >= '2024-01-01'
GROUP BY user_id
),
source_b AS (
SELECT
user_id,
'App' as source,
COUNT(*) as sessions,
SUM(session_duration) as total_time
FROM app_logs
WHERE date >= '2024-01-01'
GROUP BY user_id
),
source_c AS (
SELECT
user_id,
'API' as source,
COUNT(*) as sessions,
NULL as total_time -- 对齐列结构
FROM api_calls
WHERE call_date >= '2024-01-01'
GROUP BY user_id
)
-- 合并所有来源
SELECT
u.user_id,
u.username,
COALESCE(a.sessions, 0) +
COALESCE(b.sessions, 0) +
COALESCE(c.sessions, 0) as total_sessions,
COALESCE(a.total_time, 0) +
COALESCE(b.total_time, 0) as total_time
FROM users u
LEFT JOIN source_a a ON u.user_id = a.user_id
LEFT JOIN source_b b ON u.user_id = b.user_id
LEFT JOIN source_c c ON u.user_id = c.user_id
WHERE u.active = 1
ORDER BY total_sessions DESC;
注意事项:
- 列数和数据类型要匹配:UNION操作需要每个SELECT的列数、类型一致
- 性能考虑:为WHERE条件和JOIN字段添加索引
- 使用别名:确保列名清晰易懂
- 去重选择:根据是否需要去重选择UNION或UNION ALL