[sql]合并数据

53 阅读2分钟

1/上下合并(UNION / UNION ALL)

-- 基本结构
SELECT1, 列2, 列3 FROM1 WHERE 条件
UNION ALL
SELECT1, 列2, 列3 FROM2 WHERE 条件
UNION ALL
SELECT1, 列2, 列3 FROM3 WHERE 条件;

-- 具体示例:合并不同年份的订单
SELECT 
    order_id, 
    customer_id, 
    order_date,
    amount,
    '2023' as year
FROM orders_2023
WHERE amount > 1000

UNION ALL

SELECT 
    order_id, 
    customer_id, 
    order_date,
    amount,
    '2024' as year
FROM orders_2024
WHERE status = 'completed'

ORDER BY order_date DESC;  -- 可以对合并结果排序

区别

  • UNION:合并后去重
  • UNION ALL:合并后不去重(性能更好)

2/左右合并(JOIN操作)

-- INNER JOIN:取交集
SELECT 
    a.order_id,
    a.customer_id,
    b.customer_name,
    a.order_date,
    c.product_name
FROM orders a
INNER JOIN customers b ON a.customer_id = b.customer_id
INNER JOIN products c ON a.product_id = c.product_id
WHERE a.status = 'active'
  AND b.country = 'USA';

-- LEFT JOIN:以左表为主
SELECT 
    e.employee_id,
    e.name,
    d.department_name,
    s.salary
FROM employees e
LEFT JOIN departments d ON e.dept_id = d.dept_id
LEFT JOIN salaries s ON e.employee_id = s.employee_id
WHERE e.hire_date > '2023-01-01';

-- FULL OUTER JOIN:取所有数据(MySQL不支持,可用UNION模拟)

3. CTE(公用表表达式)+ UNION

WITH 
-- 第一个筛选结果
sales_2023 AS (
    SELECT 
        product_id,
        SUM(quantity) as total_qty,
        '2023' as year
    FROM sales
    WHERE YEAR(sale_date) = 2023
    GROUP BY product_id
),

-- 第二个筛选结果
sales_2024 AS (
    SELECT 
        product_id,
        SUM(quantity) as total_qty,
        '2024' as year
    FROM sales
    WHERE YEAR(sale_date) = 2024
    GROUP BY product_id
),

-- 产品信息
products_info AS (
    SELECT product_id, product_name, category
    FROM products
)

-- 合并结果
SELECT 
    p.product_name,
    p.category,
    s23.total_qty as qty_2023,
    s24.total_qty as qty_2024
FROM products_info p
LEFT JOIN sales_2023 s23 ON p.product_id = s23.product_id
LEFT JOIN sales_2024 s24 ON p.product_id = s24.product_id
WHERE s23.total_qty > 0 OR s24.total_qty > 0;

4. 根据需求选择合适的方法

场景选择指南

需求场景推荐方法
相同结构表按时间分表UNION ALL
需要关联不同表的字段JOIN
逻辑复杂需要分步处理CTE + UNION/JOIN
需要同时水平和垂直合并混合使用

5. 完整示例

-- 合并三个数据源,包含汇总计算
WITH 
source_a AS (
    SELECT 
        user_id,
        'Web' as source,
        COUNT(*) as sessions,
        SUM(duration) as total_time
    FROM web_logs
    WHERE date >= '2024-01-01'
    GROUP BY user_id
),

source_b AS (
    SELECT 
        user_id,
        'App' as source,
        COUNT(*) as sessions,
        SUM(session_duration) as total_time
    FROM app_logs
    WHERE date >= '2024-01-01'
    GROUP BY user_id
),

source_c AS (
    SELECT 
        user_id,
        'API' as source,
        COUNT(*) as sessions,
        NULL as total_time  -- 对齐列结构
    FROM api_calls
    WHERE call_date >= '2024-01-01'
    GROUP BY user_id
)

-- 合并所有来源
SELECT 
    u.user_id,
    u.username,
    COALESCE(a.sessions, 0) + 
    COALESCE(b.sessions, 0) + 
    COALESCE(c.sessions, 0) as total_sessions,
    COALESCE(a.total_time, 0) + 
    COALESCE(b.total_time, 0) as total_time
FROM users u
LEFT JOIN source_a a ON u.user_id = a.user_id
LEFT JOIN source_b b ON u.user_id = b.user_id
LEFT JOIN source_c c ON u.user_id = c.user_id
WHERE u.active = 1
ORDER BY total_sessions DESC;

注意事项:

  1. 列数和数据类型要匹配:UNION操作需要每个SELECT的列数、类型一致
  2. 性能考虑:为WHERE条件和JOIN字段添加索引
  3. 使用别名:确保列名清晰易懂
  4. 去重选择:根据是否需要去重选择UNION或UNION ALL