MySQL 中的 JOIN 操作

67 阅读8分钟

MySQL JOIN 详解

1. JOIN 基本概念

JOIN 用于在两个或多个表之间,根据相关的列来合并数据行。它是关系型数据库实现表间关系查询的核心机制。

2. 示例表结构准备

-- 创建用户表
CREATE TABLE users (
    id INT PRIMARY KEY AUTO_INCREMENT,
    name VARCHAR(50) NOT NULL,
    email VARCHAR(100) UNIQUE,
    department_id INT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- 创建部门表
CREATE TABLE departments (
    id INT PRIMARY KEY AUTO_INCREMENT,
    name VARCHAR(50) NOT NULL,
    manager_id INT,
    budget DECIMAL(10,2)
);

-- 创建订单表
CREATE TABLE orders (
    id INT PRIMARY KEY AUTO_INCREMENT,
    user_id INT,
    amount DECIMAL(10,2),
    status VARCHAR(20) DEFAULT 'pending',
    order_date DATE
);

-- 创建产品表
CREATE TABLE products (
    id INT PRIMARY KEY AUTO_INCREMENT,
    name VARCHAR(100) NOT NULL,
    price DECIMAL(10,2),
    category_id INT
);

-- 创建分类表
CREATE TABLE categories (
    id INT PRIMARY KEY AUTO_INCREMENT,
    name VARCHAR(50) NOT NULL,
    parent_id INT
);

-- 插入测试数据
INSERT INTO departments (id, name, manager_id, budget) VALUES
(1, 'IT部门', 1, 100000.00),
(2, '销售部门', 2, 80000.00),
(3, '人事部门', 3, 50000.00),
(4, '财务部门', NULL, 60000.00);

INSERT INTO users (id, name, email, department_id) VALUES
(1, '张三', 'zhang@example.com', 1),
(2, '李四', 'li@example.com', 2),
(3, '王五', 'wang@example.com', 1),
(4, '赵六', 'zhao@example.com', NULL),
(5, '钱七', 'qian@example.com', 3);

INSERT INTO orders (id, user_id, amount, status, order_date) VALUES
(1, 1, 1000.00, 'completed', '2024-01-15'),
(2, 1, 500.00, 'pending', '2024-02-10'),
(3, 2, 750.00, 'completed', '2024-01-20'),
(4, 3, 300.00, 'cancelled', '2024-02-05'),
(5, NULL, 200.00, 'completed', '2024-01-25');

INSERT INTO categories (id, name, parent_id) VALUES
(1, '电子产品', NULL),
(2, '服装', NULL),
(3, '手机', 1),
(4, '电脑', 1);

INSERT INTO products (id, name, price, category_id) VALUES
(1, 'iPhone 15', 8999.00, 3),
(2, 'MacBook Pro', 19999.00, 4),
(3, 'T恤衫', 99.00, 2),
(4, '无线耳机', 299.00, 1);

JOIN 类型详解

1. INNER JOIN(内连接)

作用:只返回两个表中都存在匹配关系的记录。

基本语法

SELECT columns
FROM table1
INNER JOIN table2 ON table1.column = table2.column;

实例演示

-- 查询有部门的用户信息
SELECT 
    u.id,
    u.name,
    u.email,
    d.name AS department_name,
    d.budget
FROM users u
INNER JOIN departments d ON u.department_id = d.id;

-- 结果:只显示有部门的用户(赵六被排除,因为department_id为NULL)

多表内连接

-- 查询用户、部门和订单信息
SELECT 
    u.name AS user_name,
    d.name AS department,
    o.amount,
    o.status,
    o.order_date
FROM users u
INNER JOIN departments d ON u.department_id = d.id
INNER JOIN orders o ON u.id = o.user_id
WHERE o.status = 'completed';

使用表别名和条件

-- 查询IT部门员工的已完成订单
SELECT 
    u.name,
    o.amount,
    o.order_date
FROM users u
INNER JOIN departments d ON u.department_id = d.id
INNER JOIN orders o ON u.id = o.user_id
WHERE d.name = 'IT部门' 
    AND o.status = 'completed'
ORDER BY o.amount DESC;

2. LEFT JOIN(左外连接)

作用:返回左表的所有记录,以及右表中匹配的记录。如果右表没有匹配,则显示 NULL。

基本语法

SELECT columns
FROM table1
LEFT JOIN table2 ON table1.column = table2.column;

实例演示

-- 查询所有用户及其部门信息(包括没有部门的用户)
SELECT 
    u.id,
    u.name,
    u.email,
    COALESCE(d.name, '未分配部门') AS department_name,
    d.budget
FROM users u
LEFT JOIN departments d ON u.department_id = d.id;

-- 结果:所有用户都会显示,没有部门的用户department_name显示为NULL或自定义值

查找孤立记录

-- 查找没有部门的用户
SELECT 
    u.id,
    u.name,
    u.email
FROM users u
LEFT JOIN departments d ON u.department_id = d.id
WHERE d.id IS NULL;

统计查询

-- 统计每个用户的订单数量(包括没有订单的用户)
SELECT 
    u.name,
    COUNT(o.id) AS order_count,
    COALESCE(SUM(o.amount), 0) AS total_amount
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name
ORDER BY order_count DESC;

3. RIGHT JOIN(右外连接)

作用:返回右表的所有记录,以及左表中匹配的记录。如果左表没有匹配,则显示 NULL。

基本语法

SELECT columns
FROM table1
RIGHT JOIN table2 ON table1.column = table2.column;

实例演示

-- 查询所有部门及其员工信息(包括没有员工的部门)
SELECT 
    d.name AS department_name,
    d.budget,
    u.name AS employee_name,
    u.email
FROM users u
RIGHT JOIN departments d ON u.department_id = d.id;

-- 查找没有员工的部门
SELECT 
    d.id,
    d.name AS department_name,
    d.budget
FROM users u
RIGHT JOIN departments d ON u.department_id = d.id
WHERE u.id IS NULL;

实际应用场景

-- 统计每个部门的员工数量
SELECT 
    d.name AS department_name,
    COUNT(u.id) AS employee_count,
    d.budget,
    CASE 
        WHEN COUNT(u.id) = 0 THEN '无员工部门'
        WHEN COUNT(u.id) <= 2 THEN '小部门'
        ELSE '大部门'
    END AS department_size
FROM users u
RIGHT JOIN departments d ON u.department_id = d.id
GROUP BY d.id, d.name, d.budget
ORDER BY employee_count DESC;

4. FULL OUTER JOIN(全外连接)

注意:MySQL 不直接支持 FULL OUTER JOIN,但可以通过 UNION 模拟。

模拟全外连接

-- 使用 UNION 模拟全外连接
SELECT 
    u.name AS user_name,
    d.name AS department_name
FROM users u
LEFT JOIN departments d ON u.department_id = d.id

UNION

SELECT 
    u.name AS user_name,
    d.name AS department_name
FROM users u
RIGHT JOIN departments d ON u.department_id = d.id
WHERE u.id IS NULL;

5. CROSS JOIN(交叉连接/笛卡尔积)

作用:返回两个表的笛卡尔积,即第一个表的每一行与第二个表的每一行都组合。

基本语法

SELECT columns
FROM table1
CROSS JOIN table2;

实例演示

-- 生成用户和产品的所有组合
SELECT 
    u.name AS user_name,
    p.name AS product_name,
    p.price
FROM users u
CROSS JOIN products p
LIMIT 10;

-- 实际应用:生成日期范围内的数据模板
SELECT 
    u.name,
    dates.date_value
FROM users u
CROSS JOIN (
    SELECT DATE('2024-01-01') + INTERVAL numbers.n DAY AS date_value
    FROM (
        SELECT 0 AS n UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4
    ) numbers
    WHERE DATE('2024-01-01') + INTERVAL numbers.n DAY <= '2024-01-05'
) dates;

6. SELF JOIN(自连接)

作用:表与自身进行连接,常用于处理层次结构数据。

实例演示

-- 查询员工及其经理信息(假设部门表的manager_id指向users表)
SELECT 
    emp.name AS employee_name,
    mgr.name AS manager_name,
    d.name AS department_name
FROM users emp
LEFT JOIN departments d ON emp.department_id = d.id
LEFT JOIN users mgr ON d.manager_id = mgr.id;

-- 查询分类及其父分类
SELECT 
    child.name AS category_name,
    parent.name AS parent_category_name
FROM categories child
LEFT JOIN categories parent ON child.parent_id = parent.id;

-- 查找同部门的同事
SELECT 
    u1.name AS employee1,
    u2.name AS employee2,
    d.name AS department
FROM users u1
INNER JOIN users u2 ON u1.department_id = u2.department_id AND u1.id < u2.id
INNER JOIN departments d ON u1.department_id = d.id;

高级 JOIN 技巧

1. 多条件连接

-- 使用多个条件进行连接
SELECT 
    u.name,
    o.amount,
    o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id 
    AND o.status = 'completed'
    AND o.order_date >= '2024-01-01'
ORDER BY o.amount DESC;

2. 使用子查询优化 JOIN

-- 先过滤再连接,提高性能
SELECT 
    u.name,
    big_orders.total_amount
FROM users u
INNER JOIN (
    SELECT 
        user_id,
        SUM(amount) AS total_amount
    FROM orders
    WHERE status = 'completed'
    GROUP BY user_id
    HAVING SUM(amount) > 1000
) big_orders ON u.id = big_orders.user_id;

3. 条件连接(Conditional JOIN)

-- 根据条件决定连接方式
SELECT 
    u.name,
    CASE 
        WHEN u.department_id IS NOT NULL THEN d.name
        ELSE '临时员工'
    END AS status,
    COALESCE(recent_orders.order_count, 0) AS recent_orders
FROM users u
LEFT JOIN departments d ON u.department_id = d.id
LEFT JOIN (
    SELECT 
        user_id,
        COUNT(*) AS order_count
    FROM orders
    WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
    GROUP BY user_id
) recent_orders ON u.id = recent_orders.user_id;

4. 使用 USING 子句

-- 当连接字段名相同时,可以使用 USING
-- 假设两个表都有相同的字段名
SELECT 
    u.name,
    d.name AS department_name
FROM users u
INNER JOIN departments d USING(id);  -- 等价于 ON u.id = d.id

-- 多字段 USING
SELECT *
FROM table1 t1
INNER JOIN table2 t2 USING(id, status);

JOIN 性能优化

1. 索引优化

-- 为连接字段创建索引
CREATE INDEX idx_users_department_id ON users(department_id);
CREATE INDEX idx_orders_user_id ON orders(user_id);
CREATE INDEX idx_orders_status_date ON orders(status, order_date);

-- 复合索引优化多条件查询
CREATE INDEX idx_orders_user_status_date ON orders(user_id, status, order_date);

2. 查询优化技巧

-- 1. 使用 EXISTS 代替 IN 进行子查询
-- 不推荐
SELECT u.name
FROM users u
WHERE u.id IN (
    SELECT DISTINCT user_id 
    FROM orders 
    WHERE status = 'completed'
);

-- 推荐
SELECT u.name
FROM users u
WHERE EXISTS (
    SELECT 1 
    FROM orders o 
    WHERE o.user_id = u.id 
        AND o.status = 'completed'
);

-- 2. 适当使用 LIMIT 限制结果集
SELECT 
    u.name,
    d.name AS department,
    o.amount
FROM users u
INNER JOIN departments d ON u.department_id = d.id
INNER JOIN orders o ON u.id = o.user_id
ORDER BY o.amount DESC
LIMIT 10;

3. 分析查询执行计划

-- 使用 EXPLAIN 分析查询性能
EXPLAIN SELECT 
    u.name,
    d.name AS department_name,
    COUNT(o.id) AS order_count
FROM users u
LEFT JOIN departments d ON u.department_id = d.id
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name, d.name;

-- 使用 EXPLAIN ANALYZE 获取实际执行统计(MySQL 8.0+)
EXPLAIN ANALYZE SELECT 
    u.name,
    SUM(o.amount) AS total_amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE o.status = 'completed'
GROUP BY u.id, u.name;

实际应用场景

1. 电商订单统计

-- 统计每个用户的订单情况
SELECT 
    u.name AS customer_name,
    u.email,
    d.name AS department,
    COUNT(o.id) AS total_orders,
    COUNT(CASE WHEN o.status = 'completed' THEN 1 END) AS completed_orders,
    COUNT(CASE WHEN o.status = 'pending' THEN 1 END) AS pending_orders,
    COUNT(CASE WHEN o.status = 'cancelled' THEN 1 END) AS cancelled_orders,
    COALESCE(SUM(CASE WHEN o.status = 'completed' THEN o.amount END), 0) AS total_revenue,
    COALESCE(AVG(CASE WHEN o.status = 'completed' THEN o.amount END), 0) AS avg_order_value
FROM users u
LEFT JOIN departments d ON u.department_id = d.id
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name, u.email, d.name
ORDER BY total_revenue DESC;

2. 层次结构查询

-- 查询产品分类层次结构
WITH RECURSIVE category_hierarchy AS (
    -- 根分类
    SELECT 
        id,
        name,
        parent_id,
        name AS full_path,
        0 AS level
    FROM categories
    WHERE parent_id IS NULL
    
    UNION ALL
    
    -- 子分类
    SELECT 
        c.id,
        c.name,
        c.parent_id,
        CONCAT(ch.full_path, ' > ', c.name) AS full_path,
        ch.level + 1 AS level
    FROM categories c
    INNER JOIN category_hierarchy ch ON c.parent_id = ch.id
)
SELECT 
    ch.full_path,
    ch.level,
    COUNT(p.id) AS product_count,
    COALESCE(AVG(p.price), 0) AS avg_price
FROM category_hierarchy ch
LEFT JOIN products p ON ch.id = p.category_id
GROUP BY ch.id, ch.full_path, ch.level
ORDER BY ch.level, ch.full_path;

3. 数据质量检查

-- 检查数据完整性问题
SELECT 
    'users_without_department' AS issue_type,
    COUNT(*) AS count
FROM users u
LEFT JOIN departments d ON u.department_id = d.id
WHERE u.department_id IS NOT NULL AND d.id IS NULL

UNION ALL

SELECT 
    'orders_without_user' AS issue_type,
    COUNT(*) AS count
FROM orders o
LEFT JOIN users u ON o.user_id = u.id
WHERE o.user_id IS NOT NULL AND u.id IS NULL

UNION ALL

SELECT 
    'departments_without_manager' AS issue_type,
    COUNT(*) AS count
FROM departments d
LEFT JOIN users u ON d.manager_id = u.id
WHERE d.manager_id IS NOT NULL AND u.id IS NULL;

4. 销售报表

-- 按部门统计销售情况
SELECT 
    COALESCE(d.name, '未分配部门') AS department,
    COUNT(DISTINCT u.id) AS employee_count,
    COUNT(o.id) AS total_orders,
    COALESCE(SUM(o.amount), 0) AS total_sales,
    COALESCE(AVG(o.amount), 0) AS avg_order_value,
    COUNT(CASE WHEN o.order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY) THEN 1 END) AS recent_orders
FROM departments d
LEFT JOIN users u ON d.id = u.department_id
LEFT JOIN orders o ON u.id = o.user_id AND o.status = 'completed'
GROUP BY d.id, d.name
ORDER BY total_sales DESC;

常见错误和解决方案

1. 避免重复数据

-- 错误:可能产生重复数据
SELECT u.name, d.name
FROM users u
INNER JOIN departments d ON u.department_id = d.id
INNER JOIN orders o ON u.id = o.user_id;

-- 正确:使用 DISTINCT 或适当的 GROUP BY
SELECT DISTINCT u.name, d.name
FROM users u
INNER JOIN departments d ON u.department_id = d.id
INNER JOIN orders o ON u.id = o.user_id;

-- 或者
SELECT u.name, d.name, COUNT(o.id) AS order_count
FROM users u
INNER JOIN departments d ON u.department_id = d.id
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name, d.name;

2. NULL 值处理

-- 正确处理 NULL 值
SELECT 
    u.name,
    COALESCE(d.name, '未分配') AS department,
    CASE 
        WHEN u.department_id IS NULL THEN '待分配'
        WHEN d.id IS NULL THEN '部门已删除'
        ELSE '正常'
    END AS status
FROM users u
LEFT JOIN departments d ON u.department_id = d.id;

3. 性能问题排查

-- 使用合适的 JOIN 顺序和条件
-- 不好的做法:大表交叉连接后再过滤
SELECT u.name, o.amount
FROM users u, orders o
WHERE u.id = o.user_id AND o.amount > 1000;

-- 好的做法:先过滤再连接
SELECT u.name, o.amount
FROM users u
INNER JOIN (
    SELECT user_id, amount 
    FROM orders 
    WHERE amount > 1000
) o ON u.id = o.user_id;