MySQL JOIN 详解
1. JOIN 基本概念
JOIN 用于在两个或多个表之间,根据相关的列来合并数据行。它是关系型数据库实现表间关系查询的核心机制。
2. 示例表结构准备
-- 创建用户表
CREATE TABLE users (
id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(50) NOT NULL,
email VARCHAR(100) UNIQUE,
department_id INT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 创建部门表
CREATE TABLE departments (
id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(50) NOT NULL,
manager_id INT,
budget DECIMAL(10,2)
);
-- 创建订单表
CREATE TABLE orders (
id INT PRIMARY KEY AUTO_INCREMENT,
user_id INT,
amount DECIMAL(10,2),
status VARCHAR(20) DEFAULT 'pending',
order_date DATE
);
-- 创建产品表
CREATE TABLE products (
id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(100) NOT NULL,
price DECIMAL(10,2),
category_id INT
);
-- 创建分类表
CREATE TABLE categories (
id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(50) NOT NULL,
parent_id INT
);
-- 插入测试数据
INSERT INTO departments (id, name, manager_id, budget) VALUES
(1, 'IT部门', 1, 100000.00),
(2, '销售部门', 2, 80000.00),
(3, '人事部门', 3, 50000.00),
(4, '财务部门', NULL, 60000.00);
INSERT INTO users (id, name, email, department_id) VALUES
(1, '张三', 'zhang@example.com', 1),
(2, '李四', 'li@example.com', 2),
(3, '王五', 'wang@example.com', 1),
(4, '赵六', 'zhao@example.com', NULL),
(5, '钱七', 'qian@example.com', 3);
INSERT INTO orders (id, user_id, amount, status, order_date) VALUES
(1, 1, 1000.00, 'completed', '2024-01-15'),
(2, 1, 500.00, 'pending', '2024-02-10'),
(3, 2, 750.00, 'completed', '2024-01-20'),
(4, 3, 300.00, 'cancelled', '2024-02-05'),
(5, NULL, 200.00, 'completed', '2024-01-25');
INSERT INTO categories (id, name, parent_id) VALUES
(1, '电子产品', NULL),
(2, '服装', NULL),
(3, '手机', 1),
(4, '电脑', 1);
INSERT INTO products (id, name, price, category_id) VALUES
(1, 'iPhone 15', 8999.00, 3),
(2, 'MacBook Pro', 19999.00, 4),
(3, 'T恤衫', 99.00, 2),
(4, '无线耳机', 299.00, 1);
JOIN 类型详解
1. INNER JOIN(内连接)
作用:只返回两个表中都存在匹配关系的记录。
基本语法
SELECT columns
FROM table1
INNER JOIN table2 ON table1.column = table2.column;
实例演示
-- 查询有部门的用户信息
SELECT
u.id,
u.name,
u.email,
d.name AS department_name,
d.budget
FROM users u
INNER JOIN departments d ON u.department_id = d.id;
-- 结果:只显示有部门的用户(赵六被排除,因为department_id为NULL)
多表内连接
-- 查询用户、部门和订单信息
SELECT
u.name AS user_name,
d.name AS department,
o.amount,
o.status,
o.order_date
FROM users u
INNER JOIN departments d ON u.department_id = d.id
INNER JOIN orders o ON u.id = o.user_id
WHERE o.status = 'completed';
使用表别名和条件
-- 查询IT部门员工的已完成订单
SELECT
u.name,
o.amount,
o.order_date
FROM users u
INNER JOIN departments d ON u.department_id = d.id
INNER JOIN orders o ON u.id = o.user_id
WHERE d.name = 'IT部门'
AND o.status = 'completed'
ORDER BY o.amount DESC;
2. LEFT JOIN(左外连接)
作用:返回左表的所有记录,以及右表中匹配的记录。如果右表没有匹配,则显示 NULL。
基本语法
SELECT columns
FROM table1
LEFT JOIN table2 ON table1.column = table2.column;
实例演示
-- 查询所有用户及其部门信息(包括没有部门的用户)
SELECT
u.id,
u.name,
u.email,
COALESCE(d.name, '未分配部门') AS department_name,
d.budget
FROM users u
LEFT JOIN departments d ON u.department_id = d.id;
-- 结果:所有用户都会显示,没有部门的用户department_name显示为NULL或自定义值
查找孤立记录
-- 查找没有部门的用户
SELECT
u.id,
u.name,
u.email
FROM users u
LEFT JOIN departments d ON u.department_id = d.id
WHERE d.id IS NULL;
统计查询
-- 统计每个用户的订单数量(包括没有订单的用户)
SELECT
u.name,
COUNT(o.id) AS order_count,
COALESCE(SUM(o.amount), 0) AS total_amount
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name
ORDER BY order_count DESC;
3. RIGHT JOIN(右外连接)
作用:返回右表的所有记录,以及左表中匹配的记录。如果左表没有匹配,则显示 NULL。
基本语法
SELECT columns
FROM table1
RIGHT JOIN table2 ON table1.column = table2.column;
实例演示
-- 查询所有部门及其员工信息(包括没有员工的部门)
SELECT
d.name AS department_name,
d.budget,
u.name AS employee_name,
u.email
FROM users u
RIGHT JOIN departments d ON u.department_id = d.id;
-- 查找没有员工的部门
SELECT
d.id,
d.name AS department_name,
d.budget
FROM users u
RIGHT JOIN departments d ON u.department_id = d.id
WHERE u.id IS NULL;
实际应用场景
-- 统计每个部门的员工数量
SELECT
d.name AS department_name,
COUNT(u.id) AS employee_count,
d.budget,
CASE
WHEN COUNT(u.id) = 0 THEN '无员工部门'
WHEN COUNT(u.id) <= 2 THEN '小部门'
ELSE '大部门'
END AS department_size
FROM users u
RIGHT JOIN departments d ON u.department_id = d.id
GROUP BY d.id, d.name, d.budget
ORDER BY employee_count DESC;
4. FULL OUTER JOIN(全外连接)
注意:MySQL 不直接支持 FULL OUTER JOIN,但可以通过 UNION 模拟。
模拟全外连接
-- 使用 UNION 模拟全外连接
SELECT
u.name AS user_name,
d.name AS department_name
FROM users u
LEFT JOIN departments d ON u.department_id = d.id
UNION
SELECT
u.name AS user_name,
d.name AS department_name
FROM users u
RIGHT JOIN departments d ON u.department_id = d.id
WHERE u.id IS NULL;
5. CROSS JOIN(交叉连接/笛卡尔积)
作用:返回两个表的笛卡尔积,即第一个表的每一行与第二个表的每一行都组合。
基本语法
SELECT columns
FROM table1
CROSS JOIN table2;
实例演示
-- 生成用户和产品的所有组合
SELECT
u.name AS user_name,
p.name AS product_name,
p.price
FROM users u
CROSS JOIN products p
LIMIT 10;
-- 实际应用:生成日期范围内的数据模板
SELECT
u.name,
dates.date_value
FROM users u
CROSS JOIN (
SELECT DATE('2024-01-01') + INTERVAL numbers.n DAY AS date_value
FROM (
SELECT 0 AS n UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4
) numbers
WHERE DATE('2024-01-01') + INTERVAL numbers.n DAY <= '2024-01-05'
) dates;
6. SELF JOIN(自连接)
作用:表与自身进行连接,常用于处理层次结构数据。
实例演示
-- 查询员工及其经理信息(假设部门表的manager_id指向users表)
SELECT
emp.name AS employee_name,
mgr.name AS manager_name,
d.name AS department_name
FROM users emp
LEFT JOIN departments d ON emp.department_id = d.id
LEFT JOIN users mgr ON d.manager_id = mgr.id;
-- 查询分类及其父分类
SELECT
child.name AS category_name,
parent.name AS parent_category_name
FROM categories child
LEFT JOIN categories parent ON child.parent_id = parent.id;
-- 查找同部门的同事
SELECT
u1.name AS employee1,
u2.name AS employee2,
d.name AS department
FROM users u1
INNER JOIN users u2 ON u1.department_id = u2.department_id AND u1.id < u2.id
INNER JOIN departments d ON u1.department_id = d.id;
高级 JOIN 技巧
1. 多条件连接
-- 使用多个条件进行连接
SELECT
u.name,
o.amount,
o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id
AND o.status = 'completed'
AND o.order_date >= '2024-01-01'
ORDER BY o.amount DESC;
2. 使用子查询优化 JOIN
-- 先过滤再连接,提高性能
SELECT
u.name,
big_orders.total_amount
FROM users u
INNER JOIN (
SELECT
user_id,
SUM(amount) AS total_amount
FROM orders
WHERE status = 'completed'
GROUP BY user_id
HAVING SUM(amount) > 1000
) big_orders ON u.id = big_orders.user_id;
3. 条件连接(Conditional JOIN)
-- 根据条件决定连接方式
SELECT
u.name,
CASE
WHEN u.department_id IS NOT NULL THEN d.name
ELSE '临时员工'
END AS status,
COALESCE(recent_orders.order_count, 0) AS recent_orders
FROM users u
LEFT JOIN departments d ON u.department_id = d.id
LEFT JOIN (
SELECT
user_id,
COUNT(*) AS order_count
FROM orders
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
GROUP BY user_id
) recent_orders ON u.id = recent_orders.user_id;
4. 使用 USING 子句
-- 当连接字段名相同时,可以使用 USING
-- 假设两个表都有相同的字段名
SELECT
u.name,
d.name AS department_name
FROM users u
INNER JOIN departments d USING(id); -- 等价于 ON u.id = d.id
-- 多字段 USING
SELECT *
FROM table1 t1
INNER JOIN table2 t2 USING(id, status);
JOIN 性能优化
1. 索引优化
-- 为连接字段创建索引
CREATE INDEX idx_users_department_id ON users(department_id);
CREATE INDEX idx_orders_user_id ON orders(user_id);
CREATE INDEX idx_orders_status_date ON orders(status, order_date);
-- 复合索引优化多条件查询
CREATE INDEX idx_orders_user_status_date ON orders(user_id, status, order_date);
2. 查询优化技巧
-- 1. 使用 EXISTS 代替 IN 进行子查询
-- 不推荐
SELECT u.name
FROM users u
WHERE u.id IN (
SELECT DISTINCT user_id
FROM orders
WHERE status = 'completed'
);
-- 推荐
SELECT u.name
FROM users u
WHERE EXISTS (
SELECT 1
FROM orders o
WHERE o.user_id = u.id
AND o.status = 'completed'
);
-- 2. 适当使用 LIMIT 限制结果集
SELECT
u.name,
d.name AS department,
o.amount
FROM users u
INNER JOIN departments d ON u.department_id = d.id
INNER JOIN orders o ON u.id = o.user_id
ORDER BY o.amount DESC
LIMIT 10;
3. 分析查询执行计划
-- 使用 EXPLAIN 分析查询性能
EXPLAIN SELECT
u.name,
d.name AS department_name,
COUNT(o.id) AS order_count
FROM users u
LEFT JOIN departments d ON u.department_id = d.id
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name, d.name;
-- 使用 EXPLAIN ANALYZE 获取实际执行统计(MySQL 8.0+)
EXPLAIN ANALYZE SELECT
u.name,
SUM(o.amount) AS total_amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE o.status = 'completed'
GROUP BY u.id, u.name;
实际应用场景
1. 电商订单统计
-- 统计每个用户的订单情况
SELECT
u.name AS customer_name,
u.email,
d.name AS department,
COUNT(o.id) AS total_orders,
COUNT(CASE WHEN o.status = 'completed' THEN 1 END) AS completed_orders,
COUNT(CASE WHEN o.status = 'pending' THEN 1 END) AS pending_orders,
COUNT(CASE WHEN o.status = 'cancelled' THEN 1 END) AS cancelled_orders,
COALESCE(SUM(CASE WHEN o.status = 'completed' THEN o.amount END), 0) AS total_revenue,
COALESCE(AVG(CASE WHEN o.status = 'completed' THEN o.amount END), 0) AS avg_order_value
FROM users u
LEFT JOIN departments d ON u.department_id = d.id
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name, u.email, d.name
ORDER BY total_revenue DESC;
2. 层次结构查询
-- 查询产品分类层次结构
WITH RECURSIVE category_hierarchy AS (
-- 根分类
SELECT
id,
name,
parent_id,
name AS full_path,
0 AS level
FROM categories
WHERE parent_id IS NULL
UNION ALL
-- 子分类
SELECT
c.id,
c.name,
c.parent_id,
CONCAT(ch.full_path, ' > ', c.name) AS full_path,
ch.level + 1 AS level
FROM categories c
INNER JOIN category_hierarchy ch ON c.parent_id = ch.id
)
SELECT
ch.full_path,
ch.level,
COUNT(p.id) AS product_count,
COALESCE(AVG(p.price), 0) AS avg_price
FROM category_hierarchy ch
LEFT JOIN products p ON ch.id = p.category_id
GROUP BY ch.id, ch.full_path, ch.level
ORDER BY ch.level, ch.full_path;
3. 数据质量检查
-- 检查数据完整性问题
SELECT
'users_without_department' AS issue_type,
COUNT(*) AS count
FROM users u
LEFT JOIN departments d ON u.department_id = d.id
WHERE u.department_id IS NOT NULL AND d.id IS NULL
UNION ALL
SELECT
'orders_without_user' AS issue_type,
COUNT(*) AS count
FROM orders o
LEFT JOIN users u ON o.user_id = u.id
WHERE o.user_id IS NOT NULL AND u.id IS NULL
UNION ALL
SELECT
'departments_without_manager' AS issue_type,
COUNT(*) AS count
FROM departments d
LEFT JOIN users u ON d.manager_id = u.id
WHERE d.manager_id IS NOT NULL AND u.id IS NULL;
4. 销售报表
-- 按部门统计销售情况
SELECT
COALESCE(d.name, '未分配部门') AS department,
COUNT(DISTINCT u.id) AS employee_count,
COUNT(o.id) AS total_orders,
COALESCE(SUM(o.amount), 0) AS total_sales,
COALESCE(AVG(o.amount), 0) AS avg_order_value,
COUNT(CASE WHEN o.order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY) THEN 1 END) AS recent_orders
FROM departments d
LEFT JOIN users u ON d.id = u.department_id
LEFT JOIN orders o ON u.id = o.user_id AND o.status = 'completed'
GROUP BY d.id, d.name
ORDER BY total_sales DESC;
常见错误和解决方案
1. 避免重复数据
-- 错误:可能产生重复数据
SELECT u.name, d.name
FROM users u
INNER JOIN departments d ON u.department_id = d.id
INNER JOIN orders o ON u.id = o.user_id;
-- 正确:使用 DISTINCT 或适当的 GROUP BY
SELECT DISTINCT u.name, d.name
FROM users u
INNER JOIN departments d ON u.department_id = d.id
INNER JOIN orders o ON u.id = o.user_id;
-- 或者
SELECT u.name, d.name, COUNT(o.id) AS order_count
FROM users u
INNER JOIN departments d ON u.department_id = d.id
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name, d.name;
2. NULL 值处理
-- 正确处理 NULL 值
SELECT
u.name,
COALESCE(d.name, '未分配') AS department,
CASE
WHEN u.department_id IS NULL THEN '待分配'
WHEN d.id IS NULL THEN '部门已删除'
ELSE '正常'
END AS status
FROM users u
LEFT JOIN departments d ON u.department_id = d.id;
3. 性能问题排查
-- 使用合适的 JOIN 顺序和条件
-- 不好的做法:大表交叉连接后再过滤
SELECT u.name, o.amount
FROM users u, orders o
WHERE u.id = o.user_id AND o.amount > 1000;
-- 好的做法:先过滤再连接
SELECT u.name, o.amount
FROM users u
INNER JOIN (
SELECT user_id, amount
FROM orders
WHERE amount > 1000
) o ON u.id = o.user_id;