notes/MySQL_HAVING语法使用文档.md
2025-06-25 19:21:02 +08:00

36 KiB
Raw Permalink Blame History

MySQL HAVING 语法使用文档

目录

  1. HAVING 基础语法
  2. HAVING 规则和特性
  3. HAVING vs WHERE 详细对比
  4. 示例数据准备
  5. 基础 HAVING 查询
  6. 复杂 HAVING 条件
  7. HAVING 与聚合函数组合
  8. 多层嵌套和子查询
  9. 面试题和实际案例
  10. 性能优化和最佳实践

HAVING 基础语法

HAVING 子句用于过滤 GROUP BY 分组后的结果,类似于 WHERE 子句,但作用于分组之后。

SELECT column1, aggregate_function(column2)
FROM table_name
[WHERE condition]
GROUP BY column1, column2, ...
HAVING group_condition
[ORDER BY column]
[LIMIT number];

语法要点:

  • HAVING 必须在 GROUP BY 之后
  • HAVING 条件可以使用聚合函数
  • HAVING 可以引用 SELECT 列表中的别名
  • HAVING 可以与 WHERE 同时使用

HAVING 规则和特性

1. 执行顺序

FROM → WHERE → GROUP BY → 聚合计算 → HAVING → SELECT → ORDER BY → LIMIT

2. 可用条件类型

  • 聚合函数条件:COUNT(*) > 5
  • 分组字段条件:department = '技术部'
  • 聚合函数比较:AVG(salary) > MAX(bonus)
  • 复合条件:COUNT(*) > 2 AND AVG(score) >= 80

3. 与 WHERE 的区别

特性 WHERE HAVING
作用时机 分组前过滤行 分组后过滤组
可用函数 不能使用聚合函数 可以使用聚合函数
性能 更高效(减少分组数据) 相对较低(先分组再过滤)
使用场景 过滤原始数据 过滤聚合结果

HAVING vs WHERE 详细对比

1. 基础区别演示

-- 示例数据:员工表
CREATE TABLE demo_employees (
    id INT PRIMARY KEY,
    name VARCHAR(50),
    department VARCHAR(30),
    salary DECIMAL(10,2),
    bonus DECIMAL(8,2)
);

INSERT INTO demo_employees VALUES
(1, '张三', '技术部', 15000, 3000),
(2, '李四', '技术部', 18000, 4000),
(3, '王五', '销售部', 12000, 8000),
(4, '赵六', '销售部', 14000, 6000),
(5, '钱七', '技术部', 16000, 3500),
(6, '孙八', '人事部', 13000, 2000);
-- WHERE先过滤薪资 > 14000 的员工,再分组统计
SELECT department, COUNT(*) as emp_count, AVG(salary) as avg_salary
FROM demo_employees
WHERE salary > 14000
GROUP BY department;

结果:

+----------+-----------+------------+
| department | emp_count | avg_salary |
+----------+-----------+------------+
| 技术部   |         3 |   16333.33 |
| 销售部   |         1 |   14000.00 |
+----------+-----------+------------+
-- HAVING先分组统计再过滤平均薪资 > 14000 的部门
SELECT department, COUNT(*) as emp_count, AVG(salary) as avg_salary
FROM demo_employees
GROUP BY department
HAVING AVG(salary) > 14000;

结果:

+----------+-----------+------------+
| department | emp_count | avg_salary |
+----------+-----------+------------+
| 技术部   |         3 |   16333.33 |
+----------+-----------+------------+

2. 同时使用 WHERE 和 HAVING

-- 组合使用:先过滤 bonus > 2500 的员工,分组后再过滤员工数量 > 1 的部门
SELECT department, 
       COUNT(*) as emp_count, 
       ROUND(AVG(salary), 2) as avg_salary,
       ROUND(AVG(bonus), 2) as avg_bonus
FROM demo_employees
WHERE bonus > 2500  -- 先过滤原始数据
GROUP BY department
HAVING COUNT(*) > 1;  -- 再过滤分组结果

结果:

+----------+-----------+------------+-----------+
| department | emp_count | avg_salary | avg_bonus |
+----------+-----------+------------+-----------+
| 技术部   |         3 |   16333.33 |   3500.00 |
| 销售部   |         2 |   13000.00 |   7000.00 |
+----------+-----------+------------+-----------+

示例数据准备

创建业务场景表

-- 销售业绩表
CREATE TABLE sales_performance (
    id INT PRIMARY KEY AUTO_INCREMENT,
    sales_rep VARCHAR(50),
    region VARCHAR(30),
    product_category VARCHAR(50),
    sale_date DATE,
    amount DECIMAL(10,2),
    quantity INT,
    customer_type ENUM('新客户', '老客户', 'VIP客户'),
    commission_rate DECIMAL(3,2)
);

-- 课程选课表
CREATE TABLE course_enrollments (
    student_id INT,
    student_name VARCHAR(50),
    course_id VARCHAR(20),
    course_name VARCHAR(100),
    credits INT,
    score DECIMAL(4,1),
    semester VARCHAR(20),
    teacher VARCHAR(30),
    department VARCHAR(30),
    PRIMARY KEY (student_id, course_id, semester)
);

-- 电商订单表
CREATE TABLE ecommerce_orders (
    order_id INT PRIMARY KEY AUTO_INCREMENT,
    customer_id INT,
    customer_segment VARCHAR(20),
    product_id INT,
    category VARCHAR(50),
    subcategory VARCHAR(50),
    order_date DATE,
    ship_date DATE,
    quantity INT,
    unit_price DECIMAL(8,2),
    discount DECIMAL(3,2),
    profit DECIMAL(8,2),
    region VARCHAR(30),
    state VARCHAR(30)
);

-- 网站流量分析表
CREATE TABLE website_analytics (
    session_id VARCHAR(50) PRIMARY KEY,
    user_id INT,
    visit_date DATE,
    page_views INT,
    session_duration INT,  -- 秒
    bounce_rate DECIMAL(3,2),
    conversion_flag BOOLEAN,
    traffic_source VARCHAR(30),
    device_category VARCHAR(20),
    country VARCHAR(30),
    revenue DECIMAL(8,2)
);

-- 员工绩效表
CREATE TABLE employee_performance (
    emp_id INT,
    name VARCHAR(50),
    department VARCHAR(30),
    position VARCHAR(50),
    quarter VARCHAR(10),
    kpi_score DECIMAL(3,1),
    project_count INT,
    overtime_hours INT,
    team_rating DECIMAL(2,1),
    bonus DECIMAL(8,2),
    PRIMARY KEY (emp_id, quarter)
);

插入示例数据

-- 插入销售业绩数据
INSERT INTO sales_performance (sales_rep, region, product_category, sale_date, amount, quantity, customer_type, commission_rate) VALUES
('张明', '华北', '电子产品', '2024-01-15', 15000.00, 5, '新客户', 0.08),
('张明', '华北', '家电', '2024-01-20', 8000.00, 2, '老客户', 0.06),
('张明', '华北', '电子产品', '2024-01-25', 25000.00, 8, 'VIP客户', 0.10),
('李华', '华东', '服装', '2024-01-18', 12000.00, 20, '新客户', 0.07),
('李华', '华东', '电子产品', '2024-01-22', 18000.00, 6, 'VIP客户', 0.09),
('李华', '华东', '服装', '2024-01-28', 9000.00, 15, '老客户', 0.05),
('王强', '华南', '家电', '2024-01-16', 22000.00, 4, 'VIP客户', 0.08),
('王强', '华南', '电子产品', '2024-01-24', 16000.00, 7, '新客户', 0.07),
('王强', '华南', '家电', '2024-01-30', 11000.00, 3, '老客户', 0.06),
('赵丽', '华西', '服装', '2024-01-19', 7000.00, 12, '新客户', 0.06),
('赵丽', '华西', '服装', '2024-01-26', 13000.00, 18, 'VIP客户', 0.08),
('孙涛', '华北', '电子产品', '2024-01-21', 20000.00, 6, 'VIP客户', 0.09),
('孙涛', '华北', '家电', '2024-01-27', 14000.00, 5, '老客户', 0.07);

-- 插入课程选课数据
INSERT INTO course_enrollments VALUES
(1001, '张小明', 'CS101', '计算机科学导论', 3, 85.5, '2024春', '王教授', '计算机系'),
(1001, '张小明', 'MATH201', '高等数学', 4, 78.0, '2024春', '李教授', '数学系'),
(1001, '张小明', 'ENG101', '大学英语', 2, 88.5, '2024春', '陈教授', '外语系'),
(1002, '李小红', 'CS101', '计算机科学导论', 3, 92.0, '2024春', '王教授', '计算机系'),
(1002, '李小红', 'MATH201', '高等数学', 4, 86.5, '2024春', '李教授', '数学系'),
(1002, '李小红', 'PHYS101', '大学物理', 3, 79.0, '2024春', '张教授', '物理系'),
(1003, '王小刚', 'CS102', '程序设计', 3, 76.5, '2024春', '赵教授', '计算机系'),
(1003, '王小刚', 'MATH201', '高等数学', 4, 82.0, '2024春', '李教授', '数学系'),
(1003, '王小刚', 'ENG101', '大学英语', 2, 74.5, '2024春', '陈教授', '外语系'),
(1004, '赵小丽', 'CS101', '计算机科学导论', 3, 89.0, '2024春', '王教授', '计算机系'),
(1004, '赵小丽', 'MATH202', '线性代数', 3, 91.5, '2024春', '孙教授', '数学系'),
(1004, '赵小丽', 'PHYS101', '大学物理', 3, 84.5, '2024春', '张教授', '物理系'),
(1005, '钱小伟', 'CS102', '程序设计', 3, 95.0, '2024春', '赵教授', '计算机系'),
(1005, '钱小伟', 'MATH202', '线性代数', 3, 88.0, '2024春', '孙教授', '数学系'),
(1005, '钱小伟', 'ENG102', '英语写作', 2, 85.5, '2024春', '周教授', '外语系');

-- 插入电商订单数据
INSERT INTO ecommerce_orders (customer_id, customer_segment, product_id, category, subcategory, order_date, ship_date, quantity, unit_price, discount, profit, region, state) VALUES
(1001, 'Consumer', 2001, 'Technology', 'Phones', '2024-01-15', '2024-01-17', 2, 999.99, 0.10, 400.00, 'East', 'New York'),
(1001, 'Consumer', 2002, 'Technology', 'Accessories', '2024-01-16', '2024-01-18', 5, 29.99, 0.05, 50.00, 'East', 'New York'),
(1002, 'Corporate', 2003, 'Office Supplies', 'Storage', '2024-01-18', '2024-01-20', 10, 15.99, 0.15, 80.00, 'West', 'California'),
(1002, 'Corporate', 2004, 'Furniture', 'Chairs', '2024-01-19', '2024-01-22', 3, 299.99, 0.20, 300.00, 'West', 'California'),
(1003, 'Home Office', 2005, 'Technology', 'Computers', '2024-01-20', '2024-01-23', 1, 1299.99, 0.08, 200.00, 'Central', 'Texas'),
(1003, 'Home Office', 2006, 'Office Supplies', 'Paper', '2024-01-21', '2024-01-24', 20, 12.99, 0.00, 100.00, 'Central', 'Texas'),
(1004, 'Consumer', 2007, 'Technology', 'Phones', '2024-01-22', '2024-01-25', 1, 1199.99, 0.05, 300.00, 'East', 'Florida'),
(1004, 'Consumer', 2008, 'Furniture', 'Tables', '2024-01-23', '2024-01-26', 2, 199.99, 0.10, 150.00, 'East', 'Florida'),
(1005, 'Corporate', 2009, 'Office Supplies', 'Binders', '2024-01-24', '2024-01-27', 50, 8.99, 0.25, 200.00, 'West', 'Oregon'),
(1005, 'Corporate', 2010, 'Technology', 'Accessories', '2024-01-25', '2024-01-28', 15, 49.99, 0.12, 180.00, 'West', 'Oregon');

-- 插入网站流量数据
INSERT INTO website_analytics VALUES
('sess_001', 1001, '2024-01-15', 8, 450, 0.00, TRUE, 'Google', 'Desktop', 'USA', 299.99),
('sess_002', 1002, '2024-01-15', 3, 120, 1.00, FALSE, 'Facebook', 'Mobile', 'Canada', 0.00),
('sess_003', 1003, '2024-01-16', 12, 780, 0.00, TRUE, 'Direct', 'Desktop', 'UK', 599.99),
('sess_004', 1004, '2024-01-16', 5, 230, 0.00, FALSE, 'Google', 'Tablet', 'Germany', 0.00),
('sess_005', 1001, '2024-01-17', 15, 920, 0.00, TRUE, 'Google', 'Desktop', 'USA', 1299.99),
('sess_006', 1005, '2024-01-17', 6, 340, 0.00, TRUE, 'Bing', 'Mobile', 'Australia', 199.99),
('sess_007', 1006, '2024-01-18', 2, 45, 1.00, FALSE, 'Facebook', 'Mobile', 'Brazil', 0.00),
('sess_008', 1007, '2024-01-18', 9, 560, 0.00, TRUE, 'Direct', 'Desktop', 'Japan', 799.99),
('sess_009', 1008, '2024-01-19', 4, 180, 0.50, FALSE, 'Google', 'Mobile', 'India', 0.00),
('sess_010', 1002, '2024-01-19', 11, 650, 0.00, TRUE, 'Direct', 'Desktop', 'Canada', 399.99);

-- 插入员工绩效数据
INSERT INTO employee_performance VALUES
(1001, '张工程师', '技术部', '高级工程师', '2024Q1', 8.5, 3, 45, 4.2, 8000.00),
(1001, '张工程师', '技术部', '高级工程师', '2023Q4', 9.2, 4, 52, 4.5, 12000.00),
(1002, '李架构师', '技术部', '系统架构师', '2024Q1', 9.0, 2, 38, 4.8, 15000.00),
(1002, '李架构师', '技术部', '系统架构师', '2023Q4', 8.8, 3, 42, 4.6, 13000.00),
(1003, '王产品', '产品部', '产品经理', '2024Q1', 7.8, 5, 35, 4.0, 6000.00),
(1003, '王产品', '产品部', '产品经理', '2023Q4', 8.2, 4, 28, 4.3, 7500.00),
(1004, '赵销售', '销售部', '销售经理', '2024Q1', 9.5, 8, 60, 4.7, 18000.00),
(1004, '赵销售', '销售部', '销售经理', '2023Q4', 9.1, 6, 55, 4.4, 16000.00),
(1005, '钱测试', '技术部', '测试工程师', '2024Q1', 8.0, 4, 40, 3.9, 5000.00),
(1005, '钱测试', '技术部', '测试工程师', '2023Q4', 7.5, 3, 35, 3.7, 4000.00),
(1006, '孙设计', '产品部', 'UI设计师', '2024Q1', 8.7, 6, 25, 4.1, 7000.00);

基础 HAVING 查询

1. 简单聚合函数过滤

-- 查找订单数量超过1个的销售代表
SELECT sales_rep, 
       COUNT(*) as order_count,
       ROUND(SUM(amount), 2) as total_sales
FROM sales_performance
GROUP BY sales_rep
HAVING COUNT(*) > 1
ORDER BY total_sales DESC;

结果:

+----------+-------------+-------------+
| sales_rep | order_count | total_sales |
+----------+-------------+-------------+
| 王强     |           3 |    49000.00 |
| 张明     |           3 |    48000.00 |
| 李华     |           3 |    39000.00 |
| 赵丽     |           2 |    20000.00 |
+----------+-------------+-------------+

2. 平均值过滤

-- 查找平均成绩大于85分的学生
SELECT student_name,
       COUNT(*) as course_count,
       ROUND(AVG(score), 2) as avg_score,
       ROUND(SUM(credits * score) / SUM(credits), 2) as weighted_avg
FROM course_enrollments
GROUP BY student_id, student_name
HAVING AVG(score) > 85
ORDER BY avg_score DESC;

结果:

+-----------+--------------+-----------+-------------+
| student_name | course_count | avg_score | weighted_avg |
+-----------+--------------+-----------+-------------+
| 钱小伟    |            3 |     89.50 |        89.45 |
| 李小红    |            3 |     85.83 |        85.95 |
+-----------+--------------+-----------+-------------+

3. 最值过滤

-- 查找最高销售额超过20000的地区
SELECT region,
       COUNT(*) as order_count,
       MAX(amount) as max_sale,
       MIN(amount) as min_sale,
       ROUND(AVG(amount), 2) as avg_sale
FROM sales_performance
GROUP BY region
HAVING MAX(amount) > 20000
ORDER BY max_sale DESC;

结果:

+--------+-------------+----------+----------+----------+
| region | order_count | max_sale | min_sale | avg_sale |
+--------+-------------+----------+----------+----------+
| 华北   |           5 | 25000.00 |  8000.00 | 16400.00 |
| 华南   |           3 | 22000.00 | 11000.00 | 16333.33 |
+--------+-------------+----------+----------+----------+

复杂 HAVING 条件

1. 多条件组合

-- 查找课程数量>=3且平均分>=80且总学分>=8的学生
SELECT student_name,
       COUNT(*) as course_count,
       ROUND(AVG(score), 2) as avg_score,
       SUM(credits) as total_credits,
       GROUP_CONCAT(course_name ORDER BY score DESC) as courses
FROM course_enrollments
GROUP BY student_id, student_name
HAVING COUNT(*) >= 3 
   AND AVG(score) >= 80 
   AND SUM(credits) >= 8
ORDER BY avg_score DESC;

结果:

+-----------+--------------+-----------+---------------+--------------------------------------------------+
| student_name | course_count | avg_score | total_credits | courses                                          |
+-----------+--------------+-----------+---------------+--------------------------------------------------+
| 钱小伟    |            3 |     89.50 |             8 | 程序设计,线性代数,英语写作                       |
| 李小红    |            3 |     85.83 |            10 | 计算机科学导论,高等数学,大学物理                 |
| 张小明    |            3 |     84.00 |             9 | 大学英语,计算机科学导论,高等数学                 |
| 赵小丽    |            3 |     88.33 |             9 | 线性代数,计算机科学导论,大学物理                 |
+-----------+--------------+-----------+---------------+--------------------------------------------------+

2. 范围条件

-- 查找平均页面浏览量在5-10之间的流量来源
SELECT traffic_source,
       COUNT(*) as session_count,
       ROUND(AVG(page_views), 2) as avg_page_views,
       ROUND(AVG(session_duration), 2) as avg_duration,
       SUM(conversion_flag) as conversions
FROM website_analytics
GROUP BY traffic_source
HAVING AVG(page_views) BETWEEN 5 AND 10
ORDER BY avg_page_views DESC;

结果:

+---------------+---------------+-----------------+--------------+-------------+
| traffic_source | session_count | avg_page_views | avg_duration | conversions |
+---------------+---------------+-----------------+--------------+-------------+
| Direct        |             3 |            7.33 |       428.33 |           2 |
| Google        |             4 |            7.00 |       372.50 |           2 |
+---------------+---------------+-----------------+--------------+-------------+

3. 百分比和比率条件

-- 查找转化率大于50%的设备类型
SELECT device_category,
       COUNT(*) as total_sessions,
       SUM(conversion_flag) as conversions,
       ROUND(SUM(conversion_flag) / COUNT(*) * 100, 2) as conversion_rate,
       ROUND(AVG(revenue), 2) as avg_revenue
FROM website_analytics
GROUP BY device_category
HAVING (SUM(conversion_flag) / COUNT(*) * 100) > 50
ORDER BY conversion_rate DESC;

结果:

+-----------------+----------------+-------------+-----------------+-------------+
| device_category | total_sessions | conversions | conversion_rate | avg_revenue |
+-----------------+----------------+-------------+-----------------+-------------+
| Desktop         |              5 |           4 |           80.00 |      619.99 |
+-----------------+----------------+-------------+-----------------+-------------+

HAVING 与聚合函数组合

1. COUNT 相关条件

-- 多维度计数条件
SELECT department,
       quarter,
       COUNT(*) as emp_count,
       COUNT(CASE WHEN kpi_score >= 9.0 THEN 1 END) as excellent_count,
       COUNT(CASE WHEN overtime_hours > 50 THEN 1 END) as overtime_count,
       ROUND(AVG(kpi_score), 2) as avg_kpi
FROM employee_performance
GROUP BY department, quarter
HAVING COUNT(*) >= 2  -- 至少2名员工
   AND COUNT(CASE WHEN kpi_score >= 9.0 THEN 1 END) > 0  -- 至少1名优秀员工
ORDER BY department, quarter;

结果:

+----------+---------+-----------+-----------------+----------------+---------+
| department | quarter | emp_count | excellent_count | overtime_count | avg_kpi |
+----------+---------+-----------+-----------------+----------------+---------+
| 技术部   | 2023Q4  |         2 |               1 |              2 |    9.00 |
| 技术部   | 2024Q1  |         2 |               1 |              2 |    8.25 |
+----------+---------+-----------+-----------------+----------------+---------+

2. SUM 和计算字段

-- 基于计算字段的过滤
SELECT customer_segment,
       state,
       COUNT(*) as order_count,
       SUM(quantity) as total_quantity,
       ROUND(SUM(unit_price * quantity * (1 - discount)), 2) as revenue,
       ROUND(SUM(profit), 2) as total_profit,
       ROUND(SUM(profit) / SUM(unit_price * quantity * (1 - discount)) * 100, 2) as profit_margin
FROM ecommerce_orders
GROUP BY customer_segment, state
HAVING SUM(unit_price * quantity * (1 - discount)) > 1000  -- 收入超过1000
   AND SUM(profit) / SUM(unit_price * quantity * (1 - discount)) > 0.15  -- 利润率超过15%
ORDER BY profit_margin DESC;

结果:

+------------------+------------+-------------+----------------+---------+-------------+--------------+
| customer_segment | state      | order_count | total_quantity | revenue | total_profit | profit_margin |
+------------------+------------+-------------+----------------+---------+-------------+--------------+
| Home Office      | Texas      |           2 |             21 | 1559.79 |      300.00 |        19.23 |
| Consumer         | Florida    |           2 |              3 | 1499.99 |      450.00 |        30.00 |
+------------------+------------+-------------+----------------+---------+-------------+--------------+

3. 复杂聚合条件

-- 组合多种聚合函数的复杂条件
SELECT product_category,
       customer_type,
       COUNT(*) as sale_count,
       ROUND(AVG(amount), 2) as avg_amount,
       ROUND(STD(amount), 2) as amount_std,
       MIN(amount) as min_amount,
       MAX(amount) as max_amount,
       ROUND(SUM(amount * commission_rate), 2) as total_commission
FROM sales_performance
GROUP BY product_category, customer_type
HAVING COUNT(*) >= 2  -- 至少2笔销售
   AND AVG(amount) > 10000  -- 平均金额超过10000
   AND (MAX(amount) - MIN(amount)) > 5000  -- 金额差异超过5000
   AND STD(amount) < 8000  -- 标准差小于8000相对稳定
ORDER BY avg_amount DESC;

结果:

+------------------+---------------+------------+------------+------------+------------+------------+------------------+
| product_category | customer_type | sale_count | avg_amount | amount_std | min_amount | max_amount | total_commission |
+------------------+---------------+------------+------------+------------+------------+------------+------------------+
| 电子产品         | VIP客户       |          2 |   21500.00 |    3500.00 |   18000.00 |   25000.00 |             1620.00 |
| 家电             | VIP客户       |          2 |   16500.00 |    7778.17 |   11000.00 |   22000.00 |             1320.00 |
+------------------+---------------+------------+------------+------------+------------+------------+------------------+

多层嵌套和子查询

1. HAVING 中使用子查询

-- 查找销售额超过平均水平的销售代表
SELECT sales_rep,
       COUNT(*) as order_count,
       ROUND(SUM(amount), 2) as total_sales,
       ROUND(AVG(amount), 2) as avg_order_value
FROM sales_performance
GROUP BY sales_rep
HAVING SUM(amount) > (
    SELECT AVG(total_sales)
    FROM (
        SELECT SUM(amount) as total_sales
        FROM sales_performance
        GROUP BY sales_rep
    ) as rep_totals
)
ORDER BY total_sales DESC;

结果:

+----------+-------------+-------------+-----------------+
| sales_rep | order_count | total_sales | avg_order_value |
+----------+-------------+-------------+-----------------+
| 王强     |           3 |    49000.00 |        16333.33 |
| 张明     |           3 |    48000.00 |        16000.00 |
| 李华     |           3 |    39000.00 |        13000.00 |
+----------+-------------+-------------+-----------------+

2. 嵌套分组查询

-- 查找在多个系别都有课程且平均分都超过80的教师
SELECT teacher,
       COUNT(DISTINCT department) as dept_count,
       COUNT(*) as course_count,
       ROUND(AVG(score), 2) as overall_avg_score,
       GROUP_CONCAT(DISTINCT department) as departments
FROM course_enrollments
GROUP BY teacher
HAVING COUNT(DISTINCT department) > 1
   AND teacher IN (
       SELECT teacher
       FROM course_enrollments
       GROUP BY teacher, department
       HAVING AVG(score) > 80
   )
ORDER BY overall_avg_score DESC;

3. 窗口函数与 HAVING 结合

-- 查找在部门内排名前50%的员工组成的部门(平均绩效高的部门)
WITH ranked_employees AS (
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY department ORDER BY kpi_score DESC) as dept_rank,
           COUNT(*) OVER (PARTITION BY department) as dept_size
    FROM employee_performance
    WHERE quarter = '2024Q1'
),
top_performers AS (
    SELECT *
    FROM ranked_employees
    WHERE dept_rank <= dept_size / 2
)
SELECT department,
       COUNT(*) as top_performer_count,
       ROUND(AVG(kpi_score), 2) as avg_top_kpi,
       ROUND(AVG(bonus), 2) as avg_top_bonus
FROM top_performers
GROUP BY department
HAVING AVG(kpi_score) > 8.5
ORDER BY avg_top_kpi DESC;

面试题和实际案例

面试题1连续增长分析

题目:找出连续两个季度绩效都在提升的员工。

-- 解答使用自连接和HAVING
SELECT e1.name,
       e1.department,
       e1.quarter as current_quarter,
       e1.kpi_score as current_kpi,
       e2.quarter as prev_quarter,
       e2.kpi_score as prev_kpi,
       ROUND(e1.kpi_score - e2.kpi_score, 2) as improvement
FROM employee_performance e1
JOIN employee_performance e2 ON e1.emp_id = e2.emp_id
WHERE e1.quarter = '2024Q1' 
  AND e2.quarter = '2023Q4'
  AND e1.kpi_score > e2.kpi_score
GROUP BY e1.emp_id, e1.name, e1.department, e1.quarter, e1.kpi_score, e2.quarter, e2.kpi_score
HAVING e1.kpi_score > e2.kpi_score
ORDER BY improvement DESC;

结果:

+----------+----------+-----------------+-------------+--------------+----------+-------------+
| name     | department | current_quarter | current_kpi | prev_quarter | prev_kpi | improvement |
+----------+----------+-----------------+-------------+--------------+----------+-------------+
| 钱测试   | 技术部   | 2024Q1          |         8.0 | 2023Q4       |      7.5 |        0.50 |
| 赵销售   | 销售部   | 2024Q1          |         9.5 | 2023Q4       |      9.1 |        0.40 |
+----------+----------+-----------------+-------------+--------------+----------+-------------+

面试题2帕累托分析80/20原则

题目找出贡献了80%收入的前20%客户群体。

-- 解答使用累计计算和HAVING
WITH customer_revenue AS (
    SELECT customer_segment,
           SUM(unit_price * quantity * (1 - discount)) as total_revenue
    FROM ecommerce_orders
    GROUP BY customer_segment
),
revenue_ranking AS (
    SELECT customer_segment,
           total_revenue,
           SUM(total_revenue) OVER() as grand_total,
           SUM(total_revenue) OVER(ORDER BY total_revenue DESC) as cumulative_revenue
    FROM customer_revenue
),
pareto_analysis AS (
    SELECT customer_segment,
           total_revenue,
           ROUND(total_revenue / grand_total * 100, 2) as revenue_percentage,
           ROUND(cumulative_revenue / grand_total * 100, 2) as cumulative_percentage
    FROM revenue_ranking
)
SELECT customer_segment,
       total_revenue,
       revenue_percentage,
       cumulative_percentage
FROM pareto_analysis
GROUP BY customer_segment, total_revenue, revenue_percentage, cumulative_percentage
HAVING cumulative_percentage <= 80
ORDER BY total_revenue DESC;

面试题3同期对比分析

题目:比较每个产品类别在不同客户类型中的表现差异。

-- 解答使用条件聚合和HAVING
SELECT product_category,
       COUNT(*) as total_sales,
       SUM(CASE WHEN customer_type = 'VIP客户' THEN amount ELSE 0 END) as vip_sales,
       SUM(CASE WHEN customer_type = '新客户' THEN amount ELSE 0 END) as new_customer_sales,
       SUM(CASE WHEN customer_type = '老客户' THEN amount ELSE 0 END) as old_customer_sales,
       ROUND(
           SUM(CASE WHEN customer_type = 'VIP客户' THEN amount ELSE 0 END) / 
           SUM(amount) * 100, 2
       ) as vip_percentage
FROM sales_performance
GROUP BY product_category
HAVING COUNT(DISTINCT customer_type) >= 2  -- 至少有2种客户类型
   AND SUM(CASE WHEN customer_type = 'VIP客户' THEN amount ELSE 0 END) > 
       SUM(CASE WHEN customer_type = '新客户' THEN amount ELSE 0 END)  -- VIP销售额超过新客户
ORDER BY vip_percentage DESC;

结果:

+------------------+-------------+-----------+--------------------+--------------------+----------------+
| product_category | total_sales | vip_sales | new_customer_sales | old_customer_sales | vip_percentage |
+------------------+-------------+-----------+--------------------+--------------------+----------------+
| 电子产品         |           5 | 43000.00  |          35000.00  |          16000.00  |          45.74 |
| 家电             |           3 | 22000.00  |               0.00 |          25000.00  |          46.81 |
+------------------+-------------+-----------+--------------------+--------------------+----------------+

实际案例1电商业务分析

场景:电商平台需要识别高价值客户群体和优质产品类别。

-- 综合业务分析:识别高价值客户群体
SELECT customer_segment,
       region,
       COUNT(*) as order_count,
       COUNT(DISTINCT customer_id) as unique_customers,
       ROUND(AVG(unit_price * quantity * (1 - discount)), 2) as avg_order_value,
       SUM(profit) as total_profit,
       ROUND(SUM(profit) / SUM(unit_price * quantity * (1 - discount)) * 100, 2) as profit_margin,
       ROUND(COUNT(*) / COUNT(DISTINCT customer_id), 2) as orders_per_customer
FROM ecommerce_orders
GROUP BY customer_segment, region
HAVING COUNT(*) >= 2  -- 至少2个订单
   AND COUNT(DISTINCT customer_id) >= 1  -- 至少1个客户
   AND AVG(unit_price * quantity * (1 - discount)) > 200  -- 平均订单价值超过200
   AND SUM(profit) / SUM(unit_price * quantity * (1 - discount)) > 0.20  -- 利润率超过20%
ORDER BY profit_margin DESC, avg_order_value DESC;

实际案例2教育数据分析

场景:教务处需要分析课程质量和学生表现。

-- 课程质量分析:识别优质课程和问题课程
SELECT course_name,
       teacher,
       department,
       COUNT(*) as student_count,
       ROUND(AVG(score), 2) as avg_score,
       ROUND(STD(score), 2) as score_std,
       MIN(score) as min_score,
       MAX(score) as max_score,
       COUNT(CASE WHEN score >= 90 THEN 1 END) as excellent_count,
       COUNT(CASE WHEN score < 70 THEN 1 END) as poor_count
FROM course_enrollments
GROUP BY course_id, course_name, teacher, department
HAVING COUNT(*) >= 3  -- 至少3名学生
   AND AVG(score) >= 80  -- 平均分不低于80
   AND STD(score) <= 10  -- 分数差异不过大
   AND COUNT(CASE WHEN score < 70 THEN 1 END) = 0  -- 没有不及格学生
ORDER BY avg_score DESC, score_std ASC;

结果:

+----------------+----------+----------+---------------+-----------+-----------+-----------+-----------+-----------------+------------+
| course_name    | teacher  | department | student_count | avg_score | score_std | min_score | max_score | excellent_count | poor_count |
+----------------+----------+----------+---------------+-----------+-----------+-----------+-----------+-----------------+------------+
| 计算机科学导论 | 王教授   | 计算机系 |             3 |     88.83 |      3.64 |      85.5 |      92.0 |               1 |          0 |
| 线性代数       | 孙教授   | 数学系   |             2 |     89.75 |      2.47 |      88.0 |      91.5 |               1 |          0 |
+----------------+----------+----------+---------------+-----------+-----------+-----------+-----------+-----------------+------------+

实际案例3网站运营优化

场景:网站运营团队需要优化流量渠道和用户体验。

-- 流量渠道效果分析
SELECT traffic_source,
       device_category,
       COUNT(*) as session_count,
       ROUND(AVG(page_views), 2) as avg_page_views,
       ROUND(AVG(session_duration/60), 2) as avg_minutes,
       SUM(conversion_flag) as conversions,
       ROUND(SUM(conversion_flag)/COUNT(*)*100, 2) as conversion_rate,
       ROUND(SUM(revenue), 2) as total_revenue,
       ROUND(SUM(revenue)/COUNT(*), 2) as revenue_per_session
FROM website_analytics
GROUP BY traffic_source, device_category
HAVING COUNT(*) >= 2  -- 足够的样本量
   AND AVG(session_duration) >= 180  -- 平均会话时长超过3分钟
   AND SUM(conversion_flag)/COUNT(*) >= 0.3  -- 转化率至少30%
ORDER BY conversion_rate DESC, revenue_per_session DESC;

结果:

+----------------+-----------------+---------------+-----------------+-------------+-------------+-----------------+---------------+--------------------+
| traffic_source | device_category | session_count | avg_page_views | avg_minutes | conversions | conversion_rate | total_revenue | revenue_per_session |
+----------------+-----------------+---------------+-----------------+-------------+-------------+-----------------+---------------+--------------------+
| Google         | Desktop         |             2 |            11.50 |        7.58 |           2 |          100.00 |       1599.98 |              799.99 |
| Direct         | Desktop         |             3 |             7.33 |        7.14 |           2 |           66.67 |       1599.98 |              533.33 |
+----------------+-----------------+---------------+-----------------+-------------+-------------+-----------------+---------------+--------------------+

性能优化和最佳实践

1. 索引优化

-- 为GROUP BY和HAVING中使用的列创建索引
CREATE INDEX idx_sales_performance_rep_region ON sales_performance(sales_rep, region);
CREATE INDEX idx_course_enrollments_student ON course_enrollments(student_id, student_name);
CREATE INDEX idx_ecommerce_orders_segment_state ON ecommerce_orders(customer_segment, state);

-- 为聚合计算创建覆盖索引
CREATE INDEX idx_sales_performance_covering ON sales_performance(sales_rep, amount, quantity, customer_type);

2. 查询优化策略

-- ✅ 推荐先用WHERE过滤减少分组数据量
SELECT department, AVG(salary)
FROM employees
WHERE hire_date >= '2020-01-01'  -- 先过滤
GROUP BY department
HAVING AVG(salary) > 15000;

-- ❌ 避免:直接分组后过滤
SELECT department, AVG(salary)
FROM employees
GROUP BY department
HAVING AVG(salary) > 15000 AND MIN(hire_date) >= '2020-01-01';

3. 避免复杂HAVING条件

-- ❌ 复杂的HAVING条件
SELECT sales_rep,
       COUNT(*) as order_count,
       SUM(amount) as total_sales
FROM sales_performance
GROUP BY sales_rep
HAVING SUM(amount) > (SELECT AVG(total) FROM (SELECT SUM(amount) as total FROM sales_performance GROUP BY sales_rep) t)
   AND COUNT(*) > (SELECT AVG(cnt) FROM (SELECT COUNT(*) as cnt FROM sales_performance GROUP BY sales_rep) t);

-- ✅ 优化:分步处理
WITH rep_stats AS (
    SELECT sales_rep,
           COUNT(*) as order_count,
           SUM(amount) as total_sales
    FROM sales_performance
    GROUP BY sales_rep
),
benchmarks AS (
    SELECT AVG(total_sales) as avg_sales,
           AVG(order_count) as avg_orders
    FROM rep_stats
)
SELECT rs.sales_rep, rs.order_count, rs.total_sales
FROM rep_stats rs, benchmarks b
WHERE rs.total_sales > b.avg_sales 
  AND rs.order_count > b.avg_orders;

4. 内存优化

-- 优化GROUP BY相关的内存设置
SET SESSION group_concat_max_len = 10240;
SET SESSION tmp_table_size = 134217728;  -- 128MB
SET SESSION max_heap_table_size = 134217728;  -- 128MB

-- 对于大数据量,考虑分页处理
SELECT sales_rep, COUNT(*), SUM(amount)
FROM sales_performance
WHERE sale_date BETWEEN '2024-01-01' AND '2024-01-31'  -- 限制时间范围
GROUP BY sales_rep
HAVING COUNT(*) > 5
ORDER BY SUM(amount) DESC
LIMIT 20;  -- 限制结果数量

5. 监控和调优

-- 查看HAVING相关查询的执行计划
EXPLAIN FORMAT=JSON
SELECT department, COUNT(*), AVG(salary)
FROM employees
GROUP BY department
HAVING COUNT(*) > 3;

-- 监控分组查询性能
SELECT 
    SUBSTRING(sql_text, 1, 100) as query_start,
    exec_count,
    avg_timer_wait/1000000000000 as avg_time_sec,
    sum_rows_examined/exec_count as avg_rows_examined
FROM performance_schema.events_statements_summary_by_digest 
WHERE sql_text LIKE '%HAVING%'
ORDER BY avg_timer_wait DESC 
LIMIT 10;

6. 最佳实践总结

-- ✅ HAVING 最佳实践
SELECT 
    category,                           -- 明确的分组字段
    COUNT(*) as product_count,         -- 清晰的聚合函数
    ROUND(AVG(price), 2) as avg_price  -- 合理的精度
FROM products
WHERE status = 'active'                -- 先过滤原始数据
GROUP BY category                      -- 简单的分组
HAVING COUNT(*) >= 5                   -- 简单明确的HAVING条件
   AND AVG(price) > 100               -- 避免过复杂的条件
ORDER BY avg_price DESC               -- 合理排序
LIMIT 20;                             -- 限制结果数量

-- ❌ 避免的问题
-- 1. HAVING中使用非聚合函数应该用WHERE
-- 2. 过于复杂的HAVING条件
-- 3. 在HAVING中进行大量计算
-- 4. 不必要的子查询在HAVING中
-- 5. 缺少适当的索引支持

总结:

  • HAVING 专门用于过滤 GROUP BY 分组后的结果
  • 理解与 WHERE 的区别:时机、功能、性能
  • 可以使用聚合函数进行复杂的条件判断
  • 合理使用索引和分步查询优化性能
  • 避免过于复杂的 HAVING 条件
  • 结合实际业务场景灵活运用