956 lines
28 KiB
Markdown
956 lines
28 KiB
Markdown
# MySQL RANGE 查询优化详解
|
||
|
||
## 目录
|
||
1. [RANGE 查询基本概念](#range-查询基本概念)
|
||
2. [RANGE 查询原理和类型](#range-查询原理和类型)
|
||
3. [示例数据准备](#示例数据准备)
|
||
4. [RANGE 查询优化技巧](#range-查询优化技巧)
|
||
5. [索引策略和执行计划分析](#索引策略和执行计划分析)
|
||
6. [实际开发场景案例](#实际开发场景案例)
|
||
7. [分区表与 RANGE 优化](#分区表与-range-优化)
|
||
8. [性能监控和调优](#性能监控和调优)
|
||
9. [常见问题和最佳实践](#常见问题和最佳实践)
|
||
|
||
## RANGE 查询基本概念
|
||
|
||
### 什么是 RANGE 查询
|
||
|
||
RANGE 查询是指查询某个字段在指定范围内的数据,在 MySQL 执行计划中表现为 `type=range`。这类查询在实际开发中非常常见,包括:
|
||
|
||
- 时间范围查询:`WHERE create_time BETWEEN '2024-01-01' AND '2024-12-31'`
|
||
- 数值范围查询:`WHERE price BETWEEN 100 AND 500`
|
||
- 大于小于查询:`WHERE id > 1000 AND id < 2000`
|
||
- IN 查询:`WHERE status IN ('active', 'pending')`
|
||
|
||
### RANGE 查询的重要性
|
||
|
||
```sql
|
||
-- 典型的业务场景
|
||
-- 1. 订单查询:查询某个时间段的订单
|
||
SELECT * FROM orders WHERE order_date BETWEEN '2024-01-01' AND '2024-01-31';
|
||
|
||
-- 2. 商品筛选:查询某个价格区间的商品
|
||
SELECT * FROM products WHERE price >= 100 AND price <= 500;
|
||
|
||
-- 3. 日志分析:查询某个ID范围的日志
|
||
SELECT * FROM logs WHERE id > 10000 AND id <= 20000;
|
||
|
||
-- 4. 状态筛选:查询多个状态的记录
|
||
SELECT * FROM users WHERE status IN ('active', 'premium', 'vip');
|
||
```
|
||
|
||
## RANGE 查询原理和类型
|
||
|
||
### 1. MySQL 中 RANGE 查询的类型
|
||
|
||
| 类型 | 描述 | 示例 |
|------|------|------|
| **range** | 索引范围扫描:只读取索引中落在给定区间内的条目 | `WHERE id BETWEEN 1 AND 100` |
| **index_merge** | 多个索引合并:分别对多个索引做范围扫描后合并结果 | `WHERE id > 100 OR name = 'test'` |
| **ref_or_null** | 等值引用查询,并额外查找 NULL 值 | `WHERE id = 1 OR id IS NULL` |
|
||
|
||
### 2. RANGE 查询的执行流程
|
||
|
||
```sql
|
||
-- 执行流程示意(以下为说明文字,并非可执行的 SQL)
-- 1. 解析 SQL 语句和条件
-- 2. 选择最优索引
-- 3. 定位起始位置(起始键值)
-- 4. 扫描到结束位置(结束键值)
-- 5. 返回符合条件的记录
|
||
```
|
||
|
||
### 3. 索引选择性和基数
|
||
|
||
```sql
|
||
-- 查看索引选择性
|
||
SELECT
|
||
COUNT(DISTINCT column_name) / COUNT(*) as selectivity,
|
||
COUNT(DISTINCT column_name) as cardinality,
|
||
COUNT(*) as total_rows
|
||
FROM table_name;
|
||
|
||
-- 选择性越高,索引效果越好
|
||
-- 选择性 = 不重复值数量 / 总行数
|
||
-- 选择性接近 1 表示索引效果最好
|
||
```
|
||
|
||
## 示例数据准备
|
||
|
||
### 创建测试表和数据
|
||
|
||
```sql
|
||
-- 创建订单表
|
||
-- Order table used by the range-query examples in this document.
CREATE TABLE orders (
    order_id   BIGINT        NOT NULL AUTO_INCREMENT,
    user_id    INTEGER       NOT NULL,
    product_id INTEGER       NOT NULL,
    order_date DATE          NOT NULL,
    order_time DATETIME      NOT NULL,
    amount     DECIMAL(10,2) NOT NULL,
    quantity   INTEGER       NOT NULL,
    status     ENUM('pending', 'paid', 'shipped', 'delivered', 'cancelled') NOT NULL,
    region     VARCHAR(50)   NOT NULL,
    channel    VARCHAR(30)   NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,

    PRIMARY KEY (order_id),

    -- Single-column indexes, one per column filtered on in the examples.
    INDEX idx_user_id    (user_id),
    INDEX idx_product_id (product_id),
    INDEX idx_order_date (order_date),
    INDEX idx_order_time (order_time),
    INDEX idx_amount     (amount),
    INDEX idx_status     (status),
    INDEX idx_region     (region),

    -- Composite indexes: leading column first, order_date second.
    INDEX idx_user_date   (user_id, order_date),
    INDEX idx_status_date (status, order_date),
    INDEX idx_amount_date (amount, order_date)
) ENGINE = InnoDB;
|
||
|
||
-- 创建商品表
|
||
-- Product table used by the price/stock range examples.
CREATE TABLE products (
    product_id       INTEGER      NOT NULL AUTO_INCREMENT,
    product_name     VARCHAR(200) NOT NULL,
    category_id      INTEGER      NOT NULL,
    price            DECIMAL(8,2) NOT NULL,
    cost             DECIMAL(8,2) NOT NULL,
    stock_quantity   INTEGER      NOT NULL,
    weight           DECIMAL(6,3),
    created_date     DATE         NOT NULL,
    last_update_time DATETIME     NOT NULL,
    status           TINYINT      NOT NULL DEFAULT 1,

    PRIMARY KEY (product_id),

    -- Single-column indexes.
    INDEX idx_category     (category_id),
    INDEX idx_price        (price),
    INDEX idx_stock        (stock_quantity),
    INDEX idx_created_date (created_date),
    INDEX idx_status       (status),

    -- Composite indexes: leading column first, price second.
    INDEX idx_category_price (category_id, price),
    INDEX idx_status_price   (status, price)
) ENGINE = InnoDB;
|
||
|
||
-- 创建用户访问日志表
|
||
-- Access-log table used by the response-time / time-window examples.
CREATE TABLE access_logs (
    log_id           BIGINT NOT NULL AUTO_INCREMENT,
    user_id          INTEGER,
    ip_address       VARCHAR(45),
    request_url      VARCHAR(500),
    request_method   VARCHAR(10),
    response_code    INTEGER,
    response_time_ms INTEGER,
    user_agent       TEXT,
    access_time      DATETIME NOT NULL,
    session_id       VARCHAR(64),

    PRIMARY KEY (log_id),

    -- Single-column indexes.
    INDEX idx_user_id       (user_id),
    INDEX idx_access_time   (access_time),
    INDEX idx_response_code (response_code),
    INDEX idx_response_time (response_time_ms),

    -- Composite indexes: leading column first, access_time second.
    INDEX idx_user_time (user_id, access_time),
    INDEX idx_code_time (response_code, access_time)
) ENGINE = InnoDB;
|
||
|
||
-- 插入测试数据(使用存储过程快速生成大量数据)
|
||
-- Drop any previous version so this script can be re-run.
DROP PROCEDURE IF EXISTS GenerateTestData;

DELIMITER //
-- GenerateTestData: empties orders / products / access_logs and refills them
-- with random rows (1000 products, max_records orders, max_records log rows).
-- All dates are generated relative to the current date (up to 365 days back for
-- orders, 30 days for logs), so example queries with fixed calendar dates only
-- return rows when those dates fall inside that window.
CREATE PROCEDURE GenerateTestData()
BEGIN
    DECLARE i INT DEFAULT 1;
    DECLARE max_records INT DEFAULT 100000;
    -- One timestamp per order, so order_date and order_time always agree.
    DECLARE v_order_time DATETIME;

    -- Empty the tables (TRUNCATE commits implicitly).
    TRUNCATE TABLE orders;
    TRUNCATE TABLE products;
    TRUNCATE TABLE access_logs;

    -- Products. An explicit transaction is opened so the rows are committed
    -- together; with autocommit enabled every INSERT would otherwise commit
    -- on its own.
    START TRANSACTION;
    WHILE i <= 1000 DO
        INSERT INTO products (
            product_name, category_id, price, cost, stock_quantity,
            weight, created_date, last_update_time, status
        ) VALUES (
            CONCAT('产品_', i),
            (i % 50) + 1,
            ROUND(RAND() * 1000 + 10, 2),
            ROUND(RAND() * 500 + 5, 2),
            FLOOR(RAND() * 1000),
            ROUND(RAND() * 10, 3),
            DATE_SUB(CURDATE(), INTERVAL FLOOR(RAND() * 365) DAY),
            DATE_SUB(NOW(), INTERVAL FLOOR(RAND() * 30) DAY),
            IF(RAND() > 0.1, 1, 0)
        );
        SET i = i + 1;
    END WHILE;
    COMMIT;

    -- Orders, committed in batches of 10000 rows.
    SET i = 1;
    START TRANSACTION;
    WHILE i <= max_records DO
        SET v_order_time = DATE_SUB(NOW(), INTERVAL FLOOR(RAND() * 365 * 24 * 60) MINUTE);

        INSERT INTO orders (
            user_id, product_id, order_date, order_time, amount, quantity,
            status, region, channel
        ) VALUES (
            FLOOR(RAND() * 10000) + 1,
            FLOOR(RAND() * 1000) + 1,
            DATE(v_order_time),
            v_order_time,
            ROUND(RAND() * 1000 + 10, 2),
            FLOOR(RAND() * 5) + 1,
            ELT(FLOOR(RAND() * 5) + 1, 'pending', 'paid', 'shipped', 'delivered', 'cancelled'),
            ELT(FLOOR(RAND() * 6) + 1, '北京', '上海', '广州', '深圳', '杭州', '成都'),
            ELT(FLOOR(RAND() * 4) + 1, 'web', 'mobile', 'api', 'admin')
        );

        IF i % 10000 = 0 THEN
            COMMIT;
            START TRANSACTION;  -- open the next batch
        END IF;
        SET i = i + 1;
    END WHILE;
    COMMIT;

    -- Access-log rows, committed in batches of 10000 rows.
    SET i = 1;
    START TRANSACTION;
    WHILE i <= max_records DO
        INSERT INTO access_logs (
            user_id, ip_address, request_url, request_method,
            response_code, response_time_ms, access_time, session_id
        ) VALUES (
            -- roughly 30% of the rows get a NULL user_id
            IF(RAND() > 0.3, FLOOR(RAND() * 10000) + 1, NULL),
            -- each octet is 0..255 (multiplying by 255 could never yield 255)
            CONCAT(
                FLOOR(RAND() * 256), '.',
                FLOOR(RAND() * 256), '.',
                FLOOR(RAND() * 256), '.',
                FLOOR(RAND() * 256)
            ),
            CONCAT('/api/v1/', ELT(FLOOR(RAND() * 5) + 1, 'users', 'orders', 'products', 'search', 'cart')),
            ELT(FLOOR(RAND() * 4) + 1, 'GET', 'POST', 'PUT', 'DELETE'),
            -- 200 is listed five times, so about half of the rows are 200
            ELT(FLOOR(RAND() * 10) + 1, 200, 200, 200, 200, 200, 404, 500, 403, 401, 302),
            FLOOR(RAND() * 5000) + 10,
            DATE_SUB(NOW(), INTERVAL FLOOR(RAND() * 30 * 24 * 60) MINUTE),
            MD5(CONCAT(i, RAND()))
        );

        IF i % 10000 = 0 THEN
            COMMIT;
            START TRANSACTION;  -- open the next batch
        END IF;
        SET i = i + 1;
    END WHILE;

    COMMIT;
END //
DELIMITER ;
|
||
|
||
-- 执行数据生成(注意:这会花费一些时间)
|
||
CALL GenerateTestData();
|
||
|
||
-- 更新表统计信息
|
||
ANALYZE TABLE orders, products, access_logs;
|
||
```
|
||
|
||
## RANGE 查询优化技巧
|
||
|
||
### 1. 日期范围查询优化
|
||
|
||
```sql
|
||
-- ❌ 低效的日期查询(无法使用索引)
|
||
SELECT * FROM orders
|
||
WHERE YEAR(order_date) = 2024 AND MONTH(order_date) = 1;
|
||
|
||
-- ❌ 函数导致全表扫描
|
||
SELECT * FROM orders
|
||
WHERE DATE_FORMAT(order_date, '%Y-%m') = '2024-01';
|
||
|
||
-- ✅ 高效的日期范围查询
|
||
SELECT * FROM orders
|
||
WHERE order_date >= '2024-01-01' AND order_date < '2024-02-01';
|
||
|
||
-- ✅ 使用 BETWEEN(包含边界)
|
||
SELECT * FROM orders
|
||
WHERE order_date BETWEEN '2024-01-01' AND '2024-01-31';
|
||
|
||
-- 执行计划对比
|
||
EXPLAIN FORMAT=JSON
|
||
SELECT * FROM orders
|
||
WHERE order_date >= '2024-01-01' AND order_date < '2024-02-01';
|
||
```
|
||
|
||
### 2. 数值范围查询优化
|
||
|
||
```sql
|
||
-- ✅ 基本数值范围查询
|
||
SELECT * FROM products
|
||
WHERE price BETWEEN 100 AND 500;
|
||
|
||
-- ✅ 组合条件优化
|
||
SELECT * FROM products
|
||
WHERE price >= 100 AND price <= 500
|
||
AND status = 1;
|
||
|
||
-- 🔍 查看执行计划
|
||
EXPLAIN FORMAT=JSON
|
||
SELECT * FROM products
|
||
WHERE price >= 100 AND price <= 500 AND status = 1;
|
||
|
||
-- ✅ 多条件范围查询优化
|
||
SELECT * FROM orders
|
||
WHERE amount >= 100 AND amount <= 1000
|
||
AND order_date >= '2024-01-01' AND order_date <= '2024-12-31';
|
||
|
||
-- 🔍 分析索引选择
|
||
EXPLAIN FORMAT=JSON
|
||
SELECT * FROM orders
|
||
WHERE amount >= 100 AND amount <= 1000
|
||
AND order_date >= '2024-01-01' AND order_date <= '2024-12-31';
|
||
```
|
||
|
||
### 3. IN 查询优化
|
||
|
||
```sql
|
||
-- ✅ 基本 IN 查询
|
||
SELECT * FROM orders WHERE status IN ('paid', 'shipped', 'delivered');
|
||
|
||
-- ✅ 大量 IN 值的优化
|
||
SELECT * FROM orders
|
||
WHERE user_id IN (1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50);
|
||
|
||
-- ❌ 避免过多的 IN 值(可能导致性能问题)
|
||
-- SELECT * FROM orders WHERE user_id IN (1,2,3,...,1000); -- 太多值
|
||
|
||
-- ✅ 使用 EXISTS 替代大量 IN 值
|
||
CREATE TEMPORARY TABLE temp_users (user_id INT PRIMARY KEY);
|
||
INSERT INTO temp_users VALUES (1), (5), (10), (15), (20);
|
||
|
||
SELECT o.* FROM orders o
|
||
WHERE EXISTS (SELECT 1 FROM temp_users t WHERE t.user_id = o.user_id);
|
||
|
||
-- ✅ 使用 JOIN 替代大量 IN 值
|
||
SELECT o.* FROM orders o
|
||
INNER JOIN temp_users t ON o.user_id = t.user_id;
|
||
```
|
||
|
||
### 4. 复合索引的 RANGE 查询优化
|
||
|
||
```sql
|
||
-- 创建复合索引
|
||
CREATE INDEX idx_user_status_date ON orders(user_id, status, order_date);
|
||
|
||
-- ✅ 充分利用复合索引(遵循最左前缀原则)
|
||
SELECT * FROM orders
|
||
WHERE user_id = 1001
|
||
AND status IN ('paid', 'shipped')
|
||
AND order_date >= '2024-01-01';
|
||
|
||
-- ✅ 部分利用复合索引
|
||
SELECT * FROM orders
|
||
WHERE user_id = 1001
|
||
AND order_date >= '2024-01-01';
|
||
|
||
-- ⚠️ Cannot use idx_user_status_date (user_id, status, order_date): its leading
--    column user_id is not constrained, so the leftmost-prefix rule is broken.
--    The query is not necessarily a full scan, though: the orders table also
--    defines idx_status_date (status, order_date), which matches this predicate.
--    Check the "key" column of EXPLAIN to see which index is actually chosen.
SELECT * FROM orders
WHERE status = 'paid'
  AND order_date >= '2024-01-01';
|
||
|
||
-- 🔍 验证索引使用情况
|
||
EXPLAIN FORMAT=JSON
|
||
SELECT * FROM orders
|
||
WHERE user_id = 1001
|
||
AND status IN ('paid', 'shipped')
|
||
AND order_date >= '2024-01-01';
|
||
```
|
||
|
||
## 索引策略和执行计划分析
|
||
|
||
### 1. 执行计划解读
|
||
|
||
```sql
|
||
-- 创建测试查询并分析执行计划
|
||
EXPLAIN FORMAT=JSON
|
||
SELECT * FROM orders
|
||
WHERE order_date BETWEEN '2024-01-01' AND '2024-01-31'
|
||
AND amount >= 100;
|
||
|
||
-- 关键信息解读:
|
||
-- "access_type": "range" - 表示使用了范围扫描
|
||
-- "key": "idx_order_date" - 使用的索引名称
|
||
-- "rows_examined_per_scan": 预估扫描行数
|
||
-- "cost_info": 查询成本信息
|
||
```
|
||
|
||
### 2. 索引选择性分析
|
||
|
||
```sql
|
||
-- 分析不同字段的选择性
|
||
SELECT
|
||
'order_date' as column_name,
|
||
COUNT(DISTINCT order_date) as distinct_values,
|
||
COUNT(*) as total_rows,
|
||
COUNT(DISTINCT order_date) / COUNT(*) as selectivity
|
||
FROM orders
|
||
|
||
UNION ALL
|
||
|
||
SELECT
|
||
'amount' as column_name,
|
||
COUNT(DISTINCT amount) as distinct_values,
|
||
COUNT(*) as total_rows,
|
||
COUNT(DISTINCT amount) / COUNT(*) as selectivity
|
||
FROM orders
|
||
|
||
UNION ALL
|
||
|
||
SELECT
|
||
'status' as column_name,
|
||
COUNT(DISTINCT status) as distinct_values,
|
||
COUNT(*) as total_rows,
|
||
COUNT(DISTINCT status) / COUNT(*) as selectivity
|
||
FROM orders;
|
||
|
||
-- 根据选择性结果调整索引策略
|
||
-- 高选择性字段适合做索引前缀
|
||
-- 低选择性字段适合放在组合索引后面
|
||
```
|
||
|
||
### 3. 成本分析和索引选择
|
||
|
||
```sql
|
||
-- Force each candidate index in turn and compare the timings.
-- FORCE INDEX is used instead of USE INDEX: USE INDEX only limits the list of
-- candidate indexes and the optimizer may still choose a table scan, whereas
-- FORCE INDEX additionally treats a table scan as very expensive.

-- Date index
SELECT * FROM orders FORCE INDEX (idx_order_date)
WHERE order_date BETWEEN '2024-01-01' AND '2024-01-31'
  AND amount >= 100;

-- Amount index
SELECT * FROM orders FORCE INDEX (idx_amount)
WHERE order_date BETWEEN '2024-01-01' AND '2024-01-31'
  AND amount >= 100;

-- Composite index (amount, order_date). amount is a range condition on the
-- leading column, so order_date does not narrow the scanned range any further.
SELECT * FROM orders FORCE INDEX (idx_amount_date)
WHERE order_date BETWEEN '2024-01-01' AND '2024-01-31'
  AND amount >= 100;

-- Baseline: let the optimizer choose.
SELECT * FROM orders
WHERE order_date BETWEEN '2024-01-01' AND '2024-01-31'
  AND amount >= 100;
|
||
```
|
||
|
||
### 4. 索引提示的使用
|
||
|
||
```sql
|
||
-- 强制使用特定索引
|
||
SELECT * FROM orders FORCE INDEX (idx_order_date)
|
||
WHERE order_date >= '2024-01-01' AND amount > 100;
|
||
|
||
-- 忽略某个索引
|
||
SELECT * FROM orders IGNORE INDEX (idx_amount)
|
||
WHERE order_date >= '2024-01-01' AND amount > 100;
|
||
|
||
-- 建议使用某个索引
|
||
SELECT * FROM orders USE INDEX (idx_amount_date)
|
||
WHERE order_date >= '2024-01-01' AND amount > 100;
|
||
```
|
||
|
||
## 实际开发场景案例
|
||
|
||
### 案例1:电商订单查询系统
|
||
|
||
```sql
|
||
-- Requirement: one user's orders within a date window, sorted by amount.
-- Original query
SELECT * FROM orders
WHERE user_id = 1001
  AND order_date BETWEEN '2024-01-01' AND '2024-01-31'
ORDER BY amount DESC;

-- Option 1: a dedicated composite index.
-- Because order_date is a RANGE condition, the index entries come back ordered
-- by order_date first, so the index cannot deliver the rows in amount order and
-- a sort ("Using filesort") is still expected in the plan. What the index does
-- provide is a tight scan range on (user_id, order_date). A DESC attribute on
-- amount would not help for the same reason, so it is omitted.
-- Note that idx_user_date (user_id, order_date) on the orders table already
-- supplies the same scan range.
CREATE INDEX idx_user_date_amount ON orders(user_id, order_date, amount);

-- Query after adding the index (the statement text is unchanged)
SELECT * FROM orders
WHERE user_id = 1001
  AND order_date BETWEEN '2024-01-01' AND '2024-01-31'
ORDER BY amount DESC;

-- Verify the effect: check "key", "rows_examined_per_scan" and whether a
-- filesort is reported.
EXPLAIN FORMAT=JSON
SELECT * FROM orders
WHERE user_id = 1001
  AND order_date BETWEEN '2024-01-01' AND '2024-01-31'
ORDER BY amount DESC;

-- Pagination. amount is not unique, so order_id is added as a tie-breaker;
-- without it, rows with equal amounts may move between pages.
SELECT * FROM orders
WHERE user_id = 1001
  AND order_date BETWEEN '2024-01-01' AND '2024-01-31'
ORDER BY amount DESC, order_id DESC
LIMIT 20 OFFSET 0;
|
||
```
|
||
|
||
### 案例2:实时数据分析查询
|
||
|
||
```sql
|
||
-- Requirement: order count and total amount per hour for the last 24 hours.
-- Original query
SELECT
    DATE_FORMAT(order_time, '%Y-%m-%d %H:00:00') AS hour_bucket,
    COUNT(*) AS order_count,
    SUM(amount) AS total_amount
FROM orders
WHERE order_time >= DATE_SUB(NOW(), INTERVAL 24 HOUR)
GROUP BY DATE_FORMAT(order_time, '%Y-%m-%d %H:00:00')
ORDER BY hour_bucket;

-- Step 1: an index on (order_time, amount) covers this query, so the range
-- scan and the aggregation can be served from the index alone.
CREATE INDEX idx_order_time_amount ON orders(order_time, amount);

-- Step 2: bucket with date/time functions that return a DATETIME instead of a
-- formatted string. The WHERE clause leaves order_time bare so the range scan
-- is still possible; the bucketing expression only runs on the selected rows.
-- (Epoch arithmetic such as UNIX_TIMESTAMP(order_time) % 3600 is avoided: it
-- depends on the session time zone and is off for zones whose UTC offset is
-- not a whole number of hours.)
SELECT
    TIMESTAMP(DATE(order_time), MAKETIME(HOUR(order_time), 0, 0)) AS hour_bucket,
    COUNT(*) AS order_count,
    SUM(amount) AS total_amount
FROM orders
WHERE order_time >= DATE_SUB(NOW(), INTERVAL 24 HOUR)
GROUP BY TIMESTAMP(DATE(order_time), MAKETIME(HOUR(order_time), 0, 0))
ORDER BY hour_bucket;

-- Step 3: precompute the hour bucket as a stored generated column.
-- Generated-column expressions must be deterministic; time-zone dependent
-- functions such as UNIX_TIMESTAMP()/FROM_UNIXTIME() are therefore not used.
ALTER TABLE orders ADD COLUMN hour_bucket DATETIME GENERATED ALWAYS AS
    (TIMESTAMP(DATE(order_time), MAKETIME(HOUR(order_time), 0, 0))) STORED;

CREATE INDEX idx_hour_bucket_amount ON orders(hour_bucket, amount);

-- Final query: the current hour plus the 23 preceding hour buckets.
-- The lower bound is truncated to the full hour (minutes AND seconds); if only
-- the minutes were removed, the oldest bucket would fall just below the bound
-- and be dropped.
SELECT
    hour_bucket,
    COUNT(*) AS order_count,
    SUM(amount) AS total_amount
FROM orders
WHERE hour_bucket >= TIMESTAMP(CURDATE(), MAKETIME(HOUR(NOW()), 0, 0)) - INTERVAL 23 HOUR
GROUP BY hour_bucket
ORDER BY hour_bucket;
|
||
```
|
||
|
||
### 案例3:用户行为分析
|
||
|
||
```sql
|
||
-- Requirement: distribution of requests over response-time buckets for the
-- last 7 days.
-- Both queries below filter on access_time first, so the index starts with
-- access_time and then carries the two columns the queries read; that makes it
-- a covering index for them. (An index on (response_time_ms, response_code)
-- alone does not contain access_time and cannot serve the time filter.)
CREATE INDEX idx_access_time_response ON access_logs(access_time, response_time_ms, response_code);

-- Single-pass analysis query
SELECT
    CASE
        WHEN response_time_ms < 100 THEN '< 100ms'
        WHEN response_time_ms < 500 THEN '100-500ms'
        WHEN response_time_ms < 1000 THEN '500ms-1s'
        WHEN response_time_ms < 3000 THEN '1-3s'
        ELSE '> 3s'
    END AS response_time_bucket,
    COUNT(*) AS request_count,
    COUNT(CASE WHEN response_code = 200 THEN 1 END) AS success_count,
    ROUND(AVG(response_time_ms), 2) AS avg_response_time
FROM access_logs
WHERE access_time >= DATE_SUB(NOW(), INTERVAL 7 DAY)
GROUP BY
    CASE
        WHEN response_time_ms < 100 THEN '< 100ms'
        WHEN response_time_ms < 500 THEN '100-500ms'
        WHEN response_time_ms < 1000 THEN '500ms-1s'
        WHEN response_time_ms < 3000 THEN '1-3s'
        ELSE '> 3s'
    END
-- Order the buckets by their smallest member. Sorting by a second CASE over the
-- raw column is rejected under sql_mode=ONLY_FULL_GROUP_BY, because that
-- expression is neither aggregated nor part of GROUP BY.
ORDER BY MIN(response_time_ms);

-- Alternative: one simple range query per bucket.
-- Each branch re-reads the 7-day window, so compare it against the single-pass
-- query with EXPLAIN before preferring it. It returns counts only.
SELECT '< 100ms' AS bucket, COUNT(*) AS count FROM access_logs
WHERE access_time >= DATE_SUB(NOW(), INTERVAL 7 DAY) AND response_time_ms < 100
UNION ALL
SELECT '100-500ms', COUNT(*) FROM access_logs
WHERE access_time >= DATE_SUB(NOW(), INTERVAL 7 DAY) AND response_time_ms >= 100 AND response_time_ms < 500
UNION ALL
SELECT '500ms-1s', COUNT(*) FROM access_logs
WHERE access_time >= DATE_SUB(NOW(), INTERVAL 7 DAY) AND response_time_ms >= 500 AND response_time_ms < 1000
UNION ALL
SELECT '1-3s', COUNT(*) FROM access_logs
WHERE access_time >= DATE_SUB(NOW(), INTERVAL 7 DAY) AND response_time_ms >= 1000 AND response_time_ms < 3000
UNION ALL
SELECT '> 3s', COUNT(*) FROM access_logs
WHERE access_time >= DATE_SUB(NOW(), INTERVAL 7 DAY) AND response_time_ms >= 3000;
|
||
```
|
||
|
||
## 分区表与 RANGE 优化
|
||
|
||
### 1. 基于日期的范围分区
|
||
|
||
```sql
|
||
-- 创建按月份分区的订单表
|
||
-- Orders table partitioned by month (partition key value = YYYYMM).
CREATE TABLE orders_partitioned (
    order_id BIGINT NOT NULL AUTO_INCREMENT,
    user_id INT NOT NULL,
    product_id INT NOT NULL,
    order_date DATE NOT NULL,
    order_time DATETIME NOT NULL,
    amount DECIMAL(10,2) NOT NULL,
    status ENUM('pending', 'paid', 'shipped', 'delivered', 'cancelled') NOT NULL,
    region VARCHAR(50) NOT NULL,

    -- Every PRIMARY/UNIQUE key of a partitioned table must contain all columns
    -- used by the partitioning expression; a key on order_id alone is rejected
    -- with error 1503. order_id stays first so AUTO_INCREMENT remains valid.
    PRIMARY KEY (order_id, order_date),

    KEY idx_user_id (user_id),
    KEY idx_order_date (order_date),
    KEY idx_amount (amount),
    KEY idx_user_date (user_id, order_date)
) ENGINE=InnoDB
-- NOTE: for DATE columns MySQL prunes range predicates automatically only when
-- the partitioning expression is YEAR(col), TO_DAYS(col) or TO_SECONDS(col),
-- or with RANGE COLUMNS(col). With this YYYYMM arithmetic expression, range
-- predicates on order_date need explicit partition selection,
-- e.g. FROM orders_partitioned PARTITION (p202403).
PARTITION BY RANGE (YEAR(order_date) * 100 + MONTH(order_date)) (
    PARTITION p202401 VALUES LESS THAN (202402),
    PARTITION p202402 VALUES LESS THAN (202403),
    PARTITION p202403 VALUES LESS THAN (202404),
    PARTITION p202404 VALUES LESS THAN (202405),
    PARTITION p202405 VALUES LESS THAN (202406),
    PARTITION p202406 VALUES LESS THAN (202407),
    PARTITION p202407 VALUES LESS THAN (202408),
    PARTITION p202408 VALUES LESS THAN (202409),
    PARTITION p202409 VALUES LESS THAN (202410),
    PARTITION p202410 VALUES LESS THAN (202411),
    PARTITION p202411 VALUES LESS THAN (202412),
    PARTITION p202412 VALUES LESS THAN (202501),
    -- catch-all for every later month
    PARTITION p_future VALUES LESS THAN MAXVALUE
);
|
||
|
||
-- Range queries against the partitioned table.
-- ✅ Restrict the scan to the March partition.
-- orders_partitioned is partitioned by YEAR(order_date) * 100 + MONTH(order_date).
-- MySQL prunes range predicates on a DATE column automatically only for
-- YEAR(), TO_DAYS(), TO_SECONDS() or RANGE COLUMNS partitioning, so with this
-- expression a BETWEEN alone would still touch every partition. Naming the
-- partition explicitly gives the intended single-partition scan.
SELECT * FROM orders_partitioned PARTITION (p202403)
WHERE order_date BETWEEN '2024-03-01' AND '2024-03-31';

-- Check which partitions are read: see the "partitions" column.
-- (Plain EXPLAIN reports it; the EXPLAIN PARTITIONS form no longer exists in
-- MySQL 8.0.)
EXPLAIN
SELECT * FROM orders_partitioned PARTITION (p202403)
WHERE order_date BETWEEN '2024-03-01' AND '2024-03-31';

-- ❌ No pruning: the column is wrapped in functions in the predicate.
SELECT * FROM orders_partitioned
WHERE YEAR(order_date) = 2024 AND MONTH(order_date) = 3;
|
||
```
|
||
|
||
### 2. 基于数值的范围分区
|
||
|
||
```sql
|
||
-- 创建按用户ID范围分区的表
|
||
CREATE TABLE user_activities_partitioned (
    activity_id BIGINT NOT NULL AUTO_INCREMENT,
    user_id INT NOT NULL,
    activity_type VARCHAR(50) NOT NULL,
    activity_time DATETIME NOT NULL,
    activity_data JSON,

    -- The primary key must include the partitioning column user_id; a key on
    -- activity_id alone is rejected with error 1503.
    PRIMARY KEY (activity_id, user_id),

    KEY idx_user_time (user_id, activity_time),
    KEY idx_activity_time (activity_time)
) ENGINE=InnoDB
PARTITION BY RANGE (user_id) (
    PARTITION p0 VALUES LESS THAN (1000),
    PARTITION p1 VALUES LESS THAN (5000),
    PARTITION p2 VALUES LESS THAN (10000),
    PARTITION p3 VALUES LESS THAN (50000),
    PARTITION p4 VALUES LESS THAN MAXVALUE
);

-- Query for one user-id range. user_id 1000..4999 lies entirely inside p1
-- (values >= 1000 and < 5000), and the partitioning expression is the plain
-- integer column, so the range can be pruned to that partition.
SELECT * FROM user_activities_partitioned
WHERE user_id BETWEEN 1000 AND 4999
  AND activity_time >= '2024-01-01';

-- Execution plan: the "partitions" column lists the partitions that are read.
-- (Plain EXPLAIN reports it; EXPLAIN PARTITIONS no longer exists in MySQL 8.0.)
EXPLAIN
SELECT * FROM user_activities_partitioned
WHERE user_id BETWEEN 1000 AND 4999
  AND activity_time >= '2024-01-01';
|
||
```
|
||
|
||
### 3. 分区表维护
|
||
|
||
```sql
|
||
-- Add a new partition.
-- orders_partitioned ends with the catch-all p_future (LESS THAN MAXVALUE), and
-- ADD PARTITION can only append after the last partition, so it fails here
-- (MAXVALUE can only be used in the last partition definition). Split
-- p_future instead.
ALTER TABLE orders_partitioned
REORGANIZE PARTITION p_future INTO (
    PARTITION p202501 VALUES LESS THAN (202502),
    PARTITION p_future VALUES LESS THAN MAXVALUE
);

-- Drop an old partition. This deletes the rows stored in it.
ALTER TABLE orders_partitioned DROP PARTITION p202401;

-- Split the catch-all partition again for the following months.
ALTER TABLE orders_partitioned
REORGANIZE PARTITION p_future INTO (
    PARTITION p202502 VALUES LESS THAN (202503),
    PARTITION p202503 VALUES LESS THAN (202504),
    PARTITION p_future VALUES LESS THAN MAXVALUE
);

-- Inspect the partition layout.
SELECT
    PARTITION_NAME,
    TABLE_ROWS,
    DATA_LENGTH,
    PARTITION_DESCRIPTION
FROM information_schema.PARTITIONS
WHERE TABLE_SCHEMA = DATABASE()
  AND TABLE_NAME = 'orders_partitioned';
|
||
```
|
||
|
||
## 性能监控和调优
|
||
|
||
### 1. 监控 RANGE 查询性能
|
||
|
||
```sql
|
||
-- Slowest statement digests that contain range-style predicates.
-- events_statements_summary_by_digest exposes the normalized statement as
-- DIGEST_TEXT and the execution count as COUNT_STAR (it has no sql_text or
-- exec_count column). Timer columns are in picoseconds.
SELECT
    digest_text,
    count_star AS exec_count,
    avg_timer_wait / 1000000000000 AS avg_time_sec,
    sum_rows_examined / count_star AS avg_rows_examined,
    sum_rows_sent / count_star AS avg_rows_sent,
    -- rows examined per row returned; NULL when the statement returned no rows
    sum_rows_examined / NULLIF(sum_rows_sent, 0) AS examine_ratio
FROM performance_schema.events_statements_summary_by_digest
WHERE digest_text LIKE '%BETWEEN%'
   OR digest_text LIKE '%>%'
   OR digest_text LIKE '%<%'
ORDER BY avg_timer_wait DESC
LIMIT 10;

-- Index usage in the current schema, busiest first.
-- Rows whose index_name is NULL account for table I/O done without an index.
SELECT
    object_schema,
    object_name,
    index_name,
    count_star AS usage_count,
    sum_timer_wait / 1000000000000 AS total_time_sec
FROM performance_schema.table_io_waits_summary_by_index_usage
WHERE object_schema = DATABASE()
  AND count_star > 0
ORDER BY sum_timer_wait DESC;
|
||
```
|
||
|
||
### 2. 性能测试脚本
|
||
|
||
```sql
|
||
-- 创建性能测试存储过程
|
||
DELIMITER //
-- TestRangeQueryPerformance: runs three COUNT(*) range queries against orders
-- and, after each one, returns a row with the test name and the elapsed
-- wall-clock time in milliseconds.
CREATE PROCEDURE TestRangeQueryPerformance()
BEGIN
    -- DATETIME(6) keeps the microseconds returned by NOW(6); a plain DATETIME
    -- has precision 0 and would round both timestamps to whole seconds.
    DECLARE start_time DATETIME(6);
    DECLARE end_time DATETIME(6);
    -- Milliseconds with three decimals, so sub-millisecond runs are not shown as 0.
    DECLARE duration_ms DECIMAL(12,3);

    -- Test 1: date range
    SET start_time = NOW(6);
    SELECT COUNT(*) FROM orders
    WHERE order_date BETWEEN '2024-01-01' AND '2024-01-31';
    SET end_time = NOW(6);
    SET duration_ms = TIMESTAMPDIFF(MICROSECOND, start_time, end_time) / 1000;
    SELECT 'Date Range Query' AS test_name, duration_ms AS duration_ms;

    -- Test 2: numeric range
    SET start_time = NOW(6);
    SELECT COUNT(*) FROM orders
    WHERE amount BETWEEN 100 AND 500;
    SET end_time = NOW(6);
    SET duration_ms = TIMESTAMPDIFF(MICROSECOND, start_time, end_time) / 1000;
    SELECT 'Amount Range Query' AS test_name, duration_ms AS duration_ms;

    -- Test 3: both ranges combined
    SET start_time = NOW(6);
    SELECT COUNT(*) FROM orders
    WHERE order_date BETWEEN '2024-01-01' AND '2024-01-31'
      AND amount BETWEEN 100 AND 500;
    SET end_time = NOW(6);
    SET duration_ms = TIMESTAMPDIFF(MICROSECOND, start_time, end_time) / 1000;
    SELECT 'Combined Range Query' AS test_name, duration_ms AS duration_ms;

END //
DELIMITER ;
|
||
|
||
-- 执行性能测试
|
||
CALL TestRangeQueryPerformance();
|
||
```
|
||
|
||
### 3. 自动化索引建议
|
||
|
||
```sql
|
||
-- 分析表的查询模式,建议索引
|
||
CREATE VIEW range_query_analysis AS
|
||
SELECT
|
||
'orders' as table_name,
|
||
'order_date' as column_name,
|
||
COUNT(DISTINCT order_date) as distinct_values,
|
||
COUNT(*) as total_rows,
|
||
COUNT(DISTINCT order_date) / COUNT(*) as selectivity,
|
||
CASE
|
||
WHEN COUNT(DISTINCT order_date) / COUNT(*) > 0.1 THEN '高选择性,建议单独建索引'
|
||
WHEN COUNT(DISTINCT order_date) / COUNT(*) > 0.01 THEN '中等选择性,建议组合索引'
|
||
ELSE '低选择性,不建议建索引'
|
||
END as index_recommendation
|
||
FROM orders
|
||
|
||
UNION ALL
|
||
|
||
SELECT
|
||
'orders' as table_name,
|
||
'amount' as column_name,
|
||
COUNT(DISTINCT amount) as distinct_values,
|
||
COUNT(*) as total_rows,
|
||
COUNT(DISTINCT amount) / COUNT(*) as selectivity,
|
||
CASE
|
||
WHEN COUNT(DISTINCT amount) / COUNT(*) > 0.1 THEN '高选择性,建议单独建索引'
|
||
WHEN COUNT(DISTINCT amount) / COUNT(*) > 0.01 THEN '中等选择性,建议组合索引'
|
||
ELSE '低选择性,不建议建索引'
|
||
END as index_recommendation
|
||
FROM orders;
|
||
|
||
-- 查看分析结果
|
||
SELECT * FROM range_query_analysis;
|
||
```
|
||
|
||
## 常见问题和最佳实践
|
||
|
||
### 1. 常见性能陷阱
|
||
|
||
```sql
|
||
-- ❌ Trap 1: applying a function to the indexed column in WHERE
SELECT * FROM orders WHERE YEAR(order_date) = 2024;
SELECT * FROM orders WHERE DATE_ADD(order_date, INTERVAL 1 DAY) = '2024-01-02';

-- ✅ Move the computation to the constant side and leave the column bare
SELECT * FROM orders WHERE order_date >= '2024-01-01' AND order_date < '2025-01-01';
SELECT * FROM orders WHERE order_date = '2024-01-01';

-- ❌ Trap 2: implicit conversion caused by mismatched types
-- user_id is INT. The string literal is converted to a number once, so the
-- index on user_id is still usable; it is sloppy rather than slow.
SELECT * FROM orders WHERE user_id = '1001';
-- The costly direction is the opposite one: a string column compared with a
-- number. Every row's region value has to be converted to a number, so the
-- index on region (idx_region) cannot be used for the lookup.
SELECT * FROM orders WHERE region = 100;

-- ✅ Compare each column with a literal of its own type
SELECT * FROM orders WHERE user_id = 1001;
SELECT * FROM orders WHERE region = '100';

-- ❌ Trap 3: LIKE on a numeric column
-- order_id is converted to a string for every row, so the primary key cannot
-- be used. The pattern also matches 10010, 100100, ... not only 1001.
SELECT * FROM orders WHERE order_id LIKE '1001%';

-- ✅ Exact id: numeric equality
SELECT * FROM orders WHERE order_id = 1001;

-- ✅ If prefix matching is really intended, spell the prefixes out as numeric
--    ranges (extend the list to the id lengths that can occur)
SELECT * FROM orders
WHERE order_id = 1001
   OR order_id BETWEEN 10010 AND 10019
   OR order_id BETWEEN 100100 AND 100199;
|
||
```
|
||
|
||
### 2. 索引设计最佳实践
|
||
|
||
```sql
|
||
-- ✅ Practice 1: design indexes around the most frequent queries
-- idx_user_status_date ON orders(user_id, status, order_date) is already created
-- in the composite-index section of this document. Creating it a second time
-- fails with "Duplicate key name", so the statement is shown here for
-- reference only:
--   CREATE INDEX idx_user_status_date ON orders(user_id, status, order_date);
CREATE INDEX idx_date_amount ON orders(order_date, amount);

-- ✅ Practice 2: take the sort order into account
-- For frequent "amount descending within one order_date" sorts.
-- Descending index keys are only stored that way from MySQL 8.0 on; earlier
-- versions parse DESC and ignore it.
CREATE INDEX idx_date_amount_desc ON orders(order_date, amount DESC);

-- ✅ Practice 3: covering indexes
-- When a query reads only a few columns, put all of them in the index.
CREATE INDEX idx_covering_order_summary ON orders(order_date, user_id, amount, status);

-- Every column this query touches is in idx_covering_order_summary, so it can
-- be answered from the index without visiting the table rows.
SELECT user_id, amount, status FROM orders
WHERE order_date BETWEEN '2024-01-01' AND '2024-01-31';
|
||
```
|
||
|
||
### 3. 查询优化检查清单
|
||
|
||
```sql
|
||
-- 优化检查清单
|
||
/*
|
||
1. ✅ 避免在WHERE条件中使用函数
|
||
2. ✅ 确保数据类型匹配,避免隐式转换
|
||
3. ✅ 合理使用复合索引,遵循最左前缀原则
|
||
4. ✅ 考虑使用覆盖索引减少回表操作
|
||
5. ✅ 对于大量IN值,考虑使用JOIN或EXISTS
|
||
6. ✅ 利用分区表的分区剪枝特性
|
||
7. ✅ 定期分析表统计信息
|
||
8. ✅ 监控慢查询日志和执行计划
|
||
9. ✅ 考虑查询结果的数据量,适当使用LIMIT
|
||
10. ✅ 根据业务特点选择合适的索引策略
|
||
*/
|
||
|
||
-- 索引效果验证模板
|
||
EXPLAIN FORMAT=JSON SELECT * FROM table_name WHERE conditions;
|
||
|
||
-- Timing template.
-- BENCHMARK(count, expr) always returns 0 (the elapsed time is only printed by
-- the client) and accepts scalar expressions only, so it does not yield an
-- execution time for a query. EXPLAIN ANALYZE (MySQL 8.0.18+) executes the
-- statement and reports the actual time and row count of every plan step.
EXPLAIN ANALYZE
SELECT COUNT(*) FROM table_name WHERE conditions;
|
||
```
|
||
|
||
### 4. 实际生产环境建议
|
||
|
||
```sql
|
||
-- 生产环境优化建议
|
||
|
||
-- 1. 定期收集表统计信息
|
||
ANALYZE TABLE orders, products, access_logs;
|
||
|
||
-- 2. Record unused indexes once a day.
-- Target table for the event below; it is not defined anywhere else in this
-- document, so it is created here.
CREATE TABLE IF NOT EXISTS index_usage_log (
    check_time  DATETIME        NOT NULL,
    object_name VARCHAR(64)     NOT NULL,
    index_name  VARCHAR(64)     NOT NULL,
    usage_count BIGINT UNSIGNED NOT NULL,
    KEY idx_check_time (check_time)
) ENGINE=InnoDB;

-- The event only fires while the event scheduler is enabled
-- (SET GLOBAL event_scheduler = ON).
CREATE EVENT check_index_usage
ON SCHEDULE EVERY 1 DAY
DO
    INSERT INTO index_usage_log (check_time, object_name, index_name, usage_count)
    SELECT
        NOW() AS check_time,
        object_name,
        index_name,
        count_star AS usage_count
    FROM performance_schema.table_io_waits_summary_by_index_usage
    WHERE object_schema = DATABASE()
      AND index_name IS NOT NULL  -- skip the per-table "no index used" rows
      AND count_star = 0;         -- indexes with no recorded I/O
|
||
|
||
-- 3. 自动优化建议
|
||
DELIMITER //
-- GenerateOptimizationSuggestions: returns two result sets of ready-to-run
-- statements: CREATE INDEX suggestions for a fixed list of candidate columns,
-- and DROP INDEX suggestions for indexes with no recorded I/O.
CREATE PROCEDURE GenerateOptimizationSuggestions()
BEGIN
    -- Candidate columns for a new index.
    -- The list is hand-maintained and only names columns that have no index in
    -- the table definitions of this document and can be indexed as they are
    -- (a TEXT column such as access_logs.user_agent would need a prefix length).
    SELECT
        'Consider adding index' AS suggestion,
        CONCAT('CREATE INDEX idx_', table_name, '_', column_name,
               ' ON ', table_name, '(', column_name, ');') AS sql_statement
    FROM (
        SELECT 'orders' AS table_name, 'channel' AS column_name
        UNION ALL
        SELECT 'products', 'last_update_time'
        UNION ALL
        SELECT 'access_logs', 'session_id'
    ) potential_indexes;

    -- Indexes that may be candidates for removal.
    -- The counters are reset when the server restarts, so review each
    -- suggestion before running it.
    SELECT
        'Consider dropping unused index' AS suggestion,
        CONCAT('DROP INDEX ', index_name, ' ON ', object_name, ';') AS sql_statement
    FROM performance_schema.table_io_waits_summary_by_index_usage
    WHERE object_schema = DATABASE()
      AND count_star = 0
      AND index_name IS NOT NULL
      AND index_name != 'PRIMARY';
END //
DELIMITER ;
|
||
|
||
-- 调用优化建议
|
||
CALL GenerateOptimizationSuggestions();
|
||
```
|
||
|
||
---
|
||
|
||
**总结:**
|
||
|
||
RANGE 查询优化是数据库性能优化的重要组成部分,关键要点包括:
|
||
|
||
1. **理解原理**:掌握 RANGE 查询的执行机制和索引选择逻辑
|
||
2. **索引设计**:根据查询模式设计合适的单列和复合索引
|
||
3. **查询优化**:避免函数使用,保证数据类型匹配,合理使用索引提示
|
||
4. **分区策略**:对于大表,考虑使用分区表提高查询效率
|
||
5. **性能监控**:建立完善的监控体系,持续优化查询性能
|
||
6. **最佳实践**:遵循数据库设计和查询优化的最佳实践
|
||
|
||
通过系统的 RANGE 查询优化,可以显著提升数据库查询性能,改善用户体验。 |