# MySQL索引优化
概述
索引是数据库性能优化的核心技术之一。本章将深入探讨MySQL索引的原理、类型、创建策略和优化技巧,帮助你构建高性能的数据库应用。
学习目标
- 理解索引的工作原理和数据结构
- 掌握不同类型索引的使用场景
- 学会索引设计和优化策略
- 了解索引对性能的影响
- 掌握索引监控和维护方法
核心概念
索引的工作原理
graph TB
A[查询请求] --> B{是否有索引}
B -->|有索引| C[使用索引查找]
B -->|无索引| D[全表扫描]
C --> E[快速定位数据]
D --> F[逐行检查数据]
E --> G[返回结果]
F --> G
subgraph "索引结构"
H[B+树索引]
I[哈希索引]
J[全文索引]
K[空间索引]
end
B+树索引结构
B+树是MySQL InnoDB存储引擎使用的主要索引结构,具有以下特点:
- 平衡树结构:所有叶子节点在同一层
- 顺序访问:叶子节点通过指针连接,支持范围查询
- 高扇出比:减少树的高度,提高查询效率
索引类型和创建
创建测试表
-- 删除已存在的测试表
DROP TABLE IF EXISTS performance_test;
-- 创建性能测试表
CREATE TABLE performance_test (
id INT AUTO_INCREMENT PRIMARY KEY,
user_id INT NOT NULL,
email VARCHAR(100) NOT NULL,
username VARCHAR(50) NOT NULL,
age INT,
city VARCHAR(50),
registration_date DATE,
last_login DATETIME,
status ENUM('active', 'inactive', 'suspended') DEFAULT 'active',
score DECIMAL(5,2),
description TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- 插入测试数据
INSERT INTO performance_test (user_id, email, username, age, city, registration_date, last_login, status, score, description) VALUES
(1001, 'alice@example.com', 'alice', 25, '北京', '2023-01-15', '2024-01-15 10:30:00', 'active', 85.50, '这是一个活跃用户的描述信息'),
(1002, 'bob@example.com', 'bob', 30, '上海', '2023-02-20', '2024-01-14 15:20:00', 'active', 92.30, '这是另一个用户的详细描述'),
(1003, 'charlie@example.com', 'charlie', 28, '广州', '2023-03-10', '2024-01-10 09:15:00', 'inactive', 78.90, '用户描述包含更多详细信息'),
(1004, 'diana@example.com', 'diana', 35, '深圳', '2023-04-05', '2024-01-16 14:45:00', 'active', 88.70, '这里是用户的个人简介和相关信息'),
(1005, 'eve@example.com', 'eve', 22, '杭州', '2023-05-12', '2024-01-12 11:30:00', 'suspended', 65.40, '包含关键词的用户描述文本内容');
索引类型演示
1. 普通索引(B-Tree)
-- 创建单列索引
CREATE INDEX idx_user_id ON performance_test(user_id);
CREATE INDEX idx_email ON performance_test(email);
CREATE INDEX idx_age ON performance_test(age);
CREATE INDEX idx_city ON performance_test(city);
CREATE INDEX idx_registration_date ON performance_test(registration_date);
-- 查看索引创建结果
SHOW INDEX FROM performance_test;
-- 结果示例:
-- +-----------------+------------+----------------------+--------------+-------------+
-- | Table | Non_unique | Key_name | Seq_in_index | Column_name |
-- +-----------------+------------+----------------------+--------------+-------------+
-- | performance_test| 0 | PRIMARY | 1 | id |
-- | performance_test| 1 | idx_user_id | 1 | user_id |
-- | performance_test| 1 | idx_email | 1 | email |
-- | performance_test| 1 | idx_age | 1 | age |
-- +-----------------+------------+----------------------+--------------+-------------+
2. 唯一索引
-- 创建唯一索引
CREATE UNIQUE INDEX idx_username_unique ON performance_test(username);
-- 验证唯一性约束
-- 以下插入会失败,因为username重复
-- INSERT INTO performance_test (user_id, email, username, age) VALUES (1006, 'test@example.com', 'alice', 26);
-- ERROR 1062 (23000): Duplicate entry 'alice' for key 'performance_test.idx_username_unique'
3. 复合索引
-- 创建复合索引
CREATE INDEX idx_city_age ON performance_test(city, age);
CREATE INDEX idx_status_score ON performance_test(status, score);
CREATE INDEX idx_user_date_status ON performance_test(user_id, registration_date, status);
-- 复合索引使用示例
-- 以下查询可以有效使用 idx_city_age 索引
SELECT * FROM performance_test WHERE city = '北京' AND age = 25;
-- 以下查询可以部分使用 idx_city_age 索引(只使用city部分)
SELECT * FROM performance_test WHERE city = '北京';
-- 以下查询无法使用 idx_city_age 索引(跳过了第一列)
SELECT * FROM performance_test WHERE age = 25;
4. 前缀索引
-- 创建前缀索引(只索引email的前10个字符)
CREATE INDEX idx_email_prefix ON performance_test(email(10));
-- 查看前缀索引的选择性
SELECT
COUNT(DISTINCT email) as total_unique,
COUNT(DISTINCT LEFT(email, 10)) as prefix_unique,
COUNT(DISTINCT LEFT(email, 10)) / COUNT(DISTINCT email) as selectivity
FROM performance_test;
-- 前缀索引适用于以下查询
SELECT * FROM performance_test WHERE email LIKE 'alice@%';
-- 但不适用于以下查询
SELECT * FROM performance_test WHERE email LIKE '%@example.com';
5. 全文索引
-- 创建全文索引
CREATE FULLTEXT INDEX idx_description_fulltext ON performance_test(description);
-- 全文搜索查询
SELECT *, MATCH(description) AGAINST('用户 描述' IN NATURAL LANGUAGE MODE) as relevance
FROM performance_test
WHERE MATCH(description) AGAINST('用户 描述' IN NATURAL LANGUAGE MODE)
ORDER BY relevance DESC;
-- 布尔模式全文搜索
SELECT * FROM performance_test
WHERE MATCH(description) AGAINST('+用户 -关键词' IN BOOLEAN MODE);
-- 查询扩展模式
SELECT * FROM performance_test
WHERE MATCH(description) AGAINST('用户' WITH QUERY EXPANSION);
6. 空间索引(适用于几何数据)
-- 创建包含空间数据的表
CREATE TABLE locations (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(100),
coordinates POINT NOT NULL,
SPATIAL INDEX idx_coordinates (coordinates)
) ENGINE=InnoDB;
-- 插入空间数据
INSERT INTO locations (name, coordinates) VALUES
('北京', POINT(116.4074, 39.9042)),
('上海', POINT(121.4737, 31.2304)),
('广州', POINT(113.2644, 23.1291));
-- 空间查询示例
SELECT name, ST_AsText(coordinates) as location
FROM locations
WHERE ST_Distance_Sphere(coordinates, POINT(116.4074, 39.9042)) < 1000000; -- 1000公里内
## 索引性能分析
### 性能测试查询
```sql
-- 测试不同类型的查询性能
-- 1. 按用户ID查询(使用idx_user_id索引)
EXPLAIN SELECT * FROM performance_test WHERE user_id = 1001;
-- 2. 按邮箱查询(使用idx_email索引)
EXPLAIN SELECT * FROM performance_test WHERE email = 'alice@example.com';
-- 3. 按年龄范围查询(使用idx_age索引)
EXPLAIN SELECT * FROM performance_test WHERE age BETWEEN 25 AND 35;
-- 4. 按城市查询(使用idx_city索引)
EXPLAIN SELECT * FROM performance_test WHERE city = '北京';
-- 5. 复合条件查询(使用idx_city_age复合索引)
EXPLAIN SELECT * FROM performance_test WHERE city = '上海' AND age > 30;
-- 6. 排序查询(使用idx_registration_date索引)
EXPLAIN SELECT * FROM performance_test ORDER BY registration_date DESC LIMIT 100;
-- 7. 聚合查询(使用idx_city索引)
EXPLAIN SELECT city, COUNT(*) FROM performance_test GROUP BY city;
查询执行计划分析
-- 查看详细的执行计划
EXPLAIN FORMAT=JSON
SELECT * FROM performance_test
WHERE city = '北京' AND age > 25 AND status = 'active';
-- 分析索引选择性
SELECT
'user_id' as column_name,
COUNT(DISTINCT user_id) as unique_values,
COUNT(*) as total_rows,
COUNT(DISTINCT user_id) / COUNT(*) as selectivity
FROM performance_test
UNION ALL
SELECT
'city' as column_name,
COUNT(DISTINCT city) as unique_values,
COUNT(*) as total_rows,
COUNT(DISTINCT city) / COUNT(*) as selectivity
FROM performance_test
UNION ALL
SELECT
'status' as column_name,
COUNT(DISTINCT status) as unique_values,
COUNT(*) as total_rows,
COUNT(DISTINCT status) / COUNT(*) as selectivity
FROM performance_test;
复合索引优化策略
复合索引设计原则
-- 创建测试查询的复合索引
CREATE INDEX idx_user_date_status ON performance_test(user_id, registration_date, status);
-- 测试复合索引的使用情况
-- 1. 使用索引的第一列(最优)
EXPLAIN SELECT * FROM performance_test WHERE user_id = 1001;
-- 2. 使用索引的前两列(较优)
EXPLAIN SELECT * FROM performance_test WHERE user_id = 1001 AND registration_date >= '2023-01-01';
-- 3. 使用索引的所有列(最优)
EXPLAIN SELECT * FROM performance_test
WHERE user_id = 1001 AND registration_date >= '2023-01-01' AND status = 'active';
-- 4. 跳过第一列(无法使用索引)
EXPLAIN SELECT * FROM performance_test WHERE registration_date >= '2023-01-01' AND status = 'active';
-- 5. 只使用最后一列(无法使用索引)
EXPLAIN SELECT * FROM performance_test WHERE status = 'active';
覆盖索引示例
-- 创建覆盖索引(包含查询所需的所有列)
CREATE INDEX idx_user_email_status ON performance_test(user_id, email, status);
-- 以下查询可以完全通过索引满足,无需回表
EXPLAIN SELECT user_id, email, status
FROM performance_test
WHERE user_id = 1001;
-- 查看是否使用了覆盖索引(Extra列显示"Using index")
EXPLAIN SELECT user_id, email
FROM performance_test
WHERE user_id BETWEEN 1001 AND 1003;
索引设计最佳实践
索引创建原则
1. 选择性原则
-- 计算列的选择性
SELECT
'user_id' as column_name,
COUNT(DISTINCT user_id) / COUNT(*) as selectivity,
CASE
WHEN COUNT(DISTINCT user_id) / COUNT(*) > 0.1 THEN '高选择性,适合创建索引'
ELSE '低选择性,不建议创建索引'
END as recommendation
FROM performance_test
UNION ALL
SELECT
'status' as column_name,
COUNT(DISTINCT status) / COUNT(*) as selectivity,
CASE
WHEN COUNT(DISTINCT status) / COUNT(*) > 0.1 THEN '高选择性,适合创建索引'
ELSE '低选择性,不建议创建索引'
END as recommendation
FROM performance_test;
-- 结果示例:
-- +-------------+-------------+--------------------------------+
-- | column_name | selectivity | recommendation |
-- +-------------+-------------+--------------------------------+
-- | user_id | 1.0000 | 高选择性,适合创建索引 |
-- | status | 0.6000 | 高选择性,适合创建索引 |
-- +-------------+-------------+--------------------------------+
2. 复合索引列顺序设计
-- 分析查询模式,确定复合索引列顺序
-- 假设有以下常见查询:
-- 1. WHERE city = ? AND age = ?
-- 2. WHERE city = ? AND age > ?
-- 3. WHERE city = ?
-- 分析各列的选择性
SELECT
COUNT(DISTINCT city) as city_unique,
COUNT(DISTINCT age) as age_unique,
COUNT(DISTINCT city) / COUNT(*) as city_selectivity,
COUNT(DISTINCT age) / COUNT(*) as age_selectivity
FROM performance_test;
-- 基于选择性和查询模式创建最优复合索引
-- city选择性更高,且所有查询都包含city条件,所以放在第一位
CREATE INDEX idx_city_age_optimized ON performance_test(city, age);
3. 覆盖索引设计
-- 分析查询需要的列
-- 查询:SELECT user_id, email, status FROM performance_test WHERE city = ?
-- 创建覆盖索引,包含WHERE条件列和SELECT列
CREATE INDEX idx_city_covering ON performance_test(city, user_id, email, status);
-- 验证覆盖索引效果
EXPLAIN SELECT user_id, email, status
FROM performance_test
WHERE city = '北京';
-- 期望看到 Extra: Using index(表示使用了覆盖索引)
4. 前缀索引长度选择
-- 分析不同前缀长度的选择性
SELECT
CONCAT('email(', LENGTH, ')') as prefix_length,
COUNT(DISTINCT LEFT(email, LENGTH)) as unique_values,
COUNT(DISTINCT LEFT(email, LENGTH)) / COUNT(*) as selectivity
FROM performance_test
CROSS JOIN (
SELECT 5 as LENGTH UNION ALL
SELECT 10 UNION ALL
SELECT 15 UNION ALL
SELECT 20
) lengths
GROUP BY LENGTH
ORDER BY LENGTH;
-- 选择合适的前缀长度(选择性接近完整列的长度)
CREATE INDEX idx_email_prefix_optimal ON performance_test(email(15));
索引监控和维护
索引使用情况监控
-- 查看表的所有索引
SHOW INDEX FROM performance_test;
-- 查看索引统计信息
SELECT
TABLE_NAME,
INDEX_NAME,
COLUMN_NAME,
CARDINALITY,
SUB_PART,
NULLABLE,
INDEX_TYPE
FROM information_schema.STATISTICS
WHERE TABLE_SCHEMA = DATABASE()
AND TABLE_NAME = 'performance_test'
ORDER BY TABLE_NAME, INDEX_NAME, SEQ_IN_INDEX;
-- 查看索引大小
SELECT
TABLE_NAME,
INDEX_NAME,
ROUND(STAT_VALUE * @@innodb_page_size / 1024 / 1024, 2) as 'Size (MB)'
FROM mysql.innodb_index_stats
WHERE DATABASE_NAME = DATABASE()
AND TABLE_NAME = 'performance_test'
AND STAT_NAME = 'size'
ORDER BY STAT_VALUE DESC;
查询执行计划分析
-- 分析不同查询的执行计划
EXPLAIN SELECT * FROM performance_test WHERE user_id = 1001;
EXPLAIN SELECT * FROM performance_test WHERE city = '北京' AND age > 30;
EXPLAIN SELECT * FROM performance_test WHERE email LIKE 'alice@%';
-- 使用EXPLAIN FORMAT=JSON获取详细信息
EXPLAIN FORMAT=JSON
SELECT * FROM performance_test
WHERE city = '北京' AND age BETWEEN 25 AND 35;
-- 分析索引使用效率
EXPLAIN SELECT * FROM performance_test
WHERE city = '北京'
ORDER BY registration_date DESC
LIMIT 10;
索引维护操作
-- 更新表统计信息
ANALYZE TABLE performance_test;
-- 优化表(重建索引,整理碎片)
OPTIMIZE TABLE performance_test;
-- 检查表完整性
CHECK TABLE performance_test;
-- 修复表
REPAIR TABLE performance_test;
常见索引问题和解决方案
索引失效场景
1. 使用函数或表达式
-- ❌ 错误:使用函数会导致索引失效
EXPLAIN SELECT * FROM performance_test WHERE UPPER(username) = 'ALICE';
-- type: ALL (全表扫描)
-- ✅ 正确:直接比较
EXPLAIN SELECT * FROM performance_test WHERE username = 'alice';
-- type: ref (使用索引)
-- 解决方案:创建函数索引(MySQL 8.0+)
CREATE INDEX idx_username_upper ON performance_test((UPPER(username)));
EXPLAIN SELECT * FROM performance_test WHERE UPPER(username) = 'ALICE';
2. LIKE通配符位置
-- ❌ 错误:通配符开头无法使用索引
EXPLAIN SELECT * FROM performance_test WHERE email LIKE '%@example.com';
-- type: ALL (全表扫描)
-- ✅ 正确:通配符在后面可以使用索引
EXPLAIN SELECT * FROM performance_test WHERE email LIKE 'alice@%';
-- type: range (使用索引范围扫描)
-- 解决方案:对于后缀搜索,考虑使用全文索引
CREATE FULLTEXT INDEX idx_email_fulltext ON performance_test(email);
SELECT * FROM performance_test
WHERE MATCH(email) AGAINST('example.com' IN BOOLEAN MODE);
3. 数据类型不匹配
-- ❌ 错误:数据类型不匹配
EXPLAIN SELECT * FROM performance_test WHERE user_id = '1001'; -- 字符串比较数字
-- 可能导致类型转换,影响索引使用
-- ✅ 正确:数据类型匹配
EXPLAIN SELECT * FROM performance_test WHERE user_id = 1001;
-- type: ref (正确使用索引)
4. OR条件优化
-- ❌ 低效:OR连接不同列
EXPLAIN SELECT * FROM performance_test
WHERE username = 'alice' OR email = 'alice@example.com';
-- 可能无法有效使用索引
-- ✅ 优化:使用UNION
EXPLAIN
(SELECT * FROM performance_test WHERE username = 'alice')
UNION
(SELECT * FROM performance_test WHERE email = 'alice@example.com');
-- 每个子查询都可以使用相应的索引
5. 复合索引使用不当
-- 假设有复合索引:idx_city_age_status(city, age, status)
-- ✅ 正确:按索引列顺序使用
EXPLAIN SELECT * FROM performance_test
WHERE city = '北京' AND age = 25 AND status = 'active';
-- 完全使用索引
-- ✅ 部分使用:使用索引前缀
EXPLAIN SELECT * FROM performance_test WHERE city = '北京' AND age = 25;
-- 使用索引的前两列
-- ❌ 错误:跳过索引前缀
EXPLAIN SELECT * FROM performance_test WHERE age = 25 AND status = 'active';
-- 无法使用索引(跳过了第一列city)
索引性能问题诊断
慢查询分析
-- 启用慢查询日志
SET GLOBAL slow_query_log = 'ON';
SET GLOBAL long_query_time = 1; -- 记录执行时间超过1秒的查询
-- 查看慢查询日志位置
SHOW VARIABLES LIKE 'slow_query_log_file';
-- 分析当前正在执行的查询
SELECT
ID,
USER,
HOST,
DB,
COMMAND,
TIME,
STATE,
LEFT(INFO, 100) as QUERY_PREVIEW
FROM information_schema.PROCESSLIST
WHERE COMMAND != 'Sleep'
AND TIME > 1
ORDER BY TIME DESC;
索引碎片检查
-- 检查表和索引的碎片情况
SELECT
TABLE_NAME,
ENGINE,
TABLE_ROWS,
DATA_LENGTH,
INDEX_LENGTH,
DATA_FREE,
ROUND(DATA_FREE / (DATA_LENGTH + INDEX_LENGTH) * 100, 2) as 'Fragmentation %'
FROM information_schema.TABLES
WHERE TABLE_SCHEMA = DATABASE()
AND DATA_FREE > 0
ORDER BY DATA_FREE DESC;
最佳实践总结
1. 索引创建策略
- 主键和唯一键:自动创建索引,无需手动添加
- 外键列:通常需要创建索引以提高JOIN性能
- WHERE条件列:为经常出现在WHERE子句中的列创建索引
- ORDER BY列:为排序列创建索引
- GROUP BY列:为分组列创建索引
2. 索引维护策略
- 定期监控:使用EXPLAIN分析查询计划
- 统计信息更新:定期执行ANALYZE TABLE
- 碎片整理:在低峰期执行OPTIMIZE TABLE
- 性能测试:在生产环境前进行充分测试
3. 索引优化技巧
- 复合索引优化:合理安排列的顺序
- 覆盖索引:减少回表操作
- 前缀索引:优化长字符串列的索引
- 部分索引:使用条件索引减少索引大小
相关资源
小结
本章深入介绍了MySQL索引优化的核心概念和实践技巧,包括:
- 索引的工作原理和数据结构
- 不同类型索引的创建和使用
- 索引设计和优化策略
- 索引监控和维护方法
- 常见问题的解决方案
掌握这些索引优化技巧,能够显著提升数据库查询性能,为构建高性能的Web应用奠定坚实基础。

浙公网安备 33010602011771号