# MySQL索引优化

概述

索引是数据库性能优化的核心技术之一。本章将深入探讨MySQL索引的原理、类型、创建策略和优化技巧,帮助你构建高性能的数据库应用。

学习目标

  • 理解索引的工作原理和数据结构
  • 掌握不同类型索引的使用场景
  • 学会索引设计和优化策略
  • 了解索引对性能的影响
  • 掌握索引监控和维护方法

核心概念

索引的工作原理

graph TB A[查询请求] --> B{是否有索引} B -->|有索引| C[使用索引查找] B -->|无索引| D[全表扫描] C --> E[快速定位数据] D --> F[逐行检查数据] E --> G[返回结果] F --> G subgraph "索引结构" H[B+树索引] I[哈希索引] J[全文索引] K[空间索引] end

B+树索引结构

B+树是MySQL InnoDB存储引擎使用的主要索引结构,具有以下特点:

  • 平衡树结构:所有叶子节点在同一层
  • 顺序访问:叶子节点通过指针连接,支持范围查询
  • 高扇出比:减少树的高度,提高查询效率

索引类型和创建

创建测试表

-- 删除已存在的测试表
DROP TABLE IF EXISTS performance_test;

-- 创建性能测试表
CREATE TABLE performance_test (
    id INT AUTO_INCREMENT PRIMARY KEY,
    user_id INT NOT NULL,
    email VARCHAR(100) NOT NULL,
    username VARCHAR(50) NOT NULL,
    age INT,
    city VARCHAR(50),
    registration_date DATE,
    last_login DATETIME,
    status ENUM('active', 'inactive', 'suspended') DEFAULT 'active',
    score DECIMAL(5,2),
    description TEXT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- 插入测试数据
INSERT INTO performance_test (user_id, email, username, age, city, registration_date, last_login, status, score, description) VALUES
(1001, 'alice@example.com', 'alice', 25, '北京', '2023-01-15', '2024-01-15 10:30:00', 'active', 85.50, '这是一个活跃用户的描述信息'),
(1002, 'bob@example.com', 'bob', 30, '上海', '2023-02-20', '2024-01-14 15:20:00', 'active', 92.30, '这是另一个用户的详细描述'),
(1003, 'charlie@example.com', 'charlie', 28, '广州', '2023-03-10', '2024-01-10 09:15:00', 'inactive', 78.90, '用户描述包含更多详细信息'),
(1004, 'diana@example.com', 'diana', 35, '深圳', '2023-04-05', '2024-01-16 14:45:00', 'active', 88.70, '这里是用户的个人简介和相关信息'),
(1005, 'eve@example.com', 'eve', 22, '杭州', '2023-05-12', '2024-01-12 11:30:00', 'suspended', 65.40, '包含关键词的用户描述文本内容');

索引类型演示

1. 普通索引(B-Tree)

-- 创建单列索引
CREATE INDEX idx_user_id ON performance_test(user_id);
CREATE INDEX idx_email ON performance_test(email);
CREATE INDEX idx_age ON performance_test(age);
CREATE INDEX idx_city ON performance_test(city);
CREATE INDEX idx_registration_date ON performance_test(registration_date);

-- 查看索引创建结果
SHOW INDEX FROM performance_test;

-- 结果示例:
-- +-----------------+------------+----------------------+--------------+-------------+
-- | Table           | Non_unique | Key_name             | Seq_in_index | Column_name |
-- +-----------------+------------+----------------------+--------------+-------------+
-- | performance_test|          0 | PRIMARY              |            1 | id          |
-- | performance_test|          1 | idx_user_id          |            1 | user_id     |
-- | performance_test|          1 | idx_email            |            1 | email       |
-- | performance_test|          1 | idx_age              |            1 | age         |
-- +-----------------+------------+----------------------+--------------+-------------+

2. 唯一索引

-- 创建唯一索引
CREATE UNIQUE INDEX idx_username_unique ON performance_test(username);

-- 验证唯一性约束
-- 以下插入会失败,因为username重复
-- INSERT INTO performance_test (user_id, email, username, age) VALUES (1006, 'test@example.com', 'alice', 26);
-- ERROR 1062 (23000): Duplicate entry 'alice' for key 'performance_test.idx_username_unique'

3. 复合索引

-- 创建复合索引
CREATE INDEX idx_city_age ON performance_test(city, age);
CREATE INDEX idx_status_score ON performance_test(status, score);
CREATE INDEX idx_user_date_status ON performance_test(user_id, registration_date, status);

-- 复合索引使用示例
-- 以下查询可以有效使用 idx_city_age 索引
SELECT * FROM performance_test WHERE city = '北京' AND age = 25;

-- 以下查询可以部分使用 idx_city_age 索引(只使用city部分)
SELECT * FROM performance_test WHERE city = '北京';

-- 以下查询无法使用 idx_city_age 索引(跳过了第一列)
SELECT * FROM performance_test WHERE age = 25;

4. 前缀索引

-- 创建前缀索引(只索引email的前10个字符)
CREATE INDEX idx_email_prefix ON performance_test(email(10));

-- 查看前缀索引的选择性
SELECT 
    COUNT(DISTINCT email) as total_unique,
    COUNT(DISTINCT LEFT(email, 10)) as prefix_unique,
    COUNT(DISTINCT LEFT(email, 10)) / COUNT(DISTINCT email) as selectivity
FROM performance_test;

-- 前缀索引适用于以下查询
SELECT * FROM performance_test WHERE email LIKE 'alice@%';

-- 但不适用于以下查询
SELECT * FROM performance_test WHERE email LIKE '%@example.com';

5. 全文索引

-- 创建全文索引
CREATE FULLTEXT INDEX idx_description_fulltext ON performance_test(description);

-- 全文搜索查询
SELECT *, MATCH(description) AGAINST('用户 描述' IN NATURAL LANGUAGE MODE) as relevance
FROM performance_test
WHERE MATCH(description) AGAINST('用户 描述' IN NATURAL LANGUAGE MODE)
ORDER BY relevance DESC;

-- 布尔模式全文搜索
SELECT * FROM performance_test
WHERE MATCH(description) AGAINST('+用户 -关键词' IN BOOLEAN MODE);

-- 查询扩展模式
SELECT * FROM performance_test
WHERE MATCH(description) AGAINST('用户' WITH QUERY EXPANSION);

6. 空间索引(适用于几何数据)

-- 创建包含空间数据的表
CREATE TABLE locations (
    id INT AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(100),
    coordinates POINT NOT NULL,
    SPATIAL INDEX idx_coordinates (coordinates)
) ENGINE=InnoDB;

-- 插入空间数据
INSERT INTO locations (name, coordinates) VALUES
('北京', POINT(116.4074, 39.9042)),
('上海', POINT(121.4737, 31.2304)),
('广州', POINT(113.2644, 23.1291));

-- 空间查询示例
SELECT name, ST_AsText(coordinates) as location
FROM locations
WHERE ST_Distance_Sphere(coordinates, POINT(116.4074, 39.9042)) < 1000000;  -- 1000公里内

## 索引性能分析

### 性能测试查询
```sql
-- 测试不同类型的查询性能
-- 1. 按用户ID查询(使用idx_user_id索引)
EXPLAIN SELECT * FROM performance_test WHERE user_id = 1001;

-- 2. 按邮箱查询(使用idx_email索引)
EXPLAIN SELECT * FROM performance_test WHERE email = 'alice@example.com';

-- 3. 按年龄范围查询(使用idx_age索引)
EXPLAIN SELECT * FROM performance_test WHERE age BETWEEN 25 AND 35;

-- 4. 按城市查询(使用idx_city索引)
EXPLAIN SELECT * FROM performance_test WHERE city = '北京';

-- 5. 复合条件查询(使用idx_city_age复合索引)
EXPLAIN SELECT * FROM performance_test WHERE city = '上海' AND age > 30;

-- 6. 排序查询(使用idx_registration_date索引)
EXPLAIN SELECT * FROM performance_test ORDER BY registration_date DESC LIMIT 100;

-- 7. 聚合查询(使用idx_city索引)
EXPLAIN SELECT city, COUNT(*) FROM performance_test GROUP BY city;

查询执行计划分析

-- 查看详细的执行计划
EXPLAIN FORMAT=JSON 
SELECT * FROM performance_test 
WHERE city = '北京' AND age > 25 AND status = 'active';

-- 分析索引选择性
SELECT 
    'user_id' as column_name,
    COUNT(DISTINCT user_id) as unique_values,
    COUNT(*) as total_rows,
    COUNT(DISTINCT user_id) / COUNT(*) as selectivity
FROM performance_test

UNION ALL

SELECT 
    'city' as column_name,
    COUNT(DISTINCT city) as unique_values,
    COUNT(*) as total_rows,
    COUNT(DISTINCT city) / COUNT(*) as selectivity
FROM performance_test

UNION ALL

SELECT 
    'status' as column_name,
    COUNT(DISTINCT status) as unique_values,
    COUNT(*) as total_rows,
    COUNT(DISTINCT status) / COUNT(*) as selectivity
FROM performance_test;

复合索引优化策略

复合索引设计原则

-- 创建测试查询的复合索引
CREATE INDEX idx_user_date_status ON performance_test(user_id, registration_date, status);

-- 测试复合索引的使用情况
-- 1. 使用索引的第一列(最优)
EXPLAIN SELECT * FROM performance_test WHERE user_id = 1001;

-- 2. 使用索引的前两列(较优)
EXPLAIN SELECT * FROM performance_test WHERE user_id = 1001 AND registration_date >= '2023-01-01';

-- 3. 使用索引的所有列(最优)
EXPLAIN SELECT * FROM performance_test 
WHERE user_id = 1001 AND registration_date >= '2023-01-01' AND status = 'active';

-- 4. 跳过第一列(无法使用索引)
EXPLAIN SELECT * FROM performance_test WHERE registration_date >= '2023-01-01' AND status = 'active';

-- 5. 只使用最后一列(无法使用索引)
EXPLAIN SELECT * FROM performance_test WHERE status = 'active';

覆盖索引示例

-- 创建覆盖索引(包含查询所需的所有列)
CREATE INDEX idx_user_email_status ON performance_test(user_id, email, status);

-- 以下查询可以完全通过索引满足,无需回表
EXPLAIN SELECT user_id, email, status 
FROM performance_test 
WHERE user_id = 1001;

-- 查看是否使用了覆盖索引(Extra列显示"Using index")
EXPLAIN SELECT user_id, email 
FROM performance_test 
WHERE user_id BETWEEN 1001 AND 1003;

索引设计最佳实践

索引创建原则

1. 选择性原则

-- 计算列的选择性
SELECT 
    'user_id' as column_name,
    COUNT(DISTINCT user_id) / COUNT(*) as selectivity,
    CASE 
        WHEN COUNT(DISTINCT user_id) / COUNT(*) > 0.1 THEN '高选择性,适合创建索引'
        ELSE '低选择性,不建议创建索引'
    END as recommendation
FROM performance_test

UNION ALL

SELECT 
    'status' as column_name,
    COUNT(DISTINCT status) / COUNT(*) as selectivity,
    CASE 
        WHEN COUNT(DISTINCT status) / COUNT(*) > 0.1 THEN '高选择性,适合创建索引'
        ELSE '低选择性,不建议创建索引'
    END as recommendation
FROM performance_test;

-- 结果示例:
-- +-------------+-------------+--------------------------------+
-- | column_name | selectivity | recommendation                 |
-- +-------------+-------------+--------------------------------+
-- | user_id     |      1.0000 | 高选择性,适合创建索引         |
-- | status      |      0.6000 | 高选择性,适合创建索引         |
-- +-------------+-------------+--------------------------------+

2. 复合索引列顺序设计

-- 分析查询模式,确定复合索引列顺序
-- 假设有以下常见查询:
-- 1. WHERE city = ? AND age = ?
-- 2. WHERE city = ? AND age > ?
-- 3. WHERE city = ?

-- 分析各列的选择性
SELECT 
    COUNT(DISTINCT city) as city_unique,
    COUNT(DISTINCT age) as age_unique,
    COUNT(DISTINCT city) / COUNT(*) as city_selectivity,
    COUNT(DISTINCT age) / COUNT(*) as age_selectivity
FROM performance_test;

-- 基于选择性和查询模式创建最优复合索引
-- city选择性更高,且所有查询都包含city条件,所以放在第一位
CREATE INDEX idx_city_age_optimized ON performance_test(city, age);

3. 覆盖索引设计

-- 分析查询需要的列
-- 查询:SELECT user_id, email, status FROM performance_test WHERE city = ?

-- 创建覆盖索引,包含WHERE条件列和SELECT列
CREATE INDEX idx_city_covering ON performance_test(city, user_id, email, status);

-- 验证覆盖索引效果
EXPLAIN SELECT user_id, email, status 
FROM performance_test 
WHERE city = '北京';
-- 期望看到 Extra: Using index(表示使用了覆盖索引)

4. 前缀索引长度选择

-- 分析不同前缀长度的选择性
SELECT 
    CONCAT('email(', LENGTH, ')') as prefix_length,
    COUNT(DISTINCT LEFT(email, LENGTH)) as unique_values,
    COUNT(DISTINCT LEFT(email, LENGTH)) / COUNT(*) as selectivity
FROM performance_test
CROSS JOIN (
    SELECT 5 as LENGTH UNION ALL
    SELECT 10 UNION ALL
    SELECT 15 UNION ALL
    SELECT 20
) lengths
GROUP BY LENGTH
ORDER BY LENGTH;

-- 选择合适的前缀长度(选择性接近完整列的长度)
CREATE INDEX idx_email_prefix_optimal ON performance_test(email(15));

索引监控和维护

索引使用情况监控

-- 查看表的所有索引
SHOW INDEX FROM performance_test;

-- 查看索引统计信息
SELECT 
    TABLE_NAME,
    INDEX_NAME,
    COLUMN_NAME,
    CARDINALITY,
    SUB_PART,
    NULLABLE,
    INDEX_TYPE
FROM information_schema.STATISTICS 
WHERE TABLE_SCHEMA = DATABASE() 
  AND TABLE_NAME = 'performance_test'
ORDER BY TABLE_NAME, INDEX_NAME, SEQ_IN_INDEX;

-- 查看索引大小
SELECT 
    TABLE_NAME,
    INDEX_NAME,
    ROUND(STAT_VALUE * @@innodb_page_size / 1024 / 1024, 2) as 'Size (MB)'
FROM mysql.innodb_index_stats 
WHERE DATABASE_NAME = DATABASE() 
  AND TABLE_NAME = 'performance_test'
  AND STAT_NAME = 'size'
ORDER BY STAT_VALUE DESC;

查询执行计划分析

-- 分析不同查询的执行计划
EXPLAIN SELECT * FROM performance_test WHERE user_id = 1001;
EXPLAIN SELECT * FROM performance_test WHERE city = '北京' AND age > 30;
EXPLAIN SELECT * FROM performance_test WHERE email LIKE 'alice@%';

-- 使用EXPLAIN FORMAT=JSON获取详细信息
EXPLAIN FORMAT=JSON 
SELECT * FROM performance_test 
WHERE city = '北京' AND age BETWEEN 25 AND 35;

-- 分析索引使用效率
EXPLAIN SELECT * FROM performance_test 
WHERE city = '北京' 
ORDER BY registration_date DESC 
LIMIT 10;

索引维护操作

-- 更新表统计信息
ANALYZE TABLE performance_test;

-- 优化表(重建索引,整理碎片)
OPTIMIZE TABLE performance_test;

-- 检查表完整性
CHECK TABLE performance_test;

-- 修复表
REPAIR TABLE performance_test;

常见索引问题和解决方案

索引失效场景

1. 使用函数或表达式

-- ❌ 错误:使用函数会导致索引失效
EXPLAIN SELECT * FROM performance_test WHERE UPPER(username) = 'ALICE';
-- type: ALL (全表扫描)

-- ✅ 正确:直接比较
EXPLAIN SELECT * FROM performance_test WHERE username = 'alice';
-- type: ref (使用索引)

-- 解决方案:创建函数索引(MySQL 8.0+)
CREATE INDEX idx_username_upper ON performance_test((UPPER(username)));
EXPLAIN SELECT * FROM performance_test WHERE UPPER(username) = 'ALICE';

2. LIKE通配符位置

-- ❌ 错误:通配符开头无法使用索引
EXPLAIN SELECT * FROM performance_test WHERE email LIKE '%@example.com';
-- type: ALL (全表扫描)

-- ✅ 正确:通配符在后面可以使用索引
EXPLAIN SELECT * FROM performance_test WHERE email LIKE 'alice@%';
-- type: range (使用索引范围扫描)

-- 解决方案:对于后缀搜索,考虑使用全文索引
CREATE FULLTEXT INDEX idx_email_fulltext ON performance_test(email);
SELECT * FROM performance_test 
WHERE MATCH(email) AGAINST('example.com' IN BOOLEAN MODE);

3. 数据类型不匹配

-- ❌ 错误:数据类型不匹配
EXPLAIN SELECT * FROM performance_test WHERE user_id = '1001';  -- 字符串比较数字
-- 可能导致类型转换,影响索引使用

-- ✅ 正确:数据类型匹配
EXPLAIN SELECT * FROM performance_test WHERE user_id = 1001;
-- type: ref (正确使用索引)

4. OR条件优化

-- ❌ 低效:OR连接不同列
EXPLAIN SELECT * FROM performance_test 
WHERE username = 'alice' OR email = 'alice@example.com';
-- 可能无法有效使用索引

-- ✅ 优化:使用UNION
EXPLAIN 
(SELECT * FROM performance_test WHERE username = 'alice')
UNION
(SELECT * FROM performance_test WHERE email = 'alice@example.com');
-- 每个子查询都可以使用相应的索引

5. 复合索引使用不当

-- 假设有复合索引:idx_city_age_status(city, age, status)

-- ✅ 正确:按索引列顺序使用
EXPLAIN SELECT * FROM performance_test 
WHERE city = '北京' AND age = 25 AND status = 'active';
-- 完全使用索引

-- ✅ 部分使用:使用索引前缀
EXPLAIN SELECT * FROM performance_test WHERE city = '北京' AND age = 25;
-- 使用索引的前两列

-- ❌ 错误:跳过索引前缀
EXPLAIN SELECT * FROM performance_test WHERE age = 25 AND status = 'active';
-- 无法使用索引(跳过了第一列city)

索引性能问题诊断

慢查询分析

-- 启用慢查询日志
SET GLOBAL slow_query_log = 'ON';
SET GLOBAL long_query_time = 1;  -- 记录执行时间超过1秒的查询

-- 查看慢查询日志位置
SHOW VARIABLES LIKE 'slow_query_log_file';

-- 分析当前正在执行的查询
SELECT 
    ID,
    USER,
    HOST,
    DB,
    COMMAND,
    TIME,
    STATE,
    LEFT(INFO, 100) as QUERY_PREVIEW
FROM information_schema.PROCESSLIST
WHERE COMMAND != 'Sleep'
  AND TIME > 1
ORDER BY TIME DESC;

索引碎片检查

-- 检查表和索引的碎片情况
SELECT 
    TABLE_NAME,
    ENGINE,
    TABLE_ROWS,
    DATA_LENGTH,
    INDEX_LENGTH,
    DATA_FREE,
    ROUND(DATA_FREE / (DATA_LENGTH + INDEX_LENGTH) * 100, 2) as 'Fragmentation %'
FROM information_schema.TABLES
WHERE TABLE_SCHEMA = DATABASE()
  AND DATA_FREE > 0
ORDER BY DATA_FREE DESC;

最佳实践总结

1. 索引创建策略

  • 主键和唯一键:自动创建索引,无需手动添加
  • 外键列:通常需要创建索引以提高JOIN性能
  • WHERE条件列:为经常出现在WHERE子句中的列创建索引
  • ORDER BY列:为排序列创建索引
  • GROUP BY列:为分组列创建索引

2. 索引维护策略

  • 定期监控:使用EXPLAIN分析查询计划
  • 统计信息更新:定期执行ANALYZE TABLE
  • 碎片整理:在低峰期执行OPTIMIZE TABLE
  • 性能测试:在生产环境前进行充分测试

3. 索引优化技巧

  • 复合索引优化:合理安排列的顺序
  • 覆盖索引:减少回表操作
  • 前缀索引:优化长字符串列的索引
  • 部分索引:使用条件索引减少索引大小

相关资源

小结

本章深入介绍了MySQL索引优化的核心概念和实践技巧,包括:

  • 索引的工作原理和数据结构
  • 不同类型索引的创建和使用
  • 索引设计和优化策略
  • 索引监控和维护方法
  • 常见问题的解决方案

掌握这些索引优化技巧,能够显著提升数据库查询性能,为构建高性能的Web应用奠定坚实基础。

posted @ 2025-07-18 14:30  春水鸿鹄  阅读(25)  评论(0)    收藏  举报