GaussDB SQL基础语法示例:数组表达式详解
GaussDB SQL基础语法示例:数组表达式详解
数组是数据库中处理批量数据的重要数据类型,GaussDB基于PostgreSQL支持丰富的数组操作功能。本文通过真实业务场景,系统讲解数组的定义、操作及优化技巧。
一、基础数组操作
- 数组定义与访问
- 创建包含数组字段的表
CREATE TABLE student_scores (
student_id INT,
scores INT[],
subjects TEXT[]
);
-- 插入数组数据
INSERT INTO student_scores VALUES
(1, ARRAY[85,92,78], ARRAY['Math','Physics','Chemistry']),
(2, ARRAY[76,88], ARRAY['History','Geography']);
-- 访问数组元素(索引从1开始)
SELECT
student_id,
scores[1] AS math_score,
subjects[2] AS second_subject
FROM student_scores;
- 数组运算符
-- 判断元素是否存在
SELECT
subjects @> ARRAY['Math'] AS has_math,
subjects && ARRAY['Math','Physics'] AS has_stem
FROM student_scores;
-- 数组连接与追加
SELECT
scores || 88 AS new_scores, -- 合并数组
subjects || 'Biology' AS new_subjects -- 追加元素
FROM student_scores;
二、进阶数组函数
- 数组展开与聚合
-- 展开数组为多行(需配合ORDER BY保留顺序)
SELECT student_id, unnest(scores) AS single_score
FROM student_scores
ORDER BY student_id;
-- 聚合多行为数组
SELECT
student_id,
array_agg(subjects) AS all_subjects
FROM (
SELECT student_id, unnest(subjects) AS subjects
FROM student_scores
) tmp
GROUP BY student_id;
- 数组统计函数
-- 计算数组元素个数
SELECT
student_id,
array_length(scores, 1) AS subject_count,
cardinality(subjects) AS subject_num -- 等效于array_length
FROM student_scores;
-- 查找最大/最小值
SELECT
student_id,
(array_agg(scores ORDER BY scores DESC))[1] AS highest_score,
array_agg(scores ORDER BY scores ASC)[1] AS lowest_score
FROM student_scores
GROUP BY student_id;
三、多维数组与JSON结合
- 多维数组操作
-- 创建二维数组表
CREATE TABLE matrix_data (
id SERIAL PRIMARY KEY,
matrix FLOAT[][]
);
-- 插入二维数据
INSERT INTO matrix_data (matrix) VALUES
(ARRAY[[1.1,2.2],[3.3,4.4]]),
(ARRAY[[5.5,6.6],[7.7,8.8]]);
-- 访问二维元素
SELECT matrix[1][2] AS element FROM matrix_data WHERE id=1; -- 返回2.2
- 数组与JSON互转
-- 数组转JSON
SELECT
student_id,
scores::JSON AS scores_json
FROM student_scores;
-- JSON转数组
SELECT
student_id,
scores::TEXT::FLOAT[] AS parsed_scores
FROM (
SELECT student_id, scores::JSON->0 AS scores
FROM student_scores
) tmp;
四、特殊场景应用
- 动态数组处理
-- 使用generate_series创建序列数组
SELECT generate_series(1,5) AS numbers; -- 生成1-5的数组
-- 结合数组生成测试数据
INSERT INTO test_table (id, values)
SELECT
g,
ARRAY(SELECT random() * 100 FROM generate_series(1,10))
FROM generate_series(1,5) g;
- 数组排序与去重
-- 数组元素排序
SELECT
student_id,
array_agg(subjects ORDER BY subjects) AS sorted_subjects
FROM student_scores
GROUP BY student_id;
-- 数组去重
SELECT
student_id,
array_agg(DISTINCT scores ORDER BY scores) AS unique_scores
FROM student_scores
GROUP BY student_id;
五、性能优化技巧
- 索引优化
-- 创建GIN索引加速包含查询
CREATE INDEX idx_student_subjects ON student_scores USING GIN (subjects);
-- 创建B-tree索引加速数值范围查询
CREATE INDEX idx_scores ON student_scores USING btree (scores[1]);
- 避免全表扫描
-- 错误示例:未利用索引
SELECT * FROM student_scores WHERE 85 = ANY(scores);
-- 正确写法:使用数组位置索引
SELECT * FROM student_scores WHERE scores[1] = 85;
六、典型错误规避
错误1:类型不匹配
SELECT * FROM student_scores WHERE 'Math' = ANY(subjects); -- 字符串未加引号
-- 正确写法
SELECT * FROM student_scores WHERE 'Math' = ANY(subjects::VARCHAR[]);
-- 错误2:多维数组越界
SELECT matrix[3][3] FROM matrix_data WHERE id=1; -- 超出维度范围
-- 错误3:NULL值处理
SELECT array_length(NULL::INT[]) FROM student_scores; -- 应使用COALESCE
七、实战技巧
数据清洗
-- 过滤空数组记录
SELECT * FROM student_scores WHERE subjects IS NOT NULL AND subjects <> '{}';
窗口函数结合
-- 按学科统计平均分
SELECT
subject,
AVG(score) OVER (PARTITION BY subject) AS avg_score
FROM (
SELECT
unnest(subjects) AS subject,
unnest(scores) AS score
FROM student_scores
) tmp;
数据验证
-- 检查数组元素合法性
SELECT * FROM student_scores
WHERE NOT scores ALL BETWEEN 0 AND 100;
通过灵活运用数组表达式,可以显著简化复杂数据处理逻辑。