GaussDB SQL基础语法示例：数组表达式详解

GaussDB SQL基础语法示例：数组表达式详解
数组是数据库中处理批量数据的重要数据类型，GaussDB基于PostgreSQL支持丰富的数组操作功能。本文通过真实业务场景，系统讲解数组的定义、操作及优化技巧。

一、基础数组操作

数组定义与访问

- 创建包含数组字段的表
CREATE TABLE student_scores (
    student_id INT,
    scores INT[],
    subjects TEXT[]
);

-- 插入数组数据
INSERT INTO student_scores VALUES 
(1, ARRAY[85,92,78], ARRAY['Math','Physics','Chemistry']),
(2, ARRAY[76,88], ARRAY['History','Geography']);

-- 访问数组元素（索引从1开始）
SELECT 
    student_id,
    scores[1] AS math_score,
    subjects[2] AS second_subject
FROM student_scores;

数组运算符

-- 判断元素是否存在
SELECT 
    subjects @> ARRAY['Math'] AS has_math,
    subjects && ARRAY['Math','Physics'] AS has_stem
FROM student_scores;

-- 数组连接与追加
SELECT 
    scores || 88 AS new_scores,          -- 合并数组
    subjects || 'Biology' AS new_subjects -- 追加元素
FROM student_scores;

二、进阶数组函数

数组展开与聚合

-- 展开数组为多行（需配合ORDER BY保留顺序）
SELECT student_id, unnest(scores) AS single_score
FROM student_scores
ORDER BY student_id;

-- 聚合多行为数组
SELECT 
    student_id,
    array_agg(subjects) AS all_subjects
FROM (
    SELECT student_id, unnest(subjects) AS subjects
    FROM student_scores
) tmp
GROUP BY student_id;

数组统计函数

-- 计算数组元素个数
SELECT 
    student_id,
    array_length(scores, 1) AS subject_count,
    cardinality(subjects) AS subject_num -- 等效于array_length
FROM student_scores;

-- 查找最大/最小值
SELECT 
    student_id,
    (array_agg(scores ORDER BY scores DESC))[1] AS highest_score,
    array_agg(scores ORDER BY scores ASC)[1] AS lowest_score
FROM student_scores
GROUP BY student_id;

三、多维数组与JSON结合

多维数组操作

-- 创建二维数组表
CREATE TABLE matrix_data (
    id SERIAL PRIMARY KEY,
    matrix FLOAT[][]
);

-- 插入二维数据
INSERT INTO matrix_data (matrix) VALUES 
(ARRAY[[1.1,2.2],[3.3,4.4]]),
(ARRAY[[5.5,6.6],[7.7,8.8]]);

-- 访问二维元素
SELECT matrix[1][2] AS element FROM matrix_data WHERE id=1; -- 返回2.2

数组与JSON互转

-- 数组转JSON
SELECT 
    student_id,
    scores::JSON AS scores_json
FROM student_scores;

-- JSON转数组
SELECT 
    student_id,
    scores::TEXT::FLOAT[] AS parsed_scores
FROM (
    SELECT student_id, scores::JSON->0 AS scores
    FROM student_scores
) tmp;

四、特殊场景应用

动态数组处理

-- 使用generate_series创建序列数组
SELECT generate_series(1,5) AS numbers; -- 生成1-5的数组

-- 结合数组生成测试数据
INSERT INTO test_table (id, values)
SELECT 
    g,
    ARRAY(SELECT random() * 100 FROM generate_series(1,10))
FROM generate_series(1,5) g;

数组排序与去重

-- 数组元素排序
SELECT 
    student_id,
    array_agg(subjects ORDER BY subjects) AS sorted_subjects
FROM student_scores
GROUP BY student_id;

-- 数组去重
SELECT 
    student_id,
    array_agg(DISTINCT scores ORDER BY scores) AS unique_scores
FROM student_scores
GROUP BY student_id;

五、性能优化技巧

索引优化

-- 创建GIN索引加速包含查询
CREATE INDEX idx_student_subjects ON student_scores USING GIN (subjects);

-- 创建B-tree索引加速数值范围查询
CREATE INDEX idx_scores ON student_scores USING btree (scores[1]);

避免全表扫描

-- 错误示例：未利用索引
SELECT * FROM student_scores WHERE 85 = ANY(scores);

-- 正确写法：使用数组位置索引
SELECT * FROM student_scores WHERE scores[1] = 85;

六、典型错误规避

错误1：类型不匹配
SELECT * FROM student_scores WHERE 'Math' = ANY(subjects); -- 字符串未加引号

-- 正确写法
SELECT * FROM student_scores WHERE 'Math' = ANY(subjects::VARCHAR[]);

-- 错误2：多维数组越界
SELECT matrix[3][3] FROM matrix_data WHERE id=1; -- 超出维度范围

-- 错误3：NULL值处理
SELECT array_length(NULL::INT[]) FROM student_scores; -- 应使用COALESCE

七、实战技巧

数据清洗

-- 过滤空数组记录
SELECT * FROM student_scores WHERE subjects IS NOT NULL AND subjects <> '{}';
窗口函数结合
-- 按学科统计平均分
SELECT 
    subject,
    AVG(score) OVER (PARTITION BY subject) AS avg_score
FROM (
    SELECT 
        unnest(subjects) AS subject,
        unnest(scores) AS score
    FROM student_scores
) tmp;

数据验证

-- 检查数组元素合法性
SELECT * FROM student_scores 
WHERE NOT scores ALL BETWEEN 0 AND 100;

通过灵活运用数组表达式，可以显著简化复杂数据处理逻辑。

posted @ 2025-05-27 15:05 喜酱喜酱阅读(3) 评论(0) 收藏举报

刷新页面返回顶部

GaussDB