GaussDB SQL基础语法示例:数组表达式详解

GaussDB SQL基础语法示例:数组表达式详解
数组是数据库中处理批量数据的重要数据类型,GaussDB基于PostgreSQL支持丰富的数组操作功能。本文通过真实业务场景,系统讲解数组的定义、操作及优化技巧。

一、基础数组操作

  1. 数组定义与访问
- 创建包含数组字段的表
CREATE TABLE student_scores (
    student_id INT,
    scores INT[],
    subjects TEXT[]
);

-- 插入数组数据
INSERT INTO student_scores VALUES 
(1, ARRAY[85,92,78], ARRAY['Math','Physics','Chemistry']),
(2, ARRAY[76,88], ARRAY['History','Geography']);

-- 访问数组元素(索引从1开始)
SELECT 
    student_id,
    scores[1] AS math_score,
    subjects[2] AS second_subject
FROM student_scores;
  1. 数组运算符
-- 判断元素是否存在
SELECT 
    subjects @> ARRAY['Math'] AS has_math,
    subjects && ARRAY['Math','Physics'] AS has_stem
FROM student_scores;

-- 数组连接与追加
SELECT 
    scores || 88 AS new_scores,          -- 合并数组
    subjects || 'Biology' AS new_subjects -- 追加元素
FROM student_scores;

二、进阶数组函数

  1. 数组展开与聚合
-- 展开数组为多行(需配合ORDER BY保留顺序)
SELECT student_id, unnest(scores) AS single_score
FROM student_scores
ORDER BY student_id;

-- 聚合多行为数组
SELECT 
    student_id,
    array_agg(subjects) AS all_subjects
FROM (
    SELECT student_id, unnest(subjects) AS subjects
    FROM student_scores
) tmp
GROUP BY student_id;
  1. 数组统计函数
-- 计算数组元素个数
SELECT 
    student_id,
    array_length(scores, 1) AS subject_count,
    cardinality(subjects) AS subject_num -- 等效于array_length
FROM student_scores;

-- 查找最大/最小值
SELECT 
    student_id,
    (array_agg(scores ORDER BY scores DESC))[1] AS highest_score,
    array_agg(scores ORDER BY scores ASC)[1] AS lowest_score
FROM student_scores
GROUP BY student_id;

三、多维数组与JSON结合

  1. 多维数组操作
-- 创建二维数组表
CREATE TABLE matrix_data (
    id SERIAL PRIMARY KEY,
    matrix FLOAT[][]
);

-- 插入二维数据
INSERT INTO matrix_data (matrix) VALUES 
(ARRAY[[1.1,2.2],[3.3,4.4]]),
(ARRAY[[5.5,6.6],[7.7,8.8]]);

-- 访问二维元素
SELECT matrix[1][2] AS element FROM matrix_data WHERE id=1; -- 返回2.2
  1. 数组与JSON互转
-- 数组转JSON
SELECT 
    student_id,
    scores::JSON AS scores_json
FROM student_scores;

-- JSON转数组
SELECT 
    student_id,
    scores::TEXT::FLOAT[] AS parsed_scores
FROM (
    SELECT student_id, scores::JSON->0 AS scores
    FROM student_scores
) tmp;

四、特殊场景应用

  1. 动态数组处理
-- 使用generate_series创建序列数组
SELECT generate_series(1,5) AS numbers; -- 生成1-5的数组

-- 结合数组生成测试数据
INSERT INTO test_table (id, values)
SELECT 
    g,
    ARRAY(SELECT random() * 100 FROM generate_series(1,10))
FROM generate_series(1,5) g;
  1. 数组排序与去重
-- 数组元素排序
SELECT 
    student_id,
    array_agg(subjects ORDER BY subjects) AS sorted_subjects
FROM student_scores
GROUP BY student_id;

-- 数组去重
SELECT 
    student_id,
    array_agg(DISTINCT scores ORDER BY scores) AS unique_scores
FROM student_scores
GROUP BY student_id;

五、性能优化技巧

  1. 索引优化
-- 创建GIN索引加速包含查询
CREATE INDEX idx_student_subjects ON student_scores USING GIN (subjects);

-- 创建B-tree索引加速数值范围查询
CREATE INDEX idx_scores ON student_scores USING btree (scores[1]);
  1. 避免全表扫描
-- 错误示例:未利用索引
SELECT * FROM student_scores WHERE 85 = ANY(scores);

-- 正确写法:使用数组位置索引
SELECT * FROM student_scores WHERE scores[1] = 85;

六、典型错误规避

错误1:类型不匹配
SELECT * FROM student_scores WHERE 'Math' = ANY(subjects); -- 字符串未加引号

-- 正确写法
SELECT * FROM student_scores WHERE 'Math' = ANY(subjects::VARCHAR[]);

-- 错误2:多维数组越界
SELECT matrix[3][3] FROM matrix_data WHERE id=1; -- 超出维度范围

-- 错误3:NULL值处理
SELECT array_length(NULL::INT[]) FROM student_scores; -- 应使用COALESCE

七、实战技巧

​​数据清洗​​

-- 过滤空数组记录
SELECT * FROM student_scores WHERE subjects IS NOT NULL AND subjects <> '{}';
​​窗口函数结合​​
-- 按学科统计平均分
SELECT 
    subject,
    AVG(score) OVER (PARTITION BY subject) AS avg_score
FROM (
    SELECT 
        unnest(subjects) AS subject,
        unnest(scores) AS score
    FROM student_scores
) tmp;

​​数据验证​​

-- 检查数组元素合法性
SELECT * FROM student_scores 
WHERE NOT scores ALL BETWEEN 0 AND 100;

通过灵活运用数组表达式,可以显著简化复杂数据处理逻辑。

posted @ 2025-05-27 15:05  喜酱喜酱  阅读(3)  评论(0)    收藏  举报