PostgreSQL创建测试数据
生成测试数据
用到的函数
基本函数
| 函数 | 说明 |
|---|---|
| string_agg() | 是 聚合函数,用于将一列字符串连接成一个字符串 |
| substring() | 用于从一个字符串中提取子串,substring(string, start, length) 其中,string: 要从中提取子串的原始字符串。start: 提取子串开始位置的索引(从1开始计数)。length: 子串的长度。 |
| generate_series() | 是 PG 中的内置函数,用于生成一系列连续的整数或时间值。可以用于创建测试数据、填充日期范围等。 |
| round() | 是 PG 中内置函数,用于将数值四舍五入到指定的小数位数。 |
| ceil(value) | 得到不小于参数的最小整数 |
| floor(value) | 得到不大于参数的最大整数 |
| trunc(value) | 截断小数点后的数(保留整数) |
| chr(value) | 获取给定代码的字符(根据Unicode代码) |
| random() | 是 PG 中的内置函数,用于生成一个随机的浮点数(获取0-1之间的随机数) |
| make_interval make_time make_timestamp make_timestamptz make_date |
make_date(2013, 7, 15) make_interval(days => 10) make_time(8, 15, 23.5) make_timestamp(2013, 7, 15, 8, 15, 23.5) make_timestamptz(2013, 7, 15, 8, 15, 23.5) |
使用场景
| 数据 | SQL | 分类 | 备注 |
|---|---|---|---|
| 指定范围的随机数 | (random()*(max-min)+min)::int | 数值 | |
| 连续的数字 | generate_series(1,1000) | 数值 | |
| 连续间隔的数字 | generate_series(1,1000,2) | 数值 | |
| 带小数的数字 | (random()*(10000-3000)+3000)::numeric(8,2) | 数值 | |
| 小于1的小数 | (random()*(1-0)+0)::numeric(4,2) | 数值 | |
| 字符串 | substr(md5(random()::text),1,15) | 字符串 | |
| 邮箱 | substr(md5(random()::text),2,5)||'@163.com' | 字符串 | |
| 电话号码 | concat('1',ceiling(random()*9000000000+1000000000)) | 字符串 | |
| 随机中文字符 | chr(19968+(random()*1000)::int) | 字符串 | |
| flag | (random()*(1))::int(random()*(1))::int::boolean |
布尔值 | 1/0 |
| 指定范围内的随机日期 | `date((random()*(2022-1990)+1990)::int | '-' | |
| 随机日期 | current_date - floor((random() * 25))::int | 日期 | |
| 连续日期 | date(generate_series('2000-01-01'::timestamp, now(), '1 day')) | 日期 | |
| 年月 | '2022'||lpad((random()*(12-1)+1)::int::text,2,'0') | 日期 | |
| 随机时间戳 | concat(current_date - floor((random() * 25))::int,' ',make_time(floor((random() * 12))::int, floor((random() * 60))::int, floor((random() * 60))::int)) | 时间 | |
| 随机时间 | make_time(floor((random() * 12))::int, floor((random() * 60))::int, floor((random() * 60))::int) | 时间 |
生成随机汉字
汉字Unicode编码范围
#php中utf-8编码下用正则表达式匹配汉字的最终正确表达式
/[\x{4e00}-\x{9fa5}]/u
\u4e00-\u9fa5 (中文)
\x3130-\x318F (韩文)
\xAC00-\xD7A3 (韩文)
\u0800-\u4e00 (日文)
4e00对应 19968
9fa5对应 40869
一共20901个汉字
例子1
注意,为了保证能够输出所有的汉字,确保数据库的编码为UTF8,否则会报类似错误
db=# select '\u9fa5'::text;
ERROR: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "'\u9fa5"
LINE 1: select '\u9fa5'::text;
^
postgres=# set standard_conforming_strings =off;
SET
postgres=# set escape_string_warning=off;
SET
postgres=# select '\u9fa5'::text;
text
------
龥
(1 row)
例子2
除了Unicode写法,PostgreSQL还提供了两个函数,支持数值的写法
| 函数 | 返回类型 | 描述 | 例子 | 结果 |
|---|---|---|---|---|
ascii(*string*) |
int |
参数第一个字符的ASCII代码。对于UTF8返回该字符的Unicode代码点。对于其他多字节编码,该参数必须是一个ASCII字符。 | ascii('x') |
120 |
chr(int) |
text |
给定代码的字符。对于UTF8该参数被视作一个Unicode代码点。对于其他多字节编码该参数必须指定一个ASCII字符。NULL (0) 字符不被允许,因为文本数据类型不能存储这种字节。 | chr(65) |
A |
postgres=# select chr(19968);
chr
-----
一
(1 row)
输出所有汉字
匿名块
do language plpgsql $$
declare
res text := '';
begin
for i in 19968..40869 loop
res := res||chr(i);
end loop;
raise notice '%', res;
end;
$$;
编码和汉字对照
select generate_series(19968,40869),chr(generate_series(19968,40869));
输出随机汉字
创建函数
create or replace function gen_hanzi(int) returns text as $$
declare
res text;
begin
if $1 >=1 then
select string_agg(chr(19968+(random()*20901)::int), '') into res from generate_series(1,$1);
return res;
end if;
return null;
end;
$$ language plpgsql strict;
从最常用的100个汉字中随机生成指定个汉字
create or replace function gen_zhchar(num int) returns text
as $$
declare
a1 integer[] :='{30340,19968,20102,26159,25105,19981,22312,20154,20204,26377,20010,22320,21040,22823,37324,26469,20182,36825,19978,30528,35828,23601,21435,23376,24471,20063,21644,37027,35201,19979,30475,22825,26102,36807,20986,23567,20040,36215,20320,37117,25226,22909,36824,22810,27809,20026,21448,21487,23478,23398,21482,20197,20027,20250,26679,24180,24819,33021,29983,21516,32769,20013,21313,20174,33258,38754,21069,22836,36947,23427,21518,28982,36208,24456,20687,35265,20004,29992,22905,22269,21160,36827,25104,22238,20160,36793,20316,23545,24320,32780,24049,20123,29616,23665,27665,20505,32463,21457,24037,21521}';
random_index int;
chr_result text;
chr_tmp_string text;
begin
chr_result :='';
for i in 1..num loop
random_index :=floor(random()*array_length(a1,1)+1);
-- execute 'select chr('||a1[random_index]||')' into chr_result;
select chr(a1[random_index]) into chr_tmp_string;
chr_result :=chr_result||chr_tmp_string;
end loop;
return chr_result;
-- raise notice 'the result of function is %',chr_result;
end;
$$ language plpgsql;
参考资料
https://www.postgresql.org/docs/9.6/functions-string.html
https://github.com/digoal/blog/blob/master/201704/20170402_01.md
https://en.wikipedia.org/wiki/Unicode
https://baike.baidu.com/item/统一码/2985798?fromtitle=Unicode&fromid=750500
生成随机字符
create or replace function random_string(integer)
returns text as
$body$
select array_to_string(array(select substring('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' FROM (ceil(random()*62))::int FOR 1) FROM generate_series(1, $1)), '');
$body$
language sql volatile;
创建测试函数
为了便于生成测试数据,创建以下两个函数用来随机生成指定长度的字符串,创建random_range(int4, int4)函数如下:
CREATE OR REPLACE FUNCTION random_range(int4, int4)
RETURNS int4
LANGUAGE SQL
AS $$
SELECT ($1 + FLOOR(($2 - $1 + 1) * random() ))::int4;
$$;
-- 接着创建random_text_simple(length int4)函数,此函数会调用random_range(int4, int4)函数。
CREATE OR REPLACE FUNCTION random_text_simple(length int4)
RETURNS text
LANGUAGE PLPGSQL
AS $$
DECLARE
possible_chars text := '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
output text := '';
i int4;
pos int4;
BEGIN
FOR i IN 1..length LOOP
pos := random_range(1, length(possible_chars));
output := output || substr(possible_chars, pos, 1);
END LOOP;
RETURN output;
END;
$$;
random_text_simple(length int4)函数可以随机生成指定长度字符串,如下示例:
postgres=# SELECT random_text_simple(3);
random_text_simple
--------------------
4dI
(1 row)
postgres=# SELECT random_text_simple(6);
random_text_simple
--------------------
3uLknl
(1 row)
生成随机数组
-- 生成随机数组
postgres=# select array(select floor(random()*1000) from generate_series(1, 10));
array
---------------------------------------
{414,797,61,793,346,60,23,312,706,21}
(1 row)
多属性笛卡尔积
WITH
xing AS (
SELECT *
FROM unnest(ARRAY['赵', '钱']) WITH ORDINALITY AS hon(n, i)
),
ming AS (
SELECT *
FROM unnest(ARRAY['梓', '明', '平']) WITH ORDINALITY AS fn(n, i)
)
SELECT initcap(concat_ws(' ', xing.n, ming.n)) AS name
FROM xing, ming, generate_series(1,2)
ORDER BY random();
-- 执行结果(有多少个属性每行就按照空格分隔多个值)
name
-------
赵 明
赵 明
赵 梓
钱 平
钱 明
赵 平
赵 平
钱 明
钱 梓
钱 梓
赵 梓
钱 平
(12 rows)
元命令
\watch 0.001 每隔0.001秒重复执行命令
create table test (id int ,name text);
insert into test values(1,'1'),(2,'2');
#每隔0.001秒做一次更新操作
update test set name = 'hello'; \watch 0.001
表
普通表
-- 创建测试表
create table employees
( employee_id int8 primary key
, first_name varchar(20)
, last_name varchar(25) constraint emp_last_name_nn not null
, email varchar(25) constraint emp_email_nn not null
, phone_number varchar(20)
, hire_date date constraint emp_hire_date_nn not null
, job_id int8 constraint emp_job_nn not null
, salary numeric(8,2)
, commission_pct numeric(4,2)
, manager_id int8
, department_id int8
, constraint emp_salary_min check (salary > 0)
--, constraint emp_email_uk unique (email)
) ;
-- 生成测试数据
insert into employees
select generate_series(1,1000) as key,
substr(md5(random()::text),2,5),
substr(md5(random()::text),2,8),
substr(md5(random()::text),2,5)||'@163.com',
concat('1',ceiling(random()*9000000000+1000000000)),
date((random()*(2022-1990)+1990)::int||'-'||(random()*(12-1)+1)::int||'-'||(random()*(28-1)+1)::int),
(random()*(50-10)+10)::int,
(random()*(10000-3000)+3000)::numeric(8,2),
(random()*(1-0)+0)::numeric(4,2),
(random()*(100-1)+1)::int,
(random()*(10-1)+1)::int;
create table t_test(
id varchar(200),
name varchar(200),
age integer,
password varchar(200)
);
-- 插入600w的模拟数据
INSERT INTO t_test (id, name, age, password)
SELECT
'id'||FLOOR(RANDOM() *10000), -- 生成随机的ID
'User'||FLOOR(RANDOM() *10000), -- 生成随机的用户名
FLOOR(RANDOM() *100) +18, -- 生成随机的年龄 (18-117)
'user'||FLOOR(RANDOM() *10000) ||'@example.com'-- 生成随机的邮箱地址
FROM
generate_series(1, 6000000);
分区表
范围分区-按id
create table tbp(n int, t text) partition by range(n);
create table tbp_1 partition of tbp for values from (MINVALUE) to (10);
create table tbp_2 partition of tbp for values from (10) to (100);
create table tbp_3 partition of tbp for values from (100) to (1000);
create table tbp_4 partition of tbp for values from (1000) to (MAXVALUE);
范围分区-按年分区
方式1
create table tbp(id int,date timestamp(6),col2 text) partition by range(date);
create table tbp_2020 partition of tbp for values from ('2020-01-01') to ('2021-01-01');
create table tbp_2021 partition of tbp for values from ('2021-01-01') to ('2022-01-01');
create table tbp_2022 partition of tbp for values from ('2022-01-01') to ('2023-01-01');
create table tbp_2023 partition of tbp for values from ('2023-01-01') to ('2024-01-01');
-- default
create table tbp_default partition of tbp default;
insert into tbp(id,date,col2)
select generate_series(1,400000) as id ,date((random()*(2023-2020)+2020)::int||'-'||(random()*(12-1)+1)::int||'-'||(random()*(28-1)+1)::int),'test';
方式2
create table tbp(id int,date timestamp(6),col2 text) partition by range(date);
create table tbp_2020(id int,date timestamp(6),col2 text);
create table tbp_2021(id int,date timestamp(6),col2 text);
create table tbp_2022(id int,date timestamp(6),col2 text);
create table tbp_2023(id int,date timestamp(6),col2 text);
create table tbp_default(id int,date timestamp(6),col2 text);
insert into tbp_2020(id,date,col2) select generate_series(1,100000) as id ,date('2020'||'-'||(random()*(12-1)+1)::int||'-'||(random()*(28-1)+1)::int),'test';
insert into tbp_2021(id,date,col2) select generate_series(100001,200000) as id ,date('2021'||'-'||(random()*(12-1)+1)::int||'-'||(random()*(28-1)+1)::int),'test';
insert into tbp_2022(id,date,col2) select generate_series(200001,300000) as id ,date('2022'||'-'||(random()*(12-1)+1)::int||'-'||(random()*(28-1)+1)::int),'test';
insert into tbp_2023(id,date,col2) select generate_series(300001,400000) as id ,date('2023'||'-'||(random()*(12-1)+1)::int||'-'||(random()*(28-1)+1)::int),'test';
alter table tbp attach partition tbp_2020 for values from ('2020-01-01') to ('2021-01-01');
alter table tbp attach partition tbp_2021 for values from ('2021-01-01') to ('2022-01-01');
alter table tbp attach partition tbp_2022 for values from ('2022-01-01') to ('2023-01-01');
alter table tbp attach partition tbp_2023 for values from ('2023-01-01') to ('2024-01-01');
-- default
alter table tbp attach partition tbp_default default;
特殊场景
批量生成n个字段
do $$
declare
i int :=5; -- 字段数量
sql_text text :='create table test_t(id int,';
begin
for n in 1..i loop
sql_text := sql_text||'col'||n||' varchar(100)';
if n < i then
sql_text := sql_text||',';
end if;
end loop;
sql_text := sql_text||');';
raise notice '%',sql_text;
execute sql_text;
end $$;
日期值递增不同的值
-- 创建测试表
create table test_t(id int,last_update_date timestamp);
insert into test_t select generate_series(1,1000),now();
-- 游标遍历表每一行,保证每一行日期都不一样(递增1秒)
do $$
declare
cur record;
begin
for cur in select * from test_t loop
update test_t set last_update_date = now() + cur.id * interval '1 second'
where id = cur.id;
end loop;
end $$;
-- 批量更新为当前时间加随机秒
update test_t set last_update_date = now() + make_interval(secs => random()*3600);
-- 插入的时候指定随机日期
insert into test_t (last_update_date)
select now()-(random() * interval '365 days') from generate_series(1,1000);
外键
drop table if exists author,page;
create table author(
author_id int primary key,
firstname varchar(50),
lastname varchar(50)
);
create table page (
page_id serial primary key,
title varchar(255),
content text,
author_id int,
foreign key (author_id) references author (author_id) on delete no action on update cascade deferrable initially deferred
);
完整场景数据
创建用户、表空间、数据库、模式
mkdir -p /postgresql/test_ts
psql -U postgres -c "create user test password 'test';"
psql -U postgres -c "CREATE TABLESPACE test_ts OWNER test LOCATION '/postgresql/test_ts';"
psql -U postgres -c "create database testdb tablespace test_ts;"
psql -U postgres -d testdb -c "CREATE SCHEMA AUTHORIZATION test;"
创建对象
export PGPASSWORD=test
psql -U test -d testdb
-- 创建域
create domain phone_type as text constraint phone_check check (VALUE ~ '^\d{11}$');
-- 创建数据类型
CREATE TYPE human_sex AS ENUM ('male', 'female');
CREATE TYPE test_type AS (f1 int, f2 text);
-- 创建序列
create sequence employees_s;
create sequence employees_s1;
-- 创建测试表
create table test_t1(id int);
create table test_t2(id int);
create table test_t3(id int);
create table test_t4(id int);
create table employees
( employee_id int8 primary key
, first_name varchar(20)
, last_name varchar(25)
, sex human_sex
, email varchar(25)
, phone_number phone_type
, salary numeric(8,2)
, last_update_date timestamp
, constraint emp_salary_min check (salary > 0)
, constraint emp_email_uk unique (email)
) ;
create table tbp(id int,date timestamp(6),col2 text) partition by range(date);
create table tbp_2020 partition of tbp for values from ('2020-01-01') to ('2021-01-01');
create table tbp_2021 partition of tbp for values from ('2021-01-01') to ('2022-01-01');
create table tbp_2022 partition of tbp for values from ('2022-01-01') to ('2023-01-01');
create table tbp_2023 partition of tbp for values from ('2023-01-01') to ('2024-01-01');
create table tbp_default partition of tbp default;
-- 创建主分区表
CREATE TABLE sales (
id int,
region VARCHAR(50),
sales_date DATE,
amount NUMERIC
) PARTITION BY RANGE (sales_date);
-- 创建一级分区
CREATE TABLE sales_2023 PARTITION OF sales FOR VALUES FROM ('2023-01-01') TO ('2024-01-01') PARTITION BY LIST (region);
CREATE TABLE sales_2024 PARTITION OF sales FOR VALUES FROM ('2024-01-01') TO ('2025-01-01') PARTITION BY LIST (region);
-- 创建二级分区
CREATE TABLE sales_2023_north PARTITION OF sales_2023 FOR VALUES IN ('North');
CREATE TABLE sales_2023_south PARTITION OF sales_2023 FOR VALUES IN ('South');
CREATE TABLE sales_2023_east PARTITION OF sales_2023 FOR VALUES IN ('East');
CREATE TABLE sales_2023_west PARTITION OF sales_2023 FOR VALUES IN ('West');
CREATE TABLE sales_2024_north PARTITION OF sales_2024 FOR VALUES IN ('North');
CREATE TABLE sales_2024_south PARTITION OF sales_2024 FOR VALUES IN ('South');
CREATE TABLE sales_2024_east PARTITION OF sales_2024 FOR VALUES IN ('East');
CREATE TABLE sales_2024_west PARTITION OF sales_2024 FOR VALUES IN ('West');
-- 插入数据
insert into employees values(nextval('employees_s'),'King','Johnn','male','johnn@163.com','15145264084',10000,now());
-- 以下两条应该报错
insert into employees values(nextval('employees_s'),'Job','Lucy','female','lucy@163.com','151452640841',10000,now());
insert into employees values(nextval('employees_s'),'Job','Lucy','females','lucy@163.com','15145264084',10000,now());
-- 创建索引
create index idx_name on employees(first_name,last_name);
create index idx_id on tbp(id);
create index idx_id2 on sales(id);
-- 创建约束
ALTER TABLE employees ADD CONSTRAINT check_salary CHECK (salary>0);
-- 普通视图
create view emp as select * from employees;
-- 物化视图
create materialized view emp_v as select * from employees;
refresh materialized view emp_v;
创建触发器
-- 普通触发器
CREATE OR REPLACE FUNCTION update_time_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.last_update_date := CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER update_time_trigger BEFORE UPDATE ON employees FOR EACH ROW EXECUTE FUNCTION update_time_column();
-- 事件触发器
CREATE OR REPLACE FUNCTION DISABLE_DROP_TABLE()
RETURNS event_trigger AS $$
BEGIN
if tg_tag = 'DROP TABLE' THEN
RAISE EXCEPTION 'Command % is disabled.', tg_tag;
END if;
END;
$$ LANGUAGE plpgsql;
-- 切换superuser创建事件事件触发器
CREATE EVENT TRIGGER DISABLE_DROP_TABLE on ddl_command_start EXECUTE FUNCTION DISABLE_DROP_TABLE();
函数和存储过程
-- 函数
CREATE FUNCTION inc(val integer) RETURNS integer AS $$
BEGIN
RETURN val + 1;
END;
$$ LANGUAGE PLPGSQL;
CREATE FUNCTION inc(val1 integer,val2 integer) RETURNS integer AS $$
BEGIN
RETURN val1 + val2;
END;
$$ LANGUAGE PLPGSQL;
-- 存储过程
CREATE OR REPLACE PROCEDURE test_sum(a NUMERIC,b NUMERIC,C NUMERIC)
AS $$
DECLARE
val int;
BEGIN
val := a+b+c;
RAISE NOTICE 'Total is : % !',val;
END;
$$ language plpgsql;

浙公网安备 33010602011771号