PostgreSQL创建测试数据

生成测试数据

用到的函数

基本函数

函数 说明
string_agg() 是 聚合函数,用于将一列字符串连接成一个字符串
substring() 用于从一个字符串中提取子串,substring(string, start, length)
其中,string: 要从中提取子串的原始字符串。start: 提取子串开始位置的索引(从1开始计数)。length: 子串的长度。
generate_series() 是 PG 中的内置函数,用于生成一系列连续的整数或时间值。可以用于创建测试数据、填充日期范围等。
round() 是 PG 中内置函数,用于将数值四舍五入到指定的小数位数。
ceil(value) 得到不小于参数的最小整数
floor(value) 得到不大于参数的最大整数
trunc(value) 截断小数点后的数(保留整数)
chr(value) 获取给定代码的字符(根据Unicode代码)
random() 是 PG 中的内置函数,用于生成一个随机的浮点数(获取0-1之间的随机数)
make_interval
make_time
make_timestamp
make_timestamptz
make_date
make_date(2013, 7, 15)
make_interval(days => 10)
make_time(8, 15, 23.5)
make_timestamp(2013, 7, 15, 8, 15, 23.5)
make_timestamptz(2013, 7, 15, 8, 15, 23.5)

使用场景

数据 SQL 分类 备注
指定范围的随机数 (random()*(max-min)+min)::int 数值
连续的数字 generate_series(1,1000) 数值
连续间隔的数字 generate_series(1,1000,2) 数值
带小数的数字 (random()*(10000-3000)+3000)::numeric(8,2) 数值
小于1的小数 (random()*(1-0)+0)::numeric(4,2) 数值
字符串 substr(md5(random()::text),1,15) 字符串
邮箱 substr(md5(random()::text),2,5)||'@163.com' 字符串
电话号码 concat('1',ceiling(random()*9000000000+1000000000)) 字符串
随机中文字符 chr(19968+(random()*1000)::int) 字符串
flag (random()*(1))::int
(random()*(1))::int::boolean
布尔值 1/0
指定范围内的随机日期 `date((random()*(2022-1990)+1990)::int '-'
随机日期 current_date - floor((random() * 25))::int 日期
连续日期 date(generate_series('2000-01-01'::timestamp, now(), '1 day')) 日期
年月 '2022'||lpad((random()*(12-1)+1)::int::text,2,'0') 日期
随机时间戳 concat(current_date - floor((random() * 25))::int,' ',make_time(floor((random() * 12))::int, floor((random() * 60))::int, floor((random() * 60))::int)) 时间
随机时间 make_time(floor((random() * 12))::int, floor((random() * 60))::int, floor((random() * 60))::int) 时间

生成随机汉字

汉字Unicode编码范围

#php中utf-8编码下用正则表达式匹配汉字的最终正确表达式
/[\x{4e00}-\x{9fa5}]/u
\u4e00-\u9fa5 (中文)  
\x3130-\x318F (韩文)  
\xAC00-\xD7A3 (韩文)  
\u0800-\u4e00 (日文)  

4e00对应 19968

9fa5对应 40869

一共20901个汉字

例子1

注意,为了保证能够输出所有的汉字,确保数据库的编码为UTF8,否则会报类似错误

db=# select '\u9fa5'::text;  
ERROR:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "'\u9fa5"  
LINE 1: select '\u9fa5'::text;  
               ^  
postgres=# set standard_conforming_strings =off;
SET
postgres=# set escape_string_warning=off;
SET
postgres=# select '\u9fa5'::text;
 text
------
 龥
(1 row)

例子2

除了Unicode写法,PostgreSQL还提供了两个函数,支持数值的写法

函数 返回类型 描述 例子 结果
ascii(*string*) int 参数第一个字符的ASCII代码。对于UTF8返回该字符的Unicode代码点。对于其他多字节编码,该参数必须是一个ASCII字符。 ascii('x') 120
chr(int) text 给定代码的字符。对于UTF8该参数被视作一个Unicode代码点。对于其他多字节编码该参数必须指定一个ASCII字符。NULL (0) 字符不被允许,因为文本数据类型不能存储这种字节。 chr(65) A
postgres=# select chr(19968);
 chr   
-----  
 一  
(1 row)  

输出所有汉字

匿名块

do language plpgsql $$  
declare  
  res text := '';  
begin  
  for i in 19968..40869 loop  
    res := res||chr(i);  
  end loop;  
  raise notice '%', res;  
end;  
$$;

编码和汉字对照

select generate_series(19968,40869),chr(generate_series(19968,40869));

输出随机汉字

创建函数

create or replace function gen_hanzi(int) returns text as $$
declare  
  res text;  
begin  
  if $1 >=1 then  
    select string_agg(chr(19968+(random()*20901)::int), '') into res from generate_series(1,$1);
    return res;  
  end if;  
  return null;  
end;  
$$ language plpgsql strict;

从最常用的100个汉字中随机生成指定个汉字

create or replace function gen_zhchar(num int) returns text
as $$
declare
  a1 integer[] :='{30340,19968,20102,26159,25105,19981,22312,20154,20204,26377,20010,22320,21040,22823,37324,26469,20182,36825,19978,30528,35828,23601,21435,23376,24471,20063,21644,37027,35201,19979,30475,22825,26102,36807,20986,23567,20040,36215,20320,37117,25226,22909,36824,22810,27809,20026,21448,21487,23478,23398,21482,20197,20027,20250,26679,24180,24819,33021,29983,21516,32769,20013,21313,20174,33258,38754,21069,22836,36947,23427,21518,28982,36208,24456,20687,35265,20004,29992,22905,22269,21160,36827,25104,22238,20160,36793,20316,23545,24320,32780,24049,20123,29616,23665,27665,20505,32463,21457,24037,21521}';
  random_index int;
  chr_result text;
  chr_tmp_string text;
begin
  chr_result :='';
  for i in 1..num loop
    random_index :=floor(random()*array_length(a1,1)+1);
	-- execute 'select chr('||a1[random_index]||')' into chr_result;
	select chr(a1[random_index]) into chr_tmp_string;
	chr_result :=chr_result||chr_tmp_string;
  end loop;
  return chr_result;
    -- raise notice 'the result of function is %',chr_result;
end;
$$ language plpgsql;

参考资料

https://www.postgresql.org/docs/9.6/functions-string.html

https://stackoverflow.com/questions/3970795/how-do-you-create-a-random-string-thats-suitable-for-a-session-id-in-postgresql

https://github.com/digoal/blog/blob/master/201704/20170402_01.md

https://en.wikipedia.org/wiki/Unicode

http://unicode.org/charts/

https://baike.baidu.com/item/统一码/2985798?fromtitle=Unicode&fromid=750500

生成随机字符

create or replace function random_string(integer)
returns text as
$body$
    select array_to_string(array(select substring('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' FROM (ceil(random()*62))::int FOR 1) FROM generate_series(1, $1)), '');
$body$
language sql volatile;

创建测试函数

为了便于生成测试数据,创建以下两个函数用来随机生成指定长度的字符串,创建random_range(int4, int4)函数如下:

CREATE OR REPLACE FUNCTION random_range(int4, int4)
RETURNS int4
LANGUAGE SQL
AS $$
    SELECT ($1 + FLOOR(($2 - $1 + 1) * random() ))::int4;
$$;

-- 接着创建random_text_simple(length int4)函数,此函数会调用random_range(int4, int4)函数。
CREATE OR REPLACE FUNCTION random_text_simple(length int4)
RETURNS text
LANGUAGE PLPGSQL
AS $$
DECLARE
    possible_chars text := '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
    output text := '';
    i int4;
    pos int4;
BEGIN
    FOR i IN 1..length LOOP
        pos := random_range(1, length(possible_chars));
        output := output || substr(possible_chars, pos, 1);
    END LOOP;
    RETURN output;
END;
$$;

random_text_simple(length int4)函数可以随机生成指定长度字符串,如下示例:

postgres=# SELECT random_text_simple(3);
 random_text_simple 
--------------------
 4dI
(1 row)

postgres=# SELECT random_text_simple(6);
 random_text_simple 
--------------------
 3uLknl
(1 row)

生成随机数组

-- 生成随机数组
postgres=# select array(select floor(random()*1000) from generate_series(1, 10));
                 array
---------------------------------------
 {414,797,61,793,346,60,23,312,706,21}
(1 row)

多属性笛卡尔积

WITH
xing AS (
    SELECT *
    FROM unnest(ARRAY['赵', '钱']) WITH ORDINALITY AS hon(n, i)
),
ming AS (
    SELECT *
    FROM unnest(ARRAY['梓', '明', '平']) WITH ORDINALITY AS fn(n, i)
)
SELECT initcap(concat_ws(' ', xing.n, ming.n)) AS name
FROM xing, ming, generate_series(1,2)
ORDER BY random();

-- 执行结果(有多少个属性每行就按照空格分隔多个值)
 name  
-------
 赵 明
 赵 明
 赵 梓
 钱 平
 钱 明
 赵 平
 赵 平
 钱 明
 钱 梓
 钱 梓
 赵 梓
 钱 平
(12 rows)

元命令

\watch 0.001 每隔0.001秒重复执行命令

create table test (id int ,name text);
insert into test values(1,'1'),(2,'2');
#每隔0.001秒做一次更新操作
update test set name = 'hello'; \watch 0.001

普通表

-- 创建测试表
create table employees
    ( employee_id    int8    primary key
    , first_name     varchar(20)
    , last_name      varchar(25) constraint     emp_last_name_nn  not null
    , email          varchar(25) constraint     emp_email_nn  not null
    , phone_number   varchar(20)
    , hire_date      date         constraint     emp_hire_date_nn  not null
    , job_id         int8         constraint     emp_job_nn  not null
    , salary         numeric(8,2)
    , commission_pct numeric(4,2)
    , manager_id     int8
    , department_id  int8
    , constraint     emp_salary_min check (salary > 0) 
    --, constraint     emp_email_uk unique (email)
    ) ;
-- 生成测试数据
insert into employees
select generate_series(1,1000) as key,
       substr(md5(random()::text),2,5),
       substr(md5(random()::text),2,8),
	   substr(md5(random()::text),2,5)||'@163.com',
	   concat('1',ceiling(random()*9000000000+1000000000)),
	   date((random()*(2022-1990)+1990)::int||'-'||(random()*(12-1)+1)::int||'-'||(random()*(28-1)+1)::int),
	   (random()*(50-10)+10)::int,
	   (random()*(10000-3000)+3000)::numeric(8,2),
	   (random()*(1-0)+0)::numeric(4,2),
	   (random()*(100-1)+1)::int,
	   (random()*(10-1)+1)::int;



create table t_test(
id varchar(200), 
name varchar(200), 
age integer, 
password varchar(200)
);

-- 插入600w的模拟数据
INSERT INTO t_test  (id, name, age, password)
SELECT
    'id'||FLOOR(RANDOM() *10000),  -- 生成随机的ID
    'User'||FLOOR(RANDOM() *10000),  -- 生成随机的用户名
    FLOOR(RANDOM() *100) +18,  -- 生成随机的年龄 (18-117)
    'user'||FLOOR(RANDOM() *10000) ||'@example.com'-- 生成随机的邮箱地址
FROM
    generate_series(1, 6000000);

分区表

范围分区-按id

create table tbp(n int, t text) partition by range(n);
create table tbp_1 partition of tbp for values from (MINVALUE) to (10);
create table tbp_2 partition of tbp for values from (10) to (100);
create table tbp_3 partition of tbp for values from (100) to (1000);
create table tbp_4 partition of tbp for values from (1000) to (MAXVALUE);

范围分区-按年分区

方式1

create table tbp(id int,date timestamp(6),col2 text) partition by range(date);

create table tbp_2020 partition of tbp for values from ('2020-01-01') to ('2021-01-01');
create table tbp_2021 partition of tbp for values from ('2021-01-01') to ('2022-01-01');
create table tbp_2022 partition of tbp for values from ('2022-01-01') to ('2023-01-01');
create table tbp_2023 partition of tbp for values from ('2023-01-01') to ('2024-01-01');
-- default
create table tbp_default partition of tbp default;


insert into tbp(id,date,col2)
select generate_series(1,400000) as id ,date((random()*(2023-2020)+2020)::int||'-'||(random()*(12-1)+1)::int||'-'||(random()*(28-1)+1)::int),'test';

方式2

create table tbp(id int,date timestamp(6),col2 text) partition by range(date);

create table tbp_2020(id int,date timestamp(6),col2 text);
create table tbp_2021(id int,date timestamp(6),col2 text);
create table tbp_2022(id int,date timestamp(6),col2 text);
create table tbp_2023(id int,date timestamp(6),col2 text);
create table tbp_default(id int,date timestamp(6),col2 text);

insert into tbp_2020(id,date,col2) select generate_series(1,100000) as id ,date('2020'||'-'||(random()*(12-1)+1)::int||'-'||(random()*(28-1)+1)::int),'test';
insert into tbp_2021(id,date,col2) select generate_series(100001,200000) as id ,date('2021'||'-'||(random()*(12-1)+1)::int||'-'||(random()*(28-1)+1)::int),'test';
insert into tbp_2022(id,date,col2) select generate_series(200001,300000) as id ,date('2022'||'-'||(random()*(12-1)+1)::int||'-'||(random()*(28-1)+1)::int),'test';
insert into tbp_2023(id,date,col2) select generate_series(300001,400000) as id ,date('2023'||'-'||(random()*(12-1)+1)::int||'-'||(random()*(28-1)+1)::int),'test';

alter table tbp attach partition tbp_2020 for values from ('2020-01-01') to ('2021-01-01');
alter table tbp attach partition tbp_2021 for values from ('2021-01-01') to ('2022-01-01');
alter table tbp attach partition tbp_2022 for values from ('2022-01-01') to ('2023-01-01');
alter table tbp attach partition tbp_2023 for values from ('2023-01-01') to ('2024-01-01');

-- default
alter table tbp attach partition tbp_default default;

特殊场景

批量生成n个字段

do $$
declare
  i int :=5; -- 字段数量
  sql_text text :='create table test_t(id int,';
begin
  for n in 1..i loop
    sql_text := sql_text||'col'||n||' varchar(100)';
	if n < i then
	  sql_text := sql_text||',';
	end if;
  end loop;
  sql_text := sql_text||');';
  raise notice '%',sql_text;
  execute sql_text;
end $$;

日期值递增不同的值

-- 创建测试表
create table test_t(id int,last_update_date timestamp);
insert into test_t select generate_series(1,1000),now();

-- 游标遍历表每一行,保证每一行日期都不一样(递增1秒)
do $$
declare
cur record;
begin
  for cur in select * from test_t loop
    update test_t set last_update_date = now() + cur.id * interval '1 second'
	where id = cur.id;
  end loop;
end $$;

-- 批量更新为当前时间加随机秒
update test_t set last_update_date = now() + make_interval(secs => random()*3600);

-- 插入的时候指定随机日期
insert into test_t (last_update_date)
select now()-(random() * interval '365 days') from generate_series(1,1000);

外键

drop table if exists author,page;
create table author(
   author_id int primary key,
   firstname varchar(50),
   lastname varchar(50)
);
create table page (
    page_id serial primary key,
    title varchar(255),
    content text,
    author_id int,
    foreign key (author_id) references author (author_id) on delete no action on update cascade deferrable initially deferred
);

完整场景数据

创建用户、表空间、数据库、模式

mkdir -p /postgresql/test_ts

psql -U postgres -c "create user test password 'test';"
psql -U postgres -c "CREATE TABLESPACE test_ts OWNER test LOCATION '/postgresql/test_ts';"
psql -U postgres -c "create database testdb tablespace test_ts;"
psql -U postgres -d testdb -c "CREATE SCHEMA AUTHORIZATION test;"

创建对象

export PGPASSWORD=test
psql -U test -d testdb

-- 创建域
create domain phone_type as text constraint phone_check check (VALUE ~ '^\d{11}$');
-- 创建数据类型
CREATE TYPE human_sex AS ENUM ('male', 'female');
CREATE TYPE test_type AS (f1 int, f2 text);
-- 创建序列
create sequence employees_s;
create sequence employees_s1;
-- 创建测试表
create table test_t1(id int);
create table test_t2(id int);
create table test_t3(id int);
create table test_t4(id int);
create table employees
    ( employee_id    int8    primary key
    , first_name     varchar(20)
    , last_name      varchar(25)
    , sex            human_sex
    , email          varchar(25)
    , phone_number   phone_type
    , salary         numeric(8,2)
    , last_update_date timestamp
    , constraint     emp_salary_min check (salary > 0) 
    , constraint     emp_email_uk unique (email)
    ) ;

create table tbp(id int,date timestamp(6),col2 text) partition by range(date);
create table tbp_2020 partition of tbp for values from ('2020-01-01') to ('2021-01-01');
create table tbp_2021 partition of tbp for values from ('2021-01-01') to ('2022-01-01');
create table tbp_2022 partition of tbp for values from ('2022-01-01') to ('2023-01-01');
create table tbp_2023 partition of tbp for values from ('2023-01-01') to ('2024-01-01');
create table tbp_default partition of tbp default;

-- 创建主分区表
CREATE TABLE sales (
    id int,
    region VARCHAR(50),
    sales_date DATE,
    amount NUMERIC
) PARTITION BY RANGE (sales_date);
-- 创建一级分区
CREATE TABLE sales_2023 PARTITION OF sales FOR VALUES FROM ('2023-01-01') TO ('2024-01-01') PARTITION BY LIST (region);
CREATE TABLE sales_2024 PARTITION OF sales FOR VALUES FROM ('2024-01-01') TO ('2025-01-01') PARTITION BY LIST (region);
-- 创建二级分区
CREATE TABLE sales_2023_north PARTITION OF sales_2023 FOR VALUES IN ('North');
CREATE TABLE sales_2023_south PARTITION OF sales_2023 FOR VALUES IN ('South');
CREATE TABLE sales_2023_east PARTITION OF sales_2023 FOR VALUES IN ('East');
CREATE TABLE sales_2023_west PARTITION OF sales_2023 FOR VALUES IN ('West');
CREATE TABLE sales_2024_north PARTITION OF sales_2024 FOR VALUES IN ('North');
CREATE TABLE sales_2024_south PARTITION OF sales_2024 FOR VALUES IN ('South');
CREATE TABLE sales_2024_east PARTITION OF sales_2024 FOR VALUES IN ('East');
CREATE TABLE sales_2024_west PARTITION OF sales_2024 FOR VALUES IN ('West');

-- 插入数据
insert into employees values(nextval('employees_s'),'King','Johnn','male','johnn@163.com','15145264084',10000,now());
-- 以下两条应该报错
insert into employees values(nextval('employees_s'),'Job','Lucy','female','lucy@163.com','151452640841',10000,now());
insert into employees values(nextval('employees_s'),'Job','Lucy','females','lucy@163.com','15145264084',10000,now());
-- 创建索引
create index idx_name on employees(first_name,last_name);
create index idx_id on tbp(id);
create index idx_id2 on sales(id);
-- 创建约束
ALTER TABLE employees ADD CONSTRAINT check_salary CHECK (salary>0);

-- 普通视图
create view emp as select * from employees;
-- 物化视图
create materialized view emp_v as select * from employees;
refresh materialized view emp_v;

创建触发器

-- 普通触发器
CREATE OR REPLACE FUNCTION update_time_column()
RETURNS TRIGGER AS $$
BEGIN
    NEW.last_update_date := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER update_time_trigger BEFORE UPDATE ON employees FOR EACH ROW EXECUTE FUNCTION update_time_column();

-- 事件触发器
CREATE OR REPLACE FUNCTION DISABLE_DROP_TABLE()
RETURNS event_trigger AS $$
BEGIN
    if tg_tag = 'DROP TABLE'  THEN
        RAISE EXCEPTION 'Command % is disabled.', tg_tag;
    END if;
END;
$$ LANGUAGE plpgsql;

-- 切换superuser创建事件事件触发器
CREATE EVENT TRIGGER DISABLE_DROP_TABLE on ddl_command_start EXECUTE FUNCTION DISABLE_DROP_TABLE();

函数和存储过程

-- 函数
CREATE FUNCTION inc(val integer) RETURNS integer AS $$
BEGIN
  RETURN val + 1;
END;
$$ LANGUAGE PLPGSQL;

CREATE FUNCTION inc(val1 integer,val2 integer) RETURNS integer AS $$
BEGIN
  RETURN val1 + val2;
END;
$$ LANGUAGE PLPGSQL;


-- 存储过程
CREATE OR REPLACE PROCEDURE test_sum(a NUMERIC,b NUMERIC,C NUMERIC)
AS $$
DECLARE
  val int;
BEGIN
  val := a+b+c;
  RAISE NOTICE 'Total is : % !',val;
END;
$$ language plpgsql;
posted @ 2025-05-17 10:08  kahnyao  阅读(60)  评论(0)    收藏  举报