pg批量删除大表数据
1.创建存储过程
存储过程1
-- DROP PROCEDURE public.sp_batch_delete_data();
CREATE OR REPLACE PROCEDURE public.sp_batch_delete_data()
LANGUAGE plpgsql
AS $procedure$
DECLARE
batch_size INTEGER := 10000; -- 每批删除的数量
rows_affected INTEGER := batch_size;
BEGIN
-- 循环直到没有更多数据可删除
WHILE rows_affected = batch_size LOOP
-- 单次删除一批数据
DELETE FROM tb_test01
where id IN (
SELECT id
FROM tb_test01
LIMIT batch_size
);
GET DIAGNOSTICS rows_affected = ROW_COUNT;
-- 提交当前批次(如果在事务块外执行)
COMMIT;
-- 可选:添加短暂延迟以减少对系统的影响
-- PERFORM pg_sleep(0.1);
RAISE NOTICE '已删除 % 行数据', rows_affected;
END LOOP;
END;
$procedure$
;
存储过程2
CREATE OR REPLACE PROCEDURE public.sp_batch_delete_data_new()
LANGUAGE plpgsql
AS $procedure$
DECLARE
_rows_deleted INTEGER;
_total_deleted INTEGER DEFAULT 0;
BEGIN
-- 循环,直到删除的行数为 0
LOOP
-- 在一个事务内删除一批数据
-- 注意:必须使用 USING 子句确保每次计划都使用索引
DELETE FROM tb_test01
WHERE id IN (
SELECT id
FROM tb_test01
--WHERE created_at < '2023-01-01' -- 替换为你的条件
--ORDER BY id -- 或 created_at,确保顺序一致
LIMIT 10000
FOR UPDATE SKIP LOCKED -- 跳过已被锁定的行,避免阻塞
);
-- 获取本轮删除的行数
GET DIAGNOSTICS _rows_deleted = ROW_COUNT;
-- 累加总行数
_total_deleted := _total_deleted + _rows_deleted;
-- 如果本轮删除的行数为 0,说明没有更多数据需要删除,退出循环
IF _rows_deleted = 0 THEN
EXIT;
END IF;
-- 提交当前批次的事务(存储过程中默认每个语句都是一个事务,但显式提交更好)
-- 注意:在存储过程中,如果你希望每个 DELETE 是独立事务,需要这样设计。
-- 但 PROCEDURE 默认不会自动提交,可以在调用时用事务控制,或者使用 FUNCTION 并在外部循环中提交。
-- 这里更常见的做法是:在外部调用此过程时,每次调用处理一个批次并提交。
-- 因此,另一种更安全的模式是使用方法二。
-- 短暂暂停,减轻数据库压力
-- PERFORM pg_sleep(EXTRACT(EPOCH FROM _pause_duration));
-- 打印进度(可选)
RAISE NOTICE 'Batch deleted % rows. Total deleted: %.', _rows_deleted, _total_deleted;
END LOOP;
RAISE NOTICE 'Finished. Total rows deleted: %.', _total_deleted;
END;
$procedure$
;
存储过程3(游标方式)
-- DROP PROCEDURE public.sp_batch_delete_data_new01();
CREATE OR REPLACE PROCEDURE public.sp_batch_delete_data_new01()
LANGUAGE plpgsql
AS $procedure$
DECLARE
_rows_deleted INTEGER;
_total_deleted INTEGER DEFAULT 0;
DECLARE cur CURSOR FOR
SELECT id FROM tb_test01;
--v_rec tb_test01%rowtype;
_rows_fetched INTEGER;
v_rec tb_test01%rowtype;
BEGIN
--FOR rec IN cur LOOP
-- DELETE FROM tb_test01 WHERE id = rec.id;
--END LOOP;
open cur;
LOOP
fetch next from cur INTO v_rec;
EXIT WHEN NOT FOUND;
-- 在这里执行删除操作,例如:
GET DIAGNOSTICS _rows_fetched = ROW_COUNT;
DELETE FROM tb_test01 WHERE id = v_rec.id;
END LOOP;
close cur;
END;
$procedure$
;
以上代码直接在postgres=# 模式下执行
执行存储过程
call sp_batch_delete_data();
2.查看存储过程
postgres=# \df
List of functions
Schema | Name | Result data type | Argument data types | Type
--------+--------------------------+------------------+---------------------+------
public | batch_delete_data | void | | func
public | sp_batch_delete_data | | | proc
public | sp_batch_delete_data_bak | | | proc
(3 rows)
3.查看存储过程定义
SELECT pg_get_functiondef(oid)
FROM pg_proc
WHERE proname = 'sp_batch_delete_data';
postgres=# SELECT pg_get_functiondef(oid)
postgres-# FROM pg_proc
postgres-# WHERE proname = 'sp_batch_delete_data';
pg_get_functiondef
-----------------------------------------------------------
CREATE OR REPLACE PROCEDURE public.sp_batch_delete_data()+
LANGUAGE plpgsql +
AS $procedure$ +
DECLARE +
batch_size INTEGER := 10000; -- 每批删除的数量 +
rows_affected INTEGER := batch_size; +
BEGIN +
-- 循环直到没有更多数据可删除 +
WHILE rows_affected = batch_size LOOP +
-- 单次删除一批数据 +
DELETE FROM tb_test01 +
where id IN ( +
SELECT id +
FROM tb_test01 +
LIMIT batch_size +
); +
+
GET DIAGNOSTICS rows_affected = ROW_COUNT; +
+
-- 提交当前批次(如果在事务块外执行) +
COMMIT; +
+
-- 可选:添加短暂延迟以减少对系统的影响 +
-- PERFORM pg_sleep(0.1); +
+
RAISE NOTICE '已删除 % 行数据', rows_affected; +
END LOOP; +
END; +
$procedure$ +
4.放到crontab中执行(非root账号下)
##清理pg大表数据
52 17 * * * /home/hxl/scripts/run_pg_clean_bigtable.sh>/dev/null 2>&1
[hxllocalhost scripts]$ more run_pg_clean_bigtable.sh
#!/bin/bash
start_time=`date +%Y%m%d%H%M%S`
start_date=`date +%Y%m%d`
logfile=/home/hxl/scripts/logs/pg_clean_bigtable_${start_date}.log
/home/hxl/scripts/pg_clean_bigtable.sh>${logfile} 2>&1
[hxl@localhost scripts]$ more pg_clean_bigtable.sh
#!/bin/bash
. /etc/profile
. ~/.bash_profile
export PGPASSWORD=postgres
/opt/pg16/bin/psql -h localhost -U postgres -p5432<<EOF
\timing on
call sp_batch_delete_data();
\q
EOF
5.单个语句执行
WITH rows AS (
SELECT
id
FROM
tb_test01
ORDER BY id
LIMIT 10
)
DELETE FROM tb_test01
USING rows WHERE tb_test01.id = rows.id
;
浙公网安备 33010602011771号