pg批量删除大表数据

 

1.创建存储过程

存储过程1

-- DROP PROCEDURE public.sp_batch_delete_data();

CREATE OR REPLACE PROCEDURE public.sp_batch_delete_data()
 LANGUAGE plpgsql
AS $procedure$
DECLARE
  batch_size INTEGER := 10000; -- 每批删除的数量
  rows_affected INTEGER := batch_size;
BEGIN
  -- 循环直到没有更多数据可删除
  WHILE rows_affected = batch_size LOOP
    -- 单次删除一批数据
    DELETE FROM tb_test01
    where id IN (
      SELECT id
      FROM tb_test01
      LIMIT batch_size
    );
    
    GET DIAGNOSTICS rows_affected = ROW_COUNT;
    
    -- 提交当前批次(如果在事务块外执行)
    COMMIT;
    
    -- 可选:添加短暂延迟以减少对系统的影响
    -- PERFORM pg_sleep(0.1);
    
    RAISE NOTICE '已删除 % 行数据', rows_affected;
  END LOOP;
END;
$procedure$
;

 

存储过程2

 

CREATE OR REPLACE PROCEDURE public.sp_batch_delete_data_new()
 LANGUAGE plpgsql
AS $procedure$
DECLARE
    _rows_deleted INTEGER;
    _total_deleted INTEGER DEFAULT 0;
BEGIN
    -- 循环,直到删除的行数为 0
    LOOP
        -- 在一个事务内删除一批数据
        -- 注意:必须使用 USING 子句确保每次计划都使用索引
        DELETE FROM tb_test01
        WHERE id IN (
            SELECT id
            FROM tb_test01
            --WHERE created_at < '2023-01-01' -- 替换为你的条件
            --ORDER BY id -- 或 created_at,确保顺序一致
            LIMIT 10000
            FOR UPDATE SKIP LOCKED -- 跳过已被锁定的行,避免阻塞
        );
        
        -- 获取本轮删除的行数
        GET DIAGNOSTICS _rows_deleted = ROW_COUNT;
        
        -- 累加总行数
        _total_deleted := _total_deleted + _rows_deleted;
        
        -- 如果本轮删除的行数为 0,说明没有更多数据需要删除,退出循环
        IF _rows_deleted = 0 THEN
            EXIT;
        END IF;
        
        -- 提交当前批次的事务(存储过程中默认每个语句都是一个事务,但显式提交更好)
        -- 注意:在存储过程中,如果你希望每个 DELETE 是独立事务,需要这样设计。
        -- 但 PROCEDURE 默认不会自动提交,可以在调用时用事务控制,或者使用 FUNCTION 并在外部循环中提交。
        -- 这里更常见的做法是:在外部调用此过程时,每次调用处理一个批次并提交。
        -- 因此,另一种更安全的模式是使用方法二。

        -- 短暂暂停,减轻数据库压力
        -- PERFORM pg_sleep(EXTRACT(EPOCH FROM _pause_duration));
        
        -- 打印进度(可选)
        RAISE NOTICE 'Batch deleted % rows. Total deleted: %.', _rows_deleted, _total_deleted;
    END LOOP;
    
    RAISE NOTICE 'Finished. Total rows deleted: %.', _total_deleted;
END;
$procedure$
;

 

存储过程3(游标方式)

-- DROP PROCEDURE public.sp_batch_delete_data_new01();

CREATE OR REPLACE PROCEDURE public.sp_batch_delete_data_new01()
 LANGUAGE plpgsql
AS $procedure$
DECLARE
    _rows_deleted INTEGER;
    _total_deleted INTEGER DEFAULT 0;

    DECLARE cur CURSOR FOR 
    SELECT id FROM tb_test01;
    
    --v_rec tb_test01%rowtype;
    _rows_fetched INTEGER;

    v_rec tb_test01%rowtype;


BEGIN
    
    --FOR rec IN cur LOOP
    --    DELETE FROM tb_test01 WHERE id = rec.id;
    --END LOOP;
    open cur;
    LOOP
        fetch next from cur INTO v_rec;
        EXIT WHEN NOT FOUND;
        -- 在这里执行删除操作,例如:
        GET DIAGNOSTICS _rows_fetched = ROW_COUNT;
        DELETE FROM tb_test01 WHERE id = v_rec.id;
    END LOOP;
    close cur;
END;
$procedure$
;

 

 

 

 

以上代码直接在postgres=# 模式下执行

 

执行存储过程

call sp_batch_delete_data();

 

2.查看存储过程

postgres=# \df
                                 List of functions
 Schema |           Name           | Result data type | Argument data types | Type 
--------+--------------------------+------------------+---------------------+------
 public | batch_delete_data        | void             |                     | func
 public | sp_batch_delete_data     |                  |                     | proc
 public | sp_batch_delete_data_bak |                  |                     | proc
(3 rows)

 

3.查看存储过程定义

SELECT pg_get_functiondef(oid)
FROM pg_proc
WHERE proname = 'sp_batch_delete_data';

 

postgres=# SELECT pg_get_functiondef(oid) 
postgres-# FROM pg_proc 
postgres-# WHERE proname = 'sp_batch_delete_data';
                    pg_get_functiondef                     
-----------------------------------------------------------
 CREATE OR REPLACE PROCEDURE public.sp_batch_delete_data()+
  LANGUAGE plpgsql                                        +
 AS $procedure$                                           +
 DECLARE                                                  +
   batch_size INTEGER := 10000; -- 每批删除的数量         +
   rows_affected INTEGER := batch_size;                   +
 BEGIN                                                    +
   -- 循环直到没有更多数据可删除                          +
   WHILE rows_affected = batch_size LOOP                  +
     -- 单次删除一批数据                                  +
     DELETE FROM tb_test01                                +
     where id IN (                                        +
       SELECT id                                          +
       FROM tb_test01                                     +
       LIMIT batch_size                                   +
     );                                                   +
                                                          +
     GET DIAGNOSTICS rows_affected = ROW_COUNT;           +
                                                          +
     -- 提交当前批次(如果在事务块外执行)                +
     COMMIT;                                              +
                                                          +
     -- 可选:添加短暂延迟以减少对系统的影响              +
     -- PERFORM pg_sleep(0.1);                            +
                                                          +
     RAISE NOTICE '已删除 % 行数据', rows_affected;       +
   END LOOP;                                              +
 END;                                                     +
 $procedure$                                              +
 

 

4.放到crontab中执行(非root账号下)

##清理pg大表数据
52 17 * * * /home/hxl/scripts/run_pg_clean_bigtable.sh>/dev/null 2>&1

 

[hxllocalhost scripts]$ more run_pg_clean_bigtable.sh
#!/bin/bash
start_time=`date +%Y%m%d%H%M%S`
start_date=`date +%Y%m%d`

logfile=/home/hxl/scripts/logs/pg_clean_bigtable_${start_date}.log
/home/hxl/scripts/pg_clean_bigtable.sh>${logfile} 2>&1

 

[hxl@localhost scripts]$ more pg_clean_bigtable.sh
#!/bin/bash
. /etc/profile
. ~/.bash_profile
export PGPASSWORD=postgres
/opt/pg16/bin/psql -h localhost -U postgres -p5432<<EOF
\timing on
call sp_batch_delete_data();
\q
EOF

 

 

 

 

 

 

5.单个语句执行

WITH rows AS (
    SELECT
      id
    FROM
      tb_test01
    ORDER BY id
    LIMIT 10
)
DELETE FROM tb_test01
USING rows WHERE tb_test01.id = rows.id
;

 

posted @ 2025-08-20 17:00  slnngk  阅读(24)  评论(0)    收藏  举报