postgresql 表和索引的膨胀简析

postgresql 表和索引的膨胀是非常常见的,一方面是因为 autovacuum 清理标记为 dead tuple 的速度跟不上,另一方面也可能是由于长事物,未决事物,复制槽引起的。

#初始化数据

zabbix=# create table tmp_t0(c0 varchar(100),c1 varchar(100), c2 varchar(100));
CREATE TABLE
zabbix=# 
zabbix=# create index idx_tmp_t0 on tmp_t0(c0);
CREATE INDEX
zabbix=# 
zabbix=# create index idx_tmp_t0_1 on tmp_t0(c1);
CREATE INDEX
zabbix=#
zabbix=# insert into tmp_t0 select id::varchar,md5(id::varchar),md5(md5(id::varchar)) from generate_series(1,1000000) as id ;
INSERT 0 1000000

查看大小

select pt.schemaname||'.'||pt.tablename,pg_relation_filepath(pt.schemaname||'.'||pt.tablename),
       pg_table_size(pt.schemaname||'.'||pt.tablename),
       pg_relation_size(pt.schemaname||'.'||pt.tablename),
       pg_total_relation_size(pt.schemaname||'.'||pt.tablename),
       pi.schemaname||'.'||pi.indexname,pg_relation_filepath(pi.schemaname||'.'||pi.indexname),
       pg_relation_size(pi.schemaname||'.'||pi.indexname),--指定的单个索引
       pg_indexes_size(pi.schemaname||'.'||pi.tablename) --表上的所有索引
 from pg_tables pt
      left outer join pg_indexes pi 
                   on pt.schemaname||'.'||pt.tablename = pi.schemaname||'.'||pi.tablename
where 1=1
  and pt.schemaname='public'
  and pt.tablename='tmp_t0'
;

   ?column?    | pg_relation_filepath | pg_table_size | pg_relation_size | pg_total_relation_size |      ?column?       | pg_relation_filepath | pg_relation_size | pg_indexes_size 
---------------+----------------------+---------------+------------------+------------------------+---------------------+----------------------+------------------+-----------------
 public.tmp_t0 | base/24579/24611     |     108470272 |        108421120 |              224714752 | public.idx_tmp_t0   | base/24579/24614     |         39215104 |       116244480
 public.tmp_t0 | base/24579/24611     |     108470272 |        108421120 |              224714752 | public.idx_tmp_t0_1 | base/24579/24615     |         77029376 |       116244480
(2 rows)

zabbix=# select * from pg_stat_all_tables where relname = 'tmp_t0';

 relid | schemaname | relname | seq_scan | seq_tup_read | idx_scan | idx_tup_fetch | n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze | last_vacuum | last_autovacuum | last_analyze |       last_autoanalyze        | vacuum_count | autovacuum_count | analyze_count | autoanalyze_count 
-------+------------+---------+----------+--------------+----------+---------------+-----------+-----------+-----------+---------------+------------+------------+---------------------+-------------+-----------------+--------------+-------------------------------+--------------+------------------+---------------+-------------------
 24611 | public     | tmp_t0  |        2 |            0 |        0 |             0 |   1000000 |         0 |         0 |             0 |    1000000 |          0 |                   0 |             |                 |              | 2018-09-19 15:22:10.987535+08 |            0 |                0 |             0 |                 1
(1 row)

#删除操作

zabbix=# delete from tmp_t0 where c0::int4 > '200000';
DELETE 800000

查看大小

select pt.schemaname||'.'||pt.tablename,pg_relation_filepath(pt.schemaname||'.'||pt.tablename),
       pg_table_size(pt.schemaname||'.'||pt.tablename),
       pg_relation_size(pt.schemaname||'.'||pt.tablename),
       pg_total_relation_size(pt.schemaname||'.'||pt.tablename),
       pi.schemaname||'.'||pi.indexname,pg_relation_filepath(pi.schemaname||'.'||pi.indexname),
       pg_relation_size(pi.schemaname||'.'||pi.indexname),--指定的单个索引
       pg_indexes_size(pi.schemaname||'.'||pi.tablename) --表上的所有索引
 from pg_tables pt
      left outer join pg_indexes pi 
                   on pt.schemaname||'.'||pt.tablename = pi.schemaname||'.'||pi.tablename
where 1=1
  and pt.schemaname='public'
  and pt.tablename='tmp_t0'
;

   ?column?    | pg_relation_filepath | pg_table_size | pg_relation_size | pg_total_relation_size |      ?column?       | pg_relation_filepath | pg_relation_size | pg_indexes_size 
---------------+----------------------+---------------+------------------+------------------------+---------------------+----------------------+------------------+-----------------
 public.tmp_t0 | base/24579/24611     |     108478464 |        108421120 |              224722944 | public.idx_tmp_t0   | base/24579/24614     |         39215104 |       116244480
 public.tmp_t0 | base/24579/24611     |     108478464 |        108421120 |              224722944 | public.idx_tmp_t0_1 | base/24579/24615     |         77029376 |       116244480
(2 rows)

可以看出,即使删除了大量的数据,表和索引的大小依然没有发生变化。

zabbix=# select * from pg_stat_all_tables where relname = 'tmp_t0';

 relid | schemaname | relname | seq_scan | seq_tup_read | idx_scan | idx_tup_fetch | n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze | last_vacuum |        last_autovacuum        | last_analyze |       last_autoanalyze        | vacuum_count | autovacuum_count | analyze_count | autoanalyze_count 
-------+------------+---------+----------+--------------+----------+---------------+-----------+-----------+-----------+---------------+------------+------------+---------------------+-------------+-------------------------------+--------------+-------------------------------+--------------+------------------+---------------+-------------------
 24611 | public     | tmp_t0  |        4 |      1800000 |        0 |             0 |   1000000 |         0 |    800000 |             0 |     200000 |     800000 |                   0 |             | 2018-09-19 15:26:13.424378+08 |              | 2018-09-19 15:24:15.300776+08 |            0 |                3 |             0 |                 2
(1 row)

zabbix=# select * from pg_class pc where pc.relname = 'tmp_t0';

 relname | relnamespace | reltype | reloftype | relowner | relam | relfilenode | reltablespace | relpages | reltuples | relallvisible | reltoastrelid | relhasindex | relisshared | relpersistence | relkind | relnatts | relchecks | relhasoids | relhaspkey | relhasrules | relhastriggers | relhassubclass | relrowsecurity | relforcerowsecurity | relispopulated | relreplident | relfrozenxid | relminmxid | relacl | reloptions 
---------+--------------+---------+-----------+----------+-------+-------------+---------------+----------+-----------+---------------+---------------+-------------+-------------+----------------+---------+----------+-----------+------------+------------+-------------+----------------+----------------+----------------+---------------------+----------------+--------------+--------------+------------+--------+------------
 tmp_t0  |         2200 |   24613 |         0 |       10 |     0 |       24611 |             0 |    13235 |     1e+06 |             0 |             0 | t           | f           | p              | r       |        3 |         0 | f          | f          | f           | f              | f              | f              | f                   | t              | d            |         1184 |          1 |        | 
(1 row)

从 pg_stat_all_tables.autovacuum_count 也可以看出,即使发生了 autovacuum 也依然没有收缩空间(只有 vacuum full 才会收缩空间,类似ctas)。

手动 vacuum

zabbix=# vacuum verbose tmp_t0;
INFO:  vacuuming "public.tmp_t0"
INFO:  index "idx_tmp_t0" now contains 1000000 row versions in 4787 pages
DETAIL:  0 index row versions were removed.
0 index pages have been deleted, 0 are currently reusable.
CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO:  index "idx_tmp_t0_1" now contains 1000000 row versions in 9403 pages
DETAIL:  0 index row versions were removed.
0 index pages have been deleted, 0 are currently reusable.
CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO:  "tmp_t0": found 0 removable, 1000000 nonremovable row versions in 13235 out of 13235 pages
DETAIL:  800000 dead row versions cannot be removed yet.
There were 0 unused item pointers.
Skipped 0 pages due to buffer pins.
0 pages are entirely empty.
CPU 0.00s/0.16u sec elapsed 0.16 sec.
vacuum

zabbix=# select * from pg_stat_all_tables where relname = 'tmp_t0';

 relid | schemaname | relname | seq_scan | seq_tup_read | idx_scan | idx_tup_fetch | n_tup_ins | n_tup_upd | n_tup_del | n_tup_hot_upd | n_live_tup | n_dead_tup | n_mod_since_analyze |          last_vacuum          |        last_autovacuum        | last_analyze |       last_autoanalyze        | vacuum_count | autovacuum_count | analyze_count | autoanalyze_count 
-------+------------+---------+----------+--------------+----------+---------------+-----------+-----------+-----------+---------------+------------+------------+---------------------+-------------------------------+-------------------------------+--------------+-------------------------------+--------------+------------------+---------------+-------------------
 24611 | public     | tmp_t0  |        4 |      1800000 |        0 |             0 |   1000000 |         0 |    800000 |             0 |     200000 |     800000 |                   0 | 2018-09-19 15:42:22.095405+08 | 2018-09-19 15:42:13.804102+08 |              | 2018-09-19 15:24:15.300776+08 |            1 |               19 |             0 |                 2
(1 row)

注意这个提示 DETAIL: 800000 dead row versions cannot be removed yet.

这是因为在整个测试之前,另外一个毫不相干的数据库表上挂起了一个长事务,结束长事务后再 vacuum

zabbix=# vacuum verbose tmp_t0;
INFO:  vacuuming "public.tmp_t0"
INFO:  scanned index "idx_tmp_t0" to remove 800000 row versions
DETAIL:  CPU 0.00s/0.17u sec elapsed 0.19 sec
INFO:  scanned index "idx_tmp_t0_1" to remove 800000 row versions
DETAIL:  CPU 0.01s/0.49u sec elapsed 0.54 sec
INFO:  "tmp_t0": removed 800000 row versions in 10668 pages
DETAIL:  CPU 0.00s/0.02u sec elapsed 0.03 sec
INFO:  index "idx_tmp_t0" now contains 200000 row versions in 4787 pages
DETAIL:  800000 index row versions were removed.
0 index pages have been deleted, 0 are currently reusable.
CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO:  index "idx_tmp_t0_1" now contains 200000 row versions in 9403 pages
DETAIL:  800000 index row versions were removed.
0 index pages have been deleted, 0 are currently reusable.
CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO:  "tmp_t0": found 800000 removable, 200000 nonremovable row versions in 13235 out of 13235 pages
DETAIL:  0 dead row versions cannot be removed yet.
There were 0 unused item pointers.
Skipped 0 pages due to buffer pins.
0 pages are entirely empty.
CPU 0.04s/1.40u sec elapsed 1.57 sec.
INFO:  "tmp_t0": truncated 13235 to 2568 pages
DETAIL:  CPU 0.02s/0.01u sec elapsed 0.07 sec
VACUUM

注意这几个输出:

INFO:  "tmp_t0": removed 800000 row versions in 10668 pages
INFO:  "tmp_t0": found 800000 removable, 200000 nonremovable row versions in 13235 out of 13235 pages

测试环境使用的是单个表,数据量又少,autovacuum 能及时的处理 dead tuple,实际的生产环境大多数是由于 autovacuum 的不及时造成的。

预防表的膨胀,可以从以下几个方便入手:

1、调整autovacuum的参数,默认值总是设置的很保守。对于配置好的机器(配置SSD)
可以参考 http://postgres.cn/docs/9.6/runtime-config-autovacuum.html 认真阅读

select *
  from pg_settings ps
 where 1=1
   and lower(ps.name) like '%autovacuum%'
 order by ps.name
;

2、关注长事务

select extract(epoch FROM (clock_timestamp() - xact_start )) as longtrans,
       extract(epoch FROM (clock_timestamp() - query_start )) as longquery
  from pg_stat_activity 
 where 1=1
   and state <> 'idle'
;

3、关注表的膨胀率

select schemaname||'.'||relname,
	   n_dead_tup,
	   n_live_tup,
	   coalesce(round(n_dead_tup * 100 / (case when n_live_tup + n_dead_tup = 0 then null else n_live_tup + n_dead_tup end ),2),0.00) as dead_tup_ratio
  from pg_stat_all_tables
 where 1=1
   and n_dead_tup >= 10000
 order by dead_tup_ratio desc
 limit 10
; 

参考:
http://postgres.cn/docs/9.6/runtime-config-autovacuum.html

posted @ 2018-09-19 16:52  peiybpeiyb  阅读(2062)  评论(0编辑  收藏  举报