gp sql

appendonly

-- drop table if exists test_appendonly;
-- create table test_appendonly with(appendonly=true, compresslevel=5) as
-- select generate_series(0, 1000) a, 'helloworld'::varchar(50) b
-- distributed by(a);

select oid from pg_class where relname='test_appendonly';
select oid, oid::regclass from pg_class where relname='test_appendonly' or relname like '%212266%';
-- \d pg_aoseg.pg_aoseg_212266
select * from get_ao_compression_ratio('test_appendonly');
select sum(eofuncompressed)/sum(eof) as compression_ratio from gp_dist_random('pg_aoseg.pg_aoseg_212266');

select * from get_ao_distribution('test_appendonly') order by segmentid;
select gp_segment_id, tupcount from gp_dist_random('pg_aoseg.pg_aoseg_212266') order by gp_segment_id;

drop table if exists public.table_info cascade;
create table public.table_info (
tablename text, -- 表名
subparname text, -- 分区各
tablecount bigint, -- 表的行数
tablesize bigint, -- 表大小
prettysize text, -- 格式化大小输出
max_div_avg float, -- 斜率,最大节点数据量/平均节点数据量
compression_ratio text -- 压缩率
);

-- 获取表信息
create or replace function public.get_table_info(tablename text) returns setof table_info as $$

def one_table_info(plpy, tablename, subparname, aosegname, privilege):
    aosegsql = ""
    # plpy.info(privilege)
    if privilege == '1':
        aosegsql = '''
            select '%s' tablename, '%s' subparname, 
                coalesce(sum(tupcount)::bigint, 0) tablecount,
                coalesce(sum(eof)::bigint, 0) tablesize,
                pg_size_pretty(coalesce(sum(tupcount)::bigint, 0)) prettysize,
                coalesce(max(tupcount)::bigint, 1)/(case when coalesce(avg(tupcount), 1.0) = 0 then 1 
                    else coalesce(avg(tupcount), 1.0) end) max_div_avg,
                coalesce(sum(eofuncompressed), 1)/(case when coalesce(sum(eof), 1.0) = 0 then 1 
                    else coalesce(sum(eof), 1.0) end) compression_ratio
            from gp_dist_random('%s');
        '''%(tablename, subparname, aosegname)
    else:
        aosegsql = '''
            select '%s' tablename, '%s' subparname, 
                0 tablecount, 0 tablesize, 'permission denied' prettysize,
                0 max_div_avg, 0 compression_ratio;
        '''%(tablename, subparname)
    plpy.info(aosegsql)
    result_rv=plpy.execute(aosegsql)
    # plpy.info(result_rv[0]);
    return result_rv[0]

try:
    table_name = tablename.lower().split('.')[1]
    table_schema = tablename.lower().split('.')[0]
except(IndexError):
    plpy.error('Please in put "tableschema.table_name"')

# check version of database
check_version_sql = """
    select substring(version(), 'Database (.*) build') as version;
"""
rv = plpy.execute(check_version_sql)
version = rv[0]['version']
plpy.execute("set enable_seqscan=off")

# get table oid
get_table_oid = ''
if version > '3.4.0':
    get_table_oid = """
        select a.oid, reloptions, b.segrelid, regclass2text(b.segrelid::regclass) aosegname, relstorage,
            case has_table_privilege(user, b.segrelid, 'select') when 't' then '1' else '0' end privilege
        from pg_class a left join pg_appendonly b on a.oid=b.relid where a.oid='%s'::regclass;
    """%(tablename)
else:
    get_table_oid = """
        select oid, reloptions, relaosegrelid, regclass2text(relaosegrelid::regclass) aosegname, relstorage,
            case has_table_privilege(user, relaosegrelid, 'select') when 't' then '1' else '0' end privilege
        from pg_class where oid='%s'::regclass;
    """%(tablename)

try:
    rv_oid = plpy.execute(get_table_oid, 5)
    if not rv_oid:
        plpy.error('Did not find any relation named "' + tablename + '".')
except (Error):
    plpy.error('Did not find any relation named "' + tablename + '".')

# 
table_oid = rv_oid[0]['oid']
if rv_oid[0]['relstorage'] != 'a':
    plpy.error(tablename + ' is not appendonly table, this function only support appendonly talbe')
# plpy.info('table_oid')

# check if table is partitin table
check_par_table="select count(*) from pg_partition where parrelid=%s"%(table_oid)
if version > '3.4.0':
    tablecount_sql = """
        select regclass2text(pp.parrelid::regclass) tabname, prl.parname, parruleord, pa.segrelid, 
            regclass2text(pa.segrelid::regclass) aosegname,
            case has_table_privilege(user, pa.segrelid, 'select') when 't' then '1' else '0' end privilege
        from pg_partition pp, pg_partition_rule prl, pg_appendonly pa 
        where pp.paristemplate=false and pp.parrelid=%s and prl.paroid=pp.oid and pa.relid=prl.parchildrelid 
        order by prl.parruleord;
    """%(table_oid)
else:
    tablecount_sql = """
        select regclass2text(pp.parrelid::regclass) tabname, prl.parname, parruleord, pc.relaosegrelid, 
            regclass2text(pc.relaosegrelid::regclass) aosegname,
            case has_table_privilege(user, pc.relaosegrelid, 'select') when 't' then '1' else '0' end privilege
        from pg_partition pp, pg_partition_rule prl, pg_class pc 
        where pp.paristemplate=false and pp.parrelid=%s and prl.paroid=pp.oid and pc.oid=prl.parchildrelid 
        and relaosegrelid <> 0 order by prl.parruleord;
    """%(table_oid)

rv = plpy.execute(check_par_table)

if rv[0]['count'] == 1:
    al = plpy.execute(tablecount_sql)
    result_rv = []
    rv_tmp = []

    totalcount = 0
    totalsize = 0
    unzipsize = 0
    compression_ratio = 1
    for i in al:
        rv_ao = one_table_info(plpy, tablename, i['parname'], i['aosegname'], str(i['privilege']))
        rv_tmp.append(rv_ao)
        totalsize = totalsize + rv_ao['tablesize']
        totalcount = totalcount + rv_ao['tablecount']
        unzipsize = unzipsize + rv_ao['tablesize'] * rv_ao['compression_ratio']
    if totalsize == 0:
        compression_total = 1
    else:
        compression_ratio = unzipsize/totalsize
    total_count_sql = """
        select '%s' as tablename, '###ALL###' as subparname, %d as tablecount, %d as tablesize, 
            pg_size_pretty(%d::bigint) prettysize, null as max_div_avg, %f as compression_ratio;
    """%(tablename, totalcount, totalsize, totalsize, compression_ratio)
    a2 = plpy.execute(total_count_sql)
    result_rv.append(a2[0])

    plpy.info('===' + total_count_sql)

    for i in rv_tmp:
        result_rv.append(i)
    return result_rv;
else:
    result_rv = []
    rv_ao = one_table_info(plpy, tablename, '', rv_oid[0]['aosegname'], str(rv_oid[0]['privilege']));
    result_rv.append(rv_ao)
    return result_rv

$$ language plpythonu;

select * from get_table_info('public.test_appendonly');
select get_table_info('public.test_appendonly');

hostname

-- create language plpythonu ;
create or replace function public.hostname() returns text as $$
import socket;
return socket.gethostname();
$$ language plpythonu;

create or replace function public.reverse(str text) returns text as $$
if str != None:
return str[::-1]
else:
return None
$$ language plpythonu;

create or replace function public.json_parse(data text) returns text as $$
import json
try:
mydata = json.loads(data)
except:
return ['Parse json error']
returndata = []
try:
for people in mydata['people']:
returndata.append(people['firstName'] + ' ' + people['lastName'])
except:
return ['Parse json error']
return returndata
$$ language plpythonu;

select hostname();
select hostname() from gp_dist_random('gp_id');
select gp_segment_id, count(1) from gp_dist_random('pg_class') group by 1 order by 1;

select * from gp_configuration;
select * from gp_segment_configuration;
select * from pg_filespace_entry;
select * from gp_configuration_history;

-- 判断某个表是否是分区表
select count(1) from pg_partition where parrelid='public.tb_partition_list_yyyymmdd'::regclass;
select * from pg_partition_rule;

drop view public.v_pg_partitions;
create view public.v_pg_partitions as
select pp.parrelid tableoid, prl.parchildrelid, prl.parname as partitionname,
case
when pp.parkind='h'::"char" then 'hash'::text
when pp.parkind='r'::"char" then 'range'::text
when pp.parkind='l'::"char" then 'list'::text
else NULL::text
end as partitiontype,
case
when pg_get_expr(prl.parrangeend, prl.parchildrelid) = ''
then pg_get_expr(prl.parlistvalues, prl.parchildrelid)
else pg_get_expr(prl.parrangeend, prl.parchildrelid)
end as HIGH_VALUE,
pg_get_partition_rule_def(prl.oid, true) as partitionboundary,
prl.parruleord as partitionposition
from pg_partition pp, pg_partition_rule prl
where pp.paristemplate = false and prl.paroid=pp.oid;

select * from public.v_pg_partitions where tableoid='tb_partition_list_yyyymmdd'::regclass order by partitionposition;

partition

drop table if exists public.tb_partition_range_yyyymmdd cascade;
create table public.tb_partition_range_yyyymmdd (
id numeric,
yyyymmdd date
) with(appendonly=true, compresslevel=5)
distributed by(id)
partition by range(yyyymmdd)
(
partition p20120811 start ('2012-08-11'::date) end ('2012-08-12'::date)
-- partition p20120812 start ('2012-08-12'::date) end ('2012-08-13'::date)
);

drop table if exists public.tb_partition_list_yyyymmdd cascade;
create table public.tb_partition_list_yyyymmdd (
id numeric,
yyyymmdd varchar(128)
) with(appendonly=true, compresslevel=5)
distributed by(id)
partition by list(yyyymmdd)
(
partition p20120811 values('20120811'),
partition p20120812 values('20120812')
);

drop view if exists public.v_pg_add_partitions cascade;
create view public.v_pg_add_partitions as
select pp.parrelid tableoid, prl.parchildrelid, prl.parname as partitionname,
case
when pp.parkind='h'::"char" then 'hash'::text
when pp.parkind='r'::"char" then 'range'::text
when pp.parkind='l'::"char" then 'list'::text
else NULL::text
end as partitiontype,
translate(pg_get_expr(prl.parlistvalues, prl.parchildrelid), '-''::date
character varying bpchar numeric double precision timestamp without time zone', '') as partitionlistvalue,
substring(translate(pg_get_expr(prl.parrangestart, prl.parchildrelid), '-''::date
character varying bpchar numeric double precision timestamp without time zone', ''), 1, 8) as partitionrangestart,
substring(translate(pg_get_expr(prl.parrangeend, prl.parchildrelid), '-''::date
character varying bpchar numeric double precision timestamp without time zone', ''), 1, 8) as partitionrangeend,
prl.parruleord as partitionposition,
substring(parlistvalues, 'consttype ([0-9]+)')::integer::regtype listtype,
substring(parrangeend, 'consttype ([0-9]+)')::integer::regtype rangetype
from pg_partition pp, pg_partition_rule prl where pp.paristemplate=false and prl.paroid=pp.oid;

create or replace function public.add_partition_info(tableoid oid, days_from_now integer) returns setof text as $$

import datetime

def now():
    d = datetime.datetime.now()
    format = '%Y%m%d'
    return datetime.datetime.strftime(d, format)

def add_day(d, n):
    format = '%Y%m%d'
    d2 = datetime.datetime.strptime(d, format)
    d3 = d2 + datetime.timedelta(days = n)
    return datetime.datetime.strftime(d3, format)

def add_month(d, n):
    format = '%Y%m%d'
    formatymd = '%Y%m01'
    if d.__len__() == 6:
        format = '%Y%m'
        formatymd = '%Y%m'
    d2 = datetime.datetime.strptime(d, format)
    d3 = d2 + datetime.timedelta(days = 31 * n)
    return datetime.datetime.strftime(d3, formatymd)

relist = []

# pre_value 是上一个分区的值,主要是 list 分区时使用
sql = """select *, tableoid::regclass tablename, lead(case when partitionrangeend <>  '' then partitionrangeend 
 else partitionlistvalue end) over(partition by tableoid order by partitionposition desc) as pre_value,
 row_number() over(partition by tableoid order by partitionposition desc) rn
 from v_pg_add_partitions where substr(partitionname, 1, 3) = 'p20' and tableoid=%s;"""%(tableoid)
rv = plpy.execute(sql);
sql_relation = "select array_to_string(reloptions, ',') reloptions from pg_class where oid=%s"%(tableoid)
rv_relation = plpy.execute(sql_relation)

if rv.nrows() == -1:
    return []
else:
    reloptions = rv_relation[0]['reloptions']
    tablename = rv[0]['tablename']
    partitiontype = rv[0]['partitiontype']
    partitionname = rv[0]['partitionname']
    pre_value = rv[0]['pre_value']
    now_add_7days = add_day(now(), days_from_now)

    # 处理 range 分区
    if partitiontype == 'range':
        rangetype = rv[0]['rangetype']
        partitionrangestart = rv[0]['partitionrangestart']
        partitionrangeend = rv[0]['partitionrangeend']
        interval = int(partitionrangeend) - int(partitionrangestart)

        # 按月分区
        if partitionname.__len__() == 7:
            func_add = add_month
            interval = int(partitionrangeend[0:6]) - int(partitionrangestart[0:6])

        # 按天分区
        elif partitionname.__len__() == 9:
            func_add = add_day

        # 分区名不规范,不处理
        else:
            return []
        
        partitionrangestart = now()
        while partitionrangestart < now_add_7days:
            partitionrangeend = func_add(partitionrangestart,  )
            partitionname = 'p' + partitionrangestart
            add_sql = "alter table %s add partition %s start ('%s'::%s) end ('%s'::%s)"%(tablename, partitionname, partitionrangestart, rangetype, partitionrangeend, rangetype)
                
            if reloptions != None and reloptions != '':
                add_sql += 'with(%s);'%(reloptions)
            else:
                add_sql += ';'
            plpy.execute(add_sql);
            relist.append(add_sql)
            partitionrangestart = func_add(partitionrangestart, interval)

    # 处理 list 分区
    if partitiontype == 'list':
        listtype = rv[0]['listtype']
        partitionlistvalue = rv[0]['partitionlistvalue']
        interval = int(partitionlistvalue) - int(pre_value)

        # 按月分区
        if partitionname.__len__() == 7:
            func_add = add_month

        # 按天分区
        elif partitionname.__len__() == 9:
            func_add = add_day

        # 分区名不规范,不处理
        else:
            return []
        
        partitionlistvalue = now()
        while partitionlistvalue < now_add_7days:
            partitionname = 'p' + partitionlistvalue
            add_sql = "alter table %s add partition %s values('%s'::%s)"%(tablename, partitionname, partitionlistvalue, listtype)
                
            if reloptions != None and reloptions != '':
                add_sql += 'with(%s);'%(reloptions)
            else:
                add_sql += ';'
            plpy.execute(add_sql);
            relist.append(add_sql)
            partitionlistvalue = func_add(partitionlistvalue, interval)
return relist

$$ language plpythonu;

select add_partition_info('tb_partition_list_yyyymmdd'::regclass, 1);

select 'grant select on ' || nspname || '.' || relname || ' to gpadmin;' from pg_class a, pg_namespace b
where relname not like '%_1_prt%' and relkind='r' and has_table_privilege('gpadmin', a.oid, 'select')='f'
and a.relnamespace=b.oid and nspname not in ('pg_catalog', 'information_schema') and nspname not like '%pg_tmp%';

view

-- 1.获取表的字段信息,表名pg_class,schema在pg_namespace,字段信息pg_attribute
select a.attname, pg_catalog.format_type(a.atttypid, a.atttypmod) as data_type from pg_catalog.pg_attribute a,
(
select c.oid from pg_catalog.pg_class c left join pg_catalog.pg_namespace n on n.oid=c.relnamespace
where c.relname='pg_class' and n.nspname='pg_catalog'
) b
where a.attrelid=b.oid and a.attnum > 0 and not a.attisdropped order by a.attnum;

-- 1.1 获取表的字段信息
select a.attname, pg_catalog.format_type(a.atttypid, a.atttypmod) as data_type from pg_catalog.pg_attribute a
where a.attrelid='pg_catalog.pg_class'::regclass and a.attnum > 0 and not a.attisdropped order by a.attnum;

-- 2. 获取表的分布键gp_distribution_policy中
-- 2.1 localoid与pg_class的oid关联,attrnums是一个数组,记录字段attnum,与pg_attribute中的attnum关联
drop table if exists public.cxfa2 cascade;
create table public.cxfa2(a int, b int, c int, d int) distributed by(c, a);

select * from gp_distribution_policy where localoid='cxfa2'::regclass;

select * from gp_distribution_policy a, (select generate_series(1, 10)) i (i), pg_attribute b
where a.attrnums[i.i] is not null and a.localoid=b.attrelid and a.attrnums[i.i]=b.attnum
and a.localoid='public.cxfa2'::regclass order by i.i;

-- 3. 获取建表的时间
select * from pg_stat_file('pg_hba.conf');

drop type if exists public.stat_file cascade;
create type public.stat_file as (
size bigint,
access timestamp(0),
modification timestamp(0),
change timestamp(0)
);
create or replace function public.get_file_stat(filename text) returns stat_file as $$
import os, time
size = None
access = None
modification = None
change = None

try:
    a = os.stat(filename)
    size = int(a.st_size)
    access = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(a.st_atime))
    modification = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(a.st_mtime))
    change = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(a.st_ctime))
except Exception, e:
    pass
return [size, access, modification, change]

$$ language plpythonu;

select * from get_file_stat('pg_hba.conf');

drop view if exists public.v_table_modify_time cascade;
create view public.v_table_modify_time as
select tab_oid, schemaname, tablename, (filestat).access, (filestat).modification, (filestat).change
from
(
select a.oid tab_oid, e.nspname as schemaname, a.relname as tablename,
get_file_stat(fselocation || '/' ||
case when reltablespace=1664 then 'global'
when reltablespace=0 then 'base' || '/' || d.oid
else reltablespace || '/' || d.oid
end
|| '/' || relfilenode) as filestat
from pg_class a, pg_tablespace b, pg_filespace_entry c, pg_namespace e, pg_database d
where d.datname=current_database()
and (case when a.reltablespace = 0 then 1663 else a.reltablespace end)=b.oid
and b.spcfsoid=c.fsefsoid
and e.oid=a.relnamespace
and c.fsedbid=1
and a.relstorage in ('a', 'h')
and a.relkind='r'
) t;

select 'drop table ' || schemaname || '.' || tablename || ' cascade;'
from v_table_modify_time where access < now() - '1 days'::interval and tablename not like '%_1_prt_p%'
and schemaname='public' order by access;

-- 自定义类型转换
select castfunc::regprocedure from pg_cast where castsource='text'::regtype and casttarget='date'::regtype;

-- select '20180526'::date;
-- select date('20180526');
-- select date('2018-05-26');
-- select cast('2018-05-26' as date);

create or replace function public.regclass2text(a regclass) returns text as $$
return a;
$$ language plpythonu;
drop cast if exists (regclass as text) cascade;
create cast(regclass as text) with function regclass2text(a regclass);

select 57377::regclass::text;

create view v_gp_configuration as
select content
from gp_segment_configuration a, pg_filespace_entry b, pg_filespace create
where a.dbid=b.fsedbid and b.fsefsoid=c.oid and c.fsname='pg_system';

https://www.cnblogs.com/someblue/p/4225694.html
https://blog.csdn.net/menggudaoke/article/details/78843749

posted on 2018-06-05 07:43  binarylei  阅读(742)  评论(0编辑  收藏  举报

导航