-- The most complete collection of everyday SQL for PostgreSQL DBAs



-- Create helper views (and procedures) to make routine checks easy to query

-- Namespace that holds every helper view/procedure defined in this script.
-- "if not exists" lets the script be re-run without failing on this statement.
create schema if not exists dba;

-- Indexes whose pg_index.indisvalid flag is false, with the owning table and
-- the full index definition.
create view dba.invalid_index as
select
  indisvalid,
  indexrelid::regclass,
  indrelid::regclass,
  pg_get_indexdef(indexrelid)
from pg_index
where not indisvalid;
-- Per-database recovery-conflict counters, read through the
-- pg_stat_get_db_conflict_* statistics functions.
create view dba.ro_conflicts as
select
  datname,
  pg_stat_get_db_conflict_all(oid)              conflict_all,
  pg_stat_get_db_conflict_bufferpin(oid)        conflict_bufferpin,
  pg_stat_get_db_conflict_lock(oid)             conflict_lock,
  pg_stat_get_db_conflict_snapshot(oid)         conflict_snapshot,
  pg_stat_get_db_conflict_startup_deadlock(oid) conflict_deadlock,
  pg_stat_get_db_conflict_tablespace(oid)       conflict_tbs
from pg_database;

-- Reports, as a NOTICE, how far the snapshot xmax advanced during a one-second
-- sleep, i.e. roughly how many transaction ids were consumed per second.
-- Read-only transactions do not consume an xid, so they are not counted.
-- NOTE(review): both readings need a fresh snapshot per statement (READ
-- COMMITTED); under REPEATABLE READ or stricter the difference would be 0.
create or replace procedure dba.tps() as $$
declare
  v1 int8;  -- snapshot xmax before the sleep
  v2 int8;  -- snapshot xmax after the sleep
begin
  select txid_snapshot_xmax(txid_current_snapshot()) into v1;
  perform pg_sleep(1);
  select txid_snapshot_xmax(txid_current_snapshot()) into v2;
  raise notice 'tps: %', v2-v1;
end;
$$ language plpgsql ;
-- Query on the primary: one row per connected WAL sender, showing the client
-- and its write/replay lag and sync state.
create view dba.ro_delay as
select
  application_name,
  client_addr,
  client_port,
  write_lag,
  replay_lag,
  sync_state
from pg_stat_replication;
-- Run on a standby: amount of WAL that has been received but not yet replayed,
-- formatted as a human-readable size.
create view dba.node_delay as
select
  pg_size_pretty(
    pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())
  ) as delay;
-- Run on a standby: how far the WAL receiver lags behind the WAL produced
-- upstream (latest_end_lsn minus received_lsn), as a human-readable size.
create view dba.ro_delay_on_standby as
select
  pg_size_pretty(pg_wal_lsn_diff(latest_end_lsn, received_lsn))
from pg_stat_wal_receiver;
-- Run on a standby: WAL receive throughput.
-- Samples pg_last_wal_receive_lsn() twice, one second apart, and reports the
-- difference as a NOTICE in human-readable bytes per second.
create or replace procedure dba.wal_receive_bw()
 language plpgsql
as $procedure$
declare
  v1 pg_lsn;  -- receive position before the sleep
  v2 pg_lsn;  -- receive position after the sleep
begin
  select pg_last_wal_receive_lsn() into v1;
  perform pg_sleep(1);
  select pg_last_wal_receive_lsn() into v2;
  raise notice 'wal receive bw: %/s', pg_size_pretty(pg_wal_lsn_diff(v2,v1));
end;
$procedure$;

-- Run on a standby: WAL replay throughput.
-- Samples pg_last_wal_replay_lsn() twice, one second apart, and reports the
-- difference as a NOTICE in human-readable bytes per second.
create or replace procedure dba.wal_replay_bw()
 language plpgsql
as $procedure$
declare
  v1 pg_lsn;  -- replay position before the sleep
  v2 pg_lsn;  -- replay position after the sleep
begin
  select pg_last_wal_replay_lsn() into v1;
  perform pg_sleep(1);
  select pg_last_wal_replay_lsn() into v2;
  raise notice 'wal replay bw: %/s', pg_size_pretty(pg_wal_lsn_diff(v2,v1));
end;
$procedure$;

-- Five statements with the highest cumulative execution time according to
-- pg_stat_statements, skipping any statement whose text matches 'rds'.
-- The third column is the average time per call.
create view dba.topsql as
select
  calls,
  total_time,
  total_time/calls,
  query
from pg_stat_statements
where query !~ 'rds'
order by total_time desc
limit 5;
-- Approximate queries per second: total pg_stat_statements call count sampled
-- twice, with a pg_sleep(1) attached to the second sample.
-- NOTE(review): the result is only meaningful if "a" is read before "b" and
-- the sleep happens before "b" reads its counters; SQL does not guarantee
-- that evaluation order, so treat the figure as a rough estimate.
create view dba.qps as
with
a as (select sum(calls) s from pg_stat_statements),
b as (select sum(calls) s from pg_stat_statements , pg_sleep(1))
select
  b.s-a.s as qps
from a,b;
-- Number of backends that report a wait event and hold either an assigned
-- transaction id or a snapshot xmin.
create view dba.session_acting_cnt as
select count(*)
from pg_stat_activity
where wait_event is not null
  and (backend_xid is not null or backend_xmin is not null);
-- Full pg_stat_activity rows for backends that report a wait event and hold
-- either an assigned transaction id or a snapshot xmin.
create view dba.sessions as
select *
from pg_stat_activity
where wait_event is not null
  and (backend_xid is not null or backend_xmin is not null);
-- Lock-wait report: one row per contended lock target. lock_conflict is a
-- multi-line text listing every session that holds or waits for that target
-- (pid, mode, client, timings, current SQL), strongest lock mode first and
-- holders before waiters.
create view dba.locks as
with
-- lock requests that have not been granted yet, joined to their session
t_wait as
(
  select a.mode,a.locktype,a.database,a.relation,a.page,a.tuple,a.classid,a.granted,
    a.objid,a.objsubid,a.pid,a.virtualtransaction,a.virtualxid,a.transactionid,a.fastpath,
    b.state,b.query,b.xact_start,b.query_start,b.usename,b.datname,b.client_addr,b.client_port,b.application_name
  from pg_locks a
  join pg_stat_activity b on a.pid=b.pid
  where not a.granted
),
-- locks that are currently held, joined to their session
t_run as
(
  select a.mode,a.locktype,a.database,a.relation,a.page,a.tuple,a.classid,a.granted,
    a.objid,a.objsubid,a.pid,a.virtualtransaction,a.virtualxid,a.transactionid,a.fastpath,
    b.state,b.query,b.xact_start,b.query_start,b.usename,b.datname,b.client_addr,b.client_port,b.application_name
  from pg_locks a
  join pg_stat_activity b on a.pid=b.pid
  where a.granted
),
-- held locks on exactly the same target as some waiting request of another pid
t_overlap as
(
  select r.* from t_wait w join t_run r on
  (
    r.locktype is not distinct from w.locktype and
    r.database is not distinct from w.database and
    r.relation is not distinct from w.relation and
    r.page is not distinct from w.page and
    r.tuple is not distinct from w.tuple and
    r.virtualxid is not distinct from w.virtualxid and
    r.transactionid is not distinct from w.transactionid and
    r.classid is not distinct from w.classid and
    r.objid is not distinct from w.objid and
    r.objsubid is not distinct from w.objsubid and
    r.pid <> w.pid
  )
),
-- blockers plus waiters
t_unionall as
(
  select r.* from t_overlap r
  union all
  select w.* from t_wait w
)
select locktype,datname,relation::regclass,page,tuple,virtualxid,transactionid::text,classid::regclass,objid,objsubid,
string_agg(
  'pid: '||coalesce(pid::text,'null')||chr(10)||
  'lock_granted: '||coalesce(granted::text,'null')||' , mode: '||coalesce(mode::text,'null')||' , fastpath: '||coalesce(fastpath::text,'null')||' , virtualtransaction: '||coalesce(virtualtransaction::text,'null')||' , session_state: '||coalesce(state::text,'null')||chr(10)||
  'username: '||coalesce(usename::text,'null')||' , database: '||coalesce(datname::text,'null')||' , client_addr: '||coalesce(client_addr::text,'null')||' , client_port: '||coalesce(client_port::text,'null')||' , application_name: '||coalesce(application_name::text,'null')||chr(10)||
  'xact_start: '||coalesce(xact_start::text,'null')||' , query_start: '||coalesce(query_start::text,'null')||' , xact_elapse: '||coalesce((now()-xact_start)::text,'null')||' , query_elapse: '||coalesce((now()-query_start)::text,'null')||chr(10)||
  'sql (current sql in transaction): '||chr(10)||
  coalesce(query::text,'null'),
  chr(10)||'--------'||chr(10)   -- separator between sessions
  order by
    -- pg_locks reports mixed-case mode names (e.g. AccessShareLock), so fold
    -- to lower case before comparing with the lower-case literals below
    (  case lower(mode)
      when 'invalid' then 0
      when 'accesssharelock' then 1
      when 'rowsharelock' then 2
      when 'rowexclusivelock' then 3
      when 'shareupdateexclusivelock' then 4
      when 'sharelock' then 5
      when 'sharerowexclusivelock' then 6
      when 'exclusivelock' then 7
      when 'accessexclusivelock' then 8
      else 0
    end  ) desc,
    (case when granted then 0 else 1 end)
) as lock_conflict
from t_unionall
group by
locktype,datname,relation,page,tuple,virtualxid,transactionid::text,classid,objid,objsubid ;
-- Ten largest tables by main-relation size (pg_relation_size), pretty-printed.
create view dba.top10sizetable as
select
  schemaname,
  tablename,
  pg_size_pretty(pg_relation_size(rel))
from (
  -- resolve the schema-qualified, safely quoted name to a regclass once
  select
    schemaname,
    tablename,
    (quote_ident(schemaname)||'.'||quote_ident(tablename))::regclass as rel
  from pg_tables
) as t
order by pg_relation_size(rel) desc
limit 10;
-- Ten largest indexes by pg_relation_size, pretty-printed.
create view dba.top10sizeindex as
select
  schemaname,
  tablename,
  indexname,
  pg_size_pretty(pg_relation_size(rel))
from (
  -- resolve the schema-qualified, safely quoted index name to a regclass once
  select
    schemaname,
    tablename,
    indexname,
    (quote_ident(schemaname)||'.'||quote_ident(indexname))::regclass as rel
  from pg_indexes
) as t
order by pg_relation_size(rel) desc
limit 10;
-- Ten largest tables by pg_total_relation_size, pretty-printed.
create view dba.top10sizetableindex as
select
  schemaname,
  tablename,
  pg_size_pretty(pg_total_relation_size(rel))
from (
  -- resolve the schema-qualified, safely quoted name to a regclass once
  select
    schemaname,
    tablename,
    (quote_ident(schemaname)||'.'||quote_ident(tablename))::regclass as rel
  from pg_tables
) as t
order by pg_total_relation_size(rel) desc
limit 10;
-- Ten tables with the most updated plus deleted tuples. The last column is
-- the HOT-update ratio (n_tup_hot_upd / n_tup_upd), rounded to four decimals;
-- the divisor falls back to 1.0 when no updates were recorded.
create view dba.top10updatetable as
select
  schemaname,
  relname,
  n_tup_upd,
  n_tup_del,
  round(
    n_tup_hot_upd / (case when n_tup_upd=0 then 1.0 else n_tup_upd::numeric end),
    4
  )
from pg_stat_all_tables
order by n_tup_upd+n_tup_del desc
limit 10;
-- Ten tables with the highest inserted-tuple counter.
create view dba.top10inserttable as
select
  schemaname,
  relname,
  n_tup_ins
from pg_stat_all_tables
order by n_tup_ins desc
limit 10;
-- Ten tables with the highest estimated number of dead tuples.
create view dba.top10deadtable as
select
  schemaname,
  relname,
  n_dead_tup
from pg_stat_all_tables
order by n_dead_tup desc
limit 10;
-- Ten ordinary tables (relkind 'r') outside pg_catalog and information_schema
-- with the greatest relfrozenxid age; ties are broken by larger size first.
create view dba.top10age as
select
  relnamespace::regnamespace,
  relname,
  pg_size_pretty(pg_relation_size(oid)),
  age(relfrozenxid)
from pg_class
where relkind='r'
  and relnamespace<>'pg_catalog'::regnamespace
  and relnamespace<>'information_schema'::regnamespace
order by
  age(relfrozenxid) desc,
  pg_relation_size(oid) desc
limit 10;
-- Bloat culprit lookup: the single session holding the oldest transaction id
-- or snapshot xmin. Sessions running VACUUM and non-client autovacuum workers
-- are excluded.
--   old_ts    : how long the session's transaction has been open
--   old_xacts : xid age of the older of backend_xid / backend_xmin
-- age() is used instead of subtracting from txid_current(): age() is
-- wraparound-aware, does not assign a transaction id to the caller, and also
-- works on a standby.
create view dba.oldestxact as
select
  datname,
  usename,
  xact_start,
  query_start,
  backend_xid,
  backend_xmin,
  now()-xact_start as old_ts,
  greatest(age(backend_xid), age(backend_xmin)) as old_xacts
from pg_stat_activity
where ltrim(lower(query),' ') !~ '^vacuum'
and not (query ~ 'autovacuum' and backend_type <>'client backend')
-- oldest first; sessions holding neither an xid nor an xmin sort last
order by greatest(age(backend_xid), age(backend_xmin)) desc nulls last
limit 1;
-- Top 10 table/index pairs ordered by estimated wasted TABLE bytes.
-- The estimate is statistical (pg_stats widths and null fractions), so run
-- ANALYZE first and treat the numbers as approximate.
--   otta / iotta      : estimated pages the table / index would need without bloat
--   tbloat / ibloat   : actual pages divided by that estimate
--   wasted*           : actual minus estimated, in pages, bytes and pretty-printed
--   totalwastedbytes  : table plus index waste, pretty-printed
-- iname is '?' for relations that have no index.
create view dba.top10bloatsizetable as
select
  current_database() as db, schemaname, tablename, reltuples::bigint as tups, relpages::bigint as pages, otta,
  round(case when otta=0 or sml.relpages=0 or sml.relpages=otta then 0.0 else sml.relpages/otta::numeric end,1) as tbloat,
  case when relpages < otta then 0 else relpages::bigint - otta end as wastedpages,
  case when relpages < otta then 0 else bs*(sml.relpages-otta)::bigint end as wastedbytes,
  case when relpages < otta then '0 bytes'::text else pg_size_pretty((bs*(relpages-otta))::bigint) end as wastedsize,
  iname, ituples::bigint as itups, ipages::bigint as ipages, iotta,
  round(case when iotta=0 or ipages=0 or ipages=iotta then 0.0 else ipages/iotta::numeric end,1) as ibloat,
  case when ipages < iotta then 0 else ipages::bigint - iotta end as wastedipages,
  case when ipages < iotta then 0 else bs*(ipages-iotta) end as wastedibytes,
  case when ipages < iotta then '0 bytes' else pg_size_pretty((bs*(ipages-iotta))::bigint) end as wastedisize,
  pg_size_pretty(case when relpages < otta then
    case when ipages < iotta then 0 else bs*(ipages-iotta::bigint) end
    else case when ipages < iotta then bs*(relpages-otta::bigint)
      else bs*(relpages-otta::bigint + ipages-iotta::bigint) end
  end) as totalwastedbytes
from (
  select
    nn.nspname as schemaname,
    cc.relname as tablename,
    coalesce(cc.reltuples,0) as reltuples,
    coalesce(cc.relpages,0) as relpages,
    coalesce(bs,0) as bs,
    -- expected table pages: tuples * aligned row size / usable bytes per page
    coalesce(ceil((cc.reltuples*((datahdr+ma-
      (case when datahdr%ma=0 then ma else datahdr%ma end))+nullhdr2+4))/(bs-20::float)),0) as otta,
    coalesce(c2.relname,'?') as iname, coalesce(c2.reltuples,0) as ituples, coalesce(c2.relpages,0) as ipages,
    coalesce(ceil((c2.reltuples*(datahdr-12))/(bs-20::float)),0) as iotta -- very rough approximation, assumes all cols
  from
     pg_class cc
  join pg_namespace nn on cc.relnamespace = nn.oid and nn.nspname <> 'information_schema'
  left join
  (
    -- per-table aligned data-header and null-bitmap sizes
    select
      ma,bs,foo.nspname,foo.relname,
      (datawidth+(hdr+ma-(case when hdr%ma=0 then ma else hdr%ma end)))::numeric as datahdr,
      (maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 then ma else nullhdr%ma end))) as nullhdr2
    from (
      -- per-table average row data width taken from column statistics
      select
        ns.nspname, tbl.relname, hdr, ma, bs,
        sum((1-coalesce(null_frac,0))*coalesce(avg_width, 2048)) as datawidth,
        max(coalesce(null_frac,0)) as maxfracsum,
        hdr+(
          select 1+count(*)/8
          from pg_stats s2
          where null_frac<>0 and s2.schemaname = ns.nspname and s2.tablename = tbl.relname
        ) as nullhdr
      from pg_attribute att
      join pg_class tbl on att.attrelid = tbl.oid
      join pg_namespace ns on ns.oid = tbl.relnamespace
      left join pg_stats s on s.schemaname=ns.nspname
      and s.tablename = tbl.relname
      and s.inherited=false
      and s.attname=att.attname,
      (
        -- server constants: block size, tuple header size, alignment
        select
          (select current_setting('block_size')::numeric) as bs,
            case when substring(split_part(v, ' ', 2) from '#"[0-9]+.[0-9]+#"%' for '#')
              in ('8.0','8.1','8.2') then 27 else 23 end as hdr,
          case when v ~ 'mingw32' or v ~ '64-bit' then 8 else 4 end as ma
        from (select version() as v) as foo
      ) as constants
      where att.attnum > 0 and tbl.relkind='r'
      group by 1,2,3,4,5
    ) as foo
  ) as rs
  on cc.relname = rs.relname and nn.nspname = rs.nspname
  left join pg_index i on indrelid = cc.oid
  left join pg_class c2 on c2.oid = i.indexrelid
) as sml order by wastedbytes desc limit 10;
-- Top 10 table/index pairs ordered by estimated wasted INDEX bytes.
-- The estimate is statistical (pg_stats widths and null fractions), so run
-- ANALYZE first and treat the numbers as approximate.
--   otta / iotta      : estimated pages the table / index would need without bloat
--   tbloat / ibloat   : actual pages divided by that estimate
--   wasted*           : actual minus estimated, in pages, bytes and pretty-printed
--   totalwastedbytes  : table plus index waste, pretty-printed
-- iname is '?' for relations that have no index.
create view dba.top10bloatsizeindex as
select
  current_database() as db, schemaname, tablename, reltuples::bigint as tups, relpages::bigint as pages, otta,
  round(case when otta=0 or sml.relpages=0 or sml.relpages=otta then 0.0 else sml.relpages/otta::numeric end,1) as tbloat,
  case when relpages < otta then 0 else relpages::bigint - otta end as wastedpages,
  case when relpages < otta then 0 else bs*(sml.relpages-otta)::bigint end as wastedbytes,
  case when relpages < otta then '0 bytes'::text else pg_size_pretty((bs*(relpages-otta))::bigint) end as wastedsize,
  iname, ituples::bigint as itups, ipages::bigint as ipages, iotta,
  round(case when iotta=0 or ipages=0 or ipages=iotta then 0.0 else ipages/iotta::numeric end,1) as ibloat,
  case when ipages < iotta then 0 else ipages::bigint - iotta end as wastedipages,
  case when ipages < iotta then 0 else bs*(ipages-iotta) end as wastedibytes,
  case when ipages < iotta then '0 bytes' else pg_size_pretty((bs*(ipages-iotta))::bigint) end as wastedisize,
  pg_size_pretty(case when relpages < otta then
    case when ipages < iotta then 0 else bs*(ipages-iotta::bigint) end
    else case when ipages < iotta then bs*(relpages-otta::bigint)
      else bs*(relpages-otta::bigint + ipages-iotta::bigint) end
  end) as totalwastedbytes
from (
  select
    nn.nspname as schemaname,
    cc.relname as tablename,
    coalesce(cc.reltuples,0) as reltuples,
    coalesce(cc.relpages,0) as relpages,
    coalesce(bs,0) as bs,
    -- expected table pages: tuples * aligned row size / usable bytes per page
    coalesce(ceil((cc.reltuples*((datahdr+ma-
      (case when datahdr%ma=0 then ma else datahdr%ma end))+nullhdr2+4))/(bs-20::float)),0) as otta,
    coalesce(c2.relname,'?') as iname, coalesce(c2.reltuples,0) as ituples, coalesce(c2.relpages,0) as ipages,
    coalesce(ceil((c2.reltuples*(datahdr-12))/(bs-20::float)),0) as iotta -- very rough approximation, assumes all cols
  from
     pg_class cc
  join pg_namespace nn on cc.relnamespace = nn.oid and nn.nspname <> 'information_schema'
  left join
  (
    -- per-table aligned data-header and null-bitmap sizes
    select
      ma,bs,foo.nspname,foo.relname,
      (datawidth+(hdr+ma-(case when hdr%ma=0 then ma else hdr%ma end)))::numeric as datahdr,
      (maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 then ma else nullhdr%ma end))) as nullhdr2
    from (
      -- per-table average row data width taken from column statistics
      select
        ns.nspname, tbl.relname, hdr, ma, bs,
        sum((1-coalesce(null_frac,0))*coalesce(avg_width, 2048)) as datawidth,
        max(coalesce(null_frac,0)) as maxfracsum,
        hdr+(
          select 1+count(*)/8
          from pg_stats s2
          where null_frac<>0 and s2.schemaname = ns.nspname and s2.tablename = tbl.relname
        ) as nullhdr
      from pg_attribute att
      join pg_class tbl on att.attrelid = tbl.oid
      join pg_namespace ns on ns.oid = tbl.relnamespace
      left join pg_stats s on s.schemaname=ns.nspname
      and s.tablename = tbl.relname
      and s.inherited=false
      and s.attname=att.attname,
      (
        -- server constants: block size, tuple header size, alignment
        select
          (select current_setting('block_size')::numeric) as bs,
            case when substring(split_part(v, ' ', 2) from '#"[0-9]+.[0-9]+#"%' for '#')
              in ('8.0','8.1','8.2') then 27 else 23 end as hdr,
          case when v ~ 'mingw32' or v ~ '64-bit' then 8 else 4 end as ma
        from (select version() as v) as foo
      ) as constants
      where att.attnum > 0 and tbl.relkind='r'
      group by 1,2,3,4,5
    ) as foo
  ) as rs
  on cc.relname = rs.relname and nn.nspname = rs.nspname
  left join pg_index i on indrelid = cc.oid
  left join pg_class c2 on c2.oid = i.indexrelid
) as sml order by wastedibytes desc limit 10;
-- Top 10 table/index pairs ordered by TABLE bloat ratio, restricted to tables
-- whose estimated wasted space is at least 10,240,000 bytes (about 10 MB).
-- The estimate is statistical (pg_stats widths and null fractions), so run
-- ANALYZE first and treat the numbers as approximate.
--   otta / iotta      : estimated pages the table / index would need without bloat
--   tbloat / ibloat   : actual pages divided by that estimate
--   wasted*           : actual minus estimated, in pages, bytes and pretty-printed
--   totalwastedbytes  : table plus index waste, pretty-printed
-- iname is '?' for relations that have no index.
create view dba.top10bloatratiotable as
select
  current_database() as db, schemaname, tablename, reltuples::bigint as tups, relpages::bigint as pages, otta,
  round(case when otta=0 or sml.relpages=0 or sml.relpages=otta then 0.0 else sml.relpages/otta::numeric end,1) as tbloat,
  case when relpages < otta then 0 else relpages::bigint - otta end as wastedpages,
  case when relpages < otta then 0 else bs*(sml.relpages-otta)::bigint end as wastedbytes,
  case when relpages < otta then '0 bytes'::text else pg_size_pretty((bs*(relpages-otta))::bigint) end as wastedsize,
  iname, ituples::bigint as itups, ipages::bigint as ipages, iotta,
  round(case when iotta=0 or ipages=0 or ipages=iotta then 0.0 else ipages/iotta::numeric end,1) as ibloat,
  case when ipages < iotta then 0 else ipages::bigint - iotta end as wastedipages,
  case when ipages < iotta then 0 else bs*(ipages-iotta) end as wastedibytes,
  case when ipages < iotta then '0 bytes' else pg_size_pretty((bs*(ipages-iotta))::bigint) end as wastedisize,
  pg_size_pretty(case when relpages < otta then
    case when ipages < iotta then 0 else bs*(ipages-iotta::bigint) end
    else case when ipages < iotta then bs*(relpages-otta::bigint)
      else bs*(relpages-otta::bigint + ipages-iotta::bigint) end
  end) as totalwastedbytes
from (
  select
    nn.nspname as schemaname,
    cc.relname as tablename,
    coalesce(cc.reltuples,0) as reltuples,
    coalesce(cc.relpages,0) as relpages,
    coalesce(bs,0) as bs,
    -- expected table pages: tuples * aligned row size / usable bytes per page
    coalesce(ceil((cc.reltuples*((datahdr+ma-
      (case when datahdr%ma=0 then ma else datahdr%ma end))+nullhdr2+4))/(bs-20::float)),0) as otta,
    coalesce(c2.relname,'?') as iname, coalesce(c2.reltuples,0) as ituples, coalesce(c2.relpages,0) as ipages,
    coalesce(ceil((c2.reltuples*(datahdr-12))/(bs-20::float)),0) as iotta -- very rough approximation, assumes all cols
  from
     pg_class cc
  join pg_namespace nn on cc.relnamespace = nn.oid and nn.nspname <> 'information_schema'
  left join
  (
    -- per-table aligned data-header and null-bitmap sizes
    select
      ma,bs,foo.nspname,foo.relname,
      (datawidth+(hdr+ma-(case when hdr%ma=0 then ma else hdr%ma end)))::numeric as datahdr,
      (maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 then ma else nullhdr%ma end))) as nullhdr2
    from (
      -- per-table average row data width taken from column statistics
      select
        ns.nspname, tbl.relname, hdr, ma, bs,
        sum((1-coalesce(null_frac,0))*coalesce(avg_width, 2048)) as datawidth,
        max(coalesce(null_frac,0)) as maxfracsum,
        hdr+(
          select 1+count(*)/8
          from pg_stats s2
          where null_frac<>0 and s2.schemaname = ns.nspname and s2.tablename = tbl.relname
        ) as nullhdr
      from pg_attribute att
      join pg_class tbl on att.attrelid = tbl.oid
      join pg_namespace ns on ns.oid = tbl.relnamespace
      left join pg_stats s on s.schemaname=ns.nspname
      and s.tablename = tbl.relname
      and s.inherited=false
      and s.attname=att.attname,
      (
        -- server constants: block size, tuple header size, alignment
        select
          (select current_setting('block_size')::numeric) as bs,
            case when substring(split_part(v, ' ', 2) from '#"[0-9]+.[0-9]+#"%' for '#')
              in ('8.0','8.1','8.2') then 27 else 23 end as hdr,
          case when v ~ 'mingw32' or v ~ '64-bit' then 8 else 4 end as ma
        from (select version() as v) as foo
      ) as constants
      where att.attnum > 0 and tbl.relkind='r'
      group by 1,2,3,4,5
    ) as foo
  ) as rs
  on cc.relname = rs.relname and nn.nspname = rs.nspname
  left join pg_index i on indrelid = cc.oid
  left join pg_class c2 on c2.oid = i.indexrelid
) as sml
where (case when relpages < otta then 0 else bs*(sml.relpages-otta)::bigint end) >= 10240000
order by tbloat desc,wastedbytes desc limit 10;
-- Top 10 table/index pairs ordered by INDEX bloat ratio, restricted to indexes
-- whose estimated wasted space is at least 10,240,000 bytes (about 10 MB).
-- The estimate is statistical (pg_stats widths and null fractions), so run
-- ANALYZE first and treat the numbers as approximate.
--   otta / iotta      : estimated pages the table / index would need without bloat
--   tbloat / ibloat   : actual pages divided by that estimate
--   wasted*           : actual minus estimated, in pages, bytes and pretty-printed
--   totalwastedbytes  : table plus index waste, pretty-printed
-- iname is '?' for relations that have no index.
create view dba.top10bloatratioindex as
select
  current_database() as db, schemaname, tablename, reltuples::bigint as tups, relpages::bigint as pages, otta,
  round(case when otta=0 or sml.relpages=0 or sml.relpages=otta then 0.0 else sml.relpages/otta::numeric end,1) as tbloat,
  case when relpages < otta then 0 else relpages::bigint - otta end as wastedpages,
  case when relpages < otta then 0 else bs*(sml.relpages-otta)::bigint end as wastedbytes,
  case when relpages < otta then '0 bytes'::text else pg_size_pretty((bs*(relpages-otta))::bigint) end as wastedsize,
  iname, ituples::bigint as itups, ipages::bigint as ipages, iotta,
  round(case when iotta=0 or ipages=0 or ipages=iotta then 0.0 else ipages/iotta::numeric end,1) as ibloat,
  case when ipages < iotta then 0 else ipages::bigint - iotta end as wastedipages,
  case when ipages < iotta then 0 else bs*(ipages-iotta) end as wastedibytes,
  case when ipages < iotta then '0 bytes' else pg_size_pretty((bs*(ipages-iotta))::bigint) end as wastedisize,
  pg_size_pretty(case when relpages < otta then
    case when ipages < iotta then 0 else bs*(ipages-iotta::bigint) end
    else case when ipages < iotta then bs*(relpages-otta::bigint)
      else bs*(relpages-otta::bigint + ipages-iotta::bigint) end
  end) as totalwastedbytes
from (
  select
    nn.nspname as schemaname,
    cc.relname as tablename,
    coalesce(cc.reltuples,0) as reltuples,
    coalesce(cc.relpages,0) as relpages,
    coalesce(bs,0) as bs,
    -- expected table pages: tuples * aligned row size / usable bytes per page
    coalesce(ceil((cc.reltuples*((datahdr+ma-
      (case when datahdr%ma=0 then ma else datahdr%ma end))+nullhdr2+4))/(bs-20::float)),0) as otta,
    coalesce(c2.relname,'?') as iname, coalesce(c2.reltuples,0) as ituples, coalesce(c2.relpages,0) as ipages,
    coalesce(ceil((c2.reltuples*(datahdr-12))/(bs-20::float)),0) as iotta -- very rough approximation, assumes all cols
  from
     pg_class cc
  join pg_namespace nn on cc.relnamespace = nn.oid and nn.nspname <> 'information_schema'
  left join
  (
    -- per-table aligned data-header and null-bitmap sizes
    select
      ma,bs,foo.nspname,foo.relname,
      (datawidth+(hdr+ma-(case when hdr%ma=0 then ma else hdr%ma end)))::numeric as datahdr,
      (maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 then ma else nullhdr%ma end))) as nullhdr2
    from (
      -- per-table average row data width taken from column statistics
      select
        ns.nspname, tbl.relname, hdr, ma, bs,
        sum((1-coalesce(null_frac,0))*coalesce(avg_width, 2048)) as datawidth,
        max(coalesce(null_frac,0)) as maxfracsum,
        hdr+(
          select 1+count(*)/8
          from pg_stats s2
          where null_frac<>0 and s2.schemaname = ns.nspname and s2.tablename = tbl.relname
        ) as nullhdr
      from pg_attribute att
      join pg_class tbl on att.attrelid = tbl.oid
      join pg_namespace ns on ns.oid = tbl.relnamespace
      left join pg_stats s on s.schemaname=ns.nspname
      and s.tablename = tbl.relname
      and s.inherited=false
      and s.attname=att.attname,
      (
        -- server constants: block size, tuple header size, alignment
        select
          (select current_setting('block_size')::numeric) as bs,
            case when substring(split_part(v, ' ', 2) from '#"[0-9]+.[0-9]+#"%' for '#')
              in ('8.0','8.1','8.2') then 27 else 23 end as hdr,
          case when v ~ 'mingw32' or v ~ '64-bit' then 8 else 4 end as ma
        from (select version() as v) as foo
      ) as constants
      where att.attnum > 0 and tbl.relkind='r'
      group by 1,2,3,4,5
    ) as foo
  ) as rs
  on cc.relname = rs.relname and nn.nspname = rs.nspname
  left join pg_index i on indrelid = cc.oid
  left join pg_class c2 on c2.oid = i.indexrelid
) as sml
where (case when ipages < iotta then 0 else bs*(ipages-iotta) end) >= 10240000
order by ibloat desc,wastedibytes desc limit 10;
-- All sequences ordered by remaining headroom (max_value - last_value),
-- smallest first. The first column is that headroom.
create view dba.seqs as
select
  max_value-last_value,
  *
from pg_sequences
order by max_value-last_value ;

-- Top 10 largest indexes of at least 1,024,000 bytes (about 1 MB) that show no
-- usage: any of idx_scan, idx_tup_read or idx_tup_fetch is zero.
-- Caution: primary-key / unique indexes that only enforce a constraint may
-- never be counted as used, yet they must not be dropped.
create view dba.top10notusedidx as
select
  pg_size_pretty(pg_relation_size(indexrelid)),
  *
from pg_stat_all_indexes
where pg_relation_size(indexrelid)>=1024000
  and (idx_scan=0 or idx_tup_read=0 or idx_tup_fetch=0)
  and schemaname not in ('pg_toast','pg_catalog')
order by pg_relation_size(indexrelid) desc
limit 10;
-- Top 10 largest tables of at least 1,024,000 bytes (about 1 MB) that have
-- never been read: both seq_scan and idx_scan are zero. System schemas are
-- excluded.
create view dba.top10notusedtab as
select
  pg_size_pretty(pg_relation_size(relid)),
  *
from pg_stat_all_tables
where pg_relation_size(relid)>=1024000
  and seq_scan=0
  and idx_scan=0
  and schemaname not in ('pg_toast','pg_catalog','information_schema')
order by pg_relation_size(relid) desc
limit 10;
-- Top 10 hottest tables: most sequential plus index scans, larger tables first
-- on ties. System schemas are excluded.
create view dba.top10hottab as
select
  pg_size_pretty(pg_relation_size(relid)),
  *
from pg_stat_all_tables
where schemaname not in ('pg_toast','pg_catalog','information_schema')
order by
  seq_scan+idx_scan desc,
  pg_relation_size(relid) desc
limit 10;
-- Top 10 coldest tables of at least 1,024,000 bytes (about 1 MB): fewest
-- sequential plus index scans, larger tables first on ties. System schemas
-- are excluded.
create view dba.top10coldtab as
select
  pg_size_pretty(pg_relation_size(relid)),
  *
from pg_stat_all_tables
where schemaname not in ('pg_toast','pg_catalog','information_schema')
  and pg_relation_size(relid)>=1024000
order by
  seq_scan+idx_scan,
  pg_relation_size(relid) desc
limit 10;
-- Top 10 hottest indexes: highest idx_scan + idx_tup_read + idx_tup_fetch,
-- larger indexes first on ties. pg_toast and pg_catalog are excluded.
create view dba.top10hotidx as
select
  pg_size_pretty(pg_relation_size(indexrelid)),
  *
from pg_stat_all_indexes
where schemaname not in ('pg_toast','pg_catalog')
order by
  idx_scan+idx_tup_read+idx_tup_fetch desc,
  pg_relation_size(indexrelid) desc
limit 10;
-- Top 10 coldest indexes of at least 1,024,000 bytes (about 1 MB): lowest
-- idx_scan + idx_tup_read + idx_tup_fetch, larger indexes first on ties.
-- Caution: primary-key / unique indexes that only enforce a constraint may
-- never be counted as used, yet they must not be dropped.
create view dba.top10coldidx as
select
  pg_size_pretty(pg_relation_size(indexrelid)),
  *
from pg_stat_all_indexes
where pg_relation_size(indexrelid)>=1024000
  and schemaname not in ('pg_toast','pg_catalog')
order by
  idx_scan+idx_tup_read+idx_tup_fetch,
  pg_relation_size(indexrelid) desc
limit 10;
-- Freeze-storm prediction, view 1 of 3.
-- One row per table / materialized view with the number of transactions
-- (and multixacts) left before autovacuum forces an anti-wraparound freeze,
-- alongside the relevant server settings (v1..v10, autovacuum).
-- Rows are ordered by how soon the forced freeze will hit; bigger relations
-- come first on ties.
-- xid values are cast through bigint because an xid can exceed the int4 range,
-- and multixact ages use mxid_age() since age() measures regular xid distance.
create view dba.v_freeze as
select
  e.*,
  a.*
from
(select
  current_setting('autovacuum_freeze_max_age')::int as v1,            -- table xid age above this forces a freeze even with autovacuum off (logged as "to prevent wraparound")
  current_setting('autovacuum_multixact_freeze_max_age')::int as v2,  -- table multixact age above this forces a freeze even with autovacuum off
  current_setting('vacuum_freeze_min_age')::int as v3,                -- during manual or automatic vacuum, rows whose xid age exceeds this are frozen
  current_setting('vacuum_multixact_freeze_min_age')::int as v4,      -- during manual or automatic vacuum, rows whose multixact age exceeds this are frozen
  current_setting('vacuum_freeze_table_age')::int as v5,              -- manual vacuum does a freeze scan when the table xid age exceeds this; capped at 95% of autovacuum_freeze_max_age
  current_setting('vacuum_multixact_freeze_table_age')::int as v6,    -- manual vacuum does a freeze scan when the table multixact age exceeds this; capped at 95% of autovacuum_multixact_freeze_max_age
  current_setting('autovacuum_vacuum_cost_delay') as v7,              -- autovacuum pause after each cost-limited round; -1 means use vacuum_cost_delay
  current_setting('autovacuum_vacuum_cost_limit') as v8,              -- autovacuum cost budget per round (vacuum_cost_page_hit / _miss / _dirty); -1 means use vacuum_cost_limit
  current_setting('vacuum_cost_delay') as v9,                         -- manual vacuum pause after each cost-limited round
  current_setting('vacuum_cost_limit') as v10,                        -- manual vacuum cost budget per round (vacuum_cost_page_hit / _miss / _dirty)
  current_setting('autovacuum') as autovacuum                         -- whether autovacuum is enabled
) a,
lateral (   -- lateral lets this subquery reference columns of the preceding "a"
select
pg_size_pretty(pg_total_relation_size(oid)) sz,   -- relation size including toast and indexes
oid::regclass as reloid,    -- table (or materialized view) name
relkind,                    -- r = table, m = materialized view
coalesce(
  least(
    substring(reloptions::text, 'autovacuum_freeze_max_age=(\d+)')::int,
    substring(reloptions::text, 'autovacuum_freeze_table_age=(\d+)')::int
  ),
  a.v1   -- no table-level override: fall back to the server setting
)
-
age(case when relfrozenxid::text::bigint<3 then null else relfrozenxid end)
as remain_ages_xid,   -- transactions left before autovacuum forces a freeze (xid driven)
coalesce(
  least(
    substring(reloptions::text, 'autovacuum_multixact_freeze_max_age=(\d+)')::int,
    substring(reloptions::text, 'autovacuum_multixact_freeze_table_age=(\d+)')::int
  ),
  a.v2
)
-
mxid_age(case when relminmxid::text::bigint<3 then null else relminmxid end)
as remain_ages_mxid,  -- multixacts left before autovacuum forces a freeze (multixact driven)
coalesce(
  least(
    substring(reloptions::text, 'autovacuum_freeze_min_age=(\d+)')::int
  ),
  a.v3
) as xid_lower_to_minage,    -- xid age the table drops to once a freeze has run
coalesce(
  least(
    substring(reloptions::text, 'autovacuum_multixact_freeze_min_age=(\d+)')::int
  ),
  a.v4
) as mxid_lower_to_minage,   -- multixact age the table drops to once a freeze has run
case
  when v5 <= age(case when relfrozenxid::text::bigint<3 then null else relfrozenxid end) then 'yes'
  else 'not'
end as vacuum_trigger_freeze1,    -- would a manual VACUUM freeze now because of xid age?
case
  when v6 <= mxid_age(case when relminmxid::text::bigint<3 then null else relminmxid end) then 'yes'
  else 'not'
end as vacuum_trigger_freeze2,    -- would a manual VACUUM freeze now because of multixact age?
reloptions                        -- table-level options, which take precedence over the server settings
from pg_class
  where relkind in ('r','m')
) e
order by
  least(e.remain_ages_xid , e.remain_ages_mxid),  -- earlier rows hit the forced freeze sooner
  pg_total_relation_size(reloid) desc   -- same remaining age: larger relation first
;

-- Freeze-storm prediction: groups the relations of dba.v_freeze into batches
-- of about one million transactions of remaining age and summarises how much
-- data each batch will have to freeze.
create view dba.v_freeze_stat as
select
wb,                                                     -- batch number; each batch spans about 1 million transactions
cnt,                                                    -- relations in this batch
pg_size_pretty(ssz) as ssz1,                            -- total size of this batch (table + toast + indexes)
pg_size_pretty(ssz) as ssz2,                            -- read I/O a freeze of this batch would cause
pg_size_pretty(ssz*3) as ssz3,                          -- worst-case write I/O (typically three copies: data file, WAL full page, WAL)
pg_size_pretty(min_sz) as ssz4,                         -- smallest relation in this batch
pg_size_pretty(max_sz) as ssz5,                         -- largest relation in this batch
pg_size_pretty(avg_sz) as ssz6,                         -- average relation size in this batch
pg_size_pretty(stddev_sz) as ssz7,                      -- standard deviation of relation size; larger means sizes differ more
min_rest_age,                                           -- fewest transactions left before a forced freeze in this batch
max_rest_age,                                           -- most transactions left before a forced freeze in this batch
stddev_rest_age,                                        -- standard deviation of remaining age; larger means freezes may cluster
corr_rest_age_sz,                                       -- correlation between remaining age and relation size (near 1 or -1 = strong)
round(100*(ssz/(sum(ssz) over ())), 2)||' %' as ratio   -- share of total size held by this batch; very uneven shares suggest tuning table-level freeze options
from
(
select a.*, b.* from
(
select
  min(least(remain_ages_xid, remain_ages_mxid)) as v_min,   -- smallest remaining age in the database
  max(least(remain_ages_xid, remain_ages_mxid)) as v_max    -- largest remaining age in the database
from dba.v_freeze
) as a,
lateral (
select
-- width_bucket() raises an error when both bounds are equal, so put
-- everything into a single batch in that case
case when a.v_max = a.v_min then 1 else
width_bucket(
  least(remain_ages_xid, remain_ages_mxid),
  a.v_min,
  a.v_max,
  greatest((a.v_max-a.v_min)/1000000, 1)   -- 1 million transactions per batch; change this value to resize batches
) end as wb,
count(*) as cnt,
sum(pg_total_relation_size(reloid)) as ssz,
stddev_samp(pg_total_relation_size(reloid) order by least(remain_ages_xid, remain_ages_mxid)) as stddev_sz,
min(pg_total_relation_size(reloid)) as min_sz,
max(pg_total_relation_size(reloid)) as max_sz,
avg(pg_total_relation_size(reloid)) as avg_sz,
min(least(remain_ages_xid, remain_ages_mxid)) as min_rest_age,
max(least(remain_ages_xid, remain_ages_mxid)) as max_rest_age,
stddev_samp(least(remain_ages_xid, remain_ages_mxid) order by least(remain_ages_xid, remain_ages_mxid)) as stddev_rest_age,
corr(least(remain_ages_xid, remain_ages_mxid), pg_total_relation_size(reloid)) as corr_rest_age_sz
from dba.v_freeze
group by wb
) as b
) t
order by wb;

-- Per-table freeze forecast.
-- Every row of dba.v_freeze is assigned to a batch (wb) according to how many
-- transactions remain before autovacuum freezes it; each output row carries the
-- table's own columns plus the statistics of the batch it belongs to.
-- Column lists use * because the columns of dba.v_freeze are defined elsewhere.
create view dba.v_freeze_stat_detail as
select
  pg_size_pretty(t.ssz) as ssz2,          -- read I/O this batch's freeze will cause (table + toast + indexes)
  pg_size_pretty(t.ssz*3) as ssz3,        -- upper bound on write I/O for this batch (usually three copies: data file, WAL full page, WAL)
  pg_size_pretty(t.ssz_sum) as ssz4,      -- total size of all tables in all batches (table + toast + indexes)
  -- nullif() turns a zero denominator (only empty relations) into NULL instead of a division-by-zero error
  round(100*(t.ssz/nullif(t.ssz_sum, 0)), 2)||' %' as ratio_batch,                         -- share of this batch in the total; the goal is an even share per batch
  round(100*(pg_total_relation_size(t.reloid)/nullif(t.ssz, 0)), 2)||' %' as ratio_table,  -- share of this table inside its batch; big tables should not freeze together
  t.*
from (
  select a.*, b.*
  from (
    select
      min(least(remain_ages_xid, remain_ages_mxid)) as v_min,   -- smallest remaining transaction count before auto freeze, database wide
      max(least(remain_ages_xid, remain_ages_mxid)) as v_max    -- largest remaining transaction count before auto freeze, database wide
    from dba.v_freeze
  ) as a,
  lateral (     -- lateral so the bucket bounds a.v_min / a.v_max are visible below
    select
      count(*) over w as cnt,                                                -- number of tables in this batch
      sum(pg_total_relation_size(reloid)) over () as ssz_sum,                -- total size of all tables in all batches (table + toast + indexes)
      sum(pg_total_relation_size(reloid)) over w as ssz,                     -- total size of the tables in this batch (table + toast + indexes)
      pg_size_pretty(min(pg_total_relation_size(reloid)) over w) as min_sz,  -- smallest table in this batch
      pg_size_pretty(max(pg_total_relation_size(reloid)) over w) as max_sz,  -- largest table in this batch
      pg_size_pretty(avg(pg_total_relation_size(reloid)) over w) as avg_sz,  -- average table size in this batch
      pg_size_pretty(stddev_samp(pg_total_relation_size(reloid)) over w) as stddev_sz,  -- sample standard deviation of table size in this batch; larger means more varied sizes
      min(least(remain_ages_xid, remain_ages_mxid)) over w as min_rest_age,             -- lowest remaining transaction count in this batch
      max(least(remain_ages_xid, remain_ages_mxid)) over w as max_rest_age,             -- highest remaining transaction count in this batch
      stddev_samp(least(remain_ages_xid, remain_ages_mxid)) over w as stddev_rest_age,  -- sample standard deviation of the remaining count; small = freezes spread evenly, large = freezes may cluster (possibly only small tables)
      corr(least(remain_ages_xid, remain_ages_mxid), pg_total_relation_size(reloid)) over w as corr_rest_age_sz,  -- correlation of table size and remaining count; the closer to 1 or -1, the more stddev_rest_age and stddev_sz tell
      t1.*
    from (
      select
        -- width_bucket() raises an error when both bounds are equal, so a
        -- database whose tables all share one remaining age gets a single batch.
        case
          when a.v_max > a.v_min then
            width_bucket(
              least(tt.remain_ages_xid, tt.remain_ages_mxid),
              a.v_min,
              a.v_max,
              greatest((a.v_max-a.v_min)/1000000, 1)         -- one bucket per 1,000,000 transactions; change this constant to change the granularity
            )
          else 1
        end as wb,                                           -- batch number; each batch spans about 1,000,000 transactions
        *
      from dba.v_freeze tt
    ) as t1
    window w as (
      partition by t1.wb
    )
  ) as b
) t
order by
  least(t.remain_ages_xid, t.remain_ages_mxid),
  pg_total_relation_size(t.reloid) desc
;
-- The 20 largest ordinary tables (relkind 'r', ranked by pg_total_relation_size)
-- with their current xid age, the number of transactions left before autovacuum
-- forces a freeze, and the freeze_min_age that applies to them.
create view dba.top20freezebigtable as
select
  relowner::regrole,
  relnamespace::regnamespace,
  relname,
  age(relfrozenxid),                              -- current xid age of the table
  pg_size_pretty(pg_total_relation_size(oid)),
  -- Freeze threshold: the smaller of the per-table storage parameters when any
  -- is set (least() ignores NULLs), otherwise the server-wide setting.
  coalesce(
    least(
      substring(reloptions::text, 'autovacuum_freeze_max_age=(\d+)')::int,
      substring(reloptions::text, 'autovacuum_freeze_table_age=(\d+)')::int
    ),
    current_setting('autovacuum_freeze_max_age')::int
  )
  -
  -- xid values 0-2 are reserved (not normal transaction ids), so they yield NULL.
  -- Compared as int8: an xid is unsigned 32-bit and does not always fit in int4.
  age(case when relfrozenxid::text::int8<3 then null else relfrozenxid end)
  as remain_ages_xid,      -- transactions left before autovacuum triggers a freeze because of xid age
  coalesce(
    substring(reloptions::text, 'autovacuum_freeze_min_age=(\d+)')::int,
    current_setting('vacuum_freeze_min_age')::int
  ) as xid_lower_to_minage -- xid age the table drops to once it has been frozen
from pg_class
where relkind='r'
order by pg_total_relation_size(oid) desc
limit 20;

-- WAL files that are not archived yet: entries of the archive status
-- directory whose name does not end in "done".
create view dba.arch_undone as
select
  st.name,
  st.size,
  st.modification
from pg_ls_archive_statusdir() as st
where st.name !~ 'done$';

-- Status of the WAL archiver, as reported by pg_stat_get_archiver().
create view dba.arch_status as
select
  ar.archived_count,
  ar.last_archived_wal,
  ar.last_archived_time,
  ar.failed_count,
  ar.last_failed_wal,
  ar.last_failed_time,
  ar.stats_reset
from pg_stat_get_archiver() as ar;

-- Space used by the WAL directory: human-readable sum of every file size
-- returned by pg_ls_waldir().
create view dba.walsize as
select pg_size_pretty(sum(w.size))
from pg_ls_waldir() as w;

-- Replication slot status. Check for unused slots: they can keep WAL from
-- being removed and make the WAL directory grow without bound.
create view dba.repslots as
select slots.*
from pg_replication_slots as slots;

-- Amount of WAL the server is configured to always keep.
-- Always returns exactly one row; the value is NULL when neither setting exists.
create view dba.wal_keep_size as
select pg_size_pretty(
  coalesce(
    -- PostgreSQL 13 and later: wal_keep_size, whose pg_settings value is in megabytes.
    (
      select s.setting::int8 * 1024 * 1024
      from pg_settings as s
      where s.name = 'wal_keep_size'
    ),
    -- PostgreSQL 12 and earlier: wal_keep_segments (a segment count) multiplied
    -- by wal_segment_size (reported in bytes on PostgreSQL 11 and later).
    (
      select k.setting::int8 * seg.setting::int8
      from pg_settings as k
      cross join pg_settings as seg
      where k.name = 'wal_keep_segments'
        and seg.name = 'wal_segment_size'
    )
  )
);

-- max_wal_size as shown in pg_settings: the setting value followed by its unit.
create view dba.max_wal_size as
select s.setting || ' ' || s.unit
from pg_settings as s
where s.name = 'max_wal_size';
-- Long-running transactions and prepared transactions (pg_prepared_xacts).
-- One row: the distance between the current snapshot's xmin and the oldest xid
-- found, the backend with the oldest xid/xmin, and the oldest prepared
-- transaction (NULL columns when there is none).
-- NOTE(review): txid_snapshot_xmin() is an epoch-extended 64-bit id while
-- backend_xid, backend_xmin and pg_prepared_xacts.transaction are 32-bit xids,
-- so the difference presumably drifts after an xid epoch rollover; confirm.
create view dba.long_snapshot as
with oldest_prepared_xid as (
  select min(transaction::text::int8) as m
  from pg_prepared_xacts
),
current_xmin as (
  select txid_snapshot_xmin(txid_current_snapshot())::text::int8 as m
),
oldest_backend_xid as (
  select min(least(backend_xid::text::int8, backend_xmin::text::int8)) as m
  from pg_stat_activity
),
oldest_backend as (
  select datname, usename, pid, query_start, xact_start, now(), wait_event, query
  from pg_stat_activity
  where backend_xid is not null
     or backend_xmin is not null
  order by least(backend_xid::text::int8, backend_xmin::text::int8)
  limit 1
),
oldest_prepared as (
  select *
  from pg_prepared_xacts
  order by transaction::text::int8
  limit 1
)
select
  current_xmin.m - least(oldest_prepared_xid.m, oldest_backend_xid.m),
  oldest_backend.*,
  oldest_prepared.*
from oldest_prepared_xid
cross join current_xmin
cross join oldest_backend_xid
cross join oldest_backend
left join oldest_prepared on true;



-- 在主节点查询
select * from dba.ro_delay;  

-- 在只读节点查询
set lock_timeout='10ms';
set statement_timeout='2s';
select * from dba.node_delay;
select * from dba.ro_delay_on_standby;

2、查询top query, 优化之首

select * from dba.topsql;  

3、重置top query统计计数器(通常在高峰期来临前可以重置,防止结果干扰)

select pg_stat_statements_reset();  

4、查询 qps , 在psql 终端可以每秒打印一次

select * from dba.qps;  
\watch 1  


call dba.tps();

5、查询活跃会话数, 如果超过cpu核数, 说明数据库非常非常繁忙, 需要注意优化

select * from dba.session_acting_cnt;  


select * from dba.sessions;  

7、查询锁等待, 如果有大量长时间等待, 需要注意业务逻辑是否有问题

select * from dba.locks;

8、查询占用空间top 10的表

select * from dba.top10sizetable; 

9、查询占用空间top 10的索引

select * from dba.top10sizeindex;

10、查询占用空间top 10的表(含索引)

select * from dba.top10sizetableindex; 

11、查询膨胀空间top 10的表

select * from dba.top10bloatsizetable;

12、查询膨胀空间top 10的索引

select * from dba.top10bloatsizeindex;

13、查询膨胀比例top 10的表

select * from dba.top10bloatratiotable; 

14、查询膨胀比例top 10的索引

select * from dba.top10bloatratioindex;

15、查询更新和删除记录条数top 10的表

select * from dba.top10updatetable;

16、查询插入记录条数top 10的表

select * from dba.top10inserttable; 

17、查询脏记录条数top 10的表

select * from dba.top10deadtable;

18、查询年龄top 10的表

select * from dba.top10age;

19、查询当前的最老事务距离当前时间、距离当前事务数, 说明膨胀空间大小, 越大可能导致越多膨胀垃圾.

select * from dba.oldestxact; select * from pg_prepared_xacts;


select * from dba.seqs; 

21、postgresql 谁堵塞了谁(锁等待检测)- pg_blocking_pids

《postgresql 谁堵塞了谁(锁等待检测)- pg_blocking_pids》

22、查询没有使用过的大于1mb的索引 top 10 (注意, pk、uk如果只是用于约束, 可能不会被统计计数,但是不能删掉)

select * from dba.top10notusedidx;

23、查询没有使用过的大于1mb的表 top 10

select * from dba.top10notusedtab;

24、查询热表top 10

select * from dba.top10hottab;  

25、查询大于1mb的冷表top 10

select * from dba.top10coldtab;   

26、查询热索引top 10

select * from dba.top10hotidx;  

27、查询大于1mb的冷索引top 10(注意, pk、uk如果只是用于约束, 可能不会被统计计数,但是不能删掉)

select * from dba.top10coldidx; 


select * from dba.v_freeze;

select * from dba.v_freeze_stat;

select * from dba.v_freeze_stat_detail;

查询top 20大表的freeze剩余年龄。

select * from dba.top20freezebigtable;
-- 结合dba.tps, 可以通过remain_ages_xid/dba.tps估算每个表还有多久会发生freeze.
call dba.tps();

29、查询ro节点读与replay冲突次数, 建议高频恢复中的ro节点不要跑长sql。

select * from  dba.ro_conflicts;

30、dba在ro 节点人为执行sql前, 建议设置sql超时, 避免长时间跑 sql, 导致不必要的replay延迟和 conflict cancel statement

set statement_timeout ='1s';
set lock_timeout='10ms';

31、ro 节点的conflict容忍时间最长设置, 此处环境默认为5 min (社区版 PostgreSQL 的默认值为 30s, 以 show 的实际输出为准)

show max_standby_streaming_delay ;
(1 row)


\df *.*reset*
                                              list of functions
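   schema   |                  name                  |     result data type     | argument data types | type 
------------+----------------------------------------+--------------------------+---------------------+------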
 pg_catalog | pg_replication_origin_session_reset    | void                     |                     | func
 pg_catalog | pg_replication_origin_xact_reset       | void                     |                     | func
 pg_catalog | pg_stat_get_bgwriter_stat_reset_time   | timestamp with time zone |                     | func
 pg_catalog | pg_stat_get_db_stat_reset_time         | timestamp with time zone | oid                 | func
 pg_catalog | pg_stat_reset                          | void                     |                     | func
 pg_catalog | pg_stat_reset_shared                   | void                     | text                | func
 pg_catalog | pg_stat_reset_single_function_counters | void                     | oid                 | func
 pg_catalog | pg_stat_reset_single_table_counters    | void                     | oid                 | func
 public     | pg_stat_statements_reset               | void                     |                     | func
(9 rows)

33、在standby节点执行, 检查当前standby节点接收wal的速度

call dba.wal_receive_bw();

34、在standby节点执行, 检查当前standby节点replay wal的速度

call dba.wal_replay_bw();


select * from dba.arch_undone;

select * from dba.arch_status;

select * from dba.walsize;

select * from dba.repslots;

select * from dba.wal_keep_size;

select * from dba.max_wal_size;

36、长事务、prepared transaction (两阶段提交中已 prepare 但尚未提交/回滚的事务)

select * from dba.long_snapshot;


select * from dba.invalid_index;


《postgresql 实时健康监控 大屏 - 低频指标 - 珍藏级》
《postgresql 实时健康监控 大屏 - 高频指标(服务器) - 珍藏级》
《postgresql 实时健康监控 大屏 - 高频指标 - 珍藏级》
《postgresql freeze 风暴预测续 - 珍藏级sql》

