Greenplum常用SQL查询
抽空网上收罗Greenplum常用SQL查询语句整理备忘。欢迎各位留言补充。都是SQL命令以及数据字典的使用。熟悉数据字典非常重要。三个重要的schema:pg_catalog,pg_toolkit,information_schema,其中information_schema 中的数据字典都在视图中
目录
一、查看表某模式所有分布键信息
二、 数据库运行状态查询管理
1.greenplum查询正在运行的sql,session
2.终止执行的sql
3.查看greemplum资源队列状态
4.查看greemplum资源队列锁
5.查看greemplum资源队列优先级
6.查看greemplum所有连接 类似mysql SHOW PROCESSLIST
7.greemplum磁盘使用,通过SQL查看Greenplum中用了多少空间
8.查看greemplum节点状态
9.节点故障等历史信息
10.数据倾斜
11.greemplum表或索引大小 (占用空间)
12.greemplum表和索引大小(占用空间)
13.greemplum查看指定数据库大小(占用空间)
14.greemplum所有数据库大小(占用空间)
15.查看greemplum数据分布情况
三、查源数据
16.查看greemplum数据表更新时间
17.通过sql 获取greemplum表的预估数据量
18.通过sql 获取greemplum获取分布键
19.通过sql获取 greemplum指定表结构
20.显示哪些没有统计信息且可能需要ANALYZE的表
21.显示在系统表中被标记为掉线的Segment的信息
22.显示库中表的大小(单位G)
23.查询一个库中有多少表(如果有分区表不列出子分区)
24.查询某个用户对某个表有什么权限
25.查看分区表的信息
26.导入数据
27.远程导入数据
28.导数据指定分隔符(和mysql的select into outfile很像)
29.生成授权语句
30 .给用户授权
31、改变postgreSQL的默认schema
一、查看表某模式所有分布键信息
SELECT
aaa.nspname AS "模式名",
aaa.relname AS "表名",
aaa.table_comment AS "中文表明",
ccc.attname AS "分布键"
FROM
(
SELECT aa.oid,
obj_description (aa.oid) AS table_comment,
aa.relname,
bb.localoid,
bb.attrnums,
regexp_split_to_table (
array_to_string (bb.attrnums, ','),
','
) att,
dd.nspname FROM pg_class aa --原数据信息 重要的表!
LEFT
JOIN gp_distribution_policy bb ON bb.localoid = aa.oid --分布键表
LEFT
JOIN pg_namespace dd ON dd.oid = aa.relnamespace --模式
LEFT
JOIN pg_inherits hh ON aa.oid = hh.inhrelid --继承表
WHERE dd.nspname = 'dim' -- 替换成需要的模式
AND hh.inhrelid IS NULL
) aaa
LEFT JOIN pg_attribute ccc ON ccc.attrelid = aaa.oid
AND ccc.attnum = aaa.att
WHERE
ccc.attnum >
ORDER BY
aaa.relname;
二、 数据库运行状态查询管理
1.greenplum查询正在运行的sql,session
-- 方法1:
SELECT
tt.procpid, -- pid
usename user_name, -- 执行的用户
backend_start, -- 会话开始时间
query_start, -- 查询开始时间
waiting, -- 是否等待执行
now() - query_start AS current_query_time, -- 累计执行时间
now() - backend_start AS current_session_time,*/
current_query,
client_addr , datname
FROM
pg_stat_activity tt
WHERE current_query != '<IDLE>'
ORDER BY current_query_time DESC;
-- 方法2(通过视图查)
SELECT
procpid,
START,
now() - START AS lap,
current_query,
-- count() over() count_num,
t2.rolname,t3.rsqname,
ip
FROM
(
SELECT
backendid,
pg_stat_get_backend_userid(S.backendid) as uid,
pg_stat_get_backend_client_addr(S.backendid) as ip,
pg_stat_get_backend_pid (S.backendid) AS procpid,
pg_stat_get_backend_activity_start (S.backendid) AS START,
pg_stat_get_backend_activity (S.backendid) AS current_query
FROM
(
SELECT
pg_stat_get_backend_idset () AS backendid
) AS S
) AS t1 left join pg_authid t2 on t1.uid=t2.oid
left join pg_resqueue t3 on t2.rolresqueue=t3.oid
WHERE
current_query!= '<IDLE>'
ORDER BY lap DESC;
-- 方法3(限定了角色和资源队列,查当前账号正在查询的语句)
SELECT
rolname,
rsqname,
pid,
GRANTED,
current_query,
datname
FROM
pg_roles t1,
gp_toolkit.gp_resqueue_status t2 ,
pg_locks t3 ,
pg_stat_activity t4
WHERE
t1.rolresqueue = t3.objid
AND t3.objid=t2.queueid
and t4.procpid=t3.pid
2.终止执行的sql
select pg_terminate_backend(48988); --pid
3.查看greemplum资源队列状态
SELECT * FROM gp_toolkit.gp_resqueue_status;
4.查看greemplum资源队列锁
SELECT * FROM gp_toolkit.gp_locks_on_resqueue WHERE lorwaiting='true';
5.查看greemplum资源队列优先级
select * from gp_toolkit.gp_resq_priority_statement;
6.查看greemplum所有连接 类似mysql SHOW PROCESSLIST
select * from pg_stat_activity; -- 所有状态的连接
7.greemplum磁盘使用,通过SQL查看Greenplum中用了多少空间
select datname,pg_size_pretty(pg_database_size(datname)) from pg_database;
8.查看greemplum节点状态
select * from gp_segment_configuration tt
select * from gp_segment_configuration tt where tt.status='d'; -- 状态为down
9.节点故障等历史信息
select * from gp_configuration_history tt order by 1 desc ;
10.数据倾斜
SELECT
t1.gp_segment_id,
t1.count_tatol,
round(t1.count_tatol-(AVG(t1.count_tatol) over()) ,)
FROM
(
SELECT
gp_segment_id,
COUNT (*) count_tatol
FROM
<tablename> -- 要查的表
GROUP BY
gp_segment_id
) t1
order by 3
11.greemplum表或索引大小 (占用空间)
select pg_size_pretty(pg_relation_size('gp_test'));
12.greemplum表和索引大小(占用空间)
select pg_size_pretty(pg_total_relation_size('gp_test'));
13.greemplum查看指定数据库大小(占用空间)
select pg_size_pretty(pg_database_size('postgres'));
14.greemplum所有数据库大小(占用空间)
select datname,pg_size_pretty(pg_database_size(datname)) from pg_database;
15.查看greemplum数据分布情况
select gp_segment_id,count(*) from gp_test group by gp_segment_id order by 1;
三、查源数据
16.查看greemplum数据表更新时间
SELECT
*
FROM
pg_stat_last_operation,
pg_class
WHERE
objid = oid
AND relname = 'base_common'; -- 表名
17.通过sql 获取greemplum表的预估数据量
select
relname,
reltuples::int as total
from
pg_class
where
relname = 'base_common'
and relnamespace = (select oid from pg_namespace where nspname = 'positions');
18.通过sql 获取greemplum获取分布键
SELECT string_agg(att.attname,',' order by attrnums) as distribution
FROM gp_distribution_policy a,pg_attribute att
WHERE a.localoid ='bi_data.schoolmate_relations'::regclass
and a.localoid = att.attrelid
and att.attnum = any(a.attrnums);
19.通过sql获取 greemplum指定表结构
SELECT
attname, typname
FROM
pg_attribute
INNER JOIN pg_class ON pg_attribute.attrelid = pg_class.oid
INNER JOIN pg_type ON pg_attribute.atttypid = pg_type.oid
INNER JOIN pg_namespace on pg_class.relnamespace=pg_namespace.oid --
WHERE
pg_attribute.attnum >
AND attisdropped <> 't'
AND pg_namespace.nspname='resumes'
AND pg_class.relname= 'base_common'
-- and pg_class.relname ~~* any(array['%some%', '%someelse']));
order by pg_attribute.attnum
20.显示哪些没有统计信息且可能需要ANALYZE的表
SELECT * from gp_toolkit.gp_stats_missing ;
21.显示在系统表中被标记为掉线的Segment的信息
SELECT * from gp_toolkit.gp_pgdatabase_invalid;
22.显示库中表的大小(单位G)
SELECT sotdoid,sotdsize/1024/1024/1024 as sotdsize,sotdtoastsize,sotdadditionalsize,sotdschemaname,sotdtablename from gp_toolkit.gp_size_of_table_disk order by sotdsize desc;
23.查询一个库中有多少表(如果有分区表不列出子分区)
SELECT relname from pg_class a,pg_namespace b where relname not like '%prt%' and relkind ='r' and a.relnamespace=b.oid and nspname not in ('pg_catalog','information_schema','gp_toolkit') and nspname not like '%pg_temp%';
24.查询某个用户对某个表有什么权限
select * from INFORMATION_SCHEMA.role_table_grants where grantee='user_name' and table_name='table';
25.查看分区表的信息
SELECT tablename,partitiontablename,partitiontype,partitionboundary from pg_partitions where tablename='table_name' order by partitionboundary desc;
26.导入数据
copy t1 from '/home/gpadmin/t1.txt' with delimiter '|' LOG ERRORS INTO INSERT_ERRS SEGMENT REJECT LIMIT 100;
27.远程导入数据
psql -h 1.1.1.1 -U user_name -d db_name -W -c "copy tb1 from stdin with delimiter '|'" < /home/gpadmin/tb1.txt
28.导数据指定分隔符(和mysql的select into outfile很像)
psql -d db_name -c "select * from tb1" -o tb1.txt -t -A -F $'/t'
29.生成授权语句
SELECT 'grant select on '||relname||' to user_name;' from pg_class a,pg_namespace b where relname not like '%prt%' and relkind ='r' and has_table_privilege('user_name',a.oid,'select')='f' and a.relnamespace=b.oid and nspname not in ('pg_catalog','information_schema','gp_toolkit') and nspname not like '%pg_temp%';
30 .给用户授权
select 'grant all on SCHEMA ' || tt.autnspname || ' to tuser;' as grant_script from gp_toolkit.__gp_user_tables tt -- group by tt.autnspname
union -- all
select 'grant all on table ' || tt.autnspname || '.' ||tt.autrelname || ' to tuser;' grant_script from gp_toolkit.__gp_user_tables tt;
31、改变postgreSQL的默认schema
-- Use this to show the current search_path
-- Should return: "$user",public
SHOW search_path;
-- Create another schema
CREATE SCHEMA my_schema;
GRANT ALL ON SCHEMA my_schema TO my_user;
-- To change search_path on a connection-level
SET search_path TO my_schema;
-- To change search_path on a database-level
ALTER database "my_database" SET search_path TO my_schema;
相关文章