greenplum数据库基本操作

2023-03-20 00:00:00 创建数据库示例分区表分布

1.greenplum数据库创建分布键
greenplum数据库定义分布键有两种方式，一种是建表时定义，另一种是用alter修改分布键
如果不在建表时定义分布键或修改分布键，默认为表个字段。
1、建表分布键跟存储类型一起设定
2、修改test表分布键：
alter table test set distributed by (id)

2.设定存储类型
数据库存储类型分为行存储row和列存储column
greenplum数据库如果不设定存储类型，默认heap表，只有行存储，
如果设定存储类型，就会设置为AO表，包括行存储和列存储,
在建表时定义存储类型和分布键：

CREATE TABLE "public"."student" (
"name" varchar(255) COLLATE "pg_catalog"."default" DEFAULT NULL,
"age" int4 DEFAULT NULL,
"id" int4 NOT NULL DEFAULT NULL
)
// 设置行存储，id为分布键
WITH (appendonly=true, orientation=row) distributed by (id);
// 设置列存储，id为分布键
WITH (appendonly=true, orientation=column) distributed by (id);

3.快速新增数据
代码如下（示例）：

insert into student(id,name,age，create_dt) select generate_series(1,1000),'name',12,'2021-02-20';

4.创建分区表
1、

// 创建子表继承student
CREATE TABLE student_2021_02_20 () inherits (student);
// 增加表校验字段，create_dt = ‘2021-02-20’
ALTER TABLE student_2021_02_20
ADD CONSTRAINT almart_2021_02_20_check_date_key
CHECK (create_dt = '2021_02_20'::date);

CREATE INDEX almart_date_key_2021_02_20
ON student_2021_02_20 (create_dt);

2、

// 追加分区表
alter table public.student add partition p_order_detail_adt_20170601 START ('2017-05-01') END ('2017-06-01')
// 删除分区表
alter table public.student DROP partition p_order_detail_adt_20170601;
drop table tablename cascade;

3、创建表时创建分区表

CREATE TABLE "public"."student" (
"create_dt" date DEFAULT NULL,
"id" int4 NOT NULL DEFAULT NULL
)
WITH (APPENDONLY=true, COMPRESSLEVEL=1, ORIENTATION=column, COMPRESSTYPE=rle_type)
DISTRIBUTED by(id)
partition by range (create_dt)
(
partition "2021_" start ('2021-02-01') end ('2021-02-11') every (1),
default partition def
)

DROP TABLE IF EXISTS "public"."order_pickup_time";
CREATE TABLE "public"."order_pickup_time" (
"id" int8 DEFAULT NULL,
"order_id" int8 DEFAULT NULL,
"start_pickup_time" timestamp(6) DEFAULT NULL
)
distributed by (id)
partition by range(start_pickup_time)
(
partition "2021_" start ('2020-02-01') end ('2020-02-28') every (INTERVAL '1 day'),
default partition def
)
;

4、多级分区

create table "public"."student"
(
id int,
province varchar(64),
city varchar(64),
district varchar(64),
fdate date DEFAULT NULL
)
distributed by (id)
// fdate是一级分区，province是二级分区
partition by range(fdate)
subpartition by list(province)
subpartition template
(
subpartition c1 values ('黑龙江'),
subpartition c2 values ('辽宁')
)
(
partition "2021_" start ('2021-02-01') end ('2021-02-11') every (1),
default partition def
)

4.分页查询
代码如下（示例）：

// 查询1~1000条数据
SELECT FROM student ORDER BY name LIMIT 1000 OFFSET 0

4.时间函数
代码如下（示例）：

select date '2012-05-12 18:54:54'; 2012-05-12
select date (now() + interval '-1 month'); 2021-03-05 -> 2021-02-05
select to_char(now(), 'yyyy-mm-dd')

5.copy表结构
代码如下（示例）：

// 复制表结构，但是不能复制非空，主键和分布键
// 复制student表结构和数据
CREATE TABLE student_copy as SELECT * from student;
// 复制student表结构
CREATE TABLE student_copy as SELECT * from student WHERE 0 = 1;
// 只复制表结构(默认复制非空约束)，没有数据
CREATE TABLE student_copy (like student);
// 如果希望索引、主键约束和唯-一约束被复制的话，那么需要在后面加上参数including indexes（大小写无关）
CREATE TABLE student_copy (like student including indexes);

6.id自增
代码如下（示例）：

CREATE SEQUENCE order_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;

alter table order alter column id set default nextval('order_id_seq');

7.设置search_path

这个结构schema是ide_database,默认的查找路径是模块public，
如果你想新建一个模块ide_test,默认jdbc连接使用ide_test，就需要设置search_path

代码如下（示例）：

1）连接greenplum
C:\Windows\system32>psql -h 1.2.345.678 -p 5432 -d tpc_1 -U gpuser
2）查看search_path
tpc_1# show search_path;
"$user".public
3）修改search_path
tpc_1=# alter database tpc_1 set search_path to "$user", ide_test;

8.删除无用表空间
全数据库（很慢）
VACUUM ANALYZE；

分表执行
VACUUM ANALYZE table1;

查询库占用磁盘大小
select pg_size_pretty(pg_database_size('MyDatabase'));

查询表占用磁盘大小
普通表
select pg_size_pretty(pg_relation_size('relation_name')) ;

查看所有表占用的表空间
SELECT
table_schema || '.' || table_name AS table_full_name,
pg_size_pretty(pg_total_relation_size('"' || table_schema || '"."' || table_name || '"')) AS size
FROM information_schema.tables
ORDER BY
pg_total_relation_size('"' || table_schema || '"."' || table_name || '"') DESC;

9.greenplum invalid byte sequence for encoding “UTF8”: 0x00
// 将字段进行替换
REPLACE(create_branch_name,'\0','') create_branch_name

10.优化
//关闭优化器
gpconfig -c optimizer -v off
// 查看关联优化器
gpconfig --show enable_mergejoin
// 打开优化器
gpconfig -c enable_mergejoin -v on
// 查看全部参数
psql -c 'show all' -d postgres

11.gpfdist数据导入
目标：将mysql数据导入到greenplum数据库
1、mysql数据导出csv文件，将文件上传到服务器/home/gpadmin/datafile/test.csv
2、gp服务器启动gpfdist
$ gpfdist -d /home/gpadmin/datafile/ -p 8001 &
3、设置gpfdist大小
gpfdist -d /home/gpadmin/datafile/ -m 268435456 -p 8001 &
4、创建外部表关联到csv文件
create external table test(id int,name VARCHAR(30),age int)location(‘gpfdist://192.168.1.234:8001/drugs.csv’)format’CSV’;
5、导入数据
insert into gp_test select * from test;

删除外部表
drop external table test;

12.创建数据库
1、列举数据库，相当于mysql的show databases
\l
列举表，相当于mysql的show tables
\dt
查看表结构，相当于desc tblname,show columns from tbname
\d tblname
gp常用语句
https://blog.csdn.net/u010457406/article/details/75647167
2、create database devdw; 创建数据库
3、导出ddl语句
pg_dump -E UTF8 -s -v ide_warn_test -n public>ide_warn_test.ddl -
4、导入ddl语句
nohup psql -d “ide_warm” -f /home/gpadmin/ddl/ide_warn_test.ddl >ide_warm.log &

13.重启数据库
1、重新加载配置
gpstop -u
2、启动gp
gpstart
3、停止gp
gpstop
4、重启gp
gpstop -r

本文来源：https://blog.csdn.net/weixin_37603672/article/details/114136641

相关文章