HashData实战系列—云原生的支持地理信息数据分析
空间数据库的诞生
随着大数据技术的发展,急剧增长的地理空间大数据已成为海量数据处理的重要组成。地理空间大数据是指带有地理坐标的数据,包括资源、环境、经济和社会等领域一切带有地理坐标的数据,是地理实体的空间特征和属性特征的数字描述,应用前景多样。HashData通过将PostGIS用作插件,引入对空间数据类型、空间索引和空间函数的支持,成为了强大的空间数据库。在此基础上结合对象存储和OSS协议的特性,实现了raster/shapefile/netcdf 等格式的空间数据并行导入数据库。
空间数据库的替代优势
HashData 支持分布式PostGIS
作为典型的MPP(大规模并行处理Massively Parallel Processor)数据库代表,HashData充分利用其强大的计算能力满足了空间数据处理复杂计算的需求。在主流以PostGIS 为基础的数据库中,通常情况下可以利用PostGIS插件工具(raster2pgsql、shp2pgsql)将本地GIS数据转为SQL文件,将SQL文件导入数据库的表中进行存储和管理。这个过程的导入是通过数据库管理节点的使用(insert into ... values(...))方式串行导入数据。
HashData 空间数据导入示例
示例一:导入栅格数据格式
-Import Gis raster data to table:
CREATE READABLE EXTERNAL TABLE osstbl_example(filename text, rast raster, metadata text) LOCATION('oss://ossext-example.sh1a.qingstor.com/raster tile_size=100x100 oss_type=QS access_key_id=xxx secret_access_key=xxx') FORMAT 'raster';
SELECT filename, st_value(rast, 3, 4) from osstbl_example order by filename;
--Results of the raster
-- filename列说明
-- icg/gis/raster/test_input.tiff 是对象存储的文件路径。
-- tilenum 是当前切分的第几个瓦片。
-- xtile 表示坐标系x第几个瓦片。
-- ytile 表示坐标系y第几个瓦片。
-- tile_size 是当前切片大小。
filename | st_value
-----------------------------------------------------------------------------+------------------
icg/gis/raster/test_input.tiff tilenum:0 xtile:0 ytile:0 tile_size:100x100 | 260.100006103516
icg/gis/raster/test_input.tiff tilenum:1 xtile:1 ytile:0 tile_size:100x100 | 252.389999389648
icg/gis/raster/test_input.tiff tilenum:2 xtile:2 ytile:0 tile_size:100x100 | 255.429992675781
icg/gis/raster/test_input.tiff tilenum:3 xtile:3 ytile:0 tile_size:100x100 | 288.690002441406
icg/gis/raster/test_input.tiff tilenum:4 xtile:1 ytile:1 tile_size:100x100 | 280.169982910156
icg/gis/raster/test_input.tiff tilenum:5 xtile:2 ytile:1 tile_size:100x100 | 284.72998046875
icg/gis/raster/test_input.tiff tilenum:6 xtile:3 ytile:1 tile_size:100x100 | 301.100006103516
icg/gis/raster/test_input.tiff tilenum:7 xtile:1 ytile:2 tile_size:100x100 | 297.639984130859
icg/gis/raster/test_input.tiff tilenum:8 xtile:2 ytile:2 tile_size:100x100 | 301.940002441406
icg/gis/raster/test_output.tiff tilenum:0 xtile:0 ytile:0 tile_size:100x100 | 260.100006103516
icg/gis/raster/test_output.tiff tilenum:1 xtile:1 ytile:0 tile_size:100x100 | 252.389999389648
icg/gis/raster/test_output.tiff tilenum:2 xtile:2 ytile:0 tile_size:100x100 | 255.429992675781
icg/gis/raster/test_output.tiff tilenum:3 xtile:3 ytile:0 tile_size:100x100 | 288.690002441406
icg/gis/raster/test_output.tiff tilenum:4 xtile:1 ytile:1 tile_size:100x100 | 280.169982910156
icg/gis/raster/test_output.tiff tilenum:5 xtile:2 ytile:1 tile_size:100x100 | 284.72998046875
icg/gis/raster/test_output.tiff tilenum:6 xtile:3 ytile:1 tile_size:100x100 | 301.100006103516
icg/gis/raster/test_output.tiff tilenum:7 xtile:1 ytile:2 tile_size:100x100 | 297.639984130859
icg/gis/raster/test_output.tiff tilenum:8 xtile:2 ytile:2 tile_size:100x100 | 301.940002441406
Copy
--Create SQL Function:
CREATE OR REPLACE FUNCTION ogr_fdw_info(text) returns setof record as '$libdir/gpossext.so', 'Ogr_Fdw_Info' LANGUAGE C STRICT;
select * from ogr_fdw_info('oss://ossext-example.sh1a.qingstor.com/shape access_key_id=xxx secret_access_key=xxx oss_type=QS') AS tbl(name text, sqlq text);
--Results of the ogr_fdw_info SQL function
name | sqlq
----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
2launder | CREATE READABLE EXTERNAL TABLE shp_2launder (
: fid bigint,
: geom Geometry(Point,4326),
: n2ame varchar OPTIONS (column_name 2ame),
: age integer,
: height real,
: b_rthdate date OPTIONS (column_name b-rthdate)
: )
: LOCATION ('oss://ossext-example.sh1a.qingstor.com/shape access_key_id=xxx secret_access_key=xxx oss_type=QS layer=2launder')
: FORMAT 'Shapefile';
:
enc | CREATE READABLE EXTERNAL TABLE shp_enc (
: fid bigint,
: geom Geometry(Point,4326),
: name varchar,
: age integer,
: height real,
: birthdate date
: )
: LOCATION ('oss://ossext-example.sh1a.qingstor.com/shape access_key_id=xxx secret_access_key=xxx oss_type=QS layer=enc')
: FORMAT 'Shapefile';
:
pt_two | CREATE READABLE EXTERNAL TABLE shp_pt_two (
: fid bigint,
: geom Geometry(Point,4326),
: name varchar,
: age integer,
: height real,
: birthdate date
: )
: LOCATION ('oss://ossext-example.sh1a.qingstor.com/shape access_key_id=xxx secret_access_key=xxx oss_type=QS layer=pt_two')
: FORMAT 'Shapefile';
:
natural | CREATE READABLE EXTERNAL TABLE shp_natural (
: fid bigint,
: id real,
: natural varchar
: )
: LOCATION ('oss://ossext-example.sh1a.qingstor.com/shape access_key_id=xxx secret_access_key=xxx oss_type=QS layer=natural')
: FORMAT 'Shapefile';
:
(4 rows)
Copy
--Create shapefile table:
create readable external table shp_2launder (fid bigint, geom Geometry(Point,4326), name varchar, age integer, height real, birthdate date) location('oss://ossext-example.sh1a.qingstor.com/shape access_key_id=xxx secret_access_key=xxx oss_type=QS layer=2launder') format 'Shapefile';
SELECT * FROM shp_2launder;
--Results of the shapefile
fid | geom | name | age | height | birthdate
-----+----------------------------------------------------+-------+-----+--------+------------
0 | 0101000020E6100000C00497D1162CB93F8CBAEF08A080E63F | Peter | 45 | 5.6 | 04-12-1965
1 | 0101000020E610000054E943ACD697E2BFC0895EE54A46CF3F | Paul | 33 | 5.84 | 03-25-1971
(2 rows)
Copy
示例三:导入矢量数据格式
--Create SQL Function:
CREATE OR REPLACE FUNCTION nc_subdataset_info(text) returns setof record as '$libdir/gpossext.so', 'nc_subdataset_info' LANGUAGE C STRICT;
select * from nc_subdataset_info ('oss://ossext-example.sh1a.qingstor.com/netcdf/input.nc access_key_id=xxx secret_access_key=xxx oss_type=QS ') AS tbl(name text, sqlq text);
--Results of the netcdf SQL Function
name | sqlq
-------------------------+--------------------------------------------------------------------------------
icg/gis/netcdf/input.nc | SUBDATASET_1_NAME=NETCDF:"/vsiossext/netcdf/input.nc":TMP_P0_L103_GLL0
: SUBDATASET_1_DESC=[1x205x253] TMP_P0_L103_GLL0 (32-bit floating-point)
: SUBDATASET_2_NAME=NETCDF:"/vsiossext/netcdf/input.nc":initial_time0
: SUBDATASET_2_DESC=[1x18] initial_time0 (8-bit character)
:
Copy
--Create netcdf table:
CREATE READABLE EXTERNAL TABLE osstbl_netcdf(filename text, rast raster, metadata text) LOCATION('oss://ossext-example.sh1a.qingstor.com/netcdf/input.nc subdataset=1 access_key_id=xxx secret_access_key=xxx oss_type=QS') FORMAT 'netcdf';
SELECT filename, st_value(rast, 3, 4) from osstbl_netcdf order by filename;
--Results of the netcdf
filename | st_value
-----------------+------------------
netcdf/input.nc | 260.100006103516
(1 row)
小结