PostgreSQL统计与性能优化深度分析
基本关系级别的统计信息存储在pg_class系统目录的表中。统计数据包括以下数据:
关系的行数 ( reltuples)。 关系的页面大小 ( relpages)。 在关系的可见性图中标记的页面数(relallvisible)。
SELECT reltuples, relpages, relallvisible FROM pg_class WHERE relname = 'flights'; reltuples | relpages | relallvisible
−−−−−−−−−−−+−−−−−−−−−−+−−−−−−−−−−−−−−−
214867 | 2624 | 2624
(1 row)
EXPLAIN SELECT * FROM flights;
QUERY PLAN
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
Seq Scan on flights (cost=.00..4772.67 rows=214867 width=63)
(1 row)
CREATE TABLE pgccc_flights_copy(LIKE flights)
WITH (autovacuum_enabled = false);
SELECT reltuples, relpages, relallvisible
FROM pg_class WHERE relname = 'pgccc_flights_copy';
reltuples | relpages | relallvisible
−−−−−−−−−−−+−−−−−−−−−−+−−−−−−−−−−−−−−−
−1 | 0 | 0
(1 row)
EXPLAIN SELECT * FROM pgccc_flights_copy;
QUERY PLAN
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
Seq Scan on pgccc_flights_copy (cost=0.00..14.10 rows=410 width=170)
(1 row)
INSERT INTO pgccc_flights_copy SELECT * FROM flights;
INSERT 214867
ANALYZE pgccc_flights_copy;
SELECT reltuples, relpages, relallvisible
FROM pg_class WHERE relname = 'pgccc_flights_copy';
reltuples | relpages | relallvisible
−−−−−−−−−−−+−−−−−−−−−−+−−−−−−−−−−−−−−−
214867 | 2624 | 0
(1 row)
VACUUM pgccc_flights_copy; SELECT relallvisible FROM pg_class WHERE relname = 'pgccc_flights_copy';
relallvisible
−−−−−−−−−−−−−−−
2624
(1 row)
INSERT INTO pgccc_flights_copy SELECT * FROM flights;
SELECT count(*) FROM pgccc_flights_copy;
count
−−−−−−−−
429734
(1 row)
EXPLAIN SELECT * FROM pgccc_flights_copy;
QUERY PLAN
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
Seq Scan on pgccc_flights_copy (cost=0.00..9545.34 rows=429734 width=63)
(1 row)
SELECT reltuples, relpages
FROM pg_class WHERE relname = 'pgccc_flights_copy';
reltuples | relpages
−−−−−−−−−−−+−−−−−−−−−−
214867 | 2624
(1 row)
SELECT reltuples *
(pg_relation_size('flights_copy') / 8192) / relpages
FROM pg_class WHERE relname = 'pgccc_flights_copy';
?column?
−−−−−−−−−−
429734
(1 row)
空值
EXPLAIN SELECT * FROM flights WHERE actual_departure IS NULL;
QUERY PLAN
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
Seq Scan on flights (cost=0.00..4772.67 rows=16036 width=63)
Filter: (actual_departure IS NULL)
(2 rows)
SELECT round(reltuples * s.null_frac) AS rows
FROM pg_class
JOIN pg_stats s ON s.tablename = relname
WHERE s.tablename = 'flights'
AND s.attname = 'actual_departure';
rows
−−−−−−−
16036
(1 row)
Distinct values
ALTER TABLE ... ALTER COLUMN ... SET (n_distinct = ...);
EXPLAIN
SELECT * FROM flights WHERE departure_airport = (
SELECT airport_code FROM airports WHERE city = 'Saint Petersburg'
);
QUERY PLAN
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
Seq Scan on flights (cost=30.56..5340.40 rows=2066 width=63)
Filter: (departure_airport = $)
InitPlan 1 (returns $)
−> Seq Scan on airports_data ml (cost=.00..30.56 rows=1 wi...
Filter: ((city −>> lang()) = 'Saint Petersburg'::text)
(5 rows)
SELECT round(reltuples / s.n_distinct) AS rows
FROM pg_class
JOIN pg_stats s ON s.tablename = relname
WHERE s.tablename = 'flights'
AND s.attname = 'departure_airport';
rows
−−−−−−
2066
(1 row)
SELECT min(cnt), round(avg(cnt)) avg, max(cnt) FROM (
SELECT departure_airport, count(*) cnt
FROM flights GROUP BY departure_airport
) t;
min | avg | max
−−−−−+−−−−−−+−−−−−−−
113 | 2066 | 20875
(1 row)
常见的值
SELECT most_common_vals AS mcv,
left(most_common_freqs::text,60) || '...' AS mcf
FROM pg_stats
WHERE tablename = 'flights' AND attname = 'aircraft_code' \gx
−[ RECORD 1 ]−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
mcv | {CN1,CR2,SU9,321,763,733,319,773}
mcf | {0.2783,0.27473333,0.25816667,0.059233334,0.038533334,0.0370...
EXPLAIN SELECT * FROM flights WHERE aircraft_code = '733';
QUERY PLAN
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
Seq Scan on flights (cost=0.00..5309.84 rows=7957 width=63)
Filter: (aircraft_code = '733'::bpchar)
(2 rows)
SELECT round(reltuples * s.most_common_freqs[
array_position((s.most_common_vals::text::text[]),'733')
])
FROM pg_class
JOIN pg_stats s ON s.tablename = relname
WHERE s.tablename = 'flights'
AND s.attname = 'aircraft_code';
round
−−−−−−−
7957
(1 row)
ALTER TABLE ... ALTER COLUMN ... SET STATISTICS ...;
柱状图
SELECT left(histogram_bounds::text,60) || '...' AS histogram_bounds
FROM pg_stats s
WHERE s.tablename = 'boarding_passes' AND s.attname = 'seat_no';
histogram_bounds
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
{10B,10D,10D,10F,11B,11C,11H,12H,13B,14B,14H,15H,16D,16D,16H...
(1 row)
除其他外,直方图与 MCV 列表一起用于估计“大于”和“小于”操作的选择性。
示例:计算为后排座位签发的登机牌数量。
EXPLAIN SELECT * FROM boarding_passes WHERE seat_no > '30C';
QUERY PLAN
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
Seq Scan on boarding_passes (cost=.00..157353.30 rows=2943394 ...
Filter: ((seat_no)::text > '30C'::text)
(2 rows)
SELECT sum(s.most_common_freqs[
array_position((s.most_common_vals::text::text[]),v)
])
FROM pg_stats s, unnest(s.most_common_vals::text::text[]) v
WHERE s.tablename = 'boarding_passes' AND s.attname = 'seat_no'
AND v > '30C';
sum
−−−−−−−−
0.2127
(1 row)
SELECT sum(s.most_common_freqs[
array_position((s.most_common_vals::text::text[]),v)
])
FROM pg_stats s, unnest(s.most_common_vals::text::text[]) v
WHERE s.tablename = 'boarding_passes' AND s.attname = 'seat_no';
sum
−−−−−−−−
0.6762
(1 row)
SELECT s.null_frac
FROM pg_stats s
WHERE s.tablename = 'boarding_passes' AND s.attname = 'seat_no';
null_frac
−−−−−−−−−−−
0
(1 row)
SELECT round( reltuples * (
0.2127 -- from most common values
+ (1 - 0.6762 - ) * (49 / 100.0) -- from histogram
))
FROM pg_class WHERE relname = 'boarding_passes';
round
−−−−−−−−−
2943394
(1 row)
SELECT n_distinct FROM pg_stats
WHERE tablename = 'boarding_passes' AND attname = 'seat_no';
n_distinct
−−−−−−−−−−−−
461
(1 row)
非标量数据类型的统计
平均场宽度
SELECT attname, avg_width FROM pg_stats
WHERE (tablename, attname) IN ( VALUES
('tickets', 'passenger_name'), ('ticket_flights','fare_conditions')
);
attname | avg_width
−−−−−−−−−−−−−−−−−+−−−−−−−−−−−
fare_conditions | 8
passenger_name | 16
(2 rows)
相关性
SELECT attname, correlation
FROM pg_stats WHERE tablename = 'airports_data'
ORDER BY abs(correlation) DESC;
attname | correlation
−−−−−−−−−−−−−−+−−−−−−−−−−−−−
coordinates |
airport_code | −0.21120238
city | −0.1970127
airport_name | −0.18223621
timezone | 0.17961165
(5 rows)
表达统计
EXPLAIN SELECT * FROM flights
WHERE extract(
month FROM scheduled_departure AT TIME ZONE 'Europe/Moscow'
) = 1;
QUERY PLAN
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
Seq Scan on flights (cost=0.00..6384.17 rows=1074 width=63)
Filter: (EXTRACT(month FROM (scheduled_departure AT TIME ZONE ...
(2 rows)
SELECT round(reltuples * 0.005)
FROM pg_class WHERE relname = 'flights';
round
−−−−−−−
1074
(1 row)
SELECT count(*) AS total,
count(*) FILTER (WHERE extract(
month FROM scheduled_departure AT TIME ZONE 'Europe/Moscow'
) = 1) AS january
FROM flights;
total | january
−−−−−−−−+−−−−−−−−−
214867 | 16831
(1 row)
扩展表达式统计
CREATE STATISTICS flights_expr ON (extract(
month FROM scheduled_departure AT TIME ZONE 'Europe/Moscow'
))
FROM flights;
ANALYZE flights;
EXPLAIN SELECT * FROM flights
WHERE extract(
month FROM scheduled_departure AT TIME ZONE 'Europe/Moscow'
) = 1;
QUERY PLAN
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
Seq Scan on flights (cost=0.00..6384.17 rows=16222 width=63)
Filter: (EXTRACT(month FROM (scheduled_departure AT TIME ZONE ...
(2 rows)
SELECT left(expr,50) || '...' AS expr,
null_frac, avg_width, n_distinct,
most_common_vals AS mcv,
left(most_common_freqs::text,50) || '...' AS mcf,
correlation
FROM pg_stats_ext_exprs WHERE statistics_name = 'flights_expr' \gx
-[ RECORD 1 ]−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
expr | EXTRACT(month FROM (scheduled_departure AT TIME ZO...
null_frac |
avg_width | 8
n_distinct | 12
mcv | {8,9,3,5,12,4,10,7,11,1,6,2}
mcf | {0.12526667,0.11016667,0.07903333,0.07903333,0.078..
correlation | 0.095407926
ALTER STATISTICS flights_expr SET STATISTICS 42;
表达式索引统计
DROP STATISTICS flights_expr;
CREATE INDEX ON flights(extract(
month FROM scheduled_departure AT TIME ZONE 'Europe/Moscow'
)); => ANALYZE flights;
EXPLAIN SELECT * FROM flights WHERE extract(
month FROM scheduled_departure AT TIME ZONE 'Europe/Moscow'
) = 1;
QUERY PLAN
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
Bitmap Heap Scan on flights (cost=318.42..3235.96 rows=16774 wi...
Recheck Cond: (EXTRACT(month FROM (scheduled_departure AT TIME...
−> Bitmap Index Scan on flights_extract_idx (cost=0.00..314.2...
Index Cond: (EXTRACT(month FROM (scheduled_departure AT TI...
(4 rows)
SELECT n_distinct FROM pg_stats
WHERE tablename = 'flights_extract_idx';
n_distinct
−−−−−−−−−−−−
12
(1 row)
SELECT attname FROM pg_attribute
WHERE attrelid = 'flights_extract_idx'::regclass;
attname
−−−−−−−−−
extract
(1 row)
ALTER INDEX flights_extract_idx
ALTER COLUMN extract SET STATISTICS 42;
多元统计
SELECT count(*) FROM flights
WHERE flight_no = 'PG0007' AND departure_airport = 'VKO';
count
−−−−−−−
396
(1 row)
EXPLAIN SELECT * FROM flights
WHERE flight_no = 'PG0007' AND departure_airport = 'VKO';
QUERY PLAN
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
Bitmap Heap Scan on flights (cost=12.03..1238.70 rows=26 width=63)
Recheck Cond: (flight_no = 'PG0007'::bpchar)
Filter: (departure_airport = 'VKO'::bpchar)
−> Bitmap Index Scan on flights_flight_no_scheduled_departure_key
(cost=.00..12.02 rows=480 width=)
Index Cond: (flight_no = 'PG0007'::bpchar)
(6 rows)
CREATE STATISTICS flights_dep(dependencies)
ON flight_no, departure_airport FROM flights;
ANALYZE flights;
EXPLAIN SELECT * FROM flights
WHERE flight_no = 'PG0007' AND departure_airport = 'VKO';
QUERY PLAN
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
Bitmap Heap Scan on flights (cost=10.56..816.91 rows=276 width=63)
Recheck Cond: (flight_no = 'PG0007'::bpchar)
Filter: (departure_airport = 'VKO'::bpchar)
−> Bitmap Index Scan on flights_flight_no_scheduled_departure_key
(cost=.00..10.49 rows=276 width=)
Index Cond: (flight_no = 'PG0007'::bpchar)
(6 rows)
SELECT dependencies
FROM pg_stats_ext WHERE statistics_name = 'flights_dep';
dependencies
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
{"2 => 5": 1.000000, "5 => 2": .010567}
(1 row)
不同值的多元数量
SELECT count(*) FROM (
SELECT DISTINCT departure_airport, arrival_airport FROM flights
) t;
count
−−−−−−−
618
(1 row)
EXPLAIN
SELECT DISTINCT departure_airport, arrival_airport FROM flights;
QUERY PLAN
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
HashAggregate (cost=5847.01..5955.16 rows=10816 width=8)
Group Key: departure_airport, arrival_airport
−> Seq Scan on flights (cost=0.00..4772.67 rows=214867 width=8)
(3 rows)
CREATE STATISTICS flights_nd(ndistinct)
ON departure_airport, arrival_airport FROM flights;
ANALYZE flights;
EXPLAIN
SELECT DISTINCT departure_airport, arrival_airport FROM flights;
QUERY PLAN
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
HashAggregate (cost=5847.01..5853.19 rows=618 width=8)
Group Key: departure_airport, arrival_airport
−> Seq Scan on flights (cost=.00..4772.67 rows=214867 width=8)
(3 rows)
SELECT n_distinct
FROM pg_stats_ext WHERE statistics_name = 'flights_nd';
n_distinct
−−−−−−−−−−−−−−−
{"5, 6": 618}
(1 row)
多元常见值列表
SELECT count(*) FROM flights
WHERE departure_airport = 'SVO' AND aircraft_code = '733'
count
−−−−−−−
2037
(1 row)
EXPLAIN SELECT * FROM flights
WHERE departure_airport = 'SVO' AND aircraft_code = '733';
QUERY PLAN
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
Seq Scan on flights (cost=0.00..5847.00 rows=733 width=63)
Filter: ((departure_airport = 'SVO'::bpchar) AND (aircraft_cod...
(2 rows)
CREATE STATISTICS flights_mcv(mcv)
ON departure_airport, aircraft_code FROM flights;
ANALYZE flights;
EXPLAIN SELECT * FROM flights
WHERE departure_airport = 'SVO' AND aircraft_code = '733';
QUERY PLAN
−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
Seq Scan on flights (cost=0.00..5847.00 rows=2077 width=63)
Filter: ((departure_airport = 'SVO'::bpchar) AND (aircraft_cod...
(2 rows)
SELECT values, frequency
FROM pg_statistic_ext stx
JOIN pg_statistic_ext_data stxd ON stx.oid = stxd.stxoid,
pg_mcv_list_items(stxdmcv) m
WHERE stxname = 'flights_mcv'
AND values = '{SVO,773}';
values | frequency
−−−−−−−−−−−+−−−−−−−−−−−−−−−−−−−−−−
{SVO,773} | .005733333333333333
(1 row)
ALTER STATISTICS ... SET STATISTICS ...;
原文链接:https://mp.weixin.qq.com/s/D6EziA8ZqTbggLg_42zGlg
相关文章