Microsoft SQL Server 利用过滤索引提高查询语句的性能
在某个字段中只有少量相关值需要查询时,可以针对值的子集创建过滤索引。例如,当字段中的值大部分为 NULL 并且查询只从非 NULL 值中进行选择时,可以为非 NULL 数据行创建筛选索引。由此得到的索引与对相同字段定义的全表非聚集索引相比,前者更小且维护开销更低。
表中含有分类数据行时,可以为一种或多种类别的数据创建筛选索引。通过将查询范围缩小为表的特定区域,这可以提高针对这些数据行的查询性能。此外,由此得到的索引与全表非聚集索引相比,前者更小且维护开销更低。
CREATE TABLE orders (
id INTEGER PRIMARY KEY,
customer_id INTEGER,
status VARCHAR(10)
);
BEGIN
DECLARE @counter INT = 1
WHILE @counter <= 1000000
BEGIN
INSERT INTO orders
SELECT @counter, (rand() * 100000),
CASE
WHEN (rand() * 100)<1 THEN 'pending'
WHEN (rand() * 100)>99 THEN 'shipped'
ELSE 'completed'
END
SET @counter = @counter + 1
END
END;
CREATE INDEX full_idx ON orders (customer_id, status);
SET STATISTICS PROFILE ON
SELECT *
FROM orders
WHERE customer_id = 5043
AND status != 'completed';
id |customer_id|status |
------+-----------+-------+
743436| 5043|pending|
947848| 5043|shipped|
Rows Executes StmtText StmtId NodeId Parent PhysicalOp LogicalOp Argument DefinedValues EstimateRows EstimateIO EstimateCPU AvgRowSize TotalSubtreeCost OutputList Warnings Type Parallel EstimateExecutions
2 1 SELECT * FROM [orders] WHERE [customer_id]=@1 AND [status]<>@2 1 1 NULL NULL NULL NULL 1.405213 NULL NULL NULL 0.003283546 NULL NULL SELECT NULL
2 1 |--Index Seek(OBJECT:([hrdb].[dbo].[orders].[full_idx]), SEEK:([hrdb].[dbo].[orders].[customer_id]=(5043) AND [hrdb].[dbo].[orders].[status] < 'completed' OR [hrdb].[dbo].[orders].[customer_id]=(5043) AND [hrdb].[dbo].[orders].[status] > 'completed') ORDERED FORWARD) 1 2 1 Index Seek Index Seek OBJECT:([hrdb].[dbo].[orders].[full_idx]), SEEK:([hrdb].[dbo].[orders].[customer_id]=(5043) AND [hrdb].[dbo].[orders].[status] < 'completed' OR [hrdb].[dbo].[orders].[customer_id]=(5043) AND [hrdb].[dbo].[orders].[status] > 'completed') ORDERED FORWARD [hrdb].[dbo].[orders].[id], [hrdb].[dbo].[orders].[customer_id], [hrdb].[dbo].[orders].[status] 1.405213 0.003125 0.0001585457 27 0.003283546 [hrdb].[dbo].[orders].[id], [hrdb].[dbo].[orders].[customer_id], [hrdb].[dbo].[orders].[status] NULL PLAN_ROW 0 1
SELECT ix.name AS "Index name",
SUM(sz.used_page_count) * 8/1024.0 AS "Index size (MB)"
FROM sys.dm_db_partition_stats AS sz
INNER JOIN sys.indexes AS ix ON sz.object_id = ix.object_id
AND sz.index_id = ix.index_id
INNER JOIN sys.tables tn ON tn.OBJECT_ID = ix.object_id
WHERE tn.name = 'orders'
GROUP BY ix.name;
Index name |Index size (MB)|
----------------------------+---------------+
full_idx | 26.171875|
PK__orders__3213E83F1E3B8A3B| 29.062500|
CREATE INDEX partial_idx ON orders (customer_id)
WHERE status != 'completed';
SELECT ix.name AS "Index name",
SUM(sz.used_page_count) * 8/1024.0 AS "Index size (MB)"
FROM sys.dm_db_partition_stats AS sz
INNER JOIN sys.indexes AS ix ON sz.object_id = ix.object_id
AND sz.index_id = ix.index_id
INNER JOIN sys.tables tn ON tn.OBJECT_ID = ix.object_id
WHERE tn.name = 'orders'
GROUP BY ix.name;
Index name |Index size (MB)|
----------------------------+---------------+
full_idx | 26.171875|
partial_idx | 0.289062|
PK__orders__3213E83F1E3B8A3B| 29.062500|
SELECT *
FROM orders WITH ( INDEX ( partial_idx ) )
WHERE customer_id = 5043
AND status != 'completed';
Rows Executes StmtText StmtId NodeId Parent PhysicalOp LogicalOp Argument DefinedValues EstimateRows EstimateIO EstimateCPU AvgRowSize TotalSubtreeCost OutputList Warnings Type Parallel EstimateExecutions
2 1 SELECT * FROM orders WITH ( INDEX ( partial_idx ) ) WHERE customer_id = 5043 AND status != 'completed' 1 1 NULL NULL NULL NULL 1.124088 NULL NULL NULL 0.03279812 NULL NULL SELECT NULL
2 1 |--Nested Loops(Inner Join, OUTER REFERENCES:([hrdb].[dbo].[orders].[id])) 1 2 1 Nested Loops Inner Join OUTER REFERENCES:([hrdb].[dbo].[orders].[id]) NULL 1.124088 0 4.15295E-05 24 0.03279812 [hrdb].[dbo].[orders].[id], [hrdb].[dbo].[orders].[customer_id], [hrdb].[dbo].[orders].[status] NULL PLAN_ROW 0 1
2 1 |--Index Seek(OBJECT:([hrdb].[dbo].[orders].[partial_idx]), SEEK:([hrdb].[dbo].[orders].[customer_id]=(5043)) ORDERED FORWARD) 1 3 2 Index Seek Index Seek OBJECT:([hrdb].[dbo].[orders].[partial_idx]), SEEK:([hrdb].[dbo].[orders].[customer_id]=(5043)) ORDERED FORWARD, FORCEDINDEX [hrdb].[dbo].[orders].[id], [hrdb].[dbo].[orders].[customer_id] 9.935287 0.003125 0.0001679288 15 0.003292929 [hrdb].[dbo].[orders].[id], [hrdb].[dbo].[orders].[customer_id] NULL PLAN_ROW 0 1
2 2 |--Clustered Index Seek(OBJECT:([hrdb].[dbo].[orders].[PK__orders__3213E83F1E3B8A3B]), SEEK:([hrdb].[dbo].[orders].[id]=[hrdb].[dbo].[orders].[id]) LOOKUP ORDERED FORWARD) 1 5 2 Clustered Index Seek Clustered Index Seek OBJECT:([hrdb].[dbo].[orders].[PK__orders__3213E83F1E3B8A3B]), SEEK:([hrdb].[dbo].[orders].[id]=[hrdb].[dbo].[orders].[id]) LOOKUP ORDERED FORWARD, FORCEDINDEX [hrdb].[dbo].[orders].[status] 1 0.003125 0.0001581 16 0.02946366 [hrdb].[dbo].[orders].[status] NULL PLAN_ROW 0 9.935287
-- 300 ms
SELECT count(*)
FROM orders WITH ( INDEX ( full_idx ) )
WHERE status != 'completed';
-- 10 ms
SELECT count(*)
FROM orders WITH ( INDEX ( partial_idx ) )
WHERE status != 'completed';
DROP INDEX partial_idx ON orders;
TRUNCATE TABLE orders;
CREATE UNIQUE INDEX partial_idx ON orders (customer_id)
WHERE status != 'completed';
INSERT INTO orders(id, customer_id, status) VALUES (1, 1, 'pending');
INSERT INTO orders(id, customer_id, status) VALUES (2, 1, 'pending');
SQL 错误 [2601] [23000]: 不能在具有索引“partial_idx”的对象“dbo.orders”中插入重复键的行。重复键值为 (1)。
相关文章