From ec986020decff322723cf7b3a2696803d082ad17 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Tue, 7 Jan 2025 10:38:30 -0500 Subject: [PATCH] Improve nbtree unsatisfiable RowCompare detection. Move nbtree's detection of RowCompare quals that are unsatisfiable due to having a NULL in their first row element: rather than detecting these cases at the point where _bt_first builds its insertion scan key, do so earlier, during preprocessing proper. This brings the RowCompare case in line every other case involving an unsatisfiable-due-to-NULL qual. nbtree now consistently detects such unsatisfiable quals -- even when they happen to involve a key that isn't examined by _bt_first at all. Affected cases thereby avoid useless full index scans that cannot possibly return any matching rows. Author: Peter Geoghegan Reviewed-By: Matthias van de Meent Discussion: https://postgr.es/m/CAH2-WzmySVXst2hFrOATC-zw1Byg1XC-jYUS314=mzuqsNwk+Q@mail.gmail.com --- src/backend/access/nbtree/nbtsearch.c | 26 ++--- src/backend/access/nbtree/nbtutils.c | 14 ++- src/test/regress/expected/btree_index.out | 127 +++++++++++++++++++++ src/test/regress/expected/create_index.out | 13 +++ src/test/regress/sql/btree_index.sql | 79 +++++++++++++ src/test/regress/sql/create_index.sql | 5 + 6 files changed, 248 insertions(+), 16 deletions(-) diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 8f559629cd..472ce06f19 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -1162,23 +1162,23 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) if (cur->sk_flags & SK_ROW_HEADER) { /* - * Row comparison header: look to the first row member instead. - * - * The member scankeys are already in insertion format (ie, they - * have sk_func = 3-way-comparison function), but we have to watch - * out for nulls, which _bt_preprocess_keys didn't check. A null - * in the first row member makes the condition unmatchable, just - * like qual_ok = false. + * Row comparison header: look to the first row member instead */ ScanKey subkey = (ScanKey) DatumGetPointer(cur->sk_argument); + /* + * Cannot be a NULL in the first row member: _bt_preprocess_keys + * would've marked the qual as unsatisfiable, preventing us from + * ever getting this far + */ Assert(subkey->sk_flags & SK_ROW_MEMBER); - if (subkey->sk_flags & SK_ISNULL) - { - Assert(!so->needPrimScan); - _bt_parallel_done(scan); - return false; - } + Assert(subkey->sk_attno == cur->sk_attno); + Assert(!(subkey->sk_flags & SK_ISNULL)); + + /* + * The member scankeys are already in insertion format (ie, they + * have sk_func = 3-way-comparison function) + */ memcpy(inskey.scankeys + i, subkey, sizeof(ScanKeyData)); /* diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 268b7b02ac..00e17a1f0f 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -3371,6 +3371,13 @@ _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption) { ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument); + if (subkey->sk_flags & SK_ISNULL) + { + /* First row member is NULL, so RowCompare is unsatisfiable */ + Assert(subkey->sk_flags & SK_ROW_MEMBER); + return false; + } + for (;;) { Assert(subkey->sk_flags & SK_ROW_MEMBER); @@ -3982,13 +3989,14 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, if (subkey->sk_flags & SK_ISNULL) { /* - * Unlike the simple-scankey case, this isn't a disallowed case. + * Unlike the simple-scankey case, this isn't a disallowed case + * (except when it's the first row element that has the NULL arg). * But it can never match. If all the earlier row comparison * columns are required for the scan direction, we can stop the * scan, because there can't be another tuple that will succeed. */ - if (subkey != (ScanKey) DatumGetPointer(skey->sk_argument)) - subkey--; + Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument)); + subkey--; if ((subkey->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) *continuescan = false; diff --git a/src/test/regress/expected/btree_index.out b/src/test/regress/expected/btree_index.out index def78ef858..8879554c3f 100644 --- a/src/test/regress/expected/btree_index.out +++ b/src/test/regress/expected/btree_index.out @@ -142,6 +142,133 @@ SELECT b.* 4500 | 2080851358 (1 row) +-- +-- Add coverage of RowCompare quals whose row omits a column ("proargtypes") +-- that's after the first column, but before the final column. The scan's +-- initial positioning strategy must become >= here (it's not the > strategy, +-- since the absence of "proargtypes" makes that tighter constraint unsafe). +-- +explain (costs off) +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE (proname, pronamespace) > ('abs', 0) +ORDER BY proname, proargtypes, pronamespace LIMIT 1; + QUERY PLAN +------------------------------------------------------------------------------- + Limit + -> Index Only Scan using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: (ROW(proname, pronamespace) > ROW('abs'::name, '0'::oid)) +(3 rows) + +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE (proname, pronamespace) > ('abs', 0) +ORDER BY proname, proargtypes, pronamespace LIMIT 1; + proname | proargtypes | pronamespace +---------+-------------+-------------- + abs | 20 | 11 +(1 row) + +-- +-- Similar to the previous test case, but this time it's a backwards scan +-- using a < RowCompare. Must use the <= strategy (and not the < strategy). +-- +explain (costs off) +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE (proname, pronamespace) < ('abs', 1_000_000) +ORDER BY proname DESC, proargtypes DESC, pronamespace DESC LIMIT 1; + QUERY PLAN +------------------------------------------------------------------------------------- + Limit + -> Index Only Scan Backward using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: (ROW(proname, pronamespace) < ROW('abs'::name, '1000000'::oid)) +(3 rows) + +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE (proname, pronamespace) < ('abs', 1_000_000) +ORDER BY proname DESC, proargtypes DESC, pronamespace DESC LIMIT 1; + proname | proargtypes | pronamespace +---------+-------------+-------------- + abs | 1700 | 11 +(1 row) + +-- +-- Add coverage for RowCompare quals whose rhs row has a NULL that ends scan +-- +explain (costs off) +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE proname = 'abs' AND (proname, proargtypes) < ('abs', NULL) +ORDER BY proname, proargtypes, pronamespace; + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Index Only Scan using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: ((ROW(proname, proargtypes) < ROW('abs'::name, NULL::oidvector)) AND (proname = 'abs'::name)) +(2 rows) + +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE proname = 'abs' AND (proname, proargtypes) < ('abs', NULL) +ORDER BY proname, proargtypes, pronamespace; + proname | proargtypes | pronamespace +---------+-------------+-------------- +(0 rows) + +-- +-- Add coverage for backwards scan RowCompare quals whose rhs row has a NULL +-- that ends scan +-- +explain (costs off) +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE proname = 'abs' AND (proname, proargtypes) > ('abs', NULL) +ORDER BY proname DESC, proargtypes DESC, pronamespace DESC; + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Index Only Scan Backward using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: ((ROW(proname, proargtypes) > ROW('abs'::name, NULL::oidvector)) AND (proname = 'abs'::name)) +(2 rows) + +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE proname = 'abs' AND (proname, proargtypes) > ('abs', NULL) +ORDER BY proname DESC, proargtypes DESC, pronamespace DESC; + proname | proargtypes | pronamespace +---------+-------------+-------------- +(0 rows) + +-- +-- Add coverage for recheck of > key following array advancement on previous +-- (left sibling) page that used a high key whose attribute value corresponding +-- to the > key was -inf (due to being truncated when the high key was created). +-- +-- XXX This relies on the assumption that tenk1_thous_tenthous has a truncated +-- high key "(183, -inf)" on the first page that we'll scan. The test will only +-- provide useful coverage when the default 8K BLCKSZ is in use. +-- +explain (costs off) +SELECT thousand, tenthous + FROM tenk1 + WHERE thousand IN (182, 183) AND tenthous > 7550; + QUERY PLAN +--------------------------------------------------------------------------------- + Index Only Scan using tenk1_thous_tenthous on tenk1 + Index Cond: ((thousand = ANY ('{182,183}'::integer[])) AND (tenthous > 7550)) +(2 rows) + +SELECT thousand, tenthous + FROM tenk1 + WHERE thousand IN (182, 183) AND tenthous > 7550; + thousand | tenthous +----------+---------- + 182 | 8182 + 182 | 9182 + 183 | 8183 + 183 | 9183 +(4 rows) + -- -- Add coverage for optimization of backwards scan index descents -- diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index 1904eb65bb..8011c141bf 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -2428,6 +2428,19 @@ SELECT unique1 FROM tenk1 WHERE unique1 IN (1, 42, 7) and unique1 < (-1)::bigint --------- (0 rows) +explain (costs off) +SELECT unique1 FROM tenk1 WHERE (thousand, tenthous) > (NULL, 5); + QUERY PLAN +----------------------------------------------------------------- + Index Scan using tenk1_thous_tenthous on tenk1 + Index Cond: (ROW(thousand, tenthous) > ROW(NULL::integer, 5)) +(2 rows) + +SELECT unique1 FROM tenk1 WHERE (thousand, tenthous) > (NULL, 5); + unique1 +--------- +(0 rows) + -- -- Check elimination of constant-NULL subexpressions -- diff --git a/src/test/regress/sql/btree_index.sql b/src/test/regress/sql/btree_index.sql index 2c3b135292..670ad5c6e6 100644 --- a/src/test/regress/sql/btree_index.sql +++ b/src/test/regress/sql/btree_index.sql @@ -110,6 +110,85 @@ SELECT b.* FROM bt_f8_heap b WHERE b.seqno = '4500'::float8; +-- +-- Add coverage of RowCompare quals whose row omits a column ("proargtypes") +-- that's after the first column, but before the final column. The scan's +-- initial positioning strategy must become >= here (it's not the > strategy, +-- since the absence of "proargtypes" makes that tighter constraint unsafe). +-- +explain (costs off) +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE (proname, pronamespace) > ('abs', 0) +ORDER BY proname, proargtypes, pronamespace LIMIT 1; + +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE (proname, pronamespace) > ('abs', 0) +ORDER BY proname, proargtypes, pronamespace LIMIT 1; + +-- +-- Similar to the previous test case, but this time it's a backwards scan +-- using a < RowCompare. Must use the <= strategy (and not the < strategy). +-- +explain (costs off) +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE (proname, pronamespace) < ('abs', 1_000_000) +ORDER BY proname DESC, proargtypes DESC, pronamespace DESC LIMIT 1; + +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE (proname, pronamespace) < ('abs', 1_000_000) +ORDER BY proname DESC, proargtypes DESC, pronamespace DESC LIMIT 1; + +-- +-- Add coverage for RowCompare quals whose rhs row has a NULL that ends scan +-- +explain (costs off) +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE proname = 'abs' AND (proname, proargtypes) < ('abs', NULL) +ORDER BY proname, proargtypes, pronamespace; + +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE proname = 'abs' AND (proname, proargtypes) < ('abs', NULL) +ORDER BY proname, proargtypes, pronamespace; + +-- +-- Add coverage for backwards scan RowCompare quals whose rhs row has a NULL +-- that ends scan +-- +explain (costs off) +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE proname = 'abs' AND (proname, proargtypes) > ('abs', NULL) +ORDER BY proname DESC, proargtypes DESC, pronamespace DESC; + +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE proname = 'abs' AND (proname, proargtypes) > ('abs', NULL) +ORDER BY proname DESC, proargtypes DESC, pronamespace DESC; + +-- +-- Add coverage for recheck of > key following array advancement on previous +-- (left sibling) page that used a high key whose attribute value corresponding +-- to the > key was -inf (due to being truncated when the high key was created). +-- +-- XXX This relies on the assumption that tenk1_thous_tenthous has a truncated +-- high key "(183, -inf)" on the first page that we'll scan. The test will only +-- provide useful coverage when the default 8K BLCKSZ is in use. +-- +explain (costs off) +SELECT thousand, tenthous + FROM tenk1 + WHERE thousand IN (182, 183) AND tenthous > 7550; + +SELECT thousand, tenthous + FROM tenk1 + WHERE thousand IN (182, 183) AND tenthous > 7550; + -- -- Add coverage for optimization of backwards scan index descents -- diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index c085e05f05..068c66b95a 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -931,6 +931,11 @@ SELECT unique1 FROM tenk1 WHERE unique1 IN (1, 42, 7) and unique1 < (-1)::bigint SELECT unique1 FROM tenk1 WHERE unique1 IN (1, 42, 7) and unique1 < (-1)::bigint; +explain (costs off) +SELECT unique1 FROM tenk1 WHERE (thousand, tenthous) > (NULL, 5); + +SELECT unique1 FROM tenk1 WHERE (thousand, tenthous) > (NULL, 5); + -- -- Check elimination of constant-NULL subexpressions -- -- 2.39.5