diff options
| author | Noah Misch | 2025-01-25 19:28:14 +0000 |
|---|---|---|
| committer | Noah Misch | 2025-01-25 19:28:18 +0000 |
| commit | f4af4515bb5f3591d49bc16b6cb8ddbf66f98374 (patch) | |
| tree | f2faf0445b5304af6804d8f0d08362fedc7fc38a /src/test | |
| parent | 1587f7b9fc1a16df8b01b4f5f9e3c949325160b2 (diff) | |
At update of non-LP_NORMAL TID, fail instead of corrupting page header.
The right mix of DDL and VACUUM could corrupt a catalog page header such
that PageIsVerified() durably fails, requiring a restore from backup.
This affects only catalogs that both have a syscache and have DDL code
that uses syscache tuples to construct updates. One of the test
permutations shows a variant not yet fixed.
This makes !TransactionIdIsValid(TM_FailureData.xmax) possible with
TM_Deleted. I think core and PGXN are indifferent to that.
Per bug #17821 from Alexander Lakhin. Back-patch to v13 (all supported
versions). The test case is v17+, since it uses INJECTION_POINT.
Discussion: https://postgr.es/m/17821-dd8c334263399284@postgresql.org
Diffstat (limited to 'src/test')
7 files changed, 440 insertions, 4 deletions
diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile index d1375f78f7e..f19c9643ba9 100644 --- a/src/test/modules/injection_points/Makefile +++ b/src/test/modules/injection_points/Makefile @@ -1,7 +1,10 @@ # src/test/modules/injection_points/Makefile -MODULES = injection_points - +MODULE_big = injection_points +OBJS = \ + $(WIN32RES) \ + injection_points.o \ + regress_injection.o EXTENSION = injection_points DATA = injection_points--1.0.sql PGFILEDESC = "injection_points - facility for injection points" @@ -9,7 +12,7 @@ PGFILEDESC = "injection_points - facility for injection points" REGRESS = injection_points reindex_conc REGRESS_OPTS = --dlpath=$(top_builddir)/src/test/regress -ISOLATION = inplace +ISOLATION = inplace syscache-update-pruned # The injection points are cluster-wide, so disable installcheck NO_INSTALLCHECK = 1 diff --git a/src/test/modules/injection_points/expected/syscache-update-pruned.out b/src/test/modules/injection_points/expected/syscache-update-pruned.out new file mode 100644 index 00000000000..5dc5a1ddc5e --- /dev/null +++ b/src/test/modules/injection_points/expected/syscache-update-pruned.out @@ -0,0 +1,87 @@ +Parsed test spec with 4 sessions + +starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 wakegrant4 +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + <waiting ...> +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...> +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + <waiting ...> +step at2: <... completed> +step wakeinval4: <... completed> +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + <waiting ...> +step grant1: <... completed> +ERROR: tuple concurrently deleted +step wakegrant4: <... completed> + +starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 mkrels4 wakegrant4 +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + <waiting ...> +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...> +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + <waiting ...> +step at2: <... completed> +step wakeinval4: <... completed> +step mkrels4: + SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED + +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + <waiting ...> +step grant1: <... completed> +ERROR: duplicate key value violates unique constraint "pg_class_oid_index" +step wakegrant4: <... completed> + +starting permutation: snap3 cachefill1 at2 mkrels4 r3 waitprunable4 vac4 grant1 wakeinval4 at4 wakegrant4 inspect4 +step snap3: BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT; +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + <waiting ...> +step mkrels4: + SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED + +step r3: ROLLBACK; +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...> +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + <waiting ...> +step at2: <... completed> +step wakeinval4: <... completed> +step at4: ALTER TABLE vactest.child50 INHERIT vactest.orig50; +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + <waiting ...> +step grant1: <... completed> +step wakegrant4: <... completed> +step inspect4: + SELECT relhastriggers, relhassubclass FROM pg_class + WHERE oid = 'vactest.orig50'::regclass; + +relhastriggers|relhassubclass +--------------+-------------- +f |f +(1 row) + diff --git a/src/test/modules/injection_points/expected/syscache-update-pruned_1.out b/src/test/modules/injection_points/expected/syscache-update-pruned_1.out new file mode 100644 index 00000000000..b18857c902e --- /dev/null +++ b/src/test/modules/injection_points/expected/syscache-update-pruned_1.out @@ -0,0 +1,86 @@ +Parsed test spec with 4 sessions + +starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 wakegrant4 +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + <waiting ...> +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...> +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + <waiting ...> +step at2: <... completed> +step wakeinval4: <... completed> +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + <waiting ...> +step grant1: <... completed> +step wakegrant4: <... completed> + +starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 mkrels4 wakegrant4 +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + <waiting ...> +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...> +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + <waiting ...> +step at2: <... completed> +step wakeinval4: <... completed> +step mkrels4: + SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED + +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + <waiting ...> +step grant1: <... completed> +step wakegrant4: <... completed> + +starting permutation: snap3 cachefill1 at2 mkrels4 r3 waitprunable4 vac4 grant1 wakeinval4 at4 wakegrant4 inspect4 +step snap3: BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT; +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + <waiting ...> +step mkrels4: + SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED + +step r3: ROLLBACK; +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...> +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + <waiting ...> +step at2: <... completed> +step wakeinval4: <... completed> +step at4: ALTER TABLE vactest.child50 INHERIT vactest.orig50; +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + <waiting ...> +step grant1: <... completed> +ERROR: tuple concurrently updated +step wakegrant4: <... completed> +step inspect4: + SELECT relhastriggers, relhassubclass FROM pg_class + WHERE oid = 'vactest.orig50'::regclass; + +relhastriggers|relhassubclass +--------------+-------------- +t |t +(1 row) + diff --git a/src/test/modules/injection_points/injection_points--1.0.sql b/src/test/modules/injection_points/injection_points--1.0.sql index c16a33b08db..519641e6d04 100644 --- a/src/test/modules/injection_points/injection_points--1.0.sql +++ b/src/test/modules/injection_points/injection_points--1.0.sql @@ -54,3 +54,11 @@ CREATE FUNCTION injection_points_detach(IN point_name TEXT) RETURNS void AS 'MODULE_PATHNAME', 'injection_points_detach' LANGUAGE C STRICT PARALLEL UNSAFE; + +-- +-- regress_injection.c functions +-- +CREATE FUNCTION removable_cutoff(rel regclass) +RETURNS xid8 +AS 'MODULE_PATHNAME' +LANGUAGE C CALLED ON NULL INPUT; diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build index 33303089cff..169c415f9c4 100644 --- a/src/test/modules/injection_points/meson.build +++ b/src/test/modules/injection_points/meson.build @@ -6,6 +6,7 @@ endif injection_points_sources = files( 'injection_points.c', + 'regress_injection.c', ) if host_system == 'windows' @@ -41,7 +42,8 @@ tests += { 'isolation': { 'specs': [ 'inplace', + 'syscache-update-pruned', ], - 'runningcheck': false, # align with GNU make build system + 'runningcheck': false, # see syscache-update-pruned }, } diff --git a/src/test/modules/injection_points/regress_injection.c b/src/test/modules/injection_points/regress_injection.c new file mode 100644 index 00000000000..422f4168935 --- /dev/null +++ b/src/test/modules/injection_points/regress_injection.c @@ -0,0 +1,71 @@ +/*-------------------------------------------------------------------------- + * + * regress_injection.c + * Functions supporting test-specific subject matter. + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/test/modules/injection_points/regress_injection.c + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/table.h" +#include "fmgr.h" +#include "miscadmin.h" +#include "storage/procarray.h" +#include "utils/xid8.h" + +/* + * removable_cutoff - for syscache-update-pruned.spec + * + * Wrapper around GetOldestNonRemovableTransactionId(). In general, this can + * move backward. runningcheck=false isolation tests can reasonably prevent + * that. For the causes of backward movement, see + * postgr.es/m/CAEze2Wj%2BV0kTx86xB_YbyaqTr5hnE_igdWAwuhSyjXBYscf5-Q%40mail.gmail.com + * and the header comment for ComputeXidHorizons(). One can assume this + * doesn't move backward if one arranges for concurrent activity not to reach + * AbortTransaction() and not to allocate an XID while connected to another + * database. Non-runningcheck tests can control most concurrent activity, + * except autovacuum and the isolationtester control connection. Neither + * allocates XIDs, and AbortTransaction() in those would justify test failure. + */ +PG_FUNCTION_INFO_V1(removable_cutoff); +Datum +removable_cutoff(PG_FUNCTION_ARGS) +{ + Relation rel = NULL; + TransactionId xid; + FullTransactionId next_fxid_before, + next_fxid; + + /* could take other relkinds callee takes, but we've not yet needed it */ + if (!PG_ARGISNULL(0)) + rel = table_open(PG_GETARG_OID(0), AccessShareLock); + + /* + * No lock or snapshot necessarily prevents oldestXid from advancing past + * "xid" while this function runs. That concerns us only in that we must + * not ascribe "xid" to the wrong epoch. (That may never arise in + * isolation testing, but let's set a good example.) As a crude solution, + * retry until nextXid doesn't change. + */ + next_fxid = ReadNextFullTransactionId(); + do + { + CHECK_FOR_INTERRUPTS(); + next_fxid_before = next_fxid; + xid = GetOldestNonRemovableTransactionId(rel); + next_fxid = ReadNextFullTransactionId(); + } while (!FullTransactionIdEquals(next_fxid, next_fxid_before)); + + if (rel) + table_close(rel, AccessShareLock); + + PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromAllowableAt(next_fxid, + xid)); +} diff --git a/src/test/modules/injection_points/specs/syscache-update-pruned.spec b/src/test/modules/injection_points/specs/syscache-update-pruned.spec new file mode 100644 index 00000000000..b48e897431e --- /dev/null +++ b/src/test/modules/injection_points/specs/syscache-update-pruned.spec @@ -0,0 +1,179 @@ +# Test race conditions involving: +# - s1: heap_update($FROM_SYSCACHE), without a snapshot or pin +# - s2: ALTER TABLE making $FROM_SYSCACHE a dead tuple +# - s3: "VACUUM pg_class" making $FROM_SYSCACHE become LP_UNUSED + +# This is a derivative work of inplace.spec, which exercises the corresponding +# race condition for inplace updates. + +# Despite local injection points, this is incompatible with runningcheck. +# First, removable_cutoff() could move backward, per its header comment. +# Second, other activity could trigger sinval queue overflow, negating our +# efforts to delay inval. Third, this deadlock emerges: +# +# - step at2 waits at an injection point, with interrupts held +# - an unrelated backend waits for at2 to do PROCSIGNAL_BARRIER_SMGRRELEASE +# - step waitprunable4 waits for the unrelated backend to release its xmin + +# The alternative expected output is for -DCATCACHE_FORCE_RELEASE, a setting +# that thwarts testing the race conditions this spec seeks. + + +# Need s2 to make a non-HOT update. Otherwise, "VACUUM pg_class" would leave +# an LP_REDIRECT that persists. To get non-HOT, make rels so the pg_class row +# for vactest.orig50 is on a filled page (assuming BLCKSZ=8192). Just to save +# on filesystem syscalls, use relkind=c for every other rel. +setup +{ + CREATE EXTENSION injection_points; + CREATE SCHEMA vactest; + -- Ensure a leader RELOID catcache entry. PARALLEL RESTRICTED since a + -- parallel worker running pg_relation_filenode() would lack that effect. + CREATE FUNCTION vactest.reloid_catcache_set(regclass) RETURNS int + LANGUAGE sql PARALLEL RESTRICTED + AS 'SELECT 0 FROM pg_relation_filenode($1)'; + CREATE FUNCTION vactest.mkrels(text, int, int) RETURNS void + LANGUAGE plpgsql SET search_path = vactest AS $$ + DECLARE + tname text; + BEGIN + FOR i in $2 .. $3 LOOP + tname := $1 || i; + EXECUTE FORMAT('CREATE TYPE ' || tname || ' AS ()'); + RAISE DEBUG '% at %', tname, ctid + FROM pg_class WHERE oid = tname::regclass; + END LOOP; + END + $$; + CREATE PROCEDURE vactest.wait_prunable() LANGUAGE plpgsql AS $$ + DECLARE + barrier xid8; + cutoff xid8; + BEGIN + barrier := pg_current_xact_id(); + -- autovacuum worker RelationCacheInitializePhase3() or the + -- isolationtester control connection might hold a snapshot that + -- limits pruning. Sleep until that clears. + LOOP + ROLLBACK; -- release MyProc->xmin, which could be the oldest + cutoff := removable_cutoff('pg_class'); + EXIT WHEN cutoff >= barrier; + RAISE LOG 'removable cutoff %; waiting for %', cutoff, barrier; + PERFORM pg_sleep(.1); + END LOOP; + END + $$; +} +setup { CALL vactest.wait_prunable(); -- maximize next two VACUUMs } +setup { VACUUM FULL pg_class; -- reduce free space } +setup { VACUUM FREEZE pg_class; -- populate fsm etc. } +setup +{ + SELECT FROM vactest.mkrels('orig', 1, 49); + CREATE TABLE vactest.orig50 (c int) WITH (autovacuum_enabled = off); + CREATE TABLE vactest.child50 (c int) WITH (autovacuum_enabled = off); + SELECT FROM vactest.mkrels('orig', 51, 100); +} +teardown +{ + DROP SCHEMA vactest CASCADE; + DROP EXTENSION injection_points; +} + +# Wait during GRANT. Disable debug_discard_caches, since we're here to +# exercise an outcome that happens under permissible cache staleness. +session s1 +setup { + SET debug_discard_caches = 0; + SELECT FROM injection_points_set_local(); + SELECT FROM injection_points_attach('heap_update-before-pin', 'wait'); +} +step cachefill1 { SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); } +step grant1 { GRANT SELECT ON vactest.orig50 TO PUBLIC; } + +# Update of the tuple that grant1 will update. Wait before sending invals, so +# s1 will not get a cache miss. Choose the commands for making such updates +# from among those whose heavyweight locking does not conflict with GRANT's +# heavyweight locking. (GRANT will see our XID as committed, so observing +# that XID in the tuple xmax also won't block GRANT.) +session s2 +setup { + SELECT FROM injection_points_set_local(); + SELECT FROM + injection_points_attach('AtEOXact_Inval-with-transInvalInfo', 'wait'); +} +step at2 { + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); +} + +# Hold snapshot to block pruning. +session s3 +step snap3 { BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT; } +step r3 { ROLLBACK; } + +# Non-blocking actions. +session s4 +step waitprunable4 { CALL vactest.wait_prunable(); } +step vac4 { VACUUM pg_class; } +# Reuse the lp that s1 is waiting to change. I've observed reuse at the 1st +# or 18th CREATE, so create excess. +step mkrels4 { + SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED +} +step wakegrant4 { + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); +} +step at4 { ALTER TABLE vactest.child50 INHERIT vactest.orig50; } +step wakeinval4 { + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); +} +# Witness effects of steps at2 and/or at4. +step inspect4 { + SELECT relhastriggers, relhassubclass FROM pg_class + WHERE oid = 'vactest.orig50'::regclass; +} + +# TID from syscache becomes LP_UNUSED. Before the bug fix, this permutation +# made s1 fail with "attempted to update invisible tuple" or an assert. +# However, suppose a pd_lsn value such that (pd_lsn.xlogid, pd_lsn.xrecoff) +# passed for (xmin, xmax) with xmin known-committed and xmax known-aborted. +# Persistent page header corruption ensued. For example, s1 overwrote +# pd_lower, pd_upper, and pd_special as though they were t_ctid. +permutation + cachefill1 # reads pg_class tuple T0, xmax invalid + at2 # T0 dead, T1 live + waitprunable4 # T0 prunable + vac4 # T0 becomes LP_UNUSED + grant1 # pauses at heap_update(T0) + wakeinval4(at2) # at2 sends inval message + wakegrant4(grant1) # s1 wakes: "tuple concurrently deleted" + +# add mkrels4: LP_UNUSED becomes a different rel's row +permutation + cachefill1 # reads pg_class tuple T0, xmax invalid + at2 # T0 dead, T1 live + waitprunable4 # T0 prunable + vac4 # T0 becomes LP_UNUSED + grant1 # pauses at heap_update(T0) + wakeinval4(at2) # at2 sends inval message + mkrels4 # T0 becomes a new rel + wakegrant4(grant1) # s1 wakes: "duplicate key value violates unique" + +# TID from syscache becomes LP_UNUSED, then becomes a newer version of the +# original rel's row. +permutation + snap3 # sets MyProc->xmin + cachefill1 # reads pg_class tuple T0, xmax invalid + at2 # T0 dead, T1 live + mkrels4 # T1's page becomes full + r3 # clears MyProc->xmin + waitprunable4 # T0 prunable + vac4 # T0 becomes LP_UNUSED + grant1 # pauses at heap_update(T0) + wakeinval4(at2) # at2 sends inval message + at4 # T1 dead, T0 live + wakegrant4(grant1) # s1 wakes: T0 dead, T2 live + inspect4 # observe loss of at2+at4 changes XXX is an extant bug |
