Harden pmsignal.c against clobbered shared memory.
authorTom Lane <tgl@sss.pgh.pa.us>
Tue, 11 Oct 2022 22:54:31 +0000 (18:54 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Tue, 11 Oct 2022 22:54:31 +0000 (18:54 -0400)
The postmaster is not supposed to do anything that depends
fundamentally on shared memory contents, because that creates
the risk that a backend crash that trashes shared memory will
take the postmaster down with it, preventing automatic recovery.
In commit 969d7cd43 I lost sight of this principle and coded
AssignPostmasterChildSlot() in such a way that it could fail
or even crash if the shared PMSignalState structure became
corrupted.  Remarkably, we've not seen field reports of such
crashes; but I managed to induce one while testing the recent
changes around palloc chunk headers.

To fix, make a semi-duplicative state array inside the postmaster
so that we need consult only local state while choosing a "child
slot" for a new backend.  Ensure that other postmaster-executed
routines in pmsignal.c don't have critical dependencies on the
shared state, either.  Corruption of PMSignalState might now
lead ReleasePostmasterChildSlot() to conclude that backend X
failed, when actually backend Y was the one that trashed things.
But that doesn't matter, because we'll force a cluster-wide reset
regardless.

Back-patch to all supported branches, since this is an old bug.

Discussion: https://postgr.es/m/3436789.1665187055@sss.pgh.pa.us

src/backend/storage/ipc/pmsignal.c

index 85db6b21f83ff6ed1caebbb74c77f87f649f66cf..482b6827b4888072267caa96047905192c8cf4e5 100644 (file)
@@ -22,6 +22,7 @@
 #include "replication/walsender.h"
 #include "storage/pmsignal.h"
 #include "storage/shmem.h"
+#include "utils/memutils.h"
 
 
 /*
@@ -65,12 +66,20 @@ struct PMSignalData
    sig_atomic_t PMSignalFlags[NUM_PMSIGNALS];
    /* per-child-process flags */
    int         num_child_flags;    /* # of entries in PMChildFlags[] */
-   int         next_child_flag;    /* next slot to try to assign */
    sig_atomic_t PMChildFlags[FLEXIBLE_ARRAY_MEMBER];
 };
 
+/* PMSignalState pointer is valid in both postmaster and child processes */
 NON_EXEC_STATIC volatile PMSignalData *PMSignalState = NULL;
 
+/*
+ * These static variables are valid only in the postmaster.  We keep a
+ * duplicative private array so that we can trust its state even if some
+ * failing child has clobbered the PMSignalData struct in shared memory.
+ */
+static int num_child_inuse;    /* # of entries in PMChildInUse[] */
+static int next_child_inuse;   /* next slot to try to assign */
+static bool *PMChildInUse;     /* true if i'th flag slot is assigned */
 
 /*
  * PMSignalShmemSize
@@ -102,7 +111,25 @@ PMSignalShmemInit(void)
    if (!found)
    {
        MemSet(PMSignalState, 0, PMSignalShmemSize());
-       PMSignalState->num_child_flags = MaxLivePostmasterChildren();
+       num_child_inuse = MaxLivePostmasterChildren();
+       PMSignalState->num_child_flags = num_child_inuse;
+
+       /*
+        * Also allocate postmaster's private PMChildInUse[] array.  We
+        * might've already done that in a previous shared-memory creation
+        * cycle, in which case free the old array to avoid a leak.  (Do it
+        * like this to support the possibility that MaxLivePostmasterChildren
+        * changed.)  In a standalone backend, we do not need this.
+        */
+       if (PostmasterContext != NULL)
+       {
+           if (PMChildInUse)
+               pfree(PMChildInUse);
+           PMChildInUse = (bool *)
+               MemoryContextAllocZero(PostmasterContext,
+                                      num_child_inuse * sizeof(bool));
+       }
+       next_child_inuse = 0;
    }
 }
 
@@ -150,21 +177,24 @@ CheckPostmasterSignal(PMSignalReason reason)
 int
 AssignPostmasterChildSlot(void)
 {
-   int         slot = PMSignalState->next_child_flag;
+   int         slot = next_child_inuse;
    int         n;
 
    /*
-    * Scan for a free slot.  We track the last slot assigned so as not to
-    * waste time repeatedly rescanning low-numbered slots.
+    * Scan for a free slot.  Notice that we trust nothing about the contents
+    * of PMSignalState, but use only postmaster-local data for this decision.
+    * We track the last slot assigned so as not to waste time repeatedly
+    * rescanning low-numbered slots.
     */
-   for (n = PMSignalState->num_child_flags; n > 0; n--)
+   for (n = num_child_inuse; n > 0; n--)
    {
        if (--slot < 0)
-           slot = PMSignalState->num_child_flags - 1;
-       if (PMSignalState->PMChildFlags[slot] == PM_CHILD_UNUSED)
+           slot = num_child_inuse - 1;
+       if (!PMChildInUse[slot])
        {
+           PMChildInUse[slot] = true;
            PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED;
-           PMSignalState->next_child_flag = slot;
+           next_child_inuse = slot;
            return slot + 1;
        }
    }
@@ -186,7 +216,7 @@ ReleasePostmasterChildSlot(int slot)
 {
    bool        result;
 
-   Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
+   Assert(slot > 0 && slot <= num_child_inuse);
    slot--;
 
    /*
@@ -196,17 +226,18 @@ ReleasePostmasterChildSlot(int slot)
     */
    result = (PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED);
    PMSignalState->PMChildFlags[slot] = PM_CHILD_UNUSED;
+   PMChildInUse[slot] = false;
    return result;
 }
 
 /*
  * IsPostmasterChildWalSender - check if given slot is in use by a
- * walsender process.
+ * walsender process.  This is called only by the postmaster.
  */
 bool
 IsPostmasterChildWalSender(int slot)
 {
-   Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
+   Assert(slot > 0 && slot <= num_child_inuse);
    slot--;
 
    if (PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER)