Fix waiting in RegisterSyncRequest().
author Thomas Munro <tmunro@postgresql.org>
Wed, 16 Mar 2022 02:35:16 +0000 (15:35 +1300)
committer Thomas Munro <tmunro@postgresql.org>
Wed, 16 Mar 2022 02:35:16 +0000 (15:35 +1300)
If we run out of space in the checkpointer sync request queue (which is
hopefully rare on real systems, but common with a very small buffer pool),
we wait for it to drain.  While waiting, we should report that as a wait
event so that users know what is going on, and also handle postmaster
death, since otherwise the loop might never terminate if the
checkpointer has exited.

Back-patch to 12.  Although the problem exists in earlier releases too,
the code is structured differently before 12 so I haven't gone any
further for now, in the absence of field complaints.

Reported-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/20220226213942.nb7uvb2pamyu26dj%40alap3.anarazel.de

doc/src/sgml/monitoring.sgml
src/backend/storage/sync/sync.c
src/backend/utils/activity/wait_event.c
src/include/utils/wait_event.h

index 8620aaddc79e68b3926f08b9ee50d6e0598cff1d..71559442f0b4b83786677e8d046f16673d636c16 100644 (file)
@@ -2254,6 +2254,11 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       <entry>Waiting during recovery when WAL data is not available from any
        source (<filename>pg_wal</filename>, archive or stream).</entry>
      </row>
+     <row>
+      <entry><literal>RegisterSyncRequest</literal></entry>
+      <entry>Waiting while sending synchronization requests to the
+       checkpointer, because the request queue is full.</entry>
+     </row>
      <row>
       <entry><literal>VacuumDelay</literal></entry>
       <entry>Waiting in a cost-based vacuum delay point.</entry>
index e161d57761ebb3ceaf070696bd662ee2dcbb5b93..0c4d9ce687f9b9e09279ff7ccd1c05433886e852 100644 (file)
@@ -31,6 +31,7 @@
 #include "storage/bufmgr.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
+#include "storage/latch.h"
 #include "storage/md.h"
 #include "utils/hsearch.h"
 #include "utils/inval.h"
@@ -606,7 +607,8 @@ RegisterSyncRequest(const FileTag *ftag, SyncRequestType type,
                if (ret || (!ret && !retryOnError))
                        break;
 
-               pg_usleep(10000L);
+               WaitLatch(NULL, WL_EXIT_ON_PM_DEATH | WL_TIMEOUT, 10,
+                                 WAIT_EVENT_REGISTER_SYNC_REQUEST);
        }
 
        return ret;
index 0706e922b5369686c47de3cab9f3af0f70e6d674..ff46a0e3c717ec01329e7ce5fba8015038345d34 100644 (file)
@@ -497,6 +497,9 @@ pgstat_get_wait_timeout(WaitEventTimeout w)
                case WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL:
                        event_name = "RecoveryRetrieveRetryInterval";
                        break;
+               case WAIT_EVENT_REGISTER_SYNC_REQUEST:
+                       event_name = "RegisterSyncRequest";
+                       break;
                case WAIT_EVENT_VACUUM_DELAY:
                        event_name = "VacuumDelay";
                        break;
index d0345c6b49e85f60b3505b55f63752230e0de4e2..1c39ce031a797db4683a4359d6a2f4f98d81ad14 100644 (file)
@@ -145,6 +145,7 @@ typedef enum
        WAIT_EVENT_PG_SLEEP,
        WAIT_EVENT_RECOVERY_APPLY_DELAY,
        WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL,
+       WAIT_EVENT_REGISTER_SYNC_REQUEST,
        WAIT_EVENT_VACUUM_DELAY,
        WAIT_EVENT_VACUUM_TRUNCATE
 } WaitEventTimeout;