Log long wait time on recovery conflict when it's resolved.

author Fujii Masao <fujii@postgresql.org>

Wed, 13 Jan 2021 13:59:17 +0000 (22:59 +0900)

committer Fujii Masao <fujii@postgresql.org>

Wed, 13 Jan 2021 13:59:17 +0000 (22:59 +0900)
author Fujii Masao <fujii@postgresql.org>
Wed, 13 Jan 2021 13:59:17 +0000 (22:59 +0900)
committer Fujii Masao <fujii@postgresql.org>
Wed, 13 Jan 2021 13:59:17 +0000 (22:59 +0900)
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml

index 7c0a673a8dd42c433d12934d25beede27eb18cd3..82864bbb247f55c0b6bb3ce2686a8d6404673a82 100644 (file)
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6961,7 +6961,7 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
        <listitem>
         <para>
          Controls whether a log message is produced when the startup process
-        is waiting longer than <varname>deadlock_timeout</varname>
+        waits longer than <varname>deadlock_timeout</varname>
          for recovery conflicts.  This is useful in determining if recovery
          conflicts prevent the recovery from applying WAL.
         </para>
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c

index c46b8abad12e479b432490fd1d950ee295afa2dc..561c212092f768fc295d07a7ba5b958164712177 100644 (file)
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -4034,6 +4034,16 @@ LockBufferForCleanup(Buffer buffer)
                         /* Successfully acquired exclusive lock with pincount 1 */
                         UnlockBufHdr(bufHdr, buf_state);
  
+                       /*
+                        * Emit the log message if recovery conflict on buffer pin was
+                        * resolved but the startup process waited longer than
+                        * deadlock_timeout for it.
+                        */
+                       if (logged_recovery_conflict)
+                               LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
+                                                                       waitStart, GetCurrentTimestamp(),
+                                                                       NULL, false);
+
                         /* Report change to non-waiting status */
                         if (new_status)
                         {
@@ -4088,7 +4098,7 @@ LockBufferForCleanup(Buffer buffer)
                                                                                            DeadlockTimeout))
                                 {
                                         LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
-                                                                               waitStart, now, NULL);
+                                                                               waitStart, now, NULL, true);
                                         logged_recovery_conflict = true;
                                 }
                         }
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c

index d4b0f65ba20bac675816d886df42c4b93871f053..39a30c00f7a3670154c32b9b2f7f455c85a5a841 100644 (file)
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -226,11 +226,14 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
   * wait_start is the timestamp when the caller started to wait.
   * now is the timestamp when this function has been called.
   * wait_list is the list of virtual transaction ids assigned to
- * conflicting processes.
+ * conflicting processes. still_waiting indicates whether
+ * the startup process is still waiting for the recovery conflict
+ * to be resolved or not.
   */
  void
  LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
-                                       TimestampTz now, VirtualTransactionId *wait_list)
+                                       TimestampTz now, VirtualTransactionId *wait_list,
+                                       bool still_waiting)
  {
         long            secs;
         int                     usecs;
@@ -238,6 +241,12 @@ LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
         StringInfoData buf;
         int                     nprocs = 0;
  
+       /*
+        * There must be no conflicting processes when the recovery conflict has
+        * already been resolved.
+        */
+       Assert(still_waiting || wait_list == NULL);
+
         TimestampDifference(wait_start, now, &secs, &usecs);
         msecs = secs * 1000 + usecs / 1000;
         usecs = usecs % 1000;
@@ -275,12 +284,21 @@ LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
          * conflicting backends in a detail message. Note that if all the backends
          * in the list are not active, no detail message is logged.
          */
-       ereport(LOG,
-                       errmsg("recovery still waiting after %ld.%03d ms: %s",
-                                  msecs, usecs, _(get_recovery_conflict_desc(reason))),
-                       nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
-                                                                                         "Conflicting processes: %s.",
-                                                                                         nprocs, buf.data) : 0);
+       if (still_waiting)
+       {
+               ereport(LOG,
+                               errmsg("recovery still waiting after %ld.%03d ms: %s",
+                                          msecs, usecs, _(get_recovery_conflict_desc(reason))),
+                               nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
+                                                                                                 "Conflicting processes: %s.",
+                                                                                                 nprocs, buf.data) : 0);
+       }
+       else
+       {
+               ereport(LOG,
+                               errmsg("recovery finished waiting after %ld.%03d ms: %s",
+                                          msecs, usecs, _(get_recovery_conflict_desc(reason))));
+       }
  
         if (nprocs > 0)
                 pfree(buf.data);
@@ -375,13 +393,12 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
  
                                 /*
                                  * Emit the log message if the startup process is waiting
-                                * longer than deadlock_timeout for recovery conflict on
-                                * buffer pin.
+                                * longer than deadlock_timeout for recovery conflict.
                                  */
                                 if (maybe_log_conflict &&
                                         TimestampDifferenceExceeds(waitStart, now, DeadlockTimeout))
                                 {
-                                       LogRecoveryConflict(reason, waitStart, now, waitlist);
+                                       LogRecoveryConflict(reason, waitStart, now, waitlist, true);
                                         logged_recovery_conflict = true;
                                 }
                         }
@@ -391,6 +408,14 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
                 waitlist++;
         }
  
+       /*
+        * Emit the log message if recovery conflict was resolved but the startup
+        * process waited longer than deadlock_timeout for it.
+        */
+       if (logged_recovery_conflict)
+               LogRecoveryConflict(reason, waitStart, GetCurrentTimestamp(),
+                                                       NULL, false);
+
         /* Reset ps display if we changed it */
         if (new_status)
         {
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c

index db0cfaa360031dca712f4092765c0a68e6bc0f87..c87ffc654919a5e248dab25f9b4bde82b6a6d500 100644 (file)
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -1322,7 +1322,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
                                          * longer than deadlock_timeout.
                                          */
                                         LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
-                                                                               standbyWaitStart, now, cnt > 0 ? vxids : NULL);
+                                                                               standbyWaitStart, now,
+                                                                               cnt > 0 ? vxids : NULL, true);
                                         logged_recovery_conflict = true;
                                 }
                         }
@@ -1607,6 +1608,15 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
                         disable_timeout(DEADLOCK_TIMEOUT, false);
         }
  
+       /*
+        * Emit the log message if recovery conflict on lock was resolved but the
+        * startup process waited longer than deadlock_timeout for it.
+        */
+       if (InHotStandby && logged_recovery_conflict)
+               LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
+                                                       standbyWaitStart, GetCurrentTimestamp(),
+                                                       NULL, false);
+
         /*
          * Re-acquire the lock table's partition lock.  We have to do this to hold
          * off cancel/die interrupts before we can mess with lockAwaited (else we
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h

index a0f3e0bdf09e14c0fe4521a1d8fd0d431436cceb..2b1f340b82b8cbbfdc0a902250fc0445ded32d3a 100644 (file)
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -40,7 +40,8 @@ extern void StandbyDeadLockHandler(void);
  extern void StandbyTimeoutHandler(void);
  extern void StandbyLockTimeoutHandler(void);
  extern void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
-                                                               TimestampTz cur_ts, VirtualTransactionId *wait_list);
+                                                               TimestampTz cur_ts, VirtualTransactionId *wait_list,
+                                                               bool still_waiting);
  
  /*
   * Standby Rmgr (RM_STANDBY_ID)
author	Fujii Masao <fujii@postgresql.org>
	Wed, 13 Jan 2021 13:59:17 +0000 (22:59 +0900)
committer	Fujii Masao <fujii@postgresql.org>
	Wed, 13 Jan 2021 13:59:17 +0000 (22:59 +0900)
doc/src/sgml/config.sgml		patch \| blob \| blame \| history
src/backend/storage/buffer/bufmgr.c		patch \| blob \| blame \| history
src/backend/storage/ipc/standby.c		patch \| blob \| blame \| history
src/backend/storage/lmgr/proc.c		patch \| blob \| blame \| history
src/include/storage/standby.h		patch \| blob \| blame \| history