summaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
authorThomas Munro2023-09-07 00:38:23 +0000
committerThomas Munro2023-09-07 00:39:24 +0000
commit0da096d78e1e49645ff9baf6e425d3c47c5a5dc0 (patch)
tree35b0869e2c7437a92b5d86615386635184d65466 /src/include
parent8c16ad3b43299695f203f9157a2b27c22b9ed634 (diff)
Fix recovery conflict SIGUSR1 handling.
We shouldn't be doing non-trivial work in signal handlers in general, and in this case the handler could reach unsafe code and corrupt state. It also clobbered its own "reason" code. Move all recovery conflict decision logic into the next CHECK_FOR_INTERRUPTS(), and have the signal handler just set flags and the latch, following the standard pattern. Since there are several different "reasons", use a separate flag for each. With this refactoring, the recovery conflict system no longer piggy-backs on top of the regular query cancelation mechanism, but instead raises an error directly if it decides that is necessary. It still needs to respect QueryCancelHoldoffCount, because otherwise the FEBE protocol might get out of sync (see commit 2b3a8b20c2d). This fixes one class of intermittent failure in the new 031_recovery_conflict.pl test added by commit 9f8a050f, though the buggy coding is much older. Failures outside contrived testing seem to be very rare (or perhaps incorrectly attributed) in the field, based on lack of reports. No back-patch for now due to complexity and release schedule. We have the option to back-patch into 16 later, as 16 has prerequisite commit bea3d7e. Reviewed-by: Andres Freund <andres@anarazel.de> (earlier version) Reviewed-by: Michael Paquier <michael@paquier.xyz> (earlier version) Reviewed-by: Robert Haas <robertmhaas@gmail.com> (earlier version) Tested-by: Christoph Berg <myon@debian.org> Discussion: https://postgr.es/m/CA%2BhUKGK3PGKwcKqzoosamn36YW-fsuTdOPPF1i_rtEO%3DnEYKSg%40mail.gmail.com Discussion: https://postgr.es/m/CALj2ACVr8au2J_9D88UfRCi0JdWhyQDDxAcSVav0B0irx9nXEg%40mail.gmail.com
Diffstat (limited to 'src/include')
-rw-r--r--src/include/storage/procsignal.h4
-rw-r--r--src/include/tcop/tcopprot.h3
2 files changed, 4 insertions, 3 deletions
diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h
index 2f52100b009..3a3a7eca77d 100644
--- a/src/include/storage/procsignal.h
+++ b/src/include/storage/procsignal.h
@@ -38,13 +38,15 @@ typedef enum
PROCSIG_PARALLEL_APPLY_MESSAGE, /* Message from parallel apply workers */
/* Recovery conflict reasons */
- PROCSIG_RECOVERY_CONFLICT_DATABASE,
+ PROCSIG_RECOVERY_CONFLICT_FIRST,
+ PROCSIG_RECOVERY_CONFLICT_DATABASE = PROCSIG_RECOVERY_CONFLICT_FIRST,
PROCSIG_RECOVERY_CONFLICT_TABLESPACE,
PROCSIG_RECOVERY_CONFLICT_LOCK,
PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT,
PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
+ PROCSIG_RECOVERY_CONFLICT_LAST = PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
NUM_PROCSIGNALS /* Must be last! */
} ProcSignalReason;
diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h
index abd7b4fff33..ab43b638eea 100644
--- a/src/include/tcop/tcopprot.h
+++ b/src/include/tcop/tcopprot.h
@@ -70,8 +70,7 @@ extern void die(SIGNAL_ARGS);
extern void quickdie(SIGNAL_ARGS) pg_attribute_noreturn();
extern void StatementCancelHandler(SIGNAL_ARGS);
extern void FloatExceptionHandler(SIGNAL_ARGS) pg_attribute_noreturn();
-extern void RecoveryConflictInterrupt(ProcSignalReason reason); /* called from SIGUSR1
- * handler */
+extern void HandleRecoveryConflictInterrupt(ProcSignalReason reason);
extern void ProcessClientReadInterrupt(bool blocked);
extern void ProcessClientWriteInterrupt(bool blocked);