summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tom Lane, 2006-11-21 00:49:55 +0000
committer: Tom Lane, 2006-11-21 00:49:55 +0000
commit: e82d9e6283d6ca19d1ea7547e7e9ae8399471e1a (patch)
tree: b6f413821ac86b98dad1dacc461dd6a70a570696
parent: 778bb7b60d8b36029f9e874a1b38428115bef0d1 (diff)
Adjust elog.c so that elog(FATAL) exits (including cases where ERROR is promoted to FATAL) end in exit(1) not exit(0). Then change the postmaster to allow exit(1) without a system-wide panic, but not for the startup subprocess or the bgwriter. There were a couple of places that were using exit(1) to deliberately force a system-wide panic; adjust these to be exit(2) instead.

This fixes the problem noted back in July that if the startup process exits with elog(ERROR), the postmaster would think everything is hunky-dory and proceed to start up.

Alternative solutions such as trying to run the entire startup process as a critical section seem less clean, primarily because of the fact that a fair amount of startup code is shared by all postmaster children in the EXEC_BACKEND case. We'd need an ugly special case somewhere near the head of main.c to make it work if it's the child process's responsibility to determine what happens; and what's the point when the postmaster already treats different children differently?
-rw-r--r--  src/backend/bootstrap/bootstrap.c     9
-rw-r--r--  src/backend/postmaster/bgwriter.c     6
-rw-r--r--  src/backend/postmaster/postmaster.c  38
-rw-r--r--  src/backend/tcop/postgres.c          10
-rw-r--r--  src/backend/utils/error/elog.c       16
5 files changed, 39 insertions, 40 deletions
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index 70b9172cd7f..b44c501cc69 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.225 2006/10/04 00:29:49 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.226 2006/11/21 00:49:54 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -421,15 +421,8 @@ BootstrapMain(int argc, char *argv[])
case BS_XLOG_STARTUP:
bootstrap_signals();
StartupXLOG();
-
- /*
- * These next two functions don't consider themselves critical,
- * but we'd best PANIC anyway if they fail.
- */
- START_CRIT_SECTION();
LoadFreeSpaceMap();
BuildFlatFiles(false);
- END_CRIT_SECTION();
proc_exit(0); /* startup done */
case BS_XLOG_BGWRITER:
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index e3a4925b911..f2cb3ff68de 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -37,7 +37,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.29 2006/10/06 17:13:59 petere Exp $
+ * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.30 2006/11/21 00:49:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -503,12 +503,12 @@ bg_quickdie(SIGNAL_ARGS)
* corrupted, so we don't want to try to clean up our transaction. Just
* nail the windows shut and get out of town.
*
- * Note we do exit(1) not exit(0). This is to force the postmaster into a
+ * Note we do exit(2) not exit(0). This is to force the postmaster into a
* system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
* backend. This is necessary precisely because we don't clean up our
* shared memory state.
*/
- exit(1);
+ exit(2);
}
/* SIGHUP: set flag to re-read config file at next convenient time */
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index caf7d9a82d4..9d7b6065f46 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -37,7 +37,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.501 2006/11/05 22:42:09 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.502 2006/11/21 00:49:55 tgl Exp $
*
* NOTES
*
@@ -358,6 +358,10 @@ static void ShmemBackendArrayRemove(pid_t pid);
#define StartupDataBase() StartChildProcess(BS_XLOG_STARTUP)
#define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER)
+/* Macros to check exit status of a child process */
+#define EXIT_STATUS_0(st) ((st) == 0)
+#define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
+
/*
* Postmaster main entry point
@@ -2025,7 +2029,8 @@ reaper(SIGNAL_ARGS)
if (StartupPID != 0 && pid == StartupPID)
{
StartupPID = 0;
- if (exitstatus != 0)
+ /* Note: FATAL exit of startup is treated as catastrophic */
+ if (!EXIT_STATUS_0(exitstatus))
{
LogChildExit(LOG, _("startup process"),
pid, exitstatus);
@@ -2078,7 +2083,8 @@ reaper(SIGNAL_ARGS)
if (BgWriterPID != 0 && pid == BgWriterPID)
{
BgWriterPID = 0;
- if (exitstatus == 0 && Shutdown > NoShutdown && !FatalError &&
+ if (EXIT_STATUS_0(exitstatus) &&
+ Shutdown > NoShutdown && !FatalError &&
!DLGetHead(BackendList) && AutoVacPID == 0)
{
/*
@@ -2096,7 +2102,8 @@ reaper(SIGNAL_ARGS)
}
/*
- * Any unexpected exit of the bgwriter is treated as a crash.
+ * Any unexpected exit of the bgwriter (including FATAL exit)
+ * is treated as a crash.
*/
HandleChildCrash(pid, exitstatus,
_("background writer process"));
@@ -2104,15 +2111,16 @@ reaper(SIGNAL_ARGS)
}
/*
- * Was it the autovacuum process? Normal exit can be ignored; we'll
- * start a new one at the next iteration of the postmaster's main
- * loop, if necessary. An unexpected exit is treated as a crash.
+ * Was it the autovacuum process? Normal or FATAL exit can be
+ * ignored; we'll start a new one at the next iteration of the
+ * postmaster's main loop, if necessary. Any other exit condition
+ * is treated as a crash.
*/
if (AutoVacPID != 0 && pid == AutoVacPID)
{
AutoVacPID = 0;
autovac_stopped();
- if (exitstatus != 0)
+ if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
HandleChildCrash(pid, exitstatus,
_("autovacuum process"));
continue;
@@ -2126,7 +2134,7 @@ reaper(SIGNAL_ARGS)
if (PgArchPID != 0 && pid == PgArchPID)
{
PgArchPID = 0;
- if (exitstatus != 0)
+ if (!EXIT_STATUS_0(exitstatus))
LogChildExit(LOG, _("archiver process"),
pid, exitstatus);
if (XLogArchivingActive() &&
@@ -2143,7 +2151,7 @@ reaper(SIGNAL_ARGS)
if (PgStatPID != 0 && pid == PgStatPID)
{
PgStatPID = 0;
- if (exitstatus != 0)
+ if (!EXIT_STATUS_0(exitstatus))
LogChildExit(LOG, _("statistics collector process"),
pid, exitstatus);
if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
@@ -2157,7 +2165,7 @@ reaper(SIGNAL_ARGS)
SysLoggerPID = 0;
/* for safety's sake, launch new logger *first* */
SysLoggerPID = SysLogger_Start();
- if (exitstatus != 0)
+ if (!EXIT_STATUS_0(exitstatus))
LogChildExit(LOG, _("system logger process"),
pid, exitstatus);
continue;
@@ -2229,12 +2237,12 @@ CleanupBackend(int pid,
LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
/*
- * If a backend dies in an ugly way (i.e. exit status not 0) then we must
- * signal all other backends to quickdie. If exit status is zero we
- * assume everything is hunky dory and simply remove the backend from the
+ * If a backend dies in an ugly way then we must signal all other backends
+ * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
+ * assume everything is all right and simply remove the backend from the
* active backend list.
*/
- if (exitstatus != 0)
+ if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
{
HandleChildCrash(pid, exitstatus, _("server process"));
return;
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index d019864fea4..8b014c887f5 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.516 2006/10/19 19:52:22 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.517 2006/11/21 00:49:55 tgl Exp $
*
* NOTES
* this is the "main" module of the postgres backend and
@@ -2327,12 +2327,12 @@ quickdie(SIGNAL_ARGS)
* corrupted, so we don't want to try to clean up our transaction. Just
* nail the windows shut and get out of town.
*
- * Note we do exit(1) not exit(0). This is to force the postmaster into a
+ * Note we do exit(2) not exit(0). This is to force the postmaster into a
* system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
* backend. This is necessary precisely because we don't clean up our
* shared memory state.
*/
- exit(1);
+ exit(2);
}
/*
@@ -2374,7 +2374,7 @@ die(SIGNAL_ARGS)
/*
* Timeout or shutdown signal from postmaster during client authentication.
- * Simply exit(0).
+ * Simply exit(1).
*
* XXX: possible future improvement: try to send a message indicating
* why we are disconnecting. Problem is to be sure we don't block while
@@ -2383,7 +2383,7 @@ die(SIGNAL_ARGS)
void
authdie(SIGNAL_ARGS)
{
- exit(0);
+ exit(1);
}
/*
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 61c413b4be3..6b6a9c51cf7 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -42,7 +42,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.175 2006/10/01 22:08:18 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.176 2006/11/21 00:49:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -421,25 +421,23 @@ errfinish(int dummy,...)
* fflush here is just to improve the odds that we get to see the
* error message, in case things are so hosed that proc_exit crashes.
* Any other code you might be tempted to add here should probably be
- * in an on_proc_exit callback instead.
+ * in an on_proc_exit or on_shmem_exit callback instead.
*/
fflush(stdout);
fflush(stderr);
/*
- * If proc_exit is already running, we exit with nonzero exit code to
- * indicate that something's pretty wrong. We also want to exit with
- * nonzero exit code if not running under the postmaster (for example,
- * if we are being run from the initdb script, we'd better return an
- * error status).
+ * Do normal process-exit cleanup, then return exit code 1 to indicate
+ * FATAL termination. The postmaster may or may not consider this
+ * worthy of panic, depending on which subprocess returns it.
*/
- proc_exit(proc_exit_inprogress || !IsUnderPostmaster);
+ proc_exit(1);
}
if (elevel >= PANIC)
{
/*
- * Serious crash time. Postmaster will observe nonzero process exit
+ * Serious crash time. Postmaster will observe SIGABRT process exit
* status and kill the other backends too.
*
* XXX: what if we are *in* the postmaster? abort() won't kill our