diff options
| author | Heikki Linnakangas | 2011-07-08 15:27:49 +0000 |
|---|---|---|
| committer | Heikki Linnakangas | 2011-07-08 15:44:07 +0000 |
| commit | 89fd72cbf26f5d2e3d86ab19c1ead73ab8fac0fe (patch) | |
| tree | fa0605f7ec1ff3a798b1c22fd847185158288a8b /src/backend/postmaster | |
| parent | 9598afa3b0f7a7fdcf3740173346950b2bd5942c (diff) | |
Introduce a pipe between postmaster and each backend, which can be used to
detect postmaster death. Postmaster keeps the write-end of the pipe open,
so when it dies, children get EOF in the read-end. That can conveniently
be waited for in select(), which allows eliminating some of the polling
loops that check for postmaster death. This patch doesn't yet change all
the loops to use the new mechanism; expect a follow-on patch to do that.
This changes the interface to WaitLatch, so that it takes as argument a
bitmask of events that it waits for. Possible events are latch set, timeout,
postmaster death, and socket becoming readable or writeable.
The pipe method behaves slightly differently from the kill() method
previously used in PostmasterIsAlive() in the case that postmaster has died,
but its parent has not yet read its exit code with waitpid(). The pipe
returns EOF as soon as the process dies, but kill() continues to return
true until waitpid() has been called (IOW while the process is a zombie).
Because of that, change PostmasterIsAlive() to use the pipe too, otherwise
WaitLatch() would return immediately with WL_POSTMASTER_DEATH, while
PostmasterIsAlive() would claim it's still alive. That could easily lead to
busy-waiting while postmaster is in zombie state.
Peter Geoghegan with further changes by me, reviewed by Fujii Masao and
Florian Pflug.
Diffstat (limited to 'src/backend/postmaster')
| -rw-r--r-- | src/backend/postmaster/autovacuum.c | 4 | ||||
| -rw-r--r-- | src/backend/postmaster/bgwriter.c | 2 | ||||
| -rw-r--r-- | src/backend/postmaster/pgarch.c | 61 | ||||
| -rw-r--r-- | src/backend/postmaster/pgstat.c | 2 | ||||
| -rw-r--r-- | src/backend/postmaster/postmaster.c | 106 | ||||
| -rw-r--r-- | src/backend/postmaster/walwriter.c | 2 |
6 files changed, 136 insertions, 41 deletions
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 483a82951b0..2f3fcbf0409 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -556,7 +556,7 @@ AutoVacLauncherMain(int argc, char *argv[]) * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. */ - if (!PostmasterIsAlive(true)) + if (!PostmasterIsAlive()) proc_exit(1); launcher_determine_sleep((AutoVacuumShmem->av_freeWorkers != NULL), @@ -593,7 +593,7 @@ AutoVacLauncherMain(int argc, char *argv[]) * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. */ - if (!PostmasterIsAlive(true)) + if (!PostmasterIsAlive()) proc_exit(1); if (got_SIGTERM || got_SIGHUP || got_SIGUSR2) diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 5643ec821af..14e592d7bca 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -381,7 +381,7 @@ BackgroundWriterMain(void) * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. */ - if (!PostmasterIsAlive(true)) + if (!PostmasterIsAlive()) exit(1); /* diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c index b40375aaaa5..2070fbb375b 100644 --- a/src/backend/postmaster/pgarch.c +++ b/src/backend/postmaster/pgarch.c @@ -40,6 +40,7 @@ #include "postmaster/postmaster.h" #include "storage/fd.h" #include "storage/ipc.h" +#include "storage/latch.h" #include "storage/pg_shmem.h" #include "storage/pmsignal.h" #include "utils/guc.h" @@ -87,6 +88,11 @@ static volatile sig_atomic_t got_SIGTERM = false; static volatile sig_atomic_t wakened = false; static volatile sig_atomic_t ready_to_stop = false; +/* + * Latch used by signal handlers to wake up the sleep in the main loop. 
+ */ +static Latch mainloop_latch; + /* ---------- * Local function forward declarations * ---------- @@ -228,6 +234,8 @@ PgArchiverMain(int argc, char *argv[]) MyProcPid = getpid(); /* reset MyProcPid */ + InitLatch(&mainloop_latch); /* initialize latch used in main loop */ + MyStartTime = time(NULL); /* record Start Time for logging */ /* @@ -282,6 +290,8 @@ ArchSigHupHandler(SIGNAL_ARGS) { /* set flag to re-read config file at next convenient time */ got_SIGHUP = true; + /* let the waiting loop iterate */ + SetLatch(&mainloop_latch); } /* SIGTERM signal handler for archiver process */ @@ -295,6 +305,8 @@ ArchSigTermHandler(SIGNAL_ARGS) * archive commands. */ got_SIGTERM = true; + /* let the waiting loop iterate */ + SetLatch(&mainloop_latch); } /* SIGUSR1 signal handler for archiver process */ @@ -303,6 +315,8 @@ pgarch_waken(SIGNAL_ARGS) { /* set flag that there is work to be done */ wakened = true; + /* let the waiting loop iterate */ + SetLatch(&mainloop_latch); } /* SIGUSR2 signal handler for archiver process */ @@ -311,6 +325,8 @@ pgarch_waken_stop(SIGNAL_ARGS) { /* set flag to do a final cycle and shut down afterwards */ ready_to_stop = true; + /* let the waiting loop iterate */ + SetLatch(&mainloop_latch); } /* @@ -321,7 +337,7 @@ pgarch_waken_stop(SIGNAL_ARGS) static void pgarch_MainLoop(void) { - time_t last_copy_time = 0; + pg_time_t last_copy_time = 0; bool time_to_stop; /* @@ -332,8 +348,15 @@ pgarch_MainLoop(void) */ wakened = true; + /* + * There shouldn't be anything for the archiver to do except to wait + * for a signal ... however, the archiver exists to protect our data, + * so she wakes up occasionally to allow herself to be proactive. + */ do { + ResetLatch(&mainloop_latch); + /* When we get SIGUSR2, we do one more archive cycle, then exit */ time_to_stop = ready_to_stop; @@ -371,24 +394,26 @@ pgarch_MainLoop(void) } /* - * There shouldn't be anything for the archiver to do except to wait - * for a signal ... 
however, the archiver exists to protect our data, - * so she wakes up occasionally to allow herself to be proactive. - * - * On some platforms, signals won't interrupt the sleep. To ensure we - * respond reasonably promptly when someone signals us, break down the - * sleep into 1-second increments, and check for interrupts after each - * nap. + * Sleep until a signal is received, or until a poll is forced by + * PGARCH_AUTOWAKE_INTERVAL having passed since last_copy_time, or + * until postmaster dies. */ - while (!(wakened || ready_to_stop || got_SIGHUP || - !PostmasterIsAlive(true))) + if (!time_to_stop) /* Don't wait during last iteration */ { - time_t curtime; + pg_time_t curtime = (pg_time_t) time(NULL); + int timeout; - pg_usleep(1000000L); - curtime = time(NULL); - if ((unsigned int) (curtime - last_copy_time) >= - (unsigned int) PGARCH_AUTOWAKE_INTERVAL) + timeout = PGARCH_AUTOWAKE_INTERVAL - (curtime - last_copy_time); + if (timeout > 0) + { + int rc; + rc = WaitLatch(&mainloop_latch, + WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + timeout * 1000000L); + if (rc & WL_TIMEOUT) + wakened = true; + } + else wakened = true; } @@ -397,7 +422,7 @@ pgarch_MainLoop(void) * or after completing one more archiving cycle after receiving * SIGUSR2. */ - } while (PostmasterIsAlive(true) && !time_to_stop); + } while (PostmasterIsAlive() && !time_to_stop); } /* @@ -429,7 +454,7 @@ pgarch_ArchiverCopyLoop(void) * command, and the second is to avoid conflicts with another * archiver spawned by a newer postmaster. */ - if (got_SIGTERM || !PostmasterIsAlive(true)) + if (got_SIGTERM || !PostmasterIsAlive()) return; /* diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 1d80c311d87..28c90dcac9d 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -3111,7 +3111,7 @@ PgstatCollectorMain(int argc, char *argv[]) * We can only get here if the select/poll timeout elapsed. Check * for postmaster death. 
*/ - if (!PostmasterIsAlive(true)) + if (!PostmasterIsAlive()) break; } } /* end of message-processing loop */ diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index bd0039e2626..dca5efc382f 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -368,6 +368,7 @@ static int CountChildren(int target); static bool CreateOptsFile(int argc, char *argv[], char *fullprogname); static pid_t StartChildProcess(AuxProcType type); static void StartAutovacuumWorker(void); +static void InitPostmasterDeathWatchHandle(void); #ifdef EXEC_BACKEND @@ -383,8 +384,6 @@ typedef struct HANDLE procHandle; DWORD procId; } win32_deadchild_waitinfo; - -HANDLE PostmasterHandle; #endif static pid_t backend_forkexec(Port *port); @@ -439,6 +438,7 @@ typedef struct HANDLE initial_signal_pipe; HANDLE syslogPipe[2]; #else + int postmaster_alive_fds[2]; int syslogPipe[2]; #endif char my_exec_path[MAXPGPATH]; @@ -469,6 +469,16 @@ static void ShmemBackendArrayRemove(Backend *bn); #define EXIT_STATUS_0(st) ((st) == 0) #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1) +#ifndef WIN32 +/* + * File descriptors for pipe used to monitor if postmaster is alive. + * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN. + */ +int postmaster_alive_fds[2] = { -1, -1 }; +#else +/* Process handle of postmaster used for the same purpose on Windows */ +HANDLE PostmasterHandle; +#endif /* * Postmaster main entry point @@ -962,8 +972,13 @@ PostmasterMain(int argc, char *argv[]) */ BackendList = DLNewList(); -#ifdef WIN32 + /* + * Initialize pipe (or process handle on Windows) that allows children to + * wake up from sleep on postmaster death. + */ + InitPostmasterDeathWatchHandle(); +#ifdef WIN32 /* * Initialize I/O completion port used to deliver list of dead children. 
*/ @@ -971,21 +986,6 @@ PostmasterMain(int argc, char *argv[]) if (win32ChildQueue == NULL) ereport(FATAL, (errmsg("could not create I/O completion port for child queue"))); - - /* - * Set up a handle that child processes can use to check whether the - * postmaster is still running. - */ - if (DuplicateHandle(GetCurrentProcess(), - GetCurrentProcess(), - GetCurrentProcess(), - &PostmasterHandle, - 0, - TRUE, - DUPLICATE_SAME_ACCESS) == 0) - ereport(FATAL, - (errmsg_internal("could not duplicate postmaster handle: error code %d", - (int) GetLastError()))); #endif /* @@ -1965,6 +1965,19 @@ ClosePostmasterPorts(bool am_syslogger) { int i; +#ifndef WIN32 + /* + * Close the write end of postmaster death watch pipe. It's important to + * do this as early as possible, so that if postmaster dies, others won't + * think that it's still running because we're holding the pipe open. + */ + if (close(postmaster_alive_fds[POSTMASTER_FD_OWN])) + ereport(FATAL, + (errcode_for_file_access(), + errmsg_internal("could not close postmaster death monitoring pipe in child process: %m"))); + postmaster_alive_fds[POSTMASTER_FD_OWN] = -1; +#endif + /* Close the listen sockets */ for (i = 0; i < MAXLISTEN; i++) { @@ -4643,6 +4656,9 @@ save_backend_variables(BackendParameters *param, Port *port, pgwin32_create_signal_listener(childPid), childProcess)) return false; +#else + memcpy(&param->postmaster_alive_fds, &postmaster_alive_fds, + sizeof(postmaster_alive_fds)); #endif memcpy(&param->syslogPipe, &syslogPipe, sizeof(syslogPipe)); @@ -4858,6 +4874,9 @@ restore_backend_variables(BackendParameters *param, Port *port) #ifdef WIN32 PostmasterHandle = param->PostmasterHandle; pgwin32_initial_signal_pipe = param->initial_signal_pipe; +#else + memcpy(&postmaster_alive_fds, &param->postmaster_alive_fds, + sizeof(postmaster_alive_fds)); #endif memcpy(&syslogPipe, &param->syslogPipe, sizeof(syslogPipe)); @@ -4979,3 +4998,54 @@ pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired) } #endif /* WIN32 
*/ + +/* + * Initialize one and only handle for monitoring postmaster death. + * + * Called once in the postmaster, so that child processes can subsequently + * monitor if their parent is dead. + */ +static void +InitPostmasterDeathWatchHandle(void) +{ +#ifndef WIN32 + /* + * Create a pipe. Postmaster holds the write end of the pipe open + * (POSTMASTER_FD_OWN), and children hold the read end. Children can + * pass the read file descriptor to select() to wake up in case postmaster + * dies, or check for postmaster death with a (read() == 0). Children must + * close the write end as soon as possible after forking, because EOF + * won't be signaled in the read end until all processes have closed the + * write fd. That is taken care of in ClosePostmasterPorts(). + */ + Assert(MyProcPid == PostmasterPid); + if (pipe(postmaster_alive_fds)) + ereport(FATAL, + (errcode_for_file_access(), + errmsg_internal("could not create pipe to monitor postmaster death: %m"))); + + /* + * Set O_NONBLOCK to allow testing for the fd's presence with a read() + * call. + */ + if (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFL, O_NONBLOCK)) + ereport(FATAL, + (errcode_for_socket_access(), + errmsg_internal("could not set postmaster death monitoring pipe to non-blocking mode: %m"))); + +#else + /* + * On Windows, we use a process handle for the same purpose. + */ + if (DuplicateHandle(GetCurrentProcess(), + GetCurrentProcess(), + GetCurrentProcess(), + &PostmasterHandle, + 0, + TRUE, + DUPLICATE_SAME_ACCESS) == 0) + ereport(FATAL, + (errmsg_internal("could not duplicate postmaster handle: error code %d", + (int) GetLastError()))); +#endif /* WIN32 */ +} diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c index d0d7c9bebf0..141167786d9 100644 --- a/src/backend/postmaster/walwriter.c +++ b/src/backend/postmaster/walwriter.c @@ -227,7 +227,7 @@ WalWriterMain(void) * Emergency bailout if postmaster has died. 
This is to avoid the * necessity for manual cleanup of all postmaster children. */ - if (!PostmasterIsAlive(true)) + if (!PostmasterIsAlive()) exit(1); /* |
