* latch.c
* Routines for inter-process latches
*
- * The Unix implementation uses the so-called self-pipe trick to overcome
- * the race condition involved with select() and setting a global flag
- * in the signal handler. When a latch is set and the current process
- * is waiting for it, the signal handler wakes up the select() in
- * WaitLatch by writing a byte to a pipe. A signal by itself doesn't
- * interrupt select() on all platforms, and even on platforms where it
- * does, a signal that arrives just before the select() call does not
- * prevent the select() from entering sleep. An incoming byte on a pipe
- * however reliably interrupts the sleep, and causes select() to return
- * immediately even if the signal arrives before select() begins.
- *
- * (Actually, we prefer epoll_wait() over poll() over select() where
- * available, but the same comments apply.)
+ * The Unix implementation uses the so-called self-pipe trick to overcome the
+ * race condition involved with poll() (or epoll_wait() on linux) and setting
+ * a global flag in the signal handler. When a latch is set and the current
+ * process is waiting for it, the signal handler wakes up the poll() in
+ * WaitLatch by writing a byte to a pipe. A signal by itself doesn't interrupt
+ * poll() on all platforms, and even on platforms where it does, a signal that
+ * arrives just before the poll() call does not prevent poll() from entering
+ * sleep. An incoming byte on a pipe however reliably interrupts the sleep,
+ * and causes poll() to return immediately even if the signal arrives before
+ * poll() begins.
*
* When SetLatch is called from the same process that owns the latch,
* SetLatch writes the byte directly to the pipe. If it's owned by another
* process, SIGUSR1 is sent and the signal handler in the waiting process
* writes the byte to the pipe on behalf of the signaling process.
*
- * The Windows implementation uses Windows events that are inherited by
- * all postmaster child processes.
+ * The Windows implementation uses Windows events that are inherited by all
+ * postmaster child processes. There's no need for the self-pipe trick there.
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
#include <limits.h>
#include <signal.h>
#include <unistd.h>
-#include <sys/time.h>
#ifdef HAVE_SYS_EPOLL_H
#include <sys/epoll.h>
#endif
#ifdef HAVE_SYS_POLL_H
#include <sys/poll.h>
#endif
-#ifdef HAVE_SYS_SELECT_H
-#include <sys/select.h>
-#endif
#include "miscadmin.h"
#include "pgstat.h"
* define somewhere before this block.
*/
#if defined(WAIT_USE_EPOLL) || defined(WAIT_USE_POLL) || \
- defined(WAIT_USE_SELECT) || defined(WAIT_USE_WIN32)
+ defined(WAIT_USE_WIN32)
/* don't overwrite manual choice */
#elif defined(HAVE_SYS_EPOLL_H)
#define WAIT_USE_EPOLL
#elif defined(HAVE_POLL)
#define WAIT_USE_POLL
-#elif HAVE_SYS_SELECT_H
-#define WAIT_USE_SELECT
#elif WIN32
#define WAIT_USE_WIN32
#else
/*
* Set up the self-pipe that allows a signal handler to wake up the
- * select() in WaitLatch. Make the write-end non-blocking, so that
- * SetLatch won't block if the event has already been set many times
+ * poll()/epoll_wait() in WaitLatch. Make the write-end non-blocking, so
+ * that SetLatch won't block if the event has already been set many times
* filling the kernel buffer. Make the read-end non-blocking too, so that
* we can easily clear the pipe by reading until EAGAIN or EWOULDBLOCK.
*/
/*
* See if anyone's waiting for the latch. It can be the current process if
- * we're in a signal handler. We use the self-pipe to wake up the select()
- * in that case. If it's another process, send a signal.
+ * we're in a signal handler. We use the self-pipe to wake up the
+ * poll()/epoll_wait() in that case. If it's another process, send a
+ * signal.
*
* Fetch owner_pid only once, in case the latch is concurrently getting
* owned or disowned. XXX: This assumes that pid_t is atomic, which isn't
WaitEventAdjustEpoll(set, event, EPOLL_CTL_ADD);
#elif defined(WAIT_USE_POLL)
WaitEventAdjustPoll(set, event);
-#elif defined(WAIT_USE_SELECT)
- /* nothing to do */
#elif defined(WAIT_USE_WIN32)
WaitEventAdjustWin32(set, event);
#endif
WaitEventAdjustEpoll(set, event, EPOLL_CTL_MOD);
#elif defined(WAIT_USE_POLL)
WaitEventAdjustPoll(set, event);
-#elif defined(WAIT_USE_SELECT)
- /* nothing to do */
#elif defined(WAIT_USE_WIN32)
WaitEventAdjustWin32(set, event);
#endif
* because we don't expect the pipe to become readable or to have
* any errors either, treat those cases as postmaster death, too.
*
- * As explained in the WAIT_USE_SELECT implementation, select(2)
- * may spuriously return. Be paranoid about that here too, a
- * spurious WL_POSTMASTER_DEATH would be painful.
+ * Be paranoid about a spurious event signalling the postmaster as
+ * being dead. There have been reports about that happening with
+ * older primitives (select(2) to be specific), and a spurious
+ * WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
+ * cost much.
*/
if (!PostmasterIsAlive())
{
* we don't expect the pipe to become readable or to have any
* errors either, treat those cases as postmaster death, too.
*
- * As explained in the WAIT_USE_SELECT implementation, select(2)
- * may spuriously return. Be paranoid about that here too, a
- * spurious WL_POSTMASTER_DEATH would be painful.
+ * Be paranoid about a spurious event signalling the postmaster as
+ * being dead. There have been reports about that happening with
+ * older primitives (select(2) to be specific), and a spurious
+ * WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
+ * cost much.
*/
if (!PostmasterIsAlive())
{
return returned_events;
}
-#elif defined(WAIT_USE_SELECT)
-
-/*
- * Wait using select(2).
- *
- * XXX: On at least older linux kernels select(), in violation of POSIX,
- * doesn't reliably return a socket as writable if closed - but we rely on
- * that. So far all the known cases of this problem are on platforms that also
- * provide a poll() implementation without that bug. If we find one where
- * that's not the case, we'll need to add a workaround.
- */
-static inline int
-WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
- WaitEvent *occurred_events, int nevents)
-{
- int returned_events = 0;
- int rc;
- WaitEvent *cur_event;
- fd_set input_mask;
- fd_set output_mask;
- int hifd;
- struct timeval tv;
- struct timeval *tvp = NULL;
-
- FD_ZERO(&input_mask);
- FD_ZERO(&output_mask);
-
- /*
- * Prepare input/output masks. We do so every loop iteration as there's no
- * entirely portable way to copy fd_sets.
- */
- for (cur_event = set->events;
- cur_event < (set->events + set->nevents);
- cur_event++)
- {
- if (cur_event->events == WL_LATCH_SET)
- FD_SET(cur_event->fd, &input_mask);
- else if (cur_event->events == WL_POSTMASTER_DEATH)
- FD_SET(cur_event->fd, &input_mask);
- else
- {
- Assert(cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE));
- if (cur_event->events == WL_SOCKET_READABLE)
- FD_SET(cur_event->fd, &input_mask);
- else if (cur_event->events == WL_SOCKET_WRITEABLE)
- FD_SET(cur_event->fd, &output_mask);
- }
-
- if (cur_event->fd > hifd)
- hifd = cur_event->fd;
- }
-
- /* Sleep */
- if (cur_timeout >= 0)
- {
- tv.tv_sec = cur_timeout / 1000L;
- tv.tv_usec = (cur_timeout % 1000L) * 1000L;
- tvp = &tv;
- }
- rc = select(hifd + 1, &input_mask, &output_mask, NULL, tvp);
-
- /* Check return code */
- if (rc < 0)
- {
- /* EINTR is okay, otherwise complain */
- if (errno != EINTR)
- {
- waiting = false;
- ereport(ERROR,
- (errcode_for_socket_access(),
- errmsg("select() failed: %m")));
- }
- return 0; /* retry */
- }
- else if (rc == 0)
- {
- /* timeout exceeded */
- return -1;
- }
-
- /*
- * To associate events with select's masks, we have to check the status of
- * the file descriptors associated with an event; by looping through all
- * events.
- */
- for (cur_event = set->events;
- cur_event < (set->events + set->nevents)
- && returned_events < nevents;
- cur_event++)
- {
- occurred_events->pos = cur_event->pos;
- occurred_events->user_data = cur_event->user_data;
- occurred_events->events = 0;
-
- if (cur_event->events == WL_LATCH_SET &&
- FD_ISSET(cur_event->fd, &input_mask))
- {
- /* There's data in the self-pipe, clear it. */
- drainSelfPipe();
-
- if (set->latch->is_set)
- {
- occurred_events->fd = PGINVALID_SOCKET;
- occurred_events->events = WL_LATCH_SET;
- occurred_events++;
- returned_events++;
- }
- }
- else if (cur_event->events == WL_POSTMASTER_DEATH &&
- FD_ISSET(cur_event->fd, &input_mask))
- {
- /*
- * According to the select(2) man page on Linux, select(2) may
- * spuriously return and report a file descriptor as readable,
- * when it's not; and presumably so can poll(2). It's not clear
- * that the relevant cases would ever apply to the postmaster
- * pipe, but since the consequences of falsely returning
- * WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
- * trouble to positively verify EOF with PostmasterIsAlive().
- */
- if (!PostmasterIsAlive())
- {
- occurred_events->fd = PGINVALID_SOCKET;
- occurred_events->events = WL_POSTMASTER_DEATH;
- occurred_events++;
- returned_events++;
- }
- }
- else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
- {
- Assert(cur_event->fd != PGINVALID_SOCKET);
-
- if ((cur_event->events & WL_SOCKET_READABLE) &&
- FD_ISSET(cur_event->fd, &input_mask))
- {
- /* data available in socket, or EOF */
- occurred_events->events |= WL_SOCKET_READABLE;
- }
-
- if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
- FD_ISSET(cur_event->fd, &output_mask))
- {
- /* socket is writeable, or EOF */
- occurred_events->events |= WL_SOCKET_WRITEABLE;
- }
-
- if (occurred_events->events != 0)
- {
- occurred_events->fd = cur_event->fd;
- occurred_events++;
- returned_events++;
- }
- }
- }
- return returned_events;
-}
-
#elif defined(WAIT_USE_WIN32)
/*