Skip to content

Commit f3a4d7e

Browse files
committed
Distinguish wait-for-connection from wait-for-write-ready on Windows.
The API for WaitLatch and friends followed the Unix convention in which waiting for a socket connection to complete is identical to waiting for the socket to accept a write. While Windows provides a select(2) emulation that agrees with that, the native WaitForMultipleObjects API treats them as quite different --- and for some bizarre reason, it will report a not-yet-connected socket as write-ready. libpq itself has so far escaped dealing with this because it waits with select(), but in libpqwalreceiver.c we want to wait using WaitLatchOrSocket. The semantics mismatch resulted in replication connection failures on Windows, but only for remote connections (apparently, localhost connections complete immediately, or at least too fast for anyone to have noticed the problem in single-machine testing). To fix, introduce an additional WL_SOCKET_CONNECTED wait flag for WaitLatchOrSocket, which is identical to WL_SOCKET_WRITEABLE on non-Windows, but results in waiting for FD_CONNECT events on Windows. Ideally, we would also distinguish the two conditions in the API for PQconnectPoll(), but changing that API at this point seems infeasible. Instead, cheat by checking for PQstatus() == CONNECTION_STARTED to determine that we're still waiting for the connection to complete. (This is a cheat mainly because CONNECTION_STARTED is documented as an internal state rather than something callers should rely on. Perhaps we ought to change the documentation ... but this patch doesn't.) Per reports from Jobin Augustine and Igor Neyman. Back-patch to v10 where commit 1e8a850 exposed this longstanding shortcoming. Andres Freund, minor fix and some code review/beautification by me Discussion: https://postgr.es/m/CAHBggj8g2T+ZDcACZ2FmzX9CTxkWjKBsHd6NkYB4i9Ojf6K1Fw@mail.gmail.com
1 parent 480f1f4 commit f3a4d7e

File tree

3 files changed

+47
-26
lines changed

3 files changed

+47
-26
lines changed

src/backend/replication/libpqwalreceiver/libpqwalreceiver.c

+9-4
Original file line numberDiff line numberDiff line change
@@ -168,13 +168,18 @@ libpqrcv_connect(const char *conninfo, bool logical, const char *appname,
168168
status = PGRES_POLLING_WRITING;
169169
do
170170
{
171-
/* Wait for socket ready and/or other events. */
172171
int io_flag;
173172
int rc;
174173

175-
io_flag = (status == PGRES_POLLING_READING
176-
? WL_SOCKET_READABLE
177-
: WL_SOCKET_WRITEABLE);
174+
if (status == PGRES_POLLING_READING)
175+
io_flag = WL_SOCKET_READABLE;
176+
#ifdef WIN32
177+
/* Windows needs a different test while waiting for connection-made */
178+
else if (PQstatus(conn->streamConn) == CONNECTION_STARTED)
179+
io_flag = WL_SOCKET_CONNECTED;
180+
#endif
181+
else
182+
io_flag = WL_SOCKET_WRITEABLE;
178183

179184
rc = WaitLatchOrSocket(MyLatch,
180185
WL_POSTMASTER_DEATH |

src/backend/storage/ipc/latch.c

+28-22
Original file line numberDiff line numberDiff line change
@@ -344,9 +344,9 @@ WaitLatch(volatile Latch *latch, int wakeEvents, long timeout,
344344
* Like WaitLatch, but with an extra socket argument for WL_SOCKET_*
345345
* conditions.
346346
*
347-
* When waiting on a socket, EOF and error conditions are reported by
348-
* returning the socket as readable/writable or both, depending on
349-
* WL_SOCKET_READABLE/WL_SOCKET_WRITEABLE being specified.
347+
* When waiting on a socket, EOF and error conditions always cause the socket
348+
* to be reported as readable/writable/connected, so that the caller can deal
349+
* with the condition.
350350
*
351351
* NB: These days this is just a wrapper around the WaitEventSet API. When
352352
* using a latch very frequently, consider creating a longer living
@@ -374,11 +374,11 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
374374
AddWaitEventToSet(set, WL_POSTMASTER_DEATH, PGINVALID_SOCKET,
375375
NULL, NULL);
376376

377-
if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
377+
if (wakeEvents & WL_SOCKET_MASK)
378378
{
379379
int ev;
380380

381-
ev = wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);
381+
ev = wakeEvents & WL_SOCKET_MASK;
382382
AddWaitEventToSet(set, ev, sock, NULL, NULL);
383383
}
384384

@@ -390,8 +390,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
390390
{
391391
ret |= event.events & (WL_LATCH_SET |
392392
WL_POSTMASTER_DEATH |
393-
WL_SOCKET_READABLE |
394-
WL_SOCKET_WRITEABLE);
393+
WL_SOCKET_MASK);
395394
}
396395

397396
FreeWaitEventSet(set);
@@ -640,10 +639,13 @@ FreeWaitEventSet(WaitEventSet *set)
640639
* Add an event to the set. Possible events are:
641640
* - WL_LATCH_SET: Wait for the latch to be set
642641
* - WL_POSTMASTER_DEATH: Wait for postmaster to die
643-
* - WL_SOCKET_READABLE: Wait for socket to become readable
644-
* can be combined in one event with WL_SOCKET_WRITEABLE
645-
* - WL_SOCKET_WRITEABLE: Wait for socket to become writeable
646-
* can be combined with WL_SOCKET_READABLE
642+
* - WL_SOCKET_READABLE: Wait for socket to become readable,
643+
* can be combined in one event with other WL_SOCKET_* events
644+
* - WL_SOCKET_WRITEABLE: Wait for socket to become writeable,
645+
* can be combined with other WL_SOCKET_* events
646+
* - WL_SOCKET_CONNECTED: Wait for socket connection to be established,
647+
* can be combined with other WL_SOCKET_* events (on non-Windows
648+
* platforms, this is the same as WL_SOCKET_WRITEABLE)
647649
*
648650
* Returns the offset in WaitEventSet->events (starting from 0), which can be
649651
* used to modify previously added wait events using ModifyWaitEvent().
@@ -652,9 +654,9 @@ FreeWaitEventSet(WaitEventSet *set)
652654
* i.e. it must be a process-local latch initialized with InitLatch, or a
653655
* shared latch associated with the current process by calling OwnLatch.
654656
*
655-
* In the WL_SOCKET_READABLE/WRITEABLE case, EOF and error conditions are
656-
* reported by returning the socket as readable/writable or both, depending on
657-
* WL_SOCKET_READABLE/WRITEABLE being specified.
657+
* In the WL_SOCKET_READABLE/WRITEABLE/CONNECTED cases, EOF and error
658+
* conditions cause the socket to be reported as readable/writable/connected,
659+
* so that the caller can deal with the condition.
658660
*
659661
* The user_data pointer specified here will be set for the events returned
660662
* by WaitEventSetWait(), allowing to easily associate additional data with
@@ -685,8 +687,7 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
685687
}
686688

687689
/* waiting for socket readiness without a socket indicates a bug */
688-
if (fd == PGINVALID_SOCKET &&
689-
(events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)))
690+
if (fd == PGINVALID_SOCKET && (events & WL_SOCKET_MASK))
690691
elog(ERROR, "cannot wait on socket event without a socket");
691692

692693
event = &set->events[set->nevents];
@@ -885,6 +886,8 @@ WaitEventAdjustWin32(WaitEventSet *set, WaitEvent *event)
885886
flags |= FD_READ;
886887
if (event->events & WL_SOCKET_WRITEABLE)
887888
flags |= FD_WRITE;
889+
if (event->events & WL_SOCKET_CONNECTED)
890+
flags |= FD_CONNECT;
888891

889892
if (*handle == WSA_INVALID_EVENT)
890893
{
@@ -1395,7 +1398,7 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
13951398
returned_events++;
13961399
}
13971400
}
1398-
else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
1401+
else if (cur_event->events & WL_SOCKET_MASK)
13991402
{
14001403
WSANETWORKEVENTS resEvents;
14011404
HANDLE handle = set->handles[cur_event->pos + 1];
@@ -1432,13 +1435,16 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
14321435
/* writeable */
14331436
occurred_events->events |= WL_SOCKET_WRITEABLE;
14341437
}
1438+
if ((cur_event->events & WL_SOCKET_CONNECTED) &&
1439+
(resEvents.lNetworkEvents & FD_CONNECT))
1440+
{
1441+
/* connected */
1442+
occurred_events->events |= WL_SOCKET_CONNECTED;
1443+
}
14351444
if (resEvents.lNetworkEvents & FD_CLOSE)
14361445
{
1437-
/* EOF */
1438-
if (cur_event->events & WL_SOCKET_READABLE)
1439-
occurred_events->events |= WL_SOCKET_READABLE;
1440-
if (cur_event->events & WL_SOCKET_WRITEABLE)
1441-
occurred_events->events |= WL_SOCKET_WRITEABLE;
1446+
/* EOF/error, so signal all caller-requested socket flags */
1447+
occurred_events->events |= (cur_event->events & WL_SOCKET_MASK);
14421448
}
14431449

14441450
if (occurred_events->events != 0)

src/include/storage/latch.h

+10
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,16 @@ typedef struct Latch
126126
#define WL_SOCKET_WRITEABLE (1 << 2)
127127
#define WL_TIMEOUT (1 << 3) /* not for WaitEventSetWait() */
128128
#define WL_POSTMASTER_DEATH (1 << 4)
129+
#ifdef WIN32
130+
#define WL_SOCKET_CONNECTED (1 << 5)
131+
#else
132+
/* avoid having to deal with case on platforms not requiring it */
133+
#define WL_SOCKET_CONNECTED WL_SOCKET_WRITEABLE
134+
#endif
135+
136+
#define WL_SOCKET_MASK (WL_SOCKET_READABLE | \
137+
WL_SOCKET_WRITEABLE | \
138+
WL_SOCKET_CONNECTED)
129139

130140
typedef struct WaitEvent
131141
{

0 commit comments

Comments
 (0)