diff options
author | Tatsuo Ishii | 2024-06-11 11:15:08 +0000 |
---|---|---|
committer | Tatsuo Ishii | 2024-06-11 11:15:08 +0000 |
commit | 8c307f4bfafc1c92806bfebf75cd628d8952e7a3 (patch) | |
tree | 6abe3080c9696e991a1389c9d3538fce3f377c60 /src | |
parent | 570ddf65721ecf434d5db59ebb18aadb8f0a646d (diff) |
Fix segfault in a child process.
It is reported that pgpool child segfaulted [1].
[snip]
In the down thread it is reported that despite VALID_BACKEND(i)
returns true, backend->slots[i] is NULL, which should have been filled
by new_connection().
It seems there's a race condition. In new_connection(), there's a code
fragment:
/*
* Make sure that the global backend status in the shared memory
* agrees the local status checked by VALID_BACKEND. It is possible
* that the local status is up, while the global status has been
* changed to down by failover.
*/
A--> if (BACKEND_INFO(i).backend_status != CON_UP &&
BACKEND_INFO(i).backend_status != CON_CONNECT_WAIT)
{
ereport(DEBUG1,
(errmsg("creating new connection to backend"),
errdetail("skipping backend slot %d because global backend_status = %d",
i, BACKEND_INFO(i).backend_status)));
/* sync local status with global status */
B--> *(my_backend_status[i]) = BACKEND_INFO(i).backend_status;
continue;
}
It is possible that at A backend_status in the shared memory is down
but by the time it reaches B the status has been changed to up. And
new_connection() skipped to create a backend connection. This seems to
explain why the connection slot is NULL while VALID_BACKEND returns
true. To prevent the race condtion, backend_status in shared memory is
copied to a local variable and evaluate it. Also the VALID_BACKEND
just before:
pool_set_db_node_id(CONNECTION(backend, i), i);
is changed to:
if (VALID_BACKEND(i) && CONNECTION_SLOT(backend, i))
so that it prevents crash just in case.
[1] [pgpool-general: 9104] Another segmentation fault
Diffstat (limited to 'src')
-rw-r--r-- | src/protocol/child.c | 3 | ||||
-rw-r--r-- | src/protocol/pool_connection_pool.c | 7 |
2 files changed, 5 insertions, 5 deletions
diff --git a/src/protocol/child.c b/src/protocol/child.c index c3882b13d..22bfbcf20 100644 --- a/src/protocol/child.c +++ b/src/protocol/child.c @@ -1069,9 +1069,8 @@ static POOL_CONNECTION_POOL * connect_backend(StartupPacket *sp, POOL_CONNECTION for (i = 0; i < NUM_BACKENDS; i++) { - if (VALID_BACKEND(i)) + if (VALID_BACKEND(i) && CONNECTION_SLOT(backend, i)) { - /* set DB node id */ pool_set_db_node_id(CONNECTION(backend, i), i); diff --git a/src/protocol/pool_connection_pool.c b/src/protocol/pool_connection_pool.c index 1fd683721..1506f413a 100644 --- a/src/protocol/pool_connection_pool.c +++ b/src/protocol/pool_connection_pool.c @@ -876,6 +876,7 @@ static POOL_CONNECTION_POOL * new_connection(POOL_CONNECTION_POOL * p) int active_backend_count = 0; int i; bool status_changed = false; + volatile BACKEND_STATUS status; MemoryContext oldContext = MemoryContextSwitchTo(TopMemoryContext); @@ -900,8 +901,8 @@ static POOL_CONNECTION_POOL * new_connection(POOL_CONNECTION_POOL * p) * that the local status is up, while the global status has been * changed to down by failover. */ - if (BACKEND_INFO(i).backend_status != CON_UP && - BACKEND_INFO(i).backend_status != CON_CONNECT_WAIT) + status = BACKEND_INFO(i).backend_status; + if (status != CON_UP && status != CON_CONNECT_WAIT) { ereport(DEBUG1, (errmsg("creating new connection to backend"), @@ -909,7 +910,7 @@ static POOL_CONNECTION_POOL * new_connection(POOL_CONNECTION_POOL * p) i, BACKEND_INFO(i).backend_status))); /* sync local status with global status */ - *(my_backend_status[i]) = BACKEND_INFO(i).backend_status; + *(my_backend_status[i]) = status; continue; } |