Enhance connecting process to backend. master
author Tatsuo Ishii <ishii@postgresql.org>
Fri, 13 Jun 2025 01:08:12 +0000 (10:08 +0900)
committer Tatsuo Ishii <ishii@postgresql.org>
Fri, 13 Jun 2025 01:08:12 +0000 (10:08 +0900)
In certain environments (especially k8s), DNS lookup is unstable and
connecting to the backend process fails.  This occurs in the call to
getaddrinfo() in connect_inet_domain_socket_by_port(). To improve the
situation, call getaddrinfo() up to 5 times in total (sleeping 1
second before each retry) if it fails with EAI_AGAIN. Note that if
connect_inet_domain_socket_by_port() is called with the "retry"
argument set to false, the retry will not happen. Health check calls
connect_inet_domain_socket_by_port() with the retry flag set to false
so that retrying is controlled by health check's own parameters.

Since no similar issue has been reported so far, back-patch only to
4.6 to keep backpatching minimal.

Discussion: https://github.com/pgpool/pgpool2/issues/104
Backpatch-through: v4.6

src/protocol/pool_connection_pool.c

index d7e1857a42350065b98ff567a349003ec7102c3d..225294a1b11fc8bdcf1798888455ba675a5b94fa 100644 (file)
@@ -5,7 +5,7 @@
  * pgpool: a language independent connection pool server for PostgreSQL
  * written by Tatsuo Ishii
  *
- * Copyright (c) 2003-2024     PgPool Global Development Group
+ * Copyright (c) 2003-2025     PgPool Global Development Group
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby
@@ -780,6 +780,7 @@ connect_inet_domain_socket_by_port(char *host, int port, bool retry)
        struct addrinfo *res;
        struct addrinfo *walk;
        struct addrinfo hints;
+       int     retry_cnt = 5;  /* getaddrinfo() retry count in case EAI_AGAIN */
 
        /*
         * getaddrinfo() requires a string because it also accepts service names,
@@ -798,13 +799,34 @@ connect_inet_domain_socket_by_port(char *host, int port, bool retry)
        hints.ai_family = PF_UNSPEC;
        hints.ai_socktype = SOCK_STREAM;
 
-       if ((ret = getaddrinfo(host, portstr, &hints, &res)) != 0)
+       for (;;)
        {
-               ereport(WARNING,
-                               (errmsg("failed to connect to PostgreSQL server, getaddrinfo() failed with error \"%s\"", gai_strerror(ret))));
+               if ((ret = getaddrinfo(host, portstr, &hints, &res)) != 0)
+               {
+                       if (!retry || ret != EAI_AGAIN)
+                       {
+                               ereport(WARNING,
+                                               (errmsg("failed to connect to PostgreSQL server, getaddrinfo() failed with error \"%s\"",
+                                                               gai_strerror(ret))));
+                               free(portstr);
+                               return -1;
+                       }
 
-               free(portstr);
-               return -1;
+                       retry_cnt--;
+
+                       if (retry_cnt <= 0)
+                       {
+                               ereport(WARNING,
+                                               (errmsg("failed to connect to PostgreSQL server, getaddrinfo() failed due to retry count over")));
+                               free(portstr);
+                               return -1;
+                       }
+                       ereport(LOG,
+                                       (errmsg("failed to connect to PostgreSQL server, getaddrinfo() failed. retrying...")));
+                       sleep(1);
+               }
+               else
+                       break;
        }
 
        free(portstr);