Fix: 823: Watchdog dies and kills pgpool2 when the network is briefly interrupted.
author Muhammad Usama <m.usama@gmail.com>
Tue, 11 Jun 2024 06:52:00 +0000 (11:52 +0500)
committer Muhammad Usama <m.usama@gmail.com>
Tue, 11 Jun 2024 06:55:36 +0000 (11:55 +0500)
With network monitoring enabled, a Pgpool node would shut down immediately if it
lost all network interfaces or assigned IP addresses, providing extra protection
by quickly removing a non-communicative node from the cluster.

The issue was that Pgpool responded to network blackout events even when network
monitoring was disabled. This fix ensures that the network monitoring socket is
not opened when network monitoring is not enabled, preventing unnecessary shutdowns.

src/watchdog/watchdog.c

index fdf6a65c00dc3a6980a6f689cef2ff8654fce40f..561d9a60fdd3331ab79773906dd62ffb3b2f2789 100644 (file)
@@ -812,6 +812,7 @@ wd_cluster_initialize(void)
        g_cluster.de_escalation_pid = 0;
        g_cluster.unidentified_socks = NULL;
        g_cluster.command_server_sock = 0;
+       g_cluster.network_monitor_sock = 0;
        g_cluster.notify_clients = NULL;
        g_cluster.ipc_command_socks = NULL;
        g_cluster.wd_timer_commands = NULL;
@@ -1195,8 +1196,8 @@ watchdog_main(void)
        /* open the command server */
        g_cluster.command_server_sock = wd_create_command_server_socket();
 
-       /* try connecting to all watchdog nodes */
-       g_cluster.network_monitor_sock = create_monitoring_socket();
+       if (g_cluster.wdInterfaceToMonitor)
+               g_cluster.network_monitor_sock = create_monitoring_socket();
 
        if (any_interface_available() == false)
        {
@@ -1207,6 +1208,7 @@ watchdog_main(void)
                                 errhint("you can disable interface checking by setting wd_monitoring_interfaces_list = '' in pgpool config")));
        }
 
+       /* try connecting to all watchdog nodes */
        connect_with_all_configured_nodes();
 
        /* set the initial state of local node */
@@ -1414,9 +1416,12 @@ prepare_fds(fd_set *rmask, fd_set *wmask, fd_set *emask)
        if (fd_max < g_cluster.command_server_sock)
                fd_max = g_cluster.command_server_sock;
 
-       FD_SET(g_cluster.network_monitor_sock, rmask);
-       if (fd_max < g_cluster.network_monitor_sock)
-               fd_max = g_cluster.network_monitor_sock;
+       if (g_cluster.network_monitor_sock > 0)
+       {
+               FD_SET(g_cluster.network_monitor_sock, rmask);
+               if (fd_max < g_cluster.network_monitor_sock)
+                       fd_max = g_cluster.network_monitor_sock;
+       }
 
        /*
         * set write fdset for all waiting for connection sockets, while already