Enhance the stability of detach_false_primary.
author	Tatsuo Ishii <ishii@sraoss.co.jp>
	Sun, 17 Mar 2024 01:11:04 +0000 (10:11 +0900)
committer	Tatsuo Ishii <ishii@sraoss.co.jp>
	Sun, 17 Mar 2024 01:11:04 +0000 (10:11 +0900)
It was possible that enabling detach_false_primary caused all
backend nodes to go down.

Suppose watchdog is enabled and there are 3 watchdog nodes: pgpool0,
pgpool1 and pgpool2. If pgpool0 and pgpool1 find that the primary
PostgreSQL node has gone down due to network trouble between pgpool
and PostgreSQL, they promote a standby node. pgpool2 could then see
two primary nodes, because the backend status at pgpool2 has not yet
been synced with pgpool0 and pgpool1, and pgpool2 performs
detach_false_primary against the standby that is being promoted.

To prevent the issue, detach_false_primary is now performed only by
the watchdog leader node. With this, pgpool will no longer act on
half-baked backend status, and the issue described above will not
happen.
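
In short, the worker process now checks whether the local node is the
watchdog leader before acting on a false primary. A simplified sketch
of the new logic in pool_worker_child.c (condensed from the patch
below; the surrounding health check loop and error handling are
omitted, and n is the id of the node judged to be a false primary):

    bool    watchdog_leader = false;

    if (pool_config->use_watchdog)
    {
        /* Ask the watchdog for the backend status kept by the leader node. */
        WDPGBackendStatus *backendStatus = get_pg_backend_status_from_leader_wd_node();

        if (backendStatus)
        {
            /* Quorum exists and node_count <= 0: definitely I am the leader. */
            if (wd_internal_get_watchdog_quorum_state() >= 0 &&
                backendStatus->node_count <= 0)
                watchdog_leader = true;
            pfree(backendStatus);
        }
    }

    if (pool_config->detach_false_primary &&
        (!pool_config->use_watchdog || watchdog_leader))
    {
        /*
         * REQ_DETAIL_CONFIRMED skips the quorum consensus, so the
         * detached node does not linger in quarantine state.
         */
        degenerate_backend_set(&n, 1, REQ_DETAIL_SWITCHOVER | REQ_DETAIL_CONFIRMED);
    }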

Discussion: https://www.pgpool.net/pipermail/pgpool-hackers/2024-February/004432.html
([pgpool-hackers: 4431] detach_false_primary could make all nodes go down)

doc.ja/src/sgml/failover.sgml
doc/src/sgml/failover.sgml
src/streaming_replication/pool_worker_child.c
src/test/regression/tests/081.detach_primary_all_down/test.sh [new file with mode: 0755]

index 4e25902a8f2177d3ff4dc646e3e9a355b64b8404..ea87a45f5ccd6aabb02efce5114b3b47182be75b 100644 (file)
@@ -934,6 +934,12 @@ if (we need to executed follow primary command)
       </para>
      </note>
 
+     <note>
+      <para>
+       If watchdog is enabled, detaching a false primary is performed only by the leader watchdog node.
+       Even if <xref linkend="guc-failover-require-consensus"> is on, detaching the primary by this feature is done solely at the judgment of the leader watchdog; no majority consensus is required.
+      </para>
+     </note>
      <para>
       <!--
       This parameter is only applicable in the streaming replication mode.
index c925881251290e3e5f4c8542087263214e327f93..2160e655b2527cd61f827297f0b8761cd0cd8485 100644 (file)
@@ -690,6 +690,17 @@ if (we need to executed follow primary command)
       </para>
      </note>
 
+     <note>
+      <para>
+       If watchdog is enabled, detaching a false primary is only
+       performed by the leader watchdog node. Even
+       if <xref linkend="guc-failover-require-consensus"> is on,
+       detaching the false primary is done solely at the judgment
+       of the leader watchdog; no majority consensus is
+       required.
+      </para>
+     </note>
+
      <para>
       This parameter is only applicable in the streaming replication mode.
      </para>
index 7b69dd7b2cbb653b0901cb21d48068dba8981931..3b1d9cff69b9193cc06c2c28c8cf0129204c8125 100644 (file)
@@ -3,7 +3,7 @@
  * pgpool: a language independent connection pool server for PostgreSQL
  * written by Tatsuo Ishii
  *
- * Copyright (c) 2003-2023     PgPool Global Development Group
+ * Copyright (c) 2003-2024     PgPool Global Development Group
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby
@@ -162,7 +162,8 @@ do_worker_child(void)
        {
                MemoryContextSwitchTo(WorkerMemoryContext);
                MemoryContextResetAndDeleteChildren(WorkerMemoryContext);
-               WD_STATES       wd_status;
+               bool    watchdog_leader;        /* true if I am the watchdog leader */
+
 
                CHECK_REQUEST;
 
@@ -176,9 +177,45 @@ do_worker_child(void)
                 */
                if (pool_config->use_watchdog)
                {
+                       WD_STATES       wd_status;
+                       WDPGBackendStatus       *backendStatus;
+
                        wd_status = wd_internal_get_watchdog_local_node_state();
                        ereport(DEBUG1,
                                        (errmsg("watchdog status: %d", wd_status)));
+                       /*
+                        * Ask the watchdog to get all the backend states from the
+                        * Leader/Coordinator Pgpool-II node.
+                        */
+                       watchdog_leader = false;
+                       backendStatus = get_pg_backend_status_from_leader_wd_node();
+
+                       if (!backendStatus)
+                               /*
+                                * Couldn't get leader status.
+                                */
+                               watchdog_leader = false;
+                       else
+                       {
+                               int     quorum = wd_internal_get_watchdog_quorum_state();
+                               int     node_count = backendStatus->node_count;
+
+                               ereport(DEBUG1,
+                                               (errmsg("quorum: %d node_count: %d",
+                                                               quorum, node_count)));
+                               if (quorum >= 0 && node_count <= 0)
+                               {
+                                       /*
+                                        * Quorum exists and node_count <= 0.
+                                        * Definitely I am the leader.
+                                        */
+                                       watchdog_leader = true;
+                               }
+                               else
+                                       watchdog_leader = false;
+
+                               pfree(backendStatus);
+                       }
                }
 
                /*
@@ -227,8 +264,33 @@ do_worker_child(void)
                                                         */
                                                        if (pool_config->detach_false_primary)
                                                        {
-                                                               n = i;
-                                                               degenerate_backend_set(&n, 1, REQ_DETAIL_SWITCHOVER);
+                                                               /*
+                                                                * However, if watchdog is enabled and I am
+                                                                * not the leader, do not detach the invalid
+                                                                * node, because the information used to
+                                                                * determine the false primary might be
+                                                                * outdated or temporarily inconsistent.
+                                                                * See [pgpool-hackers: 4431] for details.
+                                                                */
+                                                               if (!pool_config->use_watchdog ||
+                                                                       watchdog_leader)
+                                                               {
+                                                                       n = i;
+                                                                       /*
+                                                                        * In case watchdog is enabled, we need
+                                                                        * to add REQ_DETAIL_CONFIRMED, which
+                                                                        * means no quorum consensus is
+                                                                        * required. If we do not add this, the
+                                                                        * target node will remain in quarantine
+                                                                        * state, since other nodes do not
+                                                                        * request failover.
+                                                                        */
+                                                                       degenerate_backend_set(&n, 1,
+                                                                                                                  REQ_DETAIL_SWITCHOVER|REQ_DETAIL_CONFIRMED);
+                                                               }
+                                                               else if (pool_config->use_watchdog)
+                                                                       ereport(LOG,
+                                                                                       (errmsg("do not detach invalid node %d because I am not the leader or quorum does not exist", i)));
                                                        }
                                                }
                                        }
diff --git a/src/test/regression/tests/081.detach_primary_all_down/test.sh b/src/test/regression/tests/081.detach_primary_all_down/test.sh
new file mode 100755 (executable)
index 0000000..e97dcf9
--- /dev/null
@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+#-------------------------------------------------------------------
+# test script for the issue that detach_false_primary could bring down all backends.
+# See [pgpool-hackers: 4431] for more details.
+#
+# It is possible that all DB nodes go down if detach_false_primary is enabled.
+# Story:
+# There are 3 watchdog nodes pgpool0, pgpool1 and pgpool2.
+# There are 2 DB nodes node0 and node1 (initially node 0 is primary).
+# follow_primary_command is disabled.
+# 1) Node 0 goes down at pgpool0 due to network trouble, BUT node 0
+# is actually alive.
+# 2) Node 0 goes down at pgpool1 due to network trouble, BUT node 0
+# is actually alive.
+# 3) Failover is triggered. Since pgpool0 and pgpool1 agree, node 0 is set to
+# down and node 1 is promoted.
+# 4) Before the new status is synced with pgpool2, pgpool2's sr_check
+# finds that there are two primary nodes because of step 3.
+# detach_false_primary is triggered and node 1 goes down.
+# 5) Now all backends are in down status.
+
+# wait for the watchdog to start up by looking for "lifecheck started" in
+# the pgpool.log.  argument: $1: absolute path to the pgpool.log.
+function wait_for_watchdog_startup
+{
+    while :
+    do
+	grep -q "lifecheck started" $1
+	if [ $? = 0 ];then
+	    break;
+	fi
+	sleep 1
+    done
+}
+
+source $TESTLIBS
+TESTDIR=testdir
+PSQL=$PGBIN/psql
+PG_CTL=$PGBIN/pg_ctl
+export PGDATABASE=test
+
+rm -fr $TESTDIR
+mkdir $TESTDIR
+cd $TESTDIR
+
+version=`$PSQL --version|awk '{print $3}'`
+result=`echo "$version >= 9.6"|bc`
+if [ $result = 0 ];then
+    echo "PostgreSQL version $version is 9.5 or before. Skipping test."
+    exit 0
+fi
+
+# create 3 node pgpool with 2 backends.
+$WATCHDOG_SETUP -wn 3 -n 2
+
+# enable detach_false_primary and health_check_test. We need to
+# disable follow_primary_command, otherwise node 0 goes down by
+# follow_primary_command and the test may not reveal the problem.
+# Also we set sr_check_period to a very short value so that
+# detach_false_primary is triggered before the new status is synced
+# by the watchdog leader.
+for i in 0 1 2
+do
+    echo "detach_false_primary = on" >> pgpool$i/etc/pgpool.conf
+    echo "health_check_test = on" >> pgpool$i/etc/pgpool.conf
+    echo "follow_primary_command = ''" >> pgpool$i/etc/pgpool.conf
+    echo "sr_check_period = 1"  >> pgpool$i/etc/pgpool.conf
+done
+
+cd pgpool0
+source ./bashrc.ports
+cd ..
+
+./startall
+
+echo -n "waiting for watchdog node 0 to start up... "
+log=pgpool0/log/pgpool.log
+wait_for_watchdog_startup $log
+echo "done."
+
+$PGPOOL_INSTALL_DIR/bin/pcp_watchdog_info -v -w -h localhost -p $PCP_PORT
+$PGPOOL_INSTALL_DIR/bin/pcp_node_info -h localhost -p $PCP_PORT
+
+# Let node 0 down at pgpool0
+echo "0        down" > pgpool0/log/backend_down_request
+# Let node 0 down at pgpool1
+echo "0        down" > pgpool1/log/backend_down_request
+
+# Wait up to 30 seconds to see whether the problem (all nodes going down) occurs.
+# Observe that pgpool1 and pgpool2 print:
+# LOG:  pgpool_worker_child: invalid node found 1
+# which means sr_check ran detach_false_primary but did not trigger failover:
+# LOG:  do not detach invalid node 1 because I am not the leader or quorum does not exist
+for t in {1..30}
+do
+    for i in 0 1 2
+    do
+       date
+       echo "node info after failover at pgppol$i"
+       cd pgpool$i
+       source ./bashrc.ports
+       cd ..
+       $PGPOOL_INSTALL_DIR/bin/pcp_node_info -h localhost -p $PCP_PORT
+    done
+    # check whether all nodes are down.
+    n0=`$PGPOOL_INSTALL_DIR/bin/pcp_node_info -h localhost -p $PCP_PORT 0|awk '{print $5}'`
+    n1=`$PGPOOL_INSTALL_DIR/bin/pcp_node_info -h localhost -p $PCP_PORT 1|awk '{print $5}'`
+    if [ $n0 = "down" -a $n1 = "down" ];then
+       echo "all nodes go down."
+       ./shutdownall
+       exit 1
+    fi
+    sleep 1
+done
+echo "test succeeded."
+
+./shutdownall
+
+exit 0
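
For reference, a way to run the new test by hand (a sketch, assuming
the standard regress.sh driver under src/test/regression):

    cd src/test/regression
    ./regress.sh 081.detach_primary_all_down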