Enhance the stability of detach_false_primary.
author	Tatsuo Ishii <ishii@sraoss.co.jp>
	Sun, 17 Mar 2024 01:11:04 +0000 (10:11 +0900)
committer	Tatsuo Ishii <ishii@sraoss.co.jp>
	Sun, 17 Mar 2024 01:11:04 +0000 (10:11 +0900)
It was possible that enabling detach_false_primary caused all
backend nodes to go down.

Suppose watchdog is enabled and there are 3 watchdog nodes: pgpool0,
pgpool1 and pgpool2. If pgpool0 and pgpool1 find that the primary
PostgreSQL node has gone down due to network trouble between pgpool
and PostgreSQL, they promote a standby node. pgpool2 could then see
two primary nodes, because the backend status at pgpool2 has not yet
been synced with pgpool0 and pgpool1, and pgpool2 performs
detach_false_primary against the standby that is being promoted.

To prevent the issue, detach_false_primary is now performed only by
the watchdog leader node. With this, pgpool will no longer act on
half-baked backend status, and the issue described above will not
happen.
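
In short, the worker process now checks whether the local node is the
watchdog leader before acting on a false primary. A simplified sketch
of the new logic in pool_worker_child.c (condensed from the patch
below; the surrounding health check loop and error handling are
omitted, and n is the id of the node judged to be a false primary):

    bool    watchdog_leader = false;

    if (pool_config->use_watchdog)
    {
        /* Ask the watchdog for the backend status kept by the leader node. */
        WDPGBackendStatus *backendStatus = get_pg_backend_status_from_leader_wd_node();

        if (backendStatus)
        {
            /* Quorum exists and node_count <= 0: definitely I am the leader. */
            if (wd_internal_get_watchdog_quorum_state() >= 0 &&
                backendStatus->node_count <= 0)
                watchdog_leader = true;
            pfree(backendStatus);
        }
    }

    if (pool_config->detach_false_primary &&
        (!pool_config->use_watchdog || watchdog_leader))
    {
        /*
         * REQ_DETAIL_CONFIRMED skips the quorum consensus, so the
         * detached node does not linger in quarantine state.
         */
        degenerate_backend_set(&n, 1, REQ_DETAIL_SWITCHOVER | REQ_DETAIL_CONFIRMED);
    }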

Discussion: https://www.pgpool.net/pipermail/pgpool-hackers/2024-February/004432.html
([pgpool-hackers: 4431] detach_false_primary could make all nodes go down)

doc.ja/src/sgml/failover.sgml
doc/src/sgml/failover.sgml
src/streaming_replication/pool_worker_child.c
src/test/regression/tests/081.detach_primary_all_down/test.sh [new file with mode: 0755]

index 4e25902a8f2177d3ff4dc646e3e9a355b64b8404..ea87a45f5ccd6aabb02efce5114b3b47182be75b 100644 (file)
@@ -934,6 +934,12 @@ if (we need to executed follow primary command)
       </para>
      </note>
 
+     <note>
+      <para>
+       If watchdog is enabled, detaching a false primary is performed only by the leader watchdog node.
+       Even if <xref linkend="guc-failover-require-consensus"> is on, detaching the primary by this feature is done solely at the judgment of the leader watchdog; no majority consensus is required.
+      </para>
+     </note>
      <para>
       <!--
       This parameter is only applicable in the streaming replication mode.
index c925881251290e3e5f4c8542087263214e327f93..2160e655b2527cd61f827297f0b8761cd0cd8485 100644 (file)
@@ -690,6 +690,17 @@ if (we need to executed follow primary command)
       </para>
      </note>
 
+     <note>
+      <para>
+       If watchdog is enabled, detaching a false primary is only
+       performed by the leader watchdog node. Even
+       if <xref linkend="guc-failover-require-consensus"> is on,
+       detaching the false primary is done solely at the judgment
+       of the leader watchdog; no majority consensus is
+       required.
+      </para>
+     </note>
+
      <para>
       This parameter is only applicable in the streaming replication mode.
      </para>
index 7b69dd7b2cbb653b0901cb21d48068dba8981931..3b1d9cff69b9193cc06c2c28c8cf0129204c8125 100644 (file)
@@ -3,7 +3,7 @@
  * pgpool: a language independent connection pool server for PostgreSQL
  * written by Tatsuo Ishii
  *
- * Copyright (c) 2003-2023     PgPool Global Development Group
+ * Copyright (c) 2003-2024     PgPool Global Development Group
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby
@@ -162,7 +162,8 @@ do_worker_child(void)
        {
                MemoryContextSwitchTo(WorkerMemoryContext);
                MemoryContextResetAndDeleteChildren(WorkerMemoryContext);
-               WD_STATES       wd_status;
+               bool    watchdog_leader;        /* true if I am the watchdog leader */
+
 
                CHECK_REQUEST;
 
@@ -176,9 +177,45 @@ do_worker_child(void)
                 */
                if (pool_config->use_watchdog)
                {
+                       WD_STATES       wd_status;
+                       WDPGBackendStatus       *backendStatus;
+
                        wd_status = wd_internal_get_watchdog_local_node_state();
                        ereport(DEBUG1,
                                        (errmsg("watchdog status: %d", wd_status)));
+                       /*
+                        * Ask the watchdog to get all the backend states from the
+                        * Leader/Coordinator Pgpool-II node.
+                        */
+                       watchdog_leader = false;
+                       backendStatus = get_pg_backend_status_from_leader_wd_node();
+
+                       if (!backendStatus)
+                               /*
+                                * Couldn't get leader status.
+                                */
+                               watchdog_leader = false;
+                       else
+                       {
+                               int     quorum = wd_internal_get_watchdog_quorum_state();
+                               int     node_count = backendStatus->node_count;
+
+                               ereport(DEBUG1,
+                                               (errmsg("quorum: %d node_count: %d",
+                                                               quorum, node_count)));
+                               if (quorum >= 0 && node_count <= 0)
+                               {
+                                       /*
+                                        * Quorum exists and node_count <= 0.
+                                        * Definitely I am the leader.
+                                        */
+                                       watchdog_leader = true;
+                               }
+                               else
+                                       watchdog_leader = false;
+
+                               pfree(backendStatus);
+                       }
                }
 
                /*
@@ -227,8 +264,33 @@ do_worker_child(void)
                                                         */
                                                        if (pool_config->detach_false_primary)
                                                        {
-                                                               n = i;
-                                                               degenerate_backend_set(&n, 1, REQ_DETAIL_SWITCHOVER);
+                                                               /*
+                                                                * However, if watchdog is enabled and I am
+                                                                * not the leader, do not detach the invalid
+                                                                * node, because the information used to
+                                                                * determine the false primary might be
+                                                                * outdated or temporarily inconsistent.
+                                                                * See [pgpool-hackers: 4431] for details.
+                                                                */
+                                                               if (!pool_config->use_watchdog ||
+                                                                       watchdog_leader)
+                                                               {
+                                                                       n = i;
+                                                                       /*
+                                                                        * In case watchdog is enabled, we need
+                                                                        * to add REQ_DETAIL_CONFIRMED, which
+                                                                        * means no quorum consensus is
+                                                                        * required. If we do not add this, the
+                                                                        * target node will remain in quarantine
+                                                                        * state, since other nodes do not
+                                                                        * request failover.
+                                                                        */
+                                                                       degenerate_backend_set(&n, 1,
+                                                                                                                  REQ_DETAIL_SWITCHOVER|REQ_DETAIL_CONFIRMED);
+                                                               }
+                                                               else if (pool_config->use_watchdog)
+                                                                       ereport(LOG,
+                                                                                       (errmsg("do not detach invalid node %d because I am not the leader or quorum does not exist", i)));
                                                        }
                                                }
                                        }
diff --git a/src/test/regression/tests/081.detach_primary_all_down/test.sh b/src/test/regression/tests/081.detach_primary_all_down/test.sh
new file mode 100755 (executable)
index 0000000..e97dcf9
--- /dev/null
@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+#-------------------------------------------------------------------
+# test script for the issue that detach_false_primary could bring down all backends.
+# See [pgpool-hackers: 4431] for more details.
+#
+# It is possible that all DB nodes go down if detach_false_primary is enabled.
+# Story:
+# There are 3 watchdog nodes pgpool0, pgpool1 and pgpool2.
+# There are 2 DB nodes node0 and node1 (initially node 0 is primary).
+# follow_primary_command is disabled.
+# 1) Node 0 goes down at pgpool0 due to network trouble, BUT node 0
+# is actually alive.
+# 2) Node 0 goes down at pgpool1 due to network trouble, BUT node 0
+# is actually alive.
+# 3) Failover is triggered. Since pgpool0 and pgpool1 agree, node 0 is set to
+# down and node 1 is promoted.
+# 4) Before the new status is synced with pgpool2, pgpool2's sr_check
+# finds that there are two primary nodes because of step 3.
+# detach_false_primary is triggered and node 1 goes down.
+# 5) Now all backends are in down status.
+
+# wait for the watchdog to start up by looking for "lifecheck started" in
+# the pgpool.log.  argument: $1: absolute path to the pgpool.log.
+function wait_for_watchdog_startup
+{
+    while :
+    do
+	grep -q "lifecheck started" $1
+	if [ $? = 0 ];then
+	    break;
+	fi
+	sleep 1
+    done
+}
+
+source $TESTLIBS
+TESTDIR=testdir
+PSQL=$PGBIN/psql
+PG_CTL=$PGBIN/pg_ctl
+export PGDATABASE=test
+
+rm -fr $TESTDIR
+mkdir $TESTDIR
+cd $TESTDIR
+
+version=`$PSQL --version|awk '{print $3}'`
+result=`echo "$version >= 9.6"|bc`
+if [ $result = 0 ];then
+    echo "PostgreSQL version $version is 9.5 or before. Skipping test."
+    exit 0
+fi
+
+# create 3 node pgpool with 2 backends.
+$WATCHDOG_SETUP -wn 3 -n 2
+
+# enable detach_false_primary and health_check_test. We need to
+# disable follow_primary_command, otherwise node 0 goes down by
+# follow_primary_command and the test may not reveal the problem.
+# Also we set sr_check_period to a very short value so that
+# detach_false_primary is triggered before the new status is synced
+# by the watchdog leader.
+for i in 0 1 2
+do
+    echo "detach_false_primary = on" >> pgpool$i/etc/pgpool.conf
+    echo "health_check_test = on" >> pgpool$i/etc/pgpool.conf
+    echo "follow_primary_command = ''" >> pgpool$i/etc/pgpool.conf
+    echo "sr_check_period = 1"  >> pgpool$i/etc/pgpool.conf
+done
+
+cd pgpool0
+source ./bashrc.ports
+cd ..
+
+./startall
+
+echo -n "waiting for watchdog node 0 to start up... "
+log=pgpool0/log/pgpool.log
+wait_for_watchdog_startup $log
+echo "done."
+
+$PGPOOL_INSTALL_DIR/bin/pcp_watchdog_info -v -w -h localhost -p $PCP_PORT
+$PGPOOL_INSTALL_DIR/bin/pcp_node_info -h localhost -p $PCP_PORT
+
+# Let node 0 down at pgpool0
+echo "0        down" > pgpool0/log/backend_down_request
+# Let node 0 down at pgpool1
+echo "0        down" > pgpool1/log/backend_down_request
+
+# Wait up to 30 seconds to see whether the problem (all nodes going down) occurs.
+# Observe that pgpool1 and pgpool2 print:
+# LOG:  pgpool_worker_child: invalid node found 1
+# which means sr_check ran detach_false_primary but did not trigger failover:
+# LOG:  do not detach invalid node 1 because I am not the leader or quorum does not exist
+for t in {1..30}
+do
+    for i in 0 1 2
+    do
+       date
+       echo "node info after failover at pgppol$i"
+       cd pgpool$i
+       source ./bashrc.ports
+       cd ..
+       $PGPOOL_INSTALL_DIR/bin/pcp_node_info -h localhost -p $PCP_PORT
+    done
+    # check whether all nodes are down.
+    n0=`$PGPOOL_INSTALL_DIR/bin/pcp_node_info -h localhost -p $PCP_PORT 0|awk '{print $5}'`
+    n1=`$PGPOOL_INSTALL_DIR/bin/pcp_node_info -h localhost -p $PCP_PORT 1|awk '{print $5}'`
+    if [ $n0 = "down" -a $n1 = "down" ];then
+       echo "all nodes go down."
+       ./shutdownall
+       exit 1
+    fi
+    sleep 1
+done
+echo "test succeeded."
+
+./shutdownall
+
+exit 0
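
For reference, a way to run the new test by hand (a sketch, assuming
the standard regress.sh driver under src/test/regression):

    cd src/test/regression
    ./regress.sh 081.detach_primary_all_down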