Fix delay_threshold_by_time and prefer_lower_delay_standby.
author: Tatsuo Ishii <ishii@sraoss.co.jp>
Mon, 5 Jun 2023 11:18:36 +0000 (20:18 +0900)
committer: Tatsuo Ishii <ishii@sraoss.co.jp>
Mon, 5 Jun 2023 12:37:57 +0000 (21:37 +0900)
They have been broken since delay_threshold_by_time was introduced in 4.4.

- delay_threshold_by_time was not checked in where_to_send. This broke
  load balancing when replication is delayed.

- select_load_balancing_node was broken if both
  delay_threshold_by_time and prefer_lower_delay_standby were enabled.

To fix these issues, where_to_send and select_load_balancing_node are
corrected.  Test cases for delay_threshold_by_time are also added to
033.prefer_lower_standby_delay.

Discussion: https://www.pgpool.net/pipermail/pgpool-general/2023-June/008864.html

src/context/pool_query_context.c
src/protocol/pool_pg_utils.c
src/test/regression/tests/033.prefer_lower_standby_delay/test.sh

index c51ff7303a114eabf730d5bf936a619b7c7f3663..8afe9b39e8ffdd3996ff24ab795e00f902510d38 100644 (file)
@@ -2095,8 +2095,11 @@ where_to_send_main_replica(POOL_QUERY_CONTEXT * query_context, char *query, Node
                                         * false then send to the primary.
                                         */
                                        if (STREAM &&
-                                               pool_config->delay_threshold &&
-                                               bkinfo->standby_delay > pool_config->delay_threshold)
+                                               (
+                                                       (pool_config->delay_threshold &&
+                                                        (bkinfo->standby_delay > pool_config->delay_threshold)) ||
+                                                       (pool_config->delay_threshold_by_time &&
+                                                        (bkinfo->standby_delay > pool_config->delay_threshold_by_time*1000*1000))))
                                        {
                                                ereport(DEBUG1,
                                                                (errmsg("could not load balance because of too much replication delay"),
index 8ae2fd3484b4f1bb09de469c194eb98df2544876..2d786dbb143b4d1b8e2b932d95d9231d5bb811d9 100644 (file)
@@ -386,7 +386,7 @@ select_load_balancing_node(void)
 
                                /* Matches */
                                ereport(DEBUG1,
-                                               (errmsg("selecting load balance node db matched"),
+                                               (errmsg("selecting load balance node app name matched"),
                                                 errdetail("app_name: %s index is %d dbnode is %s weight is %f", app_name, index_app,
                                                                   pool_config->app_name_redirect_tokens->token[index_app].right_token,
                                                                   pool_config->app_name_redirect_tokens->token[index_app].weight_token)));
@@ -405,12 +405,10 @@ select_load_balancing_node(void)
                 * and prefer_lower_delay_standby are true, we choose the least delayed
                 * node if suggested_node is standby and delayed over delay_threshold.
                 */
-               if (STREAM &&
-                       pool_config->delay_threshold &&
-                       pool_config->prefer_lower_delay_standby &&
-                       (suggested_node_id != PRIMARY_NODE_ID) &&
-                       (((BACKEND_INFO(suggested_node_id).standby_delay_by_time == false && BACKEND_INFO(suggested_node_id).standby_delay > pool_config->delay_threshold)) ||
-                        ((BACKEND_INFO(suggested_node_id).standby_delay_by_time && BACKEND_INFO(suggested_node_id).standby_delay > pool_config->delay_threshold_by_time * 1000000))))
+               if (STREAM && pool_config->prefer_lower_delay_standby && suggested_node_id != PRIMARY_NODE_ID &&
+                       ((BACKEND_INFO(suggested_node_id).standby_delay_by_time && BACKEND_INFO(suggested_node_id).standby_delay > pool_config->delay_threshold_by_time * 1000000) ||
+                        (BACKEND_INFO(suggested_node_id).standby_delay_by_time == false && BACKEND_INFO(suggested_node_id).standby_delay > pool_config->delay_threshold)))
+
                {
                        ereport(DEBUG1,
                                (errmsg("selecting load balance node"),
@@ -420,7 +418,10 @@ select_load_balancing_node(void)
                         * The new load balancing node is seleted from the
                         * nodes which have the lowest delay.
                         */
-                       lowest_delay = pool_config->delay_threshold;
+                       if (pool_config->delay_threshold_by_time > 0)
+                               lowest_delay = pool_config->delay_threshold_by_time * 1000 * 1000;
+                       else
+                               lowest_delay = pool_config->delay_threshold;
 
                        /* Initialize */
                        total_weight = 0.0;
@@ -562,16 +563,20 @@ select_load_balancing_node(void)
         * prefer_lower_delay_standby is true, we elect the most lower delayed
         * node if suggested_node is standby and delayed over delay_threshold.
         */
-       if (STREAM &&
-               pool_config->delay_threshold &&
-               pool_config->prefer_lower_delay_standby &&
-               (BACKEND_INFO(selected_slot).standby_delay > pool_config->delay_threshold))
+       if (STREAM && pool_config->prefer_lower_delay_standby &&
+               ((pool_config->delay_threshold_by_time &&
+                 BACKEND_INFO(selected_slot).standby_delay > pool_config->delay_threshold_by_time*1000*1000) ||
+                (pool_config->delay_threshold &&
+                 BACKEND_INFO(selected_slot).standby_delay > pool_config->delay_threshold)))
        {
                ereport(DEBUG1,
                                (errmsg("selecting load balance node"),
                                 errdetail("backend id %d is streaming delayed over delay_threshold", selected_slot)));
 
-               lowest_delay = pool_config->delay_threshold;
+               if (pool_config->delay_threshold_by_time > 0)
+                       lowest_delay = pool_config->delay_threshold_by_time * 1000 * 1000;
+               else
+                       lowest_delay = pool_config->delay_threshold;
                total_weight = 0.0;
                for (i = 0; i < NUM_BACKENDS; i++)
                {
index af7c37146d07db3d879071d0fa01e3d628006777..9dc437693d771a0c23097a13e0803b3da7214ea5 100755 (executable)
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 #-------------------------------------------------------------------
-# test script for load balancing.
+# test script for prefer_lower_delay_standby and standby delay.
 #
 source $TESTLIBS
 TESTDIR=testdir
@@ -27,28 +27,39 @@ else
 fi
 
 
-# node 1 port number
+# node 1,2 port number
 PORT1=11003
+PORT2=11004
 
-# request replication pause and wait for confirmation
+# request replication pause and wait for confirmation.
+# argument is a list of port numbers
+# (currently only PORT1 is used)
 function replay_pause
 {
-    $PSQL -p $PORT1 test -c "$REPLAY_PAUSE"
-    for i in 1 2 3 4
+    for i in $1
     do
-       res=`$PSQL -p $PORT1 -q -t test -c "$REPLAY_STATE"|sed 's/ //'g`
-       if [ "$res" = "paused" ];then
-           break;
-       else
-           echo pause state: $res
+       echo ===$i===
+       $PSQL -p $i test -c "$REPLAY_PAUSE"
+    done
+
+    for p in $1
+    do
+       for i in 1 2 3 4
+       do
+           res=`$PSQL -p $p -q -t test -c "$REPLAY_STATE"|sed 's/ //'g`
+           if [ "$res" = "paused" ];then
+               break;
+           else
+               echo pause state: $res
+           fi
+           sleep 1
+       done
+       if [ "$res" != "paused" ];then
+           echo replay pause failed.
+           ./shutdownall
+           exit 1
        fi
-       sleep 1
     done
-    if [ "$res" != "paused" ];then
-       echo replay pause failed.
-       ./shutdownall
-       exit 1
-    fi
 }
 
 rm -fr $TESTDIR
@@ -59,44 +70,125 @@ cd $TESTDIR
 echo -n "creating test environment..."
 $PGPOOL_SETUP -m s -n 3 || exit 1
 echo "done."
-
 source ./bashrc.ports
-echo "app_name_redirect_preference_list = 'psql:1'" >> etc/pgpool.conf
-echo "delay_threshold = 10" >> etc/pgpool.conf
-echo "prefer_lower_delay_standby = on" >> etc/pgpool.conf
-echo "sr_check_period = 3" >> etc/pgpool.conf
+export PGPORT=$PGPOOL_PORT
 
-./startall
+# The default wal_receiver_status_interval is 10 seconds, which is too
+# slow to know the standby delay.
+echo "wal_receiver_status_interval = 1s" >> data1/postgresql.conf
+echo "wal_receiver_status_interval = 1s" >> data2/postgresql.conf
 
-export PGPORT=$PGPOOL_PORT
+# Sleep time in seconds after pausing wal replay in case of
+# delay_threshold_by_time.  By setting wal_receiver_status_interval to
+# 1 second, we could set this as short as 3 seconds.
+STIME=3
 
+# ----------------------------------------------------------------------------------------
+echo === Test1: delay_threshold with prefer_lower_delay_standby disabled. ===
+# ----------------------------------------------------------------------------------------
+echo "delay_threshold = 10" >> etc/pgpool.conf
+echo "sr_check_period = 1" >> etc/pgpool.conf
+echo "log_standby_delay = 'always'" >> etc/pgpool.conf
+echo "log_min_messages = 'DEBUG1'" >> etc/pgpool.conf
+# force load balance node to be 1.
+echo "backend_weight0 = 0" >> etc/pgpool.conf
+echo "backend_weight2 = 0" >> etc/pgpool.conf
+./startall
 wait_for_pgpool_startup
 
+# Pause replay on node 1. Since prefer_lower_delay_standby is
+# disabled, SELECT query should be sent to primary node.
+replay_pause $PORT1
+
 $PSQL test <<EOF
 CREATE TABLE t1(i INTEGER);
 CREATE TABLE t2(i INTEGER);
 CREATE SEQUENCE myseq;
 EOF
 
-echo start: prefer_lower_delay_standby is on.
-
-# check to see if pgpool selects proper node for load balance
-# at the connection time
+$PSQL test <<EOF
+INSERT INTO t1 SELECT * FROM generate_series(1,100);
+SELECT pg_sleep(4);
+SHOW POOL_NODES;
+SELECT * FROM t1 LIMIT 1;
+EOF
+fgrep "SELECT * FROM t1 LIMIT 1;" log/pgpool.log |grep "DB node id: 0">/dev/null 2>&1
+if [ $? != 0 ];then
+    # expected result not found
+    echo fail: query was not sent to primary node.
+    ./shutdownall
+    exit 1
+fi
+echo ok: testing delay_threshold with prefer_lower_delay_standby disabled succeeded.
+echo resume streaming replication node 1
+$PSQL -p $PORT1 test -c "$REPLAY_RESUME"
+sleep 2
+./shutdownall
 
+# ----------------------------------------------------------------------------------------
+echo === Test2: delay_threshold_by_time with prefer_lower_delay_standby disabled. ===
+# ----------------------------------------------------------------------------------------
+echo Start testing delay_threshold_by_time with prefer_lower_delay_standby disabled
+echo "delay_threshold = 0" >> etc/pgpool.conf
+echo "delay_threshold_by_time = 1" >> etc/pgpool.conf
+./startall
+wait_for_pgpool_startup
 # pause replay on node 1
-replay_pause
+replay_pause $PORT1
 
 $PSQL test <<EOF
-PGPOOL SET log_min_messages TO DEBUG1;
 INSERT INTO t1 SELECT * FROM generate_series(1,100);
-SELECT pg_sleep(4);
+EOF
+sleep $STIME
+$PSQL test <<EOF
+SHOW POOL_NODES;
+SELECT * FROM t1 LIMIT 1;
+EOF
+fgrep "SELECT * FROM t1 LIMIT 1;" log/pgpool.log |grep "DB node id: 0">/dev/null 2>&1
+if [ $? != 0 ];then
+    # expected result not found
+    echo fail: query was not sent to primary node.
+    ./shutdownall
+    exit 1
+fi
+echo ok: testing delay_threshold_by_time with prefer_lower_delay_standby disabled succeeded.
+./shutdownall
+# unforce load balance node to be 1.
+echo "backend_weight0 = 1" >> etc/pgpool.conf
+echo "backend_weight2 = 1" >> etc/pgpool.conf
+
+# ----------------------------------------------------------------------------------------
+echo === Test3: check to see if pgpool selects proper node for load balance ===
+echo at the connection time with prefer_lower_delay_standby enabled.
+# ----------------------------------------------------------------------------------------
+
+# Redirect connection from app "psql" to node 1. This will make writing test easier.
+echo "app_name_redirect_preference_list = 'psql:1'" >> etc/pgpool.conf
+echo "prefer_lower_delay_standby = on" >> etc/pgpool.conf
+./startall
+wait_for_pgpool_startup
+
+echo start: prefer_lower_delay_standby is on.
+$PSQL test <<EOF
+SHOW POOL_NODES;
+EOF
+
+# pause replay on node 1
+replay_pause $PORT1
+
+$PSQL test <<EOF
+INSERT INTO t1 SELECT * FROM generate_series(1,1000);
+EOF
+sleep $STIME
+$PSQL test <<EOF
+SHOW POOL_NODES;
 SELECT * FROM t1 LIMIT 1;
 EOF
 
 fgrep "SELECT * FROM t1 LIMIT 1;" log/pgpool.log |grep "DB node id: 2">/dev/null 2>&1
 if [ $? != 0 ];then
     # expected result not found
-    echo fail: query is sent to primary node.
+    echo fail: query was not sent to node 2.
     ./shutdownall
     exit 1
 fi
@@ -105,20 +197,24 @@ echo ok: query is sent to another standby node.
 
 echo resume streaming replication node 1
 $PSQL -p $PORT1 test -c "$REPLAY_RESUME"
-sleep 4
+sleep 2
 
-# check to see if pgpool selects proper node for load balance
-# while in a session. For the test we use SELECT using write
-# function. It should be sent to primary node.
+# ----------------------------------------------------------------------------------------
+echo === Test4: check to see if pgpool selects proper node for load balance ===
+echo while in a session. For the test we use SELECT using write
+echo function. It should be sent to primary node.
 # see bug #798.
 # https://www.pgpool.net/mantisbt/view.php?id=798
+# ----------------------------------------------------------------------------------------
 
 $PSQL test <<EOF
-PGPOOL SET log_min_messages TO DEBUG1;
 \! $PSQL -p $PORT1 test -c "$REPLAY_PAUSE"
-SELECT pg_sleep(4);
+SELECT pg_sleep(2);
 INSERT INTO t1 SELECT * FROM generate_series(1,100);
-SELECT pg_sleep(4);
+EOF
+sleep $STIME
+$PSQL test <<EOF
+SHOW POOL_NODES;
 SELECT nextval('myseq');
 EOF
 
@@ -130,7 +226,7 @@ if [ $? != 0 ];then
     exit 1
 fi
 
-echo start: prefer_lower_delay_standby is off.
+echo Test5: prefer_lower_delay_standby is off.
 
 $PSQL -p $PORT1 test -c "$REPLAY_RESUME"
 
@@ -148,19 +244,20 @@ do
 done
 
 # pause replay on node 1
-replay_pause
+replay_pause $PORT1
 
 $PSQL test <<EOF
-PGPOOL SET log_min_messages TO DEBUG1;
 INSERT INTO t2 SELECT * FROM generate_series(1,100);
-SELECT pg_sleep(4);
+EOF
+sleep $STIME
+$PSQL test <<EOF
 SELECT * FROM t2 LIMIT 1;
 EOF
 
 fgrep "SELECT * FROM t2 LIMIT 1;" log/pgpool.log |grep "DB node id: 0">/dev/null 2>&1
 if [ $? != 0 ];then
     # expected result not found
-    echo fail: query is sent to standby node.
+    echo fail: query was sent to standby node.
     ./shutdownall
     exit 1
 fi