Fix prefer_lower_delay_standby bug.
authorTatsuo Ishii <ishii@sraoss.co.jp>
Sun, 30 Apr 2023 06:41:02 +0000 (15:41 +0900)
committerTatsuo Ishii <ishii@sraoss.co.jp>
Sun, 30 Apr 2023 06:41:02 +0000 (15:41 +0900)
When client connects to pgpool, one of standbys are chosen as the load
balancing node.  If standby delay exceeds delay_threshold while the
session continues, prefer_lower_delay_standby will choose the least
delay standby node as the new load balancing node and set the target
backend to the node. Unfortunately the decision was made *before* the
checking that SELECT query includes writing function etc., pgpool
happily sends SELECT which cannot be executed on standby.  To fix
this, prefer_lower_delay_standby treatment is moved after the writing
function etc. check.

033.prefer_lower_standby_delay regression test is modified to include
the case above. Also I have done some refactoring:

- Remove unnecessary while loop for each clustering mode because the
  test is only useful for streaming replication mode.

- Add checking wal_replay_pause is actually executed.

Bug reported by: https://www.pgpool.net/mantisbt/view.php?id=798
along with suggested fix.

src/context/pool_query_context.c
src/test/regression/tests/033.prefer_lower_standby_delay/test.sh

index 9c576dcb9a31c258305e4f8899fae9f7100c74d7..f2a79994232af08b8ef9ef82965a1707185f6ae3 100644 (file)
@@ -532,34 +532,6 @@ pool_where_to_send(POOL_QUERY_CONTEXT * query_context, char *query, Node *node)
                                         * Load balance if possible
                                         */
 
-                                       /*
-                                        * As streaming replication delay is too much, if
-                                        * prefer_lower_delay_standby is true then elect new
-                                        * load balance node which is lowest delayed,
-                                        * false then send to the primary.
-                                        */
-                                       if (STREAM &&
-                                               pool_config->delay_threshold &&
-                                               bkinfo->standby_delay > pool_config->delay_threshold)
-                                       {
-                                               ereport(DEBUG1,
-                                                               (errmsg("could not load balance because of too much replication delay"),
-                                                                errdetail("destination = %d for query= \"%s\"", dest, query)));
-
-                                               if (pool_config->prefer_lower_delay_standby)
-                                               {
-                                                       int new_load_balancing_node = select_load_balancing_node();
-
-                                                       session_context->load_balance_node_id = new_load_balancing_node;
-                                                       session_context->query_context->load_balance_node_id = session_context->load_balance_node_id;
-                                                       pool_set_node_to_be_sent(query_context, session_context->query_context->load_balance_node_id);
-                                               }
-                                               else
-                                               {
-                                                       pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
-                                               }
-                                       }
-
                                        /*
                                         * If system catalog is used in the SELECT, we prefer to
                                         * send to the primary. Example: SELECT * FROM pg_class
@@ -570,7 +542,7 @@ pool_where_to_send(POOL_QUERY_CONTEXT * query_context, char *query, Node *node)
                                         * primary system catalog. Please note that this test must
                                         * be done *before* test using pool_has_temp_table.
                                         */
-                                       else if (pool_has_system_catalog(node))
+                                       if (pool_has_system_catalog(node))
                                        {
                                                ereport(DEBUG1,
                                                                (errmsg("could not load balance because systems catalogs are used"),
@@ -631,11 +603,44 @@ pool_where_to_send(POOL_QUERY_CONTEXT * query_context, char *query, Node *node)
                                        else
                                        {
                                                if (pool_config->statement_level_load_balance)
+                                               {
                                                        session_context->load_balance_node_id = select_load_balancing_node();
+                                                       bkinfo = pool_get_node_info(session_context->load_balance_node_id);
+                                               }
 
-                                               session_context->query_context->load_balance_node_id = session_context->load_balance_node_id;
-                                               pool_set_node_to_be_sent(query_context,
-                                                                                                session_context->query_context->load_balance_node_id);
+                                               /*
+                                                * As streaming replication delay is too much, if
+                                                * prefer_lower_delay_standby is true then elect new
+                                                * load balance node which is lowest delayed,
+                                                * false then send to the primary.
+                                                */
+                                               if (STREAM &&
+                                                       pool_config->delay_threshold &&
+                                                       bkinfo->standby_delay > pool_config->delay_threshold)
+                                               {
+                                                       ereport(DEBUG1,
+                                                                       (errmsg("could not load balance because of too much replication delay"),
+                                                                        errdetail("destination = %d for query= \"%s\"", dest, query)));
+
+                                                       if (pool_config->prefer_lower_delay_standby)
+                                                       {
+                                                               int new_load_balancing_node = select_load_balancing_node();
+
+                                                               session_context->load_balance_node_id = new_load_balancing_node;
+                                                               session_context->query_context->load_balance_node_id = session_context->load_balance_node_id;
+                                                               pool_set_node_to_be_sent(query_context, session_context->query_context->load_balance_node_id);
+                                                       }
+                                                       else
+                                                       {
+                                                               pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
+                                                       }
+                                               }
+                                               else
+                                               {
+                                                       session_context->query_context->load_balance_node_id = session_context->load_balance_node_id;
+                                                       pool_set_node_to_be_sent(query_context,
+                                                                                                        session_context->query_context->load_balance_node_id);
+                                               }
                                        }
                                }
                                else
index 7583984159a3c97dca68abeb0139a27bd7b6ce0b..6a64c318c7df5bb180a2f03603200ce37a7b0aea 100755 (executable)
@@ -14,97 +14,153 @@ result=`echo "$major_version >= 10"|bc`
 if [ $result == 1 ];then
        REPLAY_PAUSE="SELECT pg_wal_replay_pause();"
        REPLAY_RESUME="SELECT pg_wal_replay_resume();"
+       REPLAY_STATE="SELECT pg_get_wal_replay_pause_state()"
 else
        REPLAY_PAUSE="SELECT pg_xlog_replay_pause();"
        REPLAY_RESUME="SELECT pg_xlog_replay_resume();"
+       REPLAY_STATE="SELECT pg_get_xlog_replay_pause_state()"
 fi
 
-for mode in s
-do
-       rm -fr $TESTDIR
-       mkdir $TESTDIR
-       cd $TESTDIR
+# node 1 port number
+PORT1=11003
+
+# request replication pause and wait for confirmation
+function replay_pause
+{
+    $PSQL -p $PORT1 test -c "$REPLAY_PAUSE"
+    for i in 1 2 3 4
+    do
+       res=`$PSQL -p $PORT1 -q -t test -c "$REPLAY_STATE"|sed 's/ //'g`
+       if [ "$res" = "paused" ];then
+           break;
+       else
+           echo pause state: $res
+       fi
+       sleep 1
+    done
+    if [ "$res" != "paused" ];then
+       echo replay pause failed.
+       ./shutdownall
+       exit 1
+    fi
+}
 
-       # create test environment
-       echo -n "creating test environment..."
-       $PGPOOL_SETUP -m $mode -n 3 || exit 1
-       echo "done."
+rm -fr $TESTDIR
+mkdir $TESTDIR
+cd $TESTDIR
 
-       source ./bashrc.ports
-       echo "app_name_redirect_preference_list = 'psql:1'" >> etc/pgpool.conf
-       echo "delay_threshold = 10" >> etc/pgpool.conf
-       echo "prefer_lower_delay_standby = on" >> etc/pgpool.conf
-       echo "sr_check_period = 3" >> etc/pgpool.conf
+# create test environment
+echo -n "creating test environment..."
+$PGPOOL_SETUP -m s -n 3 || exit 1
+echo "done."
 
-       ./startall
+source ./bashrc.ports
+echo "app_name_redirect_preference_list = 'psql:1'" >> etc/pgpool.conf
+echo "delay_threshold = 10" >> etc/pgpool.conf
+echo "prefer_lower_delay_standby = on" >> etc/pgpool.conf
+echo "sr_check_period = 3" >> etc/pgpool.conf
 
-       export PGPORT=$PGPOOL_PORT
+./startall
 
-       wait_for_pgpool_startup
+export PGPORT=$PGPOOL_PORT
 
-       $PSQL test <<EOF
+wait_for_pgpool_startup
+
+$PSQL test <<EOF
 CREATE TABLE t1(i INTEGER);
 CREATE TABLE t2(i INTEGER);
+CREATE SEQUENCE myseq;
 EOF
 
-       echo start: prefer_lower_delay_standby is on.
+echo start: prefer_lower_delay_standby is on.
+
+# check to see if pgpool selects proper node for load balance
+# at the connection time
 
-       $PSQL -p 11003 test -c "$REPLAY_PAUSE"
+# pause replay on node 1
+replay_pause
 
-       $PSQL test <<EOF
+$PSQL test <<EOF
 PGPOOL SET log_min_messages TO DEBUG1;
-INSERT INTO t1 VALUES (1), (2), (3);
+INSERT INTO t1 SELECT * FROM generate_series(1,100);
 SELECT pg_sleep(4);
-SELECT * FROM t1;
+SELECT * FROM t1 LIMIT 1;
 EOF
 
-       fgrep "SELECT * FROM t1;" log/pgpool.log |grep "DB node id: 2">/dev/null 2>&1
-       if [ $? != 0 ];then
-       # expected result not found
-               echo fail: query is sent to primary node.
-               ./shutdownall
-               exit 1
-       fi
+fgrep "SELECT * FROM t1 LIMIT 1;" log/pgpool.log |grep "DB node id: 2">/dev/null 2>&1
+if [ $? != 0 ];then
+    # expected result not found
+    echo fail: query is sent to primary node.
+    ./shutdownall
+    exit 1
+fi
+
+echo ok: query is sent to another standby node.
+
+echo resume streaming replication node 1
+$PSQL -p $PORT1 test -c "$REPLAY_RESUME"
+sleep 4
+
+# check to see if pgpool selects proper node for load balance
+# while in a session. For the test we use SELECT using write
+# function. It should be sent to primary node.
+# see bug #798.
+# https://www.pgpool.net/mantisbt/view.php?id=798
+
+$PSQL test <<EOF
+PGPOOL SET log_min_messages TO DEBUG1;
+\! $PSQL -p $PORT1 test -c "$REPLAY_PAUSE"
+SELECT pg_sleep(4);
+INSERT INTO t1 SELECT * FROM generate_series(1,100);
+SELECT pg_sleep(4);
+SELECT nextval('myseq');
+EOF
 
-       echo ok: query is sent to another standby node.
+fgrep "SELECT nextval('myseq');" log/pgpool.log |grep "DB node id: 0">/dev/null 2>&1
+if [ $? != 0 ];then
+    # expected result not found
+    echo fail: write query is not sent to primary node.
+    ./shutdownall
+    exit 1
+fi
 
-       $PSQL -p 11003 test -c "$REPLAY_RESUME"
+echo start: prefer_lower_delay_standby is off.
 
-       echo start: prefer_lower_delay_standby is off.
+$PSQL -p $PORT1 test -c "$REPLAY_RESUME"
 
-       echo "prefer_lower_delay_standby = off" >> etc/pgpool.conf
+echo "prefer_lower_delay_standby = off" >> etc/pgpool.conf
 
-       $PGPOOL_INSTALL_DIR/bin/pcp_reload_config -w -h localhost -p $PCP_PORT
+$PGPOOL_INSTALL_DIR/bin/pcp_reload_config -w -h localhost -p $PCP_PORT
 
-       while :
-       do
-               $PSQL test -c "PGPOOL SHOW prefer_lower_delay_standby" |grep off
-               if [ $? = 0 ]; then
-                       break
-               fi
-               sleep 1
-       done
+while :
+do
+    $PSQL test -c "PGPOOL SHOW prefer_lower_delay_standby" |grep off
+    if [ $? = 0 ]; then
+       break
+    fi
+    sleep 1
+done
 
-       $PSQL -p 11003 test -c "$REPLAY_PAUSE"
+# pause replay on node 1
+replay_pause
 
-       $PSQL test <<EOF
+$PSQL test <<EOF
 PGPOOL SET log_min_messages TO DEBUG1;
-INSERT INTO t2 VALUES (1), (2), (3);
+INSERT INTO t2 SELECT * FROM generate_series(1,100);
 SELECT pg_sleep(4);
-SELECT * FROM t2;
+SELECT * FROM t2 LIMIT 1;
 EOF
 
-       fgrep "SELECT * FROM t2;" log/pgpool.log |grep "DB node id: 0">/dev/null 2>&1
-       if [ $? != 0 ];then
-       # expected result not found
-               echo fail: query is sent to standby node.
-               ./shutdownall
-               exit 1
-       fi
+fgrep "SELECT * FROM t2 LIMIT 1;" log/pgpool.log |grep "DB node id: 0">/dev/null 2>&1
+if [ $? != 0 ];then
+    # expected result not found
+    echo fail: query is sent to standby node.
+    ./shutdownall
+    exit 1
+fi
 
-       echo ok: prefer lower delay standby works.
+echo ok: prefer lower delay standby works.
 
-       ./shutdownall
+./shutdownall
 
-done
 exit 0