Feature: allow setting delay_threshold_by_time in milliseconds.
author: Tatsuo Ishii <ishii@sraoss.co.jp>
Sat, 19 Aug 2023 06:44:02 +0000 (15:44 +0900)
committer: Tatsuo Ishii <ishii@sraoss.co.jp>
Sat, 19 Aug 2023 06:44:02 +0000 (15:44 +0900)
Previously it was allowed only in seconds.  Also do some
refactoring: create a new function "check_replication_delay" which
checks the replication delay and returns 0, -1 or -2 for "no
delay", "delayed (delay_threshold_by_time)" or "delayed
(delay_threshold)" respectively. This should simplify the lengthy
if-statement to check the replication delay.

Discussion: https://www.pgpool.net/pipermail/pgpool-hackers/2023-August/004372.html

doc.ja/src/sgml/stream-check.sgml
doc/src/sgml/stream-check.sgml
src/config/pool_config_variables.c
src/context/pool_query_context.c
src/include/protocol/pool_pg_utils.h
src/protocol/pool_pg_utils.c
src/sample/pgpool.conf.sample-stream
src/streaming_replication/pool_worker_child.c
src/test/regression/tests/033.prefer_lower_standby_delay/test.sh

index dcb9846e4f7cacc31c632543390438a3f82818a7..396adf397c3069d6e8288d1ba5f0402cc6b61268 100644 (file)
 
     <para>
      <!--
-     Specifies the maximum tolerance level of replication delay in
-     seconds on the standby server against the primary server.  If the
-     specified value is greater than
+     Specifies the maximum tolerance level of replication delay 
+     on the standby server against the primary server.
+     If this value is specified without units, it is taken as milliseconds.
+     If the specified value is greater than
      0, <xref linkend="guc-delay-threshold"> is ignored.  If the delay
      exceeds this configured level,
      <productname>Pgpool-II</productname> stops sending the <acronym>
       This delay threshold check is performed every <xref linkend="guc-sr-check-period">.
        Default is 0.
      -->
-     プライマリサーバに対するスタンバイサーバのレプリケーション遅延の許容度を秒単位で指定します。
+     プライマリサーバに対するスタンバイサーバのレプリケーション遅延の許容時間を指定します。
+     この値が単位無しで指定された場合は、ミリ秒単位であると見なします。
+       0よりも大きい値が指定されると、<xref linkend="guc-delay-threshold">は無視されます。
      <productname>Pgpool-II</productname>は、スタンバイサーバの遅延がこの設定レベルを超えた場合には、 <xref linkend="guc-load-balance-mode">が有効であっても、プライマリに追いつくまでそのスタンバイノードには<acronym>SELECT</acronym>クエリを送信せず、全てプライマリサーバに送るようにします。
       このパラメータが0の場合は、遅延のチェックを行ないません。
       この遅延閾値のチェックは<xref linkend="guc-sr-check-period">毎に行われます。
index 25b41235311cdad16f73f9511c8e448397e197ec..c86c9322c74ae16f78ddd30fe31a5c5e90111b78 100644 (file)
    <listitem>
 
     <para>
-     Specifies the maximum tolerance level of replication delay in
-     seconds on the standby server against the primary server.  If the
-     specified value is greater than
+     Specifies the maximum tolerance level of replication delay 
+     on the standby server against the primary server.
+     If this value is specified without units, it is taken as milliseconds.
+     If the specified value is greater than
      0, <xref linkend="guc-delay-threshold"> is ignored.  If the delay
      exceeds this configured level,
      <productname>Pgpool-II</productname> stops sending the <acronym>
index c5a6fb34b07d59d0656500cfc48b5128f1119009..639cf2250db9b7b56f37d0309cdd3b92013fd03c 100644 (file)
@@ -2295,7 +2295,7 @@ static struct config_int ConfigureNamesInt[] =
        {
                {"delay_threshold_by_time", CFGCXT_RELOAD, STREAMING_REPLICATION_CONFIG,
                        "standby delay threshold by time.",
-                       CONFIG_VAR_TYPE_INT, false, GUC_UNIT_S,
+                       CONFIG_VAR_TYPE_INT, false, GUC_UNIT_MS,
                },
                &g_pool_config.delay_threshold_by_time,
                0,
index 8e89a53be0bbac8c8577f97463e134a494224cdc..50d15e33e7d265eb8858b15c18d3f2c56d1cf393 100644 (file)
@@ -2018,8 +2018,6 @@ where_to_send_main_replica(POOL_QUERY_CONTEXT * query_context, char *query, Node
                                 !pool_is_failed_transaction() &&
                                 pool_get_transaction_isolation() != POOL_SERIALIZABLE))
                        {
-                               BackendInfo *bkinfo = pool_get_node_info(session_context->load_balance_node_id);
-
                                /*
                                 * Load balance if possible
                                 */
@@ -2097,7 +2095,6 @@ where_to_send_main_replica(POOL_QUERY_CONTEXT * query_context, char *query, Node
                                        if (pool_config->statement_level_load_balance)
                                        {
                                                session_context->load_balance_node_id = select_load_balancing_node();
-                                               bkinfo = pool_get_node_info(session_context->load_balance_node_id);
                                        }
 
                                        /*
@@ -2106,12 +2103,7 @@ where_to_send_main_replica(POOL_QUERY_CONTEXT * query_context, char *query, Node
                                         * load balance node which is lowest delayed,
                                         * false then send to the primary.
                                         */
-                                       if (STREAM &&
-                                               (
-                                                       (pool_config->delay_threshold &&
-                                                        (bkinfo->standby_delay > pool_config->delay_threshold)) ||
-                                                       (pool_config->delay_threshold_by_time &&
-                                                        (bkinfo->standby_delay > pool_config->delay_threshold_by_time*1000*1000))))
+                                       if (STREAM && check_replication_delay(session_context->load_balance_node_id))
                                        {
                                                ereport(DEBUG1,
                                                                (errmsg("could not load balance because of too much replication delay"),
index 25de6b2a6e9ee86d450ddaa8730bd3e423085333..bd94935726108945ce4669d33d6506e982f348cd 100644 (file)
@@ -60,5 +60,6 @@ extern void si_acquire_snapshot(void);
 extern void si_snapshot_acquired(void);
 extern void si_commit_request(void);
 extern void si_commit_done(void);
+extern int     check_replication_delay(int node_id);
 
 #endif /* pool_pg_utils_h */
index a81dfd170088fb812d441cbc5ce99230f35e50a1..8faff3e8b303fdd8b290d8657dc86b6ac50d25b1 100644 (file)
@@ -441,10 +441,9 @@ select_load_balancing_node(void)
                 * and prefer_lower_delay_standby are true, we choose the least delayed
                 * node if suggested_node is standby and delayed over delay_threshold.
                 */
-               if (STREAM && pool_config->prefer_lower_delay_standby && suggested_node_id != PRIMARY_NODE_ID &&
-                       ((BACKEND_INFO(suggested_node_id).standby_delay_by_time && BACKEND_INFO(suggested_node_id).standby_delay > pool_config->delay_threshold_by_time * 1000000) ||
-                        (BACKEND_INFO(suggested_node_id).standby_delay_by_time == false && BACKEND_INFO(suggested_node_id).standby_delay > pool_config->delay_threshold)))
-
+               if (STREAM && pool_config->prefer_lower_delay_standby &&
+                       suggested_node_id != PRIMARY_NODE_ID &&
+                       check_replication_delay(suggested_node_id) < 0)
                {
                        ereport(DEBUG1,
                                (errmsg("selecting load balance node"),
@@ -455,7 +454,7 @@ select_load_balancing_node(void)
                         * nodes which have the lowest delay.
                         */
                        if (pool_config->delay_threshold_by_time > 0)
-                               lowest_delay = pool_config->delay_threshold_by_time * 1000 * 1000;
+                               lowest_delay = pool_config->delay_threshold_by_time * 1000;     /* convert from milli seconds to micro seconds */
                        else
                                lowest_delay = pool_config->delay_threshold;
 
@@ -602,17 +601,14 @@ select_load_balancing_node(void)
         * node if suggested_node is standby and delayed over delay_threshold.
         */
        if (STREAM && pool_config->prefer_lower_delay_standby &&
-               ((pool_config->delay_threshold_by_time &&
-                 BACKEND_INFO(selected_slot).standby_delay > pool_config->delay_threshold_by_time*1000*1000) ||
-                (pool_config->delay_threshold &&
-                 BACKEND_INFO(selected_slot).standby_delay > pool_config->delay_threshold)))
+               check_replication_delay(selected_slot) < 0)
        {
                ereport(DEBUG1,
                                (errmsg("selecting load balance node"),
                                 errdetail("backend id %d is streaming delayed over delay_threshold", selected_slot)));
 
                if (pool_config->delay_threshold_by_time > 0)
-                       lowest_delay = pool_config->delay_threshold_by_time * 1000 * 1000;
+                       lowest_delay = pool_config->delay_threshold_by_time * 1000;
                else
                        lowest_delay = pool_config->delay_threshold;
                total_weight = 0.0;
@@ -1097,3 +1093,40 @@ si_commit_done(void)
                session->si_state = SI_NO_SNAPSHOT;
        }
 }
+
+/*
+ * Check the replication delay of backend node "node_id" and return the
+ * status.
+ *
+ * Return values:
+ *  0: no delay, or not in streaming replication mode, or both
+ *     delay_threshold_by_time and delay_threshold are set to 0
+ * -1: delay exceeds delay_threshold_by_time
+ * -2: delay exceeds delay_threshold
+ *
+ * If delay_threshold_by_time is greater than 0, delay_threshold is ignored
+ * (as documented for delay_threshold_by_time), so -2 is returned only when
+ * delay_threshold_by_time is 0.
+ */
+int    check_replication_delay(int node_id)
+{
+       BackendInfo *bkinfo;
+
+       if (!STREAM)
+               return 0;
+
+       bkinfo = pool_get_node_info(node_id);
+
+       if (pool_config->delay_threshold_by_time > 0)
+       {
+               /*
+                * Check delay_threshold_by_time.  bkinfo->standby_delay is in
+                * microseconds while delay_threshold_by_time is in milliseconds.
+                * We need to multiply delay_threshold_by_time by 1000 to
+                * normalize.  Use 1000LL so that the product is computed as
+                * long long: a plain int multiplication overflows (undefined
+                * behavior) once the threshold exceeds INT_MAX / 1000 ms.
+                */
+               if (bkinfo->standby_delay > pool_config->delay_threshold_by_time * 1000LL)
+                       return -1;
+
+               /*
+                * delay_threshold is ignored while delay_threshold_by_time is
+                * set, so do not fall through and compare the delay against
+                * delay_threshold.
+                */
+               return 0;
+       }
+
+       /*
+        * Check delay_threshold.
+        */
+       if (pool_config->delay_threshold > 0 &&
+               bkinfo->standby_delay > pool_config->delay_threshold)
+               return -2;
+
+       return 0;
+}
+
index 769c516cad85a4670eec53a31cd96ae65334d790..e7b5b1c53e041bfc2b86e69a631fab37d7cb32dd 100644 (file)
@@ -520,7 +520,7 @@ backend_clustering_mode = 'streaming_replication'
                                    # Disabled (0) by default
 #delay_threshold_by_time = 0
                                    # Threshold before not dispatching query to standby node
-                                   # Unit is in second(s)
+                                   # The default unit is in millisecond(s)
                                    # Disabled (0) by default
 
 #prefer_lower_delay_standby = off
@@ -679,7 +679,7 @@ backend_clustering_mode = 'streaming_replication'
 
 #auto_failback = off
                                    # Dettached backend node reattach automatically
-                                   # if replication_state is 'streaming'.
+                                   # if replication_state is 'streaming'.
 #auto_failback_interval = 1min
                                    # Min interval of executing auto_failback in
                                    # seconds.
index 31bc1a62a34fb99b2f73008cc84a776b74e66ee7..7b69dd7b2cbb653b0901cb21d48068dba8981931 100644 (file)
@@ -495,7 +495,7 @@ check_replication_time_lag(void)
                                                {
                                                        bkinfo->standby_delay = atol(s);
                                                        ereport(DEBUG1,
-                                                                       (errmsg("standby delay in seconds * 1000000: " UINT64_FORMAT "", bkinfo->standby_delay)));
+                                                                       (errmsg("standby delay in milli seconds * 1000: " UINT64_FORMAT "", bkinfo->standby_delay)));
                                                }
                                                else
                                                        bkinfo->standby_delay = 0;
@@ -545,7 +545,7 @@ check_replication_time_lag(void)
                        {
                                lag = bkinfo->standby_delay;
                                delay_threshold_by_time = pool_config->delay_threshold_by_time;
-                               delay_threshold_by_time *= 1000000;
+                               delay_threshold_by_time *= 1000;        /* convert from milli seconds to micro seconds */
 
                                /* Log delay if necessary */
                                if ((pool_config->log_standby_delay == LSD_ALWAYS && lag > 0) ||
index 9dc437693d771a0c23097a13e0803b3da7214ea5..d877af6a4868dc607a9f3a4b28cf441cb9102f80 100755 (executable)
@@ -90,6 +90,7 @@ echo "delay_threshold = 10" >> etc/pgpool.conf
 echo "sr_check_period = 1" >> etc/pgpool.conf
 echo "log_standby_delay = 'always'" >> etc/pgpool.conf
 echo "log_min_messages = 'DEBUG1'" >> etc/pgpool.conf
+echo "log_error_verbosity = verbose" >> etc/pgpool.conf
 # force load balance node to be 1.
 echo "backend_weight0 = 0" >> etc/pgpool.conf
 echo "backend_weight2 = 0" >> etc/pgpool.conf
@@ -130,7 +131,8 @@ echo === Test2: delay_threshold_by_time with prefer_lower_delay_standby disabled
 # ----------------------------------------------------------------------------------------
 echo Start testing delay_threshold_by_time with prefer_lower_delay_standby disabled
 echo "delay_threshold = 0" >> etc/pgpool.conf
-echo "delay_threshold_by_time = 1" >> etc/pgpool.conf
+echo "delay_threshold_by_time = 1000" >> etc/pgpool.conf
+
 ./startall
 wait_for_pgpool_startup
 # pause replay on node 1