/* -*-pgsql-c-*- */
/*
 * $Header$
 *
 * pgpool: a language independent connection pool server for PostgreSQL
 * written by Tatsuo Ishii
 *
 * Copyright (c) 2003-2025	PgPool Global Development Group
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that the above copyright notice appear in all
 * copies and that both that copyright notice and this permission
 * notice appear in supporting documentation, and that the name of the
 * author not be used in advertising or publicity pertaining to
 * distribution of the software without specific, written prior
 * permission. The author makes no representations about the
 * suitability of this software for any purpose. It is provided "as
 * is" without express or implied warranty.
 *
 * health_check.c: health check related functions.
 *
 */
#include "config.h"

/*
 * System headers. The original header names were lost in extraction; the
 * conditionally included ones follow from their HAVE_* macros, the rest
 * are inferred from the symbols used in this file.
 */
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <netdb.h>
#include <sys/types.h>
#include <sys/socket.h>
#ifdef HAVE_NETINET_TCP_H
#include <netinet/tcp.h>
#endif
#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif
#include <stdlib.h>
#include <unistd.h>
#include <limits.h>
#include <setjmp.h>
#include <signal.h>
#include <time.h>
#include <sys/time.h>
#include <netinet/in.h>
#ifdef HAVE_CRYPT_H
#include <crypt.h>
#endif

#include "pool.h"
#include "main/health_check.h"
#include "main/pool_internal_comms.h"
#include "utils/palloc.h"
#include "utils/memutils.h"
#include "utils/elog.h"
#include "utils/pool_ip.h"
#include "utils/ps_status.h"
#include "utils/pool_stream.h"
#include "context/pool_process_context.h"
#include "context/pool_session_context.h"
#include "protocol/pool_pg_utils.h"
#include "pool_config.h"
#include "auth/md5.h"
#include "auth/pool_hba.h"

/* health check stats area in shared memory */
volatile POOL_HEALTH_CHECK_STATISTICS *health_check_stats;

static POOL_CONNECTION_POOL_SLOT *slot;
static volatile sig_atomic_t reload_config_request = 0;
static volatile sig_atomic_t restart_request = 0;

/* stats area for the backend node this process is in charge of */
volatile POOL_HEALTH_CHECK_STATISTICS *stats;

static bool establish_persistent_connection(int node);
static void discard_persistent_connection(int node);
static RETSIGTYPE my_signal_handler(int sig);
static RETSIGTYPE reload_config_handler(int sig);
static void reload_config(void);
static RETSIGTYPE health_check_timer_handler(int sig);
static bool check_backend_down_request(int node, bool done_requests);

#undef CHECK_REQUEST
#define CHECK_REQUEST \
    do { \
        if (reload_config_request) \
        { \
            reload_config(); \
            reload_config_request = 0; \
        } \
        else if (restart_request) \
        { \
            ereport(LOG, (errmsg("health check process received restart request"))); \
            exit(1); \
        } \
    } while (0)

#undef CLEAR_ALARM
#define CLEAR_ALARM \
    do { \
        ereport(DEBUG1, (errmsg("health check: clearing alarm"))); \
    } while (alarm(0) > 0)

/*
 * health check child main loop
 */
void
do_health_check_child(int *node_id)
{
    sigjmp_buf  local_sigjmp_buf;
    MemoryContext HealthCheckMemoryContext;
    char        psbuffer[NI_MAXHOST];
    static struct timeval start_time;
    static struct timeval end_time;
    long        diff_t;
    POOL_HEALTH_CHECK_STATISTICS mystat;

    stats = &health_check_stats[*node_id];

    /* Set application name */
    set_application_name_with_suffix(PT_HEALTH_CHECK, *node_id);

    ereport(DEBUG1,
            (errmsg("I am health check process pid:%d DB node id:%d", getpid(), *node_id)));

    /* Identify myself via ps */
    init_ps_display("", "", "", "");
    snprintf(psbuffer, sizeof(psbuffer), "health check process(%d)", *node_id);
    set_ps_display(psbuffer, false);

    /* set up signal handlers */
    signal(SIGALRM, SIG_DFL);
    signal(SIGTERM, my_signal_handler);
    signal(SIGINT, my_signal_handler);
    signal(SIGHUP, reload_config_handler);
    signal(SIGQUIT, my_signal_handler);
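    /*
     * SIGUSR1 (sent on failback or when a new node is added) requests a
     * restart of this process; SIGCHLD, SIGUSR2 and SIGPIPE are not used
     * here and are therefore ignored.
     */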
    signal(SIGCHLD, SIG_IGN);
    signal(SIGUSR1, my_signal_handler);
    signal(SIGUSR2, SIG_IGN);
    signal(SIGPIPE, SIG_IGN);

    /* Create per loop iteration memory context */
    HealthCheckMemoryContext = AllocSetContextCreate(TopMemoryContext,
                                                     "health_check_main_loop",
                                                     ALLOCSET_DEFAULT_MINSIZE,
                                                     ALLOCSET_DEFAULT_INITSIZE,
                                                     ALLOCSET_DEFAULT_MAXSIZE);

    MemoryContextSwitchTo(TopMemoryContext);

    /* Initialize per process context */
    pool_init_process_context();

    /*
     * Open pool_passwd.
     */
    if (strcmp("", pool_config->pool_passwd))
    {
        pool_reopen_passwd_file();
    }

    if (sigsetjmp(local_sigjmp_buf, 1) != 0)
    {
        pool_signal(SIGALRM, SIG_IGN);
        error_context_stack = NULL;
        EmitErrorReport();
        MemoryContextSwitchTo(TopMemoryContext);
        FlushErrorState();
    }

    /* We can now handle ereport(ERROR) */
    PG_exception_stack = &local_sigjmp_buf;

    /* Initialize health check stats area */
    stats->min_health_check_duration = INT_MAX;

    for (;;)
    {
        MemoryContextSwitchTo(HealthCheckMemoryContext);
        MemoryContextResetAndDeleteChildren(HealthCheckMemoryContext);

        /*
         * Since HealthCheckMemoryContext is used for "slot", we need to
         * clear it so that a new slot is allocated later on.
         */
        slot = NULL;

        bool        skipped = false;

        CHECK_REQUEST;

        if (pool_config->health_check_params[*node_id].health_check_period <= 0)
        {
            stats->min_health_check_duration = 0;
            sleep(30);
        }

        /*
         * If health checking is enabled and the node is not in down status,
         * do health check.
         */
        else if (pool_config->health_check_params[*node_id].health_check_period > 0)
        {
            bool        result;
            BackendInfo *bkinfo = pool_get_node_info(*node_id);

            stats->total_count++;
            gettimeofday(&start_time, NULL);
            stats->last_health_check = time(NULL);

            result = establish_persistent_connection(*node_id);

            if (result && slot == NULL)
            {
                /* Health check failed */
                stats->last_failed_health_check = time(NULL);

                if (POOL_DISALLOW_TO_FAILOVER(BACKEND_INFO(*node_id).flag))
                {
                    ereport(LOG,
                            (errmsg("health check failed on node %d but failover is disallowed for the node",
                                    *node_id)));
                }
                else
                {
                    bool        partial;

                    stats->fail_count++;

                    ereport(LOG,
                            (errmsg("health check failed on node %d (timeout:%d)",
                                    *node_id, health_check_timer_expired)));

                    if (bkinfo->backend_status == CON_DOWN && bkinfo->quarantine == true)
                    {
                        ereport(LOG,
                                (errmsg("health check failed on quarantine node %d (timeout:%d)",
                                        *node_id, health_check_timer_expired),
                                 errdetail("ignoring..")));
                    }
                    else
                    {
                        /* trigger failover */
                        partial = health_check_timer_expired ? false : true;
                        degenerate_backend_set(node_id, 1, partial ? REQ_DETAIL_SWITCHOVER : 0);
                    }
                }
            }
            else if (slot && bkinfo->backend_status == CON_DOWN && bkinfo->quarantine == true)
            {
                stats->success_count++;
                stats->last_successful_health_check = time(NULL);

                /*
                 * The node has become reachable again. Reset the quarantine
                 * state.
                 */
                send_failback_request(*node_id, false, REQ_DETAIL_UPDATE | REQ_DETAIL_WATCHDOG);
            }
            else if (result && slot)
            {
                /* Health check succeeded */
                stats->success_count++;
                stats->last_successful_health_check = time(NULL);
            }
            else if (!result)
            {
                /* Health check skipped */
                stats->skip_count++;
                stats->last_skip_health_check = time(NULL);
                skipped = true;
            }

            /* Discard persistent connections */
            discard_persistent_connection(*node_id);

            /*
             * Update the health check duration only if the health check was
             * not skipped, since the duration could be very small (probably
             * 0) if the health check was skipped.
             */
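            /*
             * Worked example of the computation below (illustrative values):
             * start = 10.900000 s, end = 12.100000 s gives
             * diff_t = 100000 - 900000 + 1000000 * 2 = 1200000 us,
             * i.e. 1200 ms after dividing by 1000.
             */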
            if (!skipped)
            {
                gettimeofday(&end_time, NULL);

                if (end_time.tv_sec > start_time.tv_sec)
                    diff_t = end_time.tv_usec - start_time.tv_usec +
                        1000000 * (end_time.tv_sec - start_time.tv_sec);
                else
                    diff_t = end_time.tv_usec - start_time.tv_usec;

                diff_t /= 1000;     /* convert microseconds to milliseconds */

                stats->total_health_check_duration += diff_t;
                if (diff_t > stats->max_health_check_duration)
                    stats->max_health_check_duration = diff_t;
                if (diff_t < stats->min_health_check_duration)
                    stats->min_health_check_duration = diff_t;
            }

            memcpy(&mystat, (void *) stats, sizeof(mystat));
            sleep(pool_config->health_check_params[*node_id].health_check_period);
        }
    }
    exit(0);
}

/*
 * Establish persistent connection to backend.
 * Return true if connection test is done.
 */
static bool
establish_persistent_connection(int node)
{
    BackendInfo *bkinfo;
    int         retry_cnt;
    static time_t auto_failback_interval = 0;   /* resume time of auto_failback */
    bool        check_failback = false;
    time_t      now;
    char       *dbname;

    bkinfo = pool_get_node_info(node);

    /*
     * If the node is already in down status or unused, do nothing, except
     * when the node state is down because of a quarantine operation, since
     * we want to detect when the node comes back to life again so we can
     * remove it from the quarantine state.
     */
    if (bkinfo->backend_status == CON_UNUSED ||
        (bkinfo->backend_status == CON_DOWN && bkinfo->quarantine == false))
    {
        /* get current time to use auto_failback_interval */
        now = time(NULL);

        if (pool_config->auto_failback && auto_failback_interval < now &&
            STREAM && !strcmp(bkinfo->replication_state, "streaming") &&
            !Req_info->switching)
        {
            ereport(DEBUG1,
                    (errmsg("health check DB node: %d (status:%d) for auto_failback",
                            node, bkinfo->backend_status)));
            check_failback = true;
        }
        else
            return false;
    }

    /*
     * If database is not specified, "postgres" database is assumed.
     */
    if (*pool_config->health_check_params[node].health_check_database == '\0')
        dbname = "postgres";
    else
        dbname = pool_config->health_check_params[node].health_check_database;

    /*
     * Try to connect to the database.
     */
    if (slot == NULL)
    {
        char       *password = get_pgpool_config_user_password(pool_config->health_check_params[node].health_check_user,
                                                               pool_config->health_check_params[node].health_check_password);

        retry_cnt = pool_config->health_check_params[node].health_check_max_retries;

        do
        {
            /*
             * Set health checker timeout. We want to detect a communication
             * path failure much earlier than the TCP/IP stack detects it.
             */
            if (pool_config->health_check_params[node].health_check_timeout > 0)
            {
                CLEAR_ALARM;
                pool_signal(SIGALRM, health_check_timer_handler);
                alarm(pool_config->health_check_params[node].health_check_timeout);
                errno = 0;
                health_check_timer_expired = 0;
            }

            slot = make_persistent_db_connection_noerror(node,
                                                         bkinfo->backend_hostname,
                                                         bkinfo->backend_port,
                                                         dbname,
                                                         pool_config->health_check_params[node].health_check_user,
                                                         password ? password : "",
                                                         false);

            if (pool_config->health_check_params[node].health_check_timeout > 0)
            {
                /* cancel health check timer */
                pool_signal(SIGALRM, SIG_IGN);
                CLEAR_ALARM;
            }

            /*
             * If health check test is enabled, check if a fake down request
             * is set. If set, discard_persistent_connection will set slot to
             * NULL. This simulates a connection failure.
             */
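            /*
             * For example, a line "0<TAB>down" in the backend down request
             * file (see BACKEND_DOWN_REQUEST_FILE below) makes the health
             * check for node 0 behave as if the connection attempt failed.
             */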
            if (pool_config->health_check_test)
            {
                if (slot && check_backend_down_request(node, false) == true)
                {
                    discard_persistent_connection(node);
                }
            }

            if (slot)
            {
                if (retry_cnt != pool_config->health_check_params[node].health_check_max_retries)
                {
                    ereport(LOG,
                            (errmsg("health check retrying on DB node: %d succeeded", node)));
                }
                break;          /* Success */
            }

            retry_cnt--;

            if (retry_cnt >= 0)
            {
                stats->retry_count++;
                ereport(LOG,
                        (errmsg("health check retrying on DB node: %d (round:%d)",
                                node,
                                pool_config->health_check_params[node].health_check_max_retries - retry_cnt)));
                sleep(pool_config->health_check_params[node].health_check_retry_delay);
            }
        } while (retry_cnt >= 0);

        /* Check if we need to update the max retry count stats */
        if (retry_cnt != pool_config->health_check_params[node].health_check_max_retries)
        {
            int         ret_cnt;

            retry_cnt++;
            ret_cnt = pool_config->health_check_params[node].health_check_max_retries - retry_cnt;
            if (ret_cnt > stats->max_retry_count)
                stats->max_retry_count = ret_cnt;
        }

        if (password)
            pfree(password);

        if (check_failback && !Req_info->switching && slot)
        {
            ereport(LOG,
                    (errmsg("request auto failback, node id:%d", node)));

            /* get current time to use auto_failback_interval */
            now = time(NULL);
            auto_failback_interval = now + pool_config->auto_failback_interval;

            send_failback_request(node, true, REQ_DETAIL_CONFIRMED);
        }
    }

    /* if check_failback is true, backend_status is DOWN or UNUSED. */
    if (check_failback)
    {
        return false;
    }
    return true;
}

/*
 * Discard persistent connection to backend
 */
static void
discard_persistent_connection(int node)
{
    if (slot)
    {
        discard_persistent_db_connection(slot);
        slot = NULL;
    }
}

static RETSIGTYPE
my_signal_handler(int sig)
{
    int         save_errno = errno;

    POOL_SETMASK(&BlockSig);

    switch (sig)
    {
        case SIGTERM:
        case SIGINT:
        case SIGQUIT:
            exit(0);
            break;

            /* Failback or new node added */
        case SIGUSR1:
            restart_request = 1;
            break;

        default:
            exit(1);
            break;
    }

    POOL_SETMASK(&UnBlockSig);
    errno = save_errno;
}

static RETSIGTYPE
reload_config_handler(int sig)
{
    int         save_errno = errno;

    POOL_SETMASK(&BlockSig);
    reload_config_request = 1;
    POOL_SETMASK(&UnBlockSig);
    errno = save_errno;
}

static void
reload_config(void)
{
    ereport(LOG,
            (errmsg("reloading config file")));

    MemoryContext oldContext = MemoryContextSwitchTo(TopMemoryContext);

    pool_get_config(get_config_file_name(), CFGCXT_RELOAD);
    MemoryContextSwitchTo(oldContext);

    if (pool_config->enable_pool_hba)
        load_hba(get_hba_file_name());

    if (strcmp("", pool_config->pool_passwd))
        pool_reopen_passwd_file();

    reload_config_request = 0;
}

/*
 * health check timer handler
 */
static RETSIGTYPE
health_check_timer_handler(int sig)
{
    int         save_errno = errno;

    POOL_SETMASK(&BlockSig);
    health_check_timer_expired = 1;
    POOL_SETMASK(&UnBlockSig);
    errno = save_errno;
}

/*
 * Returns the byte size of the health check statistics area
 */
size_t
health_check_stats_shared_memory_size(void)
{
    size_t      size;

    size = MAXALIGN(sizeof(POOL_HEALTH_CHECK_STATISTICS) * MAX_NUM_BACKENDS);
    elog(DEBUG1, "health_check_stats_shared_memory_size: requested size: %zu", size);
    return size;
}

/*
 * Initialize health check statistics area
 */
void
health_check_stats_init(POOL_HEALTH_CHECK_STATISTICS *addr)
{
    int         i;

    health_check_stats = addr;
    memset((void *) health_check_stats, 0, health_check_stats_shared_memory_size());

    for (i = 0; i < MAX_NUM_BACKENDS; i++)
    {
        health_check_stats[i].min_health_check_duration = INT_MAX;
    }
}

/*
 * Node down request file. In the file, each line consists of "backend node
 * id", a tab, and "down". If such a line is found,
 * check_backend_down_request() will return true.
 */
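/*
 * Example (illustrative values): a request for node 0 is the line
 *
 *     0<TAB>down
 *
 * which check_backend_down_request() rewrites to "0<TAB>already_down" once
 * the fake failover has been triggered, so it fires only once.
 */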
#define BACKEND_DOWN_REQUEST_FILE "backend_down_request"

/*
 * Check the backend down request file for the specified backend node id. If
 * the status is "down", return true and set the status to "already_down" to
 * prevent repeated failover. If the status is anything other than "down",
 * return false.
 *
 * When done_requests (the second argument) is true, the function returns
 * true if the node has "already_down" status in the file.
 */
static bool
check_backend_down_request(int node, bool done_requests)
{
    static char backend_down_request_file[POOLMAXPATHLEN];
    FILE       *fd;
    int         i;
#define MAXLINE 128
    char        linebuf[MAXLINE];
    char        readbuf[MAXLINE];
    char        buf[MAXLINE];
    char       *writebuf;
    char       *p;
    bool        found = false;
    int         node_id;
    char        status[MAXLINE];

    /* Build the file path once; the file lives under pool_config->logdir */
    if (backend_down_request_file[0] == '\0')
    {
        snprintf(backend_down_request_file, sizeof(backend_down_request_file),
                 "%s/%s", pool_config->logdir, BACKEND_DOWN_REQUEST_FILE);
    }

    fd = fopen(backend_down_request_file, "r");
    if (!fd)
    {
        ereport(WARNING,
                (errmsg("check_backend_down_request: failed to open file %s",
                        backend_down_request_file),
                 errdetail("%m")));
        return false;
    }

    writebuf = NULL;

    for (i = 0;; i++)
    {
        readbuf[MAXLINE - 1] = '\0';
        if (fgets(readbuf, MAXLINE - 1, fd) == NULL)
            break;

        /* Copy the line and strip the trailing newline, if any */
        strncpy(buf, readbuf, sizeof(buf) - 1);
        if (strlen(readbuf) > 0 && readbuf[strlen(readbuf) - 1] == '\n')
            buf[strlen(readbuf) - 1] = '\0';

        sscanf(buf, "%d\t%s", &node_id, status);

        if (done_requests)
        {
            if (node_id == node && !strcmp(status, "already_down"))
            {
                fclose(fd);
                return true;
            }
            continue;
        }

        p = readbuf;

        if (found == false)
        {
            if (node_id == node && !strcmp(status, "down"))
            {
                snprintf(linebuf, sizeof(linebuf), "%d\t%s\n", node_id, "already_down");
                found = true;
                p = linebuf;
            }
        }

        /* Accumulate the (possibly rewritten) line into writebuf */
        if (writebuf == NULL)
        {
            writebuf = malloc(strlen(p) + 1);
            if (writebuf)
                writebuf[0] = '\0';
        }
        else
        {
            char       *newbuf = realloc(writebuf, strlen(p) + strlen(writebuf) + 1);

            if (!newbuf)
                free(writebuf);     /* avoid leaking on realloc failure */
            writebuf = newbuf;
        }

        if (!writebuf)
        {
            fclose(fd);
            return false;
        }

        strcat(writebuf, p);
    }

    fclose(fd);

    if (!found)
    {
        if (writebuf)
            free(writebuf);
        return false;
    }

    /*
     * Write the updated contents back so that the matched line now reads
     * "already_down"; otherwise the fake failover would repeat on every
     * health check round.
     */
    fd = fopen(backend_down_request_file, "w");
    if (fd)
    {
        fputs(writebuf, fd);
        fclose(fd);
    }
    free(writebuf);
    return true;
}
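/*
 * Usage sketch (illustrative, assuming health_check_test is enabled in
 * pgpool.conf): appending a line such as
 *
 *     echo -e "0\tdown" >> <logdir>/backend_down_request
 *
 * makes the next health check round treat node 0 as failed, exercising the
 * failover path without taking a real backend down.
 */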