From e688e5ca1805863c93b76bb64d43615bcf5a1924 Mon Sep 17 00:00:00 2001 From: Maksim Melnikov Date: Thu, 4 Sep 2025 17:37:47 +0300 Subject: [PATCH] Try to handle torn reads of pg_control in sub postmaster processes. The same problem was fixed in 63a582222c6b3db2b1103ddf67a04b31a8f8e9bb, but only for frontends. This commit fixes the problem for the case where the pg_control file is read by fork/exec'd processes. There can be a race between the process that replays WAL at startup and updates the control file, and other sub-processes that were started with exec and read the control file. As a result, those sub-processes can read a partially updated file with an incorrect CRC. The reason is that LocalProcessControlFile doesn't acquire ControlFileLock, and it cannot do so. This patch is just a copy-paste of the changes applied for frontends, with a little adaptation. --- src/backend/access/transam/xlog.c | 33 ++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 22d0a2e8c3a6..62f9f7a2ba80 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -4371,6 +4371,15 @@ ReadControlFile(void) int fd; char wal_segsz_str[20]; int r; + bool crc_ok; +#ifdef EXEC_BACKEND + pg_crc32c last_crc; + int retries = 0; + + INIT_CRC32C(last_crc); + +retry: +#endif /* * Read data... @@ -4435,7 +4444,29 @@ ReadControlFile(void) offsetof(ControlFileData, crc)); FIN_CRC32C(crc); - if (!EQ_CRC32C(crc, ControlFile->crc)) + crc_ok = EQ_CRC32C(crc, ControlFile->crc); + +#ifdef EXEC_BACKEND + + /* + * If the server was writing at the same time, it is possible that we read + * partially updated contents on some systems. If the CRC doesn't match, + * retry a limited number of times until we compute the same bad CRC twice + * in a row with a short sleep in between. Then the failure is unlikely + * to be due to a concurrent write.
+ */ + if (!crc_ok && + (retries == 0 || !EQ_CRC32C(crc, last_crc)) && + retries < 10) + { + retries++; + last_crc = crc; + pg_usleep(10000); + goto retry; + } +#endif + + if (!crc_ok) ereport(FATAL, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("incorrect checksum in control file")));