Add more error context to RestoreBlockImage() and consume it
authorMichael Paquier <michael@paquier.xyz>
Fri, 9 Sep 2022 01:00:40 +0000 (10:00 +0900)
committerMichael Paquier <michael@paquier.xyz>
Fri, 9 Sep 2022 01:00:40 +0000 (10:00 +0900)
On failure in restoring a block image, no details were provided, while
it is possible to see failure with an inconsistent record state, a
failure in processing decompression or a failure in decompression
because a build does not support this option.

RestoreBlockImage() is used in two code paths in the backend code,
during recovery and when checking a page consistency after applying
masking, and both places are changed to consume the error message
produced by the internal routine when it returns a false status.  All
the error messages are reported under ERRCODE_INTERNAL_ERROR, that gets
used also when attempting to access a page compressed by a method
not supported by the build attempting the decompression.  This is
something that can happen in core when doing physical replication with
primary and standby using inconsistent build options, for example.

This routine is available since 2c03216d and it has never provided any
context about the error happening when it failed.  This change is
justified even more after 57aa5b2, that introduced compression of FPWs
in WAL.

Reported-by: Justin Prysby
Author: Michael Paquier
Discussion: https://postgr.es/m/20220905002320.GD31833@telsasoft.com
Backpatch-through: 15

src/backend/access/transam/xlogreader.c
src/backend/access/transam/xlogrecovery.c
src/backend/access/transam/xlogutils.c

index cd3dd8cc5c0a93f25a9c6ddb0d4bee15ff41e0a7..c4fbc37c74b47e794318f2aa8aab282ee7a1aa3a 100644 (file)
@@ -2036,7 +2036,8 @@ XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
 /*
  * Restore a full-page image from a backup block attached to an XLOG record.
  *
- * Returns true if a full-page image is restored.
+ * Returns true if a full-page image is restored, and false on failure with
+ * an error to be consumed by the caller.
  */
 bool
 RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
@@ -2047,9 +2048,20 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
 
    if (block_id > record->record->max_block_id ||
        !record->record->blocks[block_id].in_use)
+   {
+       report_invalid_record(record,
+                             "could not restore image at %X/%X with invalid block %d specified",
+                             LSN_FORMAT_ARGS(record->ReadRecPtr),
+                             block_id);
        return false;
+   }
    if (!record->record->blocks[block_id].has_image)
+   {
+       report_invalid_record(record, "could not restore image at %X/%X with invalid state, block %d",
+                             LSN_FORMAT_ARGS(record->ReadRecPtr),
+                             block_id);
        return false;
+   }
 
    bkpb = &record->record->blocks[block_id];
    ptr = bkpb->bkp_image;
@@ -2072,7 +2084,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
                                    bkpb->bimg_len, BLCKSZ - bkpb->hole_length) <= 0)
                decomp_success = false;
 #else
-           report_invalid_record(record, "image at %X/%X compressed with %s not supported by build, block %d",
+           report_invalid_record(record, "could not restore image at %X/%X compressed with %s not supported by build, block %d",
                                  LSN_FORMAT_ARGS(record->ReadRecPtr),
                                  "LZ4",
                                  block_id);
@@ -2089,7 +2101,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
            if (ZSTD_isError(decomp_result))
                decomp_success = false;
 #else
-           report_invalid_record(record, "image at %X/%X compressed with %s not supported by build, block %d",
+           report_invalid_record(record, "could not restore image at %X/%X compressed with %s not supported by build, block %d",
                                  LSN_FORMAT_ARGS(record->ReadRecPtr),
                                  "zstd",
                                  block_id);
@@ -2098,7 +2110,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
        }
        else
        {
-           report_invalid_record(record, "image at %X/%X compressed with unknown method, block %d",
+           report_invalid_record(record, "could not restore image at %X/%X compressed with unknown method, block %d",
                                  LSN_FORMAT_ARGS(record->ReadRecPtr),
                                  block_id);
            return false;
@@ -2106,7 +2118,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
 
        if (!decomp_success)
        {
-           report_invalid_record(record, "invalid compressed image at %X/%X, block %d",
+           report_invalid_record(record, "could not decompress image at %X/%X, block %d",
                                  LSN_FORMAT_ARGS(record->ReadRecPtr),
                                  block_id);
            return false;
index ae2af5ae3d6529a97a7870c7f573a1d4c8edeed8..9a80084a685801aa7debae33498b8ed428210142 100644 (file)
@@ -2412,7 +2412,9 @@ verifyBackupPageConsistency(XLogReaderState *record)
         * can be directly applied on it.
         */
        if (!RestoreBlockImage(record, block_id, primary_image_masked))
-           elog(ERROR, "failed to restore block image");
+           ereport(ERROR,
+                   (errcode(ERRCODE_INTERNAL_ERROR),
+                    errmsg_internal("%s", record->errormsg_buf)));
 
        /*
         * If masking function is defined, mask both the primary and replay
index 0cda22597fed114fc218418be7eebec96d373445..e60951a5fcd2154f76682a4ad62c5a8374a9b80d 100644 (file)
@@ -393,7 +393,9 @@ XLogReadBufferForRedoExtended(XLogReaderState *record,
                                      prefetch_buffer);
        page = BufferGetPage(*buf);
        if (!RestoreBlockImage(record, block_id, page))
-           elog(ERROR, "failed to restore block image");
+           ereport(ERROR,
+                   (errcode(ERRCODE_INTERNAL_ERROR),
+                    errmsg_internal("%s", record->errormsg_buf)));
 
        /*
         * The page may be uninitialized. If so, we can't set the LSN because