From dbc60c5593f26dc777a3be032bff4fb4eab1ddd1 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Sun, 12 Apr 2020 11:26:05 -0400
Subject: Rename pg_validatebackup to pg_verifybackup.
Also, use "verify" rather than "validate" to refer to the process
being undertaken here. Per discussion, that is a more appropriate
term.
Discussion: https://www.postgresql.org/message-id/172c9d9b-1d0a-1b94-1456-376b1e017322@2ndquadrant.com
Discussion: http://postgr.es/m/CA+TgmobLgMh6p8FmLbj_rv9Uhd7tPrLnAyLgGd2SoSj=qD-bVg@mail.gmail.com
---
doc/src/sgml/ref/allfiles.sgml | 2 +-
doc/src/sgml/ref/pg_basebackup.sgml | 4 +-
doc/src/sgml/ref/pg_validatebackup.sgml | 291 --------
doc/src/sgml/ref/pg_verifybackup.sgml | 291 ++++++++
doc/src/sgml/reference.sgml | 2 +-
src/bin/Makefile | 2 +-
src/bin/pg_validatebackup/.gitignore | 2 -
src/bin/pg_validatebackup/Makefile | 40 --
src/bin/pg_validatebackup/parse_manifest.c | 740 -------------------
src/bin/pg_validatebackup/parse_manifest.h | 45 --
src/bin/pg_validatebackup/pg_validatebackup.c | 905 ------------------------
src/bin/pg_validatebackup/t/001_basic.pl | 30 -
src/bin/pg_validatebackup/t/002_algorithm.pl | 58 --
src/bin/pg_validatebackup/t/003_corruption.pl | 288 --------
src/bin/pg_validatebackup/t/004_options.pl | 89 ---
src/bin/pg_validatebackup/t/005_bad_manifest.pl | 204 ------
src/bin/pg_validatebackup/t/006_encoding.pl | 27 -
src/bin/pg_validatebackup/t/007_wal.pl | 55 --
src/bin/pg_verifybackup/.gitignore | 2 +
src/bin/pg_verifybackup/Makefile | 40 ++
src/bin/pg_verifybackup/parse_manifest.c | 740 +++++++++++++++++++
src/bin/pg_verifybackup/parse_manifest.h | 45 ++
src/bin/pg_verifybackup/pg_verifybackup.c | 905 ++++++++++++++++++++++++
src/bin/pg_verifybackup/t/001_basic.pl | 30 +
src/bin/pg_verifybackup/t/002_algorithm.pl | 58 ++
src/bin/pg_verifybackup/t/003_corruption.pl | 288 ++++++++
src/bin/pg_verifybackup/t/004_options.pl | 89 +++
src/bin/pg_verifybackup/t/005_bad_manifest.pl | 204 ++++++
src/bin/pg_verifybackup/t/006_encoding.pl | 27 +
src/bin/pg_verifybackup/t/007_wal.pl | 55 ++
30 files changed, 2779 insertions(+), 2779 deletions(-)
delete mode 100644 doc/src/sgml/ref/pg_validatebackup.sgml
create mode 100644 doc/src/sgml/ref/pg_verifybackup.sgml
delete mode 100644 src/bin/pg_validatebackup/.gitignore
delete mode 100644 src/bin/pg_validatebackup/Makefile
delete mode 100644 src/bin/pg_validatebackup/parse_manifest.c
delete mode 100644 src/bin/pg_validatebackup/parse_manifest.h
delete mode 100644 src/bin/pg_validatebackup/pg_validatebackup.c
delete mode 100644 src/bin/pg_validatebackup/t/001_basic.pl
delete mode 100644 src/bin/pg_validatebackup/t/002_algorithm.pl
delete mode 100644 src/bin/pg_validatebackup/t/003_corruption.pl
delete mode 100644 src/bin/pg_validatebackup/t/004_options.pl
delete mode 100644 src/bin/pg_validatebackup/t/005_bad_manifest.pl
delete mode 100644 src/bin/pg_validatebackup/t/006_encoding.pl
delete mode 100644 src/bin/pg_validatebackup/t/007_wal.pl
create mode 100644 src/bin/pg_verifybackup/.gitignore
create mode 100644 src/bin/pg_verifybackup/Makefile
create mode 100644 src/bin/pg_verifybackup/parse_manifest.c
create mode 100644 src/bin/pg_verifybackup/parse_manifest.h
create mode 100644 src/bin/pg_verifybackup/pg_verifybackup.c
create mode 100644 src/bin/pg_verifybackup/t/001_basic.pl
create mode 100644 src/bin/pg_verifybackup/t/002_algorithm.pl
create mode 100644 src/bin/pg_verifybackup/t/003_corruption.pl
create mode 100644 src/bin/pg_verifybackup/t/004_options.pl
create mode 100644 src/bin/pg_verifybackup/t/005_bad_manifest.pl
create mode 100644 src/bin/pg_verifybackup/t/006_encoding.pl
create mode 100644 src/bin/pg_verifybackup/t/007_wal.pl
diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml
index ab71176cdf3..0f0064150c4 100644
--- a/doc/src/sgml/ref/allfiles.sgml
+++ b/doc/src/sgml/ref/allfiles.sgml
@@ -211,7 +211,7 @@ Complete list of usable sgml source files in this directory.
-
+
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index d9c981cebb9..01ce44ee220 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -568,7 +568,7 @@ PostgreSQL documentation
Disables generation of a backup manifest. If this option is not
specified, the server will generate and send a backup manifest
- which can be verified using .
+ which can be verified using .
The manifest is a list of every file present in the backup with the
exception of any WAL files that may be included. It also stores the
size, last modification time, and an optional checksum for each file.
@@ -620,7 +620,7 @@ PostgreSQL documentation
verified not to have been modified since the backup was taken.
- can be used to check the
+ can be used to check the
integrity of a backup against the backup manifest.
diff --git a/doc/src/sgml/ref/pg_validatebackup.sgml b/doc/src/sgml/ref/pg_validatebackup.sgml
deleted file mode 100644
index 5c626200f68..00000000000
--- a/doc/src/sgml/ref/pg_validatebackup.sgml
+++ /dev/null
@@ -1,291 +0,0 @@
-
-
-
-
- pg_validatebackup
-
-
-
- pg_validatebackup
- 1
- Application
-
-
-
- pg_validatebackup
- verify the integrity of a base backup of a
- PostgreSQL cluster
-
-
-
-
- pg_validatebackup
- option
-
-
-
-
-
- Description
-
-
- pg_validatebackup is used to check the
- integrity of a database cluster backup taken using
- pg_basebackup against a
- backup_manifest generated by the server at the time
- of the backup. The backup must be stored in the "plain"
- format; a "tar" format backup can be checked after extracting it.
-
-
-
- It is important to note that the validation which is performed by
- pg_validatebackup does not and can not include
- every check which will be performed by a running server when attempting
- to make use of the backup. Even if you use this tool, you should still
- perform test restores and verify that the resulting databases work as
- expected and that they appear to contain the correct data. However,
- pg_validatebackup can detect many problems
- that commonly occur due to storage problems or user error.
-
-
-
- Backup verification proceeds in four stages. First,
- pg_validatebackup reads the
- backup_manifest file. If that file
- does not exist, cannot be read, is malformed, or fails verification
- against its own internal checksum, pg_validatebackup
- will terminate with a fatal error.
-
-
-
- Second, pg_validatebackup will attempt to verify that
- the data files currently stored on disk are exactly the same as the data
- files which the server intended to send, with some exceptions that are
- described below. Extra and missing files will be detected, with a few
- exceptions. This step will ignore the presence or absence of, or any
- modifications to, postgresql.auto.conf,
- standby.signal, and recovery.signal,
- because it is expected that these files may have been created or modified
- as part of the process of taking the backup. It also won't complain about
- a backup_manifest file in the target directory or
- about anything inside pg_wal, even though these
- files won't be listed in the backup manifest. Only files are checked;
- the presence or absence of directories is not verified, except
- indirectly: if a directory is missing, any files it should have contained
- will necessarily also be missing.
-
-
-
- Next, pg_validatebackup will checksum all the files,
- compare the checksums against the values in the manifest, and emit errors
- for any files for which the computed checksum does not match the
- checksum stored in the manifest. This step is not performed for any files
- which produced errors in the previous step, since they are already known
- to have problems. Also, files which were ignored in the previous step are
- also ignored in this step.
-
-
-
- Finally, pg_validatebackup will use the manifest to
- verify that the write-ahead log records which will be needed to recover
- the backup are present and that they can be read and parsed. The
- backup_manifest contains information about which
- write-ahead log records will be needed, and
- pg_validatebackup will use that information to
- invoke pg_waldump to parse those write-ahed log
- records. The --quiet flag will be used, so that
- pg_waldump will only report errors, without producing
- any other output. While this level of verification is sufficient to
- detect obvious problems such as a missing file or one whose internal
- checksums do not match, they aren't extensive enough to detect every
- possible problem that might occur when attempting to recover. For
- instance, a server bug that produces write-ahead log records that have
- the correct checksums but specify nonsensical actions can't be detected
- by this method.
-
-
-
- Note that if extra WAL files which are not required to recover the backup
- are present, they will not be checked by this tool, although
- a separate invocation of pg_waldump could be used for
- that purpose. Also note that WAL verification is version-specific: you
- must use the version of pg_validatebackup, and thus of
- pg_waldump, which pertains to the backup being checked.
- In contrast, the data file integrity checks should work with any version
- of the server that generates a backup_manifest file.
-
-
-
-
- Options
-
-
- The following command-line options control the behavior.
-
-
-
-
-
-
-
- Exit as soon as a problem with the backup is detected. If this option
- is not specified, pg_validatebackup will continue
- checking the backup even after a problem has been detected, and will
- report all problems detected as errors.
-
-
-
-
-
-
-
-
-
- Ignore the specified file or directory, which should be expressed
- as a relative pathname, when comparing the list of data files
- actually present in the backup to those listed in the
- backup_manifest file. If a directory is
- specified, this option affects the entire subtree rooted at that
- location. Complaints about extra files, missing files, file size
- differences, or checksum mismatches will be suppressed if the
- relative pathname matches the specified pathname. This option
- can be specified multiple times.
-
-
-
-
-
-
-
-
-
- Use the manifest file at the specified path, rather than one located
- in the root of the backup directory.
-
-
-
-
-
-
-
-
-
- Don't attempt to parse write-ahead log data that will be needed
- to recover from this backup.
-
-
-
-
-
-
-
-
-
- Don't print anything when a backup is successfully validated.
-
-
-
-
-
-
-
-
-
- Do not validate data file checksums. The presence or absence of
- files and the sizes of those files will still be checked. This is
- much faster, because the files themselves do not need to be read.
-
-
-
-
-
-
-
-
-
- Try to parse WAL files stored in the specified directory, rather than
- in pg_wal. This may be useful if the backup is
- stored in a separate location from the WAL archive.
-
-
-
-
-
-
-
- Other options are also available:
-
-
-
-
-
-
-
- Print the pg_validatebackup version and exit.
-
-
-
-
-
-
-
-
-
- Show help about pg_validatebackup command
- line arguments, and exit.
-
-
-
-
-
-
-
-
-
-
- Examples
-
-
- To create a base backup of the server at mydbserver and
- validate the integrity of the backup:
-
-$pg_basebackup -h mydbserver -D /usr/local/pgsql/data
-$pg_validatebackup /usr/local/pgsql/data
-
-
-
-
- To create a base backup of the server at mydbserver, move
- the manifest somewhere outside the backup directory, and validate the
- backup:
-
-$pg_basebackup -h mydbserver -D /usr/local/pgsql/backup1234
-$mv /usr/local/pgsql/backup1234/backup_manifest /my/secure/location/backup_manifest.1234
-$pg_validatebackup -m /my/secure/location/backup_manifest.1234 /usr/local/pgsql/backup1234
-
-
-
-
- To validate a backup while ignoring a file that was added manually to the
- backup directory, and also skipping checksum verification:
-
-$pg_basebackup -h mydbserver -D /usr/local/pgsql/data
-$edit /usr/local/pgsql/data/note.to.self
-$pg_validatebackup --ignore=note.to.self --skip-checksums /usr/local/pgsql/data
-
-
-
-
-
-
- See Also
-
-
-
-
-
-
-
diff --git a/doc/src/sgml/ref/pg_verifybackup.sgml b/doc/src/sgml/ref/pg_verifybackup.sgml
new file mode 100644
index 00000000000..0ab92b38f7b
--- /dev/null
+++ b/doc/src/sgml/ref/pg_verifybackup.sgml
@@ -0,0 +1,291 @@
+
+
+
+
+ pg_verifybackup
+
+
+
+ pg_verifybackup
+ 1
+ Application
+
+
+
+ pg_verifybackup
+ verify the integrity of a base backup of a
+ PostgreSQL cluster
+
+
+
+
+ pg_verifybackup
+ option
+
+
+
+
+
+ Description
+
+
+ pg_verifybackup is used to check the
+ integrity of a database cluster backup taken using
+ pg_basebackup against a
+ backup_manifest generated by the server at the time
+ of the backup. The backup must be stored in the "plain"
+ format; a "tar" format backup can be checked after extracting it.
+
+
+
+ It is important to note that the validation which is performed by
+ pg_verifybackup does not and can not include
+ every check which will be performed by a running server when attempting
+ to make use of the backup. Even if you use this tool, you should still
+ perform test restores and verify that the resulting databases work as
+ expected and that they appear to contain the correct data. However,
+ pg_verifybackup can detect many problems
+ that commonly occur due to storage problems or user error.
+
+
+
+ Backup verification proceeds in four stages. First,
+ pg_verifybackup reads the
+ backup_manifest file. If that file
+ does not exist, cannot be read, is malformed, or fails verification
+ against its own internal checksum, pg_verifybackup
+ will terminate with a fatal error.
+
+
+
+ Second, pg_verifybackup will attempt to verify that
+ the data files currently stored on disk are exactly the same as the data
+ files which the server intended to send, with some exceptions that are
+ described below. Extra and missing files will be detected, with a few
+ exceptions. This step will ignore the presence or absence of, or any
+ modifications to, postgresql.auto.conf,
+ standby.signal, and recovery.signal,
+ because it is expected that these files may have been created or modified
+ as part of the process of taking the backup. It also won't complain about
+ a backup_manifest file in the target directory or
+ about anything inside pg_wal, even though these
+ files won't be listed in the backup manifest. Only files are checked;
+ the presence or absence of directories is not verified, except
+ indirectly: if a directory is missing, any files it should have contained
+ will necessarily also be missing.
+
+
+
+ Next, pg_verifybackup will checksum all the files,
+ compare the checksums against the values in the manifest, and emit errors
+ for any files for which the computed checksum does not match the
+ checksum stored in the manifest. This step is not performed for any files
+ which produced errors in the previous step, since they are already known
+ to have problems. Also, files which were ignored in the previous step are
+ also ignored in this step.
+
+
+
+ Finally, pg_verifybackup will use the manifest to
+ verify that the write-ahead log records which will be needed to recover
+ the backup are present and that they can be read and parsed. The
+ backup_manifest contains information about which
+ write-ahead log records will be needed, and
+ pg_verifybackup will use that information to
+ invoke pg_waldump to parse those write-ahead log
+ records. The --quiet flag will be used, so that
+ pg_waldump will only report errors, without producing
+ any other output. While this level of verification is sufficient to
+ detect obvious problems such as a missing file or one whose internal
+ checksums do not match, they aren't extensive enough to detect every
+ possible problem that might occur when attempting to recover. For
+ instance, a server bug that produces write-ahead log records that have
+ the correct checksums but specify nonsensical actions can't be detected
+ by this method.
+
+
+
+ Note that if extra WAL files which are not required to recover the backup
+ are present, they will not be checked by this tool, although
+ a separate invocation of pg_waldump could be used for
+ that purpose. Also note that WAL verification is version-specific: you
+ must use the version of pg_verifybackup, and thus of
+ pg_waldump, which pertains to the backup being checked.
+ In contrast, the data file integrity checks should work with any version
+ of the server that generates a backup_manifest file.
+
+
+
+
+ Options
+
+
+ The following command-line options control the behavior.
+
+
+
+
+
+
+
+ Exit as soon as a problem with the backup is detected. If this option
+ is not specified, pg_verifybackup will continue
+ checking the backup even after a problem has been detected, and will
+ report all problems detected as errors.
+
+
+
+
+
+
+
+
+
+ Ignore the specified file or directory, which should be expressed
+ as a relative pathname, when comparing the list of data files
+ actually present in the backup to those listed in the
+ backup_manifest file. If a directory is
+ specified, this option affects the entire subtree rooted at that
+ location. Complaints about extra files, missing files, file size
+ differences, or checksum mismatches will be suppressed if the
+ relative pathname matches the specified pathname. This option
+ can be specified multiple times.
+
+
+
+
+
+
+
+
+
+ Use the manifest file at the specified path, rather than one located
+ in the root of the backup directory.
+
+
+
+
+
+
+
+
+
+ Don't attempt to parse write-ahead log data that will be needed
+ to recover from this backup.
+
+
+
+
+
+
+
+
+
+ Don't print anything when a backup is successfully verified.
+
+
+
+
+
+
+
+
+
+ Do not verify data file checksums. The presence or absence of
+ files and the sizes of those files will still be checked. This is
+ much faster, because the files themselves do not need to be read.
+
+
+
+
+
+
+
+
+
+ Try to parse WAL files stored in the specified directory, rather than
+ in pg_wal. This may be useful if the backup is
+ stored in a separate location from the WAL archive.
+
+
+
+
+
+
+
+ Other options are also available:
+
+
+
+
+
+
+
+ Print the pg_verifybackup version and exit.
+
+
+
+
+
+
+
+
+
+ Show help about pg_verifybackup command
+ line arguments, and exit.
+
+
+
+
+
+
+
+
+
+
+ Examples
+
+
+ To create a base backup of the server at mydbserver and
+ verify the integrity of the backup:
+
+$pg_basebackup -h mydbserver -D /usr/local/pgsql/data
+$pg_verifybackup /usr/local/pgsql/data
+
+
+
+
+ To create a base backup of the server at mydbserver, move
+ the manifest somewhere outside the backup directory, and verify the
+ backup:
+
+$pg_basebackup -h mydbserver -D /usr/local/pgsql/backup1234
+$mv /usr/local/pgsql/backup1234/backup_manifest /my/secure/location/backup_manifest.1234
+$pg_verifybackup -m /my/secure/location/backup_manifest.1234 /usr/local/pgsql/backup1234
+
+
+
+
+ To verify a backup while ignoring a file that was added manually to the
+ backup directory, and also skipping checksum verification:
+
+$pg_basebackup -h mydbserver -D /usr/local/pgsql/data
+$edit /usr/local/pgsql/data/note.to.self
+$pg_verifybackup --ignore=note.to.self --skip-checksums /usr/local/pgsql/data
+
+
+
+
+
+
+ See Also
+
+
+
+
+
+
+
diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml
index d25a77b13c8..dd2bddab8c4 100644
--- a/doc/src/sgml/reference.sgml
+++ b/doc/src/sgml/reference.sgml
@@ -255,7 +255,7 @@
&pgReceivewal;
&pgRecvlogical;
&pgRestore;
- &pgValidateBackup;
+ &pgVerifyBackup;
&psqlRef;
&reindexdb;
&vacuumdb;
diff --git a/src/bin/Makefile b/src/bin/Makefile
index 77bceea4fe6..8b870357a14 100644
--- a/src/bin/Makefile
+++ b/src/bin/Makefile
@@ -27,7 +27,7 @@ SUBDIRS = \
pg_test_fsync \
pg_test_timing \
pg_upgrade \
- pg_validatebackup \
+ pg_verifybackup \
pg_waldump \
pgbench \
psql \
diff --git a/src/bin/pg_validatebackup/.gitignore b/src/bin/pg_validatebackup/.gitignore
deleted file mode 100644
index 21e0a92429c..00000000000
--- a/src/bin/pg_validatebackup/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-/pg_validatebackup
-/tmp_check/
diff --git a/src/bin/pg_validatebackup/Makefile b/src/bin/pg_validatebackup/Makefile
deleted file mode 100644
index b1c2b7c1be3..00000000000
--- a/src/bin/pg_validatebackup/Makefile
+++ /dev/null
@@ -1,40 +0,0 @@
-# src/bin/pg_validatebackup/Makefile
-
-PGFILEDESC = "pg_validatebackup - validate a backup against a backup manifest"
-PGAPPICON = win32
-
-subdir = src/bin/pg_validatebackup
-top_builddir = ../../..
-include $(top_builddir)/src/Makefile.global
-
-# We need libpq only because fe_utils does.
-LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
-
-OBJS = \
- $(WIN32RES) \
- parse_manifest.o \
- pg_validatebackup.o
-
-all: pg_validatebackup
-
-pg_validatebackup: $(OBJS) | submake-libpq submake-libpgport submake-libpgfeutils
- $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
-
-install: all installdirs
- $(INSTALL_PROGRAM) pg_validatebackup$(X) '$(DESTDIR)$(bindir)/pg_validatebackup$(X)'
-
-installdirs:
- $(MKDIR_P) '$(DESTDIR)$(bindir)'
-
-uninstall:
- rm -f '$(DESTDIR)$(bindir)/pg_validatebackup$(X)'
-
-clean distclean maintainer-clean:
- rm -f pg_validatebackup$(X) $(OBJS)
- rm -rf tmp_check
-
-check:
- $(prove_check)
-
-installcheck:
- $(prove_installcheck)
diff --git a/src/bin/pg_validatebackup/parse_manifest.c b/src/bin/pg_validatebackup/parse_manifest.c
deleted file mode 100644
index 0ec9dd6a13f..00000000000
--- a/src/bin/pg_validatebackup/parse_manifest.c
+++ /dev/null
@@ -1,740 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * parse_manifest.c
- * Parse a backup manifest in JSON format.
- *
- * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/bin/pg_validatebackup/parse_manifest.c
- *
- *-------------------------------------------------------------------------
- */
-
-#include "postgres_fe.h"
-
-#include "parse_manifest.h"
-#include "common/jsonapi.h"
-
-/*
- * Semantic states for JSON manifest parsing.
- */
-typedef enum
-{
- JM_EXPECT_TOPLEVEL_START,
- JM_EXPECT_TOPLEVEL_END,
- JM_EXPECT_TOPLEVEL_FIELD,
- JM_EXPECT_VERSION_VALUE,
- JM_EXPECT_FILES_START,
- JM_EXPECT_FILES_NEXT,
- JM_EXPECT_THIS_FILE_FIELD,
- JM_EXPECT_THIS_FILE_VALUE,
- JM_EXPECT_WAL_RANGES_START,
- JM_EXPECT_WAL_RANGES_NEXT,
- JM_EXPECT_THIS_WAL_RANGE_FIELD,
- JM_EXPECT_THIS_WAL_RANGE_VALUE,
- JM_EXPECT_MANIFEST_CHECKSUM_VALUE,
- JM_EXPECT_EOF
-} JsonManifestSemanticState;
-
-/*
- * Possible fields for one file as described by the manifest.
- */
-typedef enum
-{
- JMFF_PATH,
- JMFF_ENCODED_PATH,
- JMFF_SIZE,
- JMFF_LAST_MODIFIED,
- JMFF_CHECKSUM_ALGORITHM,
- JMFF_CHECKSUM
-} JsonManifestFileField;
-
-/*
- * Possible fields for one file as described by the manifest.
- */
-typedef enum
-{
- JMWRF_TIMELINE,
- JMWRF_START_LSN,
- JMWRF_END_LSN
-} JsonManifestWALRangeField;
-
-/*
- * Internal state used while decoding the JSON-format backup manifest.
- */
-typedef struct
-{
- JsonManifestParseContext *context;
- JsonManifestSemanticState state;
-
- /* These fields are used for parsing objects in the list of files. */
- JsonManifestFileField file_field;
- char *pathname;
- char *encoded_pathname;
- char *size;
- char *algorithm;
- pg_checksum_type checksum_algorithm;
- char *checksum;
-
- /* These fields are used for parsing objects in the list of WAL ranges. */
- JsonManifestWALRangeField wal_range_field;
- char *timeline;
- char *start_lsn;
- char *end_lsn;
-
- /* Miscellaneous other stuff. */
- bool saw_version_field;
- char *manifest_checksum;
-} JsonManifestParseState;
-
-static void json_manifest_object_start(void *state);
-static void json_manifest_object_end(void *state);
-static void json_manifest_array_start(void *state);
-static void json_manifest_array_end(void *state);
-static void json_manifest_object_field_start(void *state, char *fname,
- bool isnull);
-static void json_manifest_scalar(void *state, char *token,
- JsonTokenType tokentype);
-static void json_manifest_finalize_file(JsonManifestParseState *parse);
-static void json_manifest_finalize_wal_range(JsonManifestParseState *parse);
-static void verify_manifest_checksum(JsonManifestParseState *parse,
- char *buffer, size_t size);
-static void json_manifest_parse_failure(JsonManifestParseContext *context,
- char *msg);
-
-static int hexdecode_char(char c);
-static bool hexdecode_string(uint8 *result, char *input, int nbytes);
-static bool parse_xlogrecptr(XLogRecPtr *result, char *input);
-
-/*
- * Main entrypoint to parse a JSON-format backup manifest.
- *
- * Caller should set up the parsing context and then invoke this function.
- * For each file whose information is extracted from the manifest,
- * context->perfile_cb is invoked. In case of trouble, context->error_cb is
- * invoked and is expected not to return.
- */
-void
-json_parse_manifest(JsonManifestParseContext *context, char *buffer,
- size_t size)
-{
- JsonLexContext *lex;
- JsonParseErrorType json_error;
- JsonSemAction sem;
- JsonManifestParseState parse;
-
- /* Set up our private parsing context. */
- parse.context = context;
- parse.state = JM_EXPECT_TOPLEVEL_START;
- parse.saw_version_field = false;
-
- /* Create a JSON lexing context. */
- lex = makeJsonLexContextCstringLen(buffer, size, PG_UTF8, true);
-
- /* Set up semantic actions. */
- sem.semstate = &parse;
- sem.object_start = json_manifest_object_start;
- sem.object_end = json_manifest_object_end;
- sem.array_start = json_manifest_array_start;
- sem.array_end = json_manifest_array_end;
- sem.object_field_start = json_manifest_object_field_start;
- sem.object_field_end = NULL;
- sem.array_element_start = NULL;
- sem.array_element_end = NULL;
- sem.scalar = json_manifest_scalar;
-
- /* Run the actual JSON parser. */
- json_error = pg_parse_json(lex, &sem);
- if (json_error != JSON_SUCCESS)
- json_manifest_parse_failure(context, json_errdetail(json_error, lex));
- if (parse.state != JM_EXPECT_EOF)
- json_manifest_parse_failure(context, "manifest ended unexpectedly");
-
- /* Validate the checksum. */
- verify_manifest_checksum(&parse, buffer, size);
-}
-
-/*
- * Invoked at the start of each object in the JSON document.
- *
- * The document as a whole is expected to be an object; each file and each
- * WAL range is also expected to be an object. If we're anywhere else in the
- * document, it's an error.
- */
-static void
-json_manifest_object_start(void *state)
-{
- JsonManifestParseState *parse = state;
-
- switch (parse->state)
- {
- case JM_EXPECT_TOPLEVEL_START:
- parse->state = JM_EXPECT_TOPLEVEL_FIELD;
- break;
- case JM_EXPECT_FILES_NEXT:
- parse->state = JM_EXPECT_THIS_FILE_FIELD;
- parse->pathname = NULL;
- parse->encoded_pathname = NULL;
- parse->size = NULL;
- parse->algorithm = NULL;
- parse->checksum = NULL;
- break;
- case JM_EXPECT_WAL_RANGES_NEXT:
- parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD;
- parse->timeline = NULL;
- parse->start_lsn = NULL;
- parse->end_lsn = NULL;
- break;
- default:
- json_manifest_parse_failure(parse->context,
- "unexpected object start");
- break;
- }
-}
-
-/*
- * Invoked at the end of each object in the JSON document.
- *
- * The possible cases here are the same as for json_manifest_object_start.
- * There's nothing special to do at the end of the document, but when we
- * reach the end of an object representing a particular file or WAL range,
- * we must call json_manifest_finalize_file() to save the associated details.
- */
-static void
-json_manifest_object_end(void *state)
-{
- JsonManifestParseState *parse = state;
-
- switch (parse->state)
- {
- case JM_EXPECT_TOPLEVEL_END:
- parse->state = JM_EXPECT_EOF;
- break;
- case JM_EXPECT_THIS_FILE_FIELD:
- json_manifest_finalize_file(parse);
- parse->state = JM_EXPECT_FILES_NEXT;
- break;
- case JM_EXPECT_THIS_WAL_RANGE_FIELD:
- json_manifest_finalize_wal_range(parse);
- parse->state = JM_EXPECT_WAL_RANGES_NEXT;
- break;
- default:
- json_manifest_parse_failure(parse->context,
- "unexpected object end");
- break;
- }
-}
-
-/*
- * Invoked at the start of each array in the JSON document.
- *
- * Within the toplevel object, the value associated with the "Files" key
- * should be an array. Similarly for the "WAL-Ranges" key. No other arrays
- * are expected.
- */
-static void
-json_manifest_array_start(void *state)
-{
- JsonManifestParseState *parse = state;
-
- switch (parse->state)
- {
- case JM_EXPECT_FILES_START:
- parse->state = JM_EXPECT_FILES_NEXT;
- break;
- case JM_EXPECT_WAL_RANGES_START:
- parse->state = JM_EXPECT_WAL_RANGES_NEXT;
- break;
- default:
- json_manifest_parse_failure(parse->context,
- "unexpected array start");
- break;
- }
-}
-
-/*
- * Invoked at the end of each array in the JSON document.
- *
- * The cases here are analogous to those in json_manifest_array_start.
- */
-static void
-json_manifest_array_end(void *state)
-{
- JsonManifestParseState *parse = state;
-
- switch (parse->state)
- {
- case JM_EXPECT_FILES_NEXT:
- case JM_EXPECT_WAL_RANGES_NEXT:
- parse->state = JM_EXPECT_TOPLEVEL_FIELD;
- break;
- default:
- json_manifest_parse_failure(parse->context,
- "unexpected array end");
- break;
- }
-}
-
-/*
- * Invoked at the start of each object field in the JSON document.
- */
-static void
-json_manifest_object_field_start(void *state, char *fname, bool isnull)
-{
- JsonManifestParseState *parse = state;
-
- switch (parse->state)
- {
- case JM_EXPECT_TOPLEVEL_FIELD:
-
- /*
- * Inside toplevel object. The version indicator should always be
- * the first field.
- */
- if (!parse->saw_version_field)
- {
- if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0)
- json_manifest_parse_failure(parse->context,
- "expected version indicator");
- parse->state = JM_EXPECT_VERSION_VALUE;
- parse->saw_version_field = true;
- break;
- }
-
- /* Is this the list of files? */
- if (strcmp(fname, "Files") == 0)
- {
- parse->state = JM_EXPECT_FILES_START;
- break;
- }
-
- /* Is this the list of WAL ranges? */
- if (strcmp(fname, "WAL-Ranges") == 0)
- {
- parse->state = JM_EXPECT_WAL_RANGES_START;
- break;
- }
-
- /* Is this the manifest checksum? */
- if (strcmp(fname, "Manifest-Checksum") == 0)
- {
- parse->state = JM_EXPECT_MANIFEST_CHECKSUM_VALUE;
- break;
- }
-
- /* It's not a field we recognize. */
- json_manifest_parse_failure(parse->context,
- "unknown toplevel field");
- break;
-
- case JM_EXPECT_THIS_FILE_FIELD:
- /* Inside object for one file; which key have we got? */
- if (strcmp(fname, "Path") == 0)
- parse->file_field = JMFF_PATH;
- else if (strcmp(fname, "Encoded-Path") == 0)
- parse->file_field = JMFF_ENCODED_PATH;
- else if (strcmp(fname, "Size") == 0)
- parse->file_field = JMFF_SIZE;
- else if (strcmp(fname, "Last-Modified") == 0)
- parse->file_field = JMFF_LAST_MODIFIED;
- else if (strcmp(fname, "Checksum-Algorithm") == 0)
- parse->file_field = JMFF_CHECKSUM_ALGORITHM;
- else if (strcmp(fname, "Checksum") == 0)
- parse->file_field = JMFF_CHECKSUM;
- else
- json_manifest_parse_failure(parse->context,
- "unexpected file field");
- parse->state = JM_EXPECT_THIS_FILE_VALUE;
- break;
-
- case JM_EXPECT_THIS_WAL_RANGE_FIELD:
- /* Inside object for one file; which key have we got? */
- if (strcmp(fname, "Timeline") == 0)
- parse->wal_range_field = JMWRF_TIMELINE;
- else if (strcmp(fname, "Start-LSN") == 0)
- parse->wal_range_field = JMWRF_START_LSN;
- else if (strcmp(fname, "End-LSN") == 0)
- parse->wal_range_field = JMWRF_END_LSN;
- else
- json_manifest_parse_failure(parse->context,
- "unexpected wal range field");
- parse->state = JM_EXPECT_THIS_WAL_RANGE_VALUE;
- break;
-
- default:
- json_manifest_parse_failure(parse->context,
- "unexpected object field");
- break;
- }
-}
-
-/*
- * Invoked at the start of each scalar in the JSON document.
- *
- * Object field names don't reach this code; those are handled by
- * json_manifest_object_field_start. When we're inside of the object for
- * a particular file or WAL range, that function will have noticed the name
- * of the field, and we'll get the corresponding value here. When we're in
- * the toplevel object, the parse state itself tells us which field this is.
- *
- * In all cases except for PostgreSQL-Backup-Manifest-Version, which we
- * can just check on the spot, the goal here is just to save the value in
- * the parse state for later use. We don't actually do anything until we
- * reach either the end of the object representing this file, or the end
- * of the manifest, as the case may be.
- */
-static void
-json_manifest_scalar(void *state, char *token, JsonTokenType tokentype)
-{
- JsonManifestParseState *parse = state;
-
- switch (parse->state)
- {
- case JM_EXPECT_VERSION_VALUE:
- if (strcmp(token, "1") != 0)
- json_manifest_parse_failure(parse->context,
- "unexpected manifest version");
- parse->state = JM_EXPECT_TOPLEVEL_FIELD;
- break;
-
- case JM_EXPECT_THIS_FILE_VALUE:
- switch (parse->file_field)
- {
- case JMFF_PATH:
- parse->pathname = token;
- break;
- case JMFF_ENCODED_PATH:
- parse->encoded_pathname = token;
- break;
- case JMFF_SIZE:
- parse->size = token;
- break;
- case JMFF_LAST_MODIFIED:
- pfree(token); /* unused */
- break;
- case JMFF_CHECKSUM_ALGORITHM:
- parse->algorithm = token;
- break;
- case JMFF_CHECKSUM:
- parse->checksum = token;
- break;
- }
- parse->state = JM_EXPECT_THIS_FILE_FIELD;
- break;
-
- case JM_EXPECT_THIS_WAL_RANGE_VALUE:
- switch (parse->wal_range_field)
- {
- case JMWRF_TIMELINE:
- parse->timeline = token;
- break;
- case JMWRF_START_LSN:
- parse->start_lsn = token;
- break;
- case JMWRF_END_LSN:
- parse->end_lsn = token;
- break;
- }
- parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD;
- break;
-
- case JM_EXPECT_MANIFEST_CHECKSUM_VALUE:
- parse->state = JM_EXPECT_TOPLEVEL_END;
- parse->manifest_checksum = token;
- break;
-
- default:
- json_manifest_parse_failure(parse->context, "unexpected scalar");
- break;
- }
-}
-
-/*
- * Do additional parsing and sanity-checking of the details gathered for one
- * file, and invoke the per-file callback so that the caller gets those
- * details. This happens for each file when the corresponding JSON object is
- * completely parsed.
- */
-static void
-json_manifest_finalize_file(JsonManifestParseState *parse)
-{
- JsonManifestParseContext *context = parse->context;
- size_t size;
- char *ep;
- int checksum_string_length;
- pg_checksum_type checksum_type;
- int checksum_length;
- uint8 *checksum_payload;
-
- /* Pathname and size are required. */
- if (parse->pathname == NULL && parse->encoded_pathname == NULL)
- json_manifest_parse_failure(parse->context, "missing pathname");
- if (parse->pathname != NULL && parse->encoded_pathname != NULL)
- json_manifest_parse_failure(parse->context,
- "both pathname and encoded pathname");
- if (parse->size == NULL)
- json_manifest_parse_failure(parse->context, "missing size");
- if (parse->algorithm == NULL && parse->checksum != NULL)
- json_manifest_parse_failure(parse->context,
- "checksum without algorithm");
-
- /* Decode encoded pathname, if that's what we have. */
- if (parse->encoded_pathname != NULL)
- {
- int encoded_length = strlen(parse->encoded_pathname);
- int raw_length = encoded_length / 2;
-
- parse->pathname = palloc(raw_length + 1);
- if (encoded_length % 2 != 0 ||
- !hexdecode_string((uint8 *) parse->pathname,
- parse->encoded_pathname,
- raw_length))
- json_manifest_parse_failure(parse->context,
- "unable to decode filename");
- parse->pathname[raw_length] = '\0';
- pfree(parse->encoded_pathname);
- parse->encoded_pathname = NULL;
- }
-
- /* Parse size. */
- size = strtoul(parse->size, &ep, 10);
- if (*ep)
- json_manifest_parse_failure(parse->context,
- "file size is not an integer");
-
- /* Parse the checksum algorithm, if it's present. */
- if (parse->algorithm == NULL)
- checksum_type = CHECKSUM_TYPE_NONE;
- else if (!pg_checksum_parse_type(parse->algorithm, &checksum_type))
- context->error_cb(context, "unrecognized checksum algorithm: \"%s\"",
- parse->algorithm);
-
- /* Parse the checksum payload, if it's present. */
- checksum_string_length = parse->checksum == NULL ? 0
- : strlen(parse->checksum);
- if (checksum_string_length == 0)
- {
- checksum_length = 0;
- checksum_payload = NULL;
- }
- else
- {
- checksum_length = checksum_string_length / 2;
- checksum_payload = palloc(checksum_length);
- if (checksum_string_length % 2 != 0 ||
- !hexdecode_string(checksum_payload, parse->checksum,
- checksum_length))
- context->error_cb(context,
- "invalid checksum for file \"%s\": \"%s\"",
- parse->pathname, parse->checksum);
- }
-
- /* Invoke the callback with the details we've gathered. */
- context->perfile_cb(context, parse->pathname, size,
- checksum_type, checksum_length, checksum_payload);
-
- /* Free memory we no longer need. */
- if (parse->size != NULL)
- {
- pfree(parse->size);
- parse->size = NULL;
- }
- if (parse->algorithm != NULL)
- {
- pfree(parse->algorithm);
- parse->algorithm = NULL;
- }
- if (parse->checksum != NULL)
- {
- pfree(parse->checksum);
- parse->checksum = NULL;
- }
-}
-
-/*
- * Do additional parsing and sanity-checking of the details gathered for one
- * WAL range, and invoke the per-WAL-range callback so that the caller gets
- * those details. This happens for each WAL range when the corresponding JSON
- * object is completely parsed.
- */
-static void
-json_manifest_finalize_wal_range(JsonManifestParseState *parse)
-{
- JsonManifestParseContext *context = parse->context;
- TimeLineID tli;
- XLogRecPtr start_lsn,
- end_lsn;
- char *ep;
-
- /* Make sure all fields are present. */
- if (parse->timeline == NULL)
- json_manifest_parse_failure(parse->context, "missing timeline");
- if (parse->start_lsn == NULL)
- json_manifest_parse_failure(parse->context, "missing start LSN");
- if (parse->end_lsn == NULL)
- json_manifest_parse_failure(parse->context, "missing end LSN");
-
- /* Parse timeline. */
- tli = strtoul(parse->timeline, &ep, 10);
- if (*ep)
- json_manifest_parse_failure(parse->context,
- "timeline is not an integer");
- if (!parse_xlogrecptr(&start_lsn, parse->start_lsn))
- json_manifest_parse_failure(parse->context,
- "unable to parse start LSN");
- if (!parse_xlogrecptr(&end_lsn, parse->end_lsn))
- json_manifest_parse_failure(parse->context,
- "unable to parse end LSN");
-
- /* Invoke the callback with the details we've gathered. */
- context->perwalrange_cb(context, tli, start_lsn, end_lsn);
-
- /* Free memory we no longer need. */
- if (parse->timeline != NULL)
- {
- pfree(parse->timeline);
- parse->timeline = NULL;
- }
- if (parse->start_lsn != NULL)
- {
- pfree(parse->start_lsn);
- parse->start_lsn = NULL;
- }
- if (parse->end_lsn != NULL)
- {
- pfree(parse->end_lsn);
- parse->end_lsn = NULL;
- }
-}
-
-/*
- * Verify that the manifest checksum is correct.
- *
- * The last line of the manifest file is excluded from the manifest checksum,
- * because the last line is expected to contain the checksum that covers
- * the rest of the file.
- */
-static void
-verify_manifest_checksum(JsonManifestParseState *parse, char *buffer,
- size_t size)
-{
- JsonManifestParseContext *context = parse->context;
- size_t i;
- size_t number_of_newlines = 0;
- size_t ultimate_newline = 0;
- size_t penultimate_newline = 0;
- pg_sha256_ctx manifest_ctx;
- uint8 manifest_checksum_actual[PG_SHA256_DIGEST_LENGTH];
- uint8 manifest_checksum_expected[PG_SHA256_DIGEST_LENGTH];
-
- /* Find the last two newlines in the file. */
- for (i = 0; i < size; ++i)
- {
- if (buffer[i] == '\n')
- {
- ++number_of_newlines;
- penultimate_newline = ultimate_newline;
- ultimate_newline = i;
- }
- }
-
- /*
- * Make sure that the last newline is right at the end, and that there are
- * at least two lines total. We need this to be true in order for the
- * following code, which computes the manifest checksum, to work properly.
- */
- if (number_of_newlines < 2)
- json_manifest_parse_failure(parse->context,
- "expected at least 2 lines");
- if (ultimate_newline != size - 1)
- json_manifest_parse_failure(parse->context,
- "last line not newline-terminated");
-
- /* Checksum the rest. */
- pg_sha256_init(&manifest_ctx);
- pg_sha256_update(&manifest_ctx, (uint8 *) buffer, penultimate_newline + 1);
- pg_sha256_final(&manifest_ctx, manifest_checksum_actual);
-
- /* Now verify it. */
- if (parse->manifest_checksum == NULL)
- context->error_cb(parse->context, "manifest has no checksum");
- if (strlen(parse->manifest_checksum) != PG_SHA256_DIGEST_LENGTH * 2 ||
- !hexdecode_string(manifest_checksum_expected, parse->manifest_checksum,
- PG_SHA256_DIGEST_LENGTH))
- context->error_cb(context, "invalid manifest checksum: \"%s\"",
- parse->manifest_checksum);
- if (memcmp(manifest_checksum_actual, manifest_checksum_expected,
- PG_SHA256_DIGEST_LENGTH) != 0)
- context->error_cb(context, "manifest checksum mismatch");
-}
-
-/*
- * Report a parse error.
- *
- * This is intended to be used for fairly low-level failures that probably
- * shouldn't occur unless somebody has deliberately constructed a bad manifest,
- * or unless the server is generating bad manifests due to some bug. msg should
- * be a short string giving some hint as to what the problem is.
- */
-static void
-json_manifest_parse_failure(JsonManifestParseContext *context, char *msg)
-{
- context->error_cb(context, "could not parse backup manifest: %s", msg);
-}
-
-/*
- * Convert a character which represents a hexadecimal digit to an integer.
- *
- * Returns -1 if the character is not a hexadecimal digit.
- */
-static int
-hexdecode_char(char c)
-{
- if (c >= '0' && c <= '9')
- return c - '0';
- if (c >= 'a' && c <= 'f')
- return c - 'a' + 10;
- if (c >= 'A' && c <= 'F')
- return c - 'A' + 10;
-
- return -1;
-}
-
-/*
- * Decode a hex string into a byte string, 2 hex chars per byte.
- *
- * Returns false if invalid characters are encountered; otherwise true.
- */
-static bool
-hexdecode_string(uint8 *result, char *input, int nbytes)
-{
- int i;
-
- for (i = 0; i < nbytes; ++i)
- {
- int n1 = hexdecode_char(input[i * 2]);
- int n2 = hexdecode_char(input[i * 2 + 1]);
-
- if (n1 < 0 || n2 < 0)
- return false;
- result[i] = n1 * 16 + n2;
- }
-
- return true;
-}
-
-/*
- * Parse an XLogRecPtr expressed using the usual string format.
- */
-static bool
-parse_xlogrecptr(XLogRecPtr *result, char *input)
-{
- uint32 hi;
- uint32 lo;
-
- if (sscanf(input, "%X/%X", &hi, &lo) != 2)
- return false;
- *result = ((uint64) hi) << 32 | lo;
- return true;
-}
diff --git a/src/bin/pg_validatebackup/parse_manifest.h b/src/bin/pg_validatebackup/parse_manifest.h
deleted file mode 100644
index f0a4fac36bc..00000000000
--- a/src/bin/pg_validatebackup/parse_manifest.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * parse_manifest.h
- * Parse a backup manifest in JSON format.
- *
- * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/bin/pg_validatebackup/parse_manifest.h
- *
- *-------------------------------------------------------------------------
- */
-
-#ifndef PARSE_MANIFEST_H
-#define PARSE_MANIFEST_H
-
-#include "access/xlogdefs.h"
-#include "common/checksum_helper.h"
-#include "mb/pg_wchar.h"
-
-struct JsonManifestParseContext;
-typedef struct JsonManifestParseContext JsonManifestParseContext;
-
-typedef void (*json_manifest_perfile_callback)(JsonManifestParseContext *,
- char *pathname,
- size_t size, pg_checksum_type checksum_type,
- int checksum_length, uint8 *checksum_payload);
-typedef void (*json_manifest_perwalrange_callback)(JsonManifestParseContext *,
- TimeLineID tli,
- XLogRecPtr start_lsn, XLogRecPtr end_lsn);
-typedef void (*json_manifest_error_callback)(JsonManifestParseContext *,
- char *fmt, ...) pg_attribute_printf(2, 3);
-
-struct JsonManifestParseContext
-{
- void *private_data;
- json_manifest_perfile_callback perfile_cb;
- json_manifest_perwalrange_callback perwalrange_cb;
- json_manifest_error_callback error_cb;
-};
-
-extern void json_parse_manifest(JsonManifestParseContext *context,
- char *buffer, size_t size);
-
-#endif
diff --git a/src/bin/pg_validatebackup/pg_validatebackup.c b/src/bin/pg_validatebackup/pg_validatebackup.c
deleted file mode 100644
index f2746385005..00000000000
--- a/src/bin/pg_validatebackup/pg_validatebackup.c
+++ /dev/null
@@ -1,905 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * pg_validatebackup.c
- * Validate a backup against a backup manifest.
- *
- * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/bin/pg_validatebackup/pg_validatebackup.c
- *
- *-------------------------------------------------------------------------
- */
-
-#include "postgres_fe.h"
-
-#include
-#include
-#include
-
-#include "common/hashfn.h"
-#include "common/logging.h"
-#include "fe_utils/simple_list.h"
-#include "getopt_long.h"
-#include "parse_manifest.h"
-
-/*
- * For efficiency, we'd like our hash table containing information about the
- * manifest to start out with approximately the correct number of entries.
- * There's no way to know the exact number of entries without reading the whole
- * file, but we can get an estimate by dividing the file size by the estimated
- * number of bytes per line.
- *
- * This could be off by about a factor of two in either direction, because the
- * checksum algorithm has a big impact on the line lengths; e.g. a SHA512
- * checksum is 128 hex bytes, whereas a CRC-32C value is only 8, and there
- * might be no checksum at all.
- */
-#define ESTIMATED_BYTES_PER_MANIFEST_LINE 100
-
-/*
- * How many bytes should we try to read from a file at once?
- */
-#define READ_CHUNK_SIZE 4096
-
-/*
- * Each file described by the manifest file is parsed to produce an object
- * like this.
- */
-typedef struct manifest_file
-{
- uint32 status; /* hash status */
- char *pathname;
- size_t size;
- pg_checksum_type checksum_type;
- int checksum_length;
- uint8 *checksum_payload;
- bool matched;
- bool bad;
-} manifest_file;
-
-/*
- * Define a hash table which we can use to store information about the files
- * mentioned in the backup manifest.
- */
-static uint32 hash_string_pointer(char *s);
-#define SH_PREFIX manifest_files
-#define SH_ELEMENT_TYPE manifest_file
-#define SH_KEY_TYPE char *
-#define SH_KEY pathname
-#define SH_HASH_KEY(tb, key) hash_string_pointer(key)
-#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
-#define SH_SCOPE static inline
-#define SH_RAW_ALLOCATOR pg_malloc0
-#define SH_DECLARE
-#define SH_DEFINE
-#include "lib/simplehash.h"
-
-/*
- * Each WAL range described by the manifest file is parsed to produce an
- * object like this.
- */
-typedef struct manifest_wal_range
-{
- TimeLineID tli;
- XLogRecPtr start_lsn;
- XLogRecPtr end_lsn;
- struct manifest_wal_range *next;
- struct manifest_wal_range *prev;
-} manifest_wal_range;
-
-/*
- * Details we need in callbacks that occur while parsing a backup manifest.
- */
-typedef struct parser_context
-{
- manifest_files_hash *ht;
- manifest_wal_range *first_wal_range;
- manifest_wal_range *last_wal_range;
-} parser_context;
-
-/*
- * All of the context information we need while checking a backup manifest.
- */
-typedef struct validator_context
-{
- manifest_files_hash *ht;
- char *backup_directory;
- SimpleStringList ignore_list;
- bool exit_on_error;
- bool saw_any_error;
-} validator_context;
-
-static void parse_manifest_file(char *manifest_path,
- manifest_files_hash **ht_p,
- manifest_wal_range **first_wal_range_p);
-
-static void record_manifest_details_for_file(JsonManifestParseContext *context,
- char *pathname, size_t size,
- pg_checksum_type checksum_type,
- int checksum_length,
- uint8 *checksum_payload);
-static void record_manifest_details_for_wal_range(JsonManifestParseContext *context,
- TimeLineID tli,
- XLogRecPtr start_lsn,
- XLogRecPtr end_lsn);
-static void report_manifest_error(JsonManifestParseContext *context,
- char *fmt,...)
- pg_attribute_printf(2, 3) pg_attribute_noreturn();
-
-static void validate_backup_directory(validator_context *context,
- char *relpath, char *fullpath);
-static void validate_backup_file(validator_context *context,
- char *relpath, char *fullpath);
-static void report_extra_backup_files(validator_context *context);
-static void validate_backup_checksums(validator_context *context);
-static void validate_file_checksum(validator_context *context,
- manifest_file *m, char *pathname);
-static void parse_required_wal(validator_context *context,
- char *pg_waldump_path,
- char *wal_directory,
- manifest_wal_range *first_wal_range);
-
-static void report_backup_error(validator_context *context,
- const char *pg_restrict fmt,...)
- pg_attribute_printf(2, 3);
-static void report_fatal_error(const char *pg_restrict fmt,...)
- pg_attribute_printf(1, 2) pg_attribute_noreturn();
-static bool should_ignore_relpath(validator_context *context, char *relpath);
-
-static void usage(void);
-
-static const char *progname;
-
-/*
- * Main entry point.
- */
-int
-main(int argc, char **argv)
-{
- static struct option long_options[] = {
- {"exit-on-error", no_argument, NULL, 'e'},
- {"ignore", required_argument, NULL, 'i'},
- {"manifest-path", required_argument, NULL, 'm'},
- {"no-parse-wal", no_argument, NULL, 'n'},
- {"print-parse-wal", no_argument, NULL, 'p'},
- {"quiet", no_argument, NULL, 'q'},
- {"skip-checksums", no_argument, NULL, 's'},
- {"wal-directory", required_argument, NULL, 'w'},
- {NULL, 0, NULL, 0}
- };
-
- int c;
- validator_context context;
- manifest_wal_range *first_wal_range;
- char *manifest_path = NULL;
- bool no_parse_wal = false;
- bool quiet = false;
- bool skip_checksums = false;
- char *wal_directory = NULL;
- char *pg_waldump_path = NULL;
-
- pg_logging_init(argv[0]);
- set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_validatebackup"));
- progname = get_progname(argv[0]);
-
- memset(&context, 0, sizeof(context));
-
- if (argc > 1)
- {
- if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
- {
- usage();
- exit(0);
- }
- if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
- {
- puts("pg_validatebackup (PostgreSQL) " PG_VERSION);
- exit(0);
- }
- }
-
- /*
- * Skip certain files in the toplevel directory.
- *
- * Ignore the backup_manifest file, because it's not included in the
- * backup manifest.
- *
- * Ignore the pg_wal directory, because those files are not included in
- * the backup manifest either, since they are fetched separately from the
- * backup itself, and validated via a separate mechanism.
- *
- * Ignore postgresql.auto.conf, recovery.signal, and standby.signal,
- * because we expect that those files may sometimes be created or changed
- * as part of the backup process. For example, pg_basebackup -R will
- * modify postgresql.auto.conf and create standby.signal.
- */
- simple_string_list_append(&context.ignore_list, "backup_manifest");
- simple_string_list_append(&context.ignore_list, "pg_wal");
- simple_string_list_append(&context.ignore_list, "postgresql.auto.conf");
- simple_string_list_append(&context.ignore_list, "recovery.signal");
- simple_string_list_append(&context.ignore_list, "standby.signal");
-
- while ((c = getopt_long(argc, argv, "ei:m:nqsw:", long_options, NULL)) != -1)
- {
- switch (c)
- {
- case 'e':
- context.exit_on_error = true;
- break;
- case 'i':
- {
- char *arg = pstrdup(optarg);
-
- canonicalize_path(arg);
- simple_string_list_append(&context.ignore_list, arg);
- break;
- }
- case 'm':
- manifest_path = pstrdup(optarg);
- canonicalize_path(manifest_path);
- break;
- case 'n':
- no_parse_wal = true;
- break;
- case 'q':
- quiet = true;
- break;
- case 's':
- skip_checksums = true;
- break;
- case 'w':
- wal_directory = pstrdup(optarg);
- canonicalize_path(wal_directory);
- break;
- default:
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
- progname);
- exit(1);
- }
- }
-
- /* Get backup directory name */
- if (optind >= argc)
- {
- pg_log_fatal("no backup directory specified");
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
- progname);
- exit(1);
- }
- context.backup_directory = pstrdup(argv[optind++]);
- canonicalize_path(context.backup_directory);
-
- /* Complain if any arguments remain */
- if (optind < argc)
- {
- pg_log_fatal("too many command-line arguments (first is \"%s\")",
- argv[optind]);
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
- progname);
- exit(1);
- }
-
- /* Unless --no-parse-wal was specified, we will need pg_waldump. */
- if (!no_parse_wal)
- {
- int ret;
-
- pg_waldump_path = pg_malloc(MAXPGPATH);
- ret = find_other_exec(argv[0], "pg_waldump",
- "pg_waldump (PostgreSQL) " PG_VERSION "\n",
- pg_waldump_path);
- if (ret < 0)
- {
- char full_path[MAXPGPATH];
-
- if (find_my_exec(argv[0], full_path) < 0)
- strlcpy(full_path, progname, sizeof(full_path));
- if (ret == -1)
- pg_log_fatal("The program \"%s\" is needed by %s but was\n"
- "not found in the same directory as \"%s\".\n"
- "Check your installation.",
- "pg_waldump", "pg_validatebackup", full_path);
- else
- pg_log_fatal("The program \"%s\" was found by \"%s\" but was\n"
- "not the same version as %s.\n"
- "Check your installation.",
- "pg_waldump", full_path, "pg_validatebackup");
- }
- }
-
- /* By default, look for the manifest in the backup directory. */
- if (manifest_path == NULL)
- manifest_path = psprintf("%s/backup_manifest",
- context.backup_directory);
-
- /* By default, look for the WAL in the backup directory, too. */
- if (wal_directory == NULL)
- wal_directory = psprintf("%s/pg_wal", context.backup_directory);
-
- /*
- * Try to read the manifest. We treat any errors encountered while parsing
- * the manifest as fatal; there doesn't seem to be much point in trying to
- * validate the backup directory against a corrupted manifest.
- */
- parse_manifest_file(manifest_path, &context.ht, &first_wal_range);
-
- /*
- * Now scan the files in the backup directory. At this stage, we verify
- * that every file on disk is present in the manifest and that the sizes
- * match. We also set the "matched" flag on every manifest entry that
- * corresponds to a file on disk.
- */
- validate_backup_directory(&context, NULL, context.backup_directory);
-
- /*
- * The "matched" flag should now be set on every entry in the hash table.
- * Any entries for which the bit is not set are files mentioned in the
- * manifest that don't exist on disk.
- */
- report_extra_backup_files(&context);
-
- /*
- * Now do the expensive work of verifying file checksums, unless we were
- * told to skip it.
- */
- if (!skip_checksums)
- validate_backup_checksums(&context);
-
- /*
- * Try to parse the required ranges of WAL records, unless we were told
- * not to do so.
- */
- if (!no_parse_wal)
- parse_required_wal(&context, pg_waldump_path,
- wal_directory, first_wal_range);
-
- /*
- * If everything looks OK, tell the user this, unless we were asked to
- * work quietly.
- */
- if (!context.saw_any_error && !quiet)
- printf("backup successfully verified\n");
-
- return context.saw_any_error ? 1 : 0;
-}
-
-/*
- * Parse a manifest file. Construct a hash table with information about
- * all the files it mentions, and a linked list of all the WAL ranges it
- * mentions.
- */
-static void
-parse_manifest_file(char *manifest_path, manifest_files_hash **ht_p,
- manifest_wal_range **first_wal_range_p)
-{
- int fd;
- struct stat statbuf;
- off_t estimate;
- uint32 initial_size;
- manifest_files_hash *ht;
- char *buffer;
- int rc;
- parser_context private_context;
- JsonManifestParseContext context;
-
- /* Open the manifest file. */
- if ((fd = open(manifest_path, O_RDONLY | PG_BINARY, 0)) < 0)
- report_fatal_error("could not open file \"%s\": %m", manifest_path);
-
- /* Figure out how big the manifest is. */
- if (fstat(fd, &statbuf) != 0)
- report_fatal_error("could not stat file \"%s\": %m", manifest_path);
-
- /* Guess how large to make the hash table based on the manifest size. */
- estimate = statbuf.st_size / ESTIMATED_BYTES_PER_MANIFEST_LINE;
- initial_size = Min(PG_UINT32_MAX, Max(estimate, 256));
-
- /* Create the hash table. */
- ht = manifest_files_create(initial_size, NULL);
-
- /*
- * Slurp in the whole file.
- *
- * This is not ideal, but there's currently no easy way to get
- * pg_parse_json() to perform incremental parsing.
- */
- buffer = pg_malloc(statbuf.st_size);
- rc = read(fd, buffer, statbuf.st_size);
- if (rc != statbuf.st_size)
- {
- if (rc < 0)
- report_fatal_error("could not read file \"%s\": %m",
- manifest_path);
- else
- report_fatal_error("could not read file \"%s\": read %d of %zu",
- manifest_path, rc, (size_t) statbuf.st_size);
- }
-
- /* Close the manifest file. */
- close(fd);
-
- /* Parse the manifest. */
- private_context.ht = ht;
- private_context.first_wal_range = NULL;
- private_context.last_wal_range = NULL;
- context.private_data = &private_context;
- context.perfile_cb = record_manifest_details_for_file;
- context.perwalrange_cb = record_manifest_details_for_wal_range;
- context.error_cb = report_manifest_error;
- json_parse_manifest(&context, buffer, statbuf.st_size);
-
- /* Done with the buffer. */
- pfree(buffer);
-
- /* Return the file hash table and WAL range list we constructed. */
- *ht_p = ht;
- *first_wal_range_p = private_context.first_wal_range;
-}
-
-/*
- * Report an error while parsing the manifest.
- *
- * We consider all such errors to be fatal errors. The manifest parser
- * expects this function not to return.
- */
-static void
-report_manifest_error(JsonManifestParseContext *context, char *fmt,...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- pg_log_generic_v(PG_LOG_FATAL, fmt, ap);
- va_end(ap);
-
- exit(1);
-}
-
-/*
- * Record details extracted from the backup manifest for one file.
- */
-static void
-record_manifest_details_for_file(JsonManifestParseContext *context,
- char *pathname, size_t size,
- pg_checksum_type checksum_type,
- int checksum_length, uint8 *checksum_payload)
-{
- parser_context *pcxt = context->private_data;
- manifest_files_hash *ht = pcxt->ht;
- manifest_file *m;
- bool found;
-
- /* Make a new entry in the hash table for this file. */
- m = manifest_files_insert(ht, pathname, &found);
- if (found)
- report_fatal_error("duplicate pathname in backup manifest: \"%s\"",
- pathname);
-
- /* Initialize the entry. */
- m->size = size;
- m->checksum_type = checksum_type;
- m->checksum_length = checksum_length;
- m->checksum_payload = checksum_payload;
- m->matched = false;
- m->bad = false;
-}
-
-/*
- * Record details extracted from the backup manifest for one WAL range.
- */
-static void
-record_manifest_details_for_wal_range(JsonManifestParseContext *context,
- TimeLineID tli,
- XLogRecPtr start_lsn, XLogRecPtr end_lsn)
-{
- parser_context *pcxt = context->private_data;
- manifest_wal_range *range;
-
- /* Allocate and initialize a struct describing this WAL range. */
- range = palloc(sizeof(manifest_wal_range));
- range->tli = tli;
- range->start_lsn = start_lsn;
- range->end_lsn = end_lsn;
- range->prev = pcxt->last_wal_range;
- range->next = NULL;
-
- /* Add it to the end of the list. */
- if (pcxt->first_wal_range == NULL)
- pcxt->first_wal_range = range;
- else
- pcxt->last_wal_range->next = range;
- pcxt->last_wal_range = range;
-}
-
-/*
- * Validate one directory.
- *
- * 'relpath' is NULL if we are to validate the top-level backup directory,
- * and otherwise the relative path to the directory that is to be validated.
- *
- * 'fullpath' is the backup directory with 'relpath' appended; i.e. the actual
- * filesystem path at which it can be found.
- */
-static void
-validate_backup_directory(validator_context *context, char *relpath,
- char *fullpath)
-{
- DIR *dir;
- struct dirent *dirent;
-
- dir = opendir(fullpath);
- if (dir == NULL)
- {
- /*
- * If even the toplevel backup directory cannot be found, treat this
- * as a fatal error.
- */
- if (relpath == NULL)
- report_fatal_error("could not open directory \"%s\": %m", fullpath);
-
- /*
- * Otherwise, treat this as a non-fatal error, but ignore any further
- * errors related to this path and anything beneath it.
- */
- report_backup_error(context,
- "could not open directory \"%s\": %m", fullpath);
- simple_string_list_append(&context->ignore_list, relpath);
-
- return;
- }
-
- while (errno = 0, (dirent = readdir(dir)) != NULL)
- {
- char *filename = dirent->d_name;
- char *newfullpath = psprintf("%s/%s", fullpath, filename);
- char *newrelpath;
-
- /* Skip "." and ".." */
- if (filename[0] == '.' && (filename[1] == '\0'
- || strcmp(filename, "..") == 0))
- continue;
-
- if (relpath == NULL)
- newrelpath = pstrdup(filename);
- else
- newrelpath = psprintf("%s/%s", relpath, filename);
-
- if (!should_ignore_relpath(context, newrelpath))
- validate_backup_file(context, newrelpath, newfullpath);
-
- pfree(newfullpath);
- pfree(newrelpath);
- }
-
- if (closedir(dir))
- {
- report_backup_error(context,
- "could not close directory \"%s\": %m", fullpath);
- return;
- }
-}
-
-/*
- * Validate one file (which might actually be a directory or a symlink).
- *
- * The arguments to this function have the same meaning as the arguments to
- * validate_backup_directory.
- */
-static void
-validate_backup_file(validator_context *context, char *relpath, char *fullpath)
-{
- struct stat sb;
- manifest_file *m;
-
- if (stat(fullpath, &sb) != 0)
- {
- report_backup_error(context,
- "could not stat file or directory \"%s\": %m",
- relpath);
-
- /*
- * Suppress further errors related to this path name and, if it's a
- * directory, anything underneath it.
- */
- simple_string_list_append(&context->ignore_list, relpath);
-
- return;
- }
-
- /* If it's a directory, just recurse. */
- if (S_ISDIR(sb.st_mode))
- {
- validate_backup_directory(context, relpath, fullpath);
- return;
- }
-
- /* If it's not a directory, it should be a plain file. */
- if (!S_ISREG(sb.st_mode))
- {
- report_backup_error(context,
- "\"%s\" is not a file or directory",
- relpath);
- return;
- }
-
- /* Check whether there's an entry in the manifest hash. */
- m = manifest_files_lookup(context->ht, relpath);
- if (m == NULL)
- {
- report_backup_error(context,
- "\"%s\" is present on disk but not in the manifest",
- relpath);
- return;
- }
-
- /* Flag this entry as having been encountered in the filesystem. */
- m->matched = true;
-
- /* Check that the size matches. */
- if (m->size != sb.st_size)
- {
- report_backup_error(context,
- "\"%s\" has size %zu on disk but size %zu in the manifest",
- relpath, (size_t) sb.st_size, m->size);
- m->bad = true;
- }
-
- /*
- * We don't validate checksums at this stage. We first finish validating
- * that we have the expected set of files with the expected sizes, and
- * only afterwards verify the checksums. That's because computing
- * checksums may take a while, and we'd like to report more obvious
- * problems quickly.
- */
-}
-
-/*
- * Scan the hash table for entries where the 'matched' flag is not set; report
- * that such files are present in the manifest but not on disk.
- */
-static void
-report_extra_backup_files(validator_context *context)
-{
- manifest_files_iterator it;
- manifest_file *m;
-
- manifest_files_start_iterate(context->ht, &it);
- while ((m = manifest_files_iterate(context->ht, &it)) != NULL)
- if (!m->matched && !should_ignore_relpath(context, m->pathname))
- report_backup_error(context,
- "\"%s\" is present in the manifest but not on disk",
- m->pathname);
-}
-
-/*
- * Validate checksums for hash table entries that are otherwise unproblematic.
- * If we've already reported some problem related to a hash table entry, or
- * if it has no checksum, just skip it.
- */
-static void
-validate_backup_checksums(validator_context *context)
-{
- manifest_files_iterator it;
- manifest_file *m;
-
- manifest_files_start_iterate(context->ht, &it);
- while ((m = manifest_files_iterate(context->ht, &it)) != NULL)
- {
- if (m->matched && !m->bad && m->checksum_type != CHECKSUM_TYPE_NONE &&
- !should_ignore_relpath(context, m->pathname))
- {
- char *fullpath;
-
- /* Compute the full pathname to the target file. */
- fullpath = psprintf("%s/%s", context->backup_directory,
- m->pathname);
-
- /* Do the actual checksum validation. */
- validate_file_checksum(context, m, fullpath);
-
- /* Avoid leaking memory. */
- pfree(fullpath);
- }
- }
-}
-
-/*
- * Validate the checksum of a single file.
- */
-static void
-validate_file_checksum(validator_context *context, manifest_file *m,
- char *fullpath)
-{
- pg_checksum_context checksum_ctx;
- char *relpath = m->pathname;
- int fd;
- int rc;
- size_t bytes_read = 0;
- uint8 buffer[READ_CHUNK_SIZE];
- uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
- int checksumlen;
-
- /* Open the target file. */
- if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) < 0)
- {
- report_backup_error(context, "could not open file \"%s\": %m",
- relpath);
- return;
- }
-
- /* Initialize checksum context. */
- pg_checksum_init(&checksum_ctx, m->checksum_type);
-
- /* Read the file chunk by chunk, updating the checksum as we go. */
- while ((rc = read(fd, buffer, READ_CHUNK_SIZE)) > 0)
- {
- bytes_read += rc;
- pg_checksum_update(&checksum_ctx, buffer, rc);
- }
- if (rc < 0)
- report_backup_error(context, "could not read file \"%s\": %m",
- relpath);
-
- /* Close the file. */
- if (close(fd) != 0)
- {
- report_backup_error(context, "could not close file \"%s\": %m",
- relpath);
- return;
- }
-
- /* If we didn't manage to read the whole file, bail out now. */
- if (rc < 0)
- return;
-
- /*
- * Double-check that we read the expected number of bytes from the file.
- * Normally, a file size mismatch would be caught in validate_backup_file
- * and this check would never be reached, but this provides additional
- * safety and clarity in the event of concurrent modifications or
- * filesystem misbehavior.
- */
- if (bytes_read != m->size)
- {
- report_backup_error(context,
- "file \"%s\" should contain %zu bytes, but read %zu bytes",
- relpath, m->size, bytes_read);
- return;
- }
-
- /* Get the final checksum. */
- checksumlen = pg_checksum_final(&checksum_ctx, checksumbuf);
-
- /* And check it against the manifest. */
- if (checksumlen != m->checksum_length)
- report_backup_error(context,
- "file \"%s\" has checksum of length %d, but expected %d",
- relpath, m->checksum_length, checksumlen);
- else if (memcmp(checksumbuf, m->checksum_payload, checksumlen) != 0)
- report_backup_error(context,
- "checksum mismatch for file \"%s\"",
- relpath);
-}
-
-/*
- * Attempt to parse the WAL files required to restore from backup using
- * pg_waldump.
- */
-static void
-parse_required_wal(validator_context *context, char *pg_waldump_path,
- char *wal_directory, manifest_wal_range *first_wal_range)
-{
- manifest_wal_range *this_wal_range = first_wal_range;
-
- while (this_wal_range != NULL)
- {
- char *pg_waldump_cmd;
-
- pg_waldump_cmd = psprintf("\"%s\" --quiet --path=\"%s\" --timeline=%u --start=%X/%X --end=%X/%X\n",
- pg_waldump_path, wal_directory, this_wal_range->tli,
- (uint32) (this_wal_range->start_lsn >> 32),
- (uint32) this_wal_range->start_lsn,
- (uint32) (this_wal_range->end_lsn >> 32),
- (uint32) this_wal_range->end_lsn);
- if (system(pg_waldump_cmd) != 0)
- report_backup_error(context,
- "WAL parsing failed for timeline %u",
- this_wal_range->tli);
-
- this_wal_range = this_wal_range->next;
- }
-}
-
-/*
- * Report a problem with the backup.
- *
- * Update the context to indicate that we saw an error, and exit if the
- * context says we should.
- */
-static void
-report_backup_error(validator_context *context, const char *pg_restrict fmt,...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- pg_log_generic_v(PG_LOG_ERROR, fmt, ap);
- va_end(ap);
-
- context->saw_any_error = true;
- if (context->exit_on_error)
- exit(1);
-}
-
-/*
- * Report a fatal error and exit
- */
-static void
-report_fatal_error(const char *pg_restrict fmt,...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- pg_log_generic_v(PG_LOG_FATAL, fmt, ap);
- va_end(ap);
-
- exit(1);
-}
-
-/*
- * Is the specified relative path, or some prefix of it, listed in the set
- * of paths to ignore?
- *
- * Note that by "prefix" we mean a parent directory; for this purpose,
- * "aa/bb" is not a prefix of "aa/bbb", but it is a prefix of "aa/bb/cc".
- */
-static bool
-should_ignore_relpath(validator_context *context, char *relpath)
-{
- SimpleStringListCell *cell;
-
- for (cell = context->ignore_list.head; cell != NULL; cell = cell->next)
- {
- char *r = relpath;
- char *v = cell->val;
-
- while (*v != '\0' && *r == *v)
- ++r, ++v;
-
- if (*v == '\0' && (*r == '\0' || *r == '/'))
- return true;
- }
-
- return false;
-}
-
-/*
- * Helper function for manifest_files hash table.
- */
-static uint32
-hash_string_pointer(char *s)
-{
- unsigned char *ss = (unsigned char *) s;
-
- return hash_bytes(ss, strlen(s));
-}
-
-/*
- * Print out usage information and exit.
- */
-static void
-usage(void)
-{
- printf(_("%s validates a backup against the backup manifest.\n\n"), progname);
- printf(_("Usage:\n %s [OPTION]... BACKUPDIR\n\n"), progname);
- printf(_("Options:\n"));
- printf(_(" -e, --exit-on-error exit immediately on error\n"));
- printf(_(" -i, --ignore=RELATIVE_PATH ignore indicated path\n"));
- printf(_(" -m, --manifest=PATH use specified path for manifest\n"));
- printf(_(" -n, --no-parse-wal do not try to parse WAL files\n"));
- printf(_(" -s, --skip-checksums skip checksum verification\n"));
- printf(_(" -w, --wal-directory=PATH use specified path for WAL files\n"));
- printf(_(" -V, --version output version information, then exit\n"));
- printf(_(" -?, --help show this help, then exit\n"));
- printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
- printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
-}
diff --git a/src/bin/pg_validatebackup/t/001_basic.pl b/src/bin/pg_validatebackup/t/001_basic.pl
deleted file mode 100644
index 6d4b8ea01a6..00000000000
--- a/src/bin/pg_validatebackup/t/001_basic.pl
+++ /dev/null
@@ -1,30 +0,0 @@
-use strict;
-use warnings;
-use TestLib;
-use Test::More tests => 16;
-
-my $tempdir = TestLib::tempdir;
-
-program_help_ok('pg_validatebackup');
-program_version_ok('pg_validatebackup');
-program_options_handling_ok('pg_validatebackup');
-
-command_fails_like(['pg_validatebackup'],
- qr/no backup directory specified/,
- 'target directory must be specified');
-command_fails_like(['pg_validatebackup', $tempdir],
- qr/could not open file.*\/backup_manifest\"/,
- 'pg_validatebackup requires a manifest');
-command_fails_like(['pg_validatebackup', $tempdir, $tempdir],
- qr/too many command-line arguments/,
- 'multiple target directories not allowed');
-
-# create fake manifest file
-open(my $fh, '>', "$tempdir/backup_manifest") || die "open: $!";
-close($fh);
-
-# but then try to use an alternate, nonexisting manifest
-command_fails_like(['pg_validatebackup', '-m', "$tempdir/not_the_manifest",
- $tempdir],
- qr/could not open file.*\/not_the_manifest\"/,
- 'pg_validatebackup respects -m flag');
diff --git a/src/bin/pg_validatebackup/t/002_algorithm.pl b/src/bin/pg_validatebackup/t/002_algorithm.pl
deleted file mode 100644
index 98871e12a5e..00000000000
--- a/src/bin/pg_validatebackup/t/002_algorithm.pl
+++ /dev/null
@@ -1,58 +0,0 @@
-# Verify that we can take and validate backups with various checksum types.
-
-use strict;
-use warnings;
-use Cwd;
-use Config;
-use File::Path qw(rmtree);
-use PostgresNode;
-use TestLib;
-use Test::More tests => 19;
-
-my $master = get_new_node('master');
-$master->init(allows_streaming => 1);
-$master->start;
-
-for my $algorithm (qw(bogus none crc32c sha224 sha256 sha384 sha512))
-{
- my $backup_path = $master->backup_dir . '/' . $algorithm;
- my @backup = ('pg_basebackup', '-D', $backup_path,
- '--manifest-checksums', $algorithm,
- '--no-sync');
- my @validate = ('pg_validatebackup', '-e', $backup_path);
-
- # A backup with a bogus algorithm should fail.
- if ($algorithm eq 'bogus')
- {
- $master->command_fails(\@backup,
- "backup fails with algorithm \"$algorithm\"");
- next;
- }
-
- # A backup with a valid algorithm should work.
- $master->command_ok(\@backup, "backup ok with algorithm \"$algorithm\"");
-
- # We expect each real checksum algorithm to be mentioned on every line of
- # the backup manifest file except the first and last; for simplicity, we
- # just check that it shows up lots of times. When the checksum algorithm
- # is none, we just check that the manifest exists.
- if ($algorithm eq 'none')
- {
- ok(-f "$backup_path/backup_manifest", "backup manifest exists");
- }
- else
- {
- my $manifest = slurp_file("$backup_path/backup_manifest");
- my $count_of_algorithm_in_manifest =
- (() = $manifest =~ /$algorithm/mig);
- cmp_ok($count_of_algorithm_in_manifest, '>', 100,
- "$algorithm is mentioned many times in the manifest");
- }
-
- # Make sure that it validates OK.
- $master->command_ok(\@validate,
- "validate backup with algorithm \"$algorithm\"");
-
- # Remove backup immediately to save disk space.
- rmtree($backup_path);
-}
diff --git a/src/bin/pg_validatebackup/t/003_corruption.pl b/src/bin/pg_validatebackup/t/003_corruption.pl
deleted file mode 100644
index 09f8b982504..00000000000
--- a/src/bin/pg_validatebackup/t/003_corruption.pl
+++ /dev/null
@@ -1,288 +0,0 @@
-# Verify that various forms of corruption are detected by pg_validatebackup.
-
-use strict;
-use warnings;
-use Cwd;
-use Config;
-use File::Path qw(rmtree);
-use PostgresNode;
-use TestLib;
-use Test::More tests => 44;
-
-my $master = get_new_node('master');
-$master->init(allows_streaming => 1);
-$master->start;
-
-# Include a user-defined tablespace in the hopes of detecting problems in that
-# area.
-my $source_ts_path = TestLib::perl2host(TestLib::tempdir_short());
-my $source_ts_prefix = $source_ts_path;
-$source_ts_prefix =~ s!(^[A-Z]:/[^/]*)/.*!$1!;
-
-$master->safe_psql('postgres', <<EOM);
-CREATE TABLE x1 (a int);
-INSERT INTO x1 VALUES (111);
-CREATE TABLESPACE ts1 LOCATION '$source_ts_path';
-CREATE TABLE x2 (a int) TABLESPACE ts1;
-INSERT INTO x1 VALUES (222);
-EOM
-
-my @scenario = (
-	{
-		'name' => 'extra_file',
- 'mutilate' => \&mutilate_extra_file,
- 'fails_like' =>
- qr/extra_file.*present on disk but not in the manifest/
- },
- {
- 'name' => 'extra_tablespace_file',
- 'mutilate' => \&mutilate_extra_tablespace_file,
- 'fails_like' =>
- qr/extra_ts_file.*present on disk but not in the manifest/
- },
- {
- 'name' => 'missing_file',
- 'mutilate' => \&mutilate_missing_file,
- 'fails_like' =>
- qr/pg_xact\/0000.*present in the manifest but not on disk/
- },
- {
- 'name' => 'missing_tablespace',
- 'mutilate' => \&mutilate_missing_tablespace,
- 'fails_like' =>
- qr/pg_tblspc.*present in the manifest but not on disk/
- },
- {
- 'name' => 'append_to_file',
- 'mutilate' => \&mutilate_append_to_file,
- 'fails_like' =>
- qr/has size \d+ on disk but size \d+ in the manifest/
- },
- {
- 'name' => 'truncate_file',
- 'mutilate' => \&mutilate_truncate_file,
- 'fails_like' =>
- qr/has size 0 on disk but size \d+ in the manifest/
- },
- {
- 'name' => 'replace_file',
- 'mutilate' => \&mutilate_replace_file,
- 'fails_like' => qr/checksum mismatch for file/
- },
- {
- 'name' => 'bad_manifest',
- 'mutilate' => \&mutilate_bad_manifest,
- 'fails_like' => qr/manifest checksum mismatch/
- },
- {
- 'name' => 'open_file_fails',
- 'mutilate' => \&mutilate_open_file_fails,
- 'fails_like' => qr/could not open file/,
- 'skip_on_windows' => 1
- },
- {
- 'name' => 'open_directory_fails',
- 'mutilate' => \&mutilate_open_directory_fails,
- 'cleanup' => \&cleanup_open_directory_fails,
- 'fails_like' => qr/could not open directory/,
- 'skip_on_windows' => 1
- },
- {
- 'name' => 'search_directory_fails',
- 'mutilate' => \&mutilate_search_directory_fails,
- 'cleanup' => \&cleanup_search_directory_fails,
- 'fails_like' => qr/could not stat file or directory/,
- 'skip_on_windows' => 1
- }
-);
-
-for my $scenario (@scenario)
-{
- my $name = $scenario->{'name'};
-
- SKIP:
- {
- skip "unix-style permissions not supported on Windows", 4
- if $scenario->{'skip_on_windows'} && $windows_os;
-
- # Take a backup and check that it validates OK.
- my $backup_path = $master->backup_dir . '/' . $name;
- my $backup_ts_path = TestLib::perl2host(TestLib::tempdir_short());
- # The tablespace map parameter confuses Msys2, which tries to mangle
- # it. Tell it not to.
- # See https://www.msys2.org/wiki/Porting/#filesystem-namespaces
- local $ENV{MSYS2_ARG_CONV_EXCL} = $source_ts_prefix;
- $master->command_ok(['pg_basebackup', '-D', $backup_path, '--no-sync',
- '-T', "${source_ts_path}=${backup_ts_path}"],
- "base backup ok");
- command_ok(['pg_validatebackup', $backup_path ],
- "intact backup validated");
-
- # Mutilate the backup in some way.
- $scenario->{'mutilate'}->($backup_path);
-
- # Now check that the backup no longer validates.
- command_fails_like(['pg_validatebackup', $backup_path ],
- $scenario->{'fails_like'},
- "corrupt backup fails validation: $name");
-
- # Run cleanup hook, if provided.
- $scenario->{'cleanup'}->($backup_path)
- if exists $scenario->{'cleanup'};
-
- # Finally, use rmtree to reclaim space.
- rmtree($backup_path);
- }
-}
-
-sub create_extra_file
-{
- my ($backup_path, $relative_path) = @_;
- my $pathname = "$backup_path/$relative_path";
- open(my $fh, '>', $pathname) || die "open $pathname: $!";
- print $fh "This is an extra file.\n";
- close($fh);
- return;
-}
-
-# Add a file into the root directory of the backup.
-sub mutilate_extra_file
-{
- my ($backup_path) = @_;
- create_extra_file($backup_path, "extra_file");
- return;
-}
-
-# Add a file inside the user-defined tablespace.
-sub mutilate_extra_tablespace_file
-{
- my ($backup_path) = @_;
- my ($tsoid) = grep { $_ ne '.' && $_ ne '..' }
- slurp_dir("$backup_path/pg_tblspc");
- my ($catvdir) = grep { $_ ne '.' && $_ ne '..' }
- slurp_dir("$backup_path/pg_tblspc/$tsoid");
- my ($tsdboid) = grep { $_ ne '.' && $_ ne '..' }
- slurp_dir("$backup_path/pg_tblspc/$tsoid/$catvdir");
- create_extra_file($backup_path,
- "pg_tblspc/$tsoid/$catvdir/$tsdboid/extra_ts_file");
- return;
-}
-
-# Remove a file.
-sub mutilate_missing_file
-{
- my ($backup_path) = @_;
- my $pathname = "$backup_path/pg_xact/0000";
- unlink($pathname) || die "$pathname: $!";
- return;
-}
-
-# Remove the symlink to the user-defined tablespace.
-sub mutilate_missing_tablespace
-{
- my ($backup_path) = @_;
- my ($tsoid) = grep { $_ ne '.' && $_ ne '..' }
- slurp_dir("$backup_path/pg_tblspc");
- my $pathname = "$backup_path/pg_tblspc/$tsoid";
- if ($windows_os)
- {
- # rmdir works on some windows setups, unlink on others.
- # Instead of trying to implement precise rules, just try one and then
- # the other.
- unless (rmdir($pathname))
- {
- my $err = $!;
- unlink($pathname) || die "$pathname: rmdir: $err, unlink: $!";
- }
- }
- else
- {
- unlink($pathname) || die "$pathname: $!";
- }
- return;
-}
-
-# Append an additional byte to a file.
-sub mutilate_append_to_file
-{
- my ($backup_path) = @_;
- append_to_file "$backup_path/global/pg_control", 'x';
- return;
-}
-
-# Truncate a file to zero length.
-sub mutilate_truncate_file
-{
- my ($backup_path) = @_;
- my $pathname = "$backup_path/global/pg_control";
- open(my $fh, '>', $pathname) || die "open $pathname: $!";
- close($fh);
- return;
-}
-
-# Replace a file's contents without changing the length of the file. This is
-# not a particularly efficient way to do this, so we pick a file that's
-# expected to be short.
-sub mutilate_replace_file
-{
- my ($backup_path) = @_;
- my $pathname = "$backup_path/PG_VERSION";
- my $contents = slurp_file($pathname);
- open(my $fh, '>', $pathname) || die "open $pathname: $!";
- print $fh 'q' x length($contents);
- close($fh);
- return;
-}
-
-# Corrupt the backup manifest.
-sub mutilate_bad_manifest
-{
- my ($backup_path) = @_;
- append_to_file "$backup_path/backup_manifest", "\n";
- return;
-}
-
-# Create a file that can't be opened. (This is skipped on Windows.)
-sub mutilate_open_file_fails
-{
- my ($backup_path) = @_;
- my $pathname = "$backup_path/PG_VERSION";
- chmod(0, $pathname) || die "chmod $pathname: $!";
- return;
-}
-
-# Create a directory that can't be opened. (This is skipped on Windows.)
-sub mutilate_open_directory_fails
-{
- my ($backup_path) = @_;
- my $pathname = "$backup_path/pg_subtrans";
- chmod(0, $pathname) || die "chmod $pathname: $!";
- return;
-}
-
-# restore permissions on the unreadable directory we created.
-sub cleanup_open_directory_fails
-{
- my ($backup_path) = @_;
- my $pathname = "$backup_path/pg_subtrans";
- chmod(0700, $pathname) || die "chmod $pathname: $!";
- return;
-}
-
-# Create a directory that can't be searched. (This is skipped on Windows.)
-sub mutilate_search_directory_fails
-{
- my ($backup_path) = @_;
- my $pathname = "$backup_path/base";
- chmod(0400, $pathname) || die "chmod $pathname: $!";
- return;
-}
-
-# rmtree can't cope with a mode 400 directory, so change back to 700.
-sub cleanup_search_directory_fails
-{
- my ($backup_path) = @_;
- my $pathname = "$backup_path/base";
- chmod(0700, $pathname) || die "chmod $pathname: $!";
- return;
-}
diff --git a/src/bin/pg_validatebackup/t/004_options.pl b/src/bin/pg_validatebackup/t/004_options.pl
deleted file mode 100644
index 8f185626ed6..00000000000
--- a/src/bin/pg_validatebackup/t/004_options.pl
+++ /dev/null
@@ -1,89 +0,0 @@
-# Verify the behavior of assorted pg_validatebackup options.
-
-use strict;
-use warnings;
-use Cwd;
-use Config;
-use File::Path qw(rmtree);
-use PostgresNode;
-use TestLib;
-use Test::More tests => 25;
-
-# Start up the server and take a backup.
-my $master = get_new_node('master');
-$master->init(allows_streaming => 1);
-$master->start;
-my $backup_path = $master->backup_dir . '/test_options';
-$master->command_ok(['pg_basebackup', '-D', $backup_path, '--no-sync' ],
- "base backup ok");
-
-# Verify that pg_validatebackup -q succeeds and produces no output.
-my $stdout;
-my $stderr;
-my $result = IPC::Run::run ['pg_validatebackup', '-q', $backup_path ],
- '>', \$stdout, '2>', \$stderr;
-ok($result, "-q succeeds: exit code 0");
-is($stdout, '', "-q succeeds: no stdout");
-is($stderr, '', "-q succeeds: no stderr");
-
-# Corrupt the PG_VERSION file.
-my $version_pathname = "$backup_path/PG_VERSION";
-my $version_contents = slurp_file($version_pathname);
-open(my $fh, '>', $version_pathname) || die "open $version_pathname: $!";
-print $fh 'q' x length($version_contents);
-close($fh);
-
-# Verify that pg_validatebackup -q now fails.
-command_fails_like(['pg_validatebackup', '-q', $backup_path ],
- qr/checksum mismatch for file \"PG_VERSION\"/,
- '-q checksum mismatch');
-
-# Since we didn't change the length of the file, validation should succeed
-# if we ignore checksums. Check that we get the right message, too.
-command_like(['pg_validatebackup', '-s', $backup_path ],
- qr/backup successfully verified/,
- '-s skips checksumming');
-
-# Validation should succeed if we ignore the problem file.
-command_like(['pg_validatebackup', '-i', 'PG_VERSION', $backup_path ],
- qr/backup successfully verified/,
- '-i ignores problem file');
-
-# PG_VERSION is already corrupt; let's try also removing all of pg_xact.
-rmtree($backup_path . "/pg_xact");
-
-# We're ignoring the problem with PG_VERSION, but not the problem with
-# pg_xact, so validation should fail here.
-command_fails_like(['pg_validatebackup', '-i', 'PG_VERSION', $backup_path ],
- qr/pg_xact.*is present in the manifest but not on disk/,
- '-i does not ignore all problems');
-
-# If we use -i twice, we should be able to ignore all of the problems.
-command_like(['pg_validatebackup', '-i', 'PG_VERSION', '-i', 'pg_xact',
- $backup_path ],
- qr/backup successfully verified/,
- 'multiple -i options work');
-
-# Verify that when -i is not used, both problems are reported.
-$result = IPC::Run::run ['pg_validatebackup', $backup_path ],
- '>', \$stdout, '2>', \$stderr;
-ok(!$result, "multiple problems: fails");
-like($stderr, qr/pg_xact.*is present in the manifest but not on disk/,
- "multiple problems: missing files reported");
-like($stderr, qr/checksum mismatch for file \"PG_VERSION\"/,
- "multiple problems: checksum mismatch reported");
-
-# Verify that when -e is used, only the problem detected first is reported.
-$result = IPC::Run::run ['pg_validatebackup', '-e', $backup_path ],
- '>', \$stdout, '2>', \$stderr;
-ok(!$result, "-e reports 1 error: fails");
-like($stderr, qr/pg_xact.*is present in the manifest but not on disk/,
- "-e reports 1 error: missing files reported");
-unlike($stderr, qr/checksum mismatch for file \"PG_VERSION\"/,
- "-e reports 1 error: checksum mismatch not reported");
-
-# Test valid manifest with nonexistent backup directory.
-command_fails_like(['pg_validatebackup', '-m', "$backup_path/backup_manifest",
- "$backup_path/fake" ],
- qr/could not open directory/,
- 'nonexistent backup directory');
diff --git a/src/bin/pg_validatebackup/t/005_bad_manifest.pl b/src/bin/pg_validatebackup/t/005_bad_manifest.pl
deleted file mode 100644
index f52a8b71ea9..00000000000
--- a/src/bin/pg_validatebackup/t/005_bad_manifest.pl
+++ /dev/null
@@ -1,204 +0,0 @@
-# Test the behavior of pg_validatebackup when the backup manifest has
-# problems.
-
-use strict;
-use warnings;
-use Cwd;
-use Config;
-use PostgresNode;
-use TestLib;
-use Test::More tests => 58;
-
-my $tempdir = TestLib::tempdir;
-
-test_bad_manifest('input string ended unexpectedly',
-				  qr/could not parse backup manifest: The input string ended unexpectedly/,
-				  <<EOM);
-{
-EOM
-
-# NOTE(review): the intervening deleted lines (the remaining
-# test_bad_manifest() cases exercising malformed manifests) were lost
-# to extraction garbling here; recover them from the original
-# src/bin/pg_validatebackup/t/005_bad_manifest.pl before reusing this patch.
-
-sub test_bad_manifest
-{
-	my ($test_name, $regexp, $manifest_contents) = @_;
-
-	open(my $fh, '>', "$tempdir/backup_manifest") || die "open: $!";
- print $fh $manifest_contents;
- close($fh);
-
- command_fails_like(['pg_validatebackup', $tempdir], $regexp,
- $test_name);
- return;
-}
diff --git a/src/bin/pg_validatebackup/t/006_encoding.pl b/src/bin/pg_validatebackup/t/006_encoding.pl
deleted file mode 100644
index 5e3e7152a52..00000000000
--- a/src/bin/pg_validatebackup/t/006_encoding.pl
+++ /dev/null
@@ -1,27 +0,0 @@
-# Verify that pg_validatebackup handles hex-encoded filenames correctly.
-
-use strict;
-use warnings;
-use Cwd;
-use Config;
-use PostgresNode;
-use TestLib;
-use Test::More tests => 5;
-
-my $master = get_new_node('master');
-$master->init(allows_streaming => 1);
-$master->start;
-my $backup_path = $master->backup_dir . '/test_encoding';
-$master->command_ok(['pg_basebackup', '-D', $backup_path, '--no-sync',
- '--manifest-force-encode' ],
- "backup ok with forced hex encoding");
-
-my $manifest = slurp_file("$backup_path/backup_manifest");
-my $count_of_encoded_path_in_manifest =
- (() = $manifest =~ /Encoded-Path/mig);
-cmp_ok($count_of_encoded_path_in_manifest, '>', 100,
- "many paths are encoded in the manifest");
-
-command_like(['pg_validatebackup', '-s', $backup_path ],
- qr/backup successfully verified/,
- 'backup with forced encoding validated');
diff --git a/src/bin/pg_validatebackup/t/007_wal.pl b/src/bin/pg_validatebackup/t/007_wal.pl
deleted file mode 100644
index b755e0f5e7f..00000000000
--- a/src/bin/pg_validatebackup/t/007_wal.pl
+++ /dev/null
@@ -1,55 +0,0 @@
-# Test pg_validatebackup's WAL validation.
-
-use strict;
-use warnings;
-use Cwd;
-use Config;
-use File::Path qw(rmtree);
-use PostgresNode;
-use TestLib;
-use Test::More tests => 7;
-
-# Start up the server and take a backup.
-my $master = get_new_node('master');
-$master->init(allows_streaming => 1);
-$master->start;
-my $backup_path = $master->backup_dir . '/test_wal';
-$master->command_ok(['pg_basebackup', '-D', $backup_path, '--no-sync' ],
- "base backup ok");
-
-# Rename pg_wal.
-my $original_pg_wal = $backup_path . '/pg_wal';
-my $relocated_pg_wal = $master->backup_dir . '/relocated_pg_wal';
-rename($original_pg_wal, $relocated_pg_wal) || die "rename pg_wal: $!";
-
-# WAL validation should fail.
-command_fails_like(['pg_validatebackup', $backup_path ],
- qr/WAL parsing failed for timeline 1/,
- 'missing pg_wal causes failure');
-
-# Should work if we skip WAL verification.
-command_ok(['pg_validatebackup', '-n', $backup_path ],
- 'missing pg_wal OK if not verifying WAL');
-
-# Should also work if we specify the correct WAL location.
-command_ok(['pg_validatebackup', '-w', $relocated_pg_wal, $backup_path ],
- '-w can be used to specify WAL directory');
-
-# Move directory back to original location.
-rename($relocated_pg_wal, $original_pg_wal) || die "rename pg_wal back: $!";
-
-# Get a list of files in that directory that look like WAL files.
-my @walfiles = grep { /^[0-9A-F]{24}$/ } slurp_dir($original_pg_wal);
-
-# Replace the contents of one of the files with garbage of equal length.
-my $wal_corruption_target = $original_pg_wal . '/' . $walfiles[0];
-my $wal_size = -s $wal_corruption_target;
-open(my $fh, '>', $wal_corruption_target)
- || die "open $wal_corruption_target: $!";
-print $fh 'w' x $wal_size;
-close($fh);
-
-# WAL validation should fail.
-command_fails_like(['pg_validatebackup', $backup_path ],
- qr/WAL parsing failed for timeline 1/,
- 'corrupt WAL file causes failure');
diff --git a/src/bin/pg_verifybackup/.gitignore b/src/bin/pg_verifybackup/.gitignore
new file mode 100644
index 00000000000..910b227ce99
--- /dev/null
+++ b/src/bin/pg_verifybackup/.gitignore
@@ -0,0 +1,2 @@
+/pg_verifybackup
+/tmp_check/
diff --git a/src/bin/pg_verifybackup/Makefile b/src/bin/pg_verifybackup/Makefile
new file mode 100644
index 00000000000..c07643b1297
--- /dev/null
+++ b/src/bin/pg_verifybackup/Makefile
@@ -0,0 +1,40 @@
+# src/bin/pg_verifybackup/Makefile
+
+PGFILEDESC = "pg_verifybackup - verify a backup against a backup manifest"
+PGAPPICON = win32
+
+subdir = src/bin/pg_verifybackup
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+# We need libpq only because fe_utils does.
+LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
+
+OBJS = \
+ $(WIN32RES) \
+ parse_manifest.o \
+ pg_verifybackup.o
+
+all: pg_verifybackup
+
+pg_verifybackup: $(OBJS) | submake-libpq submake-libpgport submake-libpgfeutils
+ $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
+
+install: all installdirs
+ $(INSTALL_PROGRAM) pg_verifybackup$(X) '$(DESTDIR)$(bindir)/pg_verifybackup$(X)'
+
+installdirs:
+ $(MKDIR_P) '$(DESTDIR)$(bindir)'
+
+uninstall:
+ rm -f '$(DESTDIR)$(bindir)/pg_verifybackup$(X)'
+
+clean distclean maintainer-clean:
+ rm -f pg_verifybackup$(X) $(OBJS)
+ rm -rf tmp_check
+
+check:
+ $(prove_check)
+
+installcheck:
+ $(prove_installcheck)
diff --git a/src/bin/pg_verifybackup/parse_manifest.c b/src/bin/pg_verifybackup/parse_manifest.c
new file mode 100644
index 00000000000..faee423c7ec
--- /dev/null
+++ b/src/bin/pg_verifybackup/parse_manifest.c
@@ -0,0 +1,740 @@
+/*-------------------------------------------------------------------------
+ *
+ * parse_manifest.c
+ * Parse a backup manifest in JSON format.
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/bin/pg_verifybackup/parse_manifest.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#include "parse_manifest.h"
+#include "common/jsonapi.h"
+
+/*
+ * Semantic states for JSON manifest parsing.
+ */
+typedef enum
+{
+ JM_EXPECT_TOPLEVEL_START,
+ JM_EXPECT_TOPLEVEL_END,
+ JM_EXPECT_TOPLEVEL_FIELD,
+ JM_EXPECT_VERSION_VALUE,
+ JM_EXPECT_FILES_START,
+ JM_EXPECT_FILES_NEXT,
+ JM_EXPECT_THIS_FILE_FIELD,
+ JM_EXPECT_THIS_FILE_VALUE,
+ JM_EXPECT_WAL_RANGES_START,
+ JM_EXPECT_WAL_RANGES_NEXT,
+ JM_EXPECT_THIS_WAL_RANGE_FIELD,
+ JM_EXPECT_THIS_WAL_RANGE_VALUE,
+ JM_EXPECT_MANIFEST_CHECKSUM_VALUE,
+ JM_EXPECT_EOF
+} JsonManifestSemanticState;
+
+/*
+ * Possible fields for one file as described by the manifest.
+ */
+typedef enum
+{
+ JMFF_PATH,
+ JMFF_ENCODED_PATH,
+ JMFF_SIZE,
+ JMFF_LAST_MODIFIED,
+ JMFF_CHECKSUM_ALGORITHM,
+ JMFF_CHECKSUM
+} JsonManifestFileField;
+
+/*
+ * Possible fields for one WAL range as described by the manifest.
+ */
+typedef enum
+{
+ JMWRF_TIMELINE,
+ JMWRF_START_LSN,
+ JMWRF_END_LSN
+} JsonManifestWALRangeField;
+
+/*
+ * Internal state used while decoding the JSON-format backup manifest.
+ */
+typedef struct
+{
+ JsonManifestParseContext *context;
+ JsonManifestSemanticState state;
+
+ /* These fields are used for parsing objects in the list of files. */
+ JsonManifestFileField file_field;
+ char *pathname;
+ char *encoded_pathname;
+ char *size;
+ char *algorithm;
+ pg_checksum_type checksum_algorithm;
+ char *checksum;
+
+ /* These fields are used for parsing objects in the list of WAL ranges. */
+ JsonManifestWALRangeField wal_range_field;
+ char *timeline;
+ char *start_lsn;
+ char *end_lsn;
+
+ /* Miscellaneous other stuff. */
+ bool saw_version_field;
+ char *manifest_checksum;
+} JsonManifestParseState;
+
+static void json_manifest_object_start(void *state);
+static void json_manifest_object_end(void *state);
+static void json_manifest_array_start(void *state);
+static void json_manifest_array_end(void *state);
+static void json_manifest_object_field_start(void *state, char *fname,
+ bool isnull);
+static void json_manifest_scalar(void *state, char *token,
+ JsonTokenType tokentype);
+static void json_manifest_finalize_file(JsonManifestParseState *parse);
+static void json_manifest_finalize_wal_range(JsonManifestParseState *parse);
+static void verify_manifest_checksum(JsonManifestParseState *parse,
+ char *buffer, size_t size);
+static void json_manifest_parse_failure(JsonManifestParseContext *context,
+ char *msg);
+
+static int hexdecode_char(char c);
+static bool hexdecode_string(uint8 *result, char *input, int nbytes);
+static bool parse_xlogrecptr(XLogRecPtr *result, char *input);
+
+/*
+ * Main entrypoint to parse a JSON-format backup manifest.
+ *
+ * Caller should set up the parsing context and then invoke this function.
+ * For each file whose information is extracted from the manifest,
+ * context->perfile_cb is invoked. In case of trouble, context->error_cb is
+ * invoked and is expected not to return.
+ */
+void
+json_parse_manifest(JsonManifestParseContext *context, char *buffer,
+ size_t size)
+{
+ JsonLexContext *lex;
+ JsonParseErrorType json_error;
+ JsonSemAction sem;
+ JsonManifestParseState parse;
+
+ /* Set up our private parsing context. */
+ parse.context = context;
+ parse.state = JM_EXPECT_TOPLEVEL_START;
+ parse.saw_version_field = false;
+
+ /* Create a JSON lexing context. */
+ lex = makeJsonLexContextCstringLen(buffer, size, PG_UTF8, true);
+
+ /* Set up semantic actions. */
+ sem.semstate = &parse;
+ sem.object_start = json_manifest_object_start;
+ sem.object_end = json_manifest_object_end;
+ sem.array_start = json_manifest_array_start;
+ sem.array_end = json_manifest_array_end;
+ sem.object_field_start = json_manifest_object_field_start;
+ sem.object_field_end = NULL;
+ sem.array_element_start = NULL;
+ sem.array_element_end = NULL;
+ sem.scalar = json_manifest_scalar;
+
+ /* Run the actual JSON parser. */
+ json_error = pg_parse_json(lex, &sem);
+ if (json_error != JSON_SUCCESS)
+ json_manifest_parse_failure(context, json_errdetail(json_error, lex));
+ if (parse.state != JM_EXPECT_EOF)
+ json_manifest_parse_failure(context, "manifest ended unexpectedly");
+
+ /* Verify the manifest checksum. */
+ verify_manifest_checksum(&parse, buffer, size);
+}
+
+/*
+ * Invoked at the start of each object in the JSON document.
+ *
+ * The document as a whole is expected to be an object; each file and each
+ * WAL range is also expected to be an object. If we're anywhere else in the
+ * document, it's an error.
+ */
+static void
+json_manifest_object_start(void *state)
+{
+ JsonManifestParseState *parse = state;
+
+ switch (parse->state)
+ {
+ case JM_EXPECT_TOPLEVEL_START:
+ parse->state = JM_EXPECT_TOPLEVEL_FIELD;
+ break;
+ case JM_EXPECT_FILES_NEXT:
+ parse->state = JM_EXPECT_THIS_FILE_FIELD;
+ parse->pathname = NULL;
+ parse->encoded_pathname = NULL;
+ parse->size = NULL;
+ parse->algorithm = NULL;
+ parse->checksum = NULL;
+ break;
+ case JM_EXPECT_WAL_RANGES_NEXT:
+ parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD;
+ parse->timeline = NULL;
+ parse->start_lsn = NULL;
+ parse->end_lsn = NULL;
+ break;
+ default:
+ json_manifest_parse_failure(parse->context,
+ "unexpected object start");
+ break;
+ }
+}
+
+/*
+ * Invoked at the end of each object in the JSON document.
+ *
+ * The possible cases here are the same as for json_manifest_object_start.
+ * There's nothing special to do at the end of the document, but when we
+ * reach the end of an object representing a particular file or WAL range,
+ * we must call json_manifest_finalize_file() to save the associated details.
+ */
+static void
+json_manifest_object_end(void *state)
+{
+ JsonManifestParseState *parse = state;
+
+ switch (parse->state)
+ {
+ case JM_EXPECT_TOPLEVEL_END:
+ parse->state = JM_EXPECT_EOF;
+ break;
+ case JM_EXPECT_THIS_FILE_FIELD:
+ json_manifest_finalize_file(parse);
+ parse->state = JM_EXPECT_FILES_NEXT;
+ break;
+ case JM_EXPECT_THIS_WAL_RANGE_FIELD:
+ json_manifest_finalize_wal_range(parse);
+ parse->state = JM_EXPECT_WAL_RANGES_NEXT;
+ break;
+ default:
+ json_manifest_parse_failure(parse->context,
+ "unexpected object end");
+ break;
+ }
+}
+
+/*
+ * Invoked at the start of each array in the JSON document.
+ *
+ * Within the toplevel object, the value associated with the "Files" key
+ * should be an array. Similarly for the "WAL-Ranges" key. No other arrays
+ * are expected.
+ */
+static void
+json_manifest_array_start(void *state)
+{
+ JsonManifestParseState *parse = state;
+
+ switch (parse->state)
+ {
+ case JM_EXPECT_FILES_START:
+ parse->state = JM_EXPECT_FILES_NEXT;
+ break;
+ case JM_EXPECT_WAL_RANGES_START:
+ parse->state = JM_EXPECT_WAL_RANGES_NEXT;
+ break;
+ default:
+ json_manifest_parse_failure(parse->context,
+ "unexpected array start");
+ break;
+ }
+}
+
+/*
+ * Invoked at the end of each array in the JSON document.
+ *
+ * The cases here are analogous to those in json_manifest_array_start.
+ */
+static void
+json_manifest_array_end(void *state)
+{
+ JsonManifestParseState *parse = state;
+
+ switch (parse->state)
+ {
+ case JM_EXPECT_FILES_NEXT:
+ case JM_EXPECT_WAL_RANGES_NEXT:
+ parse->state = JM_EXPECT_TOPLEVEL_FIELD;
+ break;
+ default:
+ json_manifest_parse_failure(parse->context,
+ "unexpected array end");
+ break;
+ }
+}
+
+/*
+ * Invoked at the start of each object field in the JSON document.
+ */
+static void
+json_manifest_object_field_start(void *state, char *fname, bool isnull)
+{
+ JsonManifestParseState *parse = state;
+
+ switch (parse->state)
+ {
+ case JM_EXPECT_TOPLEVEL_FIELD:
+
+ /*
+ * Inside toplevel object. The version indicator should always be
+ * the first field.
+ */
+ if (!parse->saw_version_field)
+ {
+ if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0)
+ json_manifest_parse_failure(parse->context,
+ "expected version indicator");
+ parse->state = JM_EXPECT_VERSION_VALUE;
+ parse->saw_version_field = true;
+ break;
+ }
+
+ /* Is this the list of files? */
+ if (strcmp(fname, "Files") == 0)
+ {
+ parse->state = JM_EXPECT_FILES_START;
+ break;
+ }
+
+ /* Is this the list of WAL ranges? */
+ if (strcmp(fname, "WAL-Ranges") == 0)
+ {
+ parse->state = JM_EXPECT_WAL_RANGES_START;
+ break;
+ }
+
+ /* Is this the manifest checksum? */
+ if (strcmp(fname, "Manifest-Checksum") == 0)
+ {
+ parse->state = JM_EXPECT_MANIFEST_CHECKSUM_VALUE;
+ break;
+ }
+
+ /* It's not a field we recognize. */
+ json_manifest_parse_failure(parse->context,
+ "unknown toplevel field");
+ break;
+
+ case JM_EXPECT_THIS_FILE_FIELD:
+ /* Inside object for one file; which key have we got? */
+ if (strcmp(fname, "Path") == 0)
+ parse->file_field = JMFF_PATH;
+ else if (strcmp(fname, "Encoded-Path") == 0)
+ parse->file_field = JMFF_ENCODED_PATH;
+ else if (strcmp(fname, "Size") == 0)
+ parse->file_field = JMFF_SIZE;
+ else if (strcmp(fname, "Last-Modified") == 0)
+ parse->file_field = JMFF_LAST_MODIFIED;
+ else if (strcmp(fname, "Checksum-Algorithm") == 0)
+ parse->file_field = JMFF_CHECKSUM_ALGORITHM;
+ else if (strcmp(fname, "Checksum") == 0)
+ parse->file_field = JMFF_CHECKSUM;
+ else
+ json_manifest_parse_failure(parse->context,
+ "unexpected file field");
+ parse->state = JM_EXPECT_THIS_FILE_VALUE;
+ break;
+
+ case JM_EXPECT_THIS_WAL_RANGE_FIELD:
+			/* Inside object for one WAL range; which key have we got? */
+ if (strcmp(fname, "Timeline") == 0)
+ parse->wal_range_field = JMWRF_TIMELINE;
+ else if (strcmp(fname, "Start-LSN") == 0)
+ parse->wal_range_field = JMWRF_START_LSN;
+ else if (strcmp(fname, "End-LSN") == 0)
+ parse->wal_range_field = JMWRF_END_LSN;
+ else
+ json_manifest_parse_failure(parse->context,
+ "unexpected wal range field");
+ parse->state = JM_EXPECT_THIS_WAL_RANGE_VALUE;
+ break;
+
+ default:
+ json_manifest_parse_failure(parse->context,
+ "unexpected object field");
+ break;
+ }
+}
+
+/*
+ * Invoked at the start of each scalar in the JSON document.
+ *
+ * Object field names don't reach this code; those are handled by
+ * json_manifest_object_field_start. When we're inside of the object for
+ * a particular file or WAL range, that function will have noticed the name
+ * of the field, and we'll get the corresponding value here. When we're in
+ * the toplevel object, the parse state itself tells us which field this is.
+ *
+ * In all cases except for PostgreSQL-Backup-Manifest-Version, which we
+ * can just check on the spot, the goal here is just to save the value in
+ * the parse state for later use. We don't actually do anything until we
+ * reach either the end of the object representing this file, or the end
+ * of the manifest, as the case may be.
+ */
+static void
+json_manifest_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+ JsonManifestParseState *parse = state;
+
+ switch (parse->state)
+ {
+ case JM_EXPECT_VERSION_VALUE:
+ if (strcmp(token, "1") != 0)
+ json_manifest_parse_failure(parse->context,
+ "unexpected manifest version");
+ parse->state = JM_EXPECT_TOPLEVEL_FIELD;
+ break;
+
+ case JM_EXPECT_THIS_FILE_VALUE:
+ switch (parse->file_field)
+ {
+ case JMFF_PATH:
+ parse->pathname = token;
+ break;
+ case JMFF_ENCODED_PATH:
+ parse->encoded_pathname = token;
+ break;
+ case JMFF_SIZE:
+ parse->size = token;
+ break;
+ case JMFF_LAST_MODIFIED:
+ pfree(token); /* unused */
+ break;
+ case JMFF_CHECKSUM_ALGORITHM:
+ parse->algorithm = token;
+ break;
+ case JMFF_CHECKSUM:
+ parse->checksum = token;
+ break;
+ }
+ parse->state = JM_EXPECT_THIS_FILE_FIELD;
+ break;
+
+ case JM_EXPECT_THIS_WAL_RANGE_VALUE:
+ switch (parse->wal_range_field)
+ {
+ case JMWRF_TIMELINE:
+ parse->timeline = token;
+ break;
+ case JMWRF_START_LSN:
+ parse->start_lsn = token;
+ break;
+ case JMWRF_END_LSN:
+ parse->end_lsn = token;
+ break;
+ }
+ parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD;
+ break;
+
+ case JM_EXPECT_MANIFEST_CHECKSUM_VALUE:
+ parse->state = JM_EXPECT_TOPLEVEL_END;
+ parse->manifest_checksum = token;
+ break;
+
+ default:
+ json_manifest_parse_failure(parse->context, "unexpected scalar");
+ break;
+ }
+}
+
+/*
+ * Do additional parsing and sanity-checking of the details gathered for one
+ * file, and invoke the per-file callback so that the caller gets those
+ * details. This happens for each file when the corresponding JSON object is
+ * completely parsed.
+ */
+static void
+json_manifest_finalize_file(JsonManifestParseState *parse)
+{
+ JsonManifestParseContext *context = parse->context;
+ size_t size;
+ char *ep;
+ int checksum_string_length;
+ pg_checksum_type checksum_type;
+ int checksum_length;
+ uint8 *checksum_payload;
+
+ /* Pathname and size are required. */
+ if (parse->pathname == NULL && parse->encoded_pathname == NULL)
+ json_manifest_parse_failure(parse->context, "missing pathname");
+ if (parse->pathname != NULL && parse->encoded_pathname != NULL)
+ json_manifest_parse_failure(parse->context,
+ "both pathname and encoded pathname");
+ if (parse->size == NULL)
+ json_manifest_parse_failure(parse->context, "missing size");
+ if (parse->algorithm == NULL && parse->checksum != NULL)
+ json_manifest_parse_failure(parse->context,
+ "checksum without algorithm");
+
+ /* Decode encoded pathname, if that's what we have. */
+ if (parse->encoded_pathname != NULL)
+ {
+ int encoded_length = strlen(parse->encoded_pathname);
+ int raw_length = encoded_length / 2;
+
+ parse->pathname = palloc(raw_length + 1);
+ if (encoded_length % 2 != 0 ||
+ !hexdecode_string((uint8 *) parse->pathname,
+ parse->encoded_pathname,
+ raw_length))
+ json_manifest_parse_failure(parse->context,
+ "unable to decode filename");
+ parse->pathname[raw_length] = '\0';
+ pfree(parse->encoded_pathname);
+ parse->encoded_pathname = NULL;
+ }
+
+ /* Parse size. */
+ size = strtoul(parse->size, &ep, 10);
+ if (*ep)
+ json_manifest_parse_failure(parse->context,
+ "file size is not an integer");
+
+ /* Parse the checksum algorithm, if it's present. */
+ if (parse->algorithm == NULL)
+ checksum_type = CHECKSUM_TYPE_NONE;
+ else if (!pg_checksum_parse_type(parse->algorithm, &checksum_type))
+ context->error_cb(context, "unrecognized checksum algorithm: \"%s\"",
+ parse->algorithm);
+
+ /* Parse the checksum payload, if it's present. */
+ checksum_string_length = parse->checksum == NULL ? 0
+ : strlen(parse->checksum);
+ if (checksum_string_length == 0)
+ {
+ checksum_length = 0;
+ checksum_payload = NULL;
+ }
+ else
+ {
+ checksum_length = checksum_string_length / 2;
+ checksum_payload = palloc(checksum_length);
+ if (checksum_string_length % 2 != 0 ||
+ !hexdecode_string(checksum_payload, parse->checksum,
+ checksum_length))
+ context->error_cb(context,
+ "invalid checksum for file \"%s\": \"%s\"",
+ parse->pathname, parse->checksum);
+ }
+
+ /* Invoke the callback with the details we've gathered. */
+ context->perfile_cb(context, parse->pathname, size,
+ checksum_type, checksum_length, checksum_payload);
+
+ /* Free memory we no longer need. */
+ if (parse->size != NULL)
+ {
+ pfree(parse->size);
+ parse->size = NULL;
+ }
+ if (parse->algorithm != NULL)
+ {
+ pfree(parse->algorithm);
+ parse->algorithm = NULL;
+ }
+ if (parse->checksum != NULL)
+ {
+ pfree(parse->checksum);
+ parse->checksum = NULL;
+ }
+}
+
+/*
+ * Do additional parsing and sanity-checking of the details gathered for one
+ * WAL range, and invoke the per-WAL-range callback so that the caller gets
+ * those details. This happens for each WAL range when the corresponding JSON
+ * object is completely parsed.
+ */
+static void
+json_manifest_finalize_wal_range(JsonManifestParseState *parse)
+{
+ JsonManifestParseContext *context = parse->context;
+ TimeLineID tli;
+ XLogRecPtr start_lsn,
+ end_lsn;
+ char *ep;
+
+ /* Make sure all fields are present. */
+ if (parse->timeline == NULL)
+ json_manifest_parse_failure(parse->context, "missing timeline");
+ if (parse->start_lsn == NULL)
+ json_manifest_parse_failure(parse->context, "missing start LSN");
+ if (parse->end_lsn == NULL)
+ json_manifest_parse_failure(parse->context, "missing end LSN");
+
+ /* Parse timeline. */
+ tli = strtoul(parse->timeline, &ep, 10);
+ if (*ep)
+ json_manifest_parse_failure(parse->context,
+ "timeline is not an integer");
+ if (!parse_xlogrecptr(&start_lsn, parse->start_lsn))
+ json_manifest_parse_failure(parse->context,
+ "unable to parse start LSN");
+ if (!parse_xlogrecptr(&end_lsn, parse->end_lsn))
+ json_manifest_parse_failure(parse->context,
+ "unable to parse end LSN");
+
+ /* Invoke the callback with the details we've gathered. */
+ context->perwalrange_cb(context, tli, start_lsn, end_lsn);
+
+ /* Free memory we no longer need. */
+ if (parse->timeline != NULL)
+ {
+ pfree(parse->timeline);
+ parse->timeline = NULL;
+ }
+ if (parse->start_lsn != NULL)
+ {
+ pfree(parse->start_lsn);
+ parse->start_lsn = NULL;
+ }
+ if (parse->end_lsn != NULL)
+ {
+ pfree(parse->end_lsn);
+ parse->end_lsn = NULL;
+ }
+}
+
+/*
+ * Verify that the manifest checksum is correct.
+ *
+ * The last line of the manifest file is excluded from the manifest checksum,
+ * because the last line is expected to contain the checksum that covers
+ * the rest of the file.
+ */
+static void
+verify_manifest_checksum(JsonManifestParseState *parse, char *buffer,
+ size_t size)
+{
+ JsonManifestParseContext *context = parse->context;
+ size_t i;
+ size_t number_of_newlines = 0;
+ size_t ultimate_newline = 0;
+ size_t penultimate_newline = 0;
+ pg_sha256_ctx manifest_ctx;
+ uint8 manifest_checksum_actual[PG_SHA256_DIGEST_LENGTH];
+ uint8 manifest_checksum_expected[PG_SHA256_DIGEST_LENGTH];
+
+ /* Find the last two newlines in the file. */
+ for (i = 0; i < size; ++i)
+ {
+ if (buffer[i] == '\n')
+ {
+ ++number_of_newlines;
+ penultimate_newline = ultimate_newline;
+ ultimate_newline = i;
+ }
+ }
+
+ /*
+ * Make sure that the last newline is right at the end, and that there are
+ * at least two lines total. We need this to be true in order for the
+ * following code, which computes the manifest checksum, to work properly.
+ */
+ if (number_of_newlines < 2)
+ json_manifest_parse_failure(parse->context,
+ "expected at least 2 lines");
+ if (ultimate_newline != size - 1)
+ json_manifest_parse_failure(parse->context,
+ "last line not newline-terminated");
+
+ /* Checksum the rest. */
+ pg_sha256_init(&manifest_ctx);
+ pg_sha256_update(&manifest_ctx, (uint8 *) buffer, penultimate_newline + 1);
+ pg_sha256_final(&manifest_ctx, manifest_checksum_actual);
+
+ /* Now verify it. */
+ if (parse->manifest_checksum == NULL)
+ context->error_cb(parse->context, "manifest has no checksum");
+ if (strlen(parse->manifest_checksum) != PG_SHA256_DIGEST_LENGTH * 2 ||
+ !hexdecode_string(manifest_checksum_expected, parse->manifest_checksum,
+ PG_SHA256_DIGEST_LENGTH))
+ context->error_cb(context, "invalid manifest checksum: \"%s\"",
+ parse->manifest_checksum);
+ if (memcmp(manifest_checksum_actual, manifest_checksum_expected,
+ PG_SHA256_DIGEST_LENGTH) != 0)
+ context->error_cb(context, "manifest checksum mismatch");
+}
+
+/*
+ * Report a parse error.
+ *
+ * This is intended to be used for fairly low-level failures that probably
+ * shouldn't occur unless somebody has deliberately constructed a bad manifest,
+ * or unless the server is generating bad manifests due to some bug. msg should
+ * be a short string giving some hint as to what the problem is.
+ */
+static void
+json_manifest_parse_failure(JsonManifestParseContext *context, char *msg)
+{
+ context->error_cb(context, "could not parse backup manifest: %s", msg);
+}
+
+/*
+ * Convert a character which represents a hexadecimal digit to an integer.
+ *
+ * Returns -1 if the character is not a hexadecimal digit.
+ */
+static int
+hexdecode_char(char c)
+{
+ if (c >= '0' && c <= '9')
+ return c - '0';
+ if (c >= 'a' && c <= 'f')
+ return c - 'a' + 10;
+ if (c >= 'A' && c <= 'F')
+ return c - 'A' + 10;
+
+ return -1;
+}
+
+/*
+ * Decode a hex string into a byte string, 2 hex chars per byte.
+ *
+ * Returns false if invalid characters are encountered; otherwise true.
+ */
+static bool
+hexdecode_string(uint8 *result, char *input, int nbytes)
+{
+ int i;
+
+ for (i = 0; i < nbytes; ++i)
+ {
+ int n1 = hexdecode_char(input[i * 2]);
+ int n2 = hexdecode_char(input[i * 2 + 1]);
+
+ if (n1 < 0 || n2 < 0)
+ return false;
+ result[i] = n1 * 16 + n2;
+ }
+
+ return true;
+}
+
+/*
+ * Parse an XLogRecPtr expressed using the usual string format.
+ */
+static bool
+parse_xlogrecptr(XLogRecPtr *result, char *input)
+{
+ uint32 hi;
+ uint32 lo;
+
+ if (sscanf(input, "%X/%X", &hi, &lo) != 2)
+ return false;
+ *result = ((uint64) hi) << 32 | lo;
+ return true;
+}
diff --git a/src/bin/pg_verifybackup/parse_manifest.h b/src/bin/pg_verifybackup/parse_manifest.h
new file mode 100644
index 00000000000..49254bfb324
--- /dev/null
+++ b/src/bin/pg_verifybackup/parse_manifest.h
@@ -0,0 +1,45 @@
+/*-------------------------------------------------------------------------
+ *
+ * parse_manifest.h
+ * Parse a backup manifest in JSON format.
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/bin/pg_verifybackup/parse_manifest.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef PARSE_MANIFEST_H
+#define PARSE_MANIFEST_H
+
+#include "access/xlogdefs.h"
+#include "common/checksum_helper.h"
+#include "mb/pg_wchar.h"
+
+struct JsonManifestParseContext;
+typedef struct JsonManifestParseContext JsonManifestParseContext;
+
+typedef void (*json_manifest_perfile_callback)(JsonManifestParseContext *,
+ char *pathname,
+ size_t size, pg_checksum_type checksum_type,
+ int checksum_length, uint8 *checksum_payload);
+typedef void (*json_manifest_perwalrange_callback)(JsonManifestParseContext *,
+ TimeLineID tli,
+ XLogRecPtr start_lsn, XLogRecPtr end_lsn);
+typedef void (*json_manifest_error_callback)(JsonManifestParseContext *,
+ char *fmt, ...) pg_attribute_printf(2, 3);
+
+struct JsonManifestParseContext
+{
+ void *private_data;
+ json_manifest_perfile_callback perfile_cb;
+ json_manifest_perwalrange_callback perwalrange_cb;
+ json_manifest_error_callback error_cb;
+};
+
+extern void json_parse_manifest(JsonManifestParseContext *context,
+ char *buffer, size_t size);
+
+#endif
diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c
new file mode 100644
index 00000000000..9c0a8c55507
--- /dev/null
+++ b/src/bin/pg_verifybackup/pg_verifybackup.c
@@ -0,0 +1,905 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_verifybackup.c
+ * Verify a backup against a backup manifest.
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/bin/pg_verifybackup/pg_verifybackup.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include "common/hashfn.h"
+#include "common/logging.h"
+#include "fe_utils/simple_list.h"
+#include "getopt_long.h"
+#include "parse_manifest.h"
+
+/*
+ * For efficiency, we'd like our hash table containing information about the
+ * manifest to start out with approximately the correct number of entries.
+ * There's no way to know the exact number of entries without reading the whole
+ * file, but we can get an estimate by dividing the file size by the estimated
+ * number of bytes per line.
+ *
+ * This could be off by about a factor of two in either direction, because the
+ * checksum algorithm has a big impact on the line lengths; e.g. a SHA512
+ * checksum is 128 hex bytes, whereas a CRC-32C value is only 8, and there
+ * might be no checksum at all.
+ */
+#define ESTIMATED_BYTES_PER_MANIFEST_LINE 100
+
+/*
+ * How many bytes should we try to read from a file at once?
+ */
+#define READ_CHUNK_SIZE 4096
+
+/*
+ * Each file described by the manifest file is parsed to produce an object
+ * like this.
+ */
+typedef struct manifest_file
+{
+ uint32 status; /* hash status */
+ char *pathname;
+ size_t size;
+ pg_checksum_type checksum_type;
+ int checksum_length;
+ uint8 *checksum_payload;
+ bool matched;
+ bool bad;
+} manifest_file;
+
+/*
+ * Define a hash table which we can use to store information about the files
+ * mentioned in the backup manifest.
+ */
+static uint32 hash_string_pointer(char *s);
+#define SH_PREFIX manifest_files
+#define SH_ELEMENT_TYPE manifest_file
+#define SH_KEY_TYPE char *
+#define SH_KEY pathname
+#define SH_HASH_KEY(tb, key) hash_string_pointer(key)
+#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
+#define SH_SCOPE static inline
+#define SH_RAW_ALLOCATOR pg_malloc0
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+/*
+ * Each WAL range described by the manifest file is parsed to produce an
+ * object like this.
+ */
+typedef struct manifest_wal_range
+{
+ TimeLineID tli;
+ XLogRecPtr start_lsn;
+ XLogRecPtr end_lsn;
+ struct manifest_wal_range *next;
+ struct manifest_wal_range *prev;
+} manifest_wal_range;
+
+/*
+ * Details we need in callbacks that occur while parsing a backup manifest.
+ */
+typedef struct parser_context
+{
+ manifest_files_hash *ht;
+ manifest_wal_range *first_wal_range;
+ manifest_wal_range *last_wal_range;
+} parser_context;
+
+/*
+ * All of the context information we need while checking a backup manifest.
+ */
+typedef struct verifier_context
+{
+ manifest_files_hash *ht;
+ char *backup_directory;
+ SimpleStringList ignore_list;
+ bool exit_on_error;
+ bool saw_any_error;
+} verifier_context;
+
+static void parse_manifest_file(char *manifest_path,
+ manifest_files_hash **ht_p,
+ manifest_wal_range **first_wal_range_p);
+
+static void record_manifest_details_for_file(JsonManifestParseContext *context,
+ char *pathname, size_t size,
+ pg_checksum_type checksum_type,
+ int checksum_length,
+ uint8 *checksum_payload);
+static void record_manifest_details_for_wal_range(JsonManifestParseContext *context,
+ TimeLineID tli,
+ XLogRecPtr start_lsn,
+ XLogRecPtr end_lsn);
+static void report_manifest_error(JsonManifestParseContext *context,
+ char *fmt,...)
+ pg_attribute_printf(2, 3) pg_attribute_noreturn();
+
+static void verify_backup_directory(verifier_context *context,
+ char *relpath, char *fullpath);
+static void verify_backup_file(verifier_context *context,
+ char *relpath, char *fullpath);
+static void report_extra_backup_files(verifier_context *context);
+static void verify_backup_checksums(verifier_context *context);
+static void verify_file_checksum(verifier_context *context,
+ manifest_file *m, char *pathname);
+static void parse_required_wal(verifier_context *context,
+ char *pg_waldump_path,
+ char *wal_directory,
+ manifest_wal_range *first_wal_range);
+
+static void report_backup_error(verifier_context *context,
+ const char *pg_restrict fmt,...)
+ pg_attribute_printf(2, 3);
+static void report_fatal_error(const char *pg_restrict fmt,...)
+ pg_attribute_printf(1, 2) pg_attribute_noreturn();
+static bool should_ignore_relpath(verifier_context *context, char *relpath);
+
+static void usage(void);
+
+static const char *progname;
+
+/*
+ * Main entry point.
+ */
+int
+main(int argc, char **argv)
+{
+ static struct option long_options[] = {
+ {"exit-on-error", no_argument, NULL, 'e'},
+ {"ignore", required_argument, NULL, 'i'},
+ {"manifest-path", required_argument, NULL, 'm'},
+ {"no-parse-wal", no_argument, NULL, 'n'},
+ {"print-parse-wal", no_argument, NULL, 'p'},
+ {"quiet", no_argument, NULL, 'q'},
+ {"skip-checksums", no_argument, NULL, 's'},
+ {"wal-directory", required_argument, NULL, 'w'},
+ {NULL, 0, NULL, 0}
+ };
+
+ int c;
+ verifier_context context;
+ manifest_wal_range *first_wal_range;
+ char *manifest_path = NULL;
+ bool no_parse_wal = false;
+ bool quiet = false;
+ bool skip_checksums = false;
+ char *wal_directory = NULL;
+ char *pg_waldump_path = NULL;
+
+ pg_logging_init(argv[0]);
+ set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_verifybackup"));
+ progname = get_progname(argv[0]);
+
+ memset(&context, 0, sizeof(context));
+
+ if (argc > 1)
+ {
+ if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
+ {
+ usage();
+ exit(0);
+ }
+ if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
+ {
+ puts("pg_verifybackup (PostgreSQL) " PG_VERSION);
+ exit(0);
+ }
+ }
+
+ /*
+ * Skip certain files in the toplevel directory.
+ *
+ * Ignore the backup_manifest file, because it's not included in the
+ * backup manifest.
+ *
+ * Ignore the pg_wal directory, because those files are not included in
+ * the backup manifest either, since they are fetched separately from the
+ * backup itself, and verified via a separate mechanism.
+ *
+ * Ignore postgresql.auto.conf, recovery.signal, and standby.signal,
+ * because we expect that those files may sometimes be created or changed
+ * as part of the backup process. For example, pg_basebackup -R will
+ * modify postgresql.auto.conf and create standby.signal.
+ */
+ simple_string_list_append(&context.ignore_list, "backup_manifest");
+ simple_string_list_append(&context.ignore_list, "pg_wal");
+ simple_string_list_append(&context.ignore_list, "postgresql.auto.conf");
+ simple_string_list_append(&context.ignore_list, "recovery.signal");
+ simple_string_list_append(&context.ignore_list, "standby.signal");
+
+ while ((c = getopt_long(argc, argv, "ei:m:nqsw:", long_options, NULL)) != -1)
+ {
+ switch (c)
+ {
+ case 'e':
+ context.exit_on_error = true;
+ break;
+ case 'i':
+ {
+ char *arg = pstrdup(optarg);
+
+ canonicalize_path(arg);
+ simple_string_list_append(&context.ignore_list, arg);
+ break;
+ }
+ case 'm':
+ manifest_path = pstrdup(optarg);
+ canonicalize_path(manifest_path);
+ break;
+ case 'n':
+ no_parse_wal = true;
+ break;
+ case 'q':
+ quiet = true;
+ break;
+ case 's':
+ skip_checksums = true;
+ break;
+ case 'w':
+ wal_directory = pstrdup(optarg);
+ canonicalize_path(wal_directory);
+ break;
+ default:
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+ progname);
+ exit(1);
+ }
+ }
+
+ /* Get backup directory name */
+ if (optind >= argc)
+ {
+ pg_log_fatal("no backup directory specified");
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+ progname);
+ exit(1);
+ }
+ context.backup_directory = pstrdup(argv[optind++]);
+ canonicalize_path(context.backup_directory);
+
+ /* Complain if any arguments remain */
+ if (optind < argc)
+ {
+ pg_log_fatal("too many command-line arguments (first is \"%s\")",
+ argv[optind]);
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+ progname);
+ exit(1);
+ }
+
+ /* Unless --no-parse-wal was specified, we will need pg_waldump. */
+ if (!no_parse_wal)
+ {
+ int ret;
+
+ pg_waldump_path = pg_malloc(MAXPGPATH);
+ ret = find_other_exec(argv[0], "pg_waldump",
+ "pg_waldump (PostgreSQL) " PG_VERSION "\n",
+ pg_waldump_path);
+ if (ret < 0)
+ {
+ char full_path[MAXPGPATH];
+
+ if (find_my_exec(argv[0], full_path) < 0)
+ strlcpy(full_path, progname, sizeof(full_path));
+ if (ret == -1)
+ pg_log_fatal("The program \"%s\" is needed by %s but was\n"
+ "not found in the same directory as \"%s\".\n"
+ "Check your installation.",
+ "pg_waldump", "pg_verifybackup", full_path);
+ else
+ pg_log_fatal("The program \"%s\" was found by \"%s\" but was\n"
+ "not the same version as %s.\n"
+ "Check your installation.",
+ "pg_waldump", full_path, "pg_verifybackup");
+ }
+ }
+
+ /* By default, look for the manifest in the backup directory. */
+ if (manifest_path == NULL)
+ manifest_path = psprintf("%s/backup_manifest",
+ context.backup_directory);
+
+ /* By default, look for the WAL in the backup directory, too. */
+ if (wal_directory == NULL)
+ wal_directory = psprintf("%s/pg_wal", context.backup_directory);
+
+ /*
+ * Try to read the manifest. We treat any errors encountered while parsing
+ * the manifest as fatal; there doesn't seem to be much point in trying to
+ * verify the backup directory against a corrupted manifest.
+ */
+ parse_manifest_file(manifest_path, &context.ht, &first_wal_range);
+
+ /*
+ * Now scan the files in the backup directory. At this stage, we verify
+ * that every file on disk is present in the manifest and that the sizes
+ * match. We also set the "matched" flag on every manifest entry that
+ * corresponds to a file on disk.
+ */
+ verify_backup_directory(&context, NULL, context.backup_directory);
+
+ /*
+ * The "matched" flag should now be set on every entry in the hash table.
+ * Any entries for which the bit is not set are files mentioned in the
+ * manifest that don't exist on disk.
+ */
+ report_extra_backup_files(&context);
+
+ /*
+ * Now do the expensive work of verifying file checksums, unless we were
+ * told to skip it.
+ */
+ if (!skip_checksums)
+ verify_backup_checksums(&context);
+
+ /*
+ * Try to parse the required ranges of WAL records, unless we were told
+ * not to do so.
+ */
+ if (!no_parse_wal)
+ parse_required_wal(&context, pg_waldump_path,
+ wal_directory, first_wal_range);
+
+ /*
+ * If everything looks OK, tell the user this, unless we were asked to
+ * work quietly.
+ */
+ if (!context.saw_any_error && !quiet)
+ printf("backup successfully verified\n");
+
+ return context.saw_any_error ? 1 : 0;
+}
+
+/*
+ * Parse a manifest file. Construct a hash table with information about
+ * all the files it mentions, and a linked list of all the WAL ranges it
+ * mentions.
+ *
+ * On success, *ht_p receives the new hash table and *first_wal_range_p
+ * receives the head of the WAL range list. All parse errors are fatal.
+ */
+static void
+parse_manifest_file(char *manifest_path, manifest_files_hash **ht_p,
+ manifest_wal_range **first_wal_range_p)
+{
+ int fd;
+ struct stat statbuf;
+ off_t estimate;
+ uint32 initial_size;
+ manifest_files_hash *ht;
+ char *buffer;
+ int rc;
+ parser_context private_context;
+ JsonManifestParseContext context;
+
+ /* Open the manifest file. */
+ if ((fd = open(manifest_path, O_RDONLY | PG_BINARY, 0)) < 0)
+ report_fatal_error("could not open file \"%s\": %m", manifest_path);
+
+ /* Figure out how big the manifest is. */
+ if (fstat(fd, &statbuf) != 0)
+ report_fatal_error("could not stat file \"%s\": %m", manifest_path);
+
+ /* Guess how large to make the hash table based on the manifest size. */
+ estimate = statbuf.st_size / ESTIMATED_BYTES_PER_MANIFEST_LINE;
+ initial_size = Min(PG_UINT32_MAX, Max(estimate, 256));
+
+ /* Create the hash table. */
+ ht = manifest_files_create(initial_size, NULL);
+
+ /*
+ * Slurp in the whole file.
+ *
+ * This is not ideal, but there's currently no easy way to get
+ * pg_parse_json() to perform incremental parsing.
+ *
+ * NOTE(review): rc is declared int while st_size is an off_t, so a
+ * manifest of 2GB or more would not be read correctly here -- confirm
+ * that is an acceptable limitation.
+ */
+ buffer = pg_malloc(statbuf.st_size);
+ rc = read(fd, buffer, statbuf.st_size);
+ if (rc != statbuf.st_size)
+ {
+ if (rc < 0)
+ report_fatal_error("could not read file \"%s\": %m",
+ manifest_path);
+ else
+ report_fatal_error("could not read file \"%s\": read %d of %zu",
+ manifest_path, rc, (size_t) statbuf.st_size);
+ }
+
+ /* Close the manifest file. (Opened read-only, so errors are ignorable.) */
+ close(fd);
+
+ /* Parse the manifest. */
+ private_context.ht = ht;
+ private_context.first_wal_range = NULL;
+ private_context.last_wal_range = NULL;
+ context.private_data = &private_context;
+ context.perfile_cb = record_manifest_details_for_file;
+ context.perwalrange_cb = record_manifest_details_for_wal_range;
+ context.error_cb = report_manifest_error;
+ json_parse_manifest(&context, buffer, statbuf.st_size);
+
+ /* Done with the buffer. */
+ pfree(buffer);
+
+ /* Return the file hash table and WAL range list we constructed. */
+ *ht_p = ht;
+ *first_wal_range_p = private_context.first_wal_range;
+}
+
+/*
+ * Report an error while parsing the manifest.
+ *
+ * We consider all such errors to be fatal errors. The manifest parser
+ * expects this function not to return.
+ *
+ * 'fmt' is a printf-style format; the varargs are its arguments. The
+ * message is logged at FATAL level and the process exits with status 1.
+ */
+static void
+report_manifest_error(JsonManifestParseContext *context, char *fmt,...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ pg_log_generic_v(PG_LOG_FATAL, fmt, ap);
+ va_end(ap);
+
+ exit(1);
+}
+
+/*
+ * Record details extracted from the backup manifest for one file.
+ *
+ * Invoked by the JSON manifest parser once per file entry. The hash table
+ * is keyed by the file's relative pathname; a duplicate pathname is
+ * treated as a fatal manifest defect.
+ */
+static void
+record_manifest_details_for_file(JsonManifestParseContext *context,
+ char *pathname, size_t size,
+ pg_checksum_type checksum_type,
+ int checksum_length, uint8 *checksum_payload)
+{
+ parser_context *pcxt = context->private_data;
+ manifest_files_hash *ht = pcxt->ht;
+ manifest_file *m;
+ bool found;
+
+ /* Make a new entry in the hash table for this file. */
+ m = manifest_files_insert(ht, pathname, &found);
+ if (found)
+ report_fatal_error("duplicate pathname in backup manifest: \"%s\"",
+ pathname);
+
+ /* Initialize the entry. */
+ m->size = size;
+ m->checksum_type = checksum_type;
+ m->checksum_length = checksum_length;
+ /* Note: we store the caller's payload pointer; it is not copied here. */
+ m->checksum_payload = checksum_payload;
+ m->matched = false;
+ m->bad = false;
+}
+
+/*
+ * Record details extracted from the backup manifest for one WAL range.
+ *
+ * Ranges are appended to a doubly-linked list in the order the manifest
+ * presents them; the list head/tail live in the parser_context.
+ */
+static void
+record_manifest_details_for_wal_range(JsonManifestParseContext *context,
+ TimeLineID tli,
+ XLogRecPtr start_lsn, XLogRecPtr end_lsn)
+{
+ parser_context *pcxt = context->private_data;
+ manifest_wal_range *range;
+
+ /* Allocate and initialize a struct describing this WAL range. */
+ range = palloc(sizeof(manifest_wal_range));
+ range->tli = tli;
+ range->start_lsn = start_lsn;
+ range->end_lsn = end_lsn;
+ range->prev = pcxt->last_wal_range;
+ range->next = NULL;
+
+ /* Add it to the end of the list. */
+ if (pcxt->first_wal_range == NULL)
+ pcxt->first_wal_range = range;
+ else
+ pcxt->last_wal_range->next = range;
+ pcxt->last_wal_range = range;
+}
+
+/*
+ * Verify one directory.
+ *
+ * 'relpath' is NULL if we are to verify the top-level backup directory,
+ * and otherwise the relative path to the directory that is to be verified.
+ *
+ * 'fullpath' is the backup directory with 'relpath' appended; i.e. the actual
+ * filesystem path at which it can be found.
+ */
+static void
+verify_backup_directory(verifier_context *context, char *relpath,
+ char *fullpath)
+{
+ DIR *dir;
+ struct dirent *dirent;
+
+ dir = opendir(fullpath);
+ if (dir == NULL)
+ {
+ /*
+ * If even the toplevel backup directory cannot be found, treat this
+ * as a fatal error.
+ */
+ if (relpath == NULL)
+ report_fatal_error("could not open directory \"%s\": %m", fullpath);
+
+ /*
+ * Otherwise, treat this as a non-fatal error, but ignore any further
+ * errors related to this path and anything beneath it.
+ */
+ report_backup_error(context,
+ "could not open directory \"%s\": %m", fullpath);
+ simple_string_list_append(&context->ignore_list, relpath);
+
+ return;
+ }
+
+ while (errno = 0, (dirent = readdir(dir)) != NULL)
+ {
+ char *filename = dirent->d_name;
+ char *newfullpath = psprintf("%s/%s", fullpath, filename);
+ char *newrelpath;
+
+ /* Skip "." and ".." (the just-built newfullpath leaks; harmless). */
+ if (filename[0] == '.' && (filename[1] == '\0'
+ || strcmp(filename, "..") == 0))
+ continue;
+
+ /* Build this entry's path relative to the backup root. */
+ if (relpath == NULL)
+ newrelpath = pstrdup(filename);
+ else
+ newrelpath = psprintf("%s/%s", relpath, filename);
+
+ if (!should_ignore_relpath(context, newrelpath))
+ verify_backup_file(context, newrelpath, newfullpath);
+
+ pfree(newfullpath);
+ pfree(newrelpath);
+ }
+
+ /*
+ * NOTE(review): errno is reset before each readdir() call above but is
+ * never examined after the loop, so a readdir() failure would be
+ * indistinguishable from end-of-directory -- confirm this is intended.
+ */
+ if (closedir(dir))
+ {
+ report_backup_error(context,
+ "could not close directory \"%s\": %m", fullpath);
+ return;
+ }
+}
+
+/*
+ * Verify one file (which might actually be a directory or a symlink).
+ *
+ * The arguments to this function have the same meaning as the arguments to
+ * verify_backup_directory.
+ */
+static void
+verify_backup_file(verifier_context *context, char *relpath, char *fullpath)
+{
+ struct stat sb;
+ manifest_file *m;
+
+ /*
+ * stat() follows symlinks, so a symlink is verified here as whatever it
+ * points to.
+ */
+ if (stat(fullpath, &sb) != 0)
+ {
+ report_backup_error(context,
+ "could not stat file or directory \"%s\": %m",
+ relpath);
+
+ /*
+ * Suppress further errors related to this path name and, if it's a
+ * directory, anything underneath it.
+ */
+ simple_string_list_append(&context->ignore_list, relpath);
+
+ return;
+ }
+
+ /* If it's a directory, just recurse. */
+ if (S_ISDIR(sb.st_mode))
+ {
+ verify_backup_directory(context, relpath, fullpath);
+ return;
+ }
+
+ /* If it's not a directory, it should be a plain file. */
+ if (!S_ISREG(sb.st_mode))
+ {
+ report_backup_error(context,
+ "\"%s\" is not a file or directory",
+ relpath);
+ return;
+ }
+
+ /* Check whether there's an entry in the manifest hash. */
+ m = manifest_files_lookup(context->ht, relpath);
+ if (m == NULL)
+ {
+ report_backup_error(context,
+ "\"%s\" is present on disk but not in the manifest",
+ relpath);
+ return;
+ }
+
+ /* Flag this entry as having been encountered in the filesystem. */
+ m->matched = true;
+
+ /* Check that the size matches. */
+ if (m->size != sb.st_size)
+ {
+ report_backup_error(context,
+ "\"%s\" has size %zu on disk but size %zu in the manifest",
+ relpath, (size_t) sb.st_size, m->size);
+ /* Mark it bad so the later checksum pass will skip it. */
+ m->bad = true;
+ }
+
+ /*
+ * We don't verify checksums at this stage. We first finish verifying
+ * that we have the expected set of files with the expected sizes, and
+ * only afterwards verify the checksums. That's because computing
+ * checksums may take a while, and we'd like to report more obvious
+ * problems quickly.
+ */
+}
+
+/*
+ * Scan the hash table for entries where the 'matched' flag is not set; report
+ * that such files are present in the manifest but not on disk.
+ *
+ * Entries covered by the user's ignore list are not reported.
+ */
+static void
+report_extra_backup_files(verifier_context *context)
+{
+ manifest_files_iterator it;
+ manifest_file *m;
+
+ manifest_files_start_iterate(context->ht, &it);
+ while ((m = manifest_files_iterate(context->ht, &it)) != NULL)
+ if (!m->matched && !should_ignore_relpath(context, m->pathname))
+ report_backup_error(context,
+ "\"%s\" is present in the manifest but not on disk",
+ m->pathname);
+}
+
+/*
+ * Verify checksums for hash table entries that are otherwise unproblematic.
+ * If we've already reported some problem related to a hash table entry, or
+ * if it has no checksum, just skip it.
+ */
+static void
+verify_backup_checksums(verifier_context *context)
+{
+ manifest_files_iterator it;
+ manifest_file *m;
+
+ manifest_files_start_iterate(context->ht, &it);
+ while ((m = manifest_files_iterate(context->ht, &it)) != NULL)
+ {
+ /*
+ * Skip entries never matched to a file on disk, entries already
+ * flagged bad, entries with no checksum, and ignored paths.
+ */
+ if (m->matched && !m->bad && m->checksum_type != CHECKSUM_TYPE_NONE &&
+ !should_ignore_relpath(context, m->pathname))
+ {
+ char *fullpath;
+
+ /* Compute the full pathname to the target file. */
+ fullpath = psprintf("%s/%s", context->backup_directory,
+ m->pathname);
+
+ /* Do the actual checksum verification. */
+ verify_file_checksum(context, m, fullpath);
+
+ /* Avoid leaking memory. */
+ pfree(fullpath);
+ }
+ }
+}
+
+/*
+ * Verify the checksum of a single file.
+ *
+ * The file is read in READ_CHUNK_SIZE pieces and folded into the checksum
+ * algorithm recorded in the manifest entry; the final checksum is compared
+ * against the manifest's payload. All problems are reported via
+ * report_backup_error, so verification of other files continues.
+ */
+static void
+verify_file_checksum(verifier_context *context, manifest_file *m,
+ char *fullpath)
+{
+ pg_checksum_context checksum_ctx;
+ char *relpath = m->pathname;
+ int fd;
+ int rc;
+ size_t bytes_read = 0;
+ uint8 buffer[READ_CHUNK_SIZE];
+ uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
+ int checksumlen;
+
+ /* Open the target file. */
+ if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) < 0)
+ {
+ report_backup_error(context, "could not open file \"%s\": %m",
+ relpath);
+ return;
+ }
+
+ /* Initialize checksum context. */
+ pg_checksum_init(&checksum_ctx, m->checksum_type);
+
+ /* Read the file chunk by chunk, updating the checksum as we go. */
+ while ((rc = read(fd, buffer, READ_CHUNK_SIZE)) > 0)
+ {
+ bytes_read += rc;
+ pg_checksum_update(&checksum_ctx, buffer, rc);
+ }
+ if (rc < 0)
+ report_backup_error(context, "could not read file \"%s\": %m",
+ relpath);
+
+ /* Close the file. */
+ if (close(fd) != 0)
+ {
+ report_backup_error(context, "could not close file \"%s\": %m",
+ relpath);
+ return;
+ }
+
+ /*
+ * If we didn't manage to read the whole file, bail out now. (rc still
+ * holds the result of the final read(); the read error itself was
+ * already reported above, before we closed the file.)
+ */
+ if (rc < 0)
+ return;
+
+ /*
+ * Double-check that we read the expected number of bytes from the file.
+ * Normally, a file size mismatch would be caught in verify_backup_file
+ * and this check would never be reached, but this provides additional
+ * safety and clarity in the event of concurrent modifications or
+ * filesystem misbehavior.
+ */
+ if (bytes_read != m->size)
+ {
+ report_backup_error(context,
+ "file \"%s\" should contain %zu bytes, but read %zu bytes",
+ relpath, m->size, bytes_read);
+ return;
+ }
+
+ /* Get the final checksum. */
+ checksumlen = pg_checksum_final(&checksum_ctx, checksumbuf);
+
+ /* And check it against the manifest. */
+ if (checksumlen != m->checksum_length)
+ report_backup_error(context,
+ "file \"%s\" has checksum of length %d, but expected %d",
+ relpath, m->checksum_length, checksumlen);
+ else if (memcmp(checksumbuf, m->checksum_payload, checksumlen) != 0)
+ report_backup_error(context,
+ "checksum mismatch for file \"%s\"",
+ relpath);
+}
+
+/*
+ * Attempt to parse the WAL files required to restore from backup using
+ * pg_waldump.
+ *
+ * One pg_waldump invocation is run per WAL range from the manifest; any
+ * nonzero exit status is reported as a backup error for that timeline.
+ */
+static void
+parse_required_wal(verifier_context *context, char *pg_waldump_path,
+ char *wal_directory, manifest_wal_range *first_wal_range)
+{
+ manifest_wal_range *this_wal_range = first_wal_range;
+
+ while (this_wal_range != NULL)
+ {
+ char *pg_waldump_cmd;
+
+ /*
+ * NOTE(review): the command line is built with simple double-quoting,
+ * so a path containing a double-quote character would break the
+ * command passed to system() -- confirm that is acceptable here.
+ */
+ pg_waldump_cmd = psprintf("\"%s\" --quiet --path=\"%s\" --timeline=%u --start=%X/%X --end=%X/%X\n",
+ pg_waldump_path, wal_directory, this_wal_range->tli,
+ (uint32) (this_wal_range->start_lsn >> 32),
+ (uint32) this_wal_range->start_lsn,
+ (uint32) (this_wal_range->end_lsn >> 32),
+ (uint32) this_wal_range->end_lsn);
+ if (system(pg_waldump_cmd) != 0)
+ report_backup_error(context,
+ "WAL parsing failed for timeline %u",
+ this_wal_range->tli);
+
+ this_wal_range = this_wal_range->next;
+ }
+}
+
+/*
+ * Report a problem with the backup.
+ *
+ * Update the context to indicate that we saw an error, and exit if the
+ * context says we should.
+ *
+ * 'fmt' is a printf-style format; the message is logged at ERROR level.
+ */
+static void
+report_backup_error(verifier_context *context, const char *pg_restrict fmt,...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ pg_log_generic_v(PG_LOG_ERROR, fmt, ap);
+ va_end(ap);
+
+ /* Remember that verification failed; honor --exit-on-error. */
+ context->saw_any_error = true;
+ if (context->exit_on_error)
+ exit(1);
+}
+
+/*
+ * Report a fatal error and exit with nonzero status.
+ *
+ * This function does not return.
+ */
+static void
+report_fatal_error(const char *pg_restrict fmt,...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ pg_log_generic_v(PG_LOG_FATAL, fmt, ap);
+ va_end(ap);
+
+ exit(1);
+}
+
+/*
+ * Is the specified relative path, or some prefix of it, listed in the set
+ * of paths to ignore?
+ *
+ * Note that by "prefix" we mean a parent directory; for this purpose,
+ * "aa/bb" is not a prefix of "aa/bbb", but it is a prefix of "aa/bb/cc".
+ */
+static bool
+should_ignore_relpath(verifier_context *context, char *relpath)
+{
+ SimpleStringListCell *cell;
+
+ for (cell = context->ignore_list.head; cell != NULL; cell = cell->next)
+ {
+ char *r = relpath;
+ char *v = cell->val;
+
+ /* Advance both pointers while the characters match. */
+ while (*v != '\0' && *r == *v)
+ ++r, ++v;
+
+ /*
+ * A match requires the whole ignore entry to have been consumed, and
+ * the remainder of relpath to be either empty (exact match) or to
+ * start at a directory separator (entry is a parent directory).
+ */
+ if (*v == '\0' && (*r == '\0' || *r == '/'))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Helper function for manifest_files hash table.
+ *
+ * Hashes the NUL-terminated pathname string used as the hash key.
+ */
+static uint32
+hash_string_pointer(char *s)
+{
+ unsigned char *ss = (unsigned char *) s;
+
+ return hash_bytes(ss, strlen(s));
+}
+
+/*
+ * Print out usage information and exit.
+ *
+ * Lists every switch accepted by the command-line parser, including
+ * -q/--quiet (which suppresses the final success message).
+ */
+static void
+usage(void)
+{
+ printf(_("%s verifies a backup against the backup manifest.\n\n"), progname);
+ printf(_("Usage:\n %s [OPTION]... BACKUPDIR\n\n"), progname);
+ printf(_("Options:\n"));
+ printf(_(" -e, --exit-on-error exit immediately on error\n"));
+ printf(_(" -i, --ignore=RELATIVE_PATH ignore indicated path\n"));
+ printf(_(" -m, --manifest=PATH use specified path for manifest\n"));
+ printf(_(" -n, --no-parse-wal do not try to parse WAL files\n"));
+ printf(_(" -q, --quiet do not print any output, except for errors\n"));
+ printf(_(" -s, --skip-checksums skip checksum verification\n"));
+ printf(_(" -w, --wal-directory=PATH use specified path for WAL files\n"));
+ printf(_(" -V, --version output version information, then exit\n"));
+ printf(_(" -?, --help show this help, then exit\n"));
+ printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
+ printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
+}
diff --git a/src/bin/pg_verifybackup/t/001_basic.pl b/src/bin/pg_verifybackup/t/001_basic.pl
new file mode 100644
index 00000000000..0d4d71aaa10
--- /dev/null
+++ b/src/bin/pg_verifybackup/t/001_basic.pl
@@ -0,0 +1,30 @@
+# Basic sanity checks for pg_verifybackup: standard program options plus a
+# few failure modes that do not require taking a real backup.
+use strict;
+use warnings;
+use TestLib;
+use Test::More tests => 16;
+
+my $tempdir = TestLib::tempdir;
+
+# Standard checks for --help, --version, and bad-option handling.
+program_help_ok('pg_verifybackup');
+program_version_ok('pg_verifybackup');
+program_options_handling_ok('pg_verifybackup');
+
+# Argument-validation failures.
+command_fails_like(['pg_verifybackup'],
+ qr/no backup directory specified/,
+ 'target directory must be specified');
+command_fails_like(['pg_verifybackup', $tempdir],
+ qr/could not open file.*\/backup_manifest\"/,
+ 'pg_verifybackup requires a manifest');
+command_fails_like(['pg_verifybackup', $tempdir, $tempdir],
+ qr/too many command-line arguments/,
+ 'multiple target directories not allowed');
+
+# create fake manifest file
+open(my $fh, '>', "$tempdir/backup_manifest") || die "open: $!";
+close($fh);
+
+# but then try to use an alternate, nonexisting manifest
+command_fails_like(['pg_verifybackup', '-m', "$tempdir/not_the_manifest",
+ $tempdir],
+ qr/could not open file.*\/not_the_manifest\"/,
+ 'pg_verifybackup respects -m flag');
diff --git a/src/bin/pg_verifybackup/t/002_algorithm.pl b/src/bin/pg_verifybackup/t/002_algorithm.pl
new file mode 100644
index 00000000000..ee82dcee376
--- /dev/null
+++ b/src/bin/pg_verifybackup/t/002_algorithm.pl
@@ -0,0 +1,58 @@
+# Verify that we can take and verify backups with various checksum types.
+
+use strict;
+use warnings;
+use Cwd;
+use Config;
+use File::Path qw(rmtree);
+use PostgresNode;
+use TestLib;
+use Test::More tests => 19;
+
+my $master = get_new_node('master');
+$master->init(allows_streaming => 1);
+$master->start;
+
+# "bogus" is deliberately invalid; the rest are the supported algorithms.
+for my $algorithm (qw(bogus none crc32c sha224 sha256 sha384 sha512))
+{
+ my $backup_path = $master->backup_dir . '/' . $algorithm;
+ my @backup = ('pg_basebackup', '-D', $backup_path,
+ '--manifest-checksums', $algorithm,
+ '--no-sync');
+ # -e makes pg_verifybackup stop at the first error it finds.
+ my @verify = ('pg_verifybackup', '-e', $backup_path);
+
+ # A backup with a bogus algorithm should fail.
+ if ($algorithm eq 'bogus')
+ {
+ $master->command_fails(\@backup,
+ "backup fails with algorithm \"$algorithm\"");
+ next;
+ }
+
+ # A backup with a valid algorithm should work.
+ $master->command_ok(\@backup, "backup ok with algorithm \"$algorithm\"");
+
+ # We expect each real checksum algorithm to be mentioned on every line of
+ # the backup manifest file except the first and last; for simplicity, we
+ # just check that it shows up lots of times. When the checksum algorithm
+ # is none, we just check that the manifest exists.
+ if ($algorithm eq 'none')
+ {
+ ok(-f "$backup_path/backup_manifest", "backup manifest exists");
+ }
+ else
+ {
+ my $manifest = slurp_file("$backup_path/backup_manifest");
+ my $count_of_algorithm_in_manifest =
+ (() = $manifest =~ /$algorithm/mig);
+ cmp_ok($count_of_algorithm_in_manifest, '>', 100,
+ "$algorithm is mentioned many times in the manifest");
+ }
+
+ # Make sure that it verifies OK.
+ $master->command_ok(\@verify,
+ "verify backup with algorithm \"$algorithm\"");
+
+ # Remove backup immediately to save disk space.
+ rmtree($backup_path);
+}
diff --git a/src/bin/pg_verifybackup/t/003_corruption.pl b/src/bin/pg_verifybackup/t/003_corruption.pl
new file mode 100644
index 00000000000..113959420ee
--- /dev/null
+++ b/src/bin/pg_verifybackup/t/003_corruption.pl
@@ -0,0 +1,288 @@
+# Verify that various forms of corruption are detected by pg_verifybackup.
+
+use strict;
+use warnings;
+use Cwd;
+use Config;
+use File::Path qw(rmtree);
+use PostgresNode;
+use TestLib;
+use Test::More tests => 44;
+
+my $master = get_new_node('master');
+$master->init(allows_streaming => 1);
+$master->start;
+
+# Include a user-defined tablespace in the hopes of detecting problems in that
+# area.
+my $source_ts_path = TestLib::perl2host(TestLib::tempdir_short());
+my $source_ts_prefix = $source_ts_path;
+$source_ts_prefix =~ s!(^[A-Z]:/[^/]*)/.*!$1!;
+
+$master->safe_psql('postgres', < 'extra_file',
+ 'mutilate' => \&mutilate_extra_file,
+ 'fails_like' =>
+ qr/extra_file.*present on disk but not in the manifest/
+ },
+ {
+ 'name' => 'extra_tablespace_file',
+ 'mutilate' => \&mutilate_extra_tablespace_file,
+ 'fails_like' =>
+ qr/extra_ts_file.*present on disk but not in the manifest/
+ },
+ {
+ 'name' => 'missing_file',
+ 'mutilate' => \&mutilate_missing_file,
+ 'fails_like' =>
+ qr/pg_xact\/0000.*present in the manifest but not on disk/
+ },
+ {
+ 'name' => 'missing_tablespace',
+ 'mutilate' => \&mutilate_missing_tablespace,
+ 'fails_like' =>
+ qr/pg_tblspc.*present in the manifest but not on disk/
+ },
+ {
+ 'name' => 'append_to_file',
+ 'mutilate' => \&mutilate_append_to_file,
+ 'fails_like' =>
+ qr/has size \d+ on disk but size \d+ in the manifest/
+ },
+ {
+ 'name' => 'truncate_file',
+ 'mutilate' => \&mutilate_truncate_file,
+ 'fails_like' =>
+ qr/has size 0 on disk but size \d+ in the manifest/
+ },
+ {
+ 'name' => 'replace_file',
+ 'mutilate' => \&mutilate_replace_file,
+ 'fails_like' => qr/checksum mismatch for file/
+ },
+ {
+ 'name' => 'bad_manifest',
+ 'mutilate' => \&mutilate_bad_manifest,
+ 'fails_like' => qr/manifest checksum mismatch/
+ },
+ {
+ 'name' => 'open_file_fails',
+ 'mutilate' => \&mutilate_open_file_fails,
+ 'fails_like' => qr/could not open file/,
+ 'skip_on_windows' => 1
+ },
+ {
+ 'name' => 'open_directory_fails',
+ 'mutilate' => \&mutilate_open_directory_fails,
+ 'cleanup' => \&cleanup_open_directory_fails,
+ 'fails_like' => qr/could not open directory/,
+ 'skip_on_windows' => 1
+ },
+ {
+ 'name' => 'search_directory_fails',
+ 'mutilate' => \&mutilate_search_directory_fails,
+ 'cleanup' => \&cleanup_search_directory_fails,
+ 'fails_like' => qr/could not stat file or directory/,
+ 'skip_on_windows' => 1
+ }
+);
+
+for my $scenario (@scenario)
+{
+ my $name = $scenario->{'name'};
+
+ SKIP:
+ {
+ skip "unix-style permissions not supported on Windows", 4
+ if $scenario->{'skip_on_windows'} && $windows_os;
+
+ # Take a backup and check that it verifies OK.
+ my $backup_path = $master->backup_dir . '/' . $name;
+ my $backup_ts_path = TestLib::perl2host(TestLib::tempdir_short());
+ # The tablespace map parameter confuses Msys2, which tries to mangle
+ # it. Tell it not to.
+ # See https://www.msys2.org/wiki/Porting/#filesystem-namespaces
+ local $ENV{MSYS2_ARG_CONV_EXCL} = $source_ts_prefix;
+ $master->command_ok(['pg_basebackup', '-D', $backup_path, '--no-sync',
+ '-T', "${source_ts_path}=${backup_ts_path}"],
+ "base backup ok");
+ command_ok(['pg_verifybackup', $backup_path ],
+ "intact backup verified");
+
+ # Mutilate the backup in some way.
+ $scenario->{'mutilate'}->($backup_path);
+
+ # Now check that the backup no longer verifies.
+ command_fails_like(['pg_verifybackup', $backup_path ],
+ $scenario->{'fails_like'},
+ "corrupt backup fails verification: $name");
+
+ # Run cleanup hook, if provided.
+ $scenario->{'cleanup'}->($backup_path)
+ if exists $scenario->{'cleanup'};
+
+ # Finally, use rmtree to reclaim space.
+ rmtree($backup_path);
+ }
+}
+
+# Create a small file at the given path relative to the backup root.
+# Shared helper for the mutilation routines below.
+sub create_extra_file
+{
+ my ($backup_path, $relative_path) = @_;
+ my $pathname = "$backup_path/$relative_path";
+ open(my $fh, '>', $pathname) || die "open $pathname: $!";
+ print $fh "This is an extra file.\n";
+ close($fh);
+ return;
+}
+
+# Add a file into the root directory of the backup.
+sub mutilate_extra_file
+{
+ my ($backup_path) = @_;
+ create_extra_file($backup_path, "extra_file");
+ return;
+}
+
+# Add a file inside the user-defined tablespace.
+sub mutilate_extra_tablespace_file
+{
+ my ($backup_path) = @_;
+ # Walk pg_tblspc/<oid>/<catalog-version dir>/<db oid> to find the
+ # tablespace's database directory.
+ my ($tsoid) = grep { $_ ne '.' && $_ ne '..' }
+ slurp_dir("$backup_path/pg_tblspc");
+ my ($catvdir) = grep { $_ ne '.' && $_ ne '..' }
+ slurp_dir("$backup_path/pg_tblspc/$tsoid");
+ my ($tsdboid) = grep { $_ ne '.' && $_ ne '..' }
+ slurp_dir("$backup_path/pg_tblspc/$tsoid/$catvdir");
+ create_extra_file($backup_path,
+ "pg_tblspc/$tsoid/$catvdir/$tsdboid/extra_ts_file");
+ return;
+}
+
+# Remove a file.
+sub mutilate_missing_file
+{
+ my ($backup_path) = @_;
+ my $pathname = "$backup_path/pg_xact/0000";
+ unlink($pathname) || die "$pathname: $!";
+ return;
+}
+
+# Remove the symlink to the user-defined tablespace.
+sub mutilate_missing_tablespace
+{
+ my ($backup_path) = @_;
+ my ($tsoid) = grep { $_ ne '.' && $_ ne '..' }
+ slurp_dir("$backup_path/pg_tblspc");
+ my $pathname = "$backup_path/pg_tblspc/$tsoid";
+ if ($windows_os)
+ {
+ # rmdir works on some windows setups, unlink on others.
+ # Instead of trying to implement precise rules, just try one and then
+ # the other.
+ unless (rmdir($pathname))
+ {
+ my $err = $!;
+ unlink($pathname) || die "$pathname: rmdir: $err, unlink: $!";
+ }
+ }
+ else
+ {
+ unlink($pathname) || die "$pathname: $!";
+ }
+ return;
+}
+
+# Append an additional byte to a file, so that its size no longer matches
+# the manifest.
+sub mutilate_append_to_file
+{
+ my ($backup_path) = @_;
+ append_to_file "$backup_path/global/pg_control", 'x';
+ return;
+}
+
+# Truncate a file to zero length.
+sub mutilate_truncate_file
+{
+ my ($backup_path) = @_;
+ my $pathname = "$backup_path/global/pg_control";
+ open(my $fh, '>', $pathname) || die "open $pathname: $!";
+ close($fh);
+ return;
+}
+
+# Replace a file's contents without changing the length of the file. This is
+# not a particularly efficient way to do this, so we pick a file that's
+# expected to be short.
+sub mutilate_replace_file
+{
+ my ($backup_path) = @_;
+ my $pathname = "$backup_path/PG_VERSION";
+ my $contents = slurp_file($pathname);
+ open(my $fh, '>', $pathname) || die "open $pathname: $!";
+ print $fh 'q' x length($contents);
+ close($fh);
+ return;
+}
+
+# Corrupt the backup manifest.
+sub mutilate_bad_manifest
+{
+ my ($backup_path) = @_;
+ append_to_file "$backup_path/backup_manifest", "\n";
+ return;
+}
+
+# Create a file that can't be opened. (This is skipped on Windows.)
+sub mutilate_open_file_fails
+{
+ my ($backup_path) = @_;
+ my $pathname = "$backup_path/PG_VERSION";
+ chmod(0, $pathname) || die "chmod $pathname: $!";
+ return;
+}
+
+# Create a directory that can't be opened. (This is skipped on Windows.)
+sub mutilate_open_directory_fails
+{
+ my ($backup_path) = @_;
+ my $pathname = "$backup_path/pg_subtrans";
+ chmod(0, $pathname) || die "chmod $pathname: $!";
+ return;
+}
+
+# Restore permissions on the unreadable directory we created.
+sub cleanup_open_directory_fails
+{
+ my ($backup_path) = @_;
+ my $pathname = "$backup_path/pg_subtrans";
+ chmod(0700, $pathname) || die "chmod $pathname: $!";
+ return;
+}
+
+# Create a directory that can't be searched. (This is skipped on Windows.)
+sub mutilate_search_directory_fails
+{
+ my ($backup_path) = @_;
+ my $pathname = "$backup_path/base";
+ chmod(0400, $pathname) || die "chmod $pathname: $!";
+ return;
+}
+
+# rmtree can't cope with a mode 400 directory, so change back to 700.
+sub cleanup_search_directory_fails
+{
+ my ($backup_path) = @_;
+ my $pathname = "$backup_path/base";
+ chmod(0700, $pathname) || die "chmod $pathname: $!";
+ return;
+}
diff --git a/src/bin/pg_verifybackup/t/004_options.pl b/src/bin/pg_verifybackup/t/004_options.pl
new file mode 100644
index 00000000000..9bae8eb565b
--- /dev/null
+++ b/src/bin/pg_verifybackup/t/004_options.pl
@@ -0,0 +1,89 @@
+# Verify the behavior of assorted pg_verifybackup options.
+
+use strict;
+use warnings;
+use Cwd;
+use Config;
+use File::Path qw(rmtree);
+use PostgresNode;
+use TestLib;
+use Test::More tests => 25;
+
+# Start up the server and take a backup.
+my $master = get_new_node('master');
+$master->init(allows_streaming => 1);
+$master->start;
+my $backup_path = $master->backup_dir . '/test_options';
+$master->command_ok(['pg_basebackup', '-D', $backup_path, '--no-sync' ],
+ "base backup ok");
+
+# Verify that pg_verifybackup -q succeeds and produces no output.
+my $stdout;
+my $stderr;
+my $result = IPC::Run::run ['pg_verifybackup', '-q', $backup_path ],
+ '>', \$stdout, '2>', \$stderr;
+ok($result, "-q succeeds: exit code 0");
+is($stdout, '', "-q succeeds: no stdout");
+is($stderr, '', "-q succeeds: no stderr");
+
+# Corrupt the PG_VERSION file.
+my $version_pathname = "$backup_path/PG_VERSION";
+my $version_contents = slurp_file($version_pathname);
+open(my $fh, '>', $version_pathname) || die "open $version_pathname: $!";
+print $fh 'q' x length($version_contents);
+close($fh);
+
+# Verify that pg_verifybackup -q now fails.
+command_fails_like(['pg_verifybackup', '-q', $backup_path ],
+ qr/checksum mismatch for file \"PG_VERSION\"/,
+ '-q checksum mismatch');
+
+# Since we didn't change the length of the file, verification should succeed
+# if we ignore checksums. Check that we get the right message, too.
+command_like(['pg_verifybackup', '-s', $backup_path ],
+ qr/backup successfully verified/,
+ '-s skips checksumming');
+
+# Verification should succeed if we ignore the problem file.
+command_like(['pg_verifybackup', '-i', 'PG_VERSION', $backup_path ],
+ qr/backup successfully verified/,
+ '-i ignores problem file');
+
+# PG_VERSION is already corrupt; let's try also removing all of pg_xact.
+rmtree($backup_path . "/pg_xact");
+
+# We're ignoring the problem with PG_VERSION, but not the problem with
+# pg_xact, so verification should fail here.
+command_fails_like(['pg_verifybackup', '-i', 'PG_VERSION', $backup_path ],
+ qr/pg_xact.*is present in the manifest but not on disk/,
+ '-i does not ignore all problems');
+
+# If we use -i twice, we should be able to ignore all of the problems.
+command_like(['pg_verifybackup', '-i', 'PG_VERSION', '-i', 'pg_xact',
+ $backup_path ],
+ qr/backup successfully verified/,
+ 'multiple -i options work');
+
+# Verify that when -i is not used, both problems are reported.
+$result = IPC::Run::run ['pg_verifybackup', $backup_path ],
+ '>', \$stdout, '2>', \$stderr;
+ok(!$result, "multiple problems: fails");
+like($stderr, qr/pg_xact.*is present in the manifest but not on disk/,
+ "multiple problems: missing files reported");
+like($stderr, qr/checksum mismatch for file \"PG_VERSION\"/,
+ "multiple problems: checksum mismatch reported");
+
+# Verify that when -e is used, only the problem detected first is reported.
+$result = IPC::Run::run ['pg_verifybackup', '-e', $backup_path ],
+ '>', \$stdout, '2>', \$stderr;
+ok(!$result, "-e reports 1 error: fails");
+like($stderr, qr/pg_xact.*is present in the manifest but not on disk/,
+ "-e reports 1 error: missing files reported");
+unlike($stderr, qr/checksum mismatch for file \"PG_VERSION\"/,
+ "-e reports 1 error: checksum mismatch not reported");
+
+# Test valid manifest with nonexistent backup directory.
+command_fails_like(['pg_verifybackup', '-m', "$backup_path/backup_manifest",
+ "$backup_path/fake" ],
+ qr/could not open directory/,
+ 'nonexistent backup directory');
diff --git a/src/bin/pg_verifybackup/t/005_bad_manifest.pl b/src/bin/pg_verifybackup/t/005_bad_manifest.pl
new file mode 100644
index 00000000000..3dd2b5a20df
--- /dev/null
+++ b/src/bin/pg_verifybackup/t/005_bad_manifest.pl
@@ -0,0 +1,204 @@
+# Test the behavior of pg_verifybackup when the backup manifest has
+# problems.
+
+use strict;
+use warnings;
+use Cwd;
+use Config;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 58;
+
+my $tempdir = TestLib::tempdir;
+
+test_bad_manifest('input string ended unexpectedly',
+ qr/could not parse backup manifest: The input string ended unexpectedly/,
+ <', "$tempdir/backup_manifest") || die "open: $!";
+ print $fh $manifest_contents;
+ close($fh);
+
+ command_fails_like(['pg_verifybackup', $tempdir], $regexp,
+ $test_name);
+ return;
+}
diff --git a/src/bin/pg_verifybackup/t/006_encoding.pl b/src/bin/pg_verifybackup/t/006_encoding.pl
new file mode 100644
index 00000000000..3c6b57adcd4
--- /dev/null
+++ b/src/bin/pg_verifybackup/t/006_encoding.pl
@@ -0,0 +1,27 @@
+# Verify that pg_verifybackup handles hex-encoded filenames correctly.
+#
+# pg_basebackup's --manifest-force-encode option stores every file path
+# in the backup manifest hex-encoded (as an "Encoded-Path" key instead of
+# "Path"); pg_verifybackup must decode those entries to locate and verify
+# the files on disk.
+
+use strict;
+use warnings;
+use Cwd;
+use Config;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 5;
+
+# Start a server and take a base backup with forced path encoding.
+my $master = get_new_node('master');
+$master->init(allows_streaming => 1);
+$master->start;
+my $backup_path = $master->backup_dir . '/test_encoding';
+$master->command_ok(['pg_basebackup', '-D', $backup_path, '--no-sync',
+                    '--manifest-force-encode' ],
+                   "backup ok with forced hex encoding");
+
+# Sanity-check that encoding really happened: count Encoded-Path keys in
+# the manifest.  The empty-list assignment `(() = ...)` forces the match
+# into list context, making the expression yield the number of matches.
+my $manifest = slurp_file("$backup_path/backup_manifest");
+my $count_of_encoded_path_in_manifest =
+       (() = $manifest =~ /Encoded-Path/mig);
+cmp_ok($count_of_encoded_path_in_manifest, '>', 100,
+          "many paths are encoded in the manifest");
+
+# The backup must still verify successfully despite the encoded paths.
+# (-s skips checksum verification; structural checks still run.)
+command_like(['pg_verifybackup', '-s', $backup_path ],
+                qr/backup successfully verified/,
+                'backup with forced encoding verified');
diff --git a/src/bin/pg_verifybackup/t/007_wal.pl b/src/bin/pg_verifybackup/t/007_wal.pl
new file mode 100644
index 00000000000..5e891d1b6f4
--- /dev/null
+++ b/src/bin/pg_verifybackup/t/007_wal.pl
@@ -0,0 +1,55 @@
+# Test pg_verifybackup's WAL verification.
+#
+# In addition to checking the files listed in the backup manifest,
+# pg_verifybackup parses the WAL needed to make the backup consistent.
+# These tests exercise the failure modes: missing pg_wal, relocated
+# pg_wal, and a corrupted WAL segment.
+
+use strict;
+use warnings;
+use Cwd;
+use Config;
+use File::Path qw(rmtree);
+use PostgresNode;
+use TestLib;
+use Test::More tests => 7;
+
+# Start up the server and take a backup.
+my $master = get_new_node('master');
+$master->init(allows_streaming => 1);
+$master->start;
+my $backup_path = $master->backup_dir . '/test_wal';
+$master->command_ok(['pg_basebackup', '-D', $backup_path, '--no-sync' ],
+                    "base backup ok");
+
+# Rename pg_wal.
+my $original_pg_wal = $backup_path . '/pg_wal';
+my $relocated_pg_wal = $master->backup_dir . '/relocated_pg_wal';
+rename($original_pg_wal, $relocated_pg_wal) || die "rename pg_wal: $!";
+
+# WAL verification should fail, since the WAL is no longer where the
+# tool expects to find it.
+command_fails_like(['pg_verifybackup', $backup_path ],
+                   qr/WAL parsing failed for timeline 1/,
+                   'missing pg_wal causes failure');
+
+# Should work if we skip WAL verification (-n = no WAL parsing).
+command_ok(['pg_verifybackup', '-n', $backup_path ],
+           'missing pg_wal OK if not verifying WAL');
+
+# Should also work if we specify the correct WAL location with -w.
+command_ok(['pg_verifybackup', '-w', $relocated_pg_wal, $backup_path ],
+           '-w can be used to specify WAL directory');
+
+# Move directory back to original location.
+rename($relocated_pg_wal, $original_pg_wal) || die "rename pg_wal back: $!";
+
+# Get a list of files in that directory that look like WAL files
+# (segment names are exactly 24 uppercase hex digits).
+my @walfiles = grep { /^[0-9A-F]{24}$/ } slurp_dir($original_pg_wal);
+
+# Replace the contents of one of the files with garbage of equal length,
+# so the damage is visible only to WAL parsing, not to a size check.
+my $wal_corruption_target = $original_pg_wal . '/' . $walfiles[0];
+my $wal_size = -s $wal_corruption_target;
+open(my $fh, '>', $wal_corruption_target)
+       || die "open $wal_corruption_target: $!";
+print $fh 'w' x $wal_size;
+close($fh);
+
+# WAL verification should fail on the corrupted segment.
+command_fails_like(['pg_verifybackup', $backup_path ],
+                   qr/WAL parsing failed for timeline 1/,
+                   'corrupt WAL file causes failure');
--
cgit v1.2.3