Improve server code to read files as part of a base backup.
authorRobert Haas <rhaas@postgresql.org>
Wed, 17 Jun 2020 15:39:17 +0000 (11:39 -0400)
committerRobert Haas <rhaas@postgresql.org>
Wed, 17 Jun 2020 15:39:17 +0000 (11:39 -0400)
Don't use fread(), since that doesn't necessarily set errno. We could
use read() instead, but it's even better to use pg_pread(), which
allows us to avoid some extra calls to seek to the desired location in
the file.

Also, advertise a wait event while reading from a file, as we do for
most other places where we're reading data from files.

Patch by me, reviewed by Hamid Akhtar.

Discussion: http://postgr.es/m/CA+TgmobBw-3573vMosGj06r72ajHsYeKtksT_oTxH8XvTL7DxA@mail.gmail.com

doc/src/sgml/monitoring.sgml
src/backend/postmaster/pgstat.c
src/backend/replication/basebackup.c
src/include/pgstat.h

index 89662cc0a3673aa5973d23c1a9a7d4fb50eab860..dfa9d0d6410c8e773a662be0ef18438432a1a544 100644 (file)
@@ -1193,6 +1193,10 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
     </thead>
 
     <tbody>
+     <row>
+      <entry><literal>BaseBackupRead</literal></entry>
+      <entry>Waiting for base backup to read from a file.</entry>
+     </row>
      <row>
       <entry><literal>BufFileRead</literal></entry>
       <entry>Waiting for a read from a buffered file.</entry>
index e96134dac8aaee7f544a0874e8c0771d359105ce..c022597bc09ab9ac5a34f6d875203a5fa4427eff 100644 (file)
@@ -3931,6 +3931,9 @@ pgstat_get_wait_io(WaitEventIO w)
 
        switch (w)
        {
+               case WAIT_EVENT_BASEBACKUP_READ:
+                       event_name = "BaseBackupRead";
+                       break;
                case WAIT_EVENT_BUFFILE_READ:
                        event_name = "BufFileRead";
                        break;
index efcf1e6eb56a8530fee45ddf2659429b8a08ba21..096b0fcef0d1023951a41ad293cff6624fa88765 100644 (file)
@@ -81,6 +81,8 @@ static int    compareWalFileNames(const ListCell *a, const ListCell *b);
 static void throttle(size_t increment);
 static void update_basebackup_progress(int64 delta);
 static bool is_checksummed_file(const char *fullpath, const char *filename);
+static int     basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset,
+                                                                const char *filename, bool partial_read_ok);
 
 /* Was the backup currently in-progress initiated in recovery mode? */
 static bool backup_started_in_recovery = false;
@@ -98,18 +100,6 @@ static char *statrelpath = NULL;
  */
 #define THROTTLING_FREQUENCY   8
 
-/*
- * Checks whether we encountered any error in fread().  fread() doesn't give
- * any clue what has happened, so we check with ferror().  Also, neither
- * fread() nor ferror() set errno, so we just throw a generic error.
- */
-#define CHECK_FREAD_ERROR(fp, filename) \
-do { \
-       if (ferror(fp)) \
-               ereport(ERROR, \
-                               (errmsg("could not read from file \"%s\"", filename))); \
-} while (0)
-
 /* The actual number of bytes, transfer of which may cause sleep. */
 static uint64 throttling_sample;
 
@@ -600,7 +590,7 @@ perform_base_backup(basebackup_options *opt)
                foreach(lc, walFileList)
                {
                        char       *walFileName = (char *) lfirst(lc);
-                       FILE       *fp;
+                       int                     fd;
                        char            buf[TAR_SEND_SIZE];
                        size_t          cnt;
                        pgoff_t         len = 0;
@@ -608,8 +598,8 @@ perform_base_backup(basebackup_options *opt)
                        snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFileName);
                        XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
 
-                       fp = AllocateFile(pathbuf, "rb");
-                       if (fp == NULL)
+                       fd = OpenTransientFile(pathbuf, O_RDONLY | PG_BINARY);
+                       if (fd < 0)
                        {
                                int                     save_errno = errno;
 
@@ -626,7 +616,7 @@ perform_base_backup(basebackup_options *opt)
                                                 errmsg("could not open file \"%s\": %m", pathbuf)));
                        }
 
-                       if (fstat(fileno(fp), &statbuf) != 0)
+                       if (fstat(fd, &statbuf) != 0)
                                ereport(ERROR,
                                                (errcode_for_file_access(),
                                                 errmsg("could not stat file \"%s\": %m",
@@ -642,9 +632,10 @@ perform_base_backup(basebackup_options *opt)
                        /* send the WAL file itself */
                        _tarWriteHeader(pathbuf, NULL, &statbuf, false);
 
-                       while ((cnt = fread(buf, 1,
-                                                               Min(sizeof(buf), wal_segment_size - len),
-                                                               fp)) > 0)
+                       while ((cnt = basebackup_read_file(fd, buf,
+                                                                                          Min(sizeof(buf),
+                                                                                                  wal_segment_size - len),
+                                                                                          len, pathbuf, true)) > 0)
                        {
                                CheckXLogRemoved(segno, tli);
                                /* Send the chunk as a CopyData message */
@@ -660,8 +651,6 @@ perform_base_backup(basebackup_options *opt)
                                        break;
                        }
 
-                       CHECK_FREAD_ERROR(fp, pathbuf);
-
                        if (len != wal_segment_size)
                        {
                                CheckXLogRemoved(segno, tli);
@@ -676,7 +665,7 @@ perform_base_backup(basebackup_options *opt)
                         */
                        Assert(wal_segment_size % TAR_BLOCK_SIZE == 0);
 
-                       FreeFile(fp);
+                       CloseTransientFile(fd);
 
                        /*
                         * Mark file as archived, otherwise files can get archived again
@@ -1575,7 +1564,7 @@ sendFile(const char *readfilename, const char *tarfilename,
                 struct stat *statbuf, bool missing_ok, Oid dboid,
                 backup_manifest_info *manifest, const char *spcoid)
 {
-       FILE       *fp;
+       int                     fd;
        BlockNumber blkno = 0;
        bool            block_retry = false;
        char            buf[TAR_SEND_SIZE];
@@ -1594,8 +1583,8 @@ sendFile(const char *readfilename, const char *tarfilename,
 
        pg_checksum_init(&checksum_ctx, manifest->checksum_type);
 
-       fp = AllocateFile(readfilename, "rb");
-       if (fp == NULL)
+       fd = OpenTransientFile(readfilename, O_RDONLY | PG_BINARY);
+       if (fd < 0)
        {
                if (errno == ENOENT && missing_ok)
                        return false;
@@ -1637,8 +1626,27 @@ sendFile(const char *readfilename, const char *tarfilename,
                }
        }
 
-       while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
+       /*
+        * Loop until we read the amount of data the caller told us to expect. The
+        * file could be longer, if it was extended while we were sending it, but
+        * for a base backup we can ignore such extended data. It will be restored
+        * from WAL.
+        */
+       while (len < statbuf->st_size)
        {
+               /* Try to read some more data. */
+               cnt = basebackup_read_file(fd, buf,
+                                                                  Min(sizeof(buf), statbuf->st_size - len),
+                                                                  len, readfilename, true);
+
+               /*
+                * If we hit end-of-file, a concurrent truncation must have occurred.
+                * That's not an error condition, because WAL replay will fix things
+                * up.
+                */
+               if (cnt == 0)
+                       break;
+
                /*
                 * The checksums are verified at block level, so we iterate over the
                 * buffer in chunks of BLCKSZ, after making sure that
@@ -1689,16 +1697,15 @@ sendFile(const char *readfilename, const char *tarfilename,
                                                 */
                                                if (block_retry == false)
                                                {
-                                                       /* Reread the failed block */
-                                                       if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1)
-                                                       {
-                                                               ereport(ERROR,
-                                                                               (errcode_for_file_access(),
-                                                                                errmsg("could not fseek in file \"%s\": %m",
-                                                                                               readfilename)));
-                                                       }
+                                                       int                     reread_cnt;
 
-                                                       if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ)
+                                                       /* Reread the failed block */
+                                                       reread_cnt =
+                                                               basebackup_read_file(fd, buf + BLCKSZ * i,
+                                                                                                        BLCKSZ, len + BLCKSZ * i,
+                                                                                                        readfilename,
+                                                                                                        false);
+                                                       if (reread_cnt == 0)
                                                        {
                                                                /*
                                                                 * If we hit end-of-file, a concurrent
@@ -1708,24 +1715,8 @@ sendFile(const char *readfilename, const char *tarfilename,
                                                                 * code that handles that case. (We must fix
                                                                 * up cnt first, though.)
                                                                 */
-                                                               if (feof(fp))
-                                                               {
-                                                                       cnt = BLCKSZ * i;
-                                                                       break;
-                                                               }
-
-                                                               ereport(ERROR,
-                                                                               (errcode_for_file_access(),
-                                                                                errmsg("could not reread block %d of file \"%s\": %m",
-                                                                                               blkno, readfilename)));
-                                                       }
-
-                                                       if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1)
-                                                       {
-                                                               ereport(ERROR,
-                                                                               (errcode_for_file_access(),
-                                                                                errmsg("could not fseek in file \"%s\": %m",
-                                                                                               readfilename)));
+                                                               cnt = BLCKSZ * i;
+                                                               break;
                                                        }
 
                                                        /* Set flag so we know a retry was attempted */
@@ -1768,20 +1759,8 @@ sendFile(const char *readfilename, const char *tarfilename,
 
                len += cnt;
                throttle(cnt);
-
-               if (feof(fp) || len >= statbuf->st_size)
-               {
-                       /*
-                        * Reached end of file. The file could be longer, if it was
-                        * extended while we were sending it, but for a base backup we can
-                        * ignore such extended data. It will be restored from WAL.
-                        */
-                       break;
-               }
        }
 
-       CHECK_FREAD_ERROR(fp, readfilename);
-
        /* If the file was truncated while we were sending it, pad it with zeros */
        if (len < statbuf->st_size)
        {
@@ -1810,7 +1789,7 @@ sendFile(const char *readfilename, const char *tarfilename,
                update_basebackup_progress(pad);
        }
 
-       FreeFile(fp);
+       CloseTransientFile(fd);
 
        if (checksum_failures > 1)
        {
@@ -1996,3 +1975,35 @@ update_basebackup_progress(int64 delta)
 
        pgstat_progress_update_multi_param(nparam, index, val);
 }
+
+/*
+ * Read some data from a file, setting a wait event and reporting any error
+ * encountered.
+ *
+ * If partial_read_ok is false, also report an error if the number of bytes
+ * read is not equal to the number of bytes requested.
+ *
+ * Returns the number of bytes read.
+ */
+static int
+basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset,
+                                        const char *filename, bool partial_read_ok)
+{
+       int                     rc;
+
+       pgstat_report_wait_start(WAIT_EVENT_BASEBACKUP_READ);
+       rc = pg_pread(fd, buf, nbytes, offset);
+       pgstat_report_wait_end();
+
+       if (rc < 0)
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not read file \"%s\": %m", filename)));
+       if (!partial_read_ok && rc > 0 && rc != nbytes)
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not read file \"%s\": read %d of %zu",
+                                               filename, rc, nbytes)));
+
+       return rc;
+}
index c55dc1481ca5b4bbf3dd2acca621e06de6195c38..13872013823ec6bdbdaa87845ed911d583f68bab 100644 (file)
@@ -913,7 +913,8 @@ typedef enum
  */
 typedef enum
 {
-       WAIT_EVENT_BUFFILE_READ = PG_WAIT_IO,
+       WAIT_EVENT_BASEBACKUP_READ = PG_WAIT_IO,
+       WAIT_EVENT_BUFFILE_READ,
        WAIT_EVENT_BUFFILE_WRITE,
        WAIT_EVENT_CONTROL_FILE_READ,
        WAIT_EVENT_CONTROL_FILE_SYNC,