Improve server code to read files as part of a base backup.
authorRobert Haas <rhaas@postgresql.org>
Wed, 17 Jun 2020 15:39:17 +0000 (11:39 -0400)
committerRobert Haas <rhaas@postgresql.org>
Wed, 17 Jun 2020 15:39:17 +0000 (11:39 -0400)
Don't use fread(), since that doesn't necessarily set errno. We could
use read() instead, but it's even better to use pg_pread(), which
allows us to avoid some extra calls to seek to the desired location in
the file.

Also, advertise a wait event while reading from a file, as we do for
most other places where we're reading data from files.

Patch by me, reviewed by Hamid Akhtar.

Discussion: http://postgr.es/m/CA+TgmobBw-3573vMosGj06r72ajHsYeKtksT_oTxH8XvTL7DxA@mail.gmail.com

doc/src/sgml/monitoring.sgml
src/backend/postmaster/pgstat.c
src/backend/replication/basebackup.c
src/include/pgstat.h

index 89662cc0a3673aa5973d23c1a9a7d4fb50eab860..dfa9d0d6410c8e773a662be0ef18438432a1a544 100644 (file)
@@ -1193,6 +1193,10 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
     </thead>
 
     <tbody>
+     <row>
+      <entry><literal>BaseBackupRead</literal></entry>
+      <entry>Waiting for base backup to read from a file.</entry>
+     </row>
      <row>
       <entry><literal>BufFileRead</literal></entry>
       <entry>Waiting for a read from a buffered file.</entry>
index e96134dac8aaee7f544a0874e8c0771d359105ce..c022597bc09ab9ac5a34f6d875203a5fa4427eff 100644 (file)
@@ -3931,6 +3931,9 @@ pgstat_get_wait_io(WaitEventIO w)
 
    switch (w)
    {
+       case WAIT_EVENT_BASEBACKUP_READ:
+           event_name = "BaseBackupRead";
+           break;
        case WAIT_EVENT_BUFFILE_READ:
            event_name = "BufFileRead";
            break;
index efcf1e6eb56a8530fee45ddf2659429b8a08ba21..096b0fcef0d1023951a41ad293cff6624fa88765 100644 (file)
@@ -81,6 +81,8 @@ static int    compareWalFileNames(const ListCell *a, const ListCell *b);
 static void throttle(size_t increment);
 static void update_basebackup_progress(int64 delta);
 static bool is_checksummed_file(const char *fullpath, const char *filename);
+static int basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset,
+                                const char *filename, bool partial_read_ok);
 
 /* Was the backup currently in-progress initiated in recovery mode? */
 static bool backup_started_in_recovery = false;
@@ -98,18 +100,6 @@ static char *statrelpath = NULL;
  */
 #define THROTTLING_FREQUENCY   8
 
-/*
- * Checks whether we encountered any error in fread().  fread() doesn't give
- * any clue what has happened, so we check with ferror().  Also, neither
- * fread() nor ferror() set errno, so we just throw a generic error.
- */
-#define CHECK_FREAD_ERROR(fp, filename) \
-do { \
-   if (ferror(fp)) \
-       ereport(ERROR, \
-               (errmsg("could not read from file \"%s\"", filename))); \
-} while (0)
-
 /* The actual number of bytes, transfer of which may cause sleep. */
 static uint64 throttling_sample;
 
@@ -600,7 +590,7 @@ perform_base_backup(basebackup_options *opt)
        foreach(lc, walFileList)
        {
            char       *walFileName = (char *) lfirst(lc);
-           FILE       *fp;
+           int         fd;
            char        buf[TAR_SEND_SIZE];
            size_t      cnt;
            pgoff_t     len = 0;
@@ -608,8 +598,8 @@ perform_base_backup(basebackup_options *opt)
            snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFileName);
            XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
 
-           fp = AllocateFile(pathbuf, "rb");
-           if (fp == NULL)
+           fd = OpenTransientFile(pathbuf, O_RDONLY | PG_BINARY);
+           if (fd < 0)
            {
                int         save_errno = errno;
 
@@ -626,7 +616,7 @@ perform_base_backup(basebackup_options *opt)
                         errmsg("could not open file \"%s\": %m", pathbuf)));
            }
 
-           if (fstat(fileno(fp), &statbuf) != 0)
+           if (fstat(fd, &statbuf) != 0)
                ereport(ERROR,
                        (errcode_for_file_access(),
                         errmsg("could not stat file \"%s\": %m",
@@ -642,9 +632,10 @@ perform_base_backup(basebackup_options *opt)
            /* send the WAL file itself */
            _tarWriteHeader(pathbuf, NULL, &statbuf, false);
 
-           while ((cnt = fread(buf, 1,
-                               Min(sizeof(buf), wal_segment_size - len),
-                               fp)) > 0)
+           while ((cnt = basebackup_read_file(fd, buf,
+                                              Min(sizeof(buf),
+                                                  wal_segment_size - len),
+                                              len, pathbuf, true)) > 0)
            {
                CheckXLogRemoved(segno, tli);
                /* Send the chunk as a CopyData message */
@@ -660,8 +651,6 @@ perform_base_backup(basebackup_options *opt)
                    break;
            }
 
-           CHECK_FREAD_ERROR(fp, pathbuf);
-
            if (len != wal_segment_size)
            {
                CheckXLogRemoved(segno, tli);
@@ -676,7 +665,7 @@ perform_base_backup(basebackup_options *opt)
             */
            Assert(wal_segment_size % TAR_BLOCK_SIZE == 0);
 
-           FreeFile(fp);
+           CloseTransientFile(fd);
 
            /*
             * Mark file as archived, otherwise files can get archived again
@@ -1575,7 +1564,7 @@ sendFile(const char *readfilename, const char *tarfilename,
         struct stat *statbuf, bool missing_ok, Oid dboid,
         backup_manifest_info *manifest, const char *spcoid)
 {
-   FILE       *fp;
+   int         fd;
    BlockNumber blkno = 0;
    bool        block_retry = false;
    char        buf[TAR_SEND_SIZE];
@@ -1594,8 +1583,8 @@ sendFile(const char *readfilename, const char *tarfilename,
 
    pg_checksum_init(&checksum_ctx, manifest->checksum_type);
 
-   fp = AllocateFile(readfilename, "rb");
-   if (fp == NULL)
+   fd = OpenTransientFile(readfilename, O_RDONLY | PG_BINARY);
+   if (fd < 0)
    {
        if (errno == ENOENT && missing_ok)
            return false;
@@ -1637,8 +1626,27 @@ sendFile(const char *readfilename, const char *tarfilename,
        }
    }
 
-   while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
+   /*
+    * Loop until we read the amount of data the caller told us to expect. The
+    * file could be longer, if it was extended while we were sending it, but
+    * for a base backup we can ignore such extended data. It will be restored
+    * from WAL.
+    */
+   while (len < statbuf->st_size)
    {
+       /* Try to read some more data. */
+       cnt = basebackup_read_file(fd, buf,
+                                  Min(sizeof(buf), statbuf->st_size - len),
+                                  len, readfilename, true);
+
+       /*
+        * If we hit end-of-file, a concurrent truncation must have occurred.
+        * That's not an error condition, because WAL replay will fix things
+        * up.
+        */
+       if (cnt == 0)
+           break;
+
        /*
         * The checksums are verified at block level, so we iterate over the
         * buffer in chunks of BLCKSZ, after making sure that
@@ -1689,16 +1697,15 @@ sendFile(const char *readfilename, const char *tarfilename,
                         */
                        if (block_retry == false)
                        {
-                           /* Reread the failed block */
-                           if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1)
-                           {
-                               ereport(ERROR,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not fseek in file \"%s\": %m",
-                                               readfilename)));
-                           }
+                           int         reread_cnt;
 
-                           if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ)
+                           /* Reread the failed block */
+                           reread_cnt =
+                               basebackup_read_file(fd, buf + BLCKSZ * i,
+                                                    BLCKSZ, len + BLCKSZ * i,
+                                                    readfilename,
+                                                    false);
+                           if (reread_cnt == 0)
                            {
                                /*
                                 * If we hit end-of-file, a concurrent
@@ -1708,24 +1715,8 @@ sendFile(const char *readfilename, const char *tarfilename,
                                 * code that handles that case. (We must fix
                                 * up cnt first, though.)
                                 */
-                               if (feof(fp))
-                               {
-                                   cnt = BLCKSZ * i;
-                                   break;
-                               }
-
-                               ereport(ERROR,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not reread block %d of file \"%s\": %m",
-                                               blkno, readfilename)));
-                           }
-
-                           if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1)
-                           {
-                               ereport(ERROR,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not fseek in file \"%s\": %m",
-                                               readfilename)));
+                               cnt = BLCKSZ * i;
+                               break;
                            }
 
                            /* Set flag so we know a retry was attempted */
@@ -1768,20 +1759,8 @@ sendFile(const char *readfilename, const char *tarfilename,
 
        len += cnt;
        throttle(cnt);
-
-       if (feof(fp) || len >= statbuf->st_size)
-       {
-           /*
-            * Reached end of file. The file could be longer, if it was
-            * extended while we were sending it, but for a base backup we can
-            * ignore such extended data. It will be restored from WAL.
-            */
-           break;
-       }
    }
 
-   CHECK_FREAD_ERROR(fp, readfilename);
-
    /* If the file was truncated while we were sending it, pad it with zeros */
    if (len < statbuf->st_size)
    {
@@ -1810,7 +1789,7 @@ sendFile(const char *readfilename, const char *tarfilename,
        update_basebackup_progress(pad);
    }
 
-   FreeFile(fp);
+   CloseTransientFile(fd);
 
    if (checksum_failures > 1)
    {
@@ -1996,3 +1975,35 @@ update_basebackup_progress(int64 delta)
 
    pgstat_progress_update_multi_param(nparam, index, val);
 }
+
+/*
+ * Read some data from a file, setting a wait event and reporting any error
+ * encountered.
+ *
+ * If partial_read_ok is false, also report an error if the number of bytes
+ * read is not equal to the number of bytes requested.
+ *
+ * Returns the number of bytes read.
+ */
+static int
+basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset,
+                    const char *filename, bool partial_read_ok)
+{
+   int         rc;
+
+   pgstat_report_wait_start(WAIT_EVENT_BASEBACKUP_READ);
+   rc = pg_pread(fd, buf, nbytes, offset);
+   pgstat_report_wait_end();
+
+   if (rc < 0)
+       ereport(ERROR,
+               (errcode_for_file_access(),
+                errmsg("could not read file \"%s\": %m", filename)));
+   if (!partial_read_ok && rc > 0 && rc != nbytes)
+       ereport(ERROR,
+               (errcode_for_file_access(),
+                errmsg("could not read file \"%s\": read %d of %zu",
+                       filename, rc, nbytes)));
+
+   return rc;
+}
index c55dc1481ca5b4bbf3dd2acca621e06de6195c38..13872013823ec6bdbdaa87845ed911d583f68bab 100644 (file)
@@ -913,7 +913,8 @@ typedef enum
  */
 typedef enum
 {
-   WAIT_EVENT_BUFFILE_READ = PG_WAIT_IO,
+   WAIT_EVENT_BASEBACKUP_READ = PG_WAIT_IO,
+   WAIT_EVENT_BUFFILE_READ,
    WAIT_EVENT_BUFFILE_WRITE,
    WAIT_EVENT_CONTROL_FILE_READ,
    WAIT_EVENT_CONTROL_FILE_SYNC,