Avoid unlikely data-loss scenarios due to rename() without fsync.
authorAndres Freund <andres@anarazel.de>
Thu, 10 Mar 2016 02:53:54 +0000 (18:53 -0800)
committerAndres Freund <andres@anarazel.de>
Thu, 10 Mar 2016 02:53:54 +0000 (18:53 -0800)
Renaming a file using rename(2) is not guaranteed to be durable in face
of crashes. Use the previously added durable_rename()/durable_link_or_rename()
in various places where we previously just renamed files.

Most of the changed call sites are arguably not critical, but it seems
better to err on the side of too much durability.  The most prominent
known case where the previously missing fsyncs could cause data loss is
crashes at the end of a checkpoint. After the actual checkpoint has been
performed, old WAL files are recycled. When they're filled, their
contents are fdatasynced, but we did not fsync the containing
directory. An OS/hardware crash in an unfortunate moment could then end
up leaving that file with its old name, but new content; WAL replay
would thus not replay it.

Reported-By: Tomas Vondra
Author: Michael Paquier, Tomas Vondra, Andres Freund
Discussion: 56583BDD.9060302@2ndquadrant.com
Backpatch: All supported branches

contrib/pg_stat_statements/pg_stat_statements.c
src/backend/access/transam/timeline.c
src/backend/access/transam/xlog.c
src/backend/access/transam/xlogarchive.c
src/backend/postmaster/pgarch.c
src/backend/utils/misc/guc.c

index c3c744dae964205499af2b57114e57725871b869..c3ba6058fb05d7c1c9304e68864af722b0654aad 100644 (file)
@@ -735,11 +735,7 @@ pgss_shmem_shutdown(int code, Datum arg)
    /*
     * Rename file into place, so we atomically replace any old one.
     */
-   if (rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE) != 0)
-       ereport(LOG,
-               (errcode_for_file_access(),
-                errmsg("could not rename pg_stat_statement file \"%s\": %m",
-                       PGSS_DUMP_FILE ".tmp")));
+   (void) durable_rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE, LOG);
 
    /* Unlink query-texts file; it's not needed while shutdown */
    unlink(PGSS_TEXT_FILE);
index 50e64882b8cbe7d94dc8c5badc12eebe7837827f..4c1d4336c7afa513c3ece1d092ea5aa0f5af60a7 100644 (file)
@@ -418,24 +418,10 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
    TLHistoryFilePath(path, newTLI);
 
    /*
-    * Prefer link() to rename() here just to be really sure that we don't
-    * overwrite an existing file.  However, there shouldn't be one, so
-    * rename() is an acceptable substitute except for the truly paranoid.
+    * Perform the rename using link if available, paranoidly trying to avoid
+    * overwriting an existing file (there shouldn't be one).
     */
-#if HAVE_WORKING_LINK
-   if (link(tmppath, path) < 0)
-       ereport(ERROR,
-               (errcode_for_file_access(),
-                errmsg("could not link file \"%s\" to \"%s\": %m",
-                       tmppath, path)));
-   unlink(tmppath);
-#else
-   if (rename(tmppath, path) < 0)
-       ereport(ERROR,
-               (errcode_for_file_access(),
-                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                       tmppath, path)));
-#endif
+   durable_link_or_rename(tmppath, path, ERROR);
 
    /* The history file can be archived immediately. */
    if (XLogArchivingActive())
@@ -508,24 +494,10 @@ writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
    TLHistoryFilePath(path, tli);
 
    /*
-    * Prefer link() to rename() here just to be really sure that we don't
-    * overwrite an existing logfile.  However, there shouldn't be one, so
-    * rename() is an acceptable substitute except for the truly paranoid.
+    * Perform the rename using link if available, paranoidly trying to avoid
+    * overwriting an existing file (there shouldn't be one).
     */
-#if HAVE_WORKING_LINK
-   if (link(tmppath, path) < 0)
-       ereport(ERROR,
-               (errcode_for_file_access(),
-                errmsg("could not link file \"%s\" to \"%s\": %m",
-                       tmppath, path)));
-   unlink(tmppath);
-#else
-   if (rename(tmppath, path) < 0)
-       ereport(ERROR,
-               (errcode_for_file_access(),
-                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                       tmppath, path)));
-#endif
+   durable_link_or_rename(tmppath, path, ERROR);
 }
 
 /*
index 7d23d8e21c66d6ca4639e6f4bef9181c88ed3b50..7108bec69bb8103fce991c96787734266aa4fe0b 100644 (file)
@@ -3420,34 +3420,16 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
    }
 
    /*
-    * Prefer link() to rename() here just to be really sure that we don't
-    * overwrite an existing logfile.  However, there shouldn't be one, so
-    * rename() is an acceptable substitute except for the truly paranoid.
+    * Perform the rename using link if available, paranoidly trying to avoid
+    * overwriting an existing file (there shouldn't be one).
     */
-#if HAVE_WORKING_LINK
-   if (link(tmppath, path) < 0)
+   if (durable_link_or_rename(tmppath, path, LOG) != 0)
    {
        if (use_lock)
            LWLockRelease(ControlFileLock);
-       ereport(LOG,
-               (errcode_for_file_access(),
-                errmsg("could not link file \"%s\" to \"%s\" (initialization of log file): %m",
-                       tmppath, path)));
-       return false;
-   }
-   unlink(tmppath);
-#else
-   if (rename(tmppath, path) < 0)
-   {
-       if (use_lock)
-           LWLockRelease(ControlFileLock);
-       ereport(LOG,
-               (errcode_for_file_access(),
-                errmsg("could not rename file \"%s\" to \"%s\" (initialization of log file): %m",
-                       tmppath, path)));
+       /* durable_link_or_rename already emitted log message */
        return false;
    }
-#endif
 
    if (use_lock)
        LWLockRelease(ControlFileLock);
@@ -5428,11 +5410,7 @@ exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo)
     * re-enter archive recovery mode in a subsequent crash.
     */
    unlink(RECOVERY_COMMAND_DONE);
-   if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0)
-       ereport(FATAL,
-               (errcode_for_file_access(),
-                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                       RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE)));
+   durable_rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE, FATAL);
 
    ereport(LOG,
            (errmsg("archive recovery complete")));
@@ -6619,11 +6597,7 @@ StartupXLOG(void)
        if (haveBackupLabel)
        {
            unlink(BACKUP_LABEL_OLD);
-           if (rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD) != 0)
-               ereport(FATAL,
-                       (errcode_for_file_access(),
-                        errmsg("could not rename file \"%s\" to \"%s\": %m",
-                               BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
+           durable_rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, FATAL);
        }
 
        /* Check that the GUCs used to generate the WAL allow recovery */
@@ -10705,7 +10679,7 @@ CancelBackup(void)
    /* remove leftover file from previously canceled backup if it exists */
    unlink(BACKUP_LABEL_OLD);
 
-   if (rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD) == 0)
+   if (durable_rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, DEBUG1) == 0)
    {
        ereport(LOG,
                (errmsg("online backup mode canceled"),
index 7f7e614c1b5cd8185aa63c9962988e64cee5839f..1b502877be0010e83381e4bbc885c9e6728f546c 100644 (file)
@@ -469,11 +469,7 @@ KeepFileRestoredFromArchive(char *path, char *xlogfname)
        reload = true;
    }
 
-   if (rename(path, xlogfpath) < 0)
-       ereport(ERROR,
-               (errcode_for_file_access(),
-                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                       path, xlogfpath)));
+   durable_rename(path, xlogfpath, ERROR);
 
    /*
     * Create .done file forcibly to prevent the restored segment from being
@@ -576,12 +572,7 @@ XLogArchiveForceDone(const char *xlog)
    StatusFilePath(archiveReady, xlog, ".ready");
    if (stat(archiveReady, &stat_buf) == 0)
    {
-       if (rename(archiveReady, archiveDone) < 0)
-           ereport(WARNING,
-                   (errcode_for_file_access(),
-                    errmsg("could not rename file \"%s\" to \"%s\": %m",
-                           archiveReady, archiveDone)));
-
+       (void) durable_rename(archiveReady, archiveDone, WARNING);
        return;
    }
 
index 6a5c5b07136229a2727da2d3ab77f6701a0fd44f..7bc7abfef297afa4914352f1bd47c2664a3d9304 100644 (file)
@@ -753,9 +753,5 @@ pgarch_archiveDone(char *xlog)
 
    StatusFilePath(rlogready, xlog, ".ready");
    StatusFilePath(rlogdone, xlog, ".done");
-   if (rename(rlogready, rlogdone) < 0)
-       ereport(WARNING,
-               (errcode_for_file_access(),
-                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                       rlogready, rlogdone)));
+   (void) durable_rename(rlogready, rlogdone, WARNING);
 }
index 14d016d29a7fa995cc1f398f1dc52caf5f20fd8a..db0304fc18187ebc57c38ee6d0c7b070d6f66e00 100644 (file)
@@ -6817,11 +6817,7 @@ AlterSystemSetConfigFile(AlterSystemStmt *altersysstmt)
         * at worst it can lose the parameters set by last ALTER SYSTEM
         * command.
         */
-       if (rename(AutoConfTmpFileName, AutoConfFileName) < 0)
-           ereport(ERROR,
-                   (errcode_for_file_access(),
-                    errmsg("could not rename file \"%s\" to \"%s\": %m",
-                           AutoConfTmpFileName, AutoConfFileName)));
+       durable_rename(AutoConfTmpFileName, AutoConfFileName, ERROR);
    }
    PG_CATCH();
    {