Have pg_rewind run crash recovery before rewinding
authorAlvaro Herrera <alvherre@alvh.no-ip.org>
Fri, 27 Sep 2019 19:40:01 +0000 (16:40 -0300)
committerAlvaro Herrera <alvherre@alvh.no-ip.org>
Fri, 27 Sep 2019 19:40:01 +0000 (16:40 -0300)
If we don't do this, the rewind fails if the server wasn't cleanly shut
down, which seems unhelpful and serves no purpose.

Also provide a new option --no-ensure-shutdown to suppress this
behavior, for alleged advanced usage that prefers to avoid the crash
recovery.

Authors: Paul Guo, Jimmy Yih, Ashwin Agrawal
Reviewed-by: Álvaro Herrera
Discussion: https://postgr.es/m/CAEET0ZEffUkXc48pg2iqARQgGRYDiiVxDu+yYek_bTwJF+q=Uw@mail.gmail.com

doc/src/sgml/ref/pg_rewind.sgml
src/bin/pg_rewind/pg_rewind.c

index ac142d22fcd11ad5d1022c0ea6ccacf87bbf790a..a06e5ac5e11b8b0ff716370c5077acaa7f74aac5 100644 (file)
@@ -165,6 +165,21 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--no-ensure-shutdown</option></term>
+      <listitem>
+       <para>
+        <application>pg_rewind</application> verifies that the target server
+        is cleanly shut down before rewinding; by default, if it isn't, it
+        starts the server in single-user mode to complete crash recovery.
+        By passing this option, <application>pg_rewind</application> skips
+        this and errors out immediately if the server is not cleanly shut
+        down.  Users are expected to handle the situation themselves in that
+        case.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-n</option></term>
       <term><option>--dry-run</option></term>
index 15e3eab55050c6328a82e4f01f42f11254fdf321..8cb0d726cfeef5a552c1385ae508735ebd1c7192 100644 (file)
@@ -40,6 +40,7 @@ static void digestControlFile(ControlFileData *ControlFile, char *source,
 static void syncTargetDirectory(void);
 static void sanityChecks(void);
 static void findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex);
+static void ensureCleanShutdown(const char *argv0);
 
 static ControlFileData ControlFile_target;
 static ControlFileData ControlFile_source;
@@ -79,6 +80,7 @@ usage(const char *progname)
    printf(_("  -N, --no-sync                  do not wait for changes to be written\n"
             "                                 safely to disk\n"));
    printf(_("  -P, --progress                 write progress messages\n"));
+   printf(_("      --no-ensure-shutdown       do not automatically fix unclean shutdown\n"));
    printf(_("      --debug                    write a lot of debug messages\n"));
    printf(_("  -V, --version                  output version information, then exit\n"));
    printf(_("  -?, --help                     show this help, then exit\n"));
@@ -94,6 +96,7 @@ main(int argc, char **argv)
        {"target-pgdata", required_argument, NULL, 'D'},
        {"source-pgdata", required_argument, NULL, 1},
        {"source-server", required_argument, NULL, 2},
+       {"no-ensure-shutdown", no_argument, NULL, 4},
        {"version", no_argument, NULL, 'V'},
        {"dry-run", no_argument, NULL, 'n'},
        {"no-sync", no_argument, NULL, 'N'},
@@ -110,6 +113,7 @@ main(int argc, char **argv)
    XLogRecPtr  chkptredo;
    size_t      size;
    char       *buffer;
+   bool        no_ensure_shutdown = false;
    bool        rewind_needed;
    XLogRecPtr  endrec;
    TimeLineID  endtli;
@@ -169,6 +173,9 @@ main(int argc, char **argv)
            case 2:             /* --source-server */
                connstr_source = pg_strdup(optarg);
                break;
+           case 4:
+               no_ensure_shutdown = true;
+               break;
        }
    }
 
@@ -241,6 +248,24 @@ main(int argc, char **argv)
    digestControlFile(&ControlFile_target, buffer, size);
    pg_free(buffer);
 
+   /*
+    * If the target instance was not cleanly shut down, run a single-user
+    * postgres session really quickly and reload the control file to get the
+    * new state. Note that if no_ensure_shutdown is specified, pg_rewind
+    * won't do that automatically. Users then need to complete recovery
+    * themselves in advance, else pg_rewind will soon quit; see sanityChecks().
+    */
+   if (!no_ensure_shutdown &&
+       ControlFile_target.state != DB_SHUTDOWNED &&
+       ControlFile_target.state != DB_SHUTDOWNED_IN_RECOVERY)
+   {
+       ensureCleanShutdown(argv[0]);
+
+       buffer = slurpFile(datadir_target, "global/pg_control", &size);
+       digestControlFile(&ControlFile_target, buffer, size);
+       pg_free(buffer);
+   }
+
    buffer = fetchFile("global/pg_control", &size);
    digestControlFile(&ControlFile_source, buffer, size);
    pg_free(buffer);
@@ -748,3 +773,58 @@ syncTargetDirectory(void)
 
    fsync_pgdata(datadir_target, PG_VERSION_NUM);
 }
+
+/*
+ * Ensure clean shutdown of target instance by launching single-user mode
+ * postgres to do crash recovery.
+ *
+ * argv0 is this program's argv[0]; it is used to locate a "postgres"
+ * executable of the matching version.  Every failure (binary missing or of
+ * the wrong version, command line too long for the local buffer, non-zero
+ * result from system()) is reported through pg_fatal().  When dry_run is set,
+ * the binary is still looked up but never executed.
+ */
+static void
+ensureCleanShutdown(const char *argv0)
+{
+   int         ret;
+#define MAXCMDLEN (2 * MAXPGPATH)
+   char        exec_path[MAXPGPATH];
+   char        cmd[MAXCMDLEN];
+   int         cmdlen;
+
+   /* locate postgres binary */
+   if ((ret = find_other_exec(argv0, "postgres",
+                              PG_BACKEND_VERSIONSTR,
+                              exec_path)) < 0)
+   {
+       char        full_path[MAXPGPATH];
+
+       /* fall back to the bare program name if our own path is unknown */
+       if (find_my_exec(argv0, full_path) < 0)
+           strlcpy(full_path, progname, sizeof(full_path));
+
+       if (ret == -1)
+           pg_fatal("The program \"%s\" is needed by %s but was\n"
+                    "not found in the same directory as \"%s\".\n"
+                    "Check your installation.",
+                    "postgres", progname, full_path);
+       else
+           pg_fatal("The program \"%s\" was found by \"%s\" but was\n"
+                    "not the same version as %s.\n"
+                    "Check your installation.",
+                    "postgres", full_path, progname);
+   }
+
+   pg_log_info("executing \"%s\" for target server to complete crash recovery",
+               exec_path);
+
+   /*
+    * Skip processing if requested, but only after ensuring presence of
+    * postgres.
+    */
+   if (dry_run)
+       return;
+
+   /*
+    * Build the single-user-mode command line.  stdin is redirected from
+    * DEVNULL, presumably so that the backend sees EOF right away and exits
+    * once recovery is done.
+    *
+    * exec_path and datadir_target together may not fit into cmd; refuse to
+    * hand a truncated (and therefore mangled) command to the shell.
+    */
+   cmdlen = snprintf(cmd, MAXCMDLEN, "\"%s\" --single -D \"%s\" template1 < \"%s\"",
+                     exec_path, datadir_target, DEVNULL);
+   if (cmdlen < 0 || cmdlen >= MAXCMDLEN)
+       pg_fatal("command to run \"%s\" in single-user mode is too long",
+                "postgres");
+
+   /* finally run postgres in single-user mode */
+   if (system(cmd) != 0)
+   {
+       pg_log_error("postgres single-user mode of target instance failed");
+       pg_fatal("Command was: %s", cmd);
+   }
+}