pg_resetxlog: add option to set oldest xid & use by pg_upgrade
author Bruce Momjian <bruce@momjian.us>
Tue, 27 Jul 2021 02:38:15 +0000 (22:38 -0400)
committer Bruce Momjian <bruce@momjian.us>
Tue, 27 Jul 2021 02:38:15 +0000 (22:38 -0400)
Add pg_resetxlog -u option to set the oldest xid in pg_control.
Previously, -x set this value to be 2 billion less than the -x value.
However, this causes the server to immediately scan all relations'
relfrozenxid so it can advance pg_control's oldest xid to be inside the
autovacuum_freeze_max_age range, which is inefficient and might disrupt
diagnostic recovery.  pg_upgrade will use this option so that the new
cluster's oldest xid matches that of the old cluster.

Reported-by: Jason Harvey, Floris Van Nee
Discussion: https://postgr.es/m/20190615183759.GB239428@rfd.leadboat.com, 87da83168c644fd9aae38f546cc70295@opammb0562.comp.optiver.com

Author: Bertrand Drouvot

Backpatch-through: 9.6

doc/src/sgml/ref/pg_resetwal.sgml
src/bin/pg_resetwal/pg_resetwal.c
src/bin/pg_upgrade/controldata.c
src/bin/pg_upgrade/pg_upgrade.c
src/bin/pg_upgrade/pg_upgrade.h

index b3b750e722a43fb8f0916ae01bf80186d257342d..3e4882cdc65dd82d1a1c548d530c7aa007b9ff40 100644 (file)
@@ -297,6 +297,26 @@ PostgreSQL documentation
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><option>-u <replaceable class="parameter">xid</replaceable></option></term>
+    <term><option>--oldest-transaction-id=<replaceable class="parameter">xid</replaceable></option></term>
+    <listitem>
+     <para>
+      Manually set the oldest unfrozen transaction ID.
+     </para>
+
+     <para>
+      A safe value can be determined by looking for the numerically smallest
+      file name in the directory <filename>pg_xact</filename> under the data directory
+      and then multiplying by 1048576 (0x100000).  Note that the file names are in
+      hexadecimal.  It is usually easiest to specify the option value in
+      hexadecimal too. For example, if <filename>0007</filename> is the smallest entry
+      in <filename>pg_xact</filename>, <literal>-u 0x700000</literal> will work (five
+      trailing zeroes provide the proper multiplier).
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><option>-x <replaceable class="parameter">xid</replaceable></option></term>
     <term><option>--next-transaction-id=<replaceable class="parameter">xid</replaceable></option></term>
index 2601f70a047ab3cca29c3fcc15187826fa353111..b79f70a60d4ef6456e2c41baecaa0f703ad0e919 100644 (file)
@@ -64,6 +64,7 @@ static XLogSegNo newXlogSegNo;        /* new XLOG segment # */
 static bool guessed = false;   /* T if we had to guess at any values */
 static const char *progname;
 static uint32 set_xid_epoch = (uint32) -1;
+static TransactionId set_oldest_xid = 0;
 static TransactionId set_xid = 0;
 static TransactionId set_oldest_commit_ts_xid = 0;
 static TransactionId set_newest_commit_ts_xid = 0;
@@ -101,6 +102,7 @@ main(int argc, char *argv[])
                {"dry-run", no_argument, NULL, 'n'},
                {"next-oid", required_argument, NULL, 'o'},
                {"multixact-offset", required_argument, NULL, 'O'},
+               {"oldest-transaction-id", required_argument, NULL, 'u'},
                {"next-transaction-id", required_argument, NULL, 'x'},
                {"wal-segsize", required_argument, NULL, 1},
                {NULL, 0, NULL, 0}
@@ -135,7 +137,7 @@ main(int argc, char *argv[])
        }
 
 
-       while ((c = getopt_long(argc, argv, "c:D:e:fl:m:no:O:x:", long_options, NULL)) != -1)
+       while ((c = getopt_long(argc, argv, "c:D:e:fl:m:no:O:u:x:", long_options, NULL)) != -1)
        {
                switch (c)
                {
@@ -168,6 +170,21 @@ main(int argc, char *argv[])
                                }
                                break;
 
+                       case 'u':
+                               set_oldest_xid = strtoul(optarg, &endptr, 0);
+                               if (endptr == optarg || *endptr != '\0')
+                               {
+                                       pg_log_error("invalid argument for option %s", "-u");
+                                       fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+                                       exit(1);
+                               }
+                               if (!TransactionIdIsNormal(set_oldest_xid))
+                               {
+                                       pg_log_error("oldest transaction ID (-u) must be greater or equal to %u", FirstNormalTransactionId);
+                                       exit(1);
+                               }
+                               break;
+
                        case 'x':
                                set_xid = strtoul(optarg, &endptr, 0);
                                if (endptr == optarg || *endptr != '\0')
@@ -176,9 +193,9 @@ main(int argc, char *argv[])
                                        fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
                                        exit(1);
                                }
-                               if (set_xid == 0)
+                               if (!TransactionIdIsNormal(set_xid))
                                {
-                                       pg_log_error("transaction ID (-x) must not be 0");
+                                       pg_log_error("transaction ID (-x) must be greater or equal to %u", FirstNormalTransactionId);
                                        exit(1);
                                }
                                break;
@@ -428,25 +445,17 @@ main(int argc, char *argv[])
                        FullTransactionIdFromEpochAndXid(set_xid_epoch,
                                                                                         XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid));
 
-       if (set_xid != 0)
+       if (set_oldest_xid != 0)
        {
+               ControlFile.checkPointCopy.oldestXid = set_oldest_xid;
+               ControlFile.checkPointCopy.oldestXidDB = InvalidOid;
+       }
+
+       if (set_xid != 0)
                ControlFile.checkPointCopy.nextXid =
                        FullTransactionIdFromEpochAndXid(EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid),
                                                                                         set_xid);
 
-               /*
-                * For the moment, just set oldestXid to a value that will force
-                * immediate autovacuum-for-wraparound.  It's not clear whether adding
-                * user control of this is useful, so let's just do something that's
-                * reasonably safe.  The magic constant here corresponds to the
-                * maximum allowed value of autovacuum_freeze_max_age.
-                */
-               ControlFile.checkPointCopy.oldestXid = set_xid - 2000000000;
-               if (ControlFile.checkPointCopy.oldestXid < FirstNormalTransactionId)
-                       ControlFile.checkPointCopy.oldestXid += FirstNormalTransactionId;
-               ControlFile.checkPointCopy.oldestXidDB = InvalidOid;
-       }
-
        if (set_oldest_commit_ts_xid != 0)
                ControlFile.checkPointCopy.oldestCommitTsXid = set_oldest_commit_ts_xid;
        if (set_newest_commit_ts_xid != 0)
@@ -1209,20 +1218,21 @@ usage(void)
        printf(_("Usage:\n  %s [OPTION]... DATADIR\n\n"), progname);
        printf(_("Options:\n"));
        printf(_("  -c, --commit-timestamp-ids=XID,XID\n"
-                        "                                 set oldest and newest transactions bearing\n"
-                        "                                 commit timestamp (zero means no change)\n"));
-       printf(_(" [-D, --pgdata=]DATADIR          data directory\n"));
-       printf(_("  -e, --epoch=XIDEPOCH           set next transaction ID epoch\n"));
-       printf(_("  -f, --force                    force update to be done\n"));
-       printf(_("  -l, --next-wal-file=WALFILE    set minimum starting location for new WAL\n"));
-       printf(_("  -m, --multixact-ids=MXID,MXID  set next and oldest multitransaction ID\n"));
-       printf(_("  -n, --dry-run                  no update, just show what would be done\n"));
-       printf(_("  -o, --next-oid=OID             set next OID\n"));
-       printf(_("  -O, --multixact-offset=OFFSET  set next multitransaction offset\n"));
-       printf(_("  -V, --version                  output version information, then exit\n"));
-       printf(_("  -x, --next-transaction-id=XID  set next transaction ID\n"));
-       printf(_("      --wal-segsize=SIZE         size of WAL segments, in megabytes\n"));
-       printf(_("  -?, --help                     show this help, then exit\n"));
+                        "                                   set oldest and newest transactions bearing\n"
+                        "                                   commit timestamp (zero means no change)\n"));
+       printf(_(" [-D, --pgdata=]DATADIR            data directory\n"));
+       printf(_("  -e, --epoch=XIDEPOCH             set next transaction ID epoch\n"));
+       printf(_("  -f, --force                      force update to be done\n"));
+       printf(_("  -l, --next-wal-file=WALFILE      set minimum starting location for new WAL\n"));
+       printf(_("  -m, --multixact-ids=MXID,MXID    set next and oldest multitransaction ID\n"));
+       printf(_("  -n, --dry-run                    no update, just show what would be done\n"));
+       printf(_("  -o, --next-oid=OID               set next OID\n"));
+       printf(_("  -O, --multixact-offset=OFFSET    set next multitransaction offset\n"));
+       printf(_("  -u, --oldest-transaction-id=XID  set oldest transaction ID\n"));
+       printf(_("  -V, --version                    output version information, then exit\n"));
+       printf(_("  -x, --next-transaction-id=XID    set next transaction ID\n"));
+       printf(_("      --wal-segsize=SIZE           size of WAL segments, in megabytes\n"));
+       printf(_("  -?, --help                       show this help, then exit\n"));
        printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
        printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
 }
index 4f647cdf334741a28b290e59ed6653445f5fd1fe..a4b6375403a2352693bec09948b1b2c5ffb3c739 100644 (file)
@@ -44,6 +44,7 @@ get_control_data(ClusterInfo *cluster, bool live_check)
        bool            got_oid = false;
        bool            got_multi = false;
        bool            got_oldestmulti = false;
+       bool            got_oldestxid = false;
        bool            got_mxoff = false;
        bool            got_nextxlogfile = false;
        bool            got_float8_pass_by_value = false;
@@ -312,6 +313,17 @@ get_control_data(ClusterInfo *cluster, bool live_check)
                        cluster->controldata.chkpnt_nxtmulti = str2uint(p);
                        got_multi = true;
                }
+               else if ((p = strstr(bufin, "Latest checkpoint's oldestXID:")) != NULL)
+               {
+                       p = strchr(p, ':');
+
+                       if (p == NULL || strlen(p) <= 1)
+                               pg_fatal("%d: controldata retrieval problem\n", __LINE__);
+
+                       p++;                            /* remove ':' char */
+                       cluster->controldata.chkpnt_oldstxid = str2uint(p);
+                       got_oldestxid = true;
+               }
                else if ((p = strstr(bufin, "Latest checkpoint's oldestMultiXid:")) != NULL)
                {
                        p = strchr(p, ':');
@@ -544,7 +556,7 @@ get_control_data(ClusterInfo *cluster, bool live_check)
 
        /* verify that we got all the mandatory pg_control data */
        if (!got_xid || !got_oid ||
-               !got_multi ||
+               !got_multi || !got_oldestxid ||
                (!got_oldestmulti &&
                 cluster->controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER) ||
                !got_mxoff || (!live_check && !got_nextxlogfile) ||
@@ -575,6 +587,9 @@ get_control_data(ClusterInfo *cluster, bool live_check)
                        cluster->controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER)
                        pg_log(PG_REPORT, "  latest checkpoint oldest MultiXactId\n");
 
+               if (!got_oldestxid)
+                       pg_log(PG_REPORT, "  latest checkpoint oldestXID\n");
+
                if (!got_mxoff)
                        pg_log(PG_REPORT, "  latest checkpoint next MultiXactOffset\n");
 
index e23b8ca88d919e9ca12533c3f81589bb622d1e85..3628bd74a7b276d82619ff06f34b763ef12a953f 100644 (file)
@@ -467,6 +467,13 @@ copy_xact_xlog_xid(void)
                                          GET_MAJOR_VERSION(new_cluster.major_version) <= 906 ?
                                          "pg_clog" : "pg_xact");
 
+       prep_status("Setting oldest XID for new cluster");
+       exec_prog(UTILITY_LOG_FILE, NULL, true, true,
+                         "\"%s/pg_resetwal\" -f -u %u \"%s\"",
+                         new_cluster.bindir, old_cluster.controldata.chkpnt_oldstxid,
+                         new_cluster.pgdata);
+       check_ok();
+
        /* set the next transaction id and epoch of the new cluster */
        prep_status("Setting next transaction ID and epoch for new cluster");
        exec_prog(UTILITY_LOG_FILE, NULL, true, true,
index f7eb2349e66d3138d54cdb866de431dbe6903eba..db96627ccb34519e6246a0fc3a5ab02c3fc5af0a 100644 (file)
@@ -207,6 +207,7 @@ typedef struct
        uint32          chkpnt_nxtmulti;
        uint32          chkpnt_nxtmxoff;
        uint32          chkpnt_oldstMulti;
+       uint32          chkpnt_oldstxid;
        uint32          align;
        uint32          blocksz;
        uint32          largesz;