First cut at XLOG file reset utility.
author Tom Lane <tgl@sss.pgh.pa.us>
Wed, 14 Mar 2001 00:57:43 +0000 (00:57 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Wed, 14 Mar 2001 00:57:43 +0000 (00:57 +0000)
Could do with more testing, but it works in the simple cases.

contrib/Makefile
contrib/README
contrib/pg_resetxlog/Makefile [new file with mode: 0644]
contrib/pg_resetxlog/README.pg_resetxlog [new file with mode: 0644]
contrib/pg_resetxlog/pg_resetxlog.c [new file with mode: 0644]

index 05aa567a1765ca8b299fbba433df7c5fde3b3453..819e5046ad8fd9605d2f512c853b7cd8898c14e2 100644 (file)
@@ -1,4 +1,4 @@
-# $Header: /cvsroot/pgsql/contrib/Makefile,v 1.17 2001/03/13 19:28:02 petere Exp $
+# $Header: /cvsroot/pgsql/contrib/Makefile,v 1.18 2001/03/14 00:57:43 tgl Exp $
 
 subdir = contrib
 top_builddir = ..
@@ -18,8 +18,10 @@ WANTED_DIRS = \
                miscutil        \
                noupdate        \
                oid2name        \
+               pg_controldata  \
                pg_dumplo       \
                pg_logger       \
+               pg_resetxlog    \
                pgbench         \
                pgcrypto        \
                rserv           \
index 79d95b2e3e18daa90c653289b88da25b481c3270..c0470a8b0a4c46257f29d4dfeb41207ead17ac56 100644 (file)
@@ -52,6 +52,10 @@ intarray -
        by Teodor Sigaev <teodor@stack.net> and Oleg Bartunov
        <oleg@sai.msu.su>.
 
+ipc_check -
+       Simple test script to help in configuring IPC.
+       FreeBSD only, for now.
+
 isbn_issn -
        PostgreSQL type extensions for ISBN (books) and ISSN (serials)
        by Garrett A. Wollman <wollman@khavrinen.lcs.mit.edu>
@@ -86,7 +90,7 @@ oid2name -
        by B Palmer <bpalmer@crimelabs.net>
 
 pg_controldata -
-       Dump internal database site structures
+       Dump contents of pg_control (database master file)
        by Oliver Elphick <olly@lfix.co.uk>
 
 pg_dumplo -
@@ -97,6 +101,10 @@ pg_logger -
        Stdin-to-syslog gateway for PostgreSQL
        by Nathan Myers <ncm@nospam.cantrip.org>
 
+pg_resetxlog -
+       Reset the WAL log (pg_xlog) to recover from crash or format change
+       by Tom Lane <tgl@sss.pgh.pa.us>
+
 pgbench -
        TPC-B like benchmarking tool
        by Tatsuo Ishii <t-ishii@sra.co.jp>
diff --git a/contrib/pg_resetxlog/Makefile b/contrib/pg_resetxlog/Makefile
new file mode 100644 (file)
index 0000000..2169d7a
--- /dev/null
@@ -0,0 +1,37 @@
+#
+# $Header: /cvsroot/pgsql/contrib/pg_resetxlog/Attic/Makefile,v 1.1 2001/03/14 00:57:43 tgl Exp $
+#
+# Build, install and clean rules for the pg_resetxlog contrib program.
+#
+
+subdir = contrib/pg_resetxlog
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+# pg_crc.o is built from a local link to the backend's pg_crc.c (see below).
+OBJS   = pg_resetxlog.o pg_crc.o
+
+all: pg_resetxlog
+
+pg_resetxlog: $(OBJS)
+       $(CC) $(CFLAGS) $(OBJS) $(LDFLAGS) $(LIBS) -o $@
+
+# Make the backend's CRC source available in this directory so it can be
+# compiled into the program; any stale copy/link is removed first.
+# (LN_S is presumably "ln -s" or an equivalent chosen by configure.)
+pg_crc.c: $(top_builddir)/src/backend/utils/hash/pg_crc.c
+       rm -f $@ && $(LN_S) $< .
+
+# Installs the executable into bindir and the README into docdir/contrib.
+install: all installdirs
+       $(INSTALL_PROGRAM) pg_resetxlog$(X)     $(bindir)
+       $(INSTALL_DATA) README.pg_resetxlog     $(docdir)/contrib
+
+installdirs:
+       $(mkinstalldirs) $(bindir) $(docdir)/contrib
+
+uninstall:
+       rm -f $(bindir)/pg_resetxlog$(X) $(docdir)/contrib/README.pg_resetxlog
+
+# The linked pg_crc.c is a generated file here, so clean removes it too.
+clean distclean maintainer-clean:
+       rm -f pg_resetxlog$(X) $(OBJS) pg_crc.c
+
+# Dependency info is written to ./depend and pulled in only if it exists.
+depend dep:
+       $(CC) -MM -MG $(CFLAGS) *.c > depend
+
+ifeq (depend,$(wildcard depend))
+include depend
+endif
diff --git a/contrib/pg_resetxlog/README.pg_resetxlog b/contrib/pg_resetxlog/README.pg_resetxlog
new file mode 100644 (file)
index 0000000..f9521ee
--- /dev/null
@@ -0,0 +1,39 @@
+pg_resetxlog is a program to clear the WAL transaction log (stored in
+$PGDATA/pg_xlog/), replacing whatever had been in it with just a dummy
+shutdown-checkpoint record.  It also regenerates the pg_control file
+if necessary.
+
+THIS PROGRAM WILL DESTROY VALUABLE LOG DATA!!!  Don't run it unless you
+really need it!!!
+
+pg_resetxlog is primarily intended for disaster recovery --- that is,
+if your pg_control and/or xlog are hosed badly enough that Postgres refuses
+to start up, this program will get you past that problem and let you get to
+your data files.  But realize that without the xlog, your data files may be
+corrupt due to partially-applied transactions, incomplete index-file
+updates, etc.  You should dump your data, check it for accuracy, then initdb
+and reload.
+
+A secondary purpose is to cope with xlog format changes without requiring
+initdb.  To use pg_resetxlog for this purpose, just be sure that you have
+cleanly shut down your old postmaster (if you're not sure, see the contrib
+module pg_controldata and run it to be sure the DB state is SHUTDOWN).
+Then run pg_resetxlog, and finally install and start the new version of
+the database software.
+
+To run the program, make sure your postmaster is not running, then
+(as the Postgres admin user) do
+
+       pg_resetxlog $PGDATA
+
+As a safety measure, the target data directory must be specified on the
+command line; it cannot be defaulted.
+
+If pg_resetxlog complains that it can't reconstruct valid data for pg_control,
+you can force it to invent plausible data values with
+
+       pg_resetxlog -f $PGDATA
+
+If this turns out to be necessary then you *definitely* should plan on
+immediate dump, initdb, reload --- any modifications you do to the database
+after "pg_resetxlog -f" would be likely to corrupt things even worse.
diff --git a/contrib/pg_resetxlog/pg_resetxlog.c b/contrib/pg_resetxlog/pg_resetxlog.c
new file mode 100644 (file)
index 0000000..0e6f747
--- /dev/null
@@ -0,0 +1,991 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_resetxlog.c
+ *       A utility to "zero out" the xlog when it's corrupt beyond recovery.
+ *       Can also rebuild pg_control if needed.
+ *
+ * The theory of operation is fairly simple:
+ *    1. Read the existing pg_control (which will include the last
+ *              checkpoint record).  If it is an old format then update to
+ *              current format.
+ *       2. If pg_control is corrupt, attempt to intuit reasonable values,
+ *              by scanning the old xlog if necessary.
+ *       3. Modify pg_control to reflect a "shutdown" state with a checkpoint
+ *          record at the start of xlog.
+ *       4. Flush the existing xlog files and write a new segment 0 with
+ *          just a checkpoint record in it.
+ * This is all pretty straightforward except for the intuition part of
+ * step 2 ...
+ *
+ *
+ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $Header: /cvsroot/pgsql/contrib/pg_resetxlog/Attic/pg_resetxlog.c,v 1.1 2001/03/14 00:57:43 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+#ifdef USE_LOCALE
+#include <locale.h>
+#endif
+
+#include "access/xlog.h"
+#include "catalog/catversion.h"
+#include "catalog/pg_control.h"
+
+
+/******************** stuff copied from xlog.c ********************/
+
+/* Increment an xlogid/segment pair */
+#define NextLogSeg(logId, logSeg)      \
+       do { \
+               if ((logSeg) >= XLogSegsPerFile-1) \
+               { \
+                       (logId)++; \
+                       (logSeg) = 0; \
+               } \
+               else \
+                       (logSeg)++; \
+       } while (0)
+
+/*
+ * Compute ID and segment from an XLogRecPtr.
+ *
+ * For XLByteToSeg, do the computation at face value.  For XLByteToPrevSeg,
+ * a boundary byte is taken to be in the previous segment.  This is suitable
+ * for deciding which segment to write given a pointer to a record end,
+ * for example.
+ */
+#define XLByteToSeg(xlrp, logId, logSeg)       \
+       ( logId = (xlrp).xlogid, \
+         logSeg = (xlrp).xrecoff / XLogSegSize \
+       )
+#define XLByteToPrevSeg(xlrp, logId, logSeg)   \
+       ( logId = (xlrp).xlogid, \
+         logSeg = ((xlrp).xrecoff - 1) / XLogSegSize \
+       )
+
+/*
+ * Is an XLogRecPtr within a particular XLOG segment?
+ *
+ * For XLByteInSeg, do the computation at face value.  For XLByteInPrevSeg,
+ * a boundary byte is taken to be in the previous segment.
+ */
+#define XLByteInSeg(xlrp, logId, logSeg)       \
+       ((xlrp).xlogid == (logId) && \
+        (xlrp).xrecoff / XLogSegSize == (logSeg))
+
+#define XLByteInPrevSeg(xlrp, logId, logSeg)   \
+       ((xlrp).xlogid == (logId) && \
+        ((xlrp).xrecoff - 1) / XLogSegSize == (logSeg))
+
+
+/*
+ * Build the pathname of an XLOG segment file into "path" (written with a
+ * limit of MAXPGPATH bytes): XLogDir, SEP_CHAR, then log and seg each
+ * printed as 8 zero-padded uppercase hex digits.
+ */
+#define XLogFileName(path, log, seg)   \
+                       snprintf(path, MAXPGPATH, "%s%c%08X%08X",       \
+                                        XLogDir, SEP_CHAR, log, seg)
+
+/*
+ * _INTL_MAXLOGRECSZ: max space needed for a record including header and
+ * any backup-block data.
+ */
+#define _INTL_MAXLOGRECSZ      (SizeOfXLogRecord + MAXLOGRECSZ + \
+                                                        XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
+
+/******************** end of stuff copied from xlog.c ********************/
+
+
+static char *DataDir;                  /* locations of important stuff */
+static char XLogDir[MAXPGPATH];
+static char ControlFilePath[MAXPGPATH];
+
+static ControlFileData ControlFile;    /* pg_control values */
+static bool guessed = false;   /* T if we had to guess at any values */
+
+
+static bool CheckControlVersion0(char *buffer, int len);
+
+
+/*
+ * Open the XLOG segment file identified by (log, seg) for read/write.
+ *
+ * The file is not created if it is missing (no O_CREAT).  The result of
+ * open() is handed back unchanged, so a negative value means failure;
+ * nothing is reported here.
+ */
+static int
+XLogFileOpen(uint32 log, uint32 seg)
+{
+       char            segpath[MAXPGPATH];
+
+       XLogFileName(segpath, log, seg);
+
+       return open(segpath, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
+}
+
+
+/*
+ * Try to read the existing pg_control file.
+ *
+ * This routine is also responsible for updating old pg_control versions
+ * to the current format.
+ *
+ * Returns true if ControlFile has been filled in from the file; "guessed"
+ * is set when the data was accepted despite a bad CRC (or when the
+ * version-0 conversion had to guess).  Returns false if the contents are
+ * not usable at all.  Exits the program if the file cannot be opened or
+ * read, or if memory cannot be allocated.
+ */
+static bool
+ReadControlFile(void)
+{
+       int             fd;
+       int             len;
+       char   *buffer;
+       crc64   crc;
+       bool    converted;
+
+       /* pg_control is binary data; use PG_BINARY as XLogFileOpen does */
+       if ((fd = open(ControlFilePath, O_RDONLY | PG_BINARY)) < 0)
+       {
+               /* perror() may itself change errno, so remember why open failed */
+               int             save_errno = errno;
+
+               /*
+                * If pg_control is not there at all, or we can't read it,
+                * the odds are we've been handed a bad DataDir path, so give up.
+                * User can do "touch pg_control" to force us to proceed.
+                */
+               perror("Failed to open $PGDATA/global/pg_control for reading");
+               if (save_errno == ENOENT)
+                       fprintf(stderr, "If you're sure the PGDATA path is correct, do\n"
+                                       "  touch %s\n"
+                                       "and try again.\n", ControlFilePath);
+               exit(1);
+       }
+
+       /* Use malloc to ensure we have a maxaligned buffer */
+       buffer = (char *) malloc(BLCKSZ);
+       if (buffer == NULL)
+       {
+               fprintf(stderr, "Out of memory\n");
+               exit(1);
+       }
+
+       len = read(fd, buffer, BLCKSZ);
+       if (len < 0)
+       {
+               perror("Failed to read $PGDATA/global/pg_control");
+               exit(1);
+       }
+       close(fd);
+
+       /* len is known to be non-negative here, so the cast is safe */
+       if ((size_t) len >= sizeof(ControlFileData) &&
+               ((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
+       {
+               /* Seems to be current version --- check the CRC. */
+               INIT_CRC64(crc);
+               COMP_CRC64(crc,
+                                  buffer + sizeof(crc64),
+                                  sizeof(ControlFileData) - sizeof(crc64));
+               FIN_CRC64(crc);
+
+               if (!EQ_CRC64(crc, ((ControlFileData *) buffer)->crc))
+               {
+                       fprintf(stderr, "pg_control exists but has invalid CRC; proceed with caution.\n");
+                       /* We will use the data anyway, but treat it as guessed. */
+                       guessed = true;
+               }
+
+               memcpy(&ControlFile, buffer, sizeof(ControlFile));
+               free(buffer);
+               return true;
+       }
+
+       /*
+        * Maybe it's a 7.1beta pg_control.  The conversion copies what it
+        * needs into ControlFile, so the raw image can be released afterwards.
+        */
+       converted = CheckControlVersion0(buffer, len);
+       free(buffer);
+       if (converted)
+               return true;
+
+       /* Looks like it's a mess. */
+       fprintf(stderr, "pg_control exists but is broken or unknown version; ignoring it.\n");
+       return false;
+}
+
+
+/******************* routines for old XLOG format *******************/
+
+
+/*
+ * This format was in use in 7.1 beta releases through 7.1beta5.  The
+ * pg_control layout was different, and so were the XLOG page headers.
+ * The XLOG record header format was physically the same as 7.1 release,
+ * but interpretation of the xl_len field was not.
+ */
+
+typedef struct crc64V0
+{
+       uint32                  crc1;
+       uint32                  crc2;
+} crc64V0;
+
+static uint32 crc_tableV0[] = {
+0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
+0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
+0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
+0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
+0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
+0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
+0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
+0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
+0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
+0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
+0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
+0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
+0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
+0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
+0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
+0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
+0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
+0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
+};
+
+/*
+ * Version-0 "64-bit CRC": really two separate 32-bit values, crc1 and crc2,
+ * both driven by crc_tableV0.  INIT sets both halves to all-ones and FIN
+ * inverts both.
+ *
+ * COMP_CRC64V0 feeds the data bytes alternately to the two halves: the
+ * first byte of each pair goes to crc1, the second to crc2, and a trailing
+ * odd byte goes to crc1.
+ *
+ * NB: COMP_CRC64V0 expands to a bare brace block rather than a
+ * do { ... } while (0), so it must not be used as the unbraced body of an
+ * if that has an else.
+ */
+#define INIT_CRC64V0(crc)      ((crc).crc1 = 0xffffffff, (crc).crc2 = 0xffffffff)
+#define FIN_CRC64V0(crc)       ((crc).crc1 ^= 0xffffffff, (crc).crc2 ^= 0xffffffff)
+#define COMP_CRC64V0(crc, data, len)   \
+{\
+        uint32       __c1 = (crc).crc1;\
+        uint32       __c2 = (crc).crc2;\
+        char        *__data = (char *) (data);\
+        uint32       __len = (len);\
+\
+        while (__len >= 2)\
+        {\
+                __c1 = crc_tableV0[(__c1 ^ *__data++) & 0xff] ^ (__c1 >> 8);\
+                __c2 = crc_tableV0[(__c2 ^ *__data++) & 0xff] ^ (__c2 >> 8);\
+                __len -= 2;\
+        }\
+        if (__len > 0)\
+                __c1 = crc_tableV0[(__c1 ^ *__data++) & 0xff] ^ (__c1 >> 8);\
+        (crc).crc1 = __c1;\
+        (crc).crc2 = __c2;\
+}
+
+/* Two version-0 CRCs are equal only if both halves match. */
+#define EQ_CRC64V0(c1,c2)  ((c1).crc1 == (c2).crc1 && (c1).crc2 == (c2).crc2)
+
+
+#define LOCALE_NAME_BUFLEN_V0  128
+
+typedef struct ControlFileDataV0
+{
+       crc64V0                 crc;
+       uint32                  logId;          /* current log file id */
+       uint32                  logSeg;         /* current log file segment (1-based) */
+       XLogRecPtr              checkPoint;     /* last check point record ptr */
+       time_t                  time;           /* time stamp of last modification */
+       DBState                 state;          /* see enum above */
+       uint32                  blcksz;         /* block size for this DB */
+       uint32                  relseg_size; /* blocks per segment of large relation */
+       uint32                  catalog_version_no;     /* internal version number */
+       char                    lc_collate[LOCALE_NAME_BUFLEN_V0];
+       char                    lc_ctype[LOCALE_NAME_BUFLEN_V0];
+       char                    archdir[MAXPGPATH];     /* where to move offline log files */
+} ControlFileDataV0;
+
+typedef struct CheckPointV0
+{
+       XLogRecPtr              redo;           /* next RecPtr available when we */
+                                                               /* began to create CheckPoint */
+                                                               /* (i.e. REDO start point) */
+       XLogRecPtr              undo;           /* first record of oldest in-progress */
+                                                               /* transaction when we started */
+                                                               /* (i.e. UNDO end point) */
+       StartUpID               ThisStartUpID;
+       TransactionId   nextXid;
+       Oid                             nextOid;
+       bool                    Shutdown;
+} CheckPointV0;
+
+typedef struct XLogRecordV0
+{
+       crc64V0                 xl_crc;
+       XLogRecPtr              xl_prev;        /* ptr to previous record in log */
+       XLogRecPtr              xl_xact_prev; /* ptr to previous record of this xact */
+       TransactionId   xl_xid;         /* xact id */
+       uint16                  xl_len;         /* total len of record *data* */
+       uint8                   xl_info;
+       RmgrId                  xl_rmid;        /* resource manager inserted this record */
+} XLogRecordV0;
+
+#define SizeOfXLogRecordV0     DOUBLEALIGN(sizeof(XLogRecordV0))
+
+typedef struct XLogContRecordV0
+{
+       uint16                  xl_len;         /* len of data left */
+} XLogContRecordV0;
+
+#define SizeOfXLogContRecordV0 DOUBLEALIGN(sizeof(XLogContRecordV0))
+
+#define XLOG_PAGE_MAGIC_V0 0x17345168
+
+typedef struct XLogPageHeaderDataV0
+{
+       uint32                xlp_magic;
+       uint16                xlp_info;
+} XLogPageHeaderDataV0;
+
+#define SizeOfXLogPHDV0   DOUBLEALIGN(sizeof(XLogPageHeaderDataV0))
+
+typedef XLogPageHeaderDataV0 *XLogPageHeaderV0;
+
+
+static bool RecordIsValidV0(XLogRecordV0 *record);
+static XLogRecordV0 *ReadRecordV0(XLogRecPtr *RecPtr, char *buffer);
+static bool ValidXLOGHeaderV0(XLogPageHeaderV0 hdr);
+
+
+/*
+ * Try to interpret pg_control contents as "version 0" format.
+ *
+ * buffer holds the len bytes that were read from pg_control.
+ *
+ * Returns false if the image is too short or fails the version-0 CRC
+ * check.  Otherwise ControlFile is rebuilt from the old-format fields and
+ * true is returned; "guessed" is set if the last checkpoint record could
+ * not be fetched from the old XLOG and default values had to be used.
+ */
+static bool
+CheckControlVersion0(char *buffer, int len)
+{
+       crc64V0         crc;
+       ControlFileDataV0 *oldfile;
+       XLogRecordV0 *record;
+       CheckPointV0 *oldchkpt;
+       char       *recbuf;
+
+       /* Reject negative lengths explicitly; they'd look huge once unsigned */
+       if (len < 0 || (size_t) len < sizeof(ControlFileDataV0))
+               return false;
+       /* Check CRC the version-0 way. */
+       INIT_CRC64V0(crc);
+       COMP_CRC64V0(crc,
+                                buffer + sizeof(crc64V0),
+                                sizeof(ControlFileDataV0) - sizeof(crc64V0));
+       FIN_CRC64V0(crc);
+
+       if (!EQ_CRC64V0(crc, ((ControlFileDataV0 *) buffer)->crc))
+               return false;
+
+       /* Valid data, convert useful fields to new-style pg_control format */
+       oldfile = (ControlFileDataV0 *) buffer;
+
+       memset(&ControlFile, 0, sizeof(ControlFile));
+
+       ControlFile.pg_control_version = PG_CONTROL_VERSION;
+       ControlFile.catalog_version_no = oldfile->catalog_version_no;
+
+       ControlFile.state = oldfile->state;
+
+       ControlFile.blcksz = oldfile->blcksz;
+       ControlFile.relseg_size = oldfile->relseg_size;
+
+       /*
+        * Bounded copies: never write past the destination fields, and always
+        * leave them null-terminated, even if the old strings are not.
+        */
+       StrNCpy(ControlFile.lc_collate, oldfile->lc_collate,
+                       sizeof(ControlFile.lc_collate));
+       StrNCpy(ControlFile.lc_ctype, oldfile->lc_ctype,
+                       sizeof(ControlFile.lc_ctype));
+
+       /*
+        * Since this format did not include a copy of the latest checkpoint
+        * record, we have to go rooting in the old XLOG to get that.
+        */
+       recbuf = (char *) malloc(_INTL_MAXLOGRECSZ);
+       if (recbuf == NULL)
+       {
+               fprintf(stderr, "Out of memory\n");
+               exit(1);
+       }
+
+       record = ReadRecordV0(&oldfile->checkPoint, recbuf);
+
+       /*
+        * A record too short to hold a CheckPointV0 cannot be a usable
+        * checkpoint, so treat it the same as an unreadable one.
+        */
+       if (record == NULL || record->xl_len < sizeof(CheckPointV0))
+       {
+               /*
+                * We have to guess at the checkpoint contents.
+                */
+               guessed = true;
+               ControlFile.checkPointCopy.ThisStartUpID = 0;
+               ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */
+               ControlFile.checkPointCopy.nextOid = BootstrapObjectIdData;
+               free(recbuf);
+               return true;
+       }
+       oldchkpt = (CheckPointV0 *) XLogRecGetData(record);
+
+       ControlFile.checkPointCopy.ThisStartUpID = oldchkpt->ThisStartUpID;
+       ControlFile.checkPointCopy.nextXid = oldchkpt->nextXid;
+       ControlFile.checkPointCopy.nextOid = oldchkpt->nextOid;
+
+       /* the record lives in recbuf; we've copied what we need out of it */
+       free(recbuf);
+
+       return true;
+}
+
+/*
+ * Verify the CRC of an XLOG V0 record.
+ *
+ * Nothing in an XLOG record is trusted (beyond the little needed to work
+ * out how many bytes to read) until its CRC has been checked.
+ *
+ * The caller must have the complete record in memory at *record.
+ * Returns true if the stored CRC matches the one computed here.
+ */
+static bool
+RecordIsValidV0(XLogRecordV0 *record)
+{
+       crc64V0         computed;
+       uint32          datalen = record->xl_len;
+
+       /*
+        * NB: V0 records that carry backup blocks are not handled correctly
+        * by this computation.  It is only ever applied to checkpoint records
+        * for now, so that limitation is accepted.
+        *
+        * The CRC covers the record data first, then the header minus the
+        * leading CRC field itself.
+        */
+       INIT_CRC64V0(computed);
+       COMP_CRC64V0(computed, XLogRecGetData(record), datalen);
+       COMP_CRC64V0(computed, (char *) record + sizeof(crc64V0),
+                                SizeOfXLogRecordV0 - sizeof(crc64V0));
+       FIN_CRC64V0(computed);
+
+       return EQ_CRC64V0(record->xl_crc, computed) ? true : false;
+}
+
+/*
+ * Attempt to read an XLOG V0 record at recptr.
+ *
+ * If no valid record is available, returns NULL.
+ *
+ * buffer is a workspace at least _INTL_MAXLOGRECSZ bytes long.  It is needed
+ * to reassemble a record that crosses block boundaries.  Note that on
+ * successful return, the returned record pointer always points at buffer.
+ *
+ * The open segment file, its log id/segment number, the offset of the page
+ * currently held in the page buffer, and the page buffer itself are kept in
+ * static variables across calls.  On any failure the file is closed, so the
+ * next call starts from scratch.
+ */
+static XLogRecordV0 *
+ReadRecordV0(XLogRecPtr *RecPtr, char *buffer)
+{
+       static int      readFile = -1;
+       static uint32 readId = 0;
+       static uint32 readSeg = 0;
+       static uint32 readOff = 0;
+       static char *readBuf = NULL;
+
+       XLogRecordV0 *record;
+       uint32          len,
+                               total_len;
+       uint32          targetPageOff;
+
+       if (readBuf == NULL)
+       {
+               readBuf = (char *) malloc(BLCKSZ);
+               if (readBuf == NULL)
+                       goto next_record_is_invalid;    /* can't read without a page buffer */
+       }
+
+       /*
+        * If a file is still open from an earlier call but the wanted record
+        * lives in a different segment, drop it; otherwise we would read pages
+        * of the wrong file (or skip the read entirely if readOff matched).
+        */
+       if (readFile >= 0 && !XLByteInSeg(*RecPtr, readId, readSeg))
+       {
+               close(readFile);
+               readFile = -1;
+       }
+
+       XLByteToSeg(*RecPtr, readId, readSeg);
+       if (readFile < 0)
+       {
+               readFile = XLogFileOpen(readId, readSeg);
+               if (readFile < 0)
+                       goto next_record_is_invalid;
+               readOff = (uint32) (-1); /* force read to occur below */
+       }
+
+       /* Offset within the segment of the page containing the record start */
+       targetPageOff = ((RecPtr->xrecoff % XLogSegSize) / BLCKSZ) * BLCKSZ;
+       if (readOff != targetPageOff)
+       {
+               readOff = targetPageOff;
+               if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
+                       goto next_record_is_invalid;
+               if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
+                       goto next_record_is_invalid;
+               if (!ValidXLOGHeaderV0((XLogPageHeaderV0) readBuf))
+                       goto next_record_is_invalid;
+       }
+
+       /*
+        * A record cannot start right after the page header if the page says
+        * its first item is a continuation of an earlier record.
+        */
+       if ((((XLogPageHeaderV0) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) &&
+               RecPtr->xrecoff % BLCKSZ == SizeOfXLogPHDV0)
+               goto next_record_is_invalid;
+       record = (XLogRecordV0 *) ((char *) readBuf + RecPtr->xrecoff % BLCKSZ);
+
+       if (record->xl_len == 0)
+               goto next_record_is_invalid;
+       /*
+        * Compute total length of record including any appended backup blocks.
+        */
+       total_len = SizeOfXLogRecordV0 + record->xl_len;
+       /*
+        * Make sure it will fit in buffer (currently, it is mechanically
+        * impossible for this test to fail, but it seems like a good idea
+        * anyway).
+        */
+       if (total_len > _INTL_MAXLOGRECSZ)
+               goto next_record_is_invalid;
+
+       /* Bytes available from the record start to the end of this page */
+       len = BLCKSZ - RecPtr->xrecoff % BLCKSZ;
+       if (total_len > len)
+       {
+               /* Need to reassemble record */
+               XLogContRecordV0 *contrecord;
+               uint32                  gotlen = len;
+
+               memcpy(buffer, record, len);
+               record = (XLogRecordV0 *) buffer;
+               buffer += len;
+               for (;;)
+               {
+                       /* Advance to the next page, moving to the next segment if needed */
+                       readOff += BLCKSZ;
+                       if (readOff >= XLogSegSize)
+                       {
+                               close(readFile);
+                               readFile = -1;
+                               NextLogSeg(readId, readSeg);
+                               readFile = XLogFileOpen(readId, readSeg);
+                               if (readFile < 0)
+                                       goto next_record_is_invalid;
+                               readOff = 0;
+                       }
+                       if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
+                               goto next_record_is_invalid;
+                       if (!ValidXLOGHeaderV0((XLogPageHeaderV0) readBuf))
+                               goto next_record_is_invalid;
+                       if (!(((XLogPageHeaderV0) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD))
+                               goto next_record_is_invalid;
+                       contrecord = (XLogContRecordV0 *) ((char *) readBuf + SizeOfXLogPHDV0);
+
+                       /* The continuation must account exactly for what is still missing */
+                       if (contrecord->xl_len == 0 ||
+                               total_len != (contrecord->xl_len + gotlen))
+                               goto next_record_is_invalid;
+                       len = BLCKSZ - SizeOfXLogPHDV0 - SizeOfXLogContRecordV0;
+                       if (contrecord->xl_len > len)
+                       {
+                               /* Remainder fills this whole page and continues on the next */
+                               memcpy(buffer, (char *) contrecord + SizeOfXLogContRecordV0, len);
+                               gotlen += len;
+                               buffer += len;
+                               continue;
+                       }
+                       memcpy(buffer, (char *) contrecord + SizeOfXLogContRecordV0,
+                                  contrecord->xl_len);
+                       break;
+               }
+               if (!RecordIsValidV0(record))
+                       goto next_record_is_invalid;
+               return record;
+       }
+
+       /* Record does not cross a page boundary */
+       if (!RecordIsValidV0(record))
+               goto next_record_is_invalid;
+       memcpy(buffer, record, total_len);
+       return (XLogRecordV0 *) buffer;
+
+next_record_is_invalid:;
+       /* readFile may already be -1 here (e.g. the open itself failed) */
+       if (readFile >= 0)
+               close(readFile);
+       readFile = -1;
+       return NULL;
+}
+
+/*
+ * Sanity-check the version-0 xlog page header of a page just read in.
+ *
+ * The header is accepted only if it carries the V0 magic number and has
+ * no info bits set outside XLP_ALL_FLAGS.
+ *
+ * This exists purely to avoid repeating the checks inside ReadRecordV0;
+ * it is not meant to be called from anywhere else.
+ */
+static bool
+ValidXLOGHeaderV0(XLogPageHeaderV0 hdr)
+{
+       bool            magic_ok = (hdr->xlp_magic == XLOG_PAGE_MAGIC_V0);
+       bool            flags_ok = ((hdr->xlp_info & ~XLP_ALL_FLAGS) == 0);
+
+       return magic_ok && flags_ok;
+}
+
+/******************* end of routines for old XLOG format *******************/
+
+
+/*
+ * Fill ControlFile with default values for use when the old pg_control
+ * contents could not be read.  Always marks the result as guessed.
+ */
+static void
+GuessControlValues(void)
+{
+#ifdef USE_LOCALE
+       char       *collate_name;
+       char       *ctype_name;
+#endif
+
+       /* Start from an all-zeroes structure and flag everything as a guess. */
+       guessed = true;
+       memset(&ControlFile, 0, sizeof(ControlFile));
+
+       /* Format identification and compile-time layout parameters */
+       ControlFile.pg_control_version = PG_CONTROL_VERSION;
+       ControlFile.catalog_version_no = CATALOG_VERSION_NO;
+       ControlFile.blcksz = BLCKSZ;
+       ControlFile.relseg_size = RELSEG_SIZE;
+
+       /* Checkpoint copy: positioned just past the first page header of log 0 */
+       ControlFile.checkPointCopy.redo.xlogid = 0;
+       ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogPHD;
+       ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
+       ControlFile.checkPointCopy.ThisStartUpID = 0;
+       ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */
+       ControlFile.checkPointCopy.nextOid = BootstrapObjectIdData;
+       ControlFile.checkPointCopy.time = time(NULL);
+
+       /* Overall state: cleanly shut down, pointing at that checkpoint */
+       ControlFile.state = DB_SHUTDOWNED;
+       ControlFile.time = time(NULL);
+       ControlFile.logId = 0;
+       ControlFile.logSeg = 1;
+       ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
+
+       /* Locale settings: taken from the environment if locale support is on */
+#ifdef USE_LOCALE
+       collate_name = setlocale(LC_COLLATE, "");
+       if (collate_name == NULL)
+       {
+               fprintf(stderr, "Invalid LC_COLLATE setting\n");
+               exit(1);
+       }
+       /* copy before the next setlocale() call */
+       StrNCpy(ControlFile.lc_collate, collate_name, LOCALE_NAME_BUFLEN);
+
+       ctype_name = setlocale(LC_CTYPE, "");
+       if (ctype_name == NULL)
+       {
+               fprintf(stderr, "Invalid LC_CTYPE setting\n");
+               exit(1);
+       }
+       StrNCpy(ControlFile.lc_ctype, ctype_name, LOCALE_NAME_BUFLEN);
+#else
+       strcpy(ControlFile.lc_collate, "C");
+       strcpy(ControlFile.lc_ctype, "C");
+#endif
+
+       /*
+        * XXX eventually, should try to grovel through old XLOG to develop
+        * more accurate values for startupid, nextXID, and nextOID.
+        */
+}
+
+
+/*
+ * Show the user the pg_control values we ended up with after guessing.
+ *
+ * NB: only fields that RewriteControlFile() will NOT overwrite should be
+ * displayed here.
+ */
+static void
+PrintControlValues(void)
+{
+       printf("Guessed-at pg_control values:\n\n");
+
+       /* version information */
+       printf("pg_control version number:            %u\n",
+                  ControlFile.pg_control_version);
+       printf("Catalog version number:               %u\n",
+                  ControlFile.catalog_version_no);
+
+       /* counters taken from the latest checkpoint */
+       printf("Latest checkpoint's StartUpID:        %u\n",
+                  ControlFile.checkPointCopy.ThisStartUpID);
+       printf("Latest checkpoint's NextXID:          %u\n",
+                  ControlFile.checkPointCopy.nextXid);
+       printf("Latest checkpoint's NextOID:          %u\n",
+                  ControlFile.checkPointCopy.nextOid);
+
+       /* storage layout */
+       printf("Database block size:                  %u\n",
+                  ControlFile.blcksz);
+       printf("Blocks per segment of large relation: %u\n",
+                  ControlFile.relseg_size);
+
+       /* locale settings */
+       printf("LC_COLLATE:                           %s\n",
+                  ControlFile.lc_collate);
+       printf("LC_CTYPE:                             %s\n",
+                  ControlFile.lc_ctype);
+}
+
+
+/*
+ * Write out the new pg_control file.
+ *
+ * The in-memory ControlFile is first adjusted to describe an empty XLOG:
+ * the checkpoint is placed just past the first page header of log 0, the
+ * state is set to DB_SHUTDOWNED, and the previous-checkpoint pointer is
+ * zeroed.  The CRC is then recomputed and a zero-padded BLCKSZ-byte image
+ * replaces the file at ControlFilePath.
+ *
+ * Any failure is reported on stderr and ends the program with exit(1).
+ */
+static void
+RewriteControlFile(void)
+{
+       int                     fd;
+       char            buffer[BLCKSZ]; /* need not be aligned */
+
+       /*
+        * Adjust fields as needed to force an empty XLOG.
+        */
+       ControlFile.checkPointCopy.redo.xlogid = 0;
+       ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogPHD;
+       ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
+       ControlFile.checkPointCopy.time = time(NULL);
+
+       ControlFile.state = DB_SHUTDOWNED;
+       ControlFile.time = time(NULL);
+       ControlFile.logId = 0;
+       ControlFile.logSeg = 1;
+       ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
+       ControlFile.prevCheckPoint.xlogid = 0;
+       ControlFile.prevCheckPoint.xrecoff = 0;
+
+       /*
+        * Contents are protected with a CRC.  The sum skips the first
+        * sizeof(crc64) bytes of the struct (presumably the crc field itself)
+        * and covers everything after them.
+        */
+       INIT_CRC64(ControlFile.crc);
+       COMP_CRC64(ControlFile.crc,
+                          (char*) &ControlFile + sizeof(crc64),
+                          sizeof(ControlFileData) - sizeof(crc64));
+       FIN_CRC64(ControlFile.crc);
+
+       /*
+        * We write out BLCKSZ bytes into pg_control, zero-padding the
+        * excess over sizeof(ControlFileData).  This reduces the odds
+        * of premature-EOF errors when reading pg_control.  We'll still
+        * fail when we check the contents of the file, but hopefully with
+        * a more specific error than "couldn't read pg_control".
+        */
+       if (sizeof(ControlFileData) > BLCKSZ)
+       {
+               fprintf(stderr, "sizeof(ControlFileData) is too large ... fix xlog.c\n");
+               exit(1);
+       }
+
+       memset(buffer, 0, BLCKSZ);
+       memcpy(buffer, &ControlFile, sizeof(ControlFileData));
+
+       /*
+        * Remove any old file so the O_EXCL create below can succeed.  The
+        * result of unlink() is not checked: if the old file could not be
+        * removed, open() fails and reports the problem.
+        */
+       unlink(ControlFilePath);
+
+       fd = open(ControlFilePath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR);
+       if (fd < 0)
+       {
+               perror("RewriteControlFile failed to create pg_control file");
+               exit(1);
+       }
+
+       errno = 0;
+       if (write(fd, buffer, BLCKSZ) != BLCKSZ)
+       {
+               /* a short write need not set errno; assume we ran out of disk space */
+               if (errno == 0)
+                       errno = ENOSPC;
+               perror("RewriteControlFile failed to write pg_control file");
+               exit(1);
+       }
+
+       if (fsync(fd) != 0)
+       {
+               perror("fsync");
+               exit(1);
+       }
+
+       if (close(fd) != 0)
+       {
+               perror("RewriteControlFile failed to close pg_control file");
+               exit(1);
+       }
+}
+
+
+/*
+ * Remove existing XLOG files
+ *
+ * Scans the XLogDir directory and unlinks every entry whose name is
+ * exactly 16 characters long and consists solely of the characters
+ * 0-9 and A-F.  All other entries are left untouched.
+ *
+ * Any failure is reported on stderr and ends the program with exit(1).
+ */
+static void
+KillExistingXLOG(void)
+{
+       DIR                        *xldir;
+       struct dirent  *xlde;
+       char                    path[MAXPGPATH];
+
+       xldir = opendir(XLogDir);
+       if (xldir == NULL)
+       {
+               perror("KillExistingXLOG: cannot open $PGDATA/pg_xlog directory");
+               exit(1);
+       }
+
+       /*
+        * readdir() returns NULL both at end-of-directory and on error, so
+        * errno is cleared before each call to tell the two cases apart.
+        */
+       errno = 0;
+       while ((xlde = readdir(xldir)) != NULL)
+       {
+               if (strlen(xlde->d_name) == 16 &&
+                       strspn(xlde->d_name, "0123456789ABCDEF") == 16)
+               {
+                       /* bounded formatting: path must never overrun MAXPGPATH bytes */
+                       snprintf(path, MAXPGPATH, "%s%c%s",
+                                        XLogDir, SEP_CHAR, xlde->d_name);
+                       if (unlink(path) < 0)
+                       {
+                               perror(path);
+                               exit(1);
+                       }
+               }
+               errno = 0;
+       }
+       if (errno)
+       {
+               perror("KillExistingXLOG: cannot read $PGDATA/pg_xlog directory");
+               exit(1);
+       }
+       closedir(xldir);
+}
+
+
+/*
+ * Write an empty XLOG file, containing only the checkpoint record
+ * already set up in ControlFile.
+ *
+ * The first BLCKSZ-byte page holds a page header followed by one
+ * XLOG_CHECKPOINT_SHUTDOWN record whose payload is a copy of
+ * ControlFile.checkPointCopy; the rest of the file, up to XLogSegSize
+ * bytes, is zero-filled.  The file name comes from XLogFileName(path, 0, 0).
+ *
+ * Any failure is reported on stderr and ends the program with exit(1).
+ */
+static void
+WriteEmptyXLOG(void)
+{
+       char       *buffer;
+       XLogPageHeader page;
+       XLogRecord *record;
+       crc64           crc;
+       char            path[MAXPGPATH];
+       int                     fd;
+       int                     nbytes;
+
+       /* Use malloc() to ensure buffer is MAXALIGNED */
+       buffer = (char *) malloc(BLCKSZ);
+       if (buffer == NULL)
+       {
+               fprintf(stderr, "WriteEmptyXLOG: out of memory\n");
+               exit(1);
+       }
+       page = (XLogPageHeader) buffer;
+
+       /* Set up the first page with initial record */
+       memset(buffer, 0, BLCKSZ);
+       page->xlp_magic = XLOG_PAGE_MAGIC;
+       page->xlp_info = 0;
+       page->xlp_sui = ControlFile.checkPointCopy.ThisStartUpID;
+       record = (XLogRecord *) ((char *) page + SizeOfXLogPHD);
+       record->xl_prev.xlogid = 0;
+       record->xl_prev.xrecoff = 0;
+       record->xl_xact_prev = record->xl_prev;
+       record->xl_xid = InvalidTransactionId;
+       record->xl_len = sizeof(CheckPoint);
+       record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
+       record->xl_rmid = RM_XLOG_ID;
+       memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy,
+                  sizeof(CheckPoint));
+
+       /*
+        * The record CRC covers the payload first, then the record header
+        * minus its leading sizeof(crc64) bytes.  Keep this order.
+        */
+       INIT_CRC64(crc);
+       COMP_CRC64(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
+       COMP_CRC64(crc, (char*) record + sizeof(crc64),
+                          SizeOfXLogRecord - sizeof(crc64));
+       FIN_CRC64(crc);
+       record->xl_crc = crc;
+
+       /* Write the first page */
+       XLogFileName(path, 0, 0);
+
+       /* Result ignored: a leftover file makes the O_EXCL open fail instead */
+       unlink(path);
+
+       fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+                         S_IRUSR | S_IWUSR);
+       if (fd < 0)
+       {
+               perror(path);
+               exit(1);
+       }
+
+       errno = 0;
+       if (write(fd, buffer, BLCKSZ) != BLCKSZ)
+       {
+               /* a short write need not set errno; assume we ran out of disk space */
+               if (errno == 0)
+                       errno = ENOSPC;
+               perror("WriteEmptyXLOG: failed to write xlog file");
+               exit(1);
+       }
+
+       /* Fill the rest of the file with zeroes */
+       memset(buffer, 0, BLCKSZ);
+       for (nbytes = BLCKSZ; nbytes < XLogSegSize; nbytes += BLCKSZ)
+       {
+               errno = 0;
+               if (write(fd, buffer, BLCKSZ) != BLCKSZ)
+               {
+                       /* same convention as above for short writes */
+                       if (errno == 0)
+                               errno = ENOSPC;
+                       perror("WriteEmptyXLOG: failed to write xlog file");
+                       exit(1);
+               }
+       }
+
+       if (fsync(fd) != 0)
+       {
+               perror("fsync");
+               exit(1);
+       }
+
+       if (close(fd) != 0)
+       {
+               perror("WriteEmptyXLOG: failed to close xlog file");
+               exit(1);
+       }
+
+       free(buffer);
+}
+
+
+/*
+ * Print the command-line synopsis on stderr, then exit with status 1.
+ */
+static void
+usage(void)
+{
+       fprintf(stderr, "Usage: pg_resetxlog [-f] [-n] PGDataDirectory\n\n");
+       fprintf(stderr, "  -f\tforce update to be done\n");
+       fprintf(stderr, "  -n\tno update, just show extracted pg_control values (for testing)\n");
+       exit(1);
+}
+
+
+/*
+ * Program entry point.
+ *
+ * Usage: pg_resetxlog [-f] [-n] PGDataDirectory
+ *
+ * Parses the switches, refuses to run if a postmaster.pid lock file is
+ * present in the data directory, obtains pg_control values (by reading
+ * the file, or by guessing if that fails), and then either prints them
+ * or resets pg_control and the XLOG.  Returns 0 only after a completed
+ * reset; every other path leaves via exit(1).
+ */
+int
+main(int argc, char ** argv)
+{
+       bool    force = false;
+       bool    noupdate = false;
+       char    path[MAXPGPATH];
+       int             lockfd;
+       int             i = 1;
+
+       /* Consume leading switches; the first non-switch word ends them */
+       while (i < argc && argv[i][0] == '-')
+       {
+               const char *sw = argv[i];
+
+               if (strcmp(sw, "-f") == 0)
+                       force = true;
+               else if (strcmp(sw, "-n") == 0)
+                       noupdate = true;
+               else
+                       usage();
+               i++;
+       }
+
+       /* Exactly one non-switch argument, the data directory, must remain */
+       if (argc - i != 1)
+               usage();
+
+       DataDir = argv[i];
+
+       /* Derive the paths of the XLOG directory and the control file */
+       snprintf(XLogDir, MAXPGPATH, "%s%cpg_xlog", DataDir, SEP_CHAR);
+
+       snprintf(ControlFilePath, MAXPGPATH, "%s%cglobal%cpg_control",
+                        DataDir, SEP_CHAR, SEP_CHAR);
+
+       /*
+        * A postmaster lock file means we might be interfering with a live
+        * installation, so refuse to go on if one can be opened.  Failing
+        * to open it is fine only when the reason is that it doesn't exist.
+        */
+       snprintf(path, MAXPGPATH, "%s%cpostmaster.pid", DataDir, SEP_CHAR);
+
+       lockfd = open(path, O_RDONLY);
+       if (lockfd >= 0)
+       {
+               fprintf(stderr, "Lock file '%s' exists --- is a postmaster running?\n"
+                               "If not, delete the lock file and try again.\n",
+                               path);
+               exit(1);
+       }
+       if (errno != ENOENT)
+       {
+               perror("Failed to open $PGDATA/postmaster.pid for reading");
+               exit(1);
+       }
+
+       /*
+        * Use the existing pg_control contents if they can be read;
+        * otherwise fall back on guessed values.
+        */
+       if (!ReadControlFile())
+               GuessControlValues();
+
+       /*
+        * With -n, or when values were guessed and -f is absent, only
+        * report the values and stop.  The hint about -f is omitted for -n.
+        */
+       if (noupdate || (guessed && !force))
+       {
+               PrintControlValues();
+               if (!noupdate)
+                       printf("\nIf these values seem acceptable, use -f to force reset.\n");
+               exit(1);
+       }
+
+       /*
+        * Likewise insist on -f when pg_control does not show a clean shutdown.
+        */
+       if (!force && ControlFile.state != DB_SHUTDOWNED)
+       {
+               printf("The database was not shut down cleanly.\n"
+                          "Resetting the xlog may cause data to be lost!\n"
+                          "If you want to proceed anyway, use -f to force reset.\n");
+               exit(1);
+       }
+
+       /*
+        * All checks passed: rewrite pg_control, remove the old XLOG files,
+        * and write a fresh, empty XLOG file.
+        */
+       RewriteControlFile();
+       KillExistingXLOG();
+       WriteEmptyXLOG();
+
+       printf("XLOG reset.\n");
+       return 0;
+}