Move WAL continuation record information to WAL page header.
author Heikki Linnakangas <heikki.linnakangas@iki.fi>
Sun, 24 Jun 2012 15:15:00 +0000 (18:15 +0300)
committer Heikki Linnakangas <heikki.linnakangas@iki.fi>
Sun, 24 Jun 2012 15:35:30 +0000 (18:35 +0300)
The continuation record only contained one field, xl_rem_len, so it makes
things simpler to just include it in the WAL page header. This wastes four
bytes on pages that don't begin with a continuation from the previous page, plus
four bytes on every page, because of padding.

The motivation of this is to make it easier to calculate how much space a
WAL record needs. Before this patch, it depended on how many page boundaries
the record crosses. The motivation of that, in turn, is to separate the
allocation of space in the WAL from the copying of the record data to the
allocated space. Keeping the calculation of space required simple helps to
keep the critical section of allocating the space from WAL short. But that's
not included in this patch yet.

Bump WAL version number again, as this is an incompatible change.

src/backend/access/transam/xlog.c
src/include/access/xlog_internal.h

index 2f9209f3b8e7a1e29aeb9a293b7def49d5aad6df..03feb14551492268a5c2de8051414f916d3e5a2a 100644 (file)
@@ -696,7 +696,6 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
 {
        XLogCtlInsert *Insert = &XLogCtl->Insert;
        XLogRecord *record;
-       XLogContRecord *contrecord;
        XLogRecPtr      RecPtr;
        XLogRecPtr      WriteRqst;
        uint32          freespace;
@@ -1085,9 +1084,7 @@ begin:;
                curridx = Insert->curridx;
                /* Insert cont-record header */
                Insert->currpage->xlp_info |= XLP_FIRST_IS_CONTRECORD;
-               contrecord = (XLogContRecord *) Insert->currpos;
-               contrecord->xl_rem_len = write_len;
-               Insert->currpos += SizeOfXLogContRecord;
+               Insert->currpage->xlp_rem_len = write_len;
                freespace = INSERT_FREESPACE(Insert);
        }
 
@@ -3941,7 +3938,8 @@ retry:
        if (total_len > len)
        {
                /* Need to reassemble record */
-               XLogContRecord *contrecord;
+               char       *contrecord;
+               XLogPageHeader pageHeader;
                XLogRecPtr      pagelsn;
                uint32          gotlen = len;
 
@@ -3969,30 +3967,30 @@ retry:
                                                                readOff)));
                                goto next_record_is_invalid;
                        }
-                       pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
-                       contrecord = (XLogContRecord *) ((char *) readBuf + pageHeaderSize);
-                       if (contrecord->xl_rem_len == 0 ||
-                               total_len != (contrecord->xl_rem_len + gotlen))
+                       pageHeader = (XLogPageHeader) readBuf;
+                       pageHeaderSize = XLogPageHeaderSize(pageHeader);
+                       contrecord = (char *) readBuf + pageHeaderSize;
+                       if (pageHeader->xlp_rem_len == 0 ||
+                               total_len != (pageHeader->xlp_rem_len + gotlen))
                        {
                                char fname[MAXFNAMELEN];
                                XLogFileName(fname, curFileTLI, readSegNo);
                                ereport(emode_for_corrupt_record(emode, *RecPtr),
                                                (errmsg("invalid contrecord length %u in log segment %s, offset %u",
-                                                               contrecord->xl_rem_len,
+                                                               pageHeader->xlp_rem_len,
                                                                XLogFileNameP(curFileTLI, readSegNo),
                                                                readOff)));
                                goto next_record_is_invalid;
                        }
-                       len = XLOG_BLCKSZ - pageHeaderSize - SizeOfXLogContRecord;
-                       if (contrecord->xl_rem_len > len)
+                       len = XLOG_BLCKSZ - pageHeaderSize;
+                       if (pageHeader->xlp_rem_len > len)
                        {
-                               memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord, len);
+                               memcpy(buffer, (char *) contrecord, len);
                                gotlen += len;
                                buffer += len;
                                continue;
                        }
-                       memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord,
-                                  contrecord->xl_rem_len);
+                       memcpy(buffer, (char *) contrecord, pageHeader->xlp_rem_len);
                        break;
                }
                if (!RecordIsValid(record, *RecPtr, emode))
@@ -4000,8 +3998,7 @@ retry:
                pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
                XLogSegNoOffsetToRecPtr(
                        readSegNo,
-                       readOff + pageHeaderSize +
-                               MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len),
+                       readOff + pageHeaderSize + MAXALIGN(pageHeader->xlp_rem_len),
                        EndRecPtr);
                ReadRecPtr = *RecPtr;
                /* needn't worry about XLOG SWITCH, it can't cross page boundaries */
index 50b4c2f3782194cd180c77ac01b7a9a24a9b7216..c3fcc8a6a5af83782bf6dd118c4033429b8cb72a 100644 (file)
@@ -48,30 +48,10 @@ typedef struct BkpBlock
        /* ACTUAL BLOCK DATA FOLLOWS AT END OF STRUCT */
 } BkpBlock;
 
-/*
- * When there is not enough space on current page for whole record, we
- * continue on the next page with continuation record. (However, the
- * XLogRecord header will never be split across pages; if there's less than
- * SizeOfXLogRecord space left at the end of a page, we just waste it.)
- *
- * Note that xl_rem_len includes backup-block data; that is, it tracks
- * xl_tot_len not xl_len in the initial header.  Also note that the
- * continuation data isn't necessarily aligned.
- */
-typedef struct XLogContRecord
-{
-       uint32          xl_rem_len;             /* total len of remaining data for record */
-
-       /* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */
-
-} XLogContRecord;
-
-#define SizeOfXLogContRecord   sizeof(XLogContRecord)
-
 /*
  * Each page of XLOG file has a header like this:
  */
-#define XLOG_PAGE_MAGIC 0xD072 /* can be used as WAL version indicator */
+#define XLOG_PAGE_MAGIC 0xD073 /* can be used as WAL version indicator */
 
 typedef struct XLogPageHeaderData
 {
@@ -79,6 +59,19 @@ typedef struct XLogPageHeaderData
        uint16          xlp_info;               /* flag bits, see below */
        TimeLineID      xlp_tli;                /* TimeLineID of first record on page */
        XLogRecPtr      xlp_pageaddr;   /* XLOG address of this page */
+
+       /*
+        * When there is not enough space on current page for whole record, we
+        * continue on the next page.  xlp_rem_len is the number of bytes
+        * remaining from a previous page. (However, the XLogRecord header will
+        * never be split across pages; if there's less than SizeOfXLogRecord
+        * space left at the end of a page, we just waste it.)
+        *
+        * Note that xlp_rem_len includes backup-block data; that is, it tracks
+        * xl_tot_len not xl_len in the initial header.  Also note that the
+        * continuation data isn't necessarily aligned.
+        */
+       uint32          xlp_rem_len;    /* total len of remaining data for record */
 } XLogPageHeaderData;
 
 #define SizeOfXLogShortPHD     MAXALIGN(sizeof(XLogPageHeaderData))