heap' logging
author: Vadim B. Mikheev <vadim4o@yahoo.com>
Mon, 3 Jul 2000 02:54:21 +0000 (02:54 +0000)
committer: Vadim B. Mikheev <vadim4o@yahoo.com>
Mon, 3 Jul 2000 02:54:21 +0000 (02:54 +0000)
src/backend/access/heap/heapam.c
src/backend/access/heap/hio.c
src/backend/storage/page/bufpage.c
src/include/access/hio.h
src/include/access/htup.h
src/include/access/xlog.h
src/include/storage/bufpage.h

index d671036f0499158aedae528de4d0c58799fa29c8..9f3a7ac7140b0539ae4e64a13adc25b613b2b48b 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.74 2000/07/02 22:00:27 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.75 2000/07/03 02:54:15 vadim Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1271,10 +1271,9 @@ heap_get_latest_tid(Relation relation,
 Oid
 heap_insert(Relation relation, HeapTuple tup)
 {
-       /* ----------------
-        *      increment access statistics
-        * ----------------
-        */
+       Buffer buffer;
+
+       /* increment access statistics */
        tup->tableOid = relation->rd_id;
        IncrHeapAccessStat(local_insert);
        IncrHeapAccessStat(global_insert);
@@ -1300,7 +1299,11 @@ heap_insert(Relation relation, HeapTuple tup)
        tup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
        tup->t_data->t_infomask |= HEAP_XMAX_INVALID;
 
-       RelationPutHeapTupleAtEnd(relation, tup);
+       /* Find buffer for this tuple */
+       buffer = RelationGetBufferForTuple(relation, tup->t_len, InvalidBuffer);
+
+       /* NO ELOG(ERROR) from here till changes are logged */
+       RelationPutHeapTuple(relation, buffer, tup);
 
 #ifdef XLOG
        /* XLOG stuff */
@@ -1308,7 +1311,8 @@ heap_insert(Relation relation, HeapTuple tup)
                xl_heap_insert  xlrec;
                xlrec.itid.dbId = relation->rd_lockInfo.lockRelId.dbId;
                xlrec.itid.relId = relation->rd_lockInfo.lockRelId.relId;
-XXX            xlrec.itid.tid = tp.t_self;
+               xlrec.itid.cid = GetCurrentCommandId();
+               xlrec.itid.tid = tup->t_self;
                xlrec.t_natts = tup->t_data->t_natts;
                xlrec.t_oid = tup->t_data->t_oid;
                xlrec.t_hoff = tup->t_data->t_hoff;
@@ -1319,10 +1323,14 @@ XXX             xlrec.itid.tid = tp.t_self;
                        (char*) tup->t_data + offsetof(HeapTupleHeaderData, tbits), 
                        tup->t_len - offsetof(HeapTupleHeaderData, tbits));
 
-               dp->pd_lsn = recptr;
+               ((PageHeader) BufferGetPage(buffer))->pd_lsn = recptr;
+               ((PageHeader) BufferGetPage(buffer))->pd_sui = ThisStartUpID;
        }
 #endif
 
+       LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+       WriteBuffer(buffer);
+
        if (IsSystemRelationName(RelationGetRelationName(relation)))
                RelationMark4RollbackHeapTuple(relation, tup);
 
@@ -1417,11 +1425,13 @@ l1:
                xl_heap_delete  xlrec;
                xlrec.dtid.dbId = relation->rd_lockInfo.lockRelId.dbId;
                xlrec.dtid.relId = relation->rd_lockInfo.lockRelId.relId;
+               xlrec.dtid.cid = GetCurrentCommandId();
                xlrec.dtid.tid = tp.t_self;
                XLogRecPtr recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE,
                        (char*) xlrec, sizeof(xlrec), NULL, 0);
 
                dp->pd_lsn = recptr;
+               dp->pd_sui = ThisStartUpID;
        }
 #endif
 
@@ -1451,7 +1461,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
        ItemId          lp;
        HeapTupleData oldtup;
        PageHeader      dp;
-       Buffer          buffer;
+       Buffer          buffer, newbuf;
        int                     result;
 
        newtup->tableOid = relation->rd_id;
@@ -1531,43 +1541,65 @@ l2:
        newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
        newtup->t_data->t_infomask |= (HEAP_XMAX_INVALID | HEAP_UPDATED);
 
-       /* logically delete old item */
+       /* Find buffer for new tuple */
+
+       if ((unsigned) MAXALIGN(newtup->t_len) <= PageGetFreeSpace((Page) dp))
+               newbuf = buffer;
+       else
+               newbuf = RelationGetBufferForTuple(relation, newtup->t_len, buffer);
+
+       /* NO ELOG(ERROR) from here till changes are logged */
+
+       /* insert new tuple */
+       RelationPutHeapTuple(relation, newbuf, newtup);
+
+       /* logically delete old tuple */
        TransactionIdStore(GetCurrentTransactionId(), &(oldtup.t_data->t_xmax));
        oldtup.t_data->t_cmax = GetCurrentCommandId();
        oldtup.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
                                                         HEAP_XMAX_INVALID | HEAP_MARKED_FOR_UPDATE);
 
-       /* insert new item */
-       if ((unsigned) MAXALIGN(newtup->t_len) <= PageGetFreeSpace((Page) dp))
-               RelationPutHeapTuple(relation, buffer, newtup);
-       else
+       /* record address of new tuple in t_ctid of old one */
+       oldtup.t_data->t_ctid = newtup->t_self;
+
+#ifdef XLOG
+       /* XLOG stuff */
        {
+               xl_heap_update  xlrec;
+               xlrec.dtid.dbId = relation->rd_lockInfo.lockRelId.dbId;
+               xlrec.dtid.relId = relation->rd_lockInfo.lockRelId.relId;
+               xlrec.dtid.cid = GetCurrentCommandId();
+               xlrec.itid.tid = newtup->t_self;
+               xlrec.t_natts = newtup->t_data->t_natts;
+               xlrec.t_hoff = newtup->t_data->t_hoff;
+               xlrec.mask = newtup->t_data->t_infomask;
+               
+               XLogRecPtr recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_UPDATE,
+                       (char*) xlrec, sizeof(xlrec), 
+                       (char*) newtup->t_data + offsetof(HeapTupleHeaderData, tbits), 
+                       newtup->t_len - offsetof(HeapTupleHeaderData, tbits));
 
-               /*
-                * New item won't fit on same page as old item, have to look for a
-                * new place to put it. Note that we have to unlock current buffer
-                * context - not good but RelationPutHeapTupleAtEnd uses extend
-                * lock.
-                */
-               LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-               RelationPutHeapTupleAtEnd(relation, newtup);
-               LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+               if (newbuf != buffer)
+               {
+                       ((PageHeader) BufferGetPage(newbuf))->pd_lsn = recptr;
+                       ((PageHeader) BufferGetPage(newbuf))->pd_sui = ThisStartUpID;
+               }
+               ((PageHeader) BufferGetPage(buffer))->pd_lsn = recptr;
+               ((PageHeader) BufferGetPage(buffer))->pd_sui = ThisStartUpID;
        }
-       /* mark for rollback caches */
-       RelationMark4RollbackHeapTuple(relation, newtup);
-
-       /*
-        * New item in place, now record address of new tuple in t_ctid of old
-        * one.
-        */
-       oldtup.t_data->t_ctid = newtup->t_self;
+#endif
 
+       if (newbuf != buffer)
+       {
+               LockBuffer(newbuf, BUFFER_LOCK_UNLOCK);
+               WriteBuffer(newbuf);
+       }
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+       WriteBuffer(buffer);
 
        /* invalidate caches */
        RelationInvalidateHeapTuple(relation, &oldtup);
-
-       WriteBuffer(buffer);
+       RelationMark4RollbackHeapTuple(relation, newtup);
 
        return HeapTupleMayBeUpdated;
 }
@@ -1648,6 +1680,14 @@ l3:
                return result;
        }
 
+#ifdef XLOG
+       /*
+        * XLOG stuff: no logging is required as long as we have no
+        * savepoints. For savepoints private log could be used...
+        */
+       ((PageHeader) BufferGetPage(*buffer))->pd_sui = ThisStartUpID;
+#endif
+
        /* store transaction information of xact marking the tuple */
        TransactionIdStore(GetCurrentTransactionId(), &(tuple->t_data->t_xmax));
        tuple->t_data->t_cmax = GetCurrentCommandId();
index 3fc2a69df1a3fe914c9ee30418bda41cbad593b0..9181a7984d7826be9b764dc70294449e7ea8f6e8 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Id: hio.c,v 1.31 2000/04/12 17:14:45 momjian Exp $
+ *       $Id: hio.c,v 1.32 2000/07/03 02:54:15 vadim Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "access/hio.h"
 
 /*
- * amputunique - place tuple at tid
- *      Currently on errors, calls elog.  Perhaps should return -1?
- *      Possible errors include the addition of a tuple to the page
- *      between the time the linep is chosen and the page is L_UP'd.
+ * RelationPutHeapTuple        - place tuple at specified page
  *
- *      This should be coordinated with the B-tree code.
- *      Probably needs to have an amdelunique to allow for
- *      internal index records to be deleted and reordered as needed.
- *      For the heap AM, this should never be needed.
+ * !!! ELOG(ERROR) IS DISALLOWED HERE !!!
  *
- *      Note - we assume that caller hold BUFFER_LOCK_EXCLUSIVE on the buffer.
+ * Note - we assume that the caller holds BUFFER_LOCK_EXCLUSIVE on the buffer.
  *
  */
 void
@@ -57,62 +51,41 @@ RelationPutHeapTuple(Relation relation,
        offnum = PageAddItem((Page) pageHeader, (Item) tuple->t_data,
                                                 tuple->t_len, InvalidOffsetNumber, LP_USED);
 
+       if (offnum == InvalidOffsetNumber)
+               elog(STOP, "RelationPutHeapTuple: failed to add tuple");
+
        itemId = PageGetItemId((Page) pageHeader, offnum);
        item = PageGetItem((Page) pageHeader, itemId);
 
        ItemPointerSet(&((HeapTupleHeader) item)->t_ctid,
                                   BufferGetBlockNumber(buffer), offnum);
 
-       /*
-        * Let the caller do this!
-        *
-        * WriteBuffer(buffer);
-        */
-
        /* return an accurate tuple */
        ItemPointerSet(&tuple->t_self, BufferGetBlockNumber(buffer), offnum);
 }
 
 /*
- * This routine is another in the series of attempts to reduce the number
- * of I/O's and system calls executed in the various benchmarks.  In
- * particular, this routine is used to append data to the end of a relation
- * file without excessive lseeks.  This code should do no more than 2 semops
- * in the ideal case.
+ * RelationGetBufferForTuple
  *
- * Eventually, we should cache the number of blocks in a relation somewhere.
- * Until that time, this code will have to do an lseek to determine the number
- * of blocks in a relation.
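+ * Returns a (locked) buffer with enough free space to add a tuple of the
+ * given length.  If Ubuf is valid and turns out to be the buffer chosen,
+ * no attempt is made to lock it - this is for heap_update, which passes
+ * in the buffer of the old tuple.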
  *
- * This code should ideally do at most 4 semops, 1 lseek, and possibly 1 write
- * to do an append; it's possible to eliminate 2 of the semops if we do direct
- * buffer stuff (!); the lseek and the write can go if we get
- * RelationGetNumberOfBlocks to be useful.
+ * ELOG(ERROR) is allowed here, so this routine *must* be called
+ * before any (unlogged) changes are made in buffer pool.
  *
- * NOTE: This code presumes that we have a write lock on the relation.
- * Not now - we use extend locking...
- *
- * Also note that this routine probably shouldn't have to exist, and does
- * screw up the call graph rather badly, but we are wasting so much time and
- * system resources being massively general that we are losing badly in our
- * performance benchmarks.
  */
-void
-RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple)
+Buffer
+RelationGetBufferForTuple(Relation relation, Size len, Buffer Ubuf)
 {
        Buffer          buffer;
        Page            pageHeader;
        BlockNumber lastblock;
-       OffsetNumber offnum;
-       Size            len;
-       ItemId          itemId;
-       Item            item;
 
-       len = MAXALIGN(tuple->t_len);           /* be conservative */
+       len = MAXALIGN(len);            /* be conservative */
 
        /*
-        * If we're gonna fail for oversize tuple, do it right away... this
-        * code should go away eventually.
+        * If we're gonna fail for oversize tuple, do it right away
         */
        if (len > MaxTupleSize)
                elog(ERROR, "Tuple is too big: size %u, max size %ld",
@@ -152,7 +125,8 @@ RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple)
        else
                buffer = ReadBuffer(relation, lastblock - 1);
 
-       LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+       if (buffer != Ubuf)
+               LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
        pageHeader = (Page) BufferGetPage(buffer);
 
        /*
@@ -160,7 +134,8 @@ RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple)
         */
        if (len > PageGetFreeSpace(pageHeader))
        {
-               LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+               if (buffer != Ubuf)
+                       LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
                buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW);
                LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
                pageHeader = (Page) BufferGetPage(buffer);
@@ -168,36 +143,22 @@ RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple)
 
                if (len > PageGetFreeSpace(pageHeader))
                {
-
-                       /*
-                        * BUG: by elog'ing here, we leave the new buffer locked and
-                        * not marked dirty, which may result in an invalid page
-                        * header being left on disk.  But we should not get here
-                        * given the test at the top of the routine, and the whole
-                        * deal should go away when we implement tuple splitting
-                        * anyway...
-                        */
-                       elog(ERROR, "Tuple is too big: size %u", len);
+                       /* We should not get here given the test at the top */
+                       elog(STOP, "Tuple is too big: size %u", len);
                }
        }
+       /*
+        * The caller should have checked the free space in Ubuf itself,
+        * but handle the case where Ubuf has enough room anyway.
+        */
+       else if (buffer == Ubuf)
+       {
+               ReleaseBuffer(buffer);
+               buffer = Ubuf;
+       }
 
        if (!relation->rd_myxactonly)
                UnlockPage(relation, 0, ExclusiveLock);
 
-       offnum = PageAddItem((Page) pageHeader, (Item) tuple->t_data,
-                                                tuple->t_len, InvalidOffsetNumber, LP_USED);
-
-       itemId = PageGetItemId((Page) pageHeader, offnum);
-       item = PageGetItem((Page) pageHeader, itemId);
-
-       lastblock = BufferGetBlockNumber(buffer);
-
-       ItemPointerSet(&((HeapTupleHeader) item)->t_ctid, lastblock, offnum);
-
-       /* return an accurate tuple self-pointer */
-       ItemPointerSet(&tuple->t_self, lastblock, offnum);
-
-       LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-       WriteBuffer(buffer);
+       return(buffer);
 
 }
index f25faf180c39421a7f77d5040523db7956f535a4..43cabceba141bfa224ec9e6a4ed42bd492ba3db5 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/page/bufpage.c,v 1.29 2000/04/12 17:15:40 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/page/bufpage.c,v 1.30 2000/07/03 02:54:16 vadim Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -50,26 +50,13 @@ PageInit(Page page, Size pageSize, Size specialSize)
        PageSetPageSize(page, pageSize);
 }
 
-/*
- * PageAddItem
- *             Adds item to the given page.
- *
- * Note:
- *             This does not assume that the item resides on a single page.
- *             It is the responsiblity of the caller to act appropriately
- *             depending on this fact.  The "pskip" routines provide a
- *             friendlier interface, in this case.
- *
- *             This does change the status of any of the resources passed.
- *             The semantics may change in the future.
- *
- *             This routine should probably be combined with others?
- */
 /* ----------------
  *             PageAddItem
  *
  *             add an item to a page.
  *
+ *   !!! ELOG(ERROR) IS DISALLOWED HERE !!!
+ *
  *      Notes on interface:
  *             If offsetNumber is valid, shuffle ItemId's down to make room
  *             to use it, if PageManagerShuffle is true.  If PageManagerShuffle is
@@ -126,7 +113,7 @@ PageAddItem(Page page,
                        if (((*itemId).lp_flags & LP_USED) ||
                                ((*itemId).lp_len != 0))
                        {
-                               elog(ERROR, "PageAddItem: tried overwrite of used ItemId");
+                               elog(NOTICE, "PageAddItem: tried overwrite of used ItemId");
                                return InvalidOffsetNumber;
                        }
                }
index 999d2412e76725792bc09774b52396e5cf406186..c0636a4ff34f280dc3758a73ed525d6e2d945dd0 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: hio.h,v 1.14 2000/01/26 05:57:50 momjian Exp $
+ * $Id: hio.h,v 1.15 2000/07/03 02:54:17 vadim Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -18,6 +18,6 @@
 
 extern void RelationPutHeapTuple(Relation relation, Buffer buffer,
                                         HeapTuple tuple);
-extern void RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple);
+extern Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer Ubuf);
 
 #endif  /* HIO_H */
index 3370960e2fd64298c949b4e11cb4e9eca232ed3a..4665fe60bb774d9019a4099dc0a1f11b619d1ef1 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: htup.h,v 1.31 2000/07/02 22:01:00 momjian Exp $
+ * $Id: htup.h,v 1.32 2000/07/03 02:54:17 vadim Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -69,22 +69,25 @@ typedef HeapTupleHeaderData *HeapTupleHeader;
 #define        XLOG_HEAP_MOVE          0x30
 
 /*
- * All what we need to find changed tuple (14 bytes)
+ * All we need to find the changed tuple (18 bytes)
  */
 typedef struct xl_heaptid
 {
        Oid                                     dbId;           /* database */
        Oid                                     relId;          /* relation */
+       CommandId                       cid;            /* this is for "better" tuple' */
+                                                                       /* identification - it allows to avoid */
+                                                                       /* "compensation" records for undo */
        ItemPointerData         tid;            /* changed tuple id */
 } xl_heaptid;
 
-/* This is what we need to know about delete - ALIGN(14) = 16 bytes */
+/* This is what we need to know about delete - ALIGN(18) = 24 bytes */
 typedef struct xl_heap_delete
 {
        xl_heaptid                      dtid;           /* deleted tuple id */
 } xl_heap_delete;
 
-/* This is what we need to know about insert - 22 + data */
+/* This is what we need to know about insert - 26 + data */
 typedef struct xl_heap_insert
 {
        xl_heaptid                      itid;           /* inserted tuple id */
@@ -108,7 +111,7 @@ typedef struct xl_heap_update
        /* NEW TUPLE DATA FOLLOWS AT END OF STRUCT */
 } xl_heap_update;
 
-/* This is what we need to know about tuple move - ALIGN(20) = 24 bytes */
+/* This is what we need to know about tuple move - 24 bytes */
 typedef struct xl_heap_move
 {
        xl_heaptid                      ftid;           /* moved from */
index b5fda0b58ad4c6dc448edccdd373a917c8049408..b86339f072f7bcdac52c90e09db70422679e9094 100644 (file)
@@ -68,6 +68,13 @@ typedef XLogPageHeaderData *XLogPageHeader;
 
 #define XLP_FIRST_IS_SUBRECORD 0x0001
 
+/*
+ * StartUpID (SUI) - counter of system startups.
+ * It exists to allow removing pg_log after shutdown.
+ */
+typedef        uint32          StartUpID;
+extern StartUpID       ThisStartUpID;
+
 extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, 
                        char *hdr, uint32 hdrlen,
                        char *buf, uint32 buflen);
index 15d1106f26c11271cb4c84882764be8bba7d12f8..30b5a93ad6497e001c26cf8d8d8be95dbb9bf22c 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: bufpage.h,v 1.29 2000/06/02 10:20:27 vadim Exp $
+ * $Id: bufpage.h,v 1.30 2000/07/03 02:54:21 vadim Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -118,8 +118,10 @@ typedef OpaqueData *Opaque;
 typedef struct PageHeaderData
 {
 #ifdef XLOG
-       XLogRecPtr      pd_lsn;                 /* XLOG: next byte after last byte of xlog */
+       XLogRecPtr      pd_lsn;                 /* LSN: next byte after last byte of xlog */
                                                                /* record for last change of this page */
+       StartUpID       pd_sui;                 /* SUI of last changes (currently it's */
+                                                               /* used by heap AM only) */
 #endif
        LocationIndex pd_lower;         /* offset to start of free space */
        LocationIndex pd_upper;         /* offset to end of free space */