Fix aboriginal mistake in lazy VACUUM's code for truncating away
authorTom Lane <tgl@sss.pgh.pa.us>
Sun, 16 Sep 2007 02:37:46 +0000 (02:37 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sun, 16 Sep 2007 02:37:46 +0000 (02:37 +0000)
no-longer-needed pages at the end of a table.  We thought we could throw away
pages containing HEAPTUPLE_DEAD tuples; but this is not so, because such
tuples very likely have index entries pointing at them, and we wouldn't have
removed the index entries.  The problem only emerges in a somewhat unlikely
race condition: the dead tuples have to have been inserted by a transaction
that later aborted, and this has to have happened between VACUUM's initial
scan of the page and then rechecking it for empty in count_nondeletable_pages.
But that timespan will include an index-cleaning pass, so it's not all that
hard to hit.  This seems to explain a couple of previously unsolved bug
reports.

src/backend/commands/vacuumlazy.c

index 3dbd17de58dd3f981629a8d2e6f3111d31dc5185..3faf172acbf10a9a67424d2877a189e3af8800ec 100644 (file)
@@ -36,7 +36,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.95 2007/09/12 22:10:26 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.96 2007/09/16 02:37:46 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -784,9 +784,9 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
 
        /*
         * Scan backwards from the end to verify that the end pages actually
-        * contain nothing we need to keep.  This is *necessary*, not optional,
-        * because other backends could have added tuples to these pages whilst we
-        * were vacuuming.
+        * contain no tuples.  This is *necessary*, not optional, because other
+        * backends could have added tuples to these pages whilst we were
+        * vacuuming.
         */
        new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
 
@@ -846,7 +846,7 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
 }
 
 /*
- * Rescan end pages to verify that they are (still) empty of needed tuples.
+ * Rescan end pages to verify that they are (still) empty of tuples.
  *
  * Returns number of nondeletable pages (last nonempty page + 1).
  */
@@ -854,7 +854,6 @@ static BlockNumber
 count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
 {
        BlockNumber blkno;
-       HeapTupleData tuple;
 
        /* Strange coding of loop control is needed because blkno is unsigned */
        blkno = vacrelstats->rel_pages;
@@ -864,8 +863,7 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
                Page            page;
                OffsetNumber offnum,
                                        maxoff;
-               bool            tupgone,
-                                       hastup;
+               bool            hastup;
 
                /*
                 * We don't insert a vacuum delay point here, because we have an
@@ -901,42 +899,13 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
 
                        itemid = PageGetItemId(page, offnum);
 
-                       if (!ItemIdIsUsed(itemid))
-                               continue;
-
-                       tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
-                       tuple.t_len = ItemIdGetLength(itemid);
-                       ItemPointerSet(&(tuple.t_self), blkno, offnum);
-
-                       tupgone = false;
-
-                       switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
-                       {
-                               case HEAPTUPLE_DEAD:
-                                       tupgone = true;         /* we can delete the tuple */
-                                       break;
-                               case HEAPTUPLE_LIVE:
-                                       /* Shouldn't be necessary to re-freeze anything */
-                                       break;
-                               case HEAPTUPLE_RECENTLY_DEAD:
-
-                                       /*
-                                        * If tuple is recently deleted then we must not remove it
-                                        * from relation.
-                                        */
-                                       break;
-                               case HEAPTUPLE_INSERT_IN_PROGRESS:
-                                       /* This is an expected case during concurrent vacuum */
-                                       break;
-                               case HEAPTUPLE_DELETE_IN_PROGRESS:
-                                       /* This is an expected case during concurrent vacuum */
-                                       break;
-                               default:
-                                       elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
-                                       break;
-                       }
-
-                       if (!tupgone)
+                       /*
+                        * Note: any non-unused item should be taken as a reason to keep
+                        * this page.  We formerly thought that DEAD tuples could be
+                        * thrown away, but that's not so, because we'd not have cleaned
+                        * out their index entries.
+                        */
+                       if (ItemIdIsUsed(itemid))
                        {
                                hastup = true;
                                break;                  /* can stop scanning */
@@ -952,7 +921,7 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
 
        /*
         * If we fall out of the loop, all the previously-thought-to-be-empty
-        * pages really are; we need not bother to look at the last known-nonempty
+        * pages still are; we need not bother to look at the last known-nonempty
         * page.
         */
        return vacrelstats->nonempty_pages;