Set PD_ALL_VISIBLE and visibility map bits in COPY FREEZE
author: Tomas Vondra <tomas.vondra@postgresql.org>
Sun, 17 Jan 2021 21:11:39 +0000 (22:11 +0100)
committer: Tomas Vondra <tomas.vondra@postgresql.org>
Sun, 17 Jan 2021 21:28:26 +0000 (22:28 +0100)
Make sure COPY FREEZE marks the pages as PD_ALL_VISIBLE and updates the
visibility map. Until now we only marked individual tuples as frozen,
but page-level flags were not updated, so the first VACUUM after the
COPY FREEZE had to rewrite the whole table.

This is a fairly old patch, and multiple people worked on it. The first
version was written by Jeff Janes, and then reworked by Pavan Deolasee
and Anastasia Lubennikova.

Author: Anastasia Lubennikova, Pavan Deolasee, Jeff Janes
Reviewed-by: Kuntal Ghosh, Jeff Janes, Tomas Vondra, Masahiko Sawada,
             Andres Freund, Ibrar Ahmed, Robert Haas, Tatsuro Ishii,
             Darafei Praliaskouski
Discussion: https://postgr.es/m/CABOikdN-ptGv0mZntrK2Q8OtfUuAjqaYMGmkdU1dCKFtUxVLrg@mail.gmail.com
Discussion: https://postgr.es/m/CAMkU%3D1w3osJJ2FneELhhNRLxfZitDgp9FPHee08NT2FQFmz_pQ%40mail.gmail.com

contrib/pg_visibility/expected/pg_visibility.out
contrib/pg_visibility/sql/pg_visibility.sql
src/backend/access/heap/heapam.c
src/backend/access/heap/hio.c
src/include/access/heapam_xlog.h

index ca4b6e186bcaf1001336be60e4a44f37a1ed0917..0017e3415c84bfd5f56f0627d261fc1d1f1a9422 100644 (file)
@@ -179,6 +179,69 @@ select pg_truncate_visibility_map('test_partition');
  
 (1 row)
 
+-- test copy freeze
+create table copyfreeze (a int, b char(1500));
+-- load all rows via COPY FREEZE and ensure that all pages are set all-visible
+-- and all-frozen.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin freeze;
+commit;
+select * from pg_visibility_map('copyfreeze');
+ blkno | all_visible | all_frozen 
+-------+-------------+------------
+     0 | t           | t
+     1 | t           | t
+     2 | t           | t
+(3 rows)
+
+select * from pg_check_frozen('copyfreeze');
+ t_ctid 
+--------
+(0 rows)
+
+-- load half the rows via regular COPY and rest via COPY FREEZE. The pages
+-- which are touched by regular COPY must not be set all-visible/all-frozen. On
+-- the other hand, pages allocated by COPY FREEZE should be marked
+-- all-frozen/all-visible.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin;
+copy copyfreeze from stdin freeze;
+commit;
+select * from pg_visibility_map('copyfreeze');
+ blkno | all_visible | all_frozen 
+-------+-------------+------------
+     0 | f           | f
+     1 | f           | f
+     2 | t           | t
+(3 rows)
+
+select * from pg_check_frozen('copyfreeze');
+ t_ctid 
+--------
+(0 rows)
+
+-- Try a mix of regular COPY and COPY FREEZE.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin freeze;
+copy copyfreeze from stdin;
+copy copyfreeze from stdin freeze;
+commit;
+select * from pg_visibility_map('copyfreeze');
+ blkno | all_visible | all_frozen 
+-------+-------------+------------
+     0 | t           | t
+     1 | f           | f
+     2 | t           | t
+(3 rows)
+
+select * from pg_check_frozen('copyfreeze');
+ t_ctid 
+--------
+(0 rows)
+
 -- cleanup
 drop table test_partitioned;
 drop view test_view;
@@ -188,3 +251,4 @@ drop server dummy_server;
 drop foreign data wrapper dummy;
 drop materialized view matview_visibility_test;
 drop table regular_table;
+drop table copyfreeze;
index f79b54480b701fd7a669ff12de00a264c9baf8fd..ec1afd490657bdbaed5eca4cc60a0d6ea45fa61e 100644 (file)
@@ -94,6 +94,82 @@ select count(*) > 0 from pg_visibility_map_summary('test_partition');
 select * from pg_check_frozen('test_partition'); -- hopefully none
 select pg_truncate_visibility_map('test_partition');
 
+-- test copy freeze
+create table copyfreeze (a int, b char(1500));
+
+-- load all rows via COPY FREEZE and ensure that all pages are set all-visible
+-- and all-frozen.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin freeze;
+1      '1'
+2      '2'
+3      '3'
+4      '4'
+5      '5'
+6      '6'
+7      '7'
+8      '8'
+9      '9'
+10     '10'
+11     '11'
+12     '12'
+\.
+commit;
+select * from pg_visibility_map('copyfreeze');
+select * from pg_check_frozen('copyfreeze');
+
+-- load half the rows via regular COPY and rest via COPY FREEZE. The pages
+-- which are touched by regular COPY must not be set all-visible/all-frozen. On
+-- the other hand, pages allocated by COPY FREEZE should be marked
+-- all-frozen/all-visible.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin;
+1      '1'
+2      '2'
+3      '3'
+4      '4'
+5      '5'
+6      '6'
+\.
+copy copyfreeze from stdin freeze;
+7      '7'
+8      '8'
+9      '9'
+10     '10'
+11     '11'
+12     '12'
+\.
+commit;
+select * from pg_visibility_map('copyfreeze');
+select * from pg_check_frozen('copyfreeze');
+
+-- Try a mix of regular COPY and COPY FREEZE.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin freeze;
+1      '1'
+2      '2'
+3      '3'
+4      '4'
+5      '5'
+\.
+copy copyfreeze from stdin;
+6      '6'
+\.
+copy copyfreeze from stdin freeze;
+7      '7'
+8      '8'
+9      '9'
+10     '10'
+11     '11'
+12     '12'
+\.
+commit;
+select * from pg_visibility_map('copyfreeze');
+select * from pg_check_frozen('copyfreeze');
+
 -- cleanup
 drop table test_partitioned;
 drop view test_view;
@@ -103,3 +179,4 @@ drop server dummy_server;
 drop foreign data wrapper dummy;
 drop materialized view matview_visibility_test;
 drop table regular_table;
+drop table copyfreeze;
index 5b9cfb26cf76f6b9b9aeec7a719808076fa9d001..faffbb18658d7a7b2fe1929686352271a3d52137 100644 (file)
@@ -2121,6 +2121,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
        int                     ndone;
        PGAlignedBlock scratch;
        Page            page;
+       Buffer          vmbuffer = InvalidBuffer;
        bool            needwal;
        Size            saveFreeSpace;
        bool            need_tuple_data = RelationIsLogicallyLogged(relation);
@@ -2175,8 +2176,9 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
        while (ndone < ntuples)
        {
                Buffer          buffer;
-               Buffer          vmbuffer = InvalidBuffer;
+               bool            starting_with_empty_page;
                bool            all_visible_cleared = false;
+               bool            all_frozen_set = false;
                int                     nthispage;
 
                CHECK_FOR_INTERRUPTS();
@@ -2184,12 +2186,20 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                /*
                 * Find buffer where at least the next tuple will fit.  If the page is
                 * all-visible, this will also pin the requisite visibility map page.
+                *
+                * Also pin visibility map page if COPY FREEZE inserts tuples into an
+                * empty page. See all_frozen_set below.
                 */
                buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
                                                                                   InvalidBuffer, options, bistate,
                                                                                   &vmbuffer, NULL);
                page = BufferGetPage(buffer);
 
+               starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
+
+               if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
+                       all_frozen_set = true;
+
                /* NO EREPORT(ERROR) from here till changes are logged */
                START_CRIT_SECTION();
 
@@ -2223,7 +2233,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                                log_heap_new_cid(relation, heaptup);
                }
 
-               if (PageIsAllVisible(page))
+               /*
+                * If the page is all visible, need to clear that, unless we're only
+                * going to add further frozen rows to it.
+                *
+                * If we're only adding already frozen rows to a previously empty
+                * page, mark it as all-visible.
+                */
+               if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
                {
                        all_visible_cleared = true;
                        PageClearAllVisible(page);
@@ -2231,6 +2248,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                                                                BufferGetBlockNumber(buffer),
                                                                vmbuffer, VISIBILITYMAP_VALID_BITS);
                }
+               else if (all_frozen_set)
+                       PageSetAllVisible(page);
 
                /*
                 * XXX Should we set PageSetPrunable on this page ? See heap_insert()
@@ -2254,8 +2273,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                         * If the page was previously empty, we can reinit the page
                         * instead of restoring the whole thing.
                         */
-                       init = (ItemPointerGetOffsetNumber(&(heaptuples[ndone]->t_self)) == FirstOffsetNumber &&
-                                       PageGetMaxOffsetNumber(page) == FirstOffsetNumber + nthispage - 1);
+                       init = starting_with_empty_page;
 
                        /* allocate xl_heap_multi_insert struct from the scratch area */
                        xlrec = (xl_heap_multi_insert *) scratchptr;
@@ -2273,7 +2291,15 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                        /* the rest of the scratch space is used for tuple data */
                        tupledata = scratchptr;
 
-                       xlrec->flags = all_visible_cleared ? XLH_INSERT_ALL_VISIBLE_CLEARED : 0;
+                       /* check that the mutually exclusive flags are not both set */
+                       Assert (!(all_visible_cleared && all_frozen_set));
+
+                       xlrec->flags = 0;
+                       if (all_visible_cleared)
+                               xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
+                       if (all_frozen_set)
+                               xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
+
                        xlrec->ntuples = nthispage;
 
                        /*
@@ -2347,13 +2373,40 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 
                END_CRIT_SECTION();
 
-               UnlockReleaseBuffer(buffer);
-               if (vmbuffer != InvalidBuffer)
-                       ReleaseBuffer(vmbuffer);
+               /*
+                * If we've frozen everything on the page, update the visibilitymap.
+                * We're already holding pin on the vmbuffer.
+                */
+               if (all_frozen_set)
+               {
+                       Assert(PageIsAllVisible(page));
+                       Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer));
 
+                       /*
+                        * It's fine to use InvalidTransactionId here - this is only used
+                        * when HEAP_INSERT_FROZEN is specified, which intentionally
+                        * violates visibility rules.
+                        */
+                       visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
+                                                         InvalidXLogRecPtr, vmbuffer,
+                                                         InvalidTransactionId,
+                                                         VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
+               }
+
+               UnlockReleaseBuffer(buffer);
                ndone += nthispage;
+
+               /*
+                * NB: Only release vmbuffer after inserting all tuples - it's fairly
+                * likely that we'll insert into subsequent heap pages that are likely
+                * to use the same vm page.
+                */
        }
 
+       /* We're done with inserting all tuples, so release the last vmbuffer. */
+       if (vmbuffer != InvalidBuffer)
+               ReleaseBuffer(vmbuffer);
+
        /*
         * We're done with the actual inserts.  Check for conflicts again, to
         * ensure that all rw-conflicts in to these inserts are detected.  Without
@@ -8725,6 +8778,10 @@ heap_xlog_insert(XLogReaderState *record)
                if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
                        PageClearAllVisible(page);
 
+               /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
+               if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
+                       PageSetAllVisible(page);
+
                MarkBufferDirty(buffer);
        }
        if (BufferIsValid(buffer))
@@ -8775,6 +8832,10 @@ heap_xlog_multi_insert(XLogReaderState *record)
 
        XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
 
+       /* check that the mutually exclusive flags are not both set */
+       Assert (!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
+                         (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
+
        /*
         * The visibility map may need to be fixed even if the heap page is
         * already up-to-date.
index fac3b8e9ff28731e99f861f7048bc2718df96b26..2d23b3ef7194516d5494e62e0aae595f67da6b45 100644 (file)
@@ -433,6 +433,14 @@ loop:
                        buffer = ReadBufferBI(relation, targetBlock, RBM_NORMAL, bistate);
                        if (PageIsAllVisible(BufferGetPage(buffer)))
                                visibilitymap_pin(relation, targetBlock, vmbuffer);
+
+                       /*
+                        * If the page is empty, pin vmbuffer to set all_frozen bit later.
+                        */
+                       if ((options & HEAP_INSERT_FROZEN) &&
+                               (PageGetMaxOffsetNumber(BufferGetPage(buffer)) == 0))
+                               visibilitymap_pin(relation, targetBlock, vmbuffer);
+
                        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
                }
                else if (otherBlock == targetBlock)
@@ -619,6 +627,15 @@ loop:
        PageInit(page, BufferGetPageSize(buffer), 0);
        MarkBufferDirty(buffer);
 
+       /*
+        * The page is empty, pin vmbuffer to set all_frozen bit.
+        */
+       if (options & HEAP_INSERT_FROZEN)
+       {
+               Assert(PageGetMaxOffsetNumber(BufferGetPage(buffer)) == 0);
+               visibilitymap_pin(relation, BufferGetBlockNumber(buffer), vmbuffer);
+       }
+
        /*
         * Release the file-extension lock; it's now OK for someone else to extend
         * the relation some more.
index 51586b883d373acd2cde670502a723355d8f16a3..178d49710a553fe37608d3b09c7a5985c9b469f4 100644 (file)
@@ -69,6 +69,9 @@
 #define XLH_INSERT_CONTAINS_NEW_TUPLE                  (1<<3)
 #define XLH_INSERT_ON_TOAST_RELATION                   (1<<4)
 
+/* all_frozen_set always implies all_visible_set */
+#define XLH_INSERT_ALL_FROZEN_SET                              (1<<5)
+
 /*
  * xl_heap_update flag values, 8 bits are available.
  */