Measure the number of all-visible pages for use in index-only scan costing.
author: Tom Lane <tgl@sss.pgh.pa.us>
Fri, 14 Oct 2011 21:23:01 +0000 (17:23 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Fri, 14 Oct 2011 21:23:46 +0000 (17:23 -0400)
Add a column pg_class.relallvisible to remember the number of pages that
were all-visible according to the visibility map as of the last VACUUM
(or ANALYZE, or some other operations that update pg_class.relpages).
Use relallvisible/relpages, instead of an arbitrary constant, to estimate
how many heap page fetches can be avoided during an index-only scan.

This is pretty primitive and will no doubt see refinements once we've
acquired more field experience with the index-only scan mechanism, but
it's way better than using a constant.

Note: I had to adjust an underspecified query in the window.sql regression
test, because it was changing answers when the plan changed to use an
index-only scan.  Some of the adjacent tests perhaps should be adjusted
as well, but I didn't do that here.

22 files changed:
doc/src/sgml/catalogs.sgml
src/backend/access/hash/hash.c
src/backend/access/heap/visibilitymap.c
src/backend/catalog/heap.c
src/backend/catalog/index.c
src/backend/commands/analyze.c
src/backend/commands/cluster.c
src/backend/commands/vacuum.c
src/backend/commands/vacuumlazy.c
src/backend/nodes/outfuncs.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/util/plancat.c
src/backend/optimizer/util/relnode.c
src/backend/utils/cache/relcache.c
src/include/access/visibilitymap.h
src/include/catalog/catversion.h
src/include/catalog/pg_class.h
src/include/commands/vacuum.h
src/include/nodes/relation.h
src/include/optimizer/plancat.h
src/test/regress/expected/window.out
src/test/regress/sql/window.sql

index 0495bd03bd5ab3cae67cb41a648a8833b1a3dc4f..e830c5f3d44c8e3c5adb1163b7bd2d750f77a638 100644 (file)
       </entry>
      </row>
 
+     <row>
+      <entry><structfield>relallvisible</structfield></entry>
+      <entry><type>int4</type></entry>
+      <entry></entry>
+      <entry>
+       Number of pages that are marked all-visible in the table's
+       visibility map.  This is only an estimate used by the
+       planner.  It is updated by <command>VACUUM</command>,
+       <command>ANALYZE</command>, and a few DDL commands such as
+       <command>CREATE INDEX</command>.
+      </entry>
+     </row>
+
      <row>
       <entry><structfield>reltoastrelid</structfield></entry>
       <entry><type>oid</type></entry>
index bfb10897d20e6a260cdb5807d9b7f1d68faa547b..770b3ef76d91d8e0da451d15beefd29cfcbf1644 100644 (file)
@@ -55,6 +55,7 @@ hashbuild(PG_FUNCTION_ARGS)
        IndexBuildResult *result;
        BlockNumber relpages;
        double          reltuples;
+       double          allvisfrac;
        uint32          num_buckets;
        HashBuildState buildstate;
 
@@ -67,7 +68,7 @@ hashbuild(PG_FUNCTION_ARGS)
                         RelationGetRelationName(index));
 
        /* Estimate the number of rows currently present in the table */
-       estimate_rel_size(heap, NULL, &relpages, &reltuples);
+       estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac);
 
        /* Initialize the hash index metadata page and initial buckets */
        num_buckets = _hash_metapinit(index, reltuples, MAIN_FORKNUM);
index 5a0511f1988cf89cf734fbe2d71bab15f2eaa4b4..919e8de042602b21c3b3da6b6771097e1a20ae02 100644 (file)
@@ -16,6 +16,8 @@
  *             visibilitymap_pin_ok - check whether correct map page is already pinned
  *             visibilitymap_set        - set a bit in a previously pinned page
  *             visibilitymap_test       - test if a bit is set
+ *             visibilitymap_count      - count number of bits set in visibility map
+ *             visibilitymap_truncate  - truncate the visibility map
  *
  * NOTES
  *
 #define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
 #define HEAPBLK_TO_MAPBIT(x) ((x) % HEAPBLOCKS_PER_BYTE)
 
+/* popcount lookup: number_of_ones[b] is the number of 1-bits in byte value b */
+static const uint8 number_of_ones[256] = {
+       0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+       1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+       1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+       1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+       3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+       1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+       3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+       3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+       3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+       4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+};
+
 /* prototypes for internal routines */
 static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
 static void vm_extend(Relation rel, BlockNumber nvmblocks);
@@ -307,6 +329,52 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
        return result;
 }
 
+/*
+ *     visibilitymap_count      - count number of bits set in visibility map
+ *
+ * Returns the number of set bits summed over every page of rel's visibility
+ * map.  We ignore races with concurrent table extension: pages newly added
+ * to the table aren't going to be marked all-visible, so they can't matter.
+ */
+BlockNumber
+visibilitymap_count(Relation rel)
+{
+       BlockNumber result = 0;
+       BlockNumber mapBlock;
+
+       for (mapBlock = 0; ; mapBlock++)
+       {
+               Buffer          mapBuffer;
+               unsigned char *map;
+               int                     i;
+
+               /*
+                * Read till we fall off the end of the map; the loop stops at the
+                * first map block for which vm_readbuf yields an invalid buffer.  We
+                * assume extra bytes in the last page are zeroed, so they count as 0.
+                */
+               mapBuffer = vm_readbuf(rel, mapBlock, false);
+               if (!BufferIsValid(mapBuffer))
+                       break;
+
+               /*
+                * We choose not to lock the page, since the result is going to be
+                * immediately stale anyway if anyone is concurrently setting or
+                * clearing bits, and we only really need an approximate value.
+                */
+               map = (unsigned char *) PageGetContents(BufferGetPage(mapBuffer));
+
+               for (i = 0; i < MAPSIZE; i++)
+               {
+                       result += number_of_ones[map[i]];
+               }
+
+               ReleaseBuffer(mapBuffer);
+       }
+
+       return result;
+}
+
 /*
  *     visibilitymap_truncate - truncate the visibility map
  *
index 2aaf77523fe1b4e86288905a8b07fb65c86a4d6c..e11d896ec8cd3a15f33393cf7f2b517520860890 100644 (file)
@@ -772,6 +772,7 @@ InsertPgClassTuple(Relation pg_class_desc,
        values[Anum_pg_class_reltablespace - 1] = ObjectIdGetDatum(rd_rel->reltablespace);
        values[Anum_pg_class_relpages - 1] = Int32GetDatum(rd_rel->relpages);
        values[Anum_pg_class_reltuples - 1] = Float4GetDatum(rd_rel->reltuples);
+       values[Anum_pg_class_relallvisible - 1] = Int32GetDatum(rd_rel->relallvisible);
        values[Anum_pg_class_reltoastrelid - 1] = ObjectIdGetDatum(rd_rel->reltoastrelid);
        values[Anum_pg_class_reltoastidxid - 1] = ObjectIdGetDatum(rd_rel->reltoastidxid);
        values[Anum_pg_class_relhasindex - 1] = BoolGetDatum(rd_rel->relhasindex);
@@ -845,16 +846,19 @@ AddNewRelationTuple(Relation pg_class_desc,
                        /* The relation is real, but as yet empty */
                        new_rel_reltup->relpages = 0;
                        new_rel_reltup->reltuples = 0;
+                       new_rel_reltup->relallvisible = 0;
                        break;
                case RELKIND_SEQUENCE:
                        /* Sequences always have a known size */
                        new_rel_reltup->relpages = 1;
                        new_rel_reltup->reltuples = 1;
+                       new_rel_reltup->relallvisible = 0;
                        break;
                default:
                        /* Views, etc, have no disk storage */
                        new_rel_reltup->relpages = 0;
                        new_rel_reltup->reltuples = 0;
+                       new_rel_reltup->relallvisible = 0;
                        break;
        }
 
index 67ade8f5e9da79eada944ef4120b468a8fc90f4e..99e130c1b0d9fea27e7de86225f646907d144601 100644 (file)
@@ -26,6 +26,7 @@
 #include "access/relscan.h"
 #include "access/sysattr.h"
 #include "access/transam.h"
+#include "access/visibilitymap.h"
 #include "access/xact.h"
 #include "bootstrap/bootstrap.h"
 #include "catalog/catalog.h"
@@ -1059,7 +1060,7 @@ index_create(Relation heapRelation,
                                                   true,
                                                   isprimary,
                                                   InvalidOid,
-                                                  heapRelation->rd_rel->reltuples);
+                                                  -1.0);
                /* Make the above update visible */
                CommandCounterIncrement();
        }
@@ -1225,7 +1226,7 @@ index_constraint_create(Relation heapRelation,
                                                   true,
                                                   true,
                                                   InvalidOid,
-                                                  heapRelation->rd_rel->reltuples);
+                                                  -1.0);
 
        /*
         * If needed, mark the index as primary and/or deferred in pg_index.
@@ -1533,9 +1534,10 @@ FormIndexDatum(IndexInfo *indexInfo,
  * isprimary: if true, set relhaspkey true; else no change
  * reltoastidxid: if not InvalidOid, set reltoastidxid to this value;
  *             else no change
- * reltuples: set reltuples to this value
+ * reltuples: if >= 0, set reltuples to this value; else no change
  *
- * relpages is also updated (using RelationGetNumberOfBlocks()).
+ * If reltuples >= 0, relpages and relallvisible are also updated (using
+ * RelationGetNumberOfBlocks() and visibilitymap_count()).
  *
  * NOTE: an important side-effect of this operation is that an SI invalidation
  * message is sent out to all backends --- including me --- causing relcache
@@ -1550,7 +1552,6 @@ index_update_stats(Relation rel,
                                   bool hasindex, bool isprimary,
                                   Oid reltoastidxid, double reltuples)
 {
-       BlockNumber relpages = RelationGetNumberOfBlocks(rel);
        Oid                     relid = RelationGetRelid(rel);
        Relation        pg_class;
        HeapTuple       tuple;
@@ -1586,9 +1587,11 @@ index_update_stats(Relation rel,
         * It is safe to use a non-transactional update even though our
         * transaction could still fail before committing.      Setting relhasindex
         * true is safe even if there are no indexes (VACUUM will eventually fix
-        * it), likewise for relhaspkey.  And of course the relpages and reltuples
-        * counts are correct (or at least more so than the old values)
-        * regardless.
+        * it), likewise for relhaspkey.  And of course the new relpages and
+        * reltuples counts are correct regardless.  However, we don't want to
+        * change relpages (or relallvisible) if the caller isn't providing an
+        * updated reltuples count, because that would bollix the
+        * reltuples/relpages ratio which is what's really important.
         */
 
        pg_class = heap_open(RelationRelationId, RowExclusiveLock);
@@ -1650,15 +1653,32 @@ index_update_stats(Relation rel,
                        dirty = true;
                }
        }
-       if (rd_rel->reltuples != (float4) reltuples)
-       {
-               rd_rel->reltuples = (float4) reltuples;
-               dirty = true;
-       }
-       if (rd_rel->relpages != (int32) relpages)
+
+       if (reltuples >= 0)
        {
-               rd_rel->relpages = (int32) relpages;
-               dirty = true;
+               BlockNumber relpages = RelationGetNumberOfBlocks(rel);
+               BlockNumber relallvisible;
+
+               if (rd_rel->relkind != RELKIND_INDEX)
+                       relallvisible = visibilitymap_count(rel);
+               else                                    /* don't bother for indexes */
+                       relallvisible = 0;
+
+               if (rd_rel->relpages != (int32) relpages)
+               {
+                       rd_rel->relpages = (int32) relpages;
+                       dirty = true;
+               }
+               if (rd_rel->reltuples != (float4) reltuples)
+               {
+                       rd_rel->reltuples = (float4) reltuples;
+                       dirty = true;
+               }
+               if (rd_rel->relallvisible != (int32) relallvisible)
+               {
+                       rd_rel->relallvisible = (int32) relallvisible;
+                       dirty = true;
+               }
        }
 
        /*
index 18d44c572c755daea2f5ecaf7df060ab43e090ce..32985a4a0a04f7e55ee517b14158f551b1d5c520 100644 (file)
@@ -19,6 +19,7 @@
 #include "access/transam.h"
 #include "access/tupconvert.h"
 #include "access/tuptoaster.h"
+#include "access/visibilitymap.h"
 #include "access/xact.h"
 #include "catalog/index.h"
 #include "catalog/indexing.h"
@@ -534,7 +535,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
        if (!inh)
                vac_update_relstats(onerel,
                                                        RelationGetNumberOfBlocks(onerel),
-                                                       totalrows, hasindex, InvalidTransactionId);
+                                                       totalrows,
+                                                       visibilitymap_count(onerel),
+                                                       hasindex,
+                                                       InvalidTransactionId);
 
        /*
         * Same for indexes. Vacuum always scans all indexes, so if we're part of
@@ -551,7 +555,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
                        totalindexrows = ceil(thisdata->tupleFract * totalrows);
                        vac_update_relstats(Irel[ind],
                                                                RelationGetNumberOfBlocks(Irel[ind]),
-                                                               totalindexrows, false, InvalidTransactionId);
+                                                               totalindexrows,
+                                                               0,
+                                                               false,
+                                                               InvalidTransactionId);
                }
        }
 
index 8200d2095a74594a53acbda590a24381d62e060f..edec44d2c3d6e5d59cedf63a8083b1e7beb815fb 100644 (file)
@@ -1205,6 +1205,7 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
        {
                int4            swap_pages;
                float4          swap_tuples;
+               int4            swap_allvisible;
 
                swap_pages = relform1->relpages;
                relform1->relpages = relform2->relpages;
@@ -1213,6 +1214,10 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
                swap_tuples = relform1->reltuples;
                relform1->reltuples = relform2->reltuples;
                relform2->reltuples = swap_tuples;
+
+               swap_allvisible = relform1->relallvisible;
+               relform1->relallvisible = relform2->relallvisible;
+               relform2->relallvisible = swap_allvisible;
        }
 
        /*
index 7fe787ecb74f72caac9f6e4fa515ba9a0000d8a6..f42504cf9fd5ad8187127abce9da589eefd999cf 100644 (file)
@@ -569,6 +569,7 @@ vac_estimate_reltuples(Relation relation, bool is_analyze,
 void
 vac_update_relstats(Relation relation,
                                        BlockNumber num_pages, double num_tuples,
+                                       BlockNumber num_all_visible_pages,
                                        bool hasindex, TransactionId frozenxid)
 {
        Oid                     relid = RelationGetRelid(relation);
@@ -599,6 +600,11 @@ vac_update_relstats(Relation relation,
                pgcform->reltuples = (float4) num_tuples;
                dirty = true;
        }
+       if (pgcform->relallvisible != (int32) num_all_visible_pages)
+       {
+               pgcform->relallvisible = (int32) num_all_visible_pages;
+               dirty = true;
+       }
        if (pgcform->relhasindex != hasindex)
        {
                pgcform->relhasindex = hasindex;
index cf8337b9e5d9e137c83665e745e7969cee3185a8..b197b45c127b5816f2e2feaa18fb3dd3fd1eeaf3 100644 (file)
@@ -158,6 +158,7 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
        TransactionId freezeTableLimit;
        BlockNumber new_rel_pages;
        double          new_rel_tuples;
+       BlockNumber new_rel_allvisible;
        TransactionId new_frozen_xid;
 
        /* measure elapsed time iff autovacuum logging requires it */
@@ -222,6 +223,10 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
         * density") with nonzero relpages and reltuples=0 (which means "zero
         * tuple density") unless there's some actual evidence for the latter.
         *
+        * We do update relallvisible even in the corner case, since if the
+        * table is all-visible we'd definitely like to know that.  But clamp
+        * the value to be not more than what we're setting relpages to.
+        *
         * Also, don't change relfrozenxid if we skipped any pages, since then
         * we don't know for certain that all tuples have a newer xmin.
         */
@@ -233,12 +238,18 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
                new_rel_tuples = vacrelstats->old_rel_tuples;
        }
 
+       new_rel_allvisible = visibilitymap_count(onerel);
+       if (new_rel_allvisible > new_rel_pages)
+               new_rel_allvisible = new_rel_pages;
+
        new_frozen_xid = FreezeLimit;
        if (vacrelstats->scanned_pages < vacrelstats->rel_pages)
                new_frozen_xid = InvalidTransactionId;
 
        vac_update_relstats(onerel,
-                                               new_rel_pages, new_rel_tuples,
+                                               new_rel_pages,
+                                               new_rel_tuples,
+                                               new_rel_allvisible,
                                                vacrelstats->hasindex,
                                                new_frozen_xid);
 
@@ -1063,8 +1074,11 @@ lazy_cleanup_index(Relation indrel,
         */
        if (!stats->estimated_count)
                vac_update_relstats(indrel,
-                                                       stats->num_pages, stats->num_index_tuples,
-                                                       false, InvalidTransactionId);
+                                                       stats->num_pages,
+                                                       stats->num_index_tuples,
+                                                       0,
+                                                       false,
+                                                       InvalidTransactionId);
 
        ereport(elevel,
                        (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
index eba3d6d57970518cf42032fea4737490ff99802a..98a02b27ddfd7e05e89f4ece181ced5aa60b3f83 100644 (file)
@@ -1743,6 +1743,7 @@ _outRelOptInfo(StringInfo str, RelOptInfo *node)
        WRITE_NODE_FIELD(indexlist);
        WRITE_UINT_FIELD(pages);
        WRITE_FLOAT_FIELD(tuples, "%.0f");
+       WRITE_FLOAT_FIELD(allvisfrac, "%.6f");
        WRITE_NODE_FIELD(subplan);
        WRITE_NODE_FIELD(subroot);
        WRITE_NODE_FIELD(baserestrictinfo);
index 45c5524d3098feff130e2e7486717c5a5c57bb82..f821b508d6c888870b30eb8891a02aea1241b63b 100644 (file)
@@ -120,9 +120,6 @@ bool                enable_material = true;
 bool           enable_mergejoin = true;
 bool           enable_hashjoin = true;
 
-/* Possibly this should become a GUC too */
-static double visibility_fraction = 0.9;
-
 typedef struct
 {
        PlannerInfo *root;
@@ -324,9 +321,10 @@ cost_index(IndexPath *path, PlannerInfo *root,
         *
         * If it's an index-only scan, then we will not need to fetch any heap
         * pages for which the visibility map shows all tuples are visible.
-        * Unfortunately, we have no stats as to how much of the heap is
-        * all-visible, and that's likely to be a rather unstable number anyway.
-        * We use an arbitrary constant visibility_fraction to estimate this.
+        * Hence, reduce the estimated number of heap fetches accordingly.
+        * We use the measured fraction of the entire heap that is all-visible,
+        * which might not be particularly relevant to the subset of the heap
+        * that this query will fetch; but it's not clear how to do better.
         *----------
         */
        if (outer_rel != NULL && outer_rel->rows > 1)
@@ -347,7 +345,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
                                                                                        root);
 
                if (indexonly)
-                       pages_fetched = ceil(pages_fetched * visibility_fraction);
+                       pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
 
                max_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans;
 
@@ -369,7 +367,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
                                                                                        root);
 
                if (indexonly)
-                       pages_fetched = ceil(pages_fetched * visibility_fraction);
+                       pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
 
                min_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans;
        }
@@ -385,7 +383,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
                                                                                        root);
 
                if (indexonly)
-                       pages_fetched = ceil(pages_fetched * visibility_fraction);
+                       pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
 
                /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
                max_IO_cost = pages_fetched * spc_random_page_cost;
@@ -394,7 +392,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
                pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
 
                if (indexonly)
-                       pages_fetched = ceil(pages_fetched * visibility_fraction);
+                       pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
 
                min_IO_cost = spc_random_page_cost;
                if (pages_fetched > 1)
index 0b3675f1461bd928416d1e749aef5e23550eda7b..aa436004f89894c3c8e723a1a1d71168f9dec625 100644 (file)
@@ -116,7 +116,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
         */
        if (!inhparent)
                estimate_rel_size(relation, rel->attr_widths - rel->min_attr,
-                                                 &rel->pages, &rel->tuples);
+                                                 &rel->pages, &rel->tuples, &rel->allvisfrac);
 
        /*
         * Make list of indexes.  Ignore indexes on system catalogs if told to.
@@ -339,8 +339,10 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
                        }
                        else
                        {
+                               double          allvisfrac;                             /* dummy */
+
                                estimate_rel_size(indexRelation, NULL,
-                                                                 &info->pages, &info->tuples);
+                                                                 &info->pages, &info->tuples, &allvisfrac);
                                if (info->tuples > rel->tuples)
                                        info->tuples = rel->tuples;
                        }
@@ -369,17 +371,21 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 /*
  * estimate_rel_size - estimate # pages and # tuples in a table or index
  *
+ * We also estimate the fraction of the pages that are marked all-visible in
+ * the visibility map, for use in estimation of index-only scans.
+ *
  * If attr_widths isn't NULL, it points to the zero-index entry of the
  * relation's attr_widths[] cache; we fill this in if we have need to compute
  * the attribute widths for estimation purposes.
  */
 void
 estimate_rel_size(Relation rel, int32 *attr_widths,
-                                 BlockNumber *pages, double *tuples)
+                                 BlockNumber *pages, double *tuples, double *allvisfrac)
 {
        BlockNumber curpages;
        BlockNumber relpages;
        double          reltuples;
+       BlockNumber relallvisible;
        double          density;
 
        switch (rel->rd_rel->relkind)
@@ -432,11 +438,13 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
                        if (curpages == 0)
                        {
                                *tuples = 0;
+                               *allvisfrac = 0;
                                break;
                        }
                        /* coerce values in pg_class to more desirable types */
                        relpages = (BlockNumber) rel->rd_rel->relpages;
                        reltuples = (double) rel->rd_rel->reltuples;
+                       relallvisible = (BlockNumber) rel->rd_rel->relallvisible;
 
                        /*
                         * If it's an index, discount the metapage while estimating the
@@ -480,21 +488,37 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
                                density = (BLCKSZ - SizeOfPageHeaderData) / tuple_width;
                        }
                        *tuples = rint(density * (double) curpages);
+
+                       /*
+                        * We use relallvisible as-is, rather than scaling it up like we
+                        * do for the pages and tuples counts, on the theory that any
+                        * pages added since the last VACUUM are most likely not marked
+                        * all-visible.  But costsize.c wants it converted to a fraction.
+                        */
+                       if (relallvisible == 0 || curpages <= 0)
+                               *allvisfrac = 0;
+                       else if ((double) relallvisible >= curpages)
+                               *allvisfrac = 1;
+                       else
+                               *allvisfrac = (double) relallvisible / curpages;
                        break;
                case RELKIND_SEQUENCE:
                        /* Sequences always have a known size */
                        *pages = 1;
                        *tuples = 1;
+                       *allvisfrac = 0;
                        break;
                case RELKIND_FOREIGN_TABLE:
                        /* Just use whatever's in pg_class */
                        *pages = rel->rd_rel->relpages;
                        *tuples = rel->rd_rel->reltuples;
+                       *allvisfrac = 0;
                        break;
                default:
                        /* else it has no disk storage; probably shouldn't get here? */
                        *pages = 0;
                        *tuples = 0;
+                       *allvisfrac = 0;
                        break;
        }
 }
index 1df727d9fc80b0841c0d42c5d0dce4700b989ccd..37187e20735dabcfa07e8b5a3afcbc48ca804f00 100644 (file)
@@ -109,6 +109,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
        rel->indexlist = NIL;
        rel->pages = 0;
        rel->tuples = 0;
+       rel->allvisfrac = 0;
        rel->subplan = NULL;
        rel->subroot = NULL;
        rel->baserestrictinfo = NIL;
@@ -362,6 +363,7 @@ build_join_rel(PlannerInfo *root,
        joinrel->indexlist = NIL;
        joinrel->pages = 0;
        joinrel->tuples = 0;
+       joinrel->allvisfrac = 0;
        joinrel->subplan = NULL;
        joinrel->subroot = NULL;
        joinrel->baserestrictinfo = NIL;
index 9f6b12707bfed4eb59186465b50a298d784b5931..603e4c1b621ba672a1f1b1088e78f83cf18f4bd5 100644 (file)
@@ -1414,6 +1414,7 @@ formrdesc(const char *relationName, Oid relationReltype,
 
        relation->rd_rel->relpages = 0;
        relation->rd_rel->reltuples = 0;
+       relation->rd_rel->relallvisible = 0;
        relation->rd_rel->relkind = RELKIND_RELATION;
        relation->rd_rel->relhasoids = hasoids;
        relation->rd_rel->relnatts = (int16) natts;
@@ -2668,6 +2669,7 @@ RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid)
        {
                classform->relpages = 0;        /* it's empty until further notice */
                classform->reltuples = 0;
+               classform->relallvisible = 0;
        }
        classform->relfrozenxid = freezeXid;
 
index 7d62c126407f4112290e92745f6c085fae20890e..4e5c0a0e4ed5ac1fac072c3e657182923fe3a93d 100644 (file)
@@ -27,6 +27,7 @@ extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf);
 extern void visibilitymap_set(Relation rel, BlockNumber heapBlk,
                                  XLogRecPtr recptr, Buffer vmbuf);
 extern bool visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
-extern void visibilitymap_truncate(Relation rel, BlockNumber heapblk);
+extern BlockNumber visibilitymap_count(Relation rel);
+extern void visibilitymap_truncate(Relation rel, BlockNumber nheapblocks);
 
 #endif   /* VISIBILITYMAP_H */
index e4eb7b1294f0b80d44886fef6e91abab6871412f..8fff3675ef2b601e5ded0e41a0a3da587bc2bdde 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     201110071
+#define CATALOG_VERSION_NO     201110141
 
 #endif
index e00618026e40f233b9b67594e505f54cf01727da..06120e481e839221f01c23bdd231683046a6a32d 100644 (file)
@@ -45,6 +45,8 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO
        Oid                     reltablespace;  /* identifier of table space for relation */
        int4            relpages;               /* # of blocks (not always up-to-date) */
        float4          reltuples;              /* # of tuples (not always up-to-date) */
+       int4            relallvisible;  /* # of all-visible blocks (not always
+                                                                * up-to-date) */
        Oid                     reltoastrelid;  /* OID of toast table; 0 if none */
        Oid                     reltoastidxid;  /* if toast table, OID of chunk_id index */
        bool            relhasindex;    /* T if has (or has had) any indexes */
@@ -92,7 +94,7 @@ typedef FormData_pg_class *Form_pg_class;
  * ----------------
  */
 
-#define Natts_pg_class                                 26
+#define Natts_pg_class                                 27
 #define Anum_pg_class_relname                  1
 #define Anum_pg_class_relnamespace             2
 #define Anum_pg_class_reltype                  3
@@ -103,22 +105,23 @@ typedef FormData_pg_class *Form_pg_class;
 #define Anum_pg_class_reltablespace            8
 #define Anum_pg_class_relpages                 9
 #define Anum_pg_class_reltuples                        10
-#define Anum_pg_class_reltoastrelid            11
-#define Anum_pg_class_reltoastidxid            12
-#define Anum_pg_class_relhasindex              13
-#define Anum_pg_class_relisshared              14
-#define Anum_pg_class_relpersistence   15
-#define Anum_pg_class_relkind                  16
-#define Anum_pg_class_relnatts                 17
-#define Anum_pg_class_relchecks                        18
-#define Anum_pg_class_relhasoids               19
-#define Anum_pg_class_relhaspkey               20
-#define Anum_pg_class_relhasrules              21
-#define Anum_pg_class_relhastriggers   22
-#define Anum_pg_class_relhassubclass   23
-#define Anum_pg_class_relfrozenxid             24
-#define Anum_pg_class_relacl                   25
-#define Anum_pg_class_reloptions               26
+#define Anum_pg_class_relallvisible            11
+#define Anum_pg_class_reltoastrelid            12
+#define Anum_pg_class_reltoastidxid            13
+#define Anum_pg_class_relhasindex              14
+#define Anum_pg_class_relisshared              15
+#define Anum_pg_class_relpersistence   16
+#define Anum_pg_class_relkind                  17
+#define Anum_pg_class_relnatts                 18
+#define Anum_pg_class_relchecks                        19
+#define Anum_pg_class_relhasoids               20
+#define Anum_pg_class_relhaspkey               21
+#define Anum_pg_class_relhasrules              22
+#define Anum_pg_class_relhastriggers   23
+#define Anum_pg_class_relhassubclass   24
+#define Anum_pg_class_relfrozenxid             25
+#define Anum_pg_class_relacl                   26
+#define Anum_pg_class_reloptions               27
 
 /* ----------------
  *             initial contents of pg_class
@@ -130,13 +133,13 @@ typedef FormData_pg_class *Form_pg_class;
  */
 
 /* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId */
-DATA(insert OID = 1247 (  pg_type              PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 29 0 t f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1247 (  pg_type              PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 29 0 t f f f f 3 _null_ _null_ ));
 DESCR("");
-DATA(insert OID = 1249 (  pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1249 (  pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 _null_ _null_ ));
 DESCR("");
-DATA(insert OID = 1255 (  pg_proc              PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 26 0 t f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1255 (  pg_proc              PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 26 0 t f f f f 3 _null_ _null_ ));
 DESCR("");
-DATA(insert OID = 1259 (  pg_class             PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 26 0 t f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1259 (  pg_class             PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 _null_ _null_ ));
 DESCR("");
 
 
index cfbe0c43924029843f636845d87a09996d706af3..d8fd0caa6b61c709421e4da9956f3476ddeefcff 100644 (file)
@@ -149,6 +149,7 @@ extern double vac_estimate_reltuples(Relation relation, bool is_analyze,
 extern void vac_update_relstats(Relation relation,
                                        BlockNumber num_pages,
                                        double num_tuples,
+                                       BlockNumber num_all_visible_pages,
                                        bool hasindex,
                                        TransactionId frozenxid);
 extern void vacuum_set_xid_limits(int freeze_min_age, int freeze_table_age,
index 45ca52e516e279dd88521ae4464df189c353960d..ef84e9f138de6988b277d216ede509465e53f865 100644 (file)
@@ -319,6 +319,7 @@ typedef struct PlannerInfo
  *                                     (always NIL if it's not a table)
  *             pages - number of disk pages in relation (zero if not a table)
  *             tuples - number of tuples in relation (not considering restrictions)
+ *             allvisfrac - fraction of disk pages that are marked all-visible
  *             subplan - plan for subquery (NULL if it's not a subquery)
  *             subroot - PlannerInfo for subquery (NULL if it's not a subquery)
  *
@@ -402,8 +403,9 @@ typedef struct RelOptInfo
        Relids     *attr_needed;        /* array indexed [min_attr .. max_attr] */
        int32      *attr_widths;        /* array indexed [min_attr .. max_attr] */
        List       *indexlist;          /* list of IndexOptInfo */
-       BlockNumber pages;
+       BlockNumber pages;                      /* size estimates derived from pg_class */
        double          tuples;
+       double          allvisfrac;
        struct Plan *subplan;           /* if subquery */
        PlannerInfo *subroot;           /* if subquery */
 
index c0b8eda8137dc23ff6707e7d90da0ed6473ea4e5..05843615d66fb6b456e424332120f0c59d500dec 100644 (file)
@@ -29,7 +29,7 @@ extern void get_relation_info(PlannerInfo *root, Oid relationObjectId,
                                  bool inhparent, RelOptInfo *rel);
 
 extern void estimate_rel_size(Relation rel, int32 *attr_widths,
-                                 BlockNumber *pages, double *tuples);
+                                 BlockNumber *pages, double *tuples, double *allvisfrac);
 
 extern int32 get_relation_data_width(Oid relid, int32 *attr_widths);
 
index 048d463533ad54d915f4be16376ad7026d7c4000..fde375cc9f7052200b44f05fe1b1892a655298e0 100644 (file)
@@ -901,21 +901,22 @@ WINDOW w AS (order by four range between current row and unbounded following);
 (10 rows)
 
 SELECT sum(unique1) over
-       (rows (SELECT unique1 FROM tenk1 ORDER BY unique1 LIMIT 1) + 1 PRECEDING),
+       (order by unique1
+        rows (SELECT unique1 FROM tenk1 ORDER BY unique1 LIMIT 1) + 1 PRECEDING),
        unique1
 FROM tenk1 WHERE unique1 < 10;
  sum | unique1 
 -----+---------
-   4 |       4
-   6 |       2
-   3 |       1
-   7 |       6
-  15 |       9
-  17 |       8
-  13 |       5
-   8 |       3
-  10 |       7
-   7 |       0
+   0 |       0
+   1 |       1
+   3 |       2
+   5 |       3
+   7 |       4
+   9 |       5
+  11 |       6
+  13 |       7
+  15 |       8
+  17 |       9
 (10 rows)
 
 CREATE TEMP VIEW v_window AS
index 268430a260ba317af63bdf219f3c526db8b8c160..d8e9e7e3b1c13b437843ff7dbd75de0de1c85537 100644 (file)
@@ -211,7 +211,8 @@ FROM tenk1 WHERE unique1 < 10
 WINDOW w AS (order by four range between current row and unbounded following);
 
 SELECT sum(unique1) over
-       (rows (SELECT unique1 FROM tenk1 ORDER BY unique1 LIMIT 1) + 1 PRECEDING),
+       (order by unique1
+        rows (SELECT unique1 FROM tenk1 ORDER BY unique1 LIMIT 1) + 1 PRECEDING),
        unique1
 FROM tenk1 WHERE unique1 < 10;