Redesign tablesample method API, and do extensive code review.

author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 25 Jul 2015 18:39:00 +0000 (14:39 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 25 Jul 2015 18:39:00 +0000 (14:39 -0400)
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c

index 0eb991cdf0e86f50e182ef07b7bd2a22f7a5b0b6..59b8a2e2b3d9cd99ae6eb38746a85de44a74862d 100644 (file)
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2297,6 +2297,7 @@ JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
         {
             case RTE_RELATION:
                 APP_JUMB(rte->relid);
+               JumbleExpr(jstate, (Node *) rte->tablesample);
                 break;
             case RTE_SUBQUERY:
                 JumbleQuery(jstate, rte->subquery);
@@ -2767,6 +2768,15 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
                 JumbleExpr(jstate, rtfunc->funcexpr);
             }
             break;
+       case T_TableSampleClause:
+           {
+               TableSampleClause *tsc = (TableSampleClause *) node;
+
+               APP_JUMB(tsc->tsmhandler);
+               JumbleExpr(jstate, (Node *) tsc->args);
+               JumbleExpr(jstate, (Node *) tsc->repeatable);
+           }
+           break;
         default:
             /* Only a warning, since we can stumble along anyway */
             elog(WARNING, "unrecognized node type: %d",
diff --git a/contrib/tsm_system_rows/Makefile b/contrib/tsm_system_rows/Makefile

index 700ab276db2e95b546dee914751387ce3bb940b6..609af463c5c2438b340c8ec6c32fdcddb7d627a1 100644 (file)
--- a/contrib/tsm_system_rows/Makefile
+++ b/contrib/tsm_system_rows/Makefile
@@ -1,8 +1,8 @@
-# src/test/modules/tsm_system_rows/Makefile
+# contrib/tsm_system_rows/Makefile
  
  MODULE_big = tsm_system_rows
  OBJS = tsm_system_rows.o $(WIN32RES)
-PGFILEDESC = "tsm_system_rows - SYSTEM TABLESAMPLE method which accepts number of rows as a limit"
+PGFILEDESC = "tsm_system_rows - TABLESAMPLE method which accepts number of rows as a limit"
  
  EXTENSION = tsm_system_rows
  DATA = tsm_system_rows--1.0.sql
diff --git a/contrib/tsm_system_rows/expected/tsm_system_rows.out b/contrib/tsm_system_rows/expected/tsm_system_rows.out

index 7e0f72b02b7df38392293f314341058ceb293d06..87b4a8fc64bd222438b14dbce830923f862b0c6a 100644 (file)
--- a/contrib/tsm_system_rows/expected/tsm_system_rows.out
+++ b/contrib/tsm_system_rows/expected/tsm_system_rows.out
@@ -1,31 +1,83 @@
  CREATE EXTENSION tsm_system_rows;
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+  FROM generate_series(0, 30) s(i);
  ANALYZE test_tablesample;
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1000);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (0);
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1);
+ count 
+-------
+     1
+(1 row)
+
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (10);
+ count 
+-------
+    10
+(1 row)
+
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (100);
   count 
  -------
      31
  (1 row)
  
-SELECT id FROM test_tablesample TABLESAMPLE system_rows (8) REPEATABLE (5432);
- id 
-----
-  7
- 14
- 21
- 28
-  4
- 11
- 18
- 25
-(8 rows)
-
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_rows (20) REPEATABLE (10);
-                                    QUERY PLAN                                     
------------------------------------------------------------------------------------
- Sample Scan (system_rows) on test_tablesample  (cost=0.00..80.20 rows=20 width=4)
+-- bad parameters should get through planning, but not execution:
+EXPLAIN (COSTS OFF)
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+               QUERY PLAN               
+----------------------------------------
+ Sample Scan on test_tablesample
+   Sampling: system_rows ('-1'::bigint)
+(2 rows)
+
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+ERROR:  sample size must not be negative
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_rows (10) REPEATABLE (0);
+ERROR:  tablesample method system_rows does not support REPEATABLE
+LINE 1: SELECT * FROM test_tablesample TABLESAMPLE system_rows (10) ...
+                                                   ^
+-- but a join should be allowed:
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+  (VALUES (0),(10),(100)) v(nrows),
+  LATERAL (SELECT count(*) FROM test_tablesample
+           TABLESAMPLE system_rows (nrows)) ss;
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Nested Loop
+   ->  Values Scan on "*VALUES*"
+   ->  Aggregate
+         ->  Sample Scan on test_tablesample
+               Sampling: system_rows ("*VALUES*".column1)
+(5 rows)
+
+SELECT * FROM
+  (VALUES (0),(10),(100)) v(nrows),
+  LATERAL (SELECT count(*) FROM test_tablesample
+           TABLESAMPLE system_rows (nrows)) ss;
+ nrows | count 
+-------+-------
+     0 |     0
+    10 |    10
+   100 |    31
+(3 rows)
+
+CREATE VIEW vv AS
+  SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (20);
+SELECT * FROM vv;
+ count 
+-------
+    20
  (1 row)
  
--- done
-DROP TABLE test_tablesample CASCADE;
+DROP EXTENSION tsm_system_rows;  -- fail, view depends on extension
+ERROR:  cannot drop extension tsm_system_rows because other objects depend on it
+DETAIL:  view vv depends on function system_rows(internal)
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
diff --git a/contrib/tsm_system_rows/sql/tsm_system_rows.sql b/contrib/tsm_system_rows/sql/tsm_system_rows.sql

index bd812220ed98dcab5f8e51128062fc65a50b6f95..e3ab4204eea5ae601aaf9e43c1edec6d0bda29d5 100644 (file)
--- a/contrib/tsm_system_rows/sql/tsm_system_rows.sql
+++ b/contrib/tsm_system_rows/sql/tsm_system_rows.sql
@@ -1,14 +1,39 @@
  CREATE EXTENSION tsm_system_rows;
  
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+  FROM generate_series(0, 30) s(i);
  ANALYZE test_tablesample;
  
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1000);
-SELECT id FROM test_tablesample TABLESAMPLE system_rows (8) REPEATABLE (5432);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (0);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (10);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (100);
+
+-- bad parameters should get through planning, but not execution:
+EXPLAIN (COSTS OFF)
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_rows (10) REPEATABLE (0);
+
+-- but a join should be allowed:
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+  (VALUES (0),(10),(100)) v(nrows),
+  LATERAL (SELECT count(*) FROM test_tablesample
+           TABLESAMPLE system_rows (nrows)) ss;
+
+SELECT * FROM
+  (VALUES (0),(10),(100)) v(nrows),
+  LATERAL (SELECT count(*) FROM test_tablesample
+           TABLESAMPLE system_rows (nrows)) ss;
+
+CREATE VIEW vv AS
+  SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (20);
  
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_rows (20) REPEATABLE (10);
+SELECT * FROM vv;
  
--- done
-DROP TABLE test_tablesample CASCADE;
+DROP EXTENSION tsm_system_rows;  -- fail, view depends on extension
diff --git a/contrib/tsm_system_rows/tsm_system_rows--1.0.sql b/contrib/tsm_system_rows/tsm_system_rows--1.0.sql

index 1a29c584b5a8386180083120e25fb80edd8004e7..de508ed72675fe3cb51fd1dce770227b97d31acc 100644 (file)
--- a/contrib/tsm_system_rows/tsm_system_rows--1.0.sql
+++ b/contrib/tsm_system_rows/tsm_system_rows--1.0.sql
@@ -1,44 +1,9 @@
-/* src/test/modules/tablesample/tsm_system_rows--1.0.sql */
+/* contrib/tsm_system_rows/tsm_system_rows--1.0.sql */
  
  -- complain if script is sourced in psql, rather than via CREATE EXTENSION
  \echo Use "CREATE EXTENSION tsm_system_rows" to load this file. \quit
  
-CREATE FUNCTION tsm_system_rows_init(internal, int4, int4)
-RETURNS void
-AS 'MODULE_PATHNAME'
+CREATE FUNCTION system_rows(internal)
+RETURNS tsm_handler
+AS 'MODULE_PATHNAME', 'tsm_system_rows_handler'
  LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_nextblock(internal)
-RETURNS int4
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_nexttuple(internal, int4, int2)
-RETURNS int2
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_examinetuple(internal, int4, internal, bool)
-RETURNS bool
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_end(internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_reset(internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_cost(internal, internal, internal, internal, internal, internal, internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-INSERT INTO pg_tablesample_method VALUES('system_rows', false, true,
-   'tsm_system_rows_init', 'tsm_system_rows_nextblock',
-   'tsm_system_rows_nexttuple', 'tsm_system_rows_examinetuple',
-   'tsm_system_rows_end', 'tsm_system_rows_reset', 'tsm_system_rows_cost');
diff --git a/contrib/tsm_system_rows/tsm_system_rows.c b/contrib/tsm_system_rows/tsm_system_rows.c

index e325eaff498972b46595b3be60b2f5ce92c8ed6d..f251e3e5e06dd9416d5a8bac14417cfa70311ca2 100644 (file)
--- a/contrib/tsm_system_rows/tsm_system_rows.c
+++ b/contrib/tsm_system_rows/tsm_system_rows.c
@@ -1,240 +1,356 @@
  /*-------------------------------------------------------------------------
   *
   * tsm_system_rows.c
- *   interface routines for system_rows tablesample method
+ *   support routines for SYSTEM_ROWS tablesample method
   *
+ * The desire here is to produce a random sample with a given number of rows
+ * (or the whole relation, if that is fewer rows).  We use a block-sampling
+ * approach.  To ensure that the whole relation will be visited if necessary,
+ * we start at a randomly chosen block and then advance with a stride that
+ * is randomly chosen but is relatively prime to the relation's nblocks.
   *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Because of the dependence on nblocks, this method cannot be repeatable
+ * across queries.  (Even if the user hasn't explicitly changed the relation,
+ * maintenance activities such as autovacuum might change nblocks.)  However,
+ * we can at least make it repeatable across scans, by determining the
+ * sampling pattern only once on the first scan.  This means that rescans
+ * won't visit blocks added after the first scan, but that is fine since
+ * such blocks shouldn't contain any visible tuples anyway.
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   contrib/tsm_system_rows_rowlimit/tsm_system_rows.c
+ *   contrib/tsm_system_rows/tsm_system_rows.c
   *
   *-------------------------------------------------------------------------
   */
  
  #include "postgres.h"
  
-#include "fmgr.h"
-
-#include "access/tablesample.h"
  #include "access/relscan.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
  #include "miscadmin.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
  #include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
+#include "optimizer/cost.h"
  #include "utils/sampling.h"
  
  PG_MODULE_MAGIC;
  
-/*
- * State
- */
+PG_FUNCTION_INFO_V1(tsm_system_rows_handler);
+
+
+/* Private state */
  typedef struct
  {
-   SamplerRandomState randstate;
     uint32      seed;           /* random seed */
-   BlockNumber nblocks;        /* number of block in relation */
-   int32       ntuples;        /* number of tuples to return */
-   int32       donetuples;     /* tuples already returned */
+   int64       ntuples;        /* number of tuples to return */
+   int64       donetuples;     /* number of tuples already returned */
     OffsetNumber lt;            /* last tuple returned from current block */
-   BlockNumber step;           /* step size */
+   BlockNumber doneblocks;     /* number of already-scanned blocks */
     BlockNumber lb;             /* last block visited */
-   BlockNumber doneblocks;     /* number of already returned blocks */
-} SystemSamplerData;
-
-
-PG_FUNCTION_INFO_V1(tsm_system_rows_init);
-PG_FUNCTION_INFO_V1(tsm_system_rows_nextblock);
-PG_FUNCTION_INFO_V1(tsm_system_rows_nexttuple);
-PG_FUNCTION_INFO_V1(tsm_system_rows_examinetuple);
-PG_FUNCTION_INFO_V1(tsm_system_rows_end);
-PG_FUNCTION_INFO_V1(tsm_system_rows_reset);
-PG_FUNCTION_INFO_V1(tsm_system_rows_cost);
-
+   /* these three values are not changed during a rescan: */
+   BlockNumber nblocks;        /* number of blocks in relation */
+   BlockNumber firstblock;     /* first block to sample from */
+   BlockNumber step;           /* step size, or 0 if not set yet */
+} SystemRowsSamplerData;
+
+static void system_rows_samplescangetsamplesize(PlannerInfo *root,
+                                   RelOptInfo *baserel,
+                                   List *paramexprs,
+                                   BlockNumber *pages,
+                                   double *tuples);
+static void system_rows_initsamplescan(SampleScanState *node,
+                          int eflags);
+static void system_rows_beginsamplescan(SampleScanState *node,
+                           Datum *params,
+                           int nparams,
+                           uint32 seed);
+static BlockNumber system_rows_nextsampleblock(SampleScanState *node);
+static OffsetNumber system_rows_nextsampletuple(SampleScanState *node,
+                           BlockNumber blockno,
+                           OffsetNumber maxoffset);
+static bool SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan);
  static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate);
  
+
  /*
- * Initializes the state.
+ * Create a TsmRoutine descriptor for the SYSTEM_ROWS method.
   */
  Datum
-tsm_system_rows_init(PG_FUNCTION_ARGS)
+tsm_system_rows_handler(PG_FUNCTION_ARGS)
  {
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-   uint32      seed = PG_GETARG_UINT32(1);
-   int32       ntuples = PG_ARGISNULL(2) ? -1 : PG_GETARG_INT32(2);
-   HeapScanDesc scan = tsdesc->heapScan;
-   SystemSamplerData *sampler;
+   TsmRoutine *tsm = makeNode(TsmRoutine);
  
-   if (ntuples < 1)
-       ereport(ERROR,
-               (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-                errmsg("invalid sample size"),
-                errhint("Sample size must be positive integer value.")));
+   tsm->parameterTypes = list_make1_oid(INT8OID);
  
-   sampler = palloc0(sizeof(SystemSamplerData));
+   /* See notes at head of file */
+   tsm->repeatable_across_queries = false;
+   tsm->repeatable_across_scans = true;
  
-   /* Remember initial values for reinit */
-   sampler->seed = seed;
-   sampler->nblocks = scan->rs_nblocks;
-   sampler->ntuples = ntuples;
-   sampler->donetuples = 0;
-   sampler->lt = InvalidOffsetNumber;
-   sampler->doneblocks = 0;
-
-   sampler_random_init_state(sampler->seed, sampler->randstate);
-
-   /* Find relative prime as step size for linear probing. */
-   sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
-
-   /*
-    * Randomize start position so that blocks close to step size don't have
-    * higher probability of being chosen on very short scan.
-    */
-   sampler->lb = sampler_random_fract(sampler->randstate) *
-       (sampler->nblocks / sampler->step);
+   tsm->SampleScanGetSampleSize = system_rows_samplescangetsamplesize;
+   tsm->InitSampleScan = system_rows_initsamplescan;
+   tsm->BeginSampleScan = system_rows_beginsamplescan;
+   tsm->NextSampleBlock = system_rows_nextsampleblock;
+   tsm->NextSampleTuple = system_rows_nextsampletuple;
+   tsm->EndSampleScan = NULL;
  
-   tsdesc->tsmdata = (void *) sampler;
-
-   PG_RETURN_VOID();
+   PG_RETURN_POINTER(tsm);
  }
  
  /*
- * Get next block number or InvalidBlockNumber when we're done.
- *
- * Uses linear probing algorithm for picking next block.
+ * Sample size estimation.
   */
-Datum
-tsm_system_rows_nextblock(PG_FUNCTION_ARGS)
+static void
+system_rows_samplescangetsamplesize(PlannerInfo *root,
+                                   RelOptInfo *baserel,
+                                   List *paramexprs,
+                                   BlockNumber *pages,
+                                   double *tuples)
  {
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-   SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+   Node       *limitnode;
+   int64       ntuples;
+   double      npages;
  
-   sampler->lb = (sampler->lb + sampler->step) % sampler->nblocks;
-   sampler->doneblocks++;
+   /* Try to extract an estimate for the limit rowcount */
+   limitnode = (Node *) linitial(paramexprs);
+   limitnode = estimate_expression_value(root, limitnode);
  
-   /* All blocks have been read, we're done */
-   if (sampler->doneblocks > sampler->nblocks ||
-       sampler->donetuples >= sampler->ntuples)
-       PG_RETURN_UINT32(InvalidBlockNumber);
+   if (IsA(limitnode, Const) &&
+       !((Const *) limitnode)->constisnull)
+   {
+       ntuples = DatumGetInt64(((Const *) limitnode)->constvalue);
+       if (ntuples < 0)
+       {
+           /* Default ntuples if the value is bogus */
+           ntuples = 1000;
+       }
+   }
+   else
+   {
+       /* Default ntuples if we didn't obtain a non-null Const */
+       ntuples = 1000;
+   }
  
-   PG_RETURN_UINT32(sampler->lb);
-}
+   /* Clamp to the estimated relation size */
+   if (ntuples > baserel->tuples)
+       ntuples = (int64) baserel->tuples;
+   ntuples = clamp_row_est(ntuples);
  
-/*
- * Get next tuple offset in current block or InvalidOffsetNumber if we are done
- * with this block.
- */
-Datum
-tsm_system_rows_nexttuple(PG_FUNCTION_ARGS)
-{
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-   OffsetNumber maxoffset = PG_GETARG_UINT16(2);
-   SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-   OffsetNumber tupoffset = sampler->lt;
+   if (baserel->tuples > 0 && baserel->pages > 0)
+   {
+       /* Estimate number of pages visited based on tuple density */
+       double      density = baserel->tuples / (double) baserel->pages;
  
-   if (tupoffset == InvalidOffsetNumber)
-       tupoffset = FirstOffsetNumber;
+       npages = ntuples / density;
+   }
     else
-       tupoffset++;
-
-   if (tupoffset > maxoffset ||
-       sampler->donetuples >= sampler->ntuples)
-       tupoffset = InvalidOffsetNumber;
+   {
+       /* For lack of data, assume one tuple per page */
+       npages = ntuples;
+   }
  
-   sampler->lt = tupoffset;
+   /* Clamp to sane value */
+   npages = clamp_row_est(Min((double) baserel->pages, npages));
  
-   PG_RETURN_UINT16(tupoffset);
+   *pages = npages;
+   *tuples = ntuples;
  }
  
  /*
- * Examine tuple and decide if it should be returned.
+ * Initialize during executor setup.
   */
-Datum
-tsm_system_rows_examinetuple(PG_FUNCTION_ARGS)
+static void
+system_rows_initsamplescan(SampleScanState *node, int eflags)
  {
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-   bool        visible = PG_GETARG_BOOL(3);
-   SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-
-   if (!visible)
-       PG_RETURN_BOOL(false);
-
-   sampler->donetuples++;
-
-   PG_RETURN_BOOL(true);
+   node->tsm_state = palloc0(sizeof(SystemRowsSamplerData));
+   /* Note the above leaves tsm_state->step equal to zero */
  }
  
  /*
- * Cleanup method.
+ * Examine parameters and prepare for a sample scan.
   */
-Datum
-tsm_system_rows_end(PG_FUNCTION_ARGS)
+static void
+system_rows_beginsamplescan(SampleScanState *node,
+                           Datum *params,
+                           int nparams,
+                           uint32 seed)
  {
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+   SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
+   int64       ntuples = DatumGetInt64(params[0]);
+
+   if (ntuples < 0)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+                errmsg("sample size must not be negative")));
  
-   pfree(tsdesc->tsmdata);
+   sampler->seed = seed;
+   sampler->ntuples = ntuples;
+   sampler->donetuples = 0;
+   sampler->lt = InvalidOffsetNumber;
+   sampler->doneblocks = 0;
+   /* lb will be initialized during first NextSampleBlock call */
+   /* we intentionally do not change nblocks/firstblock/step here */
  
-   PG_RETURN_VOID();
+   /*
+    * We *must* use pagemode visibility checking in this module, so force
+    * that even though it's currently default.
+    */
+   node->use_pagemode = true;
  }
  
  /*
- * Reset state (called by ReScan).
+ * Select next block to sample.
+ *
+ * Uses linear probing algorithm for picking next block.
   */
-Datum
-tsm_system_rows_reset(PG_FUNCTION_ARGS)
+static BlockNumber
+system_rows_nextsampleblock(SampleScanState *node)
  {
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-   SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+   SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
+   HeapScanDesc scan = node->ss.ss_currentScanDesc;
  
-   sampler->lt = InvalidOffsetNumber;
-   sampler->donetuples = 0;
-   sampler->doneblocks = 0;
+   /* First call within scan? */
+   if (sampler->doneblocks == 0)
+   {
+       /* First scan within query? */
+       if (sampler->step == 0)
+       {
+           /* Initialize now that we have scan descriptor */
+           SamplerRandomState randstate;
+
+           /* If relation is empty, there's nothing to scan */
+           if (scan->rs_nblocks == 0)
+               return InvalidBlockNumber;
+
+           /* We only need an RNG during this setup step */
+           sampler_random_init_state(sampler->seed, randstate);
+
+           /* Compute nblocks/firstblock/step only once per query */
+           sampler->nblocks = scan->rs_nblocks;
  
-   sampler_random_init_state(sampler->seed, sampler->randstate);
-   sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
-   sampler->lb = sampler_random_fract(sampler->randstate) * (sampler->nblocks / sampler->step);
+           /* Choose random starting block within the relation */
+           /* (Actually this is the predecessor of the first block visited) */
+           sampler->firstblock = sampler_random_fract(randstate) *
+               sampler->nblocks;
+
+           /* Find relative prime as step size for linear probing */
+           sampler->step = random_relative_prime(sampler->nblocks, randstate);
+       }
+
+       /* Reinitialize lb */
+       sampler->lb = sampler->firstblock;
+   }
+
+   /* If we've read all blocks or returned all needed tuples, we're done */
+   if (++sampler->doneblocks > sampler->nblocks ||
+       sampler->donetuples >= sampler->ntuples)
+       return InvalidBlockNumber;
+
+   /*
+    * It's probably impossible for scan->rs_nblocks to decrease between scans
+    * within a query; but just in case, loop until we select a block number
+    * less than scan->rs_nblocks.  We don't care if scan->rs_nblocks has
+    * increased since the first scan.
+    */
+   do
+   {
+       /* Advance lb, using uint64 arithmetic to forestall overflow */
+       sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks;
+   } while (sampler->lb >= scan->rs_nblocks);
  
-   PG_RETURN_VOID();
+   return sampler->lb;
  }
  
  /*
- * Costing function.
+ * Select next sampled tuple in current block.
+ *
+ * In block sampling, we just want to sample all the tuples in each selected
+ * block.
+ *
+ * When we reach end of the block, return InvalidOffsetNumber which tells
+ * SampleScan to go to next block.
   */
-Datum
-tsm_system_rows_cost(PG_FUNCTION_ARGS)
+static OffsetNumber
+system_rows_nextsampletuple(SampleScanState *node,
+                           BlockNumber blockno,
+                           OffsetNumber maxoffset)
  {
-   PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-   Path       *path = (Path *) PG_GETARG_POINTER(1);
-   RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
-   List       *args = (List *) PG_GETARG_POINTER(3);
-   BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
-   double     *tuples = (double *) PG_GETARG_POINTER(5);
-   Node       *limitnode;
-   int32       ntuples;
+   SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
+   HeapScanDesc scan = node->ss.ss_currentScanDesc;
+   OffsetNumber tupoffset = sampler->lt;
  
-   limitnode = linitial(args);
-   limitnode = estimate_expression_value(root, limitnode);
+   /* Quit if we've returned all needed tuples */
+   if (sampler->donetuples >= sampler->ntuples)
+       return InvalidOffsetNumber;
  
-   if (IsA(limitnode, RelabelType))
-       limitnode = (Node *) ((RelabelType *) limitnode)->arg;
+   /*
+    * Because we should only count visible tuples as being returned, we need
+    * to search for a visible tuple rather than just let the core code do it.
+    */
  
-   if (IsA(limitnode, Const))
-       ntuples = DatumGetInt32(((Const *) limitnode)->constvalue);
-   else
+   /* We rely on the data accumulated in pagemode access */
+   Assert(scan->rs_pageatatime);
+   for (;;)
     {
-       /* Default ntuples if the estimation didn't return Const. */
-       ntuples = 1000;
+       /* Advance to next possible offset on page */
+       if (tupoffset == InvalidOffsetNumber)
+           tupoffset = FirstOffsetNumber;
+       else
+           tupoffset++;
+
+       /* Done? */
+       if (tupoffset > maxoffset)
+       {
+           tupoffset = InvalidOffsetNumber;
+           break;
+       }
+
+       /* Found a candidate? */
+       if (SampleOffsetVisible(tupoffset, scan))
+       {
+           sampler->donetuples++;
+           break;
+       }
     }
  
-   *pages = Min(baserel->pages, ntuples);
-   *tuples = ntuples;
-   path->rows = *tuples;
+   sampler->lt = tupoffset;
  
-   PG_RETURN_VOID();
+   return tupoffset;
  }
  
+/*
+ * Check if tuple offset is visible
+ *
+ * In pageatatime mode, heapgetpage() already did visibility checks,
+ * so just look at the info it left in rs_vistuples[].
+ */
+static bool
+SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan)
+{
+   int         start = 0,
+               end = scan->rs_ntuples - 1;
+
+   while (start <= end)
+   {
+       int         mid = (start + end) / 2;
+       OffsetNumber curoffset = scan->rs_vistuples[mid];
+
+       if (tupoffset == curoffset)
+           return true;
+       else if (tupoffset < curoffset)
+           end = mid - 1;
+       else
+           start = mid + 1;
+   }
+
+   return false;
+}
  
+/*
+ * Compute greatest common divisor of two uint32's.
+ */
  static uint32
  gcd(uint32 a, uint32 b)
  {
@@ -250,22 +366,29 @@ gcd(uint32 a, uint32 b)
     return b;
  }
  
+/*
+ * Pick a random value less than and relatively prime to n, if possible
+ * (else return 1).
+ */
  static uint32
  random_relative_prime(uint32 n, SamplerRandomState randstate)
  {
-   /* Pick random starting number, with some limits on what it can be. */
-   uint32      r = (uint32) sampler_random_fract(randstate) * n / 2 + n / 4,
-               t;
+   uint32      r;
+
+   /* Safety check to avoid infinite loop or zero result for small n. */
+   if (n <= 1)
+       return 1;
  
     /*
      * This should only take 2 or 3 iterations as the probability of 2 numbers
-    * being relatively prime is ~61%.
+    * being relatively prime is ~61%; but just in case, we'll include a
+    * CHECK_FOR_INTERRUPTS in the loop.
      */
-   while ((t = gcd(r, n)) > 1)
+   do
     {
         CHECK_FOR_INTERRUPTS();
-       r /= t;
-   }
+       r = (uint32) (sampler_random_fract(randstate) * n);
+   } while (r == 0 || gcd(r, n) > 1);
  
     return r;
  }
diff --git a/contrib/tsm_system_rows/tsm_system_rows.control b/contrib/tsm_system_rows/tsm_system_rows.control

index 84ea7adb49a261247b936ba4c884b2edb165529a..4bd0232f97215933516bf5ea801a49f43f8c0d6c 100644 (file)
--- a/contrib/tsm_system_rows/tsm_system_rows.control
+++ b/contrib/tsm_system_rows/tsm_system_rows.control
@@ -1,5 +1,5 @@
  # tsm_system_rows extension
-comment = 'SYSTEM TABLESAMPLE method which accepts number rows as a limit'
+comment = 'TABLESAMPLE method which accepts number of rows as a limit'
  default_version = '1.0'
  module_pathname = '$libdir/tsm_system_rows'
  relocatable = true
diff --git a/contrib/tsm_system_time/Makefile b/contrib/tsm_system_time/Makefile

index c42c1c6bb61f22f1e01925c02e41ecba53f184e6..168becf54e2ff225a583437d87d590602b6677e4 100644 (file)
--- a/contrib/tsm_system_time/Makefile
+++ b/contrib/tsm_system_time/Makefile
@@ -1,8 +1,8 @@
-# src/test/modules/tsm_system_time/Makefile
+# contrib/tsm_system_time/Makefile
  
  MODULE_big = tsm_system_time
  OBJS = tsm_system_time.o $(WIN32RES)
-PGFILEDESC = "tsm_system_time - SYSTEM TABLESAMPLE method which accepts number rows of as a limit"
+PGFILEDESC = "tsm_system_time - TABLESAMPLE method which accepts time in milliseconds as a limit"
  
  EXTENSION = tsm_system_time
  DATA = tsm_system_time--1.0.sql
diff --git a/contrib/tsm_system_time/expected/tsm_system_time.out b/contrib/tsm_system_time/expected/tsm_system_time.out

index 32ad03c4bdcef47e9bfce5ed7ab2060b528e77bc..ac44f30be90386407273b213239aa234a5dd0bd1 100644 (file)
--- a/contrib/tsm_system_time/expected/tsm_system_time.out
+++ b/contrib/tsm_system_time/expected/tsm_system_time.out
@@ -1,54 +1,100 @@
  CREATE EXTENSION tsm_system_time;
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+  FROM generate_series(0, 30) s(i);
  ANALYZE test_tablesample;
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (1000);
+-- It's a bit tricky to test SYSTEM_TIME in a platform-independent way.
+-- We can test the zero-time corner case ...
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (0);
   count 
  -------
-    31
+     0
  (1 row)
  
-SELECT id FROM test_tablesample TABLESAMPLE system_time (1000) REPEATABLE (5432);
- id 
-----
-  7
- 14
- 21
- 28
-  4
- 11
- 18
- 25
-  1
-  8
- 15
- 22
- 29
-  5
- 12
- 19
- 26
-  2
-  9
- 16
- 23
- 30
-  6
- 13
- 20
- 27
-  3
- 10
- 17
- 24
-  0
-(31 rows)
-
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_time (100) REPEATABLE (10);
-                                     QUERY PLAN                                     
-------------------------------------------------------------------------------------
- Sample Scan (system_time) on test_tablesample  (cost=0.00..100.25 rows=25 width=4)
+-- ... and we assume that this will finish before running out of time:
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (100000);
+ count 
+-------
+    31
  (1 row)
  
--- done
-DROP TABLE test_tablesample CASCADE;
+-- bad parameters should get through planning, but not execution:
+EXPLAIN (COSTS OFF)
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+                    QUERY PLAN                    
+--------------------------------------------------
+ Sample Scan on test_tablesample
+   Sampling: system_time ('-1'::double precision)
+(2 rows)
+
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+ERROR:  sample collection time must not be negative
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_time (10) REPEATABLE (0);
+ERROR:  tablesample method system_time does not support REPEATABLE
+LINE 1: SELECT * FROM test_tablesample TABLESAMPLE system_time (10) ...
+                                                   ^
+-- since it's not repeatable, we expect a Materialize node in these plans:
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (100000)) ss;
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Nested Loop
+   ->  Aggregate
+         ->  Materialize
+               ->  Sample Scan on test_tablesample
+                     Sampling: system_time ('100000'::double precision)
+   ->  Values Scan on "*VALUES*"
+(6 rows)
+
+SELECT * FROM
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (100000)) ss;
+  time  | count 
+--------+-------
+      0 |    31
+ 100000 |    31
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (time)) ss;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Nested Loop
+   ->  Values Scan on "*VALUES*"
+   ->  Aggregate
+         ->  Materialize
+               ->  Sample Scan on test_tablesample
+                     Sampling: system_time ("*VALUES*".column1)
+(6 rows)
+
+SELECT * FROM
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (time)) ss;
+  time  | count 
+--------+-------
+      0 |     0
+ 100000 |    31
+(2 rows)
+
+CREATE VIEW vv AS
+  SELECT * FROM test_tablesample TABLESAMPLE system_time (20);
+EXPLAIN (COSTS OFF) SELECT * FROM vv;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Sample Scan on test_tablesample
+   Sampling: system_time ('20'::double precision)
+(2 rows)
+
+DROP EXTENSION tsm_system_time;  -- fail, view depends on extension
+ERROR:  cannot drop extension tsm_system_time because other objects depend on it
+DETAIL:  view vv depends on function system_time(internal)
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
diff --git a/contrib/tsm_system_time/sql/tsm_system_time.sql b/contrib/tsm_system_time/sql/tsm_system_time.sql
index 68dbbf98afd2df5f66e0aa5258c4e00166613d33..117de163d85059f362cda878ef94f5f6b43aa65e 100644
--- a/contrib/tsm_system_time/sql/tsm_system_time.sql
+++ b/contrib/tsm_system_time/sql/tsm_system_time.sql
@@ -1,14 +1,51 @@
  CREATE EXTENSION tsm_system_time;
  
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+  FROM generate_series(0, 30) s(i);
  ANALYZE test_tablesample;
  
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (1000);
-SELECT id FROM test_tablesample TABLESAMPLE system_time (1000) REPEATABLE (5432);
+-- It's a bit tricky to test SYSTEM_TIME in a platform-independent way.
+-- We can test the zero-time corner case ...
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (0);
+-- ... and we assume that this will finish before running out of time:
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (100000);
+
+-- bad parameters should get through planning, but not execution:
+EXPLAIN (COSTS OFF)
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_time (10) REPEATABLE (0);
+
+-- since it's not repeatable, we expect a Materialize node in these plans:
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (100000)) ss;
+
+SELECT * FROM
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (100000)) ss;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (time)) ss;
+
+SELECT * FROM
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (time)) ss;
+
+CREATE VIEW vv AS
+  SELECT * FROM test_tablesample TABLESAMPLE system_time (20);
  
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_time (100) REPEATABLE (10);
+EXPLAIN (COSTS OFF) SELECT * FROM vv;
  
--- done
-DROP TABLE test_tablesample CASCADE;
+DROP EXTENSION tsm_system_time;  -- fail, view depends on extension
diff --git a/contrib/tsm_system_time/tsm_system_time--1.0.sql b/contrib/tsm_system_time/tsm_system_time--1.0.sql
index 1f390d6ed7acac601567e67a42fdbc2220802ac8..c59d2e84efdabfa9e9163c036eb702e8ad981d26 100644
--- a/contrib/tsm_system_time/tsm_system_time--1.0.sql
+++ b/contrib/tsm_system_time/tsm_system_time--1.0.sql
@@ -1,39 +1,9 @@
-/* src/test/modules/tablesample/tsm_system_time--1.0.sql */
+/* contrib/tsm_system_time/tsm_system_time--1.0.sql */
  
  -- complain if script is sourced in psql, rather than via CREATE EXTENSION
  \echo Use "CREATE EXTENSION tsm_system_time" to load this file. \quit
  
-CREATE FUNCTION tsm_system_time_init(internal, int4, int4)
-RETURNS void
-AS 'MODULE_PATHNAME'
+CREATE FUNCTION system_time(internal)
+RETURNS tsm_handler
+AS 'MODULE_PATHNAME', 'tsm_system_time_handler'
  LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_time_nextblock(internal)
-RETURNS int4
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_time_nexttuple(internal, int4, int2)
-RETURNS int2
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_time_end(internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_time_reset(internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_time_cost(internal, internal, internal, internal, internal, internal, internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-INSERT INTO pg_tablesample_method VALUES('system_time', false, true,
-   'tsm_system_time_init', 'tsm_system_time_nextblock',
-   'tsm_system_time_nexttuple', '-', 'tsm_system_time_end',
-   'tsm_system_time_reset', 'tsm_system_time_cost');
diff --git a/contrib/tsm_system_time/tsm_system_time.c b/contrib/tsm_system_time/tsm_system_time.c
index 7708fc07617488e9a57128a72eba9707004dc9f3..83f1455c5fa248b3028e095acd8ceedd6ae4c9e1 100644
--- a/contrib/tsm_system_time/tsm_system_time.c
+++ b/contrib/tsm_system_time/tsm_system_time.c
@@ -1,286 +1,320 @@
  /*-------------------------------------------------------------------------
   *
   * tsm_system_time.c
- *   interface routines for system_time tablesample method
+ *   support routines for SYSTEM_TIME tablesample method
   *
+ * The desire here is to produce a random sample with as many rows as possible
+ * in no more than the specified amount of time.  We use a block-sampling
+ * approach.  To ensure that the whole relation will be visited if necessary,
+ * we start at a randomly chosen block and then advance with a stride that
+ * is randomly chosen but is relatively prime to the relation's nblocks.
   *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Because of the time dependence, this method is necessarily unrepeatable.
+ * However, we do what we can to reduce surprising behavior by selecting
+ * the sampling pattern just once per query, much as in tsm_system_rows.
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   contrib/tsm_system_time_rowlimit/tsm_system_time.c
+ *   contrib/tsm_system_time/tsm_system_time.c
   *
   *-------------------------------------------------------------------------
   */
  
  #include "postgres.h"
  
-#include "fmgr.h"
+#ifdef _MSC_VER
+#include <float.h>             /* for _isnan */
+#endif
+#include <math.h>
  
-#include "access/tablesample.h"
  #include "access/relscan.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
  #include "miscadmin.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
  #include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
+#include "optimizer/cost.h"
  #include "utils/sampling.h"
  #include "utils/spccache.h"
-#include "utils/timestamp.h"
  
  PG_MODULE_MAGIC;
  
-/*
- * State
- */
+PG_FUNCTION_INFO_V1(tsm_system_time_handler);
+
+
+/* Private state */
  typedef struct
  {
-   SamplerRandomState randstate;
     uint32      seed;           /* random seed */
-   BlockNumber nblocks;        /* number of block in relation */
-   int32       time;           /* time limit for sampling */
-   TimestampTz start_time;     /* start time of sampling */
-   TimestampTz end_time;       /* end time of sampling */
+   double      millis;         /* time limit for sampling */
+   instr_time  start_time;     /* scan start time */
     OffsetNumber lt;            /* last tuple returned from current block */
-   BlockNumber step;           /* step size */
+   BlockNumber doneblocks;     /* number of already-scanned blocks */
     BlockNumber lb;             /* last block visited */
-   BlockNumber estblocks;      /* estimated number of returned blocks
-                                * (moving) */
-   BlockNumber doneblocks;     /* number of already returned blocks */
-} SystemSamplerData;
-
-
-PG_FUNCTION_INFO_V1(tsm_system_time_init);
-PG_FUNCTION_INFO_V1(tsm_system_time_nextblock);
-PG_FUNCTION_INFO_V1(tsm_system_time_nexttuple);
-PG_FUNCTION_INFO_V1(tsm_system_time_end);
-PG_FUNCTION_INFO_V1(tsm_system_time_reset);
-PG_FUNCTION_INFO_V1(tsm_system_time_cost);
-
+   /* these three values are not changed during a rescan: */
+   BlockNumber nblocks;        /* number of blocks in relation */
+   BlockNumber firstblock;     /* first block to sample from */
+   BlockNumber step;           /* step size, or 0 if not set yet */
+} SystemTimeSamplerData;
+
+static void system_time_samplescangetsamplesize(PlannerInfo *root,
+                                   RelOptInfo *baserel,
+                                   List *paramexprs,
+                                   BlockNumber *pages,
+                                   double *tuples);
+static void system_time_initsamplescan(SampleScanState *node,
+                          int eflags);
+static void system_time_beginsamplescan(SampleScanState *node,
+                           Datum *params,
+                           int nparams,
+                           uint32 seed);
+static BlockNumber system_time_nextsampleblock(SampleScanState *node);
+static OffsetNumber system_time_nextsampletuple(SampleScanState *node,
+                           BlockNumber blockno,
+                           OffsetNumber maxoffset);
  static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate);
  
+
  /*
- * Initializes the state.
+ * Create a TsmRoutine descriptor for the SYSTEM_TIME method.
   */
  Datum
-tsm_system_time_init(PG_FUNCTION_ARGS)
+tsm_system_time_handler(PG_FUNCTION_ARGS)
  {
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-   uint32      seed = PG_GETARG_UINT32(1);
-   int32       time = PG_ARGISNULL(2) ? -1 : PG_GETARG_INT32(2);
-   HeapScanDesc scan = tsdesc->heapScan;
-   SystemSamplerData *sampler;
-
-   if (time < 1)
-       ereport(ERROR,
-               (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-                errmsg("invalid time limit"),
-                errhint("Time limit must be positive integer value.")));
+   TsmRoutine *tsm = makeNode(TsmRoutine);
  
-   sampler = palloc0(sizeof(SystemSamplerData));
+   tsm->parameterTypes = list_make1_oid(FLOAT8OID);
  
-   /* Remember initial values for reinit */
-   sampler->seed = seed;
-   sampler->nblocks = scan->rs_nblocks;
-   sampler->lt = InvalidOffsetNumber;
-   sampler->estblocks = 2;
-   sampler->doneblocks = 0;
-   sampler->time = time;
-   sampler->start_time = GetCurrentTimestamp();
-   sampler->end_time = TimestampTzPlusMilliseconds(sampler->start_time,
-                                                   sampler->time);
+   /* See notes at head of file */
+   tsm->repeatable_across_queries = false;
+   tsm->repeatable_across_scans = false;
  
-   sampler_random_init_state(sampler->seed, sampler->randstate);
+   tsm->SampleScanGetSampleSize = system_time_samplescangetsamplesize;
+   tsm->InitSampleScan = system_time_initsamplescan;
+   tsm->BeginSampleScan = system_time_beginsamplescan;
+   tsm->NextSampleBlock = system_time_nextsampleblock;
+   tsm->NextSampleTuple = system_time_nextsampletuple;
+   tsm->EndSampleScan = NULL;
  
-   /* Find relative prime as step size for linear probing. */
-   sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
-
-   /*
-    * Randomize start position so that blocks close to step size don't have
-    * higher probability of being chosen on very short scan.
-    */
-   sampler->lb = sampler_random_fract(sampler->randstate) * (sampler->nblocks / sampler->step);
-
-   tsdesc->tsmdata = (void *) sampler;
-
-   PG_RETURN_VOID();
+   PG_RETURN_POINTER(tsm);
  }
  
  /*
- * Get next block number or InvalidBlockNumber when we're done.
- *
- * Uses linear probing algorithm for picking next block.
+ * Sample size estimation.
   */
-Datum
-tsm_system_time_nextblock(PG_FUNCTION_ARGS)
+static void
+system_time_samplescangetsamplesize(PlannerInfo *root,
+                                   RelOptInfo *baserel,
+                                   List *paramexprs,
+                                   BlockNumber *pages,
+                                   double *tuples)
  {
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-   SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-
-   sampler->lb = (sampler->lb + sampler->step) % sampler->nblocks;
-   sampler->doneblocks++;
+   Node       *limitnode;
+   double      millis;
+   double      spc_random_page_cost;
+   double      npages;
+   double      ntuples;
  
-   /* All blocks have been read, we're done */
-   if (sampler->doneblocks > sampler->nblocks)
-       PG_RETURN_UINT32(InvalidBlockNumber);
+   /* Try to extract an estimate for the limit time spec */
+   limitnode = (Node *) linitial(paramexprs);
+   limitnode = estimate_expression_value(root, limitnode);
  
-   /*
-    * Update the estimations for time limit at least 10 times per estimated
-    * number of returned blocks to handle variations in block read speed.
-    */
-   if (sampler->doneblocks % Max(sampler->estblocks / 10, 1) == 0)
+   if (IsA(limitnode, Const) &&
+       !((Const *) limitnode)->constisnull)
+   {
+       millis = DatumGetFloat8(((Const *) limitnode)->constvalue);
+       if (millis < 0 || isnan(millis))
+       {
+           /* Default millis if the value is bogus */
+           millis = 1000;
+       }
+   }
+   else
     {
-       TimestampTz now = GetCurrentTimestamp();
-       long        secs;
-       int         usecs;
-       int         usecs_remaining;
-       int         time_per_block;
+       /* Default millis if we didn't obtain a non-null Const */
+       millis = 1000;
+   }
  
-       TimestampDifference(sampler->start_time, now, &secs, &usecs);
-       usecs += (int) secs *1000000;
+   /* Get the planner's idea of cost per page read */
+   get_tablespace_page_costs(baserel->reltablespace,
+                             &spc_random_page_cost,
+                             NULL);
  
-       time_per_block = usecs / sampler->doneblocks;
+   /*
+    * Estimate the number of pages we can read by assuming that the cost
+    * figure is expressed in milliseconds.  This is completely, unmistakably
+    * bogus, but we have to do something to produce an estimate and there's
+    * no better answer.
+    */
+   if (spc_random_page_cost > 0)
+       npages = millis / spc_random_page_cost;
+   else
+       npages = millis;        /* even more bogus, but whatcha gonna do? */
  
-       /* No time left, end. */
-       TimestampDifference(now, sampler->end_time, &secs, &usecs);
-       if (secs <= 0 && usecs <= 0)
-           PG_RETURN_UINT32(InvalidBlockNumber);
+   /* Clamp to sane value */
+   npages = clamp_row_est(Min((double) baserel->pages, npages));
  
-       /* Remaining microseconds */
-       usecs_remaining = usecs + (int) secs *1000000;
+   if (baserel->tuples > 0 && baserel->pages > 0)
+   {
+       /* Estimate number of tuples returned based on tuple density */
+       double      density = baserel->tuples / (double) baserel->pages;
  
-       /* Recalculate estimated returned number of blocks */
-       if (time_per_block < usecs_remaining && time_per_block > 0)
-           sampler->estblocks = sampler->time * time_per_block;
+       ntuples = npages * density;
     }
-
-   PG_RETURN_UINT32(sampler->lb);
-}
-
-/*
- * Get next tuple offset in current block or InvalidOffsetNumber if we are done
- * with this block.
- */
-Datum
-tsm_system_time_nexttuple(PG_FUNCTION_ARGS)
-{
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-   OffsetNumber maxoffset = PG_GETARG_UINT16(2);
-   SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-   OffsetNumber tupoffset = sampler->lt;
-
-   if (tupoffset == InvalidOffsetNumber)
-       tupoffset = FirstOffsetNumber;
     else
-       tupoffset++;
-
-   if (tupoffset > maxoffset)
-       tupoffset = InvalidOffsetNumber;
+   {
+       /* For lack of data, assume one tuple per page */
+       ntuples = npages;
+   }
  
-   sampler->lt = tupoffset;
+   /* Clamp to the estimated relation size */
+   ntuples = clamp_row_est(Min(baserel->tuples, ntuples));
  
-   PG_RETURN_UINT16(tupoffset);
+   *pages = npages;
+   *tuples = ntuples;
  }
  
  /*
- * Cleanup method.
+ * Initialize during executor setup.
   */
-Datum
-tsm_system_time_end(PG_FUNCTION_ARGS)
+static void
+system_time_initsamplescan(SampleScanState *node, int eflags)
  {
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-
-   pfree(tsdesc->tsmdata);
-
-   PG_RETURN_VOID();
+   node->tsm_state = palloc0(sizeof(SystemTimeSamplerData));
+   /* Note the above leaves tsm_state->step equal to zero */
  }
  
  /*
- * Reset state (called by ReScan).
+ * Examine parameters and prepare for a sample scan.
   */
-Datum
-tsm_system_time_reset(PG_FUNCTION_ARGS)
+static void
+system_time_beginsamplescan(SampleScanState *node,
+                           Datum *params,
+                           int nparams,
+                           uint32 seed)
  {
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-   SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+   SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state;
+   double      millis = DatumGetFloat8(params[0]);
+
+   if (millis < 0 || isnan(millis))
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+                errmsg("sample collection time must not be negative")));
  
+   sampler->seed = seed;
+   sampler->millis = millis;
     sampler->lt = InvalidOffsetNumber;
-   sampler->start_time = GetCurrentTimestamp();
-   sampler->end_time = TimestampTzPlusMilliseconds(sampler->start_time,
-                                                   sampler->time);
-   sampler->estblocks = 2;
     sampler->doneblocks = 0;
-
-   sampler_random_init_state(sampler->seed, sampler->randstate);
-   sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
-   sampler->lb = sampler_random_fract(sampler->randstate) * (sampler->nblocks / sampler->step);
-
-   PG_RETURN_VOID();
+   /* start_time, lb will be initialized during first NextSampleBlock call */
+   /* we intentionally do not change nblocks/firstblock/step here */
  }
  
  /*
- * Costing function.
+ * Select next block to sample.
+ *
+ * Uses linear probing algorithm for picking next block.
   */
-Datum
-tsm_system_time_cost(PG_FUNCTION_ARGS)
+static BlockNumber
+system_time_nextsampleblock(SampleScanState *node)
  {
-   PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-   Path       *path = (Path *) PG_GETARG_POINTER(1);
-   RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
-   List       *args = (List *) PG_GETARG_POINTER(3);
-   BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
-   double     *tuples = (double *) PG_GETARG_POINTER(5);
-   Node       *limitnode;
-   int32       time;
-   BlockNumber relpages;
-   double      reltuples;
-   double      density;
-   double      spc_random_page_cost;
-
-   limitnode = linitial(args);
-   limitnode = estimate_expression_value(root, limitnode);
-
-   if (IsA(limitnode, RelabelType))
-       limitnode = (Node *) ((RelabelType *) limitnode)->arg;
+   SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state;
+   HeapScanDesc scan = node->ss.ss_currentScanDesc;
+   instr_time  cur_time;
  
-   if (IsA(limitnode, Const))
-       time = DatumGetInt32(((Const *) limitnode)->constvalue);
-   else
+   /* First call within scan? */
+   if (sampler->doneblocks == 0)
     {
-       /* Default time (1s) if the estimation didn't return Const. */
-       time = 1000;
+       /* First scan within query? */
+       if (sampler->step == 0)
+       {
+           /* Initialize now that we have scan descriptor */
+           SamplerRandomState randstate;
+
+           /* If relation is empty, there's nothing to scan */
+           if (scan->rs_nblocks == 0)
+               return InvalidBlockNumber;
+
+           /* We only need an RNG during this setup step */
+           sampler_random_init_state(sampler->seed, randstate);
+
+           /* Compute nblocks/firstblock/step only once per query */
+           sampler->nblocks = scan->rs_nblocks;
+
+           /* Choose random starting block within the relation */
+           /* (Actually this is the predecessor of the first block visited) */
+           sampler->firstblock = sampler_random_fract(randstate) *
+               sampler->nblocks;
+
+           /* Find relative prime as step size for linear probing */
+           sampler->step = random_relative_prime(sampler->nblocks, randstate);
+       }
+
+       /* Reinitialize lb and start_time */
+       sampler->lb = sampler->firstblock;
+       INSTR_TIME_SET_CURRENT(sampler->start_time);
     }
  
-   relpages = baserel->pages;
-   reltuples = baserel->tuples;
+   /* If we've read all blocks in relation, we're done */
+   if (++sampler->doneblocks > sampler->nblocks)
+       return InvalidBlockNumber;
  
-   /* estimate the tuple density */
-   if (relpages > 0)
-       density = reltuples / (double) relpages;
-   else
-       density = (BLCKSZ - SizeOfPageHeaderData) / baserel->width;
+   /* If we've used up all the allotted time, we're done */
+   INSTR_TIME_SET_CURRENT(cur_time);
+   INSTR_TIME_SUBTRACT(cur_time, sampler->start_time);
+   if (INSTR_TIME_GET_MILLISEC(cur_time) >= sampler->millis)
+       return InvalidBlockNumber;
  
     /*
-    * We equal random page cost value to number of ms it takes to read the
-    * random page here which is far from accurate but we don't have anything
-    * better to base our predicted page reads.
+    * It's probably impossible for scan->rs_nblocks to decrease between scans
+    * within a query; but just in case, loop until we select a block number
+    * less than scan->rs_nblocks.  We don't care if scan->rs_nblocks has
+    * increased since the first scan.
      */
-   get_tablespace_page_costs(baserel->reltablespace,
-                             &spc_random_page_cost,
-                             NULL);
+   do
+   {
+       /* Advance lb, using uint64 arithmetic to forestall overflow */
+       sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks;
+   } while (sampler->lb >= scan->rs_nblocks);
  
-   /*
-    * Assumption here is that we'll never read less than 1% of table pages,
-    * this is here mainly because it is much less bad to overestimate than
-    * underestimate and using just spc_random_page_cost will probably lead to
-    * underestimations in general.
-    */
-   *pages = Min(baserel->pages, Max(time / spc_random_page_cost, baserel->pages / 100));
-   *tuples = rint(density * (double) *pages * path->rows / baserel->tuples);
-   path->rows = *tuples;
+   return sampler->lb;
+}
+
+/*
+ * Select next sampled tuple in current block.
+ *
+ * In block sampling, we just want to sample all the tuples in each selected
+ * block.
+ *
+ * When we reach end of the block, return InvalidOffsetNumber which tells
+ * SampleScan to go to next block.
+ */
+static OffsetNumber
+system_time_nextsampletuple(SampleScanState *node,
+                           BlockNumber blockno,
+                           OffsetNumber maxoffset)
+{
+   SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state;
+   OffsetNumber tupoffset = sampler->lt;
+
+   /* Advance to next possible offset on page */
+   if (tupoffset == InvalidOffsetNumber)
+       tupoffset = FirstOffsetNumber;
+   else
+       tupoffset++;
+
+   /* Done? */
+   if (tupoffset > maxoffset)
+       tupoffset = InvalidOffsetNumber;
+
+   sampler->lt = tupoffset;
  
-   PG_RETURN_VOID();
+   return tupoffset;
  }
  
+/*
+ * Compute greatest common divisor of two uint32's.
+ */
  static uint32
  gcd(uint32 a, uint32 b)
  {
@@ -296,22 +330,29 @@ gcd(uint32 a, uint32 b)
     return b;
  }
  
+/*
+ * Pick a random value less than and relatively prime to n, if possible
+ * (else return 1).
+ */
  static uint32
  random_relative_prime(uint32 n, SamplerRandomState randstate)
  {
-   /* Pick random starting number, with some limits on what it can be. */
-   uint32      r = (uint32) sampler_random_fract(randstate) * n / 2 + n / 4,
-               t;
+   uint32      r;
+
+   /* Safety check to avoid infinite loop or zero result for small n. */
+   if (n <= 1)
+       return 1;
  
     /*
      * This should only take 2 or 3 iterations as the probability of 2 numbers
-    * being relatively prime is ~61%.
+    * being relatively prime is ~61%; but just in case, we'll include a
+    * CHECK_FOR_INTERRUPTS in the loop.
      */
-   while ((t = gcd(r, n)) > 1)
+   do
     {
         CHECK_FOR_INTERRUPTS();
-       r /= t;
-   }
+       r = (uint32) (sampler_random_fract(randstate) * n);
+   } while (r == 0 || gcd(r, n) > 1);
  
     return r;
  }
diff --git a/contrib/tsm_system_time/tsm_system_time.control b/contrib/tsm_system_time/tsm_system_time.control
index ebcee19d23a0db519f1597335e80588d7a56b59d..c247987c66d14b9a2cb75fb07bbe623366d4b458 100644
--- a/contrib/tsm_system_time/tsm_system_time.control
+++ b/contrib/tsm_system_time/tsm_system_time.control
@@ -1,5 +1,5 @@
  # tsm_system_time extension
-comment = 'SYSTEM TABLESAMPLE method which accepts time in milliseconds as a limit'
+comment = 'TABLESAMPLE method which accepts time in milliseconds as a limit'
  default_version = '1.0'
  module_pathname = '$libdir/tsm_system_time'
  relocatable = true
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 2c2190f13d373e0ff0567f7052bf73838cc3d770..9096ee5d517de88aff7d8cd0c233cc8862c13b3c 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -278,11 +278,6 @@
        <entry>planner statistics</entry>
       </row>
  
-     <row>
-      <entry><link linkend="catalog-pg-tablesample-method"><structname>pg_tablesample_method</structname></link></entry>
-      <entry>table sampling methods</entry>
-     </row>
-
       <row>
        <entry><link linkend="catalog-pg-tablespace"><structname>pg_tablespace</structname></link></entry>
        <entry>tablespaces within this database cluster</entry>
@@ -6132,121 +6127,6 @@
   </sect1>
  
  
- <sect1 id="catalog-pg-tablesample-method">
-  <title><structname>pg_tabesample_method</structname></title>
-
-  <indexterm zone="catalog-pg-tablesample-method">
-   <primary>pg_am</primary>
-  </indexterm>
-
-  <para>
-   The catalog <structname>pg_tablesample_method</structname> stores
-   information about table sampling methods which can be used in
-   <command>TABLESAMPLE</command> clause of a <command>SELECT</command>
-   statement.
-  </para>
-
-  <table>
-   <title><structname>pg_tablesample_method</> Columns</title>
-
-   <tgroup cols="4">
-    <thead>
-     <row>
-      <entry>Name</entry>
-      <entry>Type</entry>
-      <entry>References</entry>
-      <entry>Description</entry>
-     </row>
-    </thead>
-    <tbody>
-
-     <row>
-      <entry><structfield>oid</structfield></entry>
-      <entry><type>oid</type></entry>
-      <entry></entry>
-      <entry>Row identifier (hidden attribute; must be explicitly selected)</entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmname</structfield></entry>
-      <entry><type>name</type></entry>
-      <entry></entry>
-      <entry>Name of the sampling method</entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmseqscan</structfield></entry>
-      <entry><type>bool</type></entry>
-      <entry></entry>
-      <entry>If true, the sampling method scans the whole table sequentially.
-      </entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmpagemode</structfield></entry>
-      <entry><type>bool</type></entry>
-      <entry></entry>
-      <entry>If true, the sampling method always reads the pages completely.
-      </entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsminit</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Initialize the sampling scan</quote> function</entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmnextblock</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Get next block number</quote> function</entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmnexttuple</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Get next tuple offset</quote> function</entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmexaminetuple</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry>Function which examines the tuple contents and decides if to
-        return it, or zero if none</entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmend</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>End the sampling scan</quote> function</entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmreset</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Restart the state of sampling scan</quote> function</entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmcost</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry>Costing function</entry>
-     </row>
-
-    </tbody>
-   </tgroup>
-  </table>
-
- </sect1>
-
-
   <sect1 id="catalog-pg-tablespace">
    <title><structname>pg_tablespace</structname></title>
  
diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml

index 8e13555a3aa5518d11eac71c1536bb6acf4032be..8113ddf8179f10e1b0f031d2d4106837656e23e3 100644 (file)
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4346,7 +4346,7 @@ SET xmloption TO { DOCUMENT | CONTENT };
      an object identifier.  There are also several alias types for
      <type>oid</>: <type>regproc</>, <type>regprocedure</>,
      <type>regoper</>, <type>regoperator</>, <type>regclass</>,
-    <type>regtype</>, <type>regrole</>, <type>regnamespace</>, 
+    <type>regtype</>, <type>regrole</>, <type>regnamespace</>,
      <type>regconfig</>, and <type>regdictionary</>.
      <xref linkend="datatype-oid-table"> shows an overview.
     </para>
@@ -4622,6 +4622,10 @@ SELECT * FROM pg_attribute
      <primary>fdw_handler</primary>
     </indexterm>
  
+   <indexterm zone="datatype-pseudo">
+    <primary>tsm_handler</primary>
+   </indexterm>
+
     <indexterm zone="datatype-pseudo">
      <primary>cstring</primary>
     </indexterm>
@@ -4716,6 +4720,11 @@ SELECT * FROM pg_attribute
          <entry>A foreign-data wrapper handler is declared to return <type>fdw_handler</>.</entry>
         </row>
  
+       <row>
+        <entry><type>tsm_handler</></entry>
+        <entry>A tablesample method handler is declared to return <type>tsm_handler</>.</entry>
+       </row>
+
         <row>
          <entry><type>record</></entry>
          <entry>Identifies a function returning an unspecified row type.</entry>
diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml

index d1703e9c01ff87eb99f48e8e00b3bd5ff0db187f..7e82cdc3b124b870f39ac6d4a8077372d1cd8cd8 100644 (file)
--- a/doc/src/sgml/postgres.sgml
+++ b/doc/src/sgml/postgres.sgml
@@ -243,6 +243,7 @@
    &nls;
    &plhandler;
    &fdwhandler;
+  &tablesample-method;
    &custom-scan;
    &geqo;
    &indexam;
@@ -250,7 +251,6 @@
    &spgist;
    &gin;
    &brin;
-  &tablesample-method;
    &storage;
    &bki;
    &planstats;
diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml

index 632d7935cb41fe946cbbd6d356ba927af6c1cf27..44810f4909c06cdfaac8db5a9cf2a0ad5746db50 100644 (file)
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@@ -49,7 +49,8 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="parameter">expression</replac
  
  <phrase>where <replaceable class="parameter">from_item</replaceable> can be one of:</phrase>
  
-    [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ] [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ] [ TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ] ]
+    [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ] [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ]
+                [ TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ] ]
      [ LATERAL ] ( <replaceable class="parameter">select</replaceable> ) [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ]
      <replaceable class="parameter">with_query_name</replaceable> [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ]
      [ LATERAL ] <replaceable class="parameter">function_name</replaceable> ( [ <replaceable class="parameter">argument</replaceable> [, ...] ] )
@@ -325,50 +326,6 @@ TABLE [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ]
        </listitem>
       </varlistentry>
  
-     <varlistentry>
-      <term>TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ]</term>
-      <listitem>
-       <para>
-        Table sample clause after
-        <replaceable class="parameter">table_name</replaceable> indicates that
-        a <replaceable class="parameter">sampling_method</replaceable> should
-        be used to retrieve subset of rows in the table.
-        The <replaceable class="parameter">sampling_method</replaceable> can be
-        any sampling method installed in the database. There are currently two
-        sampling methods available in the standard
-        <productname>PostgreSQL</productname> distribution:
-        <itemizedlist>
-         <listitem>
-          <para><literal>SYSTEM</literal></para>
-         </listitem>
-         <listitem>
-          <para><literal>BERNOULLI</literal></para>
-         </listitem>
-        </itemizedlist>
-        Both of these sampling methods currently accept only single argument
-        which is the percent (floating point from 0 to 100) of the rows to
-        be returned.
-        The <literal>SYSTEM</literal> sampling method does block level
-        sampling with each block having the same chance of being selected and
-        returns all rows from each selected block.
-        The <literal>BERNOULLI</literal> scans whole table and returns
-        individual rows with equal probability. Additional sampling methods
-        may be installed in the database via extensions.
-       </para>
-       <para>
-        The optional parameter <literal>REPEATABLE</literal> uses the seed
-        parameter, which can be a number or expression producing a number, as
-        a random seed for sampling. Note that subsequent commands may return
-        different results even if same <literal>REPEATABLE</literal> clause was
-        specified. This happens because <acronym>DML</acronym> statements and
-        maintenance operations such as <command>VACUUM</> may affect physical
-        distribution of data. The <function>setseed()</> function will not
-        affect the sampling result when the <literal>REPEATABLE</literal>
-        parameter is used.
-       </para>
-      </listitem>
-     </varlistentry>
-
       <varlistentry>
        <term><replaceable class="parameter">alias</replaceable></term>
        <listitem>
@@ -387,6 +344,61 @@ TABLE [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ]
        </listitem>
       </varlistentry>
  
+     <varlistentry>
+      <term><literal>TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ]</literal></term>
+      <listitem>
+       <para>
+        A <literal>TABLESAMPLE</> clause after
+        a <replaceable class="parameter">table_name</> indicates that the
+        specified <replaceable class="parameter">sampling_method</replaceable>
+        should be used to retrieve a subset of the rows in that table.
+        This sampling precedes the application of any other filters such
+        as <literal>WHERE</> clauses.
+        The standard <productname>PostgreSQL</productname> distribution
+        includes two sampling methods, <literal>BERNOULLI</literal>
+        and <literal>SYSTEM</literal>, and other sampling methods can be
+        installed in the database via extensions.
+       </para>
+
+       <para>
+        The <literal>BERNOULLI</> and <literal>SYSTEM</> sampling methods
+        each accept a single <replaceable class="parameter">argument</>
+        which is the fraction of the table to sample, expressed as a
+        percentage between 0 and 100.  This argument can be
+        any <type>real</>-valued expression.  (Other sampling methods might
+        accept more or different arguments.)  These two methods each return
+        a randomly-chosen sample of the table that will contain
+        approximately the specified percentage of the table's rows.
+        The <literal>BERNOULLI</literal> method scans the whole table and
+        selects or ignores individual rows independently with the specified
+        probability.
+        The <literal>SYSTEM</literal> method does block-level sampling with
+        each block having the specified chance of being selected; all rows
+        in each selected block are returned.
+        The <literal>SYSTEM</literal> method is significantly faster than
+        the <literal>BERNOULLI</literal> method when small sampling
+        percentages are specified, but it may return a less-random sample of
+        the table as a result of clustering effects.
+       </para>
+
+       <para>
+        The optional <literal>REPEATABLE</literal> clause specifies
+        a <replaceable class="parameter">seed</> number or expression to use
+        for generating random numbers within the sampling method.  The seed
+        value can be any non-null floating-point value.  Two queries that
+        specify the same seed and <replaceable class="parameter">argument</>
+        values will select the same sample of the table, if the table has
+        not been changed meanwhile.  But different seed values will usually
+        produce different samples.
+        If <literal>REPEATABLE</literal> is not given then a new random
+        sample is selected for each query.
+        Note that some add-on sampling methods do not
+        accept <literal>REPEATABLE</literal>, and will always produce new
+        samples on each use.
+       </para>
+      </listitem>
+     </varlistentry>
+
       <varlistentry>
        <term><replaceable class="parameter">select</replaceable></term>
        <listitem>
@@ -1870,6 +1882,16 @@ SELECT distributors.* WHERE distributors.name = 'Westward';
     </para>
    </refsect2>
  
+  <refsect2>
+   <title><literal>TABLESAMPLE</literal> Clause Restrictions</title>
+
+   <para>
+    The <literal>TABLESAMPLE</> clause is currently accepted only on
+    regular tables and materialized views.  According to the SQL standard
+    it should be possible to apply it to any <literal>FROM</> item.
+   </para>
+  </refsect2>
+
    <refsect2>
     <title>Function Calls in <literal>FROM</literal></title>
  
@@ -1993,19 +2015,5 @@ SELECT distributors.* WHERE distributors.name = 'Westward';
     </para>
    </refsect2>
  
-  <refsect2>
-   <title><literal>TABLESAMPLE</literal> clause</title>
-
-   <para>
-    The <literal>TABLESAMPLE</> clause is currently accepted only on physical
-    relations and materialized views.
-   </para>
-
-   <para>
-    Additional modules allow you to install custom sampling methods and use
-    them instead of the SQL standard methods.
-   </para>
-  </refsect2>
-
   </refsect1>
  </refentry>
diff --git a/doc/src/sgml/tablesample-method.sgml b/doc/src/sgml/tablesample-method.sgml

index 48eb7fe84ea93e61166d7b689582cce2b2a1720c..22f8bbe19aa4b4c5166a8de98e1c8b26624d0d44 100644 (file)
--- a/doc/src/sgml/tablesample-method.sgml
+++ b/doc/src/sgml/tablesample-method.sgml
@@ -1,139 +1,301 @@
  <!-- doc/src/sgml/tablesample-method.sgml -->
  
  <chapter id="tablesample-method">
- <title>Writing A TABLESAMPLE Sampling Method</title>
+ <title>Writing A Table Sampling Method</title>
  
   <indexterm zone="tablesample-method">
-  <primary>tablesample method</primary>
+  <primary>table sampling method</primary>
+ </indexterm>
+
+ <indexterm zone="tablesample-method">
+  <primary><literal>TABLESAMPLE</literal> method</primary>
   </indexterm>
  
   <para>
-  The <command>TABLESAMPLE</command> clause implementation in
-  <productname>PostgreSQL</> supports creating a custom sampling methods.
-  These methods control what sample of the table will be returned when the
-  <command>TABLESAMPLE</command> clause is used.
+  <productname>PostgreSQL</>'s implementation of the <literal>TABLESAMPLE</>
+  clause supports custom table sampling methods, in addition to
+  the <literal>BERNOULLI</> and <literal>SYSTEM</> methods that are required
+  by the SQL standard.  The sampling method determines which rows of the
+  table will be selected when the <literal>TABLESAMPLE</> clause is used.
   </para>
  
- <sect1 id="tablesample-method-functions">
-  <title>Tablesample Method Functions</title>
+ <para>
+  At the SQL level, a table sampling method is represented by a single SQL
+  function, typically implemented in C, having the signature
+<programlisting>
+method_name(internal) RETURNS tsm_handler
+</programlisting>
+  The name of the function is the same as the method name appearing in the
+  <literal>TABLESAMPLE</> clause.  The <type>internal</> argument is a dummy
+  (always having value zero) that simply serves to prevent this function from
+  being called directly from a SQL command.
+  The result of the function must be a palloc'd struct of
+  type <type>TsmRoutine</>, which contains pointers to support functions for
+  the sampling method.  These support functions are plain C functions and
+  are not visible or callable at the SQL level.  The support functions are
+  described in <xref linkend="tablesample-support-functions">.
+ </para>
+
+ <para>
+  In addition to function pointers, the <type>TsmRoutine</> struct must
+  provide these additional fields:
+ </para>
+
+ <variablelist>
+  <varlistentry>
+   <term><literal>List *parameterTypes</literal></term>
+   <listitem>
+    <para>
+     This is an OID list containing the data type OIDs of the parameter(s)
+     that will be accepted by the <literal>TABLESAMPLE</> clause when this
+     sampling method is used.  For example, for the built-in methods, this
+     list contains a single item with value <literal>FLOAT4OID</>, which
+     represents the sampling percentage.  Custom sampling methods can have
+     more or different parameters.
+    </para>
+   </listitem>
+  </varlistentry>
+
+  <varlistentry>
+   <term><literal>bool repeatable_across_queries</literal></term>
+   <listitem>
+    <para>
+     If <literal>true</>, the sampling method can deliver identical samples
+     across successive queries, if the same parameters
+     and <literal>REPEATABLE</> seed value are supplied each time and the
+     table contents have not changed.  When this is <literal>false</>,
+     the <literal>REPEATABLE</> clause is not accepted for use with the
+     sampling method.
+    </para>
+   </listitem>
+  </varlistentry>
+
+  <varlistentry>
+   <term><literal>bool repeatable_across_scans</literal></term>
+   <listitem>
+    <para>
+     If <literal>true</>, the sampling method can deliver identical samples
+     across successive scans in the same query (assuming unchanging
+     parameters, seed value, and snapshot).
+     When this is <literal>false</>, the planner will not select plans that
+     would require scanning the sampled table more than once, since that
+     might result in inconsistent query output.
+    </para>
+   </listitem>
+  </varlistentry>
+ </variablelist>
+
+ <para>
+  The <type>TsmRoutine</> struct type is declared
+  in <filename>src/include/access/tsmapi.h</>; see that file for additional
+  details.
+ </para>
+
+ <para>
+  The table sampling methods included in the standard distribution are good
+  references when trying to write your own.  Look into
+  the <filename>src/backend/access/tablesample</> subdirectory of the source
+  tree for the built-in sampling methods, and into the <filename>contrib</>
+  subdirectory for add-on methods.
+ </para>
+
+ <sect1 id="tablesample-support-functions">
+  <title>Sampling Method Support Functions</title>
  
    <para>
-   The tablesample method must provide following set of functions:
+   The TSM handler function returns a palloc'd <type>TsmRoutine</> struct
+   containing pointers to the support functions described below.  Most of
+   the functions are required, but some are optional, and those pointers can
+   be NULL.
    </para>
  
    <para>
  <programlisting>
  void
-tsm_init (TableSampleDesc *desc,
-         uint32 seed, ...);
+SampleScanGetSampleSize (PlannerInfo *root,
+                         RelOptInfo *baserel,
+                         List *paramexprs,
+                         BlockNumber *pages,
+                         double *tuples);
  </programlisting>
-   Initialize the tablesample scan. The function is called at the beginning
-   of each relation scan.
+
+   This function is called during planning.  It must estimate the number of
+   relation pages that will be read during a sample scan, and the number of
+   tuples that will be selected by the scan.  (For example, these might be
+   determined by estimating the sampling fraction, and then multiplying
+   the <literal>baserel-&gt;pages</> and <literal>baserel-&gt;tuples</>
+   numbers by that, being sure to round the results to integral values.)
+   The <literal>paramexprs</> list holds the expression(s) that are
+   parameters to the <literal>TABLESAMPLE</> clause.  It is recommended to
+   use <function>estimate_expression_value()</> to try to reduce these
+   expressions to constants, if their values are needed for estimation
+   purposes; but the function must provide size estimates even if they cannot
+   be reduced, and it should not fail even if the values appear invalid
+   (remember that they're only estimates of what the run-time values will be).
+   The <literal>pages</> and <literal>tuples</> parameters are outputs.
    </para>
+
    <para>
-   Note that the first two parameters are required but you can specify
-   additional parameters which then will be used by the <command>TABLESAMPLE</>
-   clause to determine the required user input in the query itself.
-   This means that if your function will specify additional float4 parameter
-   named percent, the user will have to call the tablesample method with
-   expression which evaluates (or can be coerced) to float4.
-   For example this definition:
  <programlisting>
-tsm_init (TableSampleDesc *desc,
-          uint32 seed, float4 pct);
-</programlisting>
-Will lead to SQL call like this:
-<programlisting>
-... TABLESAMPLE yourmethod(0.5) ...
+void
+InitSampleScan (SampleScanState *node,
+                int eflags);
  </programlisting>
+
+   Initialize for execution of a SampleScan plan node.
+   This is called during executor startup.
+   It should perform any initialization needed before processing can start.
+   The <structname>SampleScanState</> node has already been created, but
+   its <structfield>tsm_state</> field is NULL.
+   The <function>InitSampleScan</> function can palloc whatever internal
+   state data is needed by the sampling method, and store a pointer to
+   it in <literal>node-&gt;tsm_state</>.
+   Information about the table to scan is accessible through other fields
+   of the <structname>SampleScanState</> node (but note that the
+   <literal>node-&gt;ss.ss_currentScanDesc</> scan descriptor is not set
+   up yet).
+   <literal>eflags</> contains flag bits describing the executor's
+   operating mode for this plan node.
    </para>
  
    <para>
-<programlisting>
-BlockNumber
-tsm_nextblock (TableSampleDesc *desc);
-</programlisting>
-   Returns the block number of next page to be scanned. InvalidBlockNumber
-   should be returned if the sampling has reached end of the relation.
+   When <literal>(eflags &amp; EXEC_FLAG_EXPLAIN_ONLY)</> is true,
+   the scan will not actually be performed, so this function should only do
+   the minimum required to make the node state valid for <command>EXPLAIN</>
+   and <function>EndSampleScan</>.
    </para>
  
    <para>
-<programlisting>
-OffsetNumber
-tsm_nexttuple (TableSampleDesc *desc, BlockNumber blockno,
-               OffsetNumber maxoffset);
-</programlisting>
-   Return next tuple offset for the current page. InvalidOffsetNumber should
-   be returned if the sampling has reached end of the page.
+   This function can be omitted (set the pointer to NULL), in which case
+   <function>BeginSampleScan</> must perform all initialization needed
+   by the sampling method.
    </para>
  
    <para>
  <programlisting>
  void
-tsm_end (TableSampleDesc *desc);
+BeginSampleScan (SampleScanState *node,
+                 Datum *params,
+                 int nparams,
+                 uint32 seed);
  </programlisting>
-   The scan has finished, cleanup any left over state.
+
+   Begin execution of a sampling scan.
+   This is called just before the first attempt to fetch a tuple, and
+   may be called again if the scan needs to be restarted.
+   Information about the table to scan is accessible through fields
+   of the <structname>SampleScanState</> node (but note that the
+   <literal>node-&gt;ss.ss_currentScanDesc</> scan descriptor is not set
+   up yet).
+   The <literal>params</> array, of length <literal>nparams</>, contains the
+   values of the parameters supplied in the <literal>TABLESAMPLE</> clause.
+   These will have the number and types specified in the sampling
+   method's <literal>parameterTypes</literal> list, and have been checked
+   to not be null.
+   <literal>seed</> contains a seed to use for any random numbers generated
+   within the sampling method; it is either a hash derived from the
+   <literal>REPEATABLE</> value if one was given, or the result
+   of <literal>random()</> if not.
    </para>
  
    <para>
-<programlisting>
-void
-tsm_reset (TableSampleDesc *desc);
-</programlisting>
-   The scan needs to rescan the relation again, reset any tablesample method
-   state.
+   This function may adjust the fields <literal>node-&gt;use_bulkread</>
+   and <literal>node-&gt;use_pagemode</>.
+   If <literal>node-&gt;use_bulkread</> is <literal>true</>, which it is by
+   default, the scan will use a buffer access strategy that encourages
+   recycling buffers after use.  It might be reasonable to set this
+   to <literal>false</> if the scan will visit only a small fraction of the
+   table's pages.
+   If <literal>node-&gt;use_pagemode</> is <literal>true</>, which it is by
+   default, the scan will perform visibility checking in a single pass for
+   all tuples on each visited page.  It might be reasonable to set this
+   to <literal>false</> if the scan will select only a small fraction of the
+   tuples on each visited page.  That will result in fewer tuple visibility
+   checks being performed, though each one will be more expensive because it
+   will require more locking.
+  </para>
+
+  <para>
+   If the sampling method is
+   marked <literal>repeatable_across_scans</literal>, it must be able to
+   select the same set of tuples during a rescan as it did originally; that is,
+   a fresh call of <function>BeginSampleScan</> must lead to selecting the
+   same tuples as before (if the <literal>TABLESAMPLE</> parameters
+   and seed don't change).
    </para>
  
    <para>
  <programlisting>
-void
-tsm_cost (PlannerInfo *root, Path *path, RelOptInfo *baserel,
-          List *args, BlockNumber *pages, double *tuples);
+BlockNumber
+NextSampleBlock (SampleScanState *node);
  </programlisting>
-   This function is used by optimizer to decide best plan and is also used
-   for output of <command>EXPLAIN</>.
+
+   Returns the block number of the next page to be scanned, or
+   <literal>InvalidBlockNumber</> if no pages remain to be scanned.
    </para>
  
    <para>
-   There is one more function which tablesampling method can implement in order
-   to gain more fine grained control over sampling. This function is optional:
+   This function can be omitted (set the pointer to NULL), in which case
+   the core code will perform a sequential scan of the entire relation.
+   Such a scan can use synchronized scanning, so that the sampling method
+   cannot assume that the relation pages are visited in the same order on
+   each scan.
    </para>
  
    <para>
  <programlisting>
-bool
-tsm_examinetuple (TableSampleDesc *desc, BlockNumber blockno,
-                  HeapTuple tuple, bool visible);
+OffsetNumber
+NextSampleTuple (SampleScanState *node,
+                 BlockNumber blockno,
+                 OffsetNumber maxoffset);
  </programlisting>
-   Function that enables the sampling method to examine contents of the tuple
-   (for example to collect some internal statistics). The return value of this
-   function is used to determine if the tuple should be returned to client.
-   Note that this function will receive even invisible tuples but it is not
-   allowed to return true for such tuple (if it does,
-   <productname>PostgreSQL</> will raise an error).
+
+   Returns the offset number of the next tuple to be sampled on the
+   specified page, or <literal>InvalidOffsetNumber</> if no tuples remain to
+   be sampled.  <literal>maxoffset</> is the largest offset number in use
+   on the page.
    </para>
  
+  <note>
+   <para>
+    <function>NextSampleTuple</> is not explicitly told which of the offset
+    numbers in the range <literal>1 .. maxoffset</> actually contain valid
+    tuples.  This is not normally a problem since the core code ignores
+    requests to sample missing or invisible tuples; that should not result in
+    any bias in the sample.  However, if necessary, the function can
+    examine <literal>node-&gt;ss.ss_currentScanDesc-&gt;rs_vistuples[]</>
+    to identify which tuples are valid and visible.  (This
+    requires <literal>node-&gt;use_pagemode</> to be <literal>true</>.)
+   </para>
+  </note>
+
+  <note>
+   <para>
+    <function>NextSampleTuple</> must <emphasis>not</> assume
+    that <literal>blockno</> is the same page number returned by the most
+    recent <function>NextSampleBlock</> call.  It was returned by some
+    previous <function>NextSampleBlock</> call, but the core code is allowed
+    to call <function>NextSampleBlock</> in advance of actually scanning
+    pages, so as to support prefetching.  It is OK to assume that once
+    sampling of a given page begins, successive <function>NextSampleTuple</>
+    calls all refer to the same page until <literal>InvalidOffsetNumber</> is
+    returned.
+   </para>
+  </note>
+
    <para>
-  As you can see most of the tablesample method interfaces get the
-  <structname>TableSampleDesc</> as a first parameter. This structure holds
-  state of the current scan and also provides storage for the tablesample
-  method's state. It is defined as following:
  <programlisting>
-typedef struct TableSampleDesc {
-    HeapScanDesc    heapScan;
-    TupleDesc       tupDesc;
-
-    void           *tsmdata;
-} TableSampleDesc;
+void
+EndSampleScan (SampleScanState *node);
  </programlisting>
-  Where <structfield>heapScan</> is the descriptor of the physical table scan.
-  It's possible to get table size info from it. The <structfield>tupDesc</>
-  represents the tuple descriptor of the tuples returned by the scan and passed
-  to the <function>tsm_examinetuple()</> interface. The <structfield>tsmdata</>
-  can be used by tablesample method itself to store any state info it might
-  need during the scan. If used by the method, it should be <function>pfree</>d
-  in <function>tsm_end()</> function.
+
+   End the scan and release resources.  It is normally not important
+   to release palloc'd memory, but any externally-visible resources
+   should be cleaned up.
+   This function can be omitted (set the pointer to NULL) in the common
+   case where no such resources exist.
    </para>
+
   </sect1>
  
  </chapter>
diff --git a/doc/src/sgml/tsm-system-rows.sgml b/doc/src/sgml/tsm-system-rows.sgml

index 0c2f1779c9ad8750db938a6edb38480ae25b57cf..93aa5366649bda2b93f19f141982a77b8f1e8a01 100644 (file)
--- a/doc/src/sgml/tsm-system-rows.sgml
+++ b/doc/src/sgml/tsm-system-rows.sgml
@@ -8,24 +8,37 @@
   </indexterm>
  
   <para>
-  The <filename>tsm_system_rows</> module provides the tablesample method
-  <literal>SYSTEM_ROWS</literal>, which can be used inside the
-  <command>TABLESAMPLE</command> clause of a <command>SELECT</command>.
+  The <filename>tsm_system_rows</> module provides the table sampling method
+  <literal>SYSTEM_ROWS</literal>, which can be used in
+  the <literal>TABLESAMPLE</> clause of a <xref linkend="sql-select">
+  command.
   </para>
  
   <para>
-  This tablesample method uses a linear probing algorithm to read sample
-  of a table and uses actual number of rows as limit (unlike the
-  <literal>SYSTEM</literal> tablesample method which limits by percentage
-  of a table).
+  This table sampling method accepts a single integer argument that is the
+  maximum number of rows to read.  The resulting sample will always contain
+  exactly that many rows, unless the table does not contain enough rows, in
+  which case the whole table is selected.
+ </para>
+
+ <para>
+  Like the built-in <literal>SYSTEM</literal> sampling
+  method, <literal>SYSTEM_ROWS</literal> performs block-level sampling, so
+  that the sample is not completely random but may be subject to clustering
+  effects, especially if only a small number of rows are requested.
+ </para>
+
+ <para>
+  <literal>SYSTEM_ROWS</literal> does not support
+  the <literal>REPEATABLE</literal> clause.
   </para>
  
   <sect2>
    <title>Examples</title>
  
    <para>
-   Here is an example of selecting sample of a table with
-   <literal>SYSTEM_ROWS</>. First install the extension:
+   Here is an example of selecting a sample of a table with
+   <literal>SYSTEM_ROWS</>.  First install the extension:
    </para>
  
  <programlisting>
@@ -33,8 +46,7 @@ CREATE EXTENSION tsm_system_rows;
  </programlisting>
  
    <para>
-   Then you can use it in <command>SELECT</command> command same way as other
-   tablesample methods:
+   Then you can use it in a <command>SELECT</command> command, for instance:
  
  <programlisting>
  SELECT * FROM my_table TABLESAMPLE SYSTEM_ROWS(100);
@@ -42,8 +54,9 @@ SELECT * FROM my_table TABLESAMPLE SYSTEM_ROWS(100);
    </para>
  
    <para>
-   The above command will return a sample of 100 rows from the table my_table
-   (less if the table does not have 100 visible rows).
+   This command will return a sample of 100 rows from the
+   table <structname>my_table</> (unless the table does not have 100
+   visible rows, in which case all its rows are returned).
    </para>
   </sect2>
  
diff --git a/doc/src/sgml/tsm-system-time.sgml b/doc/src/sgml/tsm-system-time.sgml

index 2343ab16d4f2bf6e3cadd4f599822f27106c26ab..3f8ff1a026f2e1a719dccb2ab7cd29ed6f293139 100644 (file)
--- a/doc/src/sgml/tsm-system-time.sgml
+++ b/doc/src/sgml/tsm-system-time.sgml
@@ -8,25 +8,39 @@
   </indexterm>
  
   <para>
-  The <filename>tsm_system_time</> module provides the tablesample method
-  <literal>SYSTEM_TIME</literal>, which can be used inside the
-  <command>TABLESAMPLE</command> clause of a <command>SELECT</command>.
+  The <filename>tsm_system_time</> module provides the table sampling method
+  <literal>SYSTEM_TIME</literal>, which can be used in
+  the <literal>TABLESAMPLE</> clause of a <xref linkend="sql-select">
+  command.
   </para>
  
   <para>
-  This tablesample method uses a linear probing algorithm to read sample
-  of a table and uses time in milliseconds as limit (unlike the
-  <literal>SYSTEM</literal> tablesample method which limits by percentage
-  of a table). This gives you some control over the length of execution
-  of your query.
+  This table sampling method accepts a single floating-point argument that
+  is the maximum number of milliseconds to spend reading the table.  This
+  gives you direct control over how long the query takes, at the price that
+  the size of the sample becomes hard to predict.  The resulting sample will
+  contain as many rows as could be read in the specified time, unless the
+  whole table has been read first.
+ </para>
+
+ <para>
+  Like the built-in <literal>SYSTEM</literal> sampling
+  method, <literal>SYSTEM_TIME</literal> performs block-level sampling, so
+  that the sample is not completely random but may be subject to clustering
+  effects, especially if only a small number of rows are selected.
+ </para>
+
+ <para>
+  <literal>SYSTEM_TIME</literal> does not support
+  the <literal>REPEATABLE</literal> clause.
   </para>
  
   <sect2>
    <title>Examples</title>
  
    <para>
-   Here is an example of selecting sample of a table with
-   <literal>SYSTEM_TIME</>. First install the extension:
+   Here is an example of selecting a sample of a table with
+   <literal>SYSTEM_TIME</>.  First install the extension:
    </para>
  
  <programlisting>
@@ -34,8 +48,7 @@ CREATE EXTENSION tsm_system_time;
  </programlisting>
  
    <para>
-   Then you can use it in a <command>SELECT</command> command the same way as
-   other tablesample methods:
+   Then you can use it in a <command>SELECT</command> command, for instance:
  
  <programlisting>
  SELECT * FROM my_table TABLESAMPLE SYSTEM_TIME(1000);
@@ -43,8 +56,9 @@ SELECT * FROM my_table TABLESAMPLE SYSTEM_TIME(1000);
    </para>
  
    <para>
-   The above command will return as large a sample of my_table as it can read in
-   1 second (or less if it reads whole table faster).
+   This command will return as large a sample of <structname>my_table</> as
+   it can read in 1 second (1000 milliseconds).  Of course, if the whole
+   table can be read in under 1 second, all its rows will be returned.
    </para>
   </sect2>
  
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c

index 6f4ff2718fed8d224837d2aeb46da44cb5cadecd..050efdc4806a716df0f0515619fe7a04bd9577d5 100644 (file)
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -80,8 +80,11 @@ bool     synchronize_seqscans = true;
  static HeapScanDesc heap_beginscan_internal(Relation relation,
                         Snapshot snapshot,
                         int nkeys, ScanKey key,
-                     bool allow_strat, bool allow_sync, bool allow_pagemode,
-                       bool is_bitmapscan, bool is_samplescan,
+                       bool allow_strat,
+                       bool allow_sync,
+                       bool allow_pagemode,
+                       bool is_bitmapscan,
+                       bool is_samplescan,
                         bool temp_snap);
  static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
                     TransactionId xid, CommandId cid, int options);
@@ -207,7 +210,7 @@ static const int MultiXactStatusLock[MaxMultiXactStatus + 1] =
   * ----------------
   */
  static void
-initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
+initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
  {
     bool        allow_strat;
     bool        allow_sync;
@@ -257,12 +260,12 @@ initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
         scan->rs_strategy = NULL;
     }
  
-   if (is_rescan)
+   if (keep_startblock)
     {
         /*
-        * If rescan, keep the previous startblock setting so that rewinding a
-        * cursor doesn't generate surprising results.  Reset the syncscan
-        * setting, though.
+        * When rescanning, we want to keep the previous startblock setting,
+        * so that rewinding a cursor doesn't generate surprising results.
+        * Reset the active syncscan setting, though.
          */
         scan->rs_syncscan = (allow_sync && synchronize_seqscans);
     }
@@ -1313,6 +1316,10 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode,
  /* ----------------
   *     heap_beginscan  - begin relation scan
   *
+ * heap_beginscan is the "standard" case.
+ *
+ * heap_beginscan_catalog differs in setting up its own temporary snapshot.
+ *
   * heap_beginscan_strat offers an extended API that lets the caller control
   * whether a nondefault buffer access strategy can be used, and whether
   * syncscan can be chosen (possibly resulting in the scan not starting from
@@ -1323,8 +1330,11 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode,
   * really quite unlike a standard seqscan, there is just enough commonality
   * to make it worth using the same data structure.
   *
- * heap_beginscan_samplingscan is alternate entry point for setting up a
- * HeapScanDesc for a TABLESAMPLE scan.
+ * heap_beginscan_sampling is an alternative entry point for setting up a
+ * HeapScanDesc for a TABLESAMPLE scan.  As with bitmap scans, it's worth
+ * using the same data structure although the behavior is rather different.
+ * In addition to the options offered by heap_beginscan_strat, this call
+ * also allows control of whether page-mode visibility checking is used.
   * ----------------
   */
  HeapScanDesc
@@ -1366,18 +1376,22 @@ heap_beginscan_bm(Relation relation, Snapshot snapshot,
  HeapScanDesc
  heap_beginscan_sampling(Relation relation, Snapshot snapshot,
                         int nkeys, ScanKey key,
-                       bool allow_strat, bool allow_pagemode)
+                     bool allow_strat, bool allow_sync, bool allow_pagemode)
  {
     return heap_beginscan_internal(relation, snapshot, nkeys, key,
-                                  allow_strat, false, allow_pagemode,
+                                  allow_strat, allow_sync, allow_pagemode,
                                    false, true, false);
  }
  
  static HeapScanDesc
  heap_beginscan_internal(Relation relation, Snapshot snapshot,
                         int nkeys, ScanKey key,
-                     bool allow_strat, bool allow_sync, bool allow_pagemode,
-                     bool is_bitmapscan, bool is_samplescan, bool temp_snap)
+                       bool allow_strat,
+                       bool allow_sync,
+                       bool allow_pagemode,
+                       bool is_bitmapscan,
+                       bool is_samplescan,
+                       bool temp_snap)
  {
     HeapScanDesc scan;
  
@@ -1461,6 +1475,27 @@ heap_rescan(HeapScanDesc scan,
     initscan(scan, key, true);
  }
  
+/* ----------------
+ *     heap_rescan_set_params  - restart a relation scan after changing params
+ *
+ * This call allows changing the buffer strategy, syncscan, and pagemode
+ * options before starting a fresh scan.  Note that although the actual use
+ * of syncscan might change (effectively, enabling or disabling reporting),
+ * the previously selected startblock will be kept.
+ * ----------------
+ */
+void
+heap_rescan_set_params(HeapScanDesc scan, ScanKey key,
+                      bool allow_strat, bool allow_sync, bool allow_pagemode)
+{
+   /* adjust parameters */
+   scan->rs_allow_strat = allow_strat;
+   scan->rs_allow_sync = allow_sync;
+   scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(scan->rs_snapshot);
+   /* ... and rescan */
+   heap_rescan(scan, key);
+}
+
  /* ----------------
   *     heap_endscan    - end relation scan
   *
diff --git a/src/backend/access/tablesample/Makefile b/src/backend/access/tablesample/Makefile

index 46eeb59f9c468075c53d241fcb529175461e7a64..68d9ab281472d976e41aea3350fa768b5c296160 100644 (file)
--- a/src/backend/access/tablesample/Makefile
+++ b/src/backend/access/tablesample/Makefile
@@ -1,10 +1,10 @@
  #-------------------------------------------------------------------------
  #
  # Makefile--
-#    Makefile for utils/tablesample
+#    Makefile for access/tablesample
  #
  # IDENTIFICATION
-#    src/backend/utils/tablesample/Makefile
+#    src/backend/access/tablesample/Makefile
  #
  #-------------------------------------------------------------------------
  
@@ -12,6 +12,6 @@ subdir = src/backend/access/tablesample
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
-OBJS = tablesample.o system.o bernoulli.o
+OBJS = bernoulli.o system.o tablesample.o
  
  include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/tablesample/bernoulli.c b/src/backend/access/tablesample/bernoulli.c

index 0a539008221a5592febbeb9cf1a652eb9da0a1d6..cf88f95e757b1754da8b4d074c9abfc367560208 100644 (file)
--- a/src/backend/access/tablesample/bernoulli.c
+++ b/src/backend/access/tablesample/bernoulli.c
@@ -1,233 +1,231 @@
  /*-------------------------------------------------------------------------
   *
   * bernoulli.c
- *   interface routines for BERNOULLI tablesample method
+ *   support routines for BERNOULLI tablesample method
   *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * To ensure repeatability of samples, it is necessary that selection of a
+ * given tuple be history-independent; otherwise syncscanning would break
+ * repeatability, to say nothing of logically-irrelevant maintenance such
+ * as physical extension or shortening of the relation.
+ *
+ * To achieve that, we proceed by hashing each candidate TID together with
+ * the active seed, and then selecting it if the hash is less than the
+ * cutoff value computed from the selection probability by BeginSampleScan.
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   src/backend/utils/tablesample/bernoulli.c
+ *   src/backend/access/tablesample/bernoulli.c
   *
   *-------------------------------------------------------------------------
   */
  
  #include "postgres.h"
  
-#include "fmgr.h"
+#ifdef _MSC_VER
+#include <float.h>             /* for _isnan */
+#endif
+#include <math.h>
  
-#include "access/tablesample.h"
-#include "access/relscan.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
+#include "access/hash.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
  #include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
-#include "utils/sampling.h"
+#include "optimizer/cost.h"
+#include "utils/builtins.h"
  
  
-/* tsdesc */
+/* Private state */
  typedef struct
  {
+   uint64      cutoff;         /* select tuples with hash less than this */
     uint32      seed;           /* random seed */
-   BlockNumber startblock;     /* starting block, we use ths for syncscan
-                                * support */
-   BlockNumber nblocks;        /* number of blocks */
-   BlockNumber blockno;        /* current block */
-   float4      probability;    /* probabilty that tuple will be returned
-                                * (0.0-1.0) */
     OffsetNumber lt;            /* last tuple returned from current block */
-   SamplerRandomState randstate;       /* random generator tsdesc */
  } BernoulliSamplerData;
  
+
+static void bernoulli_samplescangetsamplesize(PlannerInfo *root,
+                                 RelOptInfo *baserel,
+                                 List *paramexprs,
+                                 BlockNumber *pages,
+                                 double *tuples);
+static void bernoulli_initsamplescan(SampleScanState *node,
+                        int eflags);
+static void bernoulli_beginsamplescan(SampleScanState *node,
+                         Datum *params,
+                         int nparams,
+                         uint32 seed);
+static OffsetNumber bernoulli_nextsampletuple(SampleScanState *node,
+                         BlockNumber blockno,
+                         OffsetNumber maxoffset);
+
+
  /*
- * Initialize the state.
+ * Create a TsmRoutine descriptor for the BERNOULLI method.
   */
  Datum
-tsm_bernoulli_init(PG_FUNCTION_ARGS)
+tsm_bernoulli_handler(PG_FUNCTION_ARGS)
  {
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-   uint32      seed = PG_GETARG_UINT32(1);
-   float4      percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
-   HeapScanDesc scan = tsdesc->heapScan;
-   BernoulliSamplerData *sampler;
+   TsmRoutine *tsm = makeNode(TsmRoutine);
+
+   tsm->parameterTypes = list_make1_oid(FLOAT4OID);
+   tsm->repeatable_across_queries = true;
+   tsm->repeatable_across_scans = true;
+   tsm->SampleScanGetSampleSize = bernoulli_samplescangetsamplesize;
+   tsm->InitSampleScan = bernoulli_initsamplescan;
+   tsm->BeginSampleScan = bernoulli_beginsamplescan;
+   tsm->NextSampleBlock = NULL;
+   tsm->NextSampleTuple = bernoulli_nextsampletuple;
+   tsm->EndSampleScan = NULL;
+
+   PG_RETURN_POINTER(tsm);
+}
  
-   if (percent < 0 || percent > 100)
-       ereport(ERROR,
-               (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-                errmsg("invalid sample size"),
-                errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
+/*
+ * Sample size estimation.
+ */
+static void
+bernoulli_samplescangetsamplesize(PlannerInfo *root,
+                                 RelOptInfo *baserel,
+                                 List *paramexprs,
+                                 BlockNumber *pages,
+                                 double *tuples)
+{
+   Node       *pctnode;
+   float4      samplefract;
  
-   sampler = palloc0(sizeof(BernoulliSamplerData));
+   /* Try to extract an estimate for the sample percentage */
+   pctnode = (Node *) linitial(paramexprs);
+   pctnode = estimate_expression_value(root, pctnode);
  
-   /* Remember initial values for reinit */
-   sampler->seed = seed;
-   sampler->startblock = scan->rs_startblock;
-   sampler->nblocks = scan->rs_nblocks;
-   sampler->blockno = InvalidBlockNumber;
-   sampler->probability = percent / 100;
-   sampler->lt = InvalidOffsetNumber;
-   sampler_random_init_state(sampler->seed, sampler->randstate);
+   if (IsA(pctnode, Const) &&
+       !((Const *) pctnode)->constisnull)
+   {
+       samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
+       if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
+           samplefract /= 100.0f;
+       else
+       {
+           /* Default samplefract if the value is bogus */
+           samplefract = 0.1f;
+       }
+   }
+   else
+   {
+       /* Default samplefract if we didn't obtain a non-null Const */
+       samplefract = 0.1f;
+   }
+
+   /* We'll visit all pages of the baserel */
+   *pages = baserel->pages;
  
-   tsdesc->tsmdata = (void *) sampler;
+   *tuples = clamp_row_est(baserel->tuples * samplefract);
+}
  
-   PG_RETURN_VOID();
+/*
+ * Initialize during executor setup.
+ */
+static void
+bernoulli_initsamplescan(SampleScanState *node, int eflags)
+{
+   node->tsm_state = palloc0(sizeof(BernoulliSamplerData));
  }
  
  /*
- * Get next block number to read or InvalidBlockNumber if we are at the
- * end of the relation.
+ * Examine parameters and prepare for a sample scan.
   */
-Datum
-tsm_bernoulli_nextblock(PG_FUNCTION_ARGS)
+static void
+bernoulli_beginsamplescan(SampleScanState *node,
+                         Datum *params,
+                         int nparams,
+                         uint32 seed)
  {
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-   BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata;
+   BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state;
+   double      percent = DatumGetFloat4(params[0]);
+
+   if (percent < 0 || percent > 100 || isnan(percent))
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+                errmsg("sample percentage must be between 0 and 100")));
  
     /*
-    * Bernoulli sampling scans all blocks on the table and supports syncscan
-    * so loop from startblock to startblock instead of from 0 to nblocks.
+    * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
+    * store that as a uint64, of course.  Note that this gives strictly
+    * correct behavior at the limits of zero or one probability.
      */
-   if (sampler->blockno == InvalidBlockNumber)
-       sampler->blockno = sampler->startblock;
-   else
-   {
-       sampler->blockno++;
-
-       if (sampler->blockno >= sampler->nblocks)
-           sampler->blockno = 0;
-
-       if (sampler->blockno == sampler->startblock)
-           PG_RETURN_UINT32(InvalidBlockNumber);
-   }
+   sampler->cutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
+   sampler->seed = seed;
+   sampler->lt = InvalidOffsetNumber;
  
-   PG_RETURN_UINT32(sampler->blockno);
+   /*
+    * Use bulkread, since we're scanning all pages.  But pagemode visibility
+    * checking is a win only at larger sampling fractions.  The 25% cutoff
+    * here is based on very limited experimentation.
+    */
+   node->use_bulkread = true;
+   node->use_pagemode = (percent >= 25);
  }
  
  /*
- * Get next tuple from current block.
- *
- * This method implements the main logic in bernoulli sampling.
- * The algorithm simply generates new random number (in 0.0-1.0 range) and if
- * it falls within user specified probability (in the same range) return the
- * tuple offset.
- *
- * It is ok here to return tuple offset without knowing if tuple is visible
- * and not check it via examinetuple. The reason for that is that we do the
- * coinflip (random number generation) for every tuple in the table. Since all
- * tuples have same probability of being returned the visible and invisible
- * tuples will be returned in same ratio as they have in the actual table.
- * This means that there is no skew towards either visible or invisible tuples
- * and the number of visible tuples returned from the executor node should
- * match the fraction of visible tuples which was specified by user.
+ * Select next sampled tuple in current block.
   *
- * This is faster than doing the coinflip in examinetuple because we don't
- * have to do visibility checks on uninteresting tuples.
+ * It is OK here to return an offset without knowing if the tuple is visible
+ * (or even exists).  The reason is that we do the coinflip for every tuple
+ * offset in the table.  Since all tuples have the same probability of being
+ * returned, it doesn't matter if we do extra coinflips for invisible tuples.
   *
- * If we reach end of the block return InvalidOffsetNumber which tells
+ * When we reach the end of the block, return InvalidOffsetNumber, which tells
   * SampleScan to go to next block.
   */
-Datum
-tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS)
+static OffsetNumber
+bernoulli_nextsampletuple(SampleScanState *node,
+                         BlockNumber blockno,
+                         OffsetNumber maxoffset)
  {
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-   OffsetNumber maxoffset = PG_GETARG_UINT16(2);
-   BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata;
+   BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state;
     OffsetNumber tupoffset = sampler->lt;
-   float4      probability = sampler->probability;
+   uint32      hashinput[3];
  
+   /* Advance to first/next tuple in block */
     if (tupoffset == InvalidOffsetNumber)
         tupoffset = FirstOffsetNumber;
     else
         tupoffset++;
  
     /*
-    * Loop over tuple offsets until the random generator returns value that
-    * is within the probability of returning the tuple or until we reach end
-    * of the block.
+    * We compute the hash by applying hash_any to an array of 3 uint32's
+    * containing the block, offset, and seed.  This is efficient to set up,
+    * and with the current implementation of hash_any, it gives
+    * machine-independent results, which is a nice property for regression
+    * testing.
      *
-    * (This is our implementation of bernoulli trial)
+    * These words in the hash input are the same throughout the block:
      */
-   while (sampler_random_fract(sampler->randstate) > probability)
+   hashinput[0] = blockno;
+   hashinput[2] = sampler->seed;
+
+   /*
+    * Loop over tuple offsets until finding suitable TID or reaching end of
+    * block.
+    */
+   for (; tupoffset <= maxoffset; tupoffset++)
     {
-       tupoffset++;
+       uint32      hash;
  
-       if (tupoffset > maxoffset)
+       hashinput[1] = tupoffset;
+
+       hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
+                                      (int) sizeof(hashinput)));
+       if (hash < sampler->cutoff)
             break;
     }
  
     if (tupoffset > maxoffset)
-       /* Tell SampleScan that we want next block. */
         tupoffset = InvalidOffsetNumber;
  
     sampler->lt = tupoffset;
  
-   PG_RETURN_UINT16(tupoffset);
-}
-
-/*
- * Cleanup method.
- */
-Datum
-tsm_bernoulli_end(PG_FUNCTION_ARGS)
-{
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-
-   pfree(tsdesc->tsmdata);
-
-   PG_RETURN_VOID();
-}
-
-/*
- * Reset tsdesc (called by ReScan).
- */
-Datum
-tsm_bernoulli_reset(PG_FUNCTION_ARGS)
-{
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-   BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata;
-
-   sampler->blockno = InvalidBlockNumber;
-   sampler->lt = InvalidOffsetNumber;
-   sampler_random_init_state(sampler->seed, sampler->randstate);
-
-   PG_RETURN_VOID();
-}
-
-/*
- * Costing function.
- */
-Datum
-tsm_bernoulli_cost(PG_FUNCTION_ARGS)
-{
-   PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-   Path       *path = (Path *) PG_GETARG_POINTER(1);
-   RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
-   List       *args = (List *) PG_GETARG_POINTER(3);
-   BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
-   double     *tuples = (double *) PG_GETARG_POINTER(5);
-   Node       *pctnode;
-   float4      samplesize;
-
-   *pages = baserel->pages;
-
-   pctnode = linitial(args);
-   pctnode = estimate_expression_value(root, pctnode);
-
-   if (IsA(pctnode, RelabelType))
-       pctnode = (Node *) ((RelabelType *) pctnode)->arg;
-
-   if (IsA(pctnode, Const))
-   {
-       samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
-       samplesize /= 100.0;
-   }
-   else
-   {
-       /* Default samplesize if the estimation didn't return Const. */
-       samplesize = 0.1f;
-   }
-
-   *tuples = path->rows * samplesize;
-   path->rows = *tuples;
-
-   PG_RETURN_VOID();
+   return tupoffset;
  }
diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c

index 1d834369a4bd11fbf6127d9d8c8d7e3e4859ca01..43c5dab71619a7a6d8e2ee22bc306e56674191c0 100644 (file)
--- a/src/backend/access/tablesample/system.c
+++ b/src/backend/access/tablesample/system.c
@@ -1,186 +1,260 @@
  /*-------------------------------------------------------------------------
   *
   * system.c
- *   interface routines for system tablesample method
+ *   support routines for SYSTEM tablesample method
   *
+ * To ensure repeatability of samples, it is necessary that selection of a
+ * given tuple be history-independent; otherwise syncscanning would break
+ * repeatability, to say nothing of logically-irrelevant maintenance such
+ * as physical extension or shortening of the relation.
   *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * To achieve that, we proceed by hashing each candidate block number together
+ * with the active seed, and then selecting it if the hash is less than the
+ * cutoff value computed from the selection probability by BeginSampleScan.
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   src/backend/utils/tablesample/system.c
+ *   src/backend/access/tablesample/system.c
   *
   *-------------------------------------------------------------------------
   */
  
  #include "postgres.h"
  
-#include "fmgr.h"
+#ifdef _MSC_VER
+#include <float.h>             /* for _isnan */
+#endif
+#include <math.h>
  
-#include "access/tablesample.h"
+#include "access/hash.h"
  #include "access/relscan.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
  #include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
-#include "utils/sampling.h"
+#include "optimizer/cost.h"
+#include "utils/builtins.h"
  
  
-/*
- * State
- */
+/* Private state */
  typedef struct
  {
-   BlockSamplerData bs;
+   uint64      cutoff;         /* select blocks with hash less than this */
     uint32      seed;           /* random seed */
-   BlockNumber nblocks;        /* number of block in relation */
-   int         samplesize;     /* number of blocks to return */
+   BlockNumber nextblock;      /* next block to consider sampling */
     OffsetNumber lt;            /* last tuple returned from current block */
  } SystemSamplerData;
  
  
-/*
- * Initializes the state.
- */
-Datum
-tsm_system_init(PG_FUNCTION_ARGS)
-{
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-   uint32      seed = PG_GETARG_UINT32(1);
-   float4      percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
-   HeapScanDesc scan = tsdesc->heapScan;
-   SystemSamplerData *sampler;
+static void system_samplescangetsamplesize(PlannerInfo *root,
+                              RelOptInfo *baserel,
+                              List *paramexprs,
+                              BlockNumber *pages,
+                              double *tuples);
+static void system_initsamplescan(SampleScanState *node,
+                     int eflags);
+static void system_beginsamplescan(SampleScanState *node,
+                      Datum *params,
+                      int nparams,
+                      uint32 seed);
+static BlockNumber system_nextsampleblock(SampleScanState *node);
+static OffsetNumber system_nextsampletuple(SampleScanState *node,
+                      BlockNumber blockno,
+                      OffsetNumber maxoffset);
  
-   if (percent < 0 || percent > 100)
-       ereport(ERROR,
-               (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-                errmsg("invalid sample size"),
-                errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
-
-   sampler = palloc0(sizeof(SystemSamplerData));
-
-   /* Remember initial values for reinit */
-   sampler->seed = seed;
-   sampler->nblocks = scan->rs_nblocks;
-   sampler->samplesize = 1 + (int) (sampler->nblocks * (percent / 100.0));
-   sampler->lt = InvalidOffsetNumber;
-
-   BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
-                     sampler->seed);
-
-   tsdesc->tsmdata = (void *) sampler;
-
-   PG_RETURN_VOID();
-}
  
  /*
- * Get next block number or InvalidBlockNumber when we're done.
- *
- * Uses the same logic as ANALYZE for picking the random blocks.
+ * Create a TsmRoutine descriptor for the SYSTEM method.
   */
  Datum
-tsm_system_nextblock(PG_FUNCTION_ARGS)
+tsm_system_handler(PG_FUNCTION_ARGS)
  {
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-   SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-   BlockNumber blockno;
-
-   if (!BlockSampler_HasMore(&sampler->bs))
-       PG_RETURN_UINT32(InvalidBlockNumber);
-
-   blockno = BlockSampler_Next(&sampler->bs);
-
-   PG_RETURN_UINT32(blockno);
+   TsmRoutine *tsm = makeNode(TsmRoutine);
+
+   tsm->parameterTypes = list_make1_oid(FLOAT4OID);
+   tsm->repeatable_across_queries = true;
+   tsm->repeatable_across_scans = true;
+   tsm->SampleScanGetSampleSize = system_samplescangetsamplesize;
+   tsm->InitSampleScan = system_initsamplescan;
+   tsm->BeginSampleScan = system_beginsamplescan;
+   tsm->NextSampleBlock = system_nextsampleblock;
+   tsm->NextSampleTuple = system_nextsampletuple;
+   tsm->EndSampleScan = NULL;
+
+   PG_RETURN_POINTER(tsm);
  }
  
  /*
- * Get next tuple offset in current block or InvalidOffsetNumber if we are done
- * with this block.
+ * Sample size estimation.
   */
-Datum
-tsm_system_nexttuple(PG_FUNCTION_ARGS)
+static void
+system_samplescangetsamplesize(PlannerInfo *root,
+                              RelOptInfo *baserel,
+                              List *paramexprs,
+                              BlockNumber *pages,
+                              double *tuples)
  {
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-   OffsetNumber maxoffset = PG_GETARG_UINT16(2);
-   SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-   OffsetNumber tupoffset = sampler->lt;
+   Node       *pctnode;
+   float4      samplefract;
  
-   if (tupoffset == InvalidOffsetNumber)
-       tupoffset = FirstOffsetNumber;
-   else
-       tupoffset++;
+   /* Try to extract an estimate for the sample percentage */
+   pctnode = (Node *) linitial(paramexprs);
+   pctnode = estimate_expression_value(root, pctnode);
  
-   if (tupoffset > maxoffset)
-       tupoffset = InvalidOffsetNumber;
+   if (IsA(pctnode, Const) &&
+       !((Const *) pctnode)->constisnull)
+   {
+       samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
+       if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
+           samplefract /= 100.0f;
+       else
+       {
+           /* Default samplefract if the value is bogus */
+           samplefract = 0.1f;
+       }
+   }
+   else
+   {
+       /* Default samplefract if we didn't obtain a non-null Const */
+       samplefract = 0.1f;
+   }
  
-   sampler->lt = tupoffset;
+   /* We'll visit a sample of the pages ... */
+   *pages = clamp_row_est(baserel->pages * samplefract);
  
-   PG_RETURN_UINT16(tupoffset);
+   /* ... and hopefully get a representative number of tuples from them */
+   *tuples = clamp_row_est(baserel->tuples * samplefract);
  }
  
  /*
- * Cleanup method.
+ * Initialize during executor setup.
   */
-Datum
-tsm_system_end(PG_FUNCTION_ARGS)
+static void
+system_initsamplescan(SampleScanState *node, int eflags)
  {
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-
-   pfree(tsdesc->tsmdata);
-
-   PG_RETURN_VOID();
+   node->tsm_state = palloc0(sizeof(SystemSamplerData));
  }
  
  /*
- * Reset state (called by ReScan).
+ * Examine parameters and prepare for a sample scan.
   */
-Datum
-tsm_system_reset(PG_FUNCTION_ARGS)
+static void
+system_beginsamplescan(SampleScanState *node,
+                      Datum *params,
+                      int nparams,
+                      uint32 seed)
  {
-   TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-   SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+   SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+   double      percent = DatumGetFloat4(params[0]);
  
+   if (percent < 0 || percent > 100 || isnan(percent))
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+                errmsg("sample percentage must be between 0 and 100")));
+
+   /*
+    * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
+    * store that as a uint64, of course.  Note that this gives strictly
+    * correct behavior at the limits of zero or one probability.
+    */
+   sampler->cutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
+   sampler->seed = seed;
+   sampler->nextblock = 0;
     sampler->lt = InvalidOffsetNumber;
-   BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
-                     sampler->seed);
  
-   PG_RETURN_VOID();
+   /*
+    * Bulkread buffer access strategy probably makes sense unless we're
+    * scanning a very small fraction of the table.  The 1% cutoff here is a
+    * guess.  We should use pagemode visibility checking, since we scan all
+    * tuples on each selected page.
+    */
+   node->use_bulkread = (percent >= 1);
+   node->use_pagemode = true;
  }
  
  /*
- * Costing function.
+ * Select next block to sample.
   */
-Datum
-tsm_system_cost(PG_FUNCTION_ARGS)
+static BlockNumber
+system_nextsampleblock(SampleScanState *node)
  {
-   PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-   Path       *path = (Path *) PG_GETARG_POINTER(1);
-   RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
-   List       *args = (List *) PG_GETARG_POINTER(3);
-   BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
-   double     *tuples = (double *) PG_GETARG_POINTER(5);
-   Node       *pctnode;
-   float4      samplesize;
+   SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+   HeapScanDesc scan = node->ss.ss_currentScanDesc;
+   BlockNumber nextblock = sampler->nextblock;
+   uint32      hashinput[2];
+
+   /*
+    * We compute the hash by applying hash_any to an array of 2 uint32's
+    * containing the block number and seed.  This is efficient to set up, and
+    * with the current implementation of hash_any, it gives
+    * machine-independent results, which is a nice property for regression
+    * testing.
+    *
+    * The seed word of the hash input is the same for every candidate block:
+    */
+   hashinput[1] = sampler->seed;
+
+   /*
+    * Loop over block numbers until finding suitable block or reaching end of
+    * relation.
+    */
+   for (; nextblock < scan->rs_nblocks; nextblock++)
+   {
+       uint32      hash;
  
-   pctnode = linitial(args);
-   pctnode = estimate_expression_value(root, pctnode);
+       hashinput[0] = nextblock;
  
-   if (IsA(pctnode, RelabelType))
-       pctnode = (Node *) ((RelabelType *) pctnode)->arg;
+       hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
+                                      (int) sizeof(hashinput)));
+       if (hash < sampler->cutoff)
+           break;
+   }
  
-   if (IsA(pctnode, Const))
+   if (nextblock < scan->rs_nblocks)
     {
-       samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
-       samplesize /= 100.0;
+       /* Found a suitable block; remember where we should start next time */
+       sampler->nextblock = nextblock + 1;
+       return nextblock;
     }
+
+   /* Done, but let's reset nextblock to 0 for safety. */
+   sampler->nextblock = 0;
+   return InvalidBlockNumber;
+}
+
+/*
+ * Select next sampled tuple in current block.
+ *
+ * In block sampling, we just want to sample all the tuples in each selected
+ * block.
+ *
+ * It is OK here to return an offset without knowing if the tuple is visible
+ * (or even exists); nodeSamplescan.c will deal with that.
+ *
+ * When we reach end of the block, return InvalidOffsetNumber which tells
+ * SampleScan to go to next block.
+ */
+static OffsetNumber
+system_nextsampletuple(SampleScanState *node,
+                      BlockNumber blockno,
+                      OffsetNumber maxoffset)
+{
+   SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+   OffsetNumber tupoffset = sampler->lt;
+
+   /* Advance to next possible offset on page */
+   if (tupoffset == InvalidOffsetNumber)
+       tupoffset = FirstOffsetNumber;
     else
-   {
-       /* Default samplesize if the estimation didn't return Const. */
-       samplesize = 0.1f;
-   }
+       tupoffset++;
  
-   *pages = baserel->pages * samplesize;
-   *tuples = path->rows * samplesize;
-   path->rows = *tuples;
+   /* Done? */
+   if (tupoffset > maxoffset)
+       tupoffset = InvalidOffsetNumber;
+
+   sampler->lt = tupoffset;
  
-   PG_RETURN_VOID();
+   return tupoffset;
  }
diff --git a/src/backend/access/tablesample/tablesample.c b/src/backend/access/tablesample/tablesample.c

index f21d42c8e38ca04b82579967f54c613fd84290ee..b8ad7ced743cba99021c4752fc8131aa46c99789 100644 (file)
--- a/src/backend/access/tablesample/tablesample.c
+++ b/src/backend/access/tablesample/tablesample.c
@@ -1,7 +1,7 @@
  /*-------------------------------------------------------------------------
   *
   * tablesample.c
- *       TABLESAMPLE internal API
+ *       Support functions for TABLESAMPLE feature
   *
   * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
@@ -10,356 +10,31 @@
   * IDENTIFICATION
   *       src/backend/access/tablesample/tablesample.c
   *
- * TABLESAMPLE is the SQL standard clause for sampling the relations.
- *
- * The API is interface between the Executor and the TABLESAMPLE Methods.
- *
- * TABLESAMPLE Methods are implementations of actual sampling algorithms which
- * can be used for returning a sample of the source relation.
- * Methods don't read the table directly but are asked for block number and
- * tuple offset which they want to examine (or return) and the tablesample
- * interface implemented here does the reading for them.
- *
- * We currently only support sampling of the physical relations, but in the
- * future we might extend the API to support subqueries as well.
- *
   * -------------------------------------------------------------------------
   */
  
  #include "postgres.h"
  
-#include "access/tablesample.h"
-
-#include "catalog/pg_tablesample_method.h"
-#include "miscadmin.h"
-#include "pgstat.h"
-#include "storage/bufmgr.h"
-#include "storage/predicate.h"
-#include "utils/rel.h"
-#include "utils/tqual.h"
-
-
-static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan);
-
-
-/*
- * Initialize the TABLESAMPLE Descriptor and the TABLESAMPLE Method.
- */
-TableSampleDesc *
-tablesample_init(SampleScanState *scanstate, TableSampleClause *tablesample)
-{
-   FunctionCallInfoData fcinfo;
-   int         i;
-   List       *args = tablesample->args;
-   ListCell   *arg;
-   ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
-   TableSampleDesc *tsdesc = (TableSampleDesc *) palloc0(sizeof(TableSampleDesc));
-
-   /* Load functions */
-   fmgr_info(tablesample->tsminit, &(tsdesc->tsminit));
-   fmgr_info(tablesample->tsmnextblock, &(tsdesc->tsmnextblock));
-   fmgr_info(tablesample->tsmnexttuple, &(tsdesc->tsmnexttuple));
-   if (OidIsValid(tablesample->tsmexaminetuple))
-       fmgr_info(tablesample->tsmexaminetuple, &(tsdesc->tsmexaminetuple));
-   else
-       tsdesc->tsmexaminetuple.fn_oid = InvalidOid;
-   fmgr_info(tablesample->tsmreset, &(tsdesc->tsmreset));
-   fmgr_info(tablesample->tsmend, &(tsdesc->tsmend));
-
-   InitFunctionCallInfoData(fcinfo, &tsdesc->tsminit,
-                            list_length(args) + 2,
-                            InvalidOid, NULL, NULL);
-
-   tsdesc->tupDesc = scanstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
-   tsdesc->heapScan = scanstate->ss.ss_currentScanDesc;
-
-   /* First argument for init function is always TableSampleDesc */
-   fcinfo.arg[0] = PointerGetDatum(tsdesc);
-   fcinfo.argnull[0] = false;
+#include "access/tsmapi.h"
  
-   /*
-    * Second arg for init function is always REPEATABLE.
-    *
-    * If tablesample->repeatable is NULL then REPEATABLE clause was not
-    * specified, and we insert a random value as default.
-    *
-    * When specified, the expression cannot evaluate to NULL.
-    */
-   if (tablesample->repeatable)
-   {
-       ExprState  *argstate = ExecInitExpr((Expr *) tablesample->repeatable,
-                                           (PlanState *) scanstate);
-
-       fcinfo.arg[1] = ExecEvalExpr(argstate, econtext,
-                                    &fcinfo.argnull[1], NULL);
-       if (fcinfo.argnull[1])
-           ereport(ERROR,
-                   (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
-               errmsg("REPEATABLE clause must be NOT NULL numeric value")));
-   }
-   else
-   {
-       fcinfo.arg[1] = UInt32GetDatum(random());
-       fcinfo.argnull[1] = false;
-   }
-
-   /* Rest of the arguments come from user. */
-   i = 2;
-   foreach(arg, args)
-   {
-       Expr       *argexpr = (Expr *) lfirst(arg);
-       ExprState  *argstate = ExecInitExpr(argexpr, (PlanState *) scanstate);
-
-       fcinfo.arg[i] = ExecEvalExpr(argstate, econtext,
-                                    &fcinfo.argnull[i], NULL);
-       i++;
-   }
-   Assert(i == fcinfo.nargs);
-
-   (void) FunctionCallInvoke(&fcinfo);
-
-   return tsdesc;
-}
  
  /*
- * Get next tuple from TABLESAMPLE Method.
- */
-HeapTuple
-tablesample_getnext(TableSampleDesc *desc)
-{
-   HeapScanDesc scan = desc->heapScan;
-   HeapTuple   tuple = &(scan->rs_ctup);
-   bool        pagemode = scan->rs_pageatatime;
-   BlockNumber blockno;
-   Page        page;
-   bool        page_all_visible;
-   ItemId      itemid;
-   OffsetNumber tupoffset,
-               maxoffset;
-
-   if (!scan->rs_inited)
-   {
-       /*
-        * return null immediately if relation is empty
-        */
-       if (scan->rs_nblocks == 0)
-       {
-           Assert(!BufferIsValid(scan->rs_cbuf));
-           tuple->t_data = NULL;
-           return NULL;
-       }
-       blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
-                                             PointerGetDatum(desc)));
-       if (!BlockNumberIsValid(blockno))
-       {
-           tuple->t_data = NULL;
-           return NULL;
-       }
-
-       heapgetpage(scan, blockno);
-       scan->rs_inited = true;
-   }
-   else
-   {
-       /* continue from previously returned page/tuple */
-       blockno = scan->rs_cblock;      /* current page */
-   }
-
-   /*
-    * When pagemode is disabled, the scan will do visibility checks for each
-    * tuple it finds so the buffer needs to be locked.
-    */
-   if (!pagemode)
-       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-
-   page = (Page) BufferGetPage(scan->rs_cbuf);
-   page_all_visible = PageIsAllVisible(page);
-   maxoffset = PageGetMaxOffsetNumber(page);
-
-   for (;;)
-   {
-       CHECK_FOR_INTERRUPTS();
-
-       tupoffset = DatumGetUInt16(FunctionCall3(&desc->tsmnexttuple,
-                                                PointerGetDatum(desc),
-                                                UInt32GetDatum(blockno),
-                                                UInt16GetDatum(maxoffset)));
-
-       if (OffsetNumberIsValid(tupoffset))
-       {
-           bool        visible;
-           bool        found;
-
-           /* Skip invalid tuple pointers. */
-           itemid = PageGetItemId(page, tupoffset);
-           if (!ItemIdIsNormal(itemid))
-               continue;
-
-           tuple->t_data = (HeapTupleHeader) PageGetItem((Page) page, itemid);
-           tuple->t_len = ItemIdGetLength(itemid);
-           ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
-
-           if (page_all_visible)
-               visible = true;
-           else
-               visible = SampleTupleVisible(tuple, tupoffset, scan);
-
-           /*
-            * Let the sampling method examine the actual tuple and decide if
-            * we should return it.
-            *
-            * Note that we let it examine even invisible tuples for
-            * statistical purposes, but not return them since user should
-            * never see invisible tuples.
-            */
-           if (OidIsValid(desc->tsmexaminetuple.fn_oid))
-           {
-               found = DatumGetBool(FunctionCall4(&desc->tsmexaminetuple,
-                                                  PointerGetDatum(desc),
-                                                  UInt32GetDatum(blockno),
-                                                  PointerGetDatum(tuple),
-                                                  BoolGetDatum(visible)));
-               /* Should not happen if sampling method is well written. */
-               if (found && !visible)
-                   elog(ERROR, "Sampling method wanted to return invisible tuple");
-           }
-           else
-               found = visible;
-
-           /* Found visible tuple, return it. */
-           if (found)
-           {
-               if (!pagemode)
-                   LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-               break;
-           }
-           else
-           {
-               /* Try next tuple from same page. */
-               continue;
-           }
-       }
-
-
-       if (!pagemode)
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-
-       blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
-                                             PointerGetDatum(desc)));
-
-       /*
-        * Report our new scan position for synchronization purposes. We don't
-        * do that when moving backwards, however. That would just mess up any
-        * other forward-moving scanners.
-        *
-        * Note: we do this before checking for end of scan so that the final
-        * state of the position hint is back at the start of the rel.  That's
-        * not strictly necessary, but otherwise when you run the same query
-        * multiple times the starting position would shift a little bit
-        * backwards on every invocation, which is confusing. We don't
-        * guarantee any specific ordering in general, though.
-        */
-       if (scan->rs_syncscan)
-           ss_report_location(scan->rs_rd, BlockNumberIsValid(blockno) ?
-                              blockno : scan->rs_startblock);
-
-       /*
-        * Reached end of scan.
-        */
-       if (!BlockNumberIsValid(blockno))
-       {
-           if (BufferIsValid(scan->rs_cbuf))
-               ReleaseBuffer(scan->rs_cbuf);
-           scan->rs_cbuf = InvalidBuffer;
-           scan->rs_cblock = InvalidBlockNumber;
-           tuple->t_data = NULL;
-           scan->rs_inited = false;
-           return NULL;
-       }
-
-       heapgetpage(scan, blockno);
-
-       if (!pagemode)
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-
-       page = (Page) BufferGetPage(scan->rs_cbuf);
-       page_all_visible = PageIsAllVisible(page);
-       maxoffset = PageGetMaxOffsetNumber(page);
-   }
-
-   pgstat_count_heap_getnext(scan->rs_rd);
-
-   return &(scan->rs_ctup);
-}
-
-/*
- * Reset the sampling to starting state
- */
-void
-tablesample_reset(TableSampleDesc *desc)
-{
-   (void) FunctionCall1(&desc->tsmreset, PointerGetDatum(desc));
-}
-
-/*
- * Signal the sampling method that the scan has finished.
- */
-void
-tablesample_end(TableSampleDesc *desc)
-{
-   (void) FunctionCall1(&desc->tsmend, PointerGetDatum(desc));
-}
-
-/*
- * Check visibility of the tuple.
+ * GetTsmRoutine --- get a TsmRoutine struct by invoking the handler.
+ *
+ * This is a convenience routine that's just meant to check for errors.
   */
-static bool
-SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
+TsmRoutine *
+GetTsmRoutine(Oid tsmhandler)
  {
-   /*
-    * If this scan is reading whole pages at a time, there is already
-    * visibility info present in rs_vistuples so we can just search it for
-    * the tupoffset.
-    */
-   if (scan->rs_pageatatime)
-   {
-       int         start = 0,
-                   end = scan->rs_ntuples - 1;
-
-       /*
-        * Do the binary search over rs_vistuples, it's already sorted by
-        * OffsetNumber so we don't need to do any sorting ourselves here.
-        *
-        * We could use bsearch() here but it's slower for integers because of
-        * the function call overhead and because it needs boiler plate code
-        * it would not save us anything code-wise anyway.
-        */
-       while (start <= end)
-       {
-           int         mid = start + (end - start) / 2;
-           OffsetNumber curoffset = scan->rs_vistuples[mid];
-
-           if (curoffset == tupoffset)
-               return true;
-           else if (curoffset > tupoffset)
-               end = mid - 1;
-           else
-               start = mid + 1;
-       }
-
-       return false;
-   }
-   else
-   {
-       /* No pagemode, we have to check the tuple itself. */
-       Snapshot    snapshot = scan->rs_snapshot;
-       Buffer      buffer = scan->rs_cbuf;
+   Datum       datum;
+   TsmRoutine *routine;
  
-       bool        visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
+   datum = OidFunctionCall1(tsmhandler, PointerGetDatum(NULL));
+   routine = (TsmRoutine *) DatumGetPointer(datum);
  
-       CheckForSerializableConflictOut(visible, scan->rs_rd, tuple, buffer,
-                                       snapshot);
+   if (routine == NULL || !IsA(routine, TsmRoutine))
+       elog(ERROR, "tablesample handler function %u did not return a TsmRoutine struct",
+            tsmhandler);
  
-       return visible;
-   }
+   return routine;
  }
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile

index 3d1139b5ba0bfb7e41041fedc1adf42f022e41ed..25130ecf124805565f61c17045c9589445c10e8c 100644 (file)
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -40,8 +40,9 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
     pg_ts_parser.h pg_ts_template.h pg_extension.h \
     pg_foreign_data_wrapper.h pg_foreign_server.h pg_user_mapping.h \
     pg_foreign_table.h pg_policy.h pg_replication_origin.h \
-   pg_tablesample_method.h pg_default_acl.h pg_seclabel.h pg_shseclabel.h \
-   pg_collation.h pg_range.h pg_transform.h toasting.h indexing.h \
+   pg_default_acl.h pg_seclabel.h pg_shseclabel.h \
+   pg_collation.h pg_range.h pg_transform.h \
+   toasting.h indexing.h \
      )
  
  # location of Catalog.pm
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c

index 5d7c441739cec7a45090bab0f331c0ad2fc130c5..90b1cd835f89edad6200872360d4dabee620ce37 100644 (file)
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -1911,6 +1911,14 @@ find_expr_references_walker(Node *node,
                                    context->addrs);
         }
     }
+   else if (IsA(node, TableSampleClause))
+   {
+       TableSampleClause *tsc = (TableSampleClause *) node;
+
+       add_object_address(OCLASS_PROC, tsc->tsmhandler, 0,
+                          context->addrs);
+       /* fall through to examine arguments */
+   }
  
     return expression_tree_walker(node, find_expr_references_walker,
                                   (void *) context);
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c

index 0d1ecc2a3edbb85276c3e707ad7b90840e5fd35f..5d06fa4ea65c4a751c38daaefb05b032a0b7aaca 100644 (file)
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -96,6 +96,8 @@ static void show_sort_group_keys(PlanState *planstate, const char *qlabel,
                      List *ancestors, ExplainState *es);
  static void show_sortorder_options(StringInfo buf, Node *sortexpr,
                        Oid sortOperator, Oid collation, bool nullsFirst);
+static void show_tablesample(TableSampleClause *tsc, PlanState *planstate,
+                List *ancestors, ExplainState *es);
  static void show_sort_info(SortState *sortstate, ExplainState *es);
  static void show_hash_info(HashState *hashstate, ExplainState *es);
  static void show_tidbitmap_info(BitmapHeapScanState *planstate,
@@ -116,7 +118,7 @@ static void ExplainMemberNodes(List *plans, PlanState **planstates,
  static void ExplainSubPlans(List *plans, List *ancestors,
                 const char *relationship, ExplainState *es);
  static void ExplainCustomChildren(CustomScanState *css,
-                                 List *ancestors, ExplainState *es);
+                     List *ancestors, ExplainState *es);
  static void ExplainProperty(const char *qlabel, const char *value,
                 bool numeric, ExplainState *es);
  static void ExplainOpenGroup(const char *objtype, const char *labelname,
@@ -730,6 +732,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
     switch (nodeTag(plan))
     {
         case T_SeqScan:
+       case T_SampleScan:
         case T_IndexScan:
         case T_IndexOnlyScan:
         case T_BitmapHeapScan:
@@ -739,7 +742,6 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
         case T_ValuesScan:
         case T_CteScan:
         case T_WorkTableScan:
-       case T_SampleScan:
             *rels_used = bms_add_member(*rels_used,
                                         ((Scan *) plan)->scanrelid);
             break;
@@ -935,6 +937,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
         case T_SeqScan:
             pname = sname = "Seq Scan";
             break;
+       case T_SampleScan:
+           pname = sname = "Sample Scan";
+           break;
         case T_IndexScan:
             pname = sname = "Index Scan";
             break;
@@ -976,23 +981,6 @@ ExplainNode(PlanState *planstate, List *ancestors,
             else
                 pname = sname;
             break;
-       case T_SampleScan:
-           {
-               /*
-                * Fetch the tablesample method name from RTE.
-                *
-                * It would be nice to also show parameters, but since we
-                * support arbitrary expressions as parameter it might get
-                * quite messy.
-                */
-               RangeTblEntry *rte;
-
-               rte = rt_fetch(((SampleScan *) plan)->scanrelid, es->rtable);
-               custom_name = get_tablesample_method_name(rte->tablesample->tsmid);
-               pname = psprintf("Sample Scan (%s)", custom_name);
-               sname = "Sample Scan";
-           }
-           break;
         case T_Material:
             pname = sname = "Materialize";
             break;
@@ -1101,6 +1089,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
     switch (nodeTag(plan))
     {
         case T_SeqScan:
+       case T_SampleScan:
         case T_BitmapHeapScan:
         case T_TidScan:
         case T_SubqueryScan:
@@ -1115,9 +1104,6 @@ ExplainNode(PlanState *planstate, List *ancestors,
             if (((Scan *) plan)->scanrelid > 0)
                 ExplainScanTarget((Scan *) plan, es);
             break;
-       case T_SampleScan:
-           ExplainScanTarget((Scan *) plan, es);
-           break;
         case T_IndexScan:
             {
                 IndexScan  *indexscan = (IndexScan *) plan;
@@ -1363,12 +1349,15 @@ ExplainNode(PlanState *planstate, List *ancestors,
             if (es->analyze)
                 show_tidbitmap_info((BitmapHeapScanState *) planstate, es);
             break;
+       case T_SampleScan:
+           show_tablesample(((SampleScan *) plan)->tablesample,
+                            planstate, ancestors, es);
+           /* FALL THRU to print additional fields the same as SeqScan */
         case T_SeqScan:
         case T_ValuesScan:
         case T_CteScan:
         case T_WorkTableScan:
         case T_SubqueryScan:
-       case T_SampleScan:
             show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
             if (plan->qual)
                 show_instrumentation_count("Rows Removed by Filter", 1,
@@ -2109,6 +2098,72 @@ show_sortorder_options(StringInfo buf, Node *sortexpr,
     }
  }
  
+/*
+ * Show TABLESAMPLE properties
+ */
+static void
+show_tablesample(TableSampleClause *tsc, PlanState *planstate,
+                List *ancestors, ExplainState *es)
+{
+   List       *context;
+   bool        useprefix;
+   char       *method_name;
+   List       *params = NIL;
+   char       *repeatable;
+   ListCell   *lc;
+
+   /* Set up deparsing context */
+   context = set_deparse_context_planstate(es->deparse_cxt,
+                                           (Node *) planstate,
+                                           ancestors);
+   useprefix = list_length(es->rtable) > 1;
+
+   /* Get the tablesample method name */
+   method_name = get_func_name(tsc->tsmhandler);
+
+   /* Deparse parameter expressions */
+   foreach(lc, tsc->args)
+   {
+       Node       *arg = (Node *) lfirst(lc);
+
+       params = lappend(params,
+                        deparse_expression(arg, context,
+                                           useprefix, false));
+   }
+   if (tsc->repeatable)
+       repeatable = deparse_expression((Node *) tsc->repeatable, context,
+                                       useprefix, false);
+   else
+       repeatable = NULL;
+
+   /* Print results */
+   if (es->format == EXPLAIN_FORMAT_TEXT)
+   {
+       bool        first = true;
+
+       appendStringInfoSpaces(es->str, es->indent * 2);
+       appendStringInfo(es->str, "Sampling: %s (", method_name);
+       foreach(lc, params)
+       {
+           if (!first)
+               appendStringInfoString(es->str, ", ");
+           appendStringInfoString(es->str, (const char *) lfirst(lc));
+           first = false;
+       }
+       appendStringInfoChar(es->str, ')');
+       if (repeatable)
+           appendStringInfo(es->str, " REPEATABLE (%s)", repeatable);
+       appendStringInfoChar(es->str, '\n');
+   }
+   else
+   {
+       ExplainPropertyText("Sampling Method", method_name, es);
+       ExplainPropertyList("Sampling Parameters", params, es);
+       if (repeatable)
+           ExplainPropertyText("Repeatable Seed", repeatable, es);
+   }
+}
+
  /*
   * If it's EXPLAIN ANALYZE, show tuplesort stats for a sort node
   */
@@ -2366,13 +2421,13 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es)
     switch (nodeTag(plan))
     {
         case T_SeqScan:
+       case T_SampleScan:
         case T_IndexScan:
         case T_IndexOnlyScan:
         case T_BitmapHeapScan:
         case T_TidScan:
         case T_ForeignScan:
         case T_CustomScan:
-       case T_SampleScan:
         case T_ModifyTable:
             /* Assert it's on a real relation */
             Assert(rte->rtekind == RTE_RELATION);
@@ -2663,9 +2718,9 @@ ExplainCustomChildren(CustomScanState *css, List *ancestors, ExplainState *es)
  {
     ListCell   *cell;
     const char *label =
-       (list_length(css->custom_ps) != 1 ? "children" : "child");
+   (list_length(css->custom_ps) != 1 ? "children" : "child");
  
-   foreach (cell, css->custom_ps)
+   foreach(cell, css->custom_ps)
         ExplainNode((PlanState *) lfirst(cell), ancestors, label, NULL, es);
  }
  
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c
index 04073d3f9f916f23a750ad0ce2c45e5b0169b802..93e1e9a691c507b08aa58beddbcb74c66a1a8501 100644
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -463,6 +463,10 @@ ExecSupportsBackwardScan(Plan *node)
         case T_CteScan:
             return TargetListSupportsBackwardScan(node->targetlist);
  
+       case T_SampleScan:
+           /* Simplify life for tablesample methods by disallowing this */
+           return false;
+
         case T_IndexScan:
             return IndexSupportsBackwardScan(((IndexScan *) node)->indexid) &&
                 TargetListSupportsBackwardScan(node->targetlist);
@@ -485,9 +489,6 @@ ExecSupportsBackwardScan(Plan *node)
             }
             return false;
  
-       case T_SampleScan:
-           return false;
-
         case T_Material:
         case T_Sort:
             /* these don't evaluate tlist */
diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c
index 4c1c5237b7d203c5bd19f48375d87586980776d5..dbe84b0baa86886be548194b2630e6f39497293b 100644
--- a/src/backend/executor/nodeSamplescan.c
+++ b/src/backend/executor/nodeSamplescan.c
@@ -3,7 +3,7 @@
   * nodeSamplescan.c
   *   Support routines for sample scans of relations (table sampling).
   *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   *
@@ -14,22 +14,23 @@
   */
  #include "postgres.h"
  
-#include "access/tablesample.h"
+#include "access/hash.h"
+#include "access/relscan.h"
+#include "access/tsmapi.h"
  #include "executor/executor.h"
  #include "executor/nodeSamplescan.h"
  #include "miscadmin.h"
-#include "parser/parsetree.h"
  #include "pgstat.h"
-#include "storage/bufmgr.h"
  #include "storage/predicate.h"
  #include "utils/rel.h"
-#include "utils/syscache.h"
  #include "utils/tqual.h"
  
-static void InitScanRelation(SampleScanState *node, EState *estate,
-                int eflags, TableSampleClause *tablesample);
+static void InitScanRelation(SampleScanState *node, EState *estate, int eflags);
  static TupleTableSlot *SampleNext(SampleScanState *node);
-
+static void tablesample_init(SampleScanState *scanstate);
+static HeapTuple tablesample_getnext(SampleScanState *scanstate);
+static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
+                  HeapScanDesc scan);
  
  /* ----------------------------------------------------------------
   *                     Scan Support
@@ -45,23 +46,26 @@ static TupleTableSlot *SampleNext(SampleScanState *node);
  static TupleTableSlot *
  SampleNext(SampleScanState *node)
  {
-   TupleTableSlot *slot;
-   TableSampleDesc *tsdesc;
     HeapTuple   tuple;
+   TupleTableSlot *slot;
  
     /*
-    * get information from the scan state
+    * if this is first call within a scan, initialize
      */
-   slot = node->ss.ss_ScanTupleSlot;
-   tsdesc = node->tsdesc;
+   if (!node->begun)
+       tablesample_init(node);
+
+   /*
+    * get the next tuple, and store it in our result slot
+    */
+   tuple = tablesample_getnext(node);
  
-   tuple = tablesample_getnext(tsdesc);
+   slot = node->ss.ss_ScanTupleSlot;
  
     if (tuple)
         ExecStoreTuple(tuple,   /* tuple to store */
                        slot,    /* slot to store in */
-                      tsdesc->heapScan->rs_cbuf,       /* buffer associated
-                                                        * with this tuple */
+                      node->ss.ss_currentScanDesc->rs_cbuf,    /* tuple's buffer */
                        false);  /* don't pfree this pointer */
     else
         ExecClearTuple(slot);
@@ -75,7 +79,10 @@ SampleNext(SampleScanState *node)
  static bool
  SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
  {
-   /* No need to recheck for SampleScan */
+   /*
+    * No need to recheck for SampleScan, since like SeqScan we don't pass any
+    * checkable keys to heap_beginscan.
+    */
     return true;
  }
  
@@ -103,8 +110,7 @@ ExecSampleScan(SampleScanState *node)
   * ----------------------------------------------------------------
   */
  static void
-InitScanRelation(SampleScanState *node, EState *estate, int eflags,
-                TableSampleClause *tablesample)
+InitScanRelation(SampleScanState *node, EState *estate, int eflags)
  {
     Relation    currentRelation;
  
@@ -113,19 +119,13 @@ InitScanRelation(SampleScanState *node, EState *estate, int eflags,
      * open that relation and acquire appropriate lock on it.
      */
     currentRelation = ExecOpenScanRelation(estate,
-                               ((SampleScan *) node->ss.ps.plan)->scanrelid,
+                          ((SampleScan *) node->ss.ps.plan)->scan.scanrelid,
                                            eflags);
  
     node->ss.ss_currentRelation = currentRelation;
  
-   /*
-    * Even though we aren't going to do a conventional seqscan, it is useful
-    * to create a HeapScanDesc --- many of the fields in it are usable.
-    */
-   node->ss.ss_currentScanDesc =
-       heap_beginscan_sampling(currentRelation, estate->es_snapshot, 0, NULL,
-                               tablesample->tsmseqscan,
-                               tablesample->tsmpagemode);
+   /* we won't set up the HeapScanDesc till later */
+   node->ss.ss_currentScanDesc = NULL;
  
     /* and report the scan tuple slot's rowtype */
     ExecAssignScanType(&node->ss, RelationGetDescr(currentRelation));
@@ -140,12 +140,11 @@ SampleScanState *
  ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
  {
     SampleScanState *scanstate;
-   RangeTblEntry *rte = rt_fetch(node->scanrelid,
-                                 estate->es_range_table);
+   TableSampleClause *tsc = node->tablesample;
+   TsmRoutine *tsm;
  
     Assert(outerPlan(node) == NULL);
     Assert(innerPlan(node) == NULL);
-   Assert(rte->tablesample != NULL);
  
     /*
      * create state structure
@@ -165,10 +164,17 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
      * initialize child expressions
      */
     scanstate->ss.ps.targetlist = (List *)
-       ExecInitExpr((Expr *) node->plan.targetlist,
+       ExecInitExpr((Expr *) node->scan.plan.targetlist,
                      (PlanState *) scanstate);
     scanstate->ss.ps.qual = (List *)
-       ExecInitExpr((Expr *) node->plan.qual,
+       ExecInitExpr((Expr *) node->scan.plan.qual,
+                    (PlanState *) scanstate);
+
+   scanstate->args = (List *)
+       ExecInitExpr((Expr *) tsc->args,
+                    (PlanState *) scanstate);
+   scanstate->repeatable =
+       ExecInitExpr(tsc->repeatable,
                      (PlanState *) scanstate);
  
     /*
@@ -180,7 +186,7 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
     /*
      * initialize scan relation
      */
-   InitScanRelation(scanstate, estate, eflags, rte->tablesample);
+   InitScanRelation(scanstate, estate, eflags);
  
     scanstate->ss.ps.ps_TupFromTlist = false;
  
@@ -190,7 +196,25 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
     ExecAssignResultTypeFromTL(&scanstate->ss.ps);
     ExecAssignScanProjectionInfo(&scanstate->ss);
  
-   scanstate->tsdesc = tablesample_init(scanstate, rte->tablesample);
+   /*
+    * If we don't have a REPEATABLE clause, select a random seed.  We want to
+    * do this just once, since the seed shouldn't change over rescans.
+    */
+   if (tsc->repeatable == NULL)
+       scanstate->seed = random();
+
+   /*
+    * Finally, initialize the TABLESAMPLE method handler.
+    */
+   tsm = GetTsmRoutine(tsc->tsmhandler);
+   scanstate->tsmroutine = tsm;
+   scanstate->tsm_state = NULL;
+
+   if (tsm->InitSampleScan)
+       tsm->InitSampleScan(scanstate, eflags);
+
+   /* We'll do BeginSampleScan later; we can't evaluate params yet */
+   scanstate->begun = false;
  
     return scanstate;
  }
@@ -207,7 +231,8 @@ ExecEndSampleScan(SampleScanState *node)
     /*
      * Tell sampling function that we finished the scan.
      */
-   tablesample_end(node->tsdesc);
+   if (node->tsmroutine->EndSampleScan)
+       node->tsmroutine->EndSampleScan(node);
  
     /*
      * Free the exprcontext
@@ -223,7 +248,8 @@ ExecEndSampleScan(SampleScanState *node)
     /*
      * close heap scan
      */
-   heap_endscan(node->ss.ss_currentScanDesc);
+   if (node->ss.ss_currentScanDesc)
+       heap_endscan(node->ss.ss_currentScanDesc);
  
     /*
      * close the heap relation.
@@ -231,11 +257,6 @@ ExecEndSampleScan(SampleScanState *node)
     ExecCloseScanRelation(node->ss.ss_currentRelation);
  }
  
-/* ----------------------------------------------------------------
- *                     Join Support
- * ----------------------------------------------------------------
- */
-
  /* ----------------------------------------------------------------
   *     ExecReScanSampleScan
   *
@@ -246,12 +267,336 @@ ExecEndSampleScan(SampleScanState *node)
  void
  ExecReScanSampleScan(SampleScanState *node)
  {
-   heap_rescan(node->ss.ss_currentScanDesc, NULL);
+   /* Remember we need to do BeginSampleScan again (if we did it at all) */
+   node->begun = false;
+
+   ExecScanReScan(&node->ss);
+}
+
+
+/*
+ * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
+ */
+static void
+tablesample_init(SampleScanState *scanstate)
+{
+   TsmRoutine *tsm = scanstate->tsmroutine;
+   ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
+   Datum      *params;
+   Datum       datum;
+   bool        isnull;
+   uint32      seed;
+   bool        allow_sync;
+   int         i;
+   ListCell   *arg;
+
+   params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
+
+   i = 0;
+   foreach(arg, scanstate->args)
+   {
+       ExprState  *argstate = (ExprState *) lfirst(arg);
+
+       params[i] = ExecEvalExprSwitchContext(argstate,
+                                             econtext,
+                                             &isnull,
+                                             NULL);
+       if (isnull)
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+                    errmsg("TABLESAMPLE parameter cannot be null")));
+       i++;
+   }
+
+   if (scanstate->repeatable)
+   {
+       datum = ExecEvalExprSwitchContext(scanstate->repeatable,
+                                         econtext,
+                                         &isnull,
+                                         NULL);
+       if (isnull)
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
+                errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
+
+       /*
+        * The REPEATABLE parameter has been coerced to float8 by the parser.
+        * The reason for using float8 at the SQL level is that it will
+        * produce unsurprising results both for users used to databases that
+        * accept only integers in the REPEATABLE clause and for those who
+        * might expect that REPEATABLE works like setseed() (a float in the
+        * range from -1 to 1).
+        *
+        * We use hashfloat8() to convert the supplied value into a suitable
+        * seed.  For regression-testing purposes, that has the convenient
+        * property that REPEATABLE(0) gives a machine-independent result.
+        */
+       seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
+   }
+   else
+   {
+       /* Use the seed selected by ExecInitSampleScan */
+       seed = scanstate->seed;
+   }
+
+   /* Set default values for params that BeginSampleScan can adjust */
+   scanstate->use_bulkread = true;
+   scanstate->use_pagemode = true;
+
+   /* Let tablesample method do its thing */
+   tsm->BeginSampleScan(scanstate,
+                        params,
+                        list_length(scanstate->args),
+                        seed);
+
+   /* We'll use syncscan if there's no NextSampleBlock function */
+   allow_sync = (tsm->NextSampleBlock == NULL);
+
+   /* Now we can create or reset the HeapScanDesc */
+   if (scanstate->ss.ss_currentScanDesc == NULL)
+   {
+       scanstate->ss.ss_currentScanDesc =
+           heap_beginscan_sampling(scanstate->ss.ss_currentRelation,
+                                   scanstate->ss.ps.state->es_snapshot,
+                                   0, NULL,
+                                   scanstate->use_bulkread,
+                                   allow_sync,
+                                   scanstate->use_pagemode);
+   }
+   else
+   {
+       heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
+                              scanstate->use_bulkread,
+                              allow_sync,
+                              scanstate->use_pagemode);
+   }
+
+   pfree(params);
+
+   /* And we're initialized. */
+   scanstate->begun = true;
+}
+
+/*
+ * Get next tuple from TABLESAMPLE method.
+ *
+ * Note: an awful lot of this is copied-and-pasted from heapam.c.  It would
+ * perhaps be better to refactor to share more code.
+ */
+static HeapTuple
+tablesample_getnext(SampleScanState *scanstate)
+{
+   TsmRoutine *tsm = scanstate->tsmroutine;
+   HeapScanDesc scan = scanstate->ss.ss_currentScanDesc;
+   HeapTuple   tuple = &(scan->rs_ctup);
+   Snapshot    snapshot = scan->rs_snapshot;
+   bool        pagemode = scan->rs_pageatatime;
+   BlockNumber blockno;
+   Page        page;
+   bool        all_visible;
+   OffsetNumber maxoffset;
+
+   if (!scan->rs_inited)
+   {
+       /*
+        * return null immediately if relation is empty
+        */
+       if (scan->rs_nblocks == 0)
+       {
+           Assert(!BufferIsValid(scan->rs_cbuf));
+           tuple->t_data = NULL;
+           return NULL;
+       }
+       if (tsm->NextSampleBlock)
+       {
+           blockno = tsm->NextSampleBlock(scanstate);
+           if (!BlockNumberIsValid(blockno))
+           {
+               tuple->t_data = NULL;
+               return NULL;
+           }
+       }
+       else
+           blockno = scan->rs_startblock;
+       Assert(blockno < scan->rs_nblocks);
+       heapgetpage(scan, blockno);
+       scan->rs_inited = true;
+   }
+   else
+   {
+       /* continue from previously returned page/tuple */
+       blockno = scan->rs_cblock;      /* current page */
+   }
  
     /*
-    * Tell sampling function to reset its state for rescan.
+    * When not using pagemode, we must lock the buffer during tuple
+    * visibility checks.
      */
-   tablesample_reset(node->tsdesc);
+   if (!pagemode)
+       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+   page = (Page) BufferGetPage(scan->rs_cbuf);
+   all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
+   maxoffset = PageGetMaxOffsetNumber(page);
+
+   for (;;)
+   {
+       OffsetNumber tupoffset;
+       bool        finished;
+
+       CHECK_FOR_INTERRUPTS();
+
+       /* Ask the tablesample method which tuples to check on this page. */
+       tupoffset = tsm->NextSampleTuple(scanstate,
+                                        blockno,
+                                        maxoffset);
+
+       if (OffsetNumberIsValid(tupoffset))
+       {
+           ItemId      itemid;
+           bool        visible;
+
+           /* Skip invalid tuple pointers. */
+           itemid = PageGetItemId(page, tupoffset);
+           if (!ItemIdIsNormal(itemid))
+               continue;
+
+           tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+           tuple->t_len = ItemIdGetLength(itemid);
+           ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
+
+           if (all_visible)
+               visible = true;
+           else
+               visible = SampleTupleVisible(tuple, tupoffset, scan);
+
+           /* in pagemode, heapgetpage did this for us */
+           if (!pagemode)
+               CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
+                                               scan->rs_cbuf, snapshot);
+
+           if (visible)
+           {
+               /* Found visible tuple, return it. */
+               if (!pagemode)
+                   LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+               break;
+           }
+           else
+           {
+               /* Try next tuple from same page. */
+               continue;
+           }
+       }
+
+       /*
+        * if we get here, it means we've exhausted the items on this page and
+        * it's time to move to the next.
+        */
+       if (!pagemode)
+           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+       if (tsm->NextSampleBlock)
+       {
+           blockno = tsm->NextSampleBlock(scanstate);
+           Assert(!scan->rs_syncscan);
+           finished = !BlockNumberIsValid(blockno);
+       }
+       else
+       {
+           /* Without NextSampleBlock, just do a plain forward seqscan. */
+           blockno++;
+           if (blockno >= scan->rs_nblocks)
+               blockno = 0;
+
+           /*
+            * Report our new scan position for synchronization purposes.
+            *
+            * Note: we do this before checking for end of scan so that the
+            * final state of the position hint is back at the start of the
+            * rel.  That's not strictly necessary, but otherwise when you run
+            * the same query multiple times the starting position would shift
+            * a little bit backwards on every invocation, which is confusing.
+            * We don't guarantee any specific ordering in general, though.
+            */
+           if (scan->rs_syncscan)
+               ss_report_location(scan->rs_rd, blockno);
+
+           finished = (blockno == scan->rs_startblock);
+       }
+
+       /*
+        * Reached end of scan?
+        */
+       if (finished)
+       {
+           if (BufferIsValid(scan->rs_cbuf))
+               ReleaseBuffer(scan->rs_cbuf);
+           scan->rs_cbuf = InvalidBuffer;
+           scan->rs_cblock = InvalidBlockNumber;
+           tuple->t_data = NULL;
+           scan->rs_inited = false;
+           return NULL;
+       }
+
+       Assert(blockno < scan->rs_nblocks);
+       heapgetpage(scan, blockno);
+
+       /* Re-establish state for new page */
+       if (!pagemode)
+           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+       page = (Page) BufferGetPage(scan->rs_cbuf);
+       all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
+       maxoffset = PageGetMaxOffsetNumber(page);
+   }
+
+   /* Count successfully-fetched tuples as heap fetches */
+   pgstat_count_heap_getnext(scan->rs_rd);
+
+   return &(scan->rs_ctup);
+}
  
-   ExecScanReScan(&node->ss);
+/*
+ * Check visibility of the tuple.
+ */
+static bool
+SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
+{
+   if (scan->rs_pageatatime)
+   {
+       /*
+        * In pageatatime mode, heapgetpage() already did visibility checks,
+        * so just look at the info it left in rs_vistuples[].
+        *
+        * We use a binary search over the known-sorted array.  Note: we could
+        * save some effort if we insisted that NextSampleTuple select tuples
+        * in increasing order, but it's not clear that there would be enough
+        * gain to justify the restriction.
+        */
+       int         start = 0,
+                   end = scan->rs_ntuples - 1;
+
+       while (start <= end)
+       {
+           int         mid = (start + end) / 2;
+           OffsetNumber curoffset = scan->rs_vistuples[mid];
+
+           if (tupoffset == curoffset)
+               return true;
+           else if (tupoffset < curoffset)
+               end = mid - 1;
+           else
+               start = mid + 1;
+       }
+
+       return false;
+   }
+   else
+   {
+       /* Otherwise, we have to check the tuple individually. */
+       return HeapTupleSatisfiesVisibility(tuple,
+                                           scan->rs_snapshot,
+                                           scan->rs_cbuf);
+   }
  }
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 6a08c2db211b4e65a103b4aacf5e52c5f41b5adc..7248440ead363a0960b20a2f5b73f8662e4c85d0 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -359,6 +359,27 @@ _copySeqScan(const SeqScan *from)
     return newnode;
  }
  
+/*
+ * _copySampleScan
+ */
+static SampleScan *
+_copySampleScan(const SampleScan *from)
+{
+   SampleScan *newnode = makeNode(SampleScan);
+
+   /*
+    * copy node superclass fields
+    */
+   CopyScanFields((const Scan *) from, (Scan *) newnode);
+
+   /*
+    * copy remainder of node
+    */
+   COPY_NODE_FIELD(tablesample);
+
+   return newnode;
+}
+
  /*
   * _copyIndexScan
   */
@@ -641,22 +662,6 @@ _copyCustomScan(const CustomScan *from)
     return newnode;
  }
  
-/*
- * _copySampleScan
- */
-static SampleScan *
-_copySampleScan(const SampleScan *from)
-{
-   SampleScan *newnode = makeNode(SampleScan);
-
-   /*
-    * copy node superclass fields
-    */
-   CopyScanFields((const Scan *) from, (Scan *) newnode);
-
-   return newnode;
-}
-
  /*
   * CopyJoinFields
   *
@@ -2143,6 +2148,18 @@ _copyRangeTblFunction(const RangeTblFunction *from)
     return newnode;
  }
  
+static TableSampleClause *
+_copyTableSampleClause(const TableSampleClause *from)
+{
+   TableSampleClause *newnode = makeNode(TableSampleClause);
+
+   COPY_SCALAR_FIELD(tsmhandler);
+   COPY_NODE_FIELD(args);
+   COPY_NODE_FIELD(repeatable);
+
+   return newnode;
+}
+
  static WithCheckOption *
  _copyWithCheckOption(const WithCheckOption *from)
  {
@@ -2271,40 +2288,6 @@ _copyCommonTableExpr(const CommonTableExpr *from)
     return newnode;
  }
  
-static RangeTableSample *
-_copyRangeTableSample(const RangeTableSample *from)
-{
-   RangeTableSample *newnode = makeNode(RangeTableSample);
-
-   COPY_NODE_FIELD(relation);
-   COPY_STRING_FIELD(method);
-   COPY_NODE_FIELD(repeatable);
-   COPY_NODE_FIELD(args);
-
-   return newnode;
-}
-
-static TableSampleClause *
-_copyTableSampleClause(const TableSampleClause *from)
-{
-   TableSampleClause *newnode = makeNode(TableSampleClause);
-
-   COPY_SCALAR_FIELD(tsmid);
-   COPY_SCALAR_FIELD(tsmseqscan);
-   COPY_SCALAR_FIELD(tsmpagemode);
-   COPY_SCALAR_FIELD(tsminit);
-   COPY_SCALAR_FIELD(tsmnextblock);
-   COPY_SCALAR_FIELD(tsmnexttuple);
-   COPY_SCALAR_FIELD(tsmexaminetuple);
-   COPY_SCALAR_FIELD(tsmend);
-   COPY_SCALAR_FIELD(tsmreset);
-   COPY_SCALAR_FIELD(tsmcost);
-   COPY_NODE_FIELD(repeatable);
-   COPY_NODE_FIELD(args);
-
-   return newnode;
-}
-
  static A_Expr *
  _copyAExpr(const A_Expr *from)
  {
@@ -2532,6 +2515,20 @@ _copyRangeFunction(const RangeFunction *from)
     return newnode;
  }
  
+static RangeTableSample *
+_copyRangeTableSample(const RangeTableSample *from)
+{
+   RangeTableSample *newnode = makeNode(RangeTableSample);
+
+   COPY_NODE_FIELD(relation);
+   COPY_NODE_FIELD(method);
+   COPY_NODE_FIELD(args);
+   COPY_NODE_FIELD(repeatable);
+   COPY_LOCATION_FIELD(location);
+
+   return newnode;
+}
+
  static TypeCast *
  _copyTypeCast(const TypeCast *from)
  {
@@ -4237,6 +4234,9 @@ copyObject(const void *from)
         case T_SeqScan:
             retval = _copySeqScan(from);
             break;
+       case T_SampleScan:
+           retval = _copySampleScan(from);
+           break;
         case T_IndexScan:
             retval = _copyIndexScan(from);
             break;
@@ -4273,9 +4273,6 @@ copyObject(const void *from)
         case T_CustomScan:
             retval = _copyCustomScan(from);
             break;
-       case T_SampleScan:
-           retval = _copySampleScan(from);
-           break;
         case T_Join:
             retval = _copyJoin(from);
             break;
@@ -4897,6 +4894,9 @@ copyObject(const void *from)
         case T_RangeFunction:
             retval = _copyRangeFunction(from);
             break;
+       case T_RangeTableSample:
+           retval = _copyRangeTableSample(from);
+           break;
         case T_TypeName:
             retval = _copyTypeName(from);
             break;
@@ -4921,6 +4921,9 @@ copyObject(const void *from)
         case T_RangeTblFunction:
             retval = _copyRangeTblFunction(from);
             break;
+       case T_TableSampleClause:
+           retval = _copyTableSampleClause(from);
+           break;
         case T_WithCheckOption:
             retval = _copyWithCheckOption(from);
             break;
@@ -4948,12 +4951,6 @@ copyObject(const void *from)
         case T_CommonTableExpr:
             retval = _copyCommonTableExpr(from);
             break;
-       case T_RangeTableSample:
-           retval = _copyRangeTableSample(from);
-           break;
-       case T_TableSampleClause:
-           retval = _copyTableSampleClause(from);
-           break;
         case T_FuncWithArgs:
             retval = _copyFuncWithArgs(from);
             break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index faf5eedab4ed4b7412970b82621d6a42704d008c..6597dbc33e12f9d7e942eb7fa4ca72a566475989 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -2290,6 +2290,18 @@ _equalRangeFunction(const RangeFunction *a, const RangeFunction *b)
     return true;
  }
  
+static bool
+_equalRangeTableSample(const RangeTableSample *a, const RangeTableSample *b)
+{
+   COMPARE_NODE_FIELD(relation);
+   COMPARE_NODE_FIELD(method);
+   COMPARE_NODE_FIELD(args);
+   COMPARE_NODE_FIELD(repeatable);
+   COMPARE_LOCATION_FIELD(location);
+
+   return true;
+}
+
  static bool
  _equalIndexElem(const IndexElem *a, const IndexElem *b)
  {
@@ -2428,6 +2440,16 @@ _equalRangeTblFunction(const RangeTblFunction *a, const RangeTblFunction *b)
     return true;
  }
  
+static bool
+_equalTableSampleClause(const TableSampleClause *a, const TableSampleClause *b)
+{
+   COMPARE_SCALAR_FIELD(tsmhandler);
+   COMPARE_NODE_FIELD(args);
+   COMPARE_NODE_FIELD(repeatable);
+
+   return true;
+}
+
  static bool
  _equalWithCheckOption(const WithCheckOption *a, const WithCheckOption *b)
  {
@@ -2538,36 +2560,6 @@ _equalCommonTableExpr(const CommonTableExpr *a, const CommonTableExpr *b)
     return true;
  }
  
-static bool
-_equalRangeTableSample(const RangeTableSample *a, const RangeTableSample *b)
-{
-   COMPARE_NODE_FIELD(relation);
-   COMPARE_STRING_FIELD(method);
-   COMPARE_NODE_FIELD(repeatable);
-   COMPARE_NODE_FIELD(args);
-
-   return true;
-}
-
-static bool
-_equalTableSampleClause(const TableSampleClause *a, const TableSampleClause *b)
-{
-   COMPARE_SCALAR_FIELD(tsmid);
-   COMPARE_SCALAR_FIELD(tsmseqscan);
-   COMPARE_SCALAR_FIELD(tsmpagemode);
-   COMPARE_SCALAR_FIELD(tsminit);
-   COMPARE_SCALAR_FIELD(tsmnextblock);
-   COMPARE_SCALAR_FIELD(tsmnexttuple);
-   COMPARE_SCALAR_FIELD(tsmexaminetuple);
-   COMPARE_SCALAR_FIELD(tsmend);
-   COMPARE_SCALAR_FIELD(tsmreset);
-   COMPARE_SCALAR_FIELD(tsmcost);
-   COMPARE_NODE_FIELD(repeatable);
-   COMPARE_NODE_FIELD(args);
-
-   return true;
-}
-
  static bool
  _equalXmlSerialize(const XmlSerialize *a, const XmlSerialize *b)
  {
@@ -3260,6 +3252,9 @@ equal(const void *a, const void *b)
         case T_RangeFunction:
             retval = _equalRangeFunction(a, b);
             break;
+       case T_RangeTableSample:
+           retval = _equalRangeTableSample(a, b);
+           break;
         case T_TypeName:
             retval = _equalTypeName(a, b);
             break;
@@ -3284,6 +3279,9 @@ equal(const void *a, const void *b)
         case T_RangeTblFunction:
             retval = _equalRangeTblFunction(a, b);
             break;
+       case T_TableSampleClause:
+           retval = _equalTableSampleClause(a, b);
+           break;
         case T_WithCheckOption:
             retval = _equalWithCheckOption(a, b);
             break;
@@ -3311,12 +3309,6 @@ equal(const void *a, const void *b)
         case T_CommonTableExpr:
             retval = _equalCommonTableExpr(a, b);
             break;
-       case T_RangeTableSample:
-           retval = _equalRangeTableSample(a, b);
-           break;
-       case T_TableSampleClause:
-           retval = _equalTableSampleClause(a, b);
-           break;
         case T_FuncWithArgs:
             retval = _equalFuncWithArgs(a, b);
             break;
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index b1e3e6e489320086dce3500b1418178095e99714..c517dfd9d69c6264ecdd0c4904b8b8337ccea099 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -1486,6 +1486,9 @@ exprLocation(const Node *expr)
         case T_WindowDef:
             loc = ((const WindowDef *) expr)->location;
             break;
+       case T_RangeTableSample:
+           loc = ((const RangeTableSample *) expr)->location;
+           break;
         case T_TypeName:
             loc = ((const TypeName *) expr)->location;
             break;
@@ -1995,6 +1998,17 @@ expression_tree_walker(Node *node,
             return walker(((PlaceHolderInfo *) node)->ph_var, context);
         case T_RangeTblFunction:
             return walker(((RangeTblFunction *) node)->funcexpr, context);
+       case T_TableSampleClause:
+           {
+               TableSampleClause *tsc = (TableSampleClause *) node;
+
+               if (expression_tree_walker((Node *) tsc->args,
+                                          walker, context))
+                   return true;
+               if (walker((Node *) tsc->repeatable, context))
+                   return true;
+           }
+           break;
         default:
             elog(ERROR, "unrecognized node type: %d",
                  (int) nodeTag(node));
@@ -2082,13 +2096,8 @@ range_table_walker(List *rtable,
         switch (rte->rtekind)
         {
             case RTE_RELATION:
-               if (rte->tablesample)
-               {
-                   if (walker(rte->tablesample->args, context))
-                       return true;
-                   if (walker(rte->tablesample->repeatable, context))
-                       return true;
-               }
+               if (walker(rte->tablesample, context))
+                   return true;
                 break;
             case RTE_CTE:
                 /* nothing to do */
@@ -2782,6 +2791,17 @@ expression_tree_mutator(Node *node,
                 return (Node *) newnode;
             }
             break;
+       case T_TableSampleClause:
+           {
+               TableSampleClause *tsc = (TableSampleClause *) node;
+               TableSampleClause *newnode;
+
+               FLATCOPY(newnode, tsc, TableSampleClause);
+               MUTATE(newnode->args, tsc->args, List *);
+               MUTATE(newnode->repeatable, tsc->repeatable, Expr *);
+               return (Node *) newnode;
+           }
+           break;
         default:
             elog(ERROR, "unrecognized node type: %d",
                  (int) nodeTag(node));
@@ -2868,20 +2888,12 @@ range_table_mutator(List *rtable,
         switch (rte->rtekind)
         {
             case RTE_RELATION:
-               if (rte->tablesample)
-               {
-                   CHECKFLATCOPY(newrte->tablesample, rte->tablesample,
-                                 TableSampleClause);
-                   MUTATE(newrte->tablesample->args,
-                          newrte->tablesample->args,
-                          List *);
-                   MUTATE(newrte->tablesample->repeatable,
-                          newrte->tablesample->repeatable,
-                          Node *);
-               }
+               MUTATE(newrte->tablesample, rte->tablesample,
+                      TableSampleClause *);
+               /* we don't bother to copy eref, aliases, etc; OK? */
                 break;
             case RTE_CTE:
-               /* we don't bother to copy eref, aliases, etc; OK? */
+               /* nothing to do */
                 break;
             case RTE_SUBQUERY:
                 if (!(flags & QTW_IGNORE_RT_SUBQUERIES))
@@ -3316,6 +3328,19 @@ raw_expression_tree_walker(Node *node,
                     return true;
             }
             break;
+       case T_RangeTableSample:
+           {
+               RangeTableSample *rts = (RangeTableSample *) node;
+
+               if (walker(rts->relation, context))
+                   return true;
+               /* method name is deemed uninteresting */
+               if (walker(rts->args, context))
+                   return true;
+               if (walker(rts->repeatable, context))
+                   return true;
+           }
+           break;
         case T_TypeName:
             {
                 TypeName   *tn = (TypeName *) node;
@@ -3380,18 +3405,6 @@ raw_expression_tree_walker(Node *node,
             break;
         case T_CommonTableExpr:
             return walker(((CommonTableExpr *) node)->ctequery, context);
-       case T_RangeTableSample:
-           {
-               RangeTableSample *rts = (RangeTableSample *) node;
-
-               if (walker(rts->relation, context))
-                   return true;
-               if (walker(rts->repeatable, context))
-                   return true;
-               if (walker(rts->args, context))
-                   return true;
-           }
-           break;
         default:
             elog(ERROR, "unrecognized node type: %d",
                  (int) nodeTag(node));
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index 87304ba9bf65df548c5361bcf33eb7f45aaa0c83..81725d6e59a20d2e2dfc9efea995202e843afb7b 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -444,6 +444,16 @@ _outSeqScan(StringInfo str, const SeqScan *node)
     _outScanInfo(str, (const Scan *) node);
  }
  
+static void
+_outSampleScan(StringInfo str, const SampleScan *node)
+{
+   WRITE_NODE_TYPE("SAMPLESCAN");
+
+   _outScanInfo(str, (const Scan *) node);
+
+   WRITE_NODE_FIELD(tablesample);
+}
+
  static void
  _outIndexScan(StringInfo str, const IndexScan *node)
  {
@@ -591,14 +601,6 @@ _outCustomScan(StringInfo str, const CustomScan *node)
         node->methods->TextOutCustomScan(str, node);
  }
  
-static void
-_outSampleScan(StringInfo str, const SampleScan *node)
-{
-   WRITE_NODE_TYPE("SAMPLESCAN");
-
-   _outScanInfo(str, (const Scan *) node);
-}
-
  static void
  _outJoin(StringInfo str, const Join *node)
  {
@@ -2478,36 +2480,6 @@ _outCommonTableExpr(StringInfo str, const CommonTableExpr *node)
     WRITE_NODE_FIELD(ctecolcollations);
  }
  
-static void
-_outRangeTableSample(StringInfo str, const RangeTableSample *node)
-{
-   WRITE_NODE_TYPE("RANGETABLESAMPLE");
-
-   WRITE_NODE_FIELD(relation);
-   WRITE_STRING_FIELD(method);
-   WRITE_NODE_FIELD(repeatable);
-   WRITE_NODE_FIELD(args);
-}
-
-static void
-_outTableSampleClause(StringInfo str, const TableSampleClause *node)
-{
-   WRITE_NODE_TYPE("TABLESAMPLECLAUSE");
-
-   WRITE_OID_FIELD(tsmid);
-   WRITE_BOOL_FIELD(tsmseqscan);
-   WRITE_BOOL_FIELD(tsmpagemode);
-   WRITE_OID_FIELD(tsminit);
-   WRITE_OID_FIELD(tsmnextblock);
-   WRITE_OID_FIELD(tsmnexttuple);
-   WRITE_OID_FIELD(tsmexaminetuple);
-   WRITE_OID_FIELD(tsmend);
-   WRITE_OID_FIELD(tsmreset);
-   WRITE_OID_FIELD(tsmcost);
-   WRITE_NODE_FIELD(repeatable);
-   WRITE_NODE_FIELD(args);
-}
-
  static void
  _outSetOperationStmt(StringInfo str, const SetOperationStmt *node)
  {
@@ -2594,6 +2566,16 @@ _outRangeTblFunction(StringInfo str, const RangeTblFunction *node)
     WRITE_BITMAPSET_FIELD(funcparams);
  }
  
+static void
+_outTableSampleClause(StringInfo str, const TableSampleClause *node)
+{
+   WRITE_NODE_TYPE("TABLESAMPLECLAUSE");
+
+   WRITE_OID_FIELD(tsmhandler);
+   WRITE_NODE_FIELD(args);
+   WRITE_NODE_FIELD(repeatable);
+}
+
  static void
  _outAExpr(StringInfo str, const A_Expr *node)
  {
@@ -2845,6 +2827,18 @@ _outRangeFunction(StringInfo str, const RangeFunction *node)
     WRITE_NODE_FIELD(coldeflist);
  }
  
+static void
+_outRangeTableSample(StringInfo str, const RangeTableSample *node)
+{
+   WRITE_NODE_TYPE("RANGETABLESAMPLE");
+
+   WRITE_NODE_FIELD(relation);
+   WRITE_NODE_FIELD(method);
+   WRITE_NODE_FIELD(args);
+   WRITE_NODE_FIELD(repeatable);
+   WRITE_LOCATION_FIELD(location);
+}
+
  static void
  _outConstraint(StringInfo str, const Constraint *node)
  {
@@ -3002,6 +2996,9 @@ _outNode(StringInfo str, const void *obj)
             case T_SeqScan:
                 _outSeqScan(str, obj);
                 break;
+           case T_SampleScan:
+               _outSampleScan(str, obj);
+               break;
             case T_IndexScan:
                 _outIndexScan(str, obj);
                 break;
@@ -3038,9 +3035,6 @@ _outNode(StringInfo str, const void *obj)
             case T_CustomScan:
                 _outCustomScan(str, obj);
                 break;
-           case T_SampleScan:
-               _outSampleScan(str, obj);
-               break;
             case T_Join:
                 _outJoin(str, obj);
                 break;
@@ -3393,12 +3387,6 @@ _outNode(StringInfo str, const void *obj)
             case T_CommonTableExpr:
                 _outCommonTableExpr(str, obj);
                 break;
-           case T_RangeTableSample:
-               _outRangeTableSample(str, obj);
-               break;
-           case T_TableSampleClause:
-               _outTableSampleClause(str, obj);
-               break;
             case T_SetOperationStmt:
                 _outSetOperationStmt(str, obj);
                 break;
@@ -3408,6 +3396,9 @@ _outNode(StringInfo str, const void *obj)
             case T_RangeTblFunction:
                 _outRangeTblFunction(str, obj);
                 break;
+           case T_TableSampleClause:
+               _outTableSampleClause(str, obj);
+               break;
             case T_A_Expr:
                 _outAExpr(str, obj);
                 break;
@@ -3450,6 +3441,9 @@ _outNode(StringInfo str, const void *obj)
             case T_RangeFunction:
                 _outRangeFunction(str, obj);
                 break;
+           case T_RangeTableSample:
+               _outRangeTableSample(str, obj);
+               break;
             case T_Constraint:
                 _outConstraint(str, obj);
                 break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c

index f5a40fbfb44b8d648a9aa32c1089055c4d3c70a6..71be840eac9f76a44dfbf258fcec629cdd2268d7 100644 (file)
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -367,46 +367,6 @@ _readCommonTableExpr(void)
     READ_DONE();
  }
  
-/*
- * _readRangeTableSample
- */
-static RangeTableSample *
-_readRangeTableSample(void)
-{
-   READ_LOCALS(RangeTableSample);
-
-   READ_NODE_FIELD(relation);
-   READ_STRING_FIELD(method);
-   READ_NODE_FIELD(repeatable);
-   READ_NODE_FIELD(args);
-
-   READ_DONE();
-}
-
-/*
- * _readTableSampleClause
- */
-static TableSampleClause *
-_readTableSampleClause(void)
-{
-   READ_LOCALS(TableSampleClause);
-
-   READ_OID_FIELD(tsmid);
-   READ_BOOL_FIELD(tsmseqscan);
-   READ_BOOL_FIELD(tsmpagemode);
-   READ_OID_FIELD(tsminit);
-   READ_OID_FIELD(tsmnextblock);
-   READ_OID_FIELD(tsmnexttuple);
-   READ_OID_FIELD(tsmexaminetuple);
-   READ_OID_FIELD(tsmend);
-   READ_OID_FIELD(tsmreset);
-   READ_OID_FIELD(tsmcost);
-   READ_NODE_FIELD(repeatable);
-   READ_NODE_FIELD(args);
-
-   READ_DONE();
-}
-
  /*
   * _readSetOperationStmt
   */
@@ -1391,6 +1351,21 @@ _readRangeTblFunction(void)
     READ_DONE();
  }
  
+/*
+ * _readTableSampleClause
+ */
+static TableSampleClause *
+_readTableSampleClause(void)
+{
+   READ_LOCALS(TableSampleClause);
+
+   READ_OID_FIELD(tsmhandler);
+   READ_NODE_FIELD(args);
+   READ_NODE_FIELD(repeatable);
+
+   READ_DONE();
+}
+
  
  /*
   * parseNodeString
@@ -1426,10 +1401,6 @@ parseNodeString(void)
         return_value = _readRowMarkClause();
     else if (MATCH("COMMONTABLEEXPR", 15))
         return_value = _readCommonTableExpr();
-   else if (MATCH("RANGETABLESAMPLE", 16))
-       return_value = _readRangeTableSample();
-   else if (MATCH("TABLESAMPLECLAUSE", 17))
-       return_value = _readTableSampleClause();
     else if (MATCH("SETOPERATIONSTMT", 16))
         return_value = _readSetOperationStmt();
     else if (MATCH("ALIAS", 5))
@@ -1528,6 +1499,8 @@ parseNodeString(void)
         return_value = _readRangeTblEntry();
     else if (MATCH("RANGETBLFUNCTION", 16))
         return_value = _readRangeTblFunction();
+   else if (MATCH("TABLESAMPLECLAUSE", 17))
+       return_value = _readTableSampleClause();
     else if (MATCH("NOTIFY", 6))
         return_value = _readNotifyStmt();
     else if (MATCH("DECLARECURSOR", 13))
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c

index 888eeac5151842a285fb16c94da8f338ce89567b..1590be116750846b8957fe8a9ae1ed03b89d6917 100644 (file)
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -18,6 +18,7 @@
  #include <math.h>
  
  #include "access/sysattr.h"
+#include "access/tsmapi.h"
  #include "catalog/pg_class.h"
  #include "catalog/pg_operator.h"
  #include "foreign/fdwapi.h"
@@ -390,7 +391,7 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                 }
                 else if (rte->tablesample != NULL)
                 {
-                   /* Build sample scan on relation */
+                   /* Sampled relation */
                     set_tablesample_rel_pathlist(root, rel, rte);
                 }
                 else
@@ -480,11 +481,40 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  
  /*
   * set_tablesample_rel_size
- *   Set size estimates for a sampled relation.
+ *   Set size estimates for a sampled relation
   */
  static void
  set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  {
+   TableSampleClause *tsc = rte->tablesample;
+   TsmRoutine *tsm;
+   BlockNumber pages;
+   double      tuples;
+
+   /*
+    * Test any partial indexes of rel for applicability.  We must do this
+    * first since partial unique indexes can affect size estimates.
+    */
+   check_partial_indexes(root, rel);
+
+   /*
+    * Call the sampling method's estimation function to estimate the number
+    * of pages it will read and the number of tuples it will return.  (Note:
+    * we assume the function returns sane values.)
+    */
+   tsm = GetTsmRoutine(tsc->tsmhandler);
+   tsm->SampleScanGetSampleSize(root, rel, tsc->args,
+                                &pages, &tuples);
+
+   /*
+    * For the moment, because we will only consider a SampleScan path for the
+    * rel, it's okay to just overwrite the pages and tuples estimates for the
+    * whole relation.  If we ever consider multiple path types for sampled
+    * rels, we'll need more complication.
+    */
+   rel->pages = pages;
+   rel->tuples = tuples;
+
     /* Mark rel with estimated output rows, width, etc */
     set_baserel_size_estimates(root, rel);
  }
@@ -492,8 +522,6 @@ set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  /*
   * set_tablesample_rel_pathlist
   *   Build access paths for a sampled relation
- *
- * There is only one possible path - sampling scan
   */
  static void
  set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
@@ -502,15 +530,41 @@ set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *
     Path       *path;
  
     /*
-    * We don't support pushing join clauses into the quals of a seqscan, but
-    * it could still have required parameterization due to LATERAL refs in
-    * its tlist.
+    * We don't support pushing join clauses into the quals of a samplescan,
+    * but it could still have required parameterization due to LATERAL refs
+    * in its tlist or TABLESAMPLE arguments.
      */
     required_outer = rel->lateral_relids;
  
-   /* We only do sample scan if it was requested */
+   /* Consider sampled scan */
     path = create_samplescan_path(root, rel, required_outer);
-   rel->pathlist = list_make1(path);
+
+   /*
+    * If the sampling method does not support repeatable scans, we must avoid
+    * plans that would scan the rel multiple times.  Ideally, we'd simply
+    * avoid putting the rel on the inside of a nestloop join; but adding such
+    * a consideration to the planner seems like a great deal of complication
+    * to support an uncommon usage of second-rate sampling methods.  Instead,
+    * if there is a risk that the query might perform an unsafe join, just
+    * wrap the SampleScan in a Materialize node.  We can check for joins by
+    * counting the membership of all_baserels (note that this correctly
+    * counts inheritance trees as single rels).  If we're inside a subquery,
+    * we can't easily check whether a join might occur in the outer query, so
+    * just assume one is possible.
+    *
+    * GetTsmRoutine is relatively expensive compared to the other tests here,
+    * so check repeatable_across_scans last, even though that's a bit odd.
+    */
+   if ((root->query_level > 1 ||
+        bms_membership(root->all_baserels) != BMS_SINGLETON) &&
+    !(GetTsmRoutine(rte->tablesample->tsmhandler)->repeatable_across_scans))
+   {
+       path = (Path *) create_material_path(rel, path);
+   }
+
+   add_path(rel, path);
+
+   /* For the moment, at least, there are no other paths to consider */
  }
  
  /*
@@ -2450,7 +2504,33 @@ print_path(PlannerInfo *root, Path *path, int indent)
     switch (nodeTag(path))
     {
         case T_Path:
-           ptype = "SeqScan";
+           switch (path->pathtype)
+           {
+               case T_SeqScan:
+                   ptype = "SeqScan";
+                   break;
+               case T_SampleScan:
+                   ptype = "SampleScan";
+                   break;
+               case T_SubqueryScan:
+                   ptype = "SubqueryScan";
+                   break;
+               case T_FunctionScan:
+                   ptype = "FunctionScan";
+                   break;
+               case T_ValuesScan:
+                   ptype = "ValuesScan";
+                   break;
+               case T_CteScan:
+                   ptype = "CteScan";
+                   break;
+               case T_WorkTableScan:
+                   ptype = "WorkTableScan";
+                   break;
+               default:
+                   ptype = "???Path";
+                   break;
+           }
             break;
         case T_IndexPath:
             ptype = "IdxScan";
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index 0d302f66bee4c478dc4cc99729c79a75af727982..7069f6041102e6cb995316a3d951fe61adc0d367 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -74,6 +74,7 @@
  #include <math.h>
  
  #include "access/htup_details.h"
+#include "access/tsmapi.h"
  #include "executor/executor.h"
  #include "executor/nodeHash.h"
  #include "miscadmin.h"
@@ -223,64 +224,66 @@ cost_seqscan(Path *path, PlannerInfo *root,
   * cost_samplescan
   *   Determines and returns the cost of scanning a relation using sampling.
   *
- * From planner/optimizer perspective, we don't care all that much about cost
- * itself since there is always only one scan path to consider when sampling
- * scan is present, but number of rows estimation is still important.
- *
   * 'baserel' is the relation to be scanned
   * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
   */
  void
-cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
+cost_samplescan(Path *path, PlannerInfo *root,
+               RelOptInfo *baserel, ParamPathInfo *param_info)
  {
     Cost        startup_cost = 0;
     Cost        run_cost = 0;
+   RangeTblEntry *rte;
+   TableSampleClause *tsc;
+   TsmRoutine *tsm;
     double      spc_seq_page_cost,
                 spc_random_page_cost,
                 spc_page_cost;
     QualCost    qpqual_cost;
     Cost        cpu_per_tuple;
-   BlockNumber pages;
-   double      tuples;
-   RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
-   TableSampleClause *tablesample = rte->tablesample;
  
-   /* Should only be applied to base relations */
+   /* Should only be applied to base relations with tablesample clauses */
     Assert(baserel->relid > 0);
-   Assert(baserel->rtekind == RTE_RELATION);
+   rte = planner_rt_fetch(baserel->relid, root);
+   Assert(rte->rtekind == RTE_RELATION);
+   tsc = rte->tablesample;
+   Assert(tsc != NULL);
+   tsm = GetTsmRoutine(tsc->tsmhandler);
  
     /* Mark the path with the correct row estimate */
-   if (path->param_info)
-       path->rows = path->param_info->ppi_rows;
+   if (param_info)
+       path->rows = param_info->ppi_rows;
     else
         path->rows = baserel->rows;
  
-   /* Call the sampling method's costing function. */
-   OidFunctionCall6(tablesample->tsmcost, PointerGetDatum(root),
-                    PointerGetDatum(path), PointerGetDatum(baserel),
-                    PointerGetDatum(tablesample->args),
-                    PointerGetDatum(&pages), PointerGetDatum(&tuples));
-
     /* fetch estimated page cost for tablespace containing table */
     get_tablespace_page_costs(baserel->reltablespace,
                               &spc_random_page_cost,
                               &spc_seq_page_cost);
  
-
-   spc_page_cost = tablesample->tsmseqscan ? spc_seq_page_cost :
-       spc_random_page_cost;
+   /* if NextSampleBlock is used, assume random access, else sequential */
+   spc_page_cost = (tsm->NextSampleBlock != NULL) ?
+       spc_random_page_cost : spc_seq_page_cost;
  
     /*
-    * disk costs
+    * disk costs (recall that baserel->pages has already been set to the
+    * number of pages the sampling method will visit)
      */
-   run_cost += spc_page_cost * pages;
+   run_cost += spc_page_cost * baserel->pages;
  
-   /* CPU costs */
-   get_restriction_qual_cost(root, baserel, path->param_info, &qpqual_cost);
+   /*
+    * CPU costs (recall that baserel->tuples has already been set to the
+    * number of tuples the sampling method will select).  Note that we ignore
+    * execution cost of the TABLESAMPLE parameter expressions; they will be
+    * evaluated only once per scan, and in most usages they'll likely be
+    * simple constants anyway.  We also don't charge anything for the
+    * calculations the sampling method might do internally.
+    */
+   get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
  
     startup_cost += qpqual_cost.startup;
     cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
-   run_cost += cpu_per_tuple * tuples;
+   run_cost += cpu_per_tuple * baserel->tuples;
  
     path->startup_cost = startup_cost;
     path->total_cost = startup_cost + run_cost;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index 8d15c8ede90f9be93dec263ce61a0eb20dea5e54..f461586e08c5b3a2711eb55c003c26d2907388c7 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -102,7 +102,8 @@ static List *order_qual_clauses(PlannerInfo *root, List *clauses);
  static void copy_path_costsize(Plan *dest, Path *src);
  static void copy_plan_costsize(Plan *dest, Plan *src);
  static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid);
-static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid);
+static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid,
+               TableSampleClause *tsc);
  static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid,
                Oid indexid, List *indexqual, List *indexqualorig,
                List *indexorderby, List *indexorderbyorig,
@@ -1148,7 +1149,7 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path,
  
  /*
   * create_samplescan_plan
- *  Returns a samplecan plan for the base relation scanned by 'best_path'
+ *  Returns a samplescan plan for the base relation scanned by 'best_path'
   *  with restriction clauses 'scan_clauses' and targetlist 'tlist'.
   */
  static SampleScan *
@@ -1157,11 +1158,15 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path,
  {
     SampleScan *scan_plan;
     Index       scan_relid = best_path->parent->relid;
+   RangeTblEntry *rte;
+   TableSampleClause *tsc;
  
-   /* it should be a base rel with tablesample clause... */
+   /* it should be a base rel with a tablesample clause... */
     Assert(scan_relid > 0);
-   Assert(best_path->parent->rtekind == RTE_RELATION);
-   Assert(best_path->pathtype == T_SampleScan);
+   rte = planner_rt_fetch(scan_relid, root);
+   Assert(rte->rtekind == RTE_RELATION);
+   tsc = rte->tablesample;
+   Assert(tsc != NULL);
  
     /* Sort clauses into best execution order */
     scan_clauses = order_qual_clauses(root, scan_clauses);
@@ -1174,13 +1179,16 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path,
     {
         scan_clauses = (List *)
             replace_nestloop_params(root, (Node *) scan_clauses);
+       tsc = (TableSampleClause *)
+           replace_nestloop_params(root, (Node *) tsc);
     }
  
     scan_plan = make_samplescan(tlist,
                                 scan_clauses,
-                               scan_relid);
+                               scan_relid,
+                               tsc);
  
-   copy_path_costsize(&scan_plan->plan, best_path);
+   copy_path_costsize(&scan_plan->scan.plan, best_path);
  
     return scan_plan;
  }
@@ -2161,9 +2169,9 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path,
     ListCell   *lc;
  
     /* Recursively transform child paths. */
-   foreach (lc, best_path->custom_paths)
+   foreach(lc, best_path->custom_paths)
     {
-       Plan   *plan = create_plan_recurse(root, (Path *) lfirst(lc));
+       Plan       *plan = create_plan_recurse(root, (Path *) lfirst(lc));
  
         custom_plans = lappend(custom_plans, plan);
     }
@@ -3437,17 +3445,19 @@ make_seqscan(List *qptlist,
  static SampleScan *
  make_samplescan(List *qptlist,
                 List *qpqual,
-               Index scanrelid)
+               Index scanrelid,
+               TableSampleClause *tsc)
  {
     SampleScan *node = makeNode(SampleScan);
-   Plan       *plan = &node->plan;
+   Plan       *plan = &node->scan.plan;
  
     /* cost should be inserted by caller */
     plan->targetlist = qptlist;
     plan->qual = qpqual;
     plan->lefttree = NULL;
     plan->righttree = NULL;
-   node->scanrelid = scanrelid;
+   node->scan.scanrelid = scanrelid;
+   node->tablesample = tsc;
  
     return node;
  }
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c

index 00b2625d342ee375e884bf30945e4b29230a2118..701b99254db0d1745f3c2965bb8e445f58d4a45b 100644 (file)
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -306,7 +306,9 @@ extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, Index rtindex)
         return;
  
     /* Fetch the appropriate variables */
-   if (rte->rtekind == RTE_SUBQUERY)
+   if (rte->rtekind == RTE_RELATION)
+       vars = pull_vars_of_level((Node *) rte->tablesample, 0);
+   else if (rte->rtekind == RTE_SUBQUERY)
         vars = pull_vars_of_level((Node *) rte->subquery, 1);
     else if (rte->rtekind == RTE_FUNCTION)
         vars = pull_vars_of_level((Node *) rte->functions, 0);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index a6ce96efc48623c187233a1b04e45a64f8eeeae0..b95cc95e5d9a201949d89d713e0cfa77be6a1a22 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -505,14 +505,10 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
         if (rte->rtekind == RTE_RELATION)
         {
             if (rte->tablesample)
-           {
-               rte->tablesample->args = (List *)
-                   preprocess_expression(root, (Node *) rte->tablesample->args,
-                                         EXPRKIND_TABLESAMPLE);
-               rte->tablesample->repeatable = (Node *)
-                   preprocess_expression(root, rte->tablesample->repeatable,
+               rte->tablesample = (TableSampleClause *)
+                   preprocess_expression(root,
+                                         (Node *) rte->tablesample,
                                           EXPRKIND_TABLESAMPLE);
-           }
         }
         else if (rte->rtekind == RTE_SUBQUERY)
         {
@@ -697,11 +693,14 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
      * If the query has any join RTEs, replace join alias variables with
      * base-relation variables.  We must do this before sublink processing,
      * else sublinks expanded out from join aliases would not get processed.
-    * We can skip it in non-lateral RTE functions and VALUES lists, however,
-    * since they can't contain any Vars of the current query level.
+    * We can skip it in non-lateral RTE functions, VALUES lists, and
+    * TABLESAMPLE clauses, however, since they can't contain any Vars of the
+    * current query level.
      */
     if (root->hasJoinRTEs &&
-       !(kind == EXPRKIND_RTFUNC || kind == EXPRKIND_VALUES))
+       !(kind == EXPRKIND_RTFUNC ||
+         kind == EXPRKIND_VALUES ||
+         kind == EXPRKIND_TABLESAMPLE))
         expr = flatten_join_alias_vars(root, expr);
  
     /*
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c

index 258e541754aa165612ff41eb141fc2bc1db9198b..ea185d4b4cff6b98cb1da5a709b376595eb6d652 100644 (file)
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -372,9 +372,8 @@ flatten_rtes_walker(Node *node, PlannerGlobal *glob)
   *
   * In the flat rangetable, we zero out substructure pointers that are not
   * needed by the executor; this reduces the storage space and copying cost
- * for cached plans.  We keep only the tablesample field (which we'd otherwise
- * have to put in the plan tree, anyway); the ctename, alias and eref Alias
- * fields, which are needed by EXPLAIN; and the selectedCols, insertedCols and
+ * for cached plans.  We keep only the ctename, alias and eref Alias fields,
+ * which are needed by EXPLAIN, and the selectedCols, insertedCols and
   * updatedCols bitmaps, which are needed for executor-startup permissions
   * checking and for trigger event checking.
   */
@@ -388,6 +387,7 @@ add_rte_to_flat_rtable(PlannerGlobal *glob, RangeTblEntry *rte)
     memcpy(newrte, rte, sizeof(RangeTblEntry));
  
     /* zap unneeded sub-structure */
+   newrte->tablesample = NULL;
     newrte->subquery = NULL;
     newrte->joinaliasvars = NIL;
     newrte->functions = NIL;
@@ -456,11 +456,13 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
             {
                 SampleScan *splan = (SampleScan *) plan;
  
-               splan->scanrelid += rtoffset;
-               splan->plan.targetlist =
-                   fix_scan_list(root, splan->plan.targetlist, rtoffset);
-               splan->plan.qual =
-                   fix_scan_list(root, splan->plan.qual, rtoffset);
+               splan->scan.scanrelid += rtoffset;
+               splan->scan.plan.targetlist =
+                   fix_scan_list(root, splan->scan.plan.targetlist, rtoffset);
+               splan->scan.plan.qual =
+                   fix_scan_list(root, splan->scan.plan.qual, rtoffset);
+               splan->tablesample = (TableSampleClause *)
+                   fix_scan_expr(root, (Node *) splan->tablesample, rtoffset);
             }
             break;
         case T_IndexScan:
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c

index 4708b87f330b6145505afeac4a0be5eb00d441cb..f3038cdffda3ad9467935327df6c1cf7913798f1 100644 (file)
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -2216,7 +2216,12 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
             break;
  
         case T_SeqScan:
+           context.paramids = bms_add_members(context.paramids, scan_params);
+           break;
+
         case T_SampleScan:
+           finalize_primnode((Node *) ((SampleScan *) plan)->tablesample,
+                             &context);
             context.paramids = bms_add_members(context.paramids, scan_params);
             break;
  
@@ -2384,7 +2389,7 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
                     bms_add_members(context.paramids, scan_params);
  
                 /* child nodes if any */
-               foreach (lc, cscan->custom_plans)
+               foreach(lc, cscan->custom_plans)
                 {
                     context.paramids =
                         bms_add_members(context.paramids,
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c

index 92b0562843458b403517d2c008c9db5cd26a1f79..34144ccaf0fa69161541bf5785d9d20d9a162cda 100644 (file)
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -1091,12 +1091,15 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
  
             switch (child_rte->rtekind)
             {
+               case RTE_RELATION:
+                   if (child_rte->tablesample)
+                       child_rte->lateral = true;
+                   break;
                 case RTE_SUBQUERY:
                 case RTE_FUNCTION:
                 case RTE_VALUES:
                     child_rte->lateral = true;
                     break;
-               case RTE_RELATION:
                 case RTE_JOIN:
                 case RTE_CTE:
                     /* these can't contain any lateral references */
@@ -1909,6 +1912,13 @@ replace_vars_in_jointree(Node *jtnode,
             {
                 switch (rte->rtekind)
                 {
+                   case RTE_RELATION:
+                       /* shouldn't be marked LATERAL unless tablesample */
+                       Assert(rte->tablesample);
+                       rte->tablesample = (TableSampleClause *)
+                           pullup_replace_vars((Node *) rte->tablesample,
+                                               context);
+                       break;
                     case RTE_SUBQUERY:
                         rte->subquery =
                             pullup_replace_vars_subquery(rte->subquery,
@@ -1924,7 +1934,6 @@ replace_vars_in_jointree(Node *jtnode,
                             pullup_replace_vars((Node *) rte->values_lists,
                                                 context);
                         break;
-                   case RTE_RELATION:
                     case RTE_JOIN:
                     case RTE_CTE:
                         /* these shouldn't be marked LATERAL */
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index f7f33bbe7721b6a01865f1deaf2f25d0d6d96a96..935bc2b9667d33e7e8ddc9a7469b42ea0a2c0fdf 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -713,7 +713,7 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
  
  /*
   * create_samplescan_path
- *   Like seqscan but uses sampling function while scanning.
+ *   Creates a path node for a sampled table scan.
   */
  Path *
  create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
@@ -726,7 +726,7 @@ create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer
                                                      required_outer);
     pathnode->pathkeys = NIL;   /* samplescan has unordered result */
  
-   cost_samplescan(pathnode, root, rel);
+   cost_samplescan(pathnode, root, rel, pathnode->param_info);
  
     return pathnode;
  }
@@ -1773,6 +1773,8 @@ reparameterize_path(PlannerInfo *root, Path *path,
     {
         case T_SeqScan:
             return create_seqscan_path(root, rel, required_outer);
+       case T_SampleScan:
+           return (Path *) create_samplescan_path(root, rel, required_outer);
         case T_IndexScan:
         case T_IndexOnlyScan:
             {
@@ -1805,8 +1807,6 @@ reparameterize_path(PlannerInfo *root, Path *path,
         case T_SubqueryScan:
             return create_subqueryscan_path(root, rel, path->pathkeys,
                                             required_outer);
-       case T_SampleScan:
-           return (Path *) create_samplescan_path(root, rel, required_outer);
         default:
             break;
     }
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y

index 2b02a2e523380cf2a12d2171c63b4ed887cb7285..8f053e47e82df8aebb228138691dc131f61805f8 100644 (file)
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -457,8 +457,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
  %type <jexpr>  joined_table
  %type <range>  relation_expr
  %type <range>  relation_expr_opt_alias
+%type <node>   tablesample_clause opt_repeatable_clause
  %type <target> target_el single_set_clause set_target insert_column_item
-%type <node>   relation_expr_tablesample tablesample_clause opt_repeatable_clause
  
  %type <str>        generic_option_name
  %type <node>   generic_option_arg
@@ -10491,9 +10491,13 @@ table_ref: relation_expr opt_alias_clause
                     $1->alias = $2;
                     $$ = (Node *) $1;
                 }
-           | relation_expr_tablesample
+           | relation_expr opt_alias_clause tablesample_clause
                 {
-                   $$ = (Node *) $1;
+                   RangeTableSample *n = (RangeTableSample *) $3;
+                   $1->alias = $2;
+                   /* relation_expr goes inside the RangeTableSample node */
+                   n->relation = (Node *) $1;
+                   $$ = (Node *) n;
                 }
             | func_table func_alias_clause
                 {
@@ -10820,23 +10824,18 @@ relation_expr_opt_alias: relation_expr                    %prec UMINUS
                 }
         ;
  
-
-relation_expr_tablesample: relation_expr opt_alias_clause tablesample_clause
-               {
-                   RangeTableSample *n = (RangeTableSample *) $3;
-                   n->relation = $1;
-                   n->relation->alias = $2;
-                   $$ = (Node *) n;
-               }
-       ;
-
+/*
+ * TABLESAMPLE decoration in a FROM item
+ */
  tablesample_clause:
-           TABLESAMPLE ColId '(' expr_list ')' opt_repeatable_clause
+           TABLESAMPLE func_name '(' expr_list ')' opt_repeatable_clause
                 {
                     RangeTableSample *n = makeNode(RangeTableSample);
+                   /* n->relation will be filled in later */
                     n->method = $2;
                     n->args = $4;
                     n->repeatable = $6;
+                   n->location = @2;
                     $$ = (Node *) n;
                 }
         ;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c

index e90e1d68e3a535ad9c549e5fd242e0c6d69232c4..4e490b23b4e272fadc4272fbc06c7453465f2a9d 100644 (file)
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -18,8 +18,8 @@
  #include "miscadmin.h"
  
  #include "access/heapam.h"
+#include "access/tsmapi.h"
  #include "catalog/catalog.h"
-#include "access/htup_details.h"
  #include "catalog/heap.h"
  #include "catalog/pg_constraint.h"
  #include "catalog/pg_type.h"
@@ -43,7 +43,7 @@
  #include "utils/guc.h"
  #include "utils/lsyscache.h"
  #include "utils/rel.h"
-#include "utils/syscache.h"
+
  
  /* Convenience macro for the most common makeNamespaceItem() case */
  #define makeDefaultNSItem(rte) makeNamespaceItem(rte, true, true, false, true)
@@ -63,6 +63,8 @@ static RangeTblEntry *transformRangeSubselect(ParseState *pstate,
                         RangeSubselect *r);
  static RangeTblEntry *transformRangeFunction(ParseState *pstate,
                        RangeFunction *r);
+static TableSampleClause *transformRangeTableSample(ParseState *pstate,
+                         RangeTableSample *rts);
  static Node *transformFromClauseItem(ParseState *pstate, Node *n,
                         RangeTblEntry **top_rte, int *top_rti,
                         List **namespace);
@@ -423,40 +425,6 @@ transformJoinOnClause(ParseState *pstate, JoinExpr *j, List *namespace)
     return result;
  }
  
-static RangeTblEntry *
-transformTableSampleEntry(ParseState *pstate, RangeTableSample *rv)
-{
-   RangeTblEntry *rte = NULL;
-   CommonTableExpr *cte = NULL;
-   TableSampleClause *tablesample = NULL;
-
-   /* if relation has an unqualified name, it might be a CTE reference */
-   if (!rv->relation->schemaname)
-   {
-       Index       levelsup;
-
-       cte = scanNameSpaceForCTE(pstate, rv->relation->relname, &levelsup);
-   }
-
-   /* We first need to build a range table entry */
-   if (!cte)
-       rte = transformTableEntry(pstate, rv->relation);
-
-   if (!rte ||
-       (rte->relkind != RELKIND_RELATION &&
-        rte->relkind != RELKIND_MATVIEW))
-       ereport(ERROR,
-               (errcode(ERRCODE_SYNTAX_ERROR),
-                errmsg("TABLESAMPLE clause can only be used on tables and materialized views"),
-                parser_errposition(pstate, rv->relation->location)));
-
-   tablesample = ParseTableSample(pstate, rv->method, rv->repeatable,
-                                  rv->args, rv->relation->location);
-   rte->tablesample = tablesample;
-
-   return rte;
-}
-
  /*
   * transformTableEntry --- transform a RangeVar (simple relation reference)
   */
@@ -748,6 +716,109 @@ transformRangeFunction(ParseState *pstate, RangeFunction *r)
     return rte;
  }
  
+/*
+ * transformRangeTableSample --- transform a TABLESAMPLE clause
+ *
+ * Caller has already transformed rts->relation; here we validate the method
+ * name, its arguments and any REPEATABLE seed, and build a TableSampleClause.
+ */
+static TableSampleClause *
+transformRangeTableSample(ParseState *pstate, RangeTableSample *rts)
+{
+   TableSampleClause *tablesample;
+   Oid         handlerOid;
+   Oid         funcargtypes[1];
+   TsmRoutine *tsm;
+   List       *fargs;
+   ListCell   *larg,
+              *ltyp;
+
+   /*
+    * To validate the sample method name, look up the handler function, which
+    * has the same name, one dummy INTERNAL argument, and a result type of
+    * tsm_handler.  (Note: tablesample method names are not schema-qualified
+    * in the SQL standard; but since they are just functions to us, we allow
+    * schema qualification to resolve any potential ambiguity.)
+    */
+   funcargtypes[0] = INTERNALOID;
+
+   handlerOid = LookupFuncName(rts->method, 1, funcargtypes, true);
+
+   /* report a missing handler as no-such-method, not no-such-function */
+   if (!OidIsValid(handlerOid))
+       ereport(ERROR,
+               (errcode(ERRCODE_UNDEFINED_OBJECT),
+                errmsg("tablesample method %s does not exist",
+                       NameListToString(rts->method)),
+                parser_errposition(pstate, rts->location)));
+
+   /* the function we found must be declared to return tsm_handler */
+   if (get_func_rettype(handlerOid) != TSM_HANDLEROID)
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                errmsg("function %s must return type \"tsm_handler\"",
+                       NameListToString(rts->method)),
+                parser_errposition(pstate, rts->location)));
+
+   /* OK, run the handler to get TsmRoutine, for argument type info */
+   tsm = GetTsmRoutine(handlerOid);
+
+   tablesample = makeNode(TableSampleClause);
+   tablesample->tsmhandler = handlerOid;
+
+   /* argument count must equal the length of the method's parameterTypes */
+   if (list_length(rts->args) != list_length(tsm->parameterTypes))
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+         errmsg_plural("tablesample method %s requires %d argument, not %d",
+                       "tablesample method %s requires %d arguments, not %d",
+                       list_length(tsm->parameterTypes),
+                       NameListToString(rts->method),
+                       list_length(tsm->parameterTypes),
+                       list_length(rts->args)),
+                parser_errposition(pstate, rts->location)));
+
+   /*
+    * Transform each argument and coerce it to the matching parameterTypes
+    * entry.  Note we must also assign collations now, because
+    * assign_query_collations() doesn't examine any substructure of RTEs.
+    */
+   fargs = NIL;
+   forboth(larg, rts->args, ltyp, tsm->parameterTypes)
+   {
+       Node       *arg = (Node *) lfirst(larg);
+       Oid         argtype = lfirst_oid(ltyp);
+
+       arg = transformExpr(pstate, arg, EXPR_KIND_FROM_FUNCTION);
+       arg = coerce_to_specific_type(pstate, arg, argtype, "TABLESAMPLE");
+       assign_expr_collations(pstate, arg);
+       fargs = lappend(fargs, arg);
+   }
+   tablesample->args = fargs;
+
+   /* Process REPEATABLE (seed): rejected if unsupported, else made float8 */
+   if (rts->repeatable != NULL)
+   {
+       Node       *arg;
+
+       if (!tsm->repeatable_across_queries)
+           ereport(ERROR,
+                   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                 errmsg("tablesample method %s does not support REPEATABLE",
+                        NameListToString(rts->method)),
+                    parser_errposition(pstate, rts->location)));
+
+       arg = transformExpr(pstate, rts->repeatable, EXPR_KIND_FROM_FUNCTION);
+       arg = coerce_to_specific_type(pstate, arg, FLOAT8OID, "REPEATABLE");
+       assign_expr_collations(pstate, arg);
+       tablesample->repeatable = (Expr *) arg;
+   }
+   else
+       tablesample->repeatable = NULL;
+
+   return tablesample;
+}
+
  
  /*
   * transformFromClauseItem -
@@ -844,6 +915,33 @@ transformFromClauseItem(ParseState *pstate, Node *n,
         rtr->rtindex = rtindex;
         return (Node *) rtr;
     }
+   else if (IsA(n, RangeTableSample))
+   {
+       /* TABLESAMPLE clause (wrapping some other valid FROM node) */
+       RangeTableSample *rts = (RangeTableSample *) n;
+       Node       *rel;
+       RangeTblRef *rtr;
+       RangeTblEntry *rte;
+
+       /* Recursively transform the contained relation */
+       rel = transformFromClauseItem(pstate, rts->relation,
+                                     top_rte, top_rti, namespace);
+       /* Currently, grammar could only return a RangeVar as contained rel */
+       Assert(IsA(rel, RangeTblRef));
+       rtr = (RangeTblRef *) rel;
+       rte = rt_fetch(rtr->rtindex, pstate->p_rtable);
+       /* We only support this on plain relations and matviews */
+       if (rte->relkind != RELKIND_RELATION &&
+           rte->relkind != RELKIND_MATVIEW)
+           ereport(ERROR,
+                   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                    errmsg("TABLESAMPLE clause can only be applied to tables and materialized views"),
+                  parser_errposition(pstate, exprLocation(rts->relation))));
+
+       /* Transform TABLESAMPLE details and attach to the RTE */
+       rte->tablesample = transformRangeTableSample(pstate, rts);
+       return (Node *) rtr;
+   }
     else if (IsA(n, JoinExpr))
     {
         /* A newfangled join expression */
@@ -1165,26 +1263,6 @@ transformFromClauseItem(ParseState *pstate, Node *n,
  
         return (Node *) j;
     }
-   else if (IsA(n, RangeTableSample))
-   {
-       /* Tablesample reference */
-       RangeTableSample *rv = (RangeTableSample *) n;
-       RangeTblRef *rtr;
-       RangeTblEntry *rte = NULL;
-       int         rtindex;
-
-       rte = transformTableSampleEntry(pstate, rv);
-
-       /* assume new rte is at end */
-       rtindex = list_length(pstate->p_rtable);
-       Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
-       *top_rte = rte;
-       *top_rti = rtindex;
-       *namespace = list_make1(makeDefaultNSItem(rte));
-       rtr = makeNode(RangeTblRef);
-       rtr->rtindex = rtindex;
-       return (Node *) rtr;
-   }
     else
         elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n));
     return NULL;                /* can't get here, keep compiler quiet */
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c

index 430baff11652721778e3f37ba49b1707fea62247..554ca9d8c47e5f38eddb5579dd70160a2d5d363b 100644 (file)
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -18,7 +18,6 @@
  #include "catalog/pg_aggregate.h"
  #include "catalog/pg_proc.h"
  #include "catalog/pg_type.h"
-#include "catalog/pg_tablesample_method.h"
  #include "funcapi.h"
  #include "lib/stringinfo.h"
  #include "nodes/makefuncs.h"
@@ -27,7 +26,6 @@
  #include "parser/parse_clause.h"
  #include "parser/parse_coerce.h"
  #include "parser/parse_func.h"
-#include "parser/parse_expr.h"
  #include "parser/parse_relation.h"
  #include "parser/parse_target.h"
  #include "parser/parse_type.h"
@@ -769,148 +767,6 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
  }
  
  
-/*
- * ParseTableSample
- *
- * Parse TABLESAMPLE clause and process the arguments
- */
-TableSampleClause *
-ParseTableSample(ParseState *pstate, char *samplemethod, Node *repeatable,
-                List *sampleargs, int location)
-{
-   HeapTuple   tuple;
-   Form_pg_tablesample_method tsm;
-   Form_pg_proc procform;
-   TableSampleClause *tablesample;
-   List       *fargs;
-   ListCell   *larg;
-   int         nargs,
-               initnargs;
-   Oid         init_arg_types[FUNC_MAX_ARGS];
-
-   /* Load the tablesample method */
-   tuple = SearchSysCache1(TABLESAMPLEMETHODNAME, PointerGetDatum(samplemethod));
-   if (!HeapTupleIsValid(tuple))
-       ereport(ERROR,
-               (errcode(ERRCODE_UNDEFINED_OBJECT),
-                errmsg("tablesample method \"%s\" does not exist",
-                       samplemethod),
-                parser_errposition(pstate, location)));
-
-   tablesample = makeNode(TableSampleClause);
-   tablesample->tsmid = HeapTupleGetOid(tuple);
-
-   tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple);
-
-   tablesample->tsmseqscan = tsm->tsmseqscan;
-   tablesample->tsmpagemode = tsm->tsmpagemode;
-   tablesample->tsminit = tsm->tsminit;
-   tablesample->tsmnextblock = tsm->tsmnextblock;
-   tablesample->tsmnexttuple = tsm->tsmnexttuple;
-   tablesample->tsmexaminetuple = tsm->tsmexaminetuple;
-   tablesample->tsmend = tsm->tsmend;
-   tablesample->tsmreset = tsm->tsmreset;
-   tablesample->tsmcost = tsm->tsmcost;
-
-   ReleaseSysCache(tuple);
-
-   /* Validate the parameters against init function definition. */
-   tuple = SearchSysCache1(PROCOID,
-                           ObjectIdGetDatum(tablesample->tsminit));
-
-   if (!HeapTupleIsValid(tuple))       /* should not happen */
-       elog(ERROR, "cache lookup failed for function %u",
-            tablesample->tsminit);
-
-   procform = (Form_pg_proc) GETSTRUCT(tuple);
-   initnargs = procform->pronargs;
-   Assert(initnargs >= 3);
-
-   /*
-    * First parameter is used to pass the SampleScanState, second is seed
-    * (REPEATABLE), skip the processing for them here, just assert that the
-    * types are correct.
-    */
-   Assert(procform->proargtypes.values[0] == INTERNALOID);
-   Assert(procform->proargtypes.values[1] == INT4OID);
-   initnargs -= 2;
-   memcpy(init_arg_types, procform->proargtypes.values + 2,
-          initnargs * sizeof(Oid));
-
-   /* Now we are done with the catalog */
-   ReleaseSysCache(tuple);
-
-   /* Process repeatable (seed) */
-   if (repeatable != NULL)
-   {
-       Node       *arg = repeatable;
-
-       if (arg && IsA(arg, A_Const))
-       {
-           A_Const    *con = (A_Const *) arg;
-
-           if (con->val.type == T_Null)
-               ereport(ERROR,
-                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                 errmsg("REPEATABLE clause must be NOT NULL numeric value"),
-                        parser_errposition(pstate, con->location)));
-
-       }
-
-       arg = transformExpr(pstate, arg, EXPR_KIND_FROM_FUNCTION);
-       arg = coerce_to_specific_type(pstate, arg, INT4OID, "REPEATABLE");
-       tablesample->repeatable = arg;
-   }
-   else
-       tablesample->repeatable = NULL;
-
-   /* Check user provided expected number of arguments. */
-   if (list_length(sampleargs) != initnargs)
-       ereport(ERROR,
-               (errcode(ERRCODE_TOO_MANY_ARGUMENTS),
-       errmsg_plural("tablesample method \"%s\" expects %d argument got %d",
-                     "tablesample method \"%s\" expects %d arguments got %d",
-                     initnargs,
-                     samplemethod,
-                     initnargs, list_length(sampleargs)),
-                parser_errposition(pstate, location)));
-
-   /* Transform the arguments, typecasting them as needed. */
-   fargs = NIL;
-   nargs = 0;
-   foreach(larg, sampleargs)
-   {
-       Node       *inarg = (Node *) lfirst(larg);
-       Node       *arg = transformExpr(pstate, inarg, EXPR_KIND_FROM_FUNCTION);
-       Oid         argtype = exprType(arg);
-
-       if (argtype != init_arg_types[nargs])
-       {
-           if (!can_coerce_type(1, &argtype, &init_arg_types[nargs],
-                                COERCION_IMPLICIT))
-               ereport(ERROR,
-                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                  errmsg("wrong parameter %d for tablesample method \"%s\"",
-                         nargs + 1, samplemethod),
-                        errdetail("Expected type %s got %s.",
-                                  format_type_be(init_arg_types[nargs]),
-                                  format_type_be(argtype)),
-                        parser_errposition(pstate, exprLocation(inarg))));
-
-           arg = coerce_type(pstate, arg, argtype, init_arg_types[nargs], -1,
-                             COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, -1);
-       }
-
-       fargs = lappend(fargs, arg);
-       nargs++;
-   }
-
-   /* Pass the arguments down */
-   tablesample->args = fargs;
-
-   return tablesample;
-}
-
  /* func_match_argtypes()
   *
   * Given a list of candidate functions (having the right name and number
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c

index bbd6b77c5eab640ee1af8accaaaa6ae686aa1313..1734e48241ada102ac66cfd2788ffff0837dfcab 100644 (file)
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -418,6 +418,10 @@ rewriteRuleAction(Query *parsetree,
  
             switch (rte->rtekind)
             {
+               case RTE_RELATION:
+                   sub_action->hasSubLinks =
+                       checkExprHasSubLink((Node *) rte->tablesample);
+                   break;
                 case RTE_FUNCTION:
                     sub_action->hasSubLinks =
                         checkExprHasSubLink((Node *) rte->functions);
diff --git a/src/backend/utils/adt/pseudotypes.c b/src/backend/utils/adt/pseudotypes.c

index 9ad460abfbdbcc8f1e75613b00841b7926af5592..5b809aa7d4996d55d467aa570e8dca9650031a31 100644 (file)
--- a/src/backend/utils/adt/pseudotypes.c
+++ b/src/backend/utils/adt/pseudotypes.c
@@ -373,6 +373,33 @@ fdw_handler_out(PG_FUNCTION_ARGS)
  }
  
  
+/*
+ * tsm_handler_in      - input routine for pseudo-type TSM_HANDLER; just reports an error.
+ */
+Datum
+tsm_handler_in(PG_FUNCTION_ARGS)
+{
+   ereport(ERROR,
+           (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+            errmsg("cannot accept a value of type tsm_handler")));
+
+   PG_RETURN_VOID();           /* keep compiler quiet */
+}
+
+/*
+ * tsm_handler_out     - output routine for pseudo-type TSM_HANDLER; just reports an error.
+ */
+Datum
+tsm_handler_out(PG_FUNCTION_ARGS)
+{
+   ereport(ERROR,
+           (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+            errmsg("cannot display a value of type tsm_handler")));
+
+   PG_RETURN_VOID();           /* keep compiler quiet */
+}
+
+
  /*
   * internal_in     - input routine for pseudo-type INTERNAL.
   */
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c

index 5112cac90173595d56c7cc14beba8ebfcc9113e7..51391f6a4e0d16e4e647f7845425e11795ca7508 100644 (file)
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -32,7 +32,6 @@
  #include "catalog/pg_opclass.h"
  #include "catalog/pg_operator.h"
  #include "catalog/pg_proc.h"
-#include "catalog/pg_tablesample_method.h"
  #include "catalog/pg_trigger.h"
  #include "catalog/pg_type.h"
  #include "commands/defrem.h"
@@ -349,8 +348,6 @@ static void make_ruledef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
              int prettyFlags);
  static void make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
              int prettyFlags, int wrapColumn);
-static void get_tablesample_def(TableSampleClause *tablesample,
-                   deparse_context *context);
  static void get_query_def(Query *query, StringInfo buf, List *parentnamespace,
               TupleDesc resultDesc,
               int prettyFlags, int wrapColumn, int startIndent);
@@ -416,6 +413,8 @@ static void get_column_alias_list(deparse_columns *colinfo,
  static void get_from_clause_coldeflist(RangeTblFunction *rtfunc,
                            deparse_columns *colinfo,
                            deparse_context *context);
+static void get_tablesample_def(TableSampleClause *tablesample,
+                   deparse_context *context);
  static void get_opclass_name(Oid opclass, Oid actual_datatype,
                  StringInfo buf);
  static Node *processIndirection(Node *node, deparse_context *context,
@@ -4235,50 +4234,6 @@ make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
     heap_close(ev_relation, AccessShareLock);
  }
  
-/* ----------
- * get_tablesample_def         - Convert TableSampleClause back to SQL
- * ----------
- */
-static void
-get_tablesample_def(TableSampleClause *tablesample, deparse_context *context)
-{
-   StringInfo  buf = context->buf;
-   HeapTuple   tuple;
-   Form_pg_tablesample_method tsm;
-   char       *tsmname;
-   int         nargs;
-   ListCell   *l;
-
-   /* Load the tablesample method */
-   tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tablesample->tsmid));
-   if (!HeapTupleIsValid(tuple))
-       ereport(ERROR,
-               (errcode(ERRCODE_UNDEFINED_OBJECT),
-                errmsg("cache lookup failed for tablesample method %u",
-                       tablesample->tsmid)));
-
-   tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple);
-   tsmname = NameStr(tsm->tsmname);
-   appendStringInfo(buf, " TABLESAMPLE %s (", quote_identifier(tsmname));
-
-   ReleaseSysCache(tuple);
-
-   nargs = 0;
-   foreach(l, tablesample->args)
-   {
-       if (nargs++ > 0)
-           appendStringInfoString(buf, ", ");
-       get_rule_expr((Node *) lfirst(l), context, true);
-   }
-   appendStringInfoChar(buf, ')');
-
-   if (tablesample->repeatable != NULL)
-   {
-       appendStringInfoString(buf, " REPEATABLE (");
-       get_rule_expr(tablesample->repeatable, context, true);
-       appendStringInfoChar(buf, ')');
-   }
-}
  
  /* ----------
   * get_query_def           - Parse back one query parsetree
@@ -8781,9 +8736,6 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
                                  only_marker(rte),
                                  generate_relation_name(rte->relid,
                                                         context->namespaces));
-
-               if (rte->tablesample)
-                   get_tablesample_def(rte->tablesample, context);
                 break;
             case RTE_SUBQUERY:
                 /* Subquery RTE */
@@ -8963,6 +8915,10 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
             /* Else print column aliases as needed */
             get_column_alias_list(colinfo, context);
         }
+
+       /* Tablesample clause must go after any alias */
+       if (rte->rtekind == RTE_RELATION && rte->tablesample)
+           get_tablesample_def(rte->tablesample, context);
     }
     else if (IsA(jtnode, JoinExpr))
     {
@@ -9162,6 +9118,44 @@ get_from_clause_coldeflist(RangeTblFunction *rtfunc,
     appendStringInfoChar(buf, ')');
  }
  
+/*
+ * get_tablesample_def         - deparse a TableSampleClause, appending to context->buf
+ */
+static void
+get_tablesample_def(TableSampleClause *tablesample, deparse_context *context)
+{
+   StringInfo  buf = context->buf;
+   Oid         argtypes[1];
+   int         nargs;
+   ListCell   *l;
+
+   /*
+    * The method is printed as its handler function's name, which we should
+    * qualify if it wouldn't be resolved by lookup in the current search path.
+    */
+   argtypes[0] = INTERNALOID;
+   appendStringInfo(buf, " TABLESAMPLE %s (",
+                    generate_function_name(tablesample->tsmhandler, 1,
+                                           NIL, argtypes,
+                                           false, NULL, EXPR_KIND_NONE));
+
+   nargs = 0;
+   foreach(l, tablesample->args)
+   {
+       if (nargs++ > 0)
+           appendStringInfoString(buf, ", ");
+       get_rule_expr((Node *) lfirst(l), context, false);
+   }
+   appendStringInfoChar(buf, ')');
+
+   if (tablesample->repeatable != NULL)
+   {
+       appendStringInfoString(buf, " REPEATABLE (");
+       get_rule_expr((Node *) tablesample->repeatable, context, false);
+       appendStringInfoChar(buf, ')');
+   }
+}
+
  /*
   * get_opclass_name            - fetch name of an index operator class
   *
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c

index 7b32247d34eae9eb8dadc9514d0b7e2c88828ff0..1dc293297d93edb933b743dc302fb7bcad757290 100644 (file)
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -32,7 +32,6 @@
  #include "catalog/pg_range.h"
  #include "catalog/pg_statistic.h"
  #include "catalog/pg_transform.h"
-#include "catalog/pg_tablesample_method.h"
  #include "catalog/pg_type.h"
  #include "miscadmin.h"
  #include "nodes/makefuncs.h"
@@ -2997,29 +2996,3 @@ get_range_subtype(Oid rangeOid)
     else
         return InvalidOid;
  }
-
-/*             ---------- PG_TABLESAMPLE_METHOD CACHE ----------            */
-
-/*
- * get_tablesample_method_name - given a tablesample method OID,
- * look up the name or NULL if not found
- */
-char *
-get_tablesample_method_name(Oid tsmid)
-{
-   HeapTuple   tuple;
-
-   tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tsmid));
-   if (HeapTupleIsValid(tuple))
-   {
-       Form_pg_tablesample_method tup =
-       (Form_pg_tablesample_method) GETSTRUCT(tuple);
-       char       *result;
-
-       result = pstrdup(NameStr(tup->tsmname));
-       ReleaseSysCache(tuple);
-       return result;
-   }
-   else
-       return NULL;
-}
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c

index b6333e362f018b467aa34b065457268be6dc3bde..efce7b9a3d13b0ce73aff22292ca9f42ee31b60c 100644 (file)
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -56,7 +56,6 @@
  #include "catalog/pg_shseclabel.h"
  #include "catalog/pg_replication_origin.h"
  #include "catalog/pg_statistic.h"
-#include "catalog/pg_tablesample_method.h"
  #include "catalog/pg_tablespace.h"
  #include "catalog/pg_transform.h"
  #include "catalog/pg_ts_config.h"
@@ -667,28 +666,6 @@ static const struct cachedesc cacheinfo[] = {
         },
         128
     },
-   {TableSampleMethodRelationId,       /* TABLESAMPLEMETHODNAME */
-       TableSampleMethodNameIndexId,
-       1,
-       {
-           Anum_pg_tablesample_method_tsmname,
-           0,
-           0,
-           0,
-       },
-       2
-   },
-   {TableSampleMethodRelationId,       /* TABLESAMPLEMETHODOID */
-       TableSampleMethodOidIndexId,
-       1,
-       {
-           ObjectIdAttributeNumber,
-           0,
-           0,
-           0,
-       },
-       2
-   },
     {TableSpaceRelationId,      /* TABLESPACEOID */
         TablespaceOidIndexId,
         1,
diff --git a/src/backend/utils/errcodes.txt b/src/backend/utils/errcodes.txt

index 6cc3ed96c447bd5b7a743d015cd14ec06b9b3f0a..7b97d45a53a12ed849c13bef3630753109ba1f49 100644 (file)
--- a/src/backend/utils/errcodes.txt
+++ b/src/backend/utils/errcodes.txt
@@ -177,6 +177,8 @@ Section: Class 22 - Data Exception
  2201B    E    ERRCODE_INVALID_REGULAR_EXPRESSION                             invalid_regular_expression
  2201W    E    ERRCODE_INVALID_ROW_COUNT_IN_LIMIT_CLAUSE                      invalid_row_count_in_limit_clause
  2201X    E    ERRCODE_INVALID_ROW_COUNT_IN_RESULT_OFFSET_CLAUSE              invalid_row_count_in_result_offset_clause
+2202H    E    ERRCODE_INVALID_TABLESAMPLE_ARGUMENT                           invalid_tablesample_argument
+2202G    E    ERRCODE_INVALID_TABLESAMPLE_REPEAT                             invalid_tablesample_repeat
  22009    E    ERRCODE_INVALID_TIME_ZONE_DISPLACEMENT_VALUE                   invalid_time_zone_displacement_value
  2200C    E    ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER                        invalid_use_of_escape_character
  2200G    E    ERRCODE_MOST_SPECIFIC_TYPE_MISMATCH                            most_specific_type_mismatch
diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c

index 6191f7973441b2ac7dbc473cfc2058e35d0da4d3..4142e01123f79fe880cac07889e9227d4b6678d5 100644 (file)
--- a/src/backend/utils/misc/sampling.c
+++ b/src/backend/utils/misc/sampling.c
@@ -228,7 +228,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
  void
  sampler_random_init_state(long seed, SamplerRandomState randstate)
  {
-   randstate[0] = RAND48_SEED_0;
+   randstate[0] = 0x330e;      /* same as pg_erand48, but could be anything */
     randstate[1] = (unsigned short) seed;
     randstate[2] = (unsigned short) (seed >> 16);
  }
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c

index 9596af6a7b35ad57e3c73e19b1841f20b8c0384c..ece05155490b8755cff88231be9a3e01b6e3b773 100644 (file)
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -738,13 +738,15 @@ static const SchemaQuery Query_for_list_of_matviews = {
  "  WHERE substring(pg_catalog.quote_ident(evtname),1,%d)='%s'"
  
  #define Query_for_list_of_tablesample_methods \
-" SELECT pg_catalog.quote_ident(tsmname) "\
-"   FROM pg_catalog.pg_tablesample_method "\
-"  WHERE substring(pg_catalog.quote_ident(tsmname),1,%d)='%s'"
+" SELECT pg_catalog.quote_ident(proname) "\
+"   FROM pg_catalog.pg_proc "\
+"  WHERE prorettype = 'pg_catalog.tsm_handler'::pg_catalog.regtype AND "\
+"        proargtypes[0] = 'pg_catalog.internal'::pg_catalog.regtype AND "\
+"        substring(pg_catalog.quote_ident(proname),1,%d)='%s'"
  
  #define Query_for_list_of_policies \
  " SELECT pg_catalog.quote_ident(polname) "\
-"   FROM pg_catalog.pg_policy " \
+"   FROM pg_catalog.pg_policy "\
  "  WHERE substring(pg_catalog.quote_ident(polname),1,%d)='%s'"
  
  #define Query_for_list_of_tables_for_policy \
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h

index 31139cbd0ccc736908afa9218dc10e9d61dedd99..75e6b72f9e0204913254548a42322a6fa7708d63 100644 (file)
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -116,11 +116,13 @@ extern HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot,
                   int nkeys, ScanKey key);
  extern HeapScanDesc heap_beginscan_sampling(Relation relation,
                         Snapshot snapshot, int nkeys, ScanKey key,
-                       bool allow_strat, bool allow_pagemode);
+                    bool allow_strat, bool allow_sync, bool allow_pagemode);
  extern void heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk,
                    BlockNumber endBlk);
  extern void heapgetpage(HeapScanDesc scan, BlockNumber page);
  extern void heap_rescan(HeapScanDesc scan, ScanKey key);
+extern void heap_rescan_set_params(HeapScanDesc scan, ScanKey key,
+                    bool allow_strat, bool allow_sync, bool allow_pagemode);
  extern void heap_endscan(HeapScanDesc scan);
  extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction);
  
diff --git a/src/include/access/tablesample.h b/src/include/access/tablesample.h

deleted file mode 100644 (file)

index a02e93d..0000000
--- a/src/include/access/tablesample.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * tablesample.h
- *       Public header file for TABLESAMPLE clause interface
- *
- *
- * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/include/access/tablesample.h
- *
- *-------------------------------------------------------------------------
- */
-#ifndef TABLESAMPLE_H
-#define TABLESAMPLE_H
-
-#include "access/relscan.h"
-#include "executor/executor.h"
-
-typedef struct TableSampleDesc
-{
-   HeapScanDesc heapScan;
-   TupleDesc   tupDesc;        /* Mostly useful for tsmexaminetuple */
-
-   void       *tsmdata;        /* private method data */
-
-   /* These point to he function of the TABLESAMPLE Method. */
-   FmgrInfo    tsminit;
-   FmgrInfo    tsmnextblock;
-   FmgrInfo    tsmnexttuple;
-   FmgrInfo    tsmexaminetuple;
-   FmgrInfo    tsmreset;
-   FmgrInfo    tsmend;
-} TableSampleDesc;
-
-
-extern TableSampleDesc *tablesample_init(SampleScanState *scanstate,
-                TableSampleClause *tablesample);
-extern HeapTuple tablesample_getnext(TableSampleDesc *desc);
-extern void tablesample_reset(TableSampleDesc *desc);
-extern void tablesample_end(TableSampleDesc *desc);
-extern HeapTuple tablesample_source_getnext(TableSampleDesc *desc);
-extern HeapTuple tablesample_source_gettup(TableSampleDesc *desc, ItemPointer tid,
-                         bool *visible);
-
-extern Datum tsm_system_init(PG_FUNCTION_ARGS);
-extern Datum tsm_system_nextblock(PG_FUNCTION_ARGS);
-extern Datum tsm_system_nexttuple(PG_FUNCTION_ARGS);
-extern Datum tsm_system_end(PG_FUNCTION_ARGS);
-extern Datum tsm_system_reset(PG_FUNCTION_ARGS);
-extern Datum tsm_system_cost(PG_FUNCTION_ARGS);
-
-extern Datum tsm_bernoulli_init(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_nextblock(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_end(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_reset(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_cost(PG_FUNCTION_ARGS);
-
-
-#endif
diff --git a/src/include/access/tsmapi.h b/src/include/access/tsmapi.h

new file mode 100644 (file)

index 0000000..4b59ffa
--- /dev/null
+++ b/src/include/access/tsmapi.h
@@ -0,0 +1,81 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsmapi.h
+ *   API for tablesample methods
+ *
+ * Copyright (c) 2015, PostgreSQL Global Development Group
+ *
+ * src/include/access/tsmapi.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TSMAPI_H
+#define TSMAPI_H
+
+#include "nodes/execnodes.h"
+#include "nodes/relation.h"
+
+
+/*
+ * Callback function signatures --- see tablesample-method.sgml for more info.
+ */
+
+typedef void (*SampleScanGetSampleSize_function) (PlannerInfo *root,   /* planner-side callback; argument roles below are inferred from names/types -- confirm in tablesample-method.sgml */
+                                                        RelOptInfo *baserel,   /* presumably the base relation being sampled */
+                                                           List *paramexprs,   /* presumably the TABLESAMPLE argument expressions */
+                                                         BlockNumber *pages,   /* pointer argument: presumably an output (page estimate) -- confirm */
+                                                             double *tuples);  /* pointer argument: presumably an output (tuple estimate) -- confirm */
+
+typedef void (*InitSampleScan_function) (SampleScanState *node,    /* executor state node for the sample scan */
+                                                    int eflags);   /* presumably the executor's eflags bitmask -- confirm */
+
+typedef void (*BeginSampleScan_function) (SampleScanState *node,   /* executor state node for the sample scan */
+                                                     Datum *params,    /* array of Datum values; presumably the evaluated TABLESAMPLE arguments */
+                                                     int nparams,  /* presumably the number of entries in params */
+                                                     uint32 seed); /* 32-bit random seed (cf. the seed field of SampleScanState) */
+
+typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node);   /* yields a BlockNumber; the end-of-scan convention is not visible here -- see tablesample-method.sgml */
+
+typedef OffsetNumber (*NextSampleTuple_function) (SampleScanState *node,   /* yields an OffsetNumber; end-of-block convention not visible here */
+                                                        BlockNumber blockno,   /* presumably the block currently being sampled */
+                                                    OffsetNumber maxoffset);   /* presumably the largest tuple offset on that block */
+
+typedef void (*EndSampleScan_function) (SampleScanState *node);    /* takes only the scan state and returns nothing */
+
+/*
+ * TsmRoutine is the struct returned by a tablesample method's handler
+ * function.  It provides pointers to the callback functions needed by the
+ * planner and executor, as well as additional information about the method.
+ *
+ * More function pointers are likely to be added in the future.
+ * Therefore it's recommended that the handler initialize the struct with
+ * makeNode(TsmRoutine) so that all fields are set to NULL.  This will
+ * ensure that no fields are accidentally left undefined.
+ */
+typedef struct TsmRoutine
+{
+   NodeTag     type;           /* node tag; T_TsmRoutine is the NodeTag value added for this struct */
+
+   /* List of datatype OIDs for the arguments of the TABLESAMPLE clause */
+   List       *parameterTypes;
+
+   /* Can method produce repeatable samples across, or even within, queries? */
+   bool        repeatable_across_queries;  /* the "across queries" part of the question above */
+   bool        repeatable_across_scans;    /* the "within a query" part: repeatable from scan to scan */
+
+   /* Functions for planning a SampleScan on a physical table */
+   SampleScanGetSampleSize_function SampleScanGetSampleSize;   /* not marked "can be NULL" -- presumably required */
+
+   /* Functions for executing a SampleScan on a physical table */
+   InitSampleScan_function InitSampleScan;     /* can be NULL */
+   BeginSampleScan_function BeginSampleScan;   /* not marked "can be NULL" -- presumably required */
+   NextSampleBlock_function NextSampleBlock;   /* can be NULL */
+   NextSampleTuple_function NextSampleTuple;   /* not marked "can be NULL" -- presumably required */
+   EndSampleScan_function EndSampleScan;       /* can be NULL */
+} TsmRoutine;
+
+
+/* Functions in access/tablesample/tablesample.c */
+extern TsmRoutine *GetTsmRoutine(Oid tsmhandler);
+
+#endif   /* TSMAPI_H */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 8f6685fd0cce89d8060dd8468b7575bd85bb412d..0e983279313cd59a8a0df57e9d17829cf5967b41 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
   */
  
  /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 201507171
+#define CATALOG_VERSION_NO 201507252
  
  #endif
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h

index 748aadde94598945c715a2244d8b12249792eba9..c38958d6c5e26985ceeef95ca0df4defe42bf711 100644 (file)
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -316,11 +316,6 @@ DECLARE_UNIQUE_INDEX(pg_replication_origin_roiident_index, 6001, on pg_replicati
  DECLARE_UNIQUE_INDEX(pg_replication_origin_roname_index, 6002, on pg_replication_origin using btree(roname text_pattern_ops));
  #define ReplicationOriginNameIndex 6002
  
-DECLARE_UNIQUE_INDEX(pg_tablesample_method_name_index, 3331, on pg_tablesample_method using btree(tsmname name_ops));
-#define TableSampleMethodNameIndexId  3331
-DECLARE_UNIQUE_INDEX(pg_tablesample_method_oid_index, 3332, on pg_tablesample_method using btree(oid oid_ops));
-#define TableSampleMethodOidIndexId  3332
-
  /* last step of initialization script: build the indexes declared above */
  BUILD_INDICES
  
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h

index 1d68ad7209e1c65333491a4a646945dfc525f2ab..09bf1439c46f8f68f0f3ebbb275ebe84fc84c752 100644 (file)
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -3734,6 +3734,16 @@ DATA(insert OID = 3116 (  fdw_handler_in PGNSP PGUID 12 1 0 0 0 f f f f f f i 1
  DESCR("I/O");
  DATA(insert OID = 3117 (  fdw_handler_out  PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2275 "3115" _null_ _null_ _null_ _null_ _null_ fdw_handler_out _null_ _null_ _null_ ));
  DESCR("I/O");
+DATA(insert OID = 3311 (  tsm_handler_in   PGNSP PGUID 12 1 0 0 0 f f f f f f i 1 0 3310 "2275" _null_ _null_ _null_ _null_ _null_ tsm_handler_in _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 3312 (  tsm_handler_out  PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2275 "3310" _null_ _null_ _null_ _null_ _null_ tsm_handler_out _null_ _null_ _null_ ));
+DESCR("I/O");
+
+/* tablesample method handlers */
+DATA(insert OID = 3313 (  bernoulli            PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 3310 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_handler _null_ _null_ _null_ ));
+DESCR("BERNOULLI tablesample method handler");
+DATA(insert OID = 3314 (  system           PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 3310 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_handler _null_ _null_ _null_ ));
+DESCR("SYSTEM tablesample method handler");
  
  /* cryptographic */
  DATA(insert OID =  2311 (  md5    PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ md5_text _null_ _null_ _null_ ));
@@ -5321,33 +5331,6 @@ DESCR("get an individual replication origin's replication progress");
  DATA(insert OID = 6014 ( pg_show_replication_origin_status PGNSP PGUID 12 1 100 0 0 f f f f f t v 0 0 2249 "" "{26,25,3220,3220}" "{o,o,o,o}" "{local_id, external_id, remote_lsn, local_lsn}" _null_ _null_ pg_show_replication_origin_status _null_ _null_ _null_ ));
  DESCR("get progress for all replication origins");
  
-/* tablesample */
-DATA(insert OID = 3335 (  tsm_system_init      PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_system_init _null_ _null_ _null_ ));
-DESCR("tsm_system_init(internal)");
-DATA(insert OID = 3336 (  tsm_system_nextblock PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nextblock _null_ _null_ _null_ ));
-DESCR("tsm_system_nextblock(internal)");
-DATA(insert OID = 3337 (  tsm_system_nexttuple PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nexttuple _null_ _null_ _null_ ));
-DESCR("tsm_system_nexttuple(internal)");
-DATA(insert OID = 3338 (  tsm_system_end       PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_end _null_ _null_ _null_ ));
-DESCR("tsm_system_end(internal)");
-DATA(insert OID = 3339 (  tsm_system_reset     PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_reset _null_ _null_ _null_ ));
-DESCR("tsm_system_reset(internal)");
-DATA(insert OID = 3340 (  tsm_system_cost      PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_system_cost _null_ _null_ _null_ ));
-DESCR("tsm_system_cost(internal)");
-
-DATA(insert OID = 3341 (  tsm_bernoulli_init       PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_init _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_init(internal)");
-DATA(insert OID = 3342 (  tsm_bernoulli_nextblock  PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nextblock _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_nextblock(internal)");
-DATA(insert OID = 3343 (  tsm_bernoulli_nexttuple  PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nexttuple _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_nexttuple(internal)");
-DATA(insert OID = 3344 (  tsm_bernoulli_end            PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_end _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_end(internal)");
-DATA(insert OID = 3345 (  tsm_bernoulli_reset      PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_reset _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_reset(internal)");
-DATA(insert OID = 3346 (  tsm_bernoulli_cost       PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_cost _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_cost(internal)");
-
  /*
   * Symbolic values for provolatile column: these indicate whether the result
   * of a function is dependent *only* on the values of its explicit arguments,
diff --git a/src/include/catalog/pg_tablesample_method.h b/src/include/catalog/pg_tablesample_method.h

deleted file mode 100644 (file)

index b422414..0000000
--- a/src/include/catalog/pg_tablesample_method.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * pg_tablesample_method.h
- *   definition of the table scan methods.
- *
- *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/include/catalog/pg_tablesample_method.h
- *
- *
- *-------------------------------------------------------------------------
- */
-#ifndef PG_TABLESAMPLE_METHOD_H
-#define PG_TABLESAMPLE_METHOD_H
-
-#include "catalog/genbki.h"
-#include "catalog/objectaddress.h"
-
-/* ----------------
- *     pg_tablesample_method definition.  cpp turns this into
- *     typedef struct FormData_pg_tablesample_method
- * ----------------
- */
-#define TableSampleMethodRelationId 3330
-
-CATALOG(pg_tablesample_method,3330)
-{
-   NameData    tsmname;        /* tablesample method name */
-   bool        tsmseqscan;     /* does this method scan whole table
-                                * sequentially? */
-   bool        tsmpagemode;    /* does this method scan page at a time? */
-   regproc     tsminit;        /* init scan function */
-   regproc     tsmnextblock;   /* function returning next block to sample or
-                                * InvalidBlockOffset if finished */
-   regproc     tsmnexttuple;   /* function returning next tuple offset from
-                                * current block or InvalidOffsetNumber if end
-                                * of the block was reacher */
-   regproc     tsmexaminetuple;/* optional function which can examine tuple
-                                * contents and decide if tuple should be
-                                * returned or not */
-   regproc     tsmend;         /* end scan function */
-   regproc     tsmreset;       /* reset state - used by rescan */
-   regproc     tsmcost;        /* costing function */
-} FormData_pg_tablesample_method;
-
-/* ----------------
- *     Form_pg_tablesample_method corresponds to a pointer to a tuple with
- *     the format of pg_tablesample_method relation.
- * ----------------
- */
-typedef FormData_pg_tablesample_method *Form_pg_tablesample_method;
-
-/* ----------------
- *     compiler constants for pg_tablesample_method
- * ----------------
- */
-#define Natts_pg_tablesample_method                    10
-#define Anum_pg_tablesample_method_tsmname         1
-#define Anum_pg_tablesample_method_tsmseqscan      2
-#define Anum_pg_tablesample_method_tsmpagemode     3
-#define Anum_pg_tablesample_method_tsminit         4
-#define Anum_pg_tablesample_method_tsmnextblock        5
-#define Anum_pg_tablesample_method_tsmnexttuple        6
-#define Anum_pg_tablesample_method_tsmexaminetuple 7
-#define Anum_pg_tablesample_method_tsmend          8
-#define Anum_pg_tablesample_method_tsmreset            9
-#define Anum_pg_tablesample_method_tsmcost         10
-
-/* ----------------
- *     initial contents of pg_tablesample_method
- * ----------------
- */
-
-DATA(insert OID = 3333 ( system false true tsm_system_init tsm_system_nextblock tsm_system_nexttuple - tsm_system_end tsm_system_reset tsm_system_cost ));
-DESCR("SYSTEM table sampling method");
-DATA(insert OID = 3334 ( bernoulli true false tsm_bernoulli_init tsm_bernoulli_nextblock tsm_bernoulli_nexttuple - tsm_bernoulli_end tsm_bernoulli_reset tsm_bernoulli_cost ));
-DESCR("BERNOULLI table sampling method");
-
-#endif   /* PG_TABLESAMPLE_METHOD_H */
diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h

index da123f6c4957e70b3fecc07b948609a6bd21bea2..7dc95c8d2c651ef3fc577af3d62cefd8f4d24169 100644 (file)
--- a/src/include/catalog/pg_type.h
+++ b/src/include/catalog/pg_type.h
@@ -694,6 +694,8 @@ DATA(insert OID = 3500 ( anyenum        PGNSP PGUID  4 t p P f t \054 0 0 0 anyenum_in
  #define ANYENUMOID     3500
  DATA(insert OID = 3115 ( fdw_handler   PGNSP PGUID  4 t p P f t \054 0 0 0 fdw_handler_in fdw_handler_out - - - - - i p f 0 -1 0 0 _null_ _null_ _null_ ));
  #define FDW_HANDLEROID 3115
+DATA(insert OID = 3310 ( tsm_handler   PGNSP PGUID  4 t p P f t \054 0 0 0 tsm_handler_in tsm_handler_out - - - - - i p f 0 -1 0 0 _null_ _null_ _null_ ));
+#define TSM_HANDLEROID 3310
  DATA(insert OID = 3831 ( anyrange      PGNSP PGUID  -1 f p P f t \054 0 0 0 anyrange_in anyrange_out - - - - - d x f 0 -1 0 0 _null_ _null_ _null_ ));
  #define ANYRANGEOID        3831
  
diff --git a/src/include/executor/nodeSamplescan.h b/src/include/executor/nodeSamplescan.h

index 4b769daec8b917e90587597c92c61ba82ddd5bac..a0cc6ce467a9f58bf54b7eaeba9789c315a4fe55 100644 (file)
--- a/src/include/executor/nodeSamplescan.h
+++ b/src/include/executor/nodeSamplescan.h
@@ -4,7 +4,7 @@
   *
   *
   *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * src/include/executor/nodeSamplescan.h
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index 541ee187356850c69da8e26fc2615cb968f1a62d..303fc3c1c77dca2f4c2abd51440c8614b1e88f61 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1257,13 +1257,22 @@ typedef struct ScanState
   */
  typedef ScanState SeqScanState;
  
-/*
- * SampleScan
+/* ----------------
+ *  SampleScanState information
+ * ----------------
   */
  typedef struct SampleScanState
  {
     ScanState   ss;
-   struct TableSampleDesc *tsdesc;
+   List       *args;           /* expr states for TABLESAMPLE params */
+   ExprState  *repeatable;     /* expr state for REPEATABLE expr */
+   /* use struct pointer to avoid including tsmapi.h here */
+   struct TsmRoutine *tsmroutine;      /* descriptor for tablesample method */
+   void       *tsm_state;      /* tablesample method can keep state here */
+   bool        use_bulkread;   /* use bulkread buffer access strategy? */
+   bool        use_pagemode;   /* use page-at-a-time visibility checking? */
+   bool        begun;          /* false means need to call BeginSampleScan */
+   uint32      seed;           /* random seed */
  } SampleScanState;
  
  /*
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h

index f8acda4eede0190e444d0d37e84cd7743884d4e7..748e434a27a21a47874b3ae50844ff39c8f54a24 100644 (file)
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -51,6 +51,7 @@ typedef enum NodeTag
     T_BitmapOr,
     T_Scan,
     T_SeqScan,
+   T_SampleScan,
     T_IndexScan,
     T_IndexOnlyScan,
     T_BitmapIndexScan,
@@ -61,7 +62,6 @@ typedef enum NodeTag
     T_ValuesScan,
     T_CteScan,
     T_WorkTableScan,
-   T_SampleScan,
     T_ForeignScan,
     T_CustomScan,
     T_Join,
@@ -400,6 +400,7 @@ typedef enum NodeTag
     T_WindowDef,
     T_RangeSubselect,
     T_RangeFunction,
+   T_RangeTableSample,
     T_TypeName,
     T_ColumnDef,
     T_IndexElem,
@@ -407,6 +408,7 @@ typedef enum NodeTag
     T_DefElem,
     T_RangeTblEntry,
     T_RangeTblFunction,
+   T_TableSampleClause,
     T_WithCheckOption,
     T_SortGroupClause,
     T_GroupingSet,
@@ -425,8 +427,6 @@ typedef enum NodeTag
     T_OnConflictClause,
     T_CommonTableExpr,
     T_RoleSpec,
-   T_RangeTableSample,
-   T_TableSampleClause,
  
     /*
      * TAGS FOR REPLICATION GRAMMAR PARSE NODES (replnodes.h)
@@ -452,7 +452,8 @@ typedef enum NodeTag
     T_WindowObjectData,         /* private in nodeWindowAgg.c */
     T_TIDBitmap,                /* in nodes/tidbitmap.h */
     T_InlineCodeBlock,          /* in nodes/parsenodes.h */
-   T_FdwRoutine                /* in foreign/fdwapi.h */
+   T_FdwRoutine,               /* in foreign/fdwapi.h */
+   T_TsmRoutine                /* in access/tsmapi.h */
  } NodeTag;
  
  /*
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h

index b336ff9c6abaf0bc40bc34d6b3bd527d0e2c5393..151c93a078ea009aa8bd229a64581cb6cce79fc9 100644 (file)
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -337,26 +337,6 @@ typedef struct FuncCall
     int         location;       /* token location, or -1 if unknown */
  } FuncCall;
  
-/*
- * TableSampleClause - a sampling method information
- */
-typedef struct TableSampleClause
-{
-   NodeTag     type;
-   Oid         tsmid;
-   bool        tsmseqscan;
-   bool        tsmpagemode;
-   Oid         tsminit;
-   Oid         tsmnextblock;
-   Oid         tsmnexttuple;
-   Oid         tsmexaminetuple;
-   Oid         tsmend;
-   Oid         tsmreset;
-   Oid         tsmcost;
-   Node       *repeatable;
-   List       *args;
-} TableSampleClause;
-
  /*
   * A_Star - '*' representing all columns of a table or compound field
   *
@@ -558,19 +538,23 @@ typedef struct RangeFunction
  } RangeFunction;
  
  /*
- * RangeTableSample - represents <table> TABLESAMPLE <method> (<params>) REPEATABLE (<num>)
+ * RangeTableSample - TABLESAMPLE appearing in a raw FROM clause
   *
- * SQL Standard specifies only one parameter which is percentage. But we allow
- * custom tablesample methods which may need different input arguments so we
- * accept list of arguments.
+ * This node, appearing only in raw parse trees, represents
+ *     <relation> TABLESAMPLE <method> (<params>) REPEATABLE (<num>)
+ * Currently, the <relation> can only be a RangeVar, but we might in future
+ * allow RangeSubselect and other options.  Note that the RangeTableSample
+ * is wrapped around the node representing the <relation>, rather than being
+ * a subfield of it.
   */
  typedef struct RangeTableSample
  {
     NodeTag     type;
-   RangeVar   *relation;
-   char       *method;         /* sampling method */
-   Node       *repeatable;
-   List       *args;           /* arguments for sampling method */
+   Node       *relation;       /* relation to be sampled */
+   List       *method;         /* sampling method name (possibly qualified) */
+   List       *args;           /* argument(s) for sampling method */
+   Node       *repeatable;     /* REPEATABLE expression, or NULL if none */
+   int         location;       /* method name location, or -1 if unknown */
  } RangeTableSample;
  
  /*
@@ -810,7 +794,7 @@ typedef struct RangeTblEntry
      */
     Oid         relid;          /* OID of the relation */
     char        relkind;        /* relation kind (see pg_class.relkind) */
-   TableSampleClause *tablesample;     /* sampling method and parameters */
+   struct TableSampleClause *tablesample;      /* sampling info, or NULL */
  
     /*
      * Fields valid for a subquery RTE (else NULL):
@@ -912,6 +896,19 @@ typedef struct RangeTblFunction
     Bitmapset  *funcparams;     /* PARAM_EXEC Param IDs affecting this func */
  } RangeTblFunction;
  
+/*
+ * TableSampleClause - TABLESAMPLE appearing in a transformed FROM clause
+ *
+ * Unlike RangeTableSample, this is a subnode of the relevant RangeTblEntry.
+ */
+typedef struct TableSampleClause
+{
+   NodeTag     type;
+   Oid         tsmhandler;     /* OID of the tablesample handler function */
+   List       *args;           /* tablesample argument expression(s) */
+   Expr       *repeatable;     /* REPEATABLE expression, or NULL if none */
+} TableSampleClause;
+
  /*
   * WithCheckOption -
   *     representation of WITH CHECK OPTION checks to be applied to new tuples
@@ -2520,7 +2517,7 @@ typedef struct RenameStmt
  typedef struct AlterObjectSchemaStmt
  {
     NodeTag     type;
-   ObjectType objectType;      /* OBJECT_TABLE, OBJECT_TYPE, etc */
+   ObjectType  objectType;     /* OBJECT_TABLE, OBJECT_TYPE, etc */
     RangeVar   *relation;       /* in case it's a table */
     List       *object;         /* in case it's some other object */
     List       *objarg;         /* argument types, if applicable */
@@ -2535,7 +2532,7 @@ typedef struct AlterObjectSchemaStmt
  typedef struct AlterOwnerStmt
  {
     NodeTag     type;
-   ObjectType objectType;      /* OBJECT_TABLE, OBJECT_TYPE, etc */
+   ObjectType  objectType;     /* OBJECT_TABLE, OBJECT_TYPE, etc */
     RangeVar   *relation;       /* in case it's a table */
     List       *object;         /* in case it's some other object */
     List       *objarg;         /* argument types, if applicable */
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h

index 5f538f3e8ccb5a9298af8d2a836d3c08fc8d611d..0654d0266cd6e7d17a7fb75c2834b6b6e793fbf4 100644 (file)
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -287,7 +287,12 @@ typedef Scan SeqScan;
   *     table sample scan node
   * ----------------
   */
-typedef Scan SampleScan;
+typedef struct SampleScan
+{
+   Scan        scan;           /* embedded generic Scan fields (SampleScan used to be a bare typedef of Scan) */
+   /* use struct pointer to avoid including parsenodes.h here */
+   struct TableSampleClause *tablesample;  /* TABLESAMPLE info: handler function OID, argument exprs, REPEATABLE expr */
+} SampleScan;
  
  /* ----------------
   *     index scan node
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h

index 24003ae3591b9a2e1d74709a5f60485297f7b146..dd43e45d0c0a5b6c98f54c654f06b0c08ed7bff9 100644 (file)
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -68,7 +68,8 @@ extern double index_pages_fetched(double tuples_fetched, BlockNumber pages,
                     double index_pages, PlannerInfo *root);
  extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
              ParamPathInfo *param_info);
-extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel);
+extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
+               ParamPathInfo *param_info);
  extern void cost_index(IndexPath *path, PlannerInfo *root,
            double loop_count);
  extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
diff --git a/src/include/parser/parse_func.h b/src/include/parser/parse_func.h

index 3194da463948a34e96478738e30368c37d1efedf..32646918e20c4b8101d0b5936144e3f8ff3d5776 100644 (file)
--- a/src/include/parser/parse_func.h
+++ b/src/include/parser/parse_func.h
@@ -33,11 +33,6 @@ typedef enum
  extern Node *ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
                   FuncCall *fn, int location);
  
-extern TableSampleClause *ParseTableSample(ParseState *pstate,
-                char *samplemethod,
-                Node *repeatable, List *args,
-                int location);
-
  extern FuncDetailCode func_get_detail(List *funcname,
                 List *fargs, List *fargnames,
                 int nargs, Oid *argtypes,
diff --git a/src/include/port.h b/src/include/port.h

index 71113c03944bd7f88991ef9953ae4ea15e86f443..3787cbfb7614cd318a3885bc59554128fb063502 100644 (file)
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -357,10 +357,6 @@ extern off_t ftello(FILE *stream);
  #endif
  #endif
  
-#define RAND48_SEED_0  (0x330e)
-#define RAND48_SEED_1  (0xabcd)
-#define RAND48_SEED_2  (0x1234)
-
  extern double pg_erand48(unsigned short xseed[3]);
  extern long pg_lrand48(void);
  extern void pg_srand48(long seed);
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h

index fcb0bf0ce8e94c376c683dcf7a8bb18d2b83b0a7..49caa56557420ed96a8adf2ee89d8adfb6157304 100644 (file)
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -566,6 +566,8 @@ extern Datum language_handler_in(PG_FUNCTION_ARGS);
  extern Datum language_handler_out(PG_FUNCTION_ARGS);
  extern Datum fdw_handler_in(PG_FUNCTION_ARGS);
  extern Datum fdw_handler_out(PG_FUNCTION_ARGS);
+extern Datum tsm_handler_in(PG_FUNCTION_ARGS);
+extern Datum tsm_handler_out(PG_FUNCTION_ARGS);
  extern Datum internal_in(PG_FUNCTION_ARGS);
  extern Datum internal_out(PG_FUNCTION_ARGS);
  extern Datum opaque_in(PG_FUNCTION_ARGS);
@@ -1213,6 +1215,12 @@ extern Datum ginqueryarrayextract(PG_FUNCTION_ARGS);
  extern Datum ginarrayconsistent(PG_FUNCTION_ARGS);
  extern Datum ginarraytriconsistent(PG_FUNCTION_ARGS);
  
+/* access/tablesample/bernoulli.c */
+extern Datum tsm_bernoulli_handler(PG_FUNCTION_ARGS);
+
+/* access/tablesample/system.c */
+extern Datum tsm_system_handler(PG_FUNCTION_ARGS);
+
  /* access/transam/twophase.c */
  extern Datum pg_prepared_xact(PG_FUNCTION_ARGS);
  
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h

index a40c9b12732da07f80c1c410a1651cd0db748188..971153843296d55612f201ace510af8ccbee8cdd 100644 (file)
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -156,7 +156,6 @@ extern void free_attstatsslot(Oid atttype,
  extern char *get_namespace_name(Oid nspid);
  extern char *get_namespace_name_or_temp(Oid nspid);
  extern Oid get_range_subtype(Oid rangeOid);
-extern char *get_tablesample_method_name(Oid tsmid);
  
  #define type_is_array(typid)  (get_element_type(typid) != InvalidOid)
  /* type_is_array_domain accepts both plain arrays and domains over arrays */
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h

index f06f03a996f260455d8d7c9a0cbae7c8badf4e22..18404e266eb63ec0384e7cf8c75d87a6683ef0ca 100644 (file)
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -81,8 +81,6 @@ enum SysCacheIdentifier
     REPLORIGNAME,
     RULERELNAME,
     STATRELATTINH,
-   TABLESAMPLEMETHODNAME,
-   TABLESAMPLEMETHODOID,
     TABLESPACEOID,
     TRFOID,
     TRFTYPELANG,
diff --git a/src/port/erand48.c b/src/port/erand48.c

index 12efd8193c4ed7b424961b44ba65de750debd1d6..9d471197c354056c8903a5a9e1c6b0023419f1d1 100644 (file)
--- a/src/port/erand48.c
+++ b/src/port/erand48.c
@@ -33,6 +33,9 @@
  
  #include <math.h>
  
+#define RAND48_SEED_0  (0x330e)
+#define RAND48_SEED_1  (0xabcd)
+#define RAND48_SEED_2  (0x1234)
  #define RAND48_MULT_0  (0xe66d)
  #define RAND48_MULT_1  (0xdeec)
  #define RAND48_MULT_2  (0x0005)
diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out

index 414299a694114112410b8b62682cafcc448af2e8..e7c242cd22d480c0b566a9a8dc617f07fb03b6f6 100644 (file)
--- a/src/test/regress/expected/rowsecurity.out
+++ b/src/test/regress/expected/rowsecurity.out
@@ -101,15 +101,17 @@ NOTICE:  f_leak => great manga
    44 |   8 |      1 | rls_regress_user2 | great manga           | manga
  (4 rows)
  
-SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
-NOTICE:  f_leak => my first novel
+-- try a sampled version
+SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
+  WHERE f_leak(dtitle) ORDER BY did;
  NOTICE:  f_leak => my first manga
  NOTICE:  f_leak => great science fiction
+NOTICE:  f_leak => great manga
   did | cid | dlevel |      dauthor      |        dtitle         
  -----+-----+--------+-------------------+-----------------------
-   1 |  11 |      1 | rls_regress_user1 | my first novel
     4 |  44 |      1 | rls_regress_user1 | my first manga
     6 |  22 |      1 | rls_regress_user2 | great science fiction
+   8 |  44 |      1 | rls_regress_user2 | great manga
  (3 rows)
  
  -- viewpoint from rls_regress_user2
@@ -156,20 +158,20 @@ NOTICE:  f_leak => great manga
    44 |   8 |      1 | rls_regress_user2 | great manga           | manga
  (8 rows)
  
-SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
-NOTICE:  f_leak => my first novel
-NOTICE:  f_leak => my second novel
+-- try a sampled version
+SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
+  WHERE f_leak(dtitle) ORDER BY did;
  NOTICE:  f_leak => my first manga
+NOTICE:  f_leak => my second manga
  NOTICE:  f_leak => great science fiction
-NOTICE:  f_leak => great technology book
+NOTICE:  f_leak => great manga
   did | cid | dlevel |      dauthor      |        dtitle         
  -----+-----+--------+-------------------+-----------------------
-   1 |  11 |      1 | rls_regress_user1 | my first novel
-   2 |  11 |      2 | rls_regress_user1 | my second novel
     4 |  44 |      1 | rls_regress_user1 | my first manga
+   5 |  44 |      2 | rls_regress_user1 | my second manga
     6 |  22 |      1 | rls_regress_user2 | great science fiction
-   7 |  33 |      2 | rls_regress_user2 | great technology book
-(5 rows)
+   8 |  44 |      1 | rls_regress_user2 | great manga
+(4 rows)
  
  EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
                          QUERY PLAN                        
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out

index cd5337531d4b41e90aa469c8f33c2a9f13ca8ddd..1e5b0b9a2c43a522d088417dfa249168b3e5eeab 100644 (file)
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2202,6 +2202,10 @@ street| SELECT r.name,
     FROM ONLY road r,
      real_city c
    WHERE (c.outline ## r.thepath);
+test_tablesample_v1| SELECT test_tablesample.id
+   FROM test_tablesample TABLESAMPLE system ((10 * 2)) REPEATABLE (2);
+test_tablesample_v2| SELECT test_tablesample.id
+   FROM test_tablesample TABLESAMPLE system (99);
  toyemp| SELECT emp.name,
      emp.age,
      emp.location,
diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out

index 14acd16da3b3d00d2f1630ac3c7c941f0b4e986f..eb0bc88ef1fb27daee22dd4a2de684df35507417 100644 (file)
--- a/src/test/regress/expected/sanity_check.out
+++ b/src/test/regress/expected/sanity_check.out
@@ -128,7 +128,6 @@ pg_shdepend|t
  pg_shdescription|t
  pg_shseclabel|t
  pg_statistic|t
-pg_tablesample_method|t
  pg_tablespace|t
  pg_transform|t
  pg_trigger|t
diff --git a/src/test/regress/expected/tablesample.out b/src/test/regress/expected/tablesample.out

index 04e5eb8b807e2d1c95adae29a57e40742fe0ccf0..727a83543973436293d6f6371374a25b54a66078 100644 (file)
--- a/src/test/regress/expected/tablesample.out
+++ b/src/test/regress/expected/tablesample.out
@@ -1,107 +1,123 @@
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i;
-SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10);
+-- use fillfactor so we don't have to load too much data to get multiple pages
+INSERT INTO test_tablesample
+  SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i);
+SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (0);
   id 
  ----
-  0
-  1
-  2
    3
    4
    5
-  9
-(7 rows)
-
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999);
- id 
-----
    6
    7
    8
-(3 rows)
+(6 rows)
  
-SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
- count 
--------
-    10
-(1 row)
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (0);
+ id 
+----
+(0 rows)
  
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
   id 
  ----
-  0
-  1
-  2
+  3
+  4
+  5
    6
    7
    8
-  9
-(7 rows)
+(6 rows)
  
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (0);
   id 
  ----
-  0
-  1
-  3
    4
    5
+  6
+  7
+  8
  (5 rows)
  
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (0);
   id 
  ----
-  0
-  5
-(2 rows)
+  7
+(1 row)
  
-CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
-CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
-SELECT pg_get_viewdef('test_tablesample_v1'::regclass);
-                                 pg_get_viewdef                                 
---------------------------------------------------------------------------------
-  SELECT test_tablesample.id                                                   +
-    FROM test_tablesample TABLESAMPLE system (((10 * 2))::real) REPEATABLE (2);
+-- 100% should give repeatable count results (ie, all rows) in any case
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
+ count 
+-------
+    10
  (1 row)
  
-SELECT pg_get_viewdef('test_tablesample_v2'::regclass);
-                      pg_get_viewdef                       
------------------------------------------------------------
-  SELECT test_tablesample.id                              +
-    FROM test_tablesample TABLESAMPLE system ((99)::real);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (1+2);
+ count 
+-------
+    10
+(1 row)
+
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (0.4);
+ count 
+-------
+    10
  (1 row)
  
+CREATE VIEW test_tablesample_v1 AS
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
+CREATE VIEW test_tablesample_v2 AS
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
+\d+ test_tablesample_v1
+          View "public.test_tablesample_v1"
+ Column |  Type   | Modifiers | Storage | Description 
+--------+---------+-----------+---------+-------------
+ id     | integer |           | plain   | 
+View definition:
+ SELECT test_tablesample.id
+   FROM test_tablesample TABLESAMPLE system ((10 * 2)) REPEATABLE (2);
+
+\d+ test_tablesample_v2
+          View "public.test_tablesample_v2"
+ Column |  Type   | Modifiers | Storage | Description 
+--------+---------+-----------+---------+-------------
+ id     | integer |           | plain   | 
+View definition:
+ SELECT test_tablesample.id
+   FROM test_tablesample TABLESAMPLE system (99);
+
+-- check a sampled query doesn't affect cursor in progress
  BEGIN;
-DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+DECLARE tablesample_cur CURSOR FOR
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
  FETCH FIRST FROM tablesample_cur;
   id 
  ----
-  0
+  3
  (1 row)
  
  FETCH NEXT FROM tablesample_cur;
   id 
  ----
-  1
+  4
  (1 row)
  
  FETCH NEXT FROM tablesample_cur;
   id 
  ----
-  2
+  5
  (1 row)
  
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
   id 
  ----
-  0
-  1
-  2
    3
    4
    5
-  9
-(7 rows)
+  6
+  7
+  8
+(6 rows)
  
  FETCH NEXT FROM tablesample_cur;
   id 
@@ -124,19 +140,19 @@ FETCH NEXT FROM tablesample_cur;
  FETCH FIRST FROM tablesample_cur;
   id 
  ----
-  0
+  3
  (1 row)
  
  FETCH NEXT FROM tablesample_cur;
   id 
  ----
-  1
+  4
  (1 row)
  
  FETCH NEXT FROM tablesample_cur;
   id 
  ----
-  2
+  5
  (1 row)
  
  FETCH NEXT FROM tablesample_cur;
@@ -159,41 +175,129 @@ FETCH NEXT FROM tablesample_cur;
  
  CLOSE tablesample_cur;
  END;
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
-                                  QUERY PLAN                                   
--------------------------------------------------------------------------------
- Sample Scan (system) on test_tablesample  (cost=0.00..26.35 rows=635 width=4)
+EXPLAIN (COSTS OFF)
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (2);
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Sample Scan on test_tablesample
+   Sampling: system ('50'::real) REPEATABLE ('2'::double precision)
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM test_tablesample_v1;
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Sample Scan on test_tablesample
+   Sampling: system ('20'::real) REPEATABLE ('2'::double precision)
+(2 rows)
+
+-- check inheritance behavior
+explain (costs off)
+  select count(*) from person tablesample bernoulli (100);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Aggregate
+   ->  Append
+         ->  Sample Scan on person
+               Sampling: bernoulli ('100'::real)
+         ->  Sample Scan on emp
+               Sampling: bernoulli ('100'::real)
+         ->  Sample Scan on student
+               Sampling: bernoulli ('100'::real)
+         ->  Sample Scan on stud_emp
+               Sampling: bernoulli ('100'::real)
+(10 rows)
+
+select count(*) from person tablesample bernoulli (100);
+ count 
+-------
+    58
  (1 row)
  
-EXPLAIN SELECT * FROM test_tablesample_v1;
-                                  QUERY PLAN                                   
--------------------------------------------------------------------------------
- Sample Scan (system) on test_tablesample  (cost=0.00..10.54 rows=254 width=4)
+select count(*) from person;
+ count 
+-------
+    58
+(1 row)
+
+-- check that collations get assigned within the tablesample arguments
+SELECT count(*) FROM test_tablesample TABLESAMPLE bernoulli (('1'::text < '0'::text)::int);
+ count 
+-------
+     0
+(1 row)
+
+-- check behavior during rescans, as well as correct handling of min/max pct
+select * from
+  (values (0),(100)) v(pct),
+  lateral (select count(*) from tenk1 tablesample bernoulli (pct)) ss;
+ pct | count 
+-----+-------
+   0 |     0
+ 100 | 10000
+(2 rows)
+
+select * from
+  (values (0),(100)) v(pct),
+  lateral (select count(*) from tenk1 tablesample system (pct)) ss;
+ pct | count 
+-----+-------
+   0 |     0
+ 100 | 10000
+(2 rows)
+
+explain (costs off)
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+  group by pct;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ HashAggregate
+   Group Key: "*VALUES*".column1
+   ->  Nested Loop
+         ->  Values Scan on "*VALUES*"
+         ->  Sample Scan on tenk1
+               Sampling: bernoulli ("*VALUES*".column1)
+(6 rows)
+
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+  group by pct;
+ pct | count 
+-----+-------
+ 100 | 10000
+(1 row)
+
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample system (pct)) ss
+  group by pct;
+ pct | count 
+-----+-------
+ 100 | 10000
  (1 row)
  
  -- errors
  SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
-ERROR:  tablesample method "foobar" does not exist
+ERROR:  tablesample method foobar does not exist
  LINE 1: SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
-                       ^
+                                                    ^
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (NULL);
+ERROR:  TABLESAMPLE parameter cannot be null
  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
-ERROR:  REPEATABLE clause must be NOT NULL numeric value
-LINE 1: ... test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
-                                                                 ^
+ERROR:  TABLESAMPLE REPEATABLE parameter cannot be null
  SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1);
-ERROR:  invalid sample size
-HINT:  Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR:  sample percentage must be between 0 and 100
  SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (200);
-ERROR:  invalid sample size
-HINT:  Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR:  sample percentage must be between 0 and 100
  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (-1);
-ERROR:  invalid sample size
-HINT:  Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR:  sample percentage must be between 0 and 100
  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (200);
-ERROR:  invalid sample size
-HINT:  Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR:  sample percentage must be between 0 and 100
  SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1);
-ERROR:  TABLESAMPLE clause can only be used on tables and materialized views
+ERROR:  TABLESAMPLE clause can only be applied to tables and materialized views
  LINE 1: SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1)...
                         ^
  INSERT INTO test_tablesample_v1 VALUES(1);
@@ -202,30 +306,10 @@ DETAIL:  Views containing TABLESAMPLE are not automatically updatable.
  HINT:  To enable inserting into the view, provide an INSTEAD OF INSERT trigger or an unconditional ON INSERT DO INSTEAD rule.
  WITH query_select AS (SELECT * FROM test_tablesample)
  SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
-ERROR:  TABLESAMPLE clause can only be used on tables and materialized views
+ERROR:  TABLESAMPLE clause can only be applied to tables and materialized views
  LINE 2: SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEA...
                        ^
  SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5);
  ERROR:  syntax error at or near "TABLESAMPLE"
  LINE 1: ...CT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPL...
                                                               ^
--- catalog sanity
-SELECT *
-FROM pg_tablesample_method
-WHERE tsminit IS NULL
-   OR tsmseqscan IS NULL
-   OR tsmpagemode IS NULL
-   OR tsmnextblock IS NULL
-   OR tsmnexttuple IS NULL
-   OR tsmend IS NULL
-   OR tsmreset IS NULL
-   OR tsmcost IS NULL;
- tsmname | tsmseqscan | tsmpagemode | tsminit | tsmnextblock | tsmnexttuple | tsmexaminetuple | tsmend | tsmreset | tsmcost 
----------+------------+-------------+---------+--------------+--------------+-----------------+--------+----------+---------
-(0 rows)
-
--- done
-DROP TABLE test_tablesample CASCADE;
-NOTICE:  drop cascades to 2 other objects
-DETAIL:  drop cascades to view test_tablesample_v1
-drop cascades to view test_tablesample_v2
diff --git a/src/test/regress/output/misc.source b/src/test/regress/output/misc.source

index 70c9cc356a642075b3df47ae897b70673e13e471..9eedb363d06be9602d35d90ff68bb84d7b923c7e 100644 (file)
--- a/src/test/regress/output/misc.source
+++ b/src/test/regress/output/misc.source
@@ -686,6 +686,9 @@ SELECT user_relns() AS user_relns
   test_range_excl
   test_range_gist
   test_range_spgist
+ test_tablesample
+ test_tablesample_v1
+ test_tablesample_v2
   test_tsvector
   testjsonb
   text_tbl
@@ -705,7 +708,7 @@ SELECT user_relns() AS user_relns
   tvvmv
   varchar_tbl
   xacttest
-(127 rows)
+(130 rows)
  
  SELECT name(equipment(hobby_construct(text 'skywalking', text 'mer')));
   name 
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule

index 3a607cff46c235ff8b15fdbd095c2098ca3ac217..15d74d4e6eba90abc4476ae85412c0e4dc0b5081 100644 (file)
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -110,6 +110,7 @@ test: lock
  test: replica_identity
  test: rowsecurity
  test: object_address
+test: tablesample
  test: alter_generic
  test: alter_operator
  test: misc
@@ -156,4 +157,3 @@ test: with
  test: xml
  test: event_trigger
  test: stats
-test: tablesample
diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql

index 039070b85b73370be2acdb27dfb115770a96427a..e86f8143142cbbee3f7d154874e9fd82bcff702a 100644 (file)
--- a/src/test/regress/sql/rowsecurity.sql
+++ b/src/test/regress/sql/rowsecurity.sql
@@ -94,14 +94,18 @@ SET row_security TO ON;
  SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
  SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
  
-SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
+-- try a sampled version
+SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
+  WHERE f_leak(dtitle) ORDER BY did;
  
  -- viewpoint from rls_regress_user2
  SET SESSION AUTHORIZATION rls_regress_user2;
  SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
  SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
  
-SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
+-- try a sampled version
+SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
+  WHERE f_leak(dtitle) ORDER BY did;
  
  EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
  EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle);
diff --git a/src/test/regress/sql/tablesample.sql b/src/test/regress/sql/tablesample.sql

index 7b3eb9bedf7bb3a82ccd127a65bb1382e10a415b..eec97934966966229800a47563153669b5ea353a 100644 (file)
--- a/src/test/regress/sql/tablesample.sql
+++ b/src/test/regress/sql/tablesample.sql
@@ -1,26 +1,37 @@
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
+CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10);
+-- use fillfactor so we don't have to load too much data to get multiple pages
  
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i;
+INSERT INTO test_tablesample
+  SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i);
  
-SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10);
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999);
+SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (0);
+
+-- 100% should give repeatable count results (ie, all rows) in any case
  SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100);
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (1+2);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (0.4);
  
-CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
-CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
-SELECT pg_get_viewdef('test_tablesample_v1'::regclass);
-SELECT pg_get_viewdef('test_tablesample_v2'::regclass);
+CREATE VIEW test_tablesample_v1 AS
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
+CREATE VIEW test_tablesample_v2 AS
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
+\d+ test_tablesample_v1
+\d+ test_tablesample_v2
  
+-- check a sampled query doesn't affect cursor in progress
  BEGIN;
-DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+DECLARE tablesample_cur CURSOR FOR
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
+
  FETCH FIRST FROM tablesample_cur;
  FETCH NEXT FROM tablesample_cur;
  FETCH NEXT FROM tablesample_cur;
  
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
  
  FETCH NEXT FROM tablesample_cur;
  FETCH NEXT FROM tablesample_cur;
@@ -36,12 +47,45 @@ FETCH NEXT FROM tablesample_cur;
  CLOSE tablesample_cur;
  END;
  
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
-EXPLAIN SELECT * FROM test_tablesample_v1;
+EXPLAIN (COSTS OFF)
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (2);
+EXPLAIN (COSTS OFF)
+  SELECT * FROM test_tablesample_v1;
+
+-- check inheritance behavior
+explain (costs off)
+  select count(*) from person tablesample bernoulli (100);
+select count(*) from person tablesample bernoulli (100);
+select count(*) from person;
+
+-- check that collations get assigned within the tablesample arguments
+SELECT count(*) FROM test_tablesample TABLESAMPLE bernoulli (('1'::text < '0'::text)::int);
+
+-- check behavior during rescans, as well as correct handling of min/max pct
+select * from
+  (values (0),(100)) v(pct),
+  lateral (select count(*) from tenk1 tablesample bernoulli (pct)) ss;
+select * from
+  (values (0),(100)) v(pct),
+  lateral (select count(*) from tenk1 tablesample system (pct)) ss;
+explain (costs off)
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+  group by pct;
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+  group by pct;
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample system (pct)) ss
+  group by pct;
  
  -- errors
  SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
  
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (NULL);
  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
  
  SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1);
@@ -56,19 +100,3 @@ WITH query_select AS (SELECT * FROM test_tablesample)
  SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
  
  SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5);
-
--- catalog sanity
-
-SELECT *
-FROM pg_tablesample_method
-WHERE tsminit IS NULL
-   OR tsmseqscan IS NULL
-   OR tsmpagemode IS NULL
-   OR tsmnextblock IS NULL
-   OR tsmnexttuple IS NULL
-   OR tsmend IS NULL
-   OR tsmreset IS NULL
-   OR tsmcost IS NULL;
-
--- done
-DROP TABLE test_tablesample CASCADE;
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 25 Jul 2015 18:39:00 +0000 (14:39 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 25 Jul 2015 18:39:00 +0000 (14:39 -0400)
contrib/pg_stat_statements/pg_stat_statements.c		patch \| blob \| blame \| history
contrib/tsm_system_rows/Makefile		patch \| blob \| blame \| history
contrib/tsm_system_rows/expected/tsm_system_rows.out		patch \| blob \| blame \| history
contrib/tsm_system_rows/sql/tsm_system_rows.sql		patch \| blob \| blame \| history
contrib/tsm_system_rows/tsm_system_rows--1.0.sql		patch \| blob \| blame \| history
contrib/tsm_system_rows/tsm_system_rows.c		patch \| blob \| blame \| history
contrib/tsm_system_rows/tsm_system_rows.control		patch \| blob \| blame \| history
contrib/tsm_system_time/Makefile		patch \| blob \| blame \| history
contrib/tsm_system_time/expected/tsm_system_time.out		patch \| blob \| blame \| history
contrib/tsm_system_time/sql/tsm_system_time.sql		patch \| blob \| blame \| history
contrib/tsm_system_time/tsm_system_time--1.0.sql		patch \| blob \| blame \| history
contrib/tsm_system_time/tsm_system_time.c		patch \| blob \| blame \| history
contrib/tsm_system_time/tsm_system_time.control		patch \| blob \| blame \| history
doc/src/sgml/catalogs.sgml		patch \| blob \| blame \| history
doc/src/sgml/datatype.sgml		patch \| blob \| blame \| history
doc/src/sgml/postgres.sgml		patch \| blob \| blame \| history
doc/src/sgml/ref/select.sgml		patch \| blob \| blame \| history
doc/src/sgml/tablesample-method.sgml		patch \| blob \| blame \| history
doc/src/sgml/tsm-system-rows.sgml		patch \| blob \| blame \| history
doc/src/sgml/tsm-system-time.sgml		patch \| blob \| blame \| history
src/backend/access/heap/heapam.c		patch \| blob \| blame \| history
src/backend/access/tablesample/Makefile		patch \| blob \| blame \| history
src/backend/access/tablesample/bernoulli.c		patch \| blob \| blame \| history
src/backend/access/tablesample/system.c		patch \| blob \| blame \| history
src/backend/access/tablesample/tablesample.c		patch \| blob \| blame \| history
src/backend/catalog/Makefile		patch \| blob \| blame \| history
src/backend/catalog/dependency.c		patch \| blob \| blame \| history
src/backend/commands/explain.c		patch \| blob \| blame \| history
src/backend/executor/execAmi.c		patch \| blob \| blame \| history
src/backend/executor/nodeSamplescan.c		patch \| blob \| blame \| history
src/backend/nodes/copyfuncs.c		patch \| blob \| blame \| history
src/backend/nodes/equalfuncs.c		patch \| blob \| blame \| history
src/backend/nodes/nodeFuncs.c		patch \| blob \| blame \| history
src/backend/nodes/outfuncs.c		patch \| blob \| blame \| history
src/backend/nodes/readfuncs.c		patch \| blob \| blame \| history
src/backend/optimizer/path/allpaths.c		patch \| blob \| blame \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/initsplan.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/setrefs.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/subselect.c		patch \| blob \| blame \| history
src/backend/optimizer/prep/prepjointree.c		patch \| blob \| blame \| history
src/backend/optimizer/util/pathnode.c		patch \| blob \| blame \| history
src/backend/parser/gram.y		patch \| blob \| blame \| history
src/backend/parser/parse_clause.c		patch \| blob \| blame \| history
src/backend/parser/parse_func.c		patch \| blob \| blame \| history
src/backend/rewrite/rewriteHandler.c		patch \| blob \| blame \| history
src/backend/utils/adt/pseudotypes.c		patch \| blob \| blame \| history
src/backend/utils/adt/ruleutils.c		patch \| blob \| blame \| history
src/backend/utils/cache/lsyscache.c		patch \| blob \| blame \| history
src/backend/utils/cache/syscache.c		patch \| blob \| blame \| history
src/backend/utils/errcodes.txt		patch \| blob \| blame \| history
src/backend/utils/misc/sampling.c		patch \| blob \| blame \| history
src/bin/psql/tab-complete.c		patch \| blob \| blame \| history
src/include/access/heapam.h		patch \| blob \| blame \| history
src/include/access/tablesample.h	[deleted file]	patch \| blob \| blame \| history
src/include/access/tsmapi.h	[new file with mode: 0644]	patch \| blob
src/include/catalog/catversion.h		patch \| blob \| blame \| history
src/include/catalog/indexing.h		patch \| blob \| blame \| history
src/include/catalog/pg_proc.h		patch \| blob \| blame \| history
src/include/catalog/pg_tablesample_method.h	[deleted file]	patch \| blob \| blame \| history
src/include/catalog/pg_type.h		patch \| blob \| blame \| history
src/include/executor/nodeSamplescan.h		patch \| blob \| blame \| history
src/include/nodes/execnodes.h		patch \| blob \| blame \| history
src/include/nodes/nodes.h		patch \| blob \| blame \| history
src/include/nodes/parsenodes.h		patch \| blob \| blame \| history
src/include/nodes/plannodes.h		patch \| blob \| blame \| history
src/include/optimizer/cost.h		patch \| blob \| blame \| history
src/include/parser/parse_func.h		patch \| blob \| blame \| history
src/include/port.h		patch \| blob \| blame \| history
src/include/utils/builtins.h		patch \| blob \| blame \| history
src/include/utils/lsyscache.h		patch \| blob \| blame \| history
src/include/utils/syscache.h		patch \| blob \| blame \| history
src/port/erand48.c		patch \| blob \| blame \| history
src/test/regress/expected/rowsecurity.out		patch \| blob \| blame \| history
src/test/regress/expected/rules.out		patch \| blob \| blame \| history
src/test/regress/expected/sanity_check.out		patch \| blob \| blame \| history
src/test/regress/expected/tablesample.out		patch \| blob \| blame \| history
src/test/regress/output/misc.source		patch \| blob \| blame \| history
src/test/regress/serial_schedule		patch \| blob \| blame \| history
src/test/regress/sql/rowsecurity.sql		patch \| blob \| blame \| history
src/test/regress/sql/tablesample.sql		patch \| blob \| blame \| history