{
case RTE_RELATION:
APP_JUMB(rte->relid);
+ JumbleExpr(jstate, (Node *) rte->tablesample);
break;
case RTE_SUBQUERY:
JumbleQuery(jstate, rte->subquery);
JumbleExpr(jstate, rtfunc->funcexpr);
}
break;
+ case T_TableSampleClause:
+ {
+ TableSampleClause *tsc = (TableSampleClause *) node;
+
+ APP_JUMB(tsc->tsmhandler);
+ JumbleExpr(jstate, (Node *) tsc->args);
+ JumbleExpr(jstate, (Node *) tsc->repeatable);
+ }
+ break;
default:
/* Only a warning, since we can stumble along anyway */
elog(WARNING, "unrecognized node type: %d",
-# src/test/modules/tsm_system_rows/Makefile
+# contrib/tsm_system_rows/Makefile
MODULE_big = tsm_system_rows
OBJS = tsm_system_rows.o $(WIN32RES)
-PGFILEDESC = "tsm_system_rows - SYSTEM TABLESAMPLE method which accepts number of rows as a limit"
+PGFILEDESC = "tsm_system_rows - TABLESAMPLE method which accepts number of rows as a limit"
EXTENSION = tsm_system_rows
DATA = tsm_system_rows--1.0.sql
CREATE EXTENSION tsm_system_rows;
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+ FROM generate_series(0, 30) s(i);
ANALYZE test_tablesample;
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1000);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (0);
+ count
+-------
+ 0
+(1 row)
+
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1);
+ count
+-------
+ 1
+(1 row)
+
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (10);
+ count
+-------
+ 10
+(1 row)
+
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (100);
count
-------
31
(1 row)
-SELECT id FROM test_tablesample TABLESAMPLE system_rows (8) REPEATABLE (5432);
- id
-----
- 7
- 14
- 21
- 28
- 4
- 11
- 18
- 25
-(8 rows)
-
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_rows (20) REPEATABLE (10);
- QUERY PLAN
------------------------------------------------------------------------------------
- Sample Scan (system_rows) on test_tablesample (cost=0.00..80.20 rows=20 width=4)
+-- bad parameters should get through planning, but not execution:
+EXPLAIN (COSTS OFF)
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+ QUERY PLAN
+----------------------------------------
+ Sample Scan on test_tablesample
+ Sampling: system_rows ('-1'::bigint)
+(2 rows)
+
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+ERROR: sample size must not be negative
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_rows (10) REPEATABLE (0);
+ERROR: tablesample method system_rows does not support REPEATABLE
+LINE 1: SELECT * FROM test_tablesample TABLESAMPLE system_rows (10) ...
+ ^
+-- but a join should be allowed:
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (VALUES (0),(10),(100)) v(nrows),
+ LATERAL (SELECT count(*) FROM test_tablesample
+ TABLESAMPLE system_rows (nrows)) ss;
+ QUERY PLAN
+----------------------------------------------------------
+ Nested Loop
+ -> Values Scan on "*VALUES*"
+ -> Aggregate
+ -> Sample Scan on test_tablesample
+ Sampling: system_rows ("*VALUES*".column1)
+(5 rows)
+
+SELECT * FROM
+ (VALUES (0),(10),(100)) v(nrows),
+ LATERAL (SELECT count(*) FROM test_tablesample
+ TABLESAMPLE system_rows (nrows)) ss;
+ nrows | count
+-------+-------
+ 0 | 0
+ 10 | 10
+ 100 | 31
+(3 rows)
+
+CREATE VIEW vv AS
+ SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (20);
+SELECT * FROM vv;
+ count
+-------
+ 20
(1 row)
--- done
-DROP TABLE test_tablesample CASCADE;
+DROP EXTENSION tsm_system_rows; -- fail, view depends on extension
+ERROR: cannot drop extension tsm_system_rows because other objects depend on it
+DETAIL: view vv depends on function system_rows(internal)
+HINT: Use DROP ... CASCADE to drop the dependent objects too.
CREATE EXTENSION tsm_system_rows;
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+ FROM generate_series(0, 30) s(i);
ANALYZE test_tablesample;
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1000);
-SELECT id FROM test_tablesample TABLESAMPLE system_rows (8) REPEATABLE (5432);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (0);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (10);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (100);
+
+-- bad parameters should get through planning, but not execution:
+EXPLAIN (COSTS OFF)
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_rows (10) REPEATABLE (0);
+
+-- but a join should be allowed:
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (VALUES (0),(10),(100)) v(nrows),
+ LATERAL (SELECT count(*) FROM test_tablesample
+ TABLESAMPLE system_rows (nrows)) ss;
+
+SELECT * FROM
+ (VALUES (0),(10),(100)) v(nrows),
+ LATERAL (SELECT count(*) FROM test_tablesample
+ TABLESAMPLE system_rows (nrows)) ss;
+
+CREATE VIEW vv AS
+ SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (20);
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_rows (20) REPEATABLE (10);
+SELECT * FROM vv;
--- done
-DROP TABLE test_tablesample CASCADE;
+DROP EXTENSION tsm_system_rows; -- fail, view depends on extension
-/* src/test/modules/tablesample/tsm_system_rows--1.0.sql */
+/* contrib/tsm_system_rows/tsm_system_rows--1.0.sql */
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION tsm_system_rows" to load this file. \quit
-CREATE FUNCTION tsm_system_rows_init(internal, int4, int4)
-RETURNS void
-AS 'MODULE_PATHNAME'
+CREATE FUNCTION system_rows(internal)
+RETURNS tsm_handler
+AS 'MODULE_PATHNAME', 'tsm_system_rows_handler'
LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_nextblock(internal)
-RETURNS int4
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_nexttuple(internal, int4, int2)
-RETURNS int2
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_examinetuple(internal, int4, internal, bool)
-RETURNS bool
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_end(internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_reset(internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_cost(internal, internal, internal, internal, internal, internal, internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-INSERT INTO pg_tablesample_method VALUES('system_rows', false, true,
- 'tsm_system_rows_init', 'tsm_system_rows_nextblock',
- 'tsm_system_rows_nexttuple', 'tsm_system_rows_examinetuple',
- 'tsm_system_rows_end', 'tsm_system_rows_reset', 'tsm_system_rows_cost');
/*-------------------------------------------------------------------------
*
* tsm_system_rows.c
- * interface routines for system_rows tablesample method
+ * support routines for SYSTEM_ROWS tablesample method
*
+ * The desire here is to produce a random sample with a given number of rows
+ * (or the whole relation, if that is fewer rows). We use a block-sampling
+ * approach. To ensure that the whole relation will be visited if necessary,
+ * we start at a randomly chosen block and then advance with a stride that
+ * is randomly chosen but is relatively prime to the relation's nblocks.
*
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Because of the dependence on nblocks, this method cannot be repeatable
+ * across queries. (Even if the user hasn't explicitly changed the relation,
+ * maintenance activities such as autovacuum might change nblocks.) However,
+ * we can at least make it repeatable across scans, by determining the
+ * sampling pattern only once on the first scan. This means that rescans
+ * won't visit blocks added after the first scan, but that is fine since
+ * such blocks shouldn't contain any visible tuples anyway.
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * contrib/tsm_system_rows_rowlimit/tsm_system_rows.c
+ * contrib/tsm_system_rows/tsm_system_rows.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
-#include "fmgr.h"
-
-#include "access/tablesample.h"
#include "access/relscan.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
#include "miscadmin.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
#include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
+#include "optimizer/cost.h"
#include "utils/sampling.h"
PG_MODULE_MAGIC;
-/*
- * State
- */
+PG_FUNCTION_INFO_V1(tsm_system_rows_handler);
+
+
+/* Private state */
typedef struct
{
- SamplerRandomState randstate;
uint32 seed; /* random seed */
- BlockNumber nblocks; /* number of block in relation */
- int32 ntuples; /* number of tuples to return */
- int32 donetuples; /* tuples already returned */
+ int64 ntuples; /* number of tuples to return */
+ int64 donetuples; /* number of tuples already returned */
OffsetNumber lt; /* last tuple returned from current block */
- BlockNumber step; /* step size */
+ BlockNumber doneblocks; /* number of already-scanned blocks */
BlockNumber lb; /* last block visited */
- BlockNumber doneblocks; /* number of already returned blocks */
-} SystemSamplerData;
-
-
-PG_FUNCTION_INFO_V1(tsm_system_rows_init);
-PG_FUNCTION_INFO_V1(tsm_system_rows_nextblock);
-PG_FUNCTION_INFO_V1(tsm_system_rows_nexttuple);
-PG_FUNCTION_INFO_V1(tsm_system_rows_examinetuple);
-PG_FUNCTION_INFO_V1(tsm_system_rows_end);
-PG_FUNCTION_INFO_V1(tsm_system_rows_reset);
-PG_FUNCTION_INFO_V1(tsm_system_rows_cost);
-
+ /* these three values are not changed during a rescan: */
+ BlockNumber nblocks; /* number of blocks in relation */
+ BlockNumber firstblock; /* first block to sample from */
+ BlockNumber step; /* step size, or 0 if not set yet */
+} SystemRowsSamplerData;
+
+static void system_rows_samplescangetsamplesize(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples);
+static void system_rows_initsamplescan(SampleScanState *node,
+ int eflags);
+static void system_rows_beginsamplescan(SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed);
+static BlockNumber system_rows_nextsampleblock(SampleScanState *node);
+static OffsetNumber system_rows_nextsampletuple(SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset);
+static bool SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan);
static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate);
+
/*
- * Initializes the state.
+ * Create a TsmRoutine descriptor for the SYSTEM_ROWS method.
*/
Datum
-tsm_system_rows_init(PG_FUNCTION_ARGS)
+tsm_system_rows_handler(PG_FUNCTION_ARGS)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- uint32 seed = PG_GETARG_UINT32(1);
- int32 ntuples = PG_ARGISNULL(2) ? -1 : PG_GETARG_INT32(2);
- HeapScanDesc scan = tsdesc->heapScan;
- SystemSamplerData *sampler;
+ TsmRoutine *tsm = makeNode(TsmRoutine);
- if (ntuples < 1)
- ereport(ERROR,
- (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("invalid sample size"),
- errhint("Sample size must be positive integer value.")));
+ tsm->parameterTypes = list_make1_oid(INT8OID);
- sampler = palloc0(sizeof(SystemSamplerData));
+ /* See notes at head of file */
+ tsm->repeatable_across_queries = false;
+ tsm->repeatable_across_scans = true;
- /* Remember initial values for reinit */
- sampler->seed = seed;
- sampler->nblocks = scan->rs_nblocks;
- sampler->ntuples = ntuples;
- sampler->donetuples = 0;
- sampler->lt = InvalidOffsetNumber;
- sampler->doneblocks = 0;
-
- sampler_random_init_state(sampler->seed, sampler->randstate);
-
- /* Find relative prime as step size for linear probing. */
- sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
-
- /*
- * Randomize start position so that blocks close to step size don't have
- * higher probability of being chosen on very short scan.
- */
- sampler->lb = sampler_random_fract(sampler->randstate) *
- (sampler->nblocks / sampler->step);
+ tsm->SampleScanGetSampleSize = system_rows_samplescangetsamplesize;
+ tsm->InitSampleScan = system_rows_initsamplescan;
+ tsm->BeginSampleScan = system_rows_beginsamplescan;
+ tsm->NextSampleBlock = system_rows_nextsampleblock;
+ tsm->NextSampleTuple = system_rows_nextsampletuple;
+ tsm->EndSampleScan = NULL;
- tsdesc->tsmdata = (void *) sampler;
-
- PG_RETURN_VOID();
+ PG_RETURN_POINTER(tsm);
}
/*
- * Get next block number or InvalidBlockNumber when we're done.
- *
- * Uses linear probing algorithm for picking next block.
+ * Sample size estimation.
*/
-Datum
-tsm_system_rows_nextblock(PG_FUNCTION_ARGS)
+static void
+system_rows_samplescangetsamplesize(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+ Node *limitnode;
+ int64 ntuples;
+ double npages;
- sampler->lb = (sampler->lb + sampler->step) % sampler->nblocks;
- sampler->doneblocks++;
+ /* Try to extract an estimate for the limit rowcount */
+ limitnode = (Node *) linitial(paramexprs);
+ limitnode = estimate_expression_value(root, limitnode);
- /* All blocks have been read, we're done */
- if (sampler->doneblocks > sampler->nblocks ||
- sampler->donetuples >= sampler->ntuples)
- PG_RETURN_UINT32(InvalidBlockNumber);
+ if (IsA(limitnode, Const) &&
+ !((Const *) limitnode)->constisnull)
+ {
+ ntuples = DatumGetInt64(((Const *) limitnode)->constvalue);
+ if (ntuples < 0)
+ {
+ /* Default ntuples if the value is bogus */
+ ntuples = 1000;
+ }
+ }
+ else
+ {
+ /* Default ntuples if we didn't obtain a non-null Const */
+ ntuples = 1000;
+ }
- PG_RETURN_UINT32(sampler->lb);
-}
+ /* Clamp to the estimated relation size */
+ if (ntuples > baserel->tuples)
+ ntuples = (int64) baserel->tuples;
+ ntuples = clamp_row_est(ntuples);
-/*
- * Get next tuple offset in current block or InvalidOffsetNumber if we are done
- * with this block.
- */
-Datum
-tsm_system_rows_nexttuple(PG_FUNCTION_ARGS)
-{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- OffsetNumber maxoffset = PG_GETARG_UINT16(2);
- SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
- OffsetNumber tupoffset = sampler->lt;
+ if (baserel->tuples > 0 && baserel->pages > 0)
+ {
+ /* Estimate number of pages visited based on tuple density */
+ double density = baserel->tuples / (double) baserel->pages;
- if (tupoffset == InvalidOffsetNumber)
- tupoffset = FirstOffsetNumber;
+ npages = ntuples / density;
+ }
else
- tupoffset++;
-
- if (tupoffset > maxoffset ||
- sampler->donetuples >= sampler->ntuples)
- tupoffset = InvalidOffsetNumber;
+ {
+ /* For lack of data, assume one tuple per page */
+ npages = ntuples;
+ }
- sampler->lt = tupoffset;
+ /* Clamp to sane value */
+ npages = clamp_row_est(Min((double) baserel->pages, npages));
- PG_RETURN_UINT16(tupoffset);
+ *pages = npages;
+ *tuples = ntuples;
}
/*
- * Examine tuple and decide if it should be returned.
+ * Initialize during executor setup.
*/
-Datum
-tsm_system_rows_examinetuple(PG_FUNCTION_ARGS)
+static void
+system_rows_initsamplescan(SampleScanState *node, int eflags)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- bool visible = PG_GETARG_BOOL(3);
- SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-
- if (!visible)
- PG_RETURN_BOOL(false);
-
- sampler->donetuples++;
-
- PG_RETURN_BOOL(true);
+ node->tsm_state = palloc0(sizeof(SystemRowsSamplerData));
+ /* Note the above leaves tsm_state->step equal to zero */
}
/*
- * Cleanup method.
+ * Examine parameters and prepare for a sample scan.
*/
-Datum
-tsm_system_rows_end(PG_FUNCTION_ARGS)
+static void
+system_rows_beginsamplescan(SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
+ int64 ntuples = DatumGetInt64(params[0]);
+
+ if (ntuples < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+ errmsg("sample size must not be negative")));
- pfree(tsdesc->tsmdata);
+ sampler->seed = seed;
+ sampler->ntuples = ntuples;
+ sampler->donetuples = 0;
+ sampler->lt = InvalidOffsetNumber;
+ sampler->doneblocks = 0;
+ /* lb will be initialized during first NextSampleBlock call */
+ /* we intentionally do not change nblocks/firstblock/step here */
- PG_RETURN_VOID();
+ /*
+ * We *must* use pagemode visibility checking in this module, so force
+ * that even though it's currently default.
+ */
+ node->use_pagemode = true;
}
/*
- * Reset state (called by ReScan).
+ * Select next block to sample.
+ *
+ * Uses linear probing algorithm for picking next block.
*/
-Datum
-tsm_system_rows_reset(PG_FUNCTION_ARGS)
+static BlockNumber
+system_rows_nextsampleblock(SampleScanState *node)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+ SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
+ HeapScanDesc scan = node->ss.ss_currentScanDesc;
- sampler->lt = InvalidOffsetNumber;
- sampler->donetuples = 0;
- sampler->doneblocks = 0;
+ /* First call within scan? */
+ if (sampler->doneblocks == 0)
+ {
+ /* First scan within query? */
+ if (sampler->step == 0)
+ {
+ /* Initialize now that we have scan descriptor */
+ SamplerRandomState randstate;
+
+ /* If relation is empty, there's nothing to scan */
+ if (scan->rs_nblocks == 0)
+ return InvalidBlockNumber;
+
+ /* We only need an RNG during this setup step */
+ sampler_random_init_state(sampler->seed, randstate);
+
+ /* Compute nblocks/firstblock/step only once per query */
+ sampler->nblocks = scan->rs_nblocks;
- sampler_random_init_state(sampler->seed, sampler->randstate);
- sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
- sampler->lb = sampler_random_fract(sampler->randstate) * (sampler->nblocks / sampler->step);
+ /* Choose random starting block within the relation */
+ /* (Actually this is the predecessor of the first block visited) */
+ sampler->firstblock = sampler_random_fract(randstate) *
+ sampler->nblocks;
+
+ /* Find relative prime as step size for linear probing */
+ sampler->step = random_relative_prime(sampler->nblocks, randstate);
+ }
+
+ /* Reinitialize lb */
+ sampler->lb = sampler->firstblock;
+ }
+
+ /* If we've read all blocks or returned all needed tuples, we're done */
+ if (++sampler->doneblocks > sampler->nblocks ||
+ sampler->donetuples >= sampler->ntuples)
+ return InvalidBlockNumber;
+
+ /*
+ * It's probably impossible for scan->rs_nblocks to decrease between scans
+ * within a query; but just in case, loop until we select a block number
+ * less than scan->rs_nblocks. We don't care if scan->rs_nblocks has
+ * increased since the first scan.
+ */
+ do
+ {
+ /* Advance lb, using uint64 arithmetic to forestall overflow */
+ sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks;
+ } while (sampler->lb >= scan->rs_nblocks);
- PG_RETURN_VOID();
+ return sampler->lb;
}
/*
- * Costing function.
+ * Select next sampled tuple in current block.
+ *
+ * In block sampling, we just want to sample all the tuples in each selected
+ * block.
+ *
+ * When we reach end of the block, return InvalidOffsetNumber which tells
+ * SampleScan to go to next block.
*/
-Datum
-tsm_system_rows_cost(PG_FUNCTION_ARGS)
+static OffsetNumber
+system_rows_nextsampletuple(SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- Path *path = (Path *) PG_GETARG_POINTER(1);
- RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
- List *args = (List *) PG_GETARG_POINTER(3);
- BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
- double *tuples = (double *) PG_GETARG_POINTER(5);
- Node *limitnode;
- int32 ntuples;
+ SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
+ HeapScanDesc scan = node->ss.ss_currentScanDesc;
+ OffsetNumber tupoffset = sampler->lt;
- limitnode = linitial(args);
- limitnode = estimate_expression_value(root, limitnode);
+ /* Quit if we've returned all needed tuples */
+ if (sampler->donetuples >= sampler->ntuples)
+ return InvalidOffsetNumber;
- if (IsA(limitnode, RelabelType))
- limitnode = (Node *) ((RelabelType *) limitnode)->arg;
+ /*
+ * Because we should only count visible tuples as being returned, we need
+ * to search for a visible tuple rather than just let the core code do it.
+ */
- if (IsA(limitnode, Const))
- ntuples = DatumGetInt32(((Const *) limitnode)->constvalue);
- else
+ /* We rely on the data accumulated in pagemode access */
+ Assert(scan->rs_pageatatime);
+ for (;;)
{
- /* Default ntuples if the estimation didn't return Const. */
- ntuples = 1000;
+ /* Advance to next possible offset on page */
+ if (tupoffset == InvalidOffsetNumber)
+ tupoffset = FirstOffsetNumber;
+ else
+ tupoffset++;
+
+ /* Done? */
+ if (tupoffset > maxoffset)
+ {
+ tupoffset = InvalidOffsetNumber;
+ break;
+ }
+
+ /* Found a candidate? */
+ if (SampleOffsetVisible(tupoffset, scan))
+ {
+ sampler->donetuples++;
+ break;
+ }
}
- *pages = Min(baserel->pages, ntuples);
- *tuples = ntuples;
- path->rows = *tuples;
+ sampler->lt = tupoffset;
- PG_RETURN_VOID();
+ return tupoffset;
}
+/*
+ * Report whether the tuple at the given offset on the current page is visible.
+ *
+ * In pageatatime mode, heapgetpage() has already performed the visibility
+ * checks and recorded the visible offsets in rs_vistuples[], so we only
+ * need to look the offset up there.  The lookup is a binary search, which
+ * presumes the recorded offsets are in ascending order.
+ */
+static bool
+SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan)
+{
+    int         lo = 0;
+    int         hi = scan->rs_ntuples - 1;
+
+    /* Narrow the inclusive range [lo, hi] until it is empty or we hit a match */
+    while (lo <= hi)
+    {
+        int          probe = (lo + hi) / 2;
+        OffsetNumber seen = scan->rs_vistuples[probe];
+
+        if (seen < tupoffset)
+            lo = probe + 1;     /* target can only be in the upper half */
+        else if (seen > tupoffset)
+            hi = probe - 1;     /* target can only be in the lower half */
+        else
+            return true;        /* offset is listed, so the tuple is visible */
+    }
+
+    /* Offset is not among the visible tuples of this page */
+    return false;
+}
+/*
+ * Compute greatest common divisor of two uint32's.
+ */
static uint32
gcd(uint32 a, uint32 b)
{
	uint32		c;

	/*
	 * Euclid's algorithm: repeatedly replace (a, b) with (b mod a, a) until
	 * a reaches zero, at which point b holds the greatest common divisor.
	 * As a consequence gcd(0, b) == b and gcd(a, 0) == a.
	 *
	 * Callers that loop until the result is 1 depend on this being a real
	 * GCD; returning b unconditionally would make such a loop spin forever
	 * for any b greater than 1.
	 */
	while (a != 0)
	{
		c = a;
		a = b % a;
		b = c;
	}

	return b;
}
+/*
+ * Pick a random value less than and relatively prime to n, if possible
+ * (else return 1).
+ */
static uint32
random_relative_prime(uint32 n, SamplerRandomState randstate)
{
- /* Pick random starting number, with some limits on what it can be. */
- uint32 r = (uint32) sampler_random_fract(randstate) * n / 2 + n / 4,
- t;
+ uint32 r;
+
+ /* Safety check to avoid infinite loop or zero result for small n. */
+ if (n <= 1)
+ return 1;
/*
* This should only take 2 or 3 iterations as the probability of 2 numbers
- * being relatively prime is ~61%.
+ * being relatively prime is ~61%; but just in case, we'll include a
+ * CHECK_FOR_INTERRUPTS in the loop.
*/
- while ((t = gcd(r, n)) > 1)
+ do
{
CHECK_FOR_INTERRUPTS();
- r /= t;
- }
+ r = (uint32) (sampler_random_fract(randstate) * n);
+ } while (r == 0 || gcd(r, n) > 1);
return r;
}
# tsm_system_rows extension
-comment = 'SYSTEM TABLESAMPLE method which accepts number rows as a limit'
+comment = 'TABLESAMPLE method which accepts number of rows as a limit'
default_version = '1.0'
module_pathname = '$libdir/tsm_system_rows'
relocatable = true
-# src/test/modules/tsm_system_time/Makefile
+# contrib/tsm_system_time/Makefile
MODULE_big = tsm_system_time
OBJS = tsm_system_time.o $(WIN32RES)
-PGFILEDESC = "tsm_system_time - SYSTEM TABLESAMPLE method which accepts number rows of as a limit"
+PGFILEDESC = "tsm_system_time - TABLESAMPLE method which accepts time in milliseconds as a limit"
EXTENSION = tsm_system_time
DATA = tsm_system_time--1.0.sql
CREATE EXTENSION tsm_system_time;
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+ FROM generate_series(0, 30) s(i);
ANALYZE test_tablesample;
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (1000);
+-- It's a bit tricky to test SYSTEM_TIME in a platform-independent way.
+-- We can test the zero-time corner case ...
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (0);
count
-------
- 31
+ 0
(1 row)
-SELECT id FROM test_tablesample TABLESAMPLE system_time (1000) REPEATABLE (5432);
- id
-----
- 7
- 14
- 21
- 28
- 4
- 11
- 18
- 25
- 1
- 8
- 15
- 22
- 29
- 5
- 12
- 19
- 26
- 2
- 9
- 16
- 23
- 30
- 6
- 13
- 20
- 27
- 3
- 10
- 17
- 24
- 0
-(31 rows)
-
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_time (100) REPEATABLE (10);
- QUERY PLAN
-------------------------------------------------------------------------------------
- Sample Scan (system_time) on test_tablesample (cost=0.00..100.25 rows=25 width=4)
+-- ... and we assume that this will finish before running out of time:
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (100000);
+ count
+-------
+ 31
(1 row)
--- done
-DROP TABLE test_tablesample CASCADE;
+-- bad parameters should get through planning, but not execution:
+EXPLAIN (COSTS OFF)
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+ QUERY PLAN
+--------------------------------------------------
+ Sample Scan on test_tablesample
+ Sampling: system_time ('-1'::double precision)
+(2 rows)
+
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+ERROR: sample collection time must not be negative
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_time (10) REPEATABLE (0);
+ERROR: tablesample method system_time does not support REPEATABLE
+LINE 1: SELECT * FROM test_tablesample TABLESAMPLE system_time (10) ...
+ ^
+-- since it's not repeatable, we expect a Materialize node in these plans:
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (VALUES (0),(100000)) v(time),
+ LATERAL (SELECT COUNT(*) FROM test_tablesample
+ TABLESAMPLE system_time (100000)) ss;
+ QUERY PLAN
+------------------------------------------------------------------------
+ Nested Loop
+ -> Aggregate
+ -> Materialize
+ -> Sample Scan on test_tablesample
+ Sampling: system_time ('100000'::double precision)
+ -> Values Scan on "*VALUES*"
+(6 rows)
+
+SELECT * FROM
+ (VALUES (0),(100000)) v(time),
+ LATERAL (SELECT COUNT(*) FROM test_tablesample
+ TABLESAMPLE system_time (100000)) ss;
+ time | count
+--------+-------
+ 0 | 31
+ 100000 | 31
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (VALUES (0),(100000)) v(time),
+ LATERAL (SELECT COUNT(*) FROM test_tablesample
+ TABLESAMPLE system_time (time)) ss;
+ QUERY PLAN
+----------------------------------------------------------------
+ Nested Loop
+ -> Values Scan on "*VALUES*"
+ -> Aggregate
+ -> Materialize
+ -> Sample Scan on test_tablesample
+ Sampling: system_time ("*VALUES*".column1)
+(6 rows)
+
+SELECT * FROM
+ (VALUES (0),(100000)) v(time),
+ LATERAL (SELECT COUNT(*) FROM test_tablesample
+ TABLESAMPLE system_time (time)) ss;
+ time | count
+--------+-------
+ 0 | 0
+ 100000 | 31
+(2 rows)
+
+CREATE VIEW vv AS
+ SELECT * FROM test_tablesample TABLESAMPLE system_time (20);
+EXPLAIN (COSTS OFF) SELECT * FROM vv;
+ QUERY PLAN
+--------------------------------------------------
+ Sample Scan on test_tablesample
+ Sampling: system_time ('20'::double precision)
+(2 rows)
+
+DROP EXTENSION tsm_system_time; -- fail, view depends on extension
+ERROR: cannot drop extension tsm_system_time because other objects depend on it
+DETAIL: view vv depends on function system_time(internal)
+HINT: Use DROP ... CASCADE to drop the dependent objects too.
CREATE EXTENSION tsm_system_time;
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+ FROM generate_series(0, 30) s(i);
ANALYZE test_tablesample;
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (1000);
-SELECT id FROM test_tablesample TABLESAMPLE system_time (1000) REPEATABLE (5432);
+-- It's a bit tricky to test SYSTEM_TIME in a platform-independent way.
+-- We can test the zero-time corner case ...
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (0);
+-- ... and we assume that this will finish before running out of time:
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (100000);
+
+-- bad parameters should get through planning, but not execution:
+EXPLAIN (COSTS OFF)
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_time (10) REPEATABLE (0);
+
+-- since it's not repeatable, we expect a Materialize node in these plans:
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (VALUES (0),(100000)) v(time),
+ LATERAL (SELECT COUNT(*) FROM test_tablesample
+ TABLESAMPLE system_time (100000)) ss;
+
+SELECT * FROM
+ (VALUES (0),(100000)) v(time),
+ LATERAL (SELECT COUNT(*) FROM test_tablesample
+ TABLESAMPLE system_time (100000)) ss;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (VALUES (0),(100000)) v(time),
+ LATERAL (SELECT COUNT(*) FROM test_tablesample
+ TABLESAMPLE system_time (time)) ss;
+
+SELECT * FROM
+ (VALUES (0),(100000)) v(time),
+ LATERAL (SELECT COUNT(*) FROM test_tablesample
+ TABLESAMPLE system_time (time)) ss;
+
+CREATE VIEW vv AS
+ SELECT * FROM test_tablesample TABLESAMPLE system_time (20);
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_time (100) REPEATABLE (10);
+EXPLAIN (COSTS OFF) SELECT * FROM vv;
--- done
-DROP TABLE test_tablesample CASCADE;
+DROP EXTENSION tsm_system_time; -- fail, view depends on extension
-/* src/test/modules/tablesample/tsm_system_time--1.0.sql */
+/* contrib/tsm_system_time/tsm_system_time--1.0.sql */
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION tsm_system_time" to load this file. \quit
-CREATE FUNCTION tsm_system_time_init(internal, int4, int4)
-RETURNS void
-AS 'MODULE_PATHNAME'
+CREATE FUNCTION system_time(internal)
+RETURNS tsm_handler
+AS 'MODULE_PATHNAME', 'tsm_system_time_handler'
LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_time_nextblock(internal)
-RETURNS int4
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_time_nexttuple(internal, int4, int2)
-RETURNS int2
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_time_end(internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_time_reset(internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_time_cost(internal, internal, internal, internal, internal, internal, internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-INSERT INTO pg_tablesample_method VALUES('system_time', false, true,
- 'tsm_system_time_init', 'tsm_system_time_nextblock',
- 'tsm_system_time_nexttuple', '-', 'tsm_system_time_end',
- 'tsm_system_time_reset', 'tsm_system_time_cost');
/*-------------------------------------------------------------------------
*
* tsm_system_time.c
- * interface routines for system_time tablesample method
+ * support routines for SYSTEM_TIME tablesample method
*
+ * The desire here is to produce a random sample with as many rows as possible
+ * in no more than the specified amount of time. We use a block-sampling
+ * approach. To ensure that the whole relation will be visited if necessary,
+ * we start at a randomly chosen block and then advance with a stride that
+ * is randomly chosen but is relatively prime to the relation's nblocks.
*
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Because of the time dependence, this method is necessarily unrepeatable.
+ * However, we do what we can to reduce surprising behavior by selecting
+ * the sampling pattern just once per query, much as in tsm_system_rows.
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * contrib/tsm_system_time_rowlimit/tsm_system_time.c
+ * contrib/tsm_system_time/tsm_system_time.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
-#include "fmgr.h"
+#ifdef _MSC_VER
+#include <float.h> /* for _isnan */
+#endif
+#include <math.h>
-#include "access/tablesample.h"
#include "access/relscan.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
#include "miscadmin.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
#include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
+#include "optimizer/cost.h"
#include "utils/sampling.h"
#include "utils/spccache.h"
-#include "utils/timestamp.h"
PG_MODULE_MAGIC;
-/*
- * State
- */
+PG_FUNCTION_INFO_V1(tsm_system_time_handler);
+
+
+/*
+ * Private state, allocated zero-filled by system_time_initsamplescan and
+ * kept in node->tsm_state.
+ *
+ * seed, millis, lt and doneblocks are (re)set by each
+ * system_time_beginsamplescan call; start_time and lb are (re)initialized
+ * by the first system_time_nextsampleblock call of each scan.
+ */
typedef struct
{
- SamplerRandomState randstate;
uint32 seed; /* random seed */
- BlockNumber nblocks; /* number of block in relation */
- int32 time; /* time limit for sampling */
- TimestampTz start_time; /* start time of sampling */
- TimestampTz end_time; /* end time of sampling */
+ double millis; /* time limit for sampling, in milliseconds */
+ instr_time start_time; /* scan start time */
OffsetNumber lt; /* last tuple returned from current block */
- BlockNumber step; /* step size */
+ BlockNumber doneblocks; /* number of already-scanned blocks */
BlockNumber lb; /* last block visited */
- BlockNumber estblocks; /* estimated number of returned blocks
- * (moving) */
- BlockNumber doneblocks; /* number of already returned blocks */
-} SystemSamplerData;
-
-
-PG_FUNCTION_INFO_V1(tsm_system_time_init);
-PG_FUNCTION_INFO_V1(tsm_system_time_nextblock);
-PG_FUNCTION_INFO_V1(tsm_system_time_nexttuple);
-PG_FUNCTION_INFO_V1(tsm_system_time_end);
-PG_FUNCTION_INFO_V1(tsm_system_time_reset);
-PG_FUNCTION_INFO_V1(tsm_system_time_cost);
-
+ /* these three values are not changed during a rescan: */
+ BlockNumber nblocks; /* number of blocks in relation */
+ BlockNumber firstblock; /* start position; predecessor of first block visited */
+ BlockNumber step; /* step size, or 0 if not set yet */
+} SystemTimeSamplerData;
+
+static void system_time_samplescangetsamplesize(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples);
+static void system_time_initsamplescan(SampleScanState *node,
+ int eflags);
+static void system_time_beginsamplescan(SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed);
+static BlockNumber system_time_nextsampleblock(SampleScanState *node);
+static OffsetNumber system_time_nextsampletuple(SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset);
static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate);
+
/*
- * Initializes the state.
+ * Create a TsmRoutine descriptor for the SYSTEM_TIME method.
+ *
+ * Builds a TsmRoutine node with makeNode() and returns it as a pointer
+ * Datum. The descriptor declares a single float8 parameter, marks the
+ * method as not repeatable across queries or across scans, and points the
+ * callback slots at the static support functions of this file.
*/
Datum
-tsm_system_time_init(PG_FUNCTION_ARGS)
+tsm_system_time_handler(PG_FUNCTION_ARGS)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- uint32 seed = PG_GETARG_UINT32(1);
- int32 time = PG_ARGISNULL(2) ? -1 : PG_GETARG_INT32(2);
- HeapScanDesc scan = tsdesc->heapScan;
- SystemSamplerData *sampler;
-
- if (time < 1)
- ereport(ERROR,
- (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("invalid time limit"),
- errhint("Time limit must be positive integer value.")));
+ TsmRoutine *tsm = makeNode(TsmRoutine);
- sampler = palloc0(sizeof(SystemSamplerData));
+ tsm->parameterTypes = list_make1_oid(FLOAT8OID); /* the time limit, in milliseconds */
- /* Remember initial values for reinit */
- sampler->seed = seed;
- sampler->nblocks = scan->rs_nblocks;
- sampler->lt = InvalidOffsetNumber;
- sampler->estblocks = 2;
- sampler->doneblocks = 0;
- sampler->time = time;
- sampler->start_time = GetCurrentTimestamp();
- sampler->end_time = TimestampTzPlusMilliseconds(sampler->start_time,
- sampler->time);
+ /* A time-limited sample is unrepeatable; see notes at head of file */
+ tsm->repeatable_across_queries = false;
+ tsm->repeatable_across_scans = false;
- sampler_random_init_state(sampler->seed, sampler->randstate);
+ tsm->SampleScanGetSampleSize = system_time_samplescangetsamplesize;
+ tsm->InitSampleScan = system_time_initsamplescan;
+ tsm->BeginSampleScan = system_time_beginsamplescan;
+ tsm->NextSampleBlock = system_time_nextsampleblock;
+ tsm->NextSampleTuple = system_time_nextsampletuple;
+ tsm->EndSampleScan = NULL; /* optional callback, not provided */
- /* Find relative prime as step size for linear probing. */
- sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
-
- /*
- * Randomize start position so that blocks close to step size don't have
- * higher probability of being chosen on very short scan.
- */
- sampler->lb = sampler_random_fract(sampler->randstate) * (sampler->nblocks / sampler->step);
-
- tsdesc->tsmdata = (void *) sampler;
-
- PG_RETURN_VOID();
+ PG_RETURN_POINTER(tsm);
}
/*
- * Get next block number or InvalidBlockNumber when we're done.
- *
- * Uses linear probing algorithm for picking next block.
+ * Sample size estimation.
+ *
+ * Stores in *pages and *tuples the estimated number of pages read and
+ * tuples returned by the sample scan. The time limit is taken from the
+ * first entry of paramexprs; if that does not reduce to a non-null Const
+ * holding a non-negative, non-NaN value, 1000 milliseconds is assumed.
+ * Both estimates are capped at the relation-wide figures in baserel.
*/
-Datum
-tsm_system_time_nextblock(PG_FUNCTION_ARGS)
+static void
+system_time_samplescangetsamplesize(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-
- sampler->lb = (sampler->lb + sampler->step) % sampler->nblocks;
- sampler->doneblocks++;
+ Node *limitnode;
+ double millis;
+ double spc_random_page_cost;
+ double npages;
+ double ntuples;
- /* All blocks have been read, we're done */
- if (sampler->doneblocks > sampler->nblocks)
- PG_RETURN_UINT32(InvalidBlockNumber);
+ /* Try to extract an estimate for the limit time spec */
+ limitnode = (Node *) linitial(paramexprs);
+ limitnode = estimate_expression_value(root, limitnode);
- /*
- * Update the estimations for time limit at least 10 times per estimated
- * number of returned blocks to handle variations in block read speed.
- */
- if (sampler->doneblocks % Max(sampler->estblocks / 10, 1) == 0)
+ if (IsA(limitnode, Const) &&
+ !((Const *) limitnode)->constisnull)
+ {
+ millis = DatumGetFloat8(((Const *) limitnode)->constvalue);
+ if (millis < 0 || isnan(millis)) /* same validity test as at execution time */
+ {
+ /* Default millis if the value is bogus */
+ millis = 1000;
+ }
+ }
+ else
{
- TimestampTz now = GetCurrentTimestamp();
- long secs;
- int usecs;
- int usecs_remaining;
- int time_per_block;
+ /* Default millis if we didn't obtain a non-null Const */
+ millis = 1000;
+ }
- TimestampDifference(sampler->start_time, now, &secs, &usecs);
- usecs += (int) secs *1000000;
+ /* Get the planner's idea of cost per page read */
+ get_tablespace_page_costs(baserel->reltablespace,
+ &spc_random_page_cost,
+ NULL);
- time_per_block = usecs / sampler->doneblocks;
+ /*
+ * Estimate the number of pages we can read by assuming that the cost
+ * figure is expressed in milliseconds. This is completely, unmistakably
+ * bogus, but we have to do something to produce an estimate and there's
+ * no better answer.
+ */
+ if (spc_random_page_cost > 0)
+ npages = millis / spc_random_page_cost;
+ else
+ npages = millis; /* even more bogus, but whatcha gonna do? */
- /* No time left, end. */
- TimestampDifference(now, sampler->end_time, &secs, &usecs);
- if (secs <= 0 && usecs <= 0)
- PG_RETURN_UINT32(InvalidBlockNumber);
+ /* Clamp to sane value, never more than the relation's page count */
+ npages = clamp_row_est(Min((double) baserel->pages, npages));
- /* Remaining microseconds */
- usecs_remaining = usecs + (int) secs *1000000;
+ if (baserel->tuples > 0 && baserel->pages > 0)
+ {
+ /* Estimate number of tuples returned based on tuple density */
+ double density = baserel->tuples / (double) baserel->pages;
- /* Recalculate estimated returned number of blocks */
- if (time_per_block < usecs_remaining && time_per_block > 0)
- sampler->estblocks = sampler->time * time_per_block;
+ ntuples = npages * density;
}
-
- PG_RETURN_UINT32(sampler->lb);
-}
-
-/*
- * Get next tuple offset in current block or InvalidOffsetNumber if we are done
- * with this block.
- */
-Datum
-tsm_system_time_nexttuple(PG_FUNCTION_ARGS)
-{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- OffsetNumber maxoffset = PG_GETARG_UINT16(2);
- SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
- OffsetNumber tupoffset = sampler->lt;
-
- if (tupoffset == InvalidOffsetNumber)
- tupoffset = FirstOffsetNumber;
else
- tupoffset++;
-
- if (tupoffset > maxoffset)
- tupoffset = InvalidOffsetNumber;
+ {
+ /* For lack of data, assume one tuple per page */
+ ntuples = npages;
+ }
- sampler->lt = tupoffset;
+ /* Clamp to the estimated relation size */
+ ntuples = clamp_row_est(Min(baserel->tuples, ntuples));
- PG_RETURN_UINT16(tupoffset);
+ *pages = npages; /* double is implicitly converted to BlockNumber here */
+ *tuples = ntuples;
}
/*
- * Cleanup method.
+ * Initialize during executor setup.
+ *
+ * Allocates the zero-filled SystemTimeSamplerData for this scan node and
+ * stores it in node->tsm_state. eflags is not examined.
*/
-Datum
-tsm_system_time_end(PG_FUNCTION_ARGS)
+static void
+system_time_initsamplescan(SampleScanState *node, int eflags)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-
- pfree(tsdesc->tsmdata);
-
- PG_RETURN_VOID();
+ node->tsm_state = palloc0(sizeof(SystemTimeSamplerData));
+ /*
+ * Note the above leaves tsm_state->step equal to zero, which
+ * system_time_nextsampleblock takes to mean that nblocks, firstblock
+ * and step have not been chosen yet.
+ */
}
/*
- * Reset state (called by ReScan).
+ * Examine parameters and prepare for a sample scan.
+ *
+ * params[0] is the time limit in milliseconds, as a float8 Datum; nparams
+ * is not examined. Raises an ERROR if the limit is negative or NaN; zero
+ * is accepted. On success the per-scan fields (seed, millis, lt,
+ * doneblocks) are reset, while the sampling pattern chosen by an earlier
+ * scan of the same node, if any, is kept.
*/
-Datum
-tsm_system_time_reset(PG_FUNCTION_ARGS)
+static void
+system_time_beginsamplescan(SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+ SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state;
+ double millis = DatumGetFloat8(params[0]);
+
+ if (millis < 0 || isnan(millis)) /* NaN would not be caught by "< 0" alone */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+ errmsg("sample collection time must not be negative")));
+ sampler->seed = seed;
+ sampler->millis = millis;
sampler->lt = InvalidOffsetNumber;
- sampler->start_time = GetCurrentTimestamp();
- sampler->end_time = TimestampTzPlusMilliseconds(sampler->start_time,
- sampler->time);
- sampler->estblocks = 2;
sampler->doneblocks = 0;
-
- sampler_random_init_state(sampler->seed, sampler->randstate);
- sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
- sampler->lb = sampler_random_fract(sampler->randstate) * (sampler->nblocks / sampler->step);
-
- PG_RETURN_VOID();
+ /* start_time, lb will be initialized during first NextSampleBlock call */
+ /* we intentionally do not change nblocks/firstblock/step here */
}
/*
- * Costing function.
+ * Select next block to sample.
+ *
+ * Uses a linear probing algorithm for picking the next block: starting from
+ * firstblock, lb is advanced by step modulo nblocks on every call.
+ *
+ * Returns InvalidBlockNumber when the relation is found empty during
+ * first-time setup, when nblocks blocks have already been handed out in this
+ * scan, or when at least millis milliseconds have elapsed since the first
+ * call of this scan. The time limit is checked only here, that is, once
+ * per block.
*/
-Datum
-tsm_system_time_cost(PG_FUNCTION_ARGS)
+static BlockNumber
+system_time_nextsampleblock(SampleScanState *node)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- Path *path = (Path *) PG_GETARG_POINTER(1);
- RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
- List *args = (List *) PG_GETARG_POINTER(3);
- BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
- double *tuples = (double *) PG_GETARG_POINTER(5);
- Node *limitnode;
- int32 time;
- BlockNumber relpages;
- double reltuples;
- double density;
- double spc_random_page_cost;
-
- limitnode = linitial(args);
- limitnode = estimate_expression_value(root, limitnode);
-
- if (IsA(limitnode, RelabelType))
- limitnode = (Node *) ((RelabelType *) limitnode)->arg;
+ SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state;
+ HeapScanDesc scan = node->ss.ss_currentScanDesc;
+ instr_time cur_time;
- if (IsA(limitnode, Const))
- time = DatumGetInt32(((Const *) limitnode)->constvalue);
- else
+ /* First call within scan? */
+ if (sampler->doneblocks == 0)
{
- /* Default time (1s) if the estimation didn't return Const. */
- time = 1000;
+ /* First scan within query? */
+ if (sampler->step == 0)
+ {
+ /* Initialize now that we have scan descriptor */
+ SamplerRandomState randstate;
+
+ /* If relation is empty, there's nothing to scan */
+ if (scan->rs_nblocks == 0)
+ return InvalidBlockNumber;
+
+ /* We only need an RNG during this setup step */
+ sampler_random_init_state(sampler->seed, randstate);
+
+ /* Compute nblocks/firstblock/step only once per query */
+ sampler->nblocks = scan->rs_nblocks;
+
+ /* Choose random starting block within the relation */
+ /* (Actually this is the predecessor of the first block visited) */
+ sampler->firstblock = sampler_random_fract(randstate) *
+ sampler->nblocks;
+
+ /* Find relative prime as step size for linear probing */
+ sampler->step = random_relative_prime(sampler->nblocks, randstate);
+ }
+
+ /* Reinitialize lb and start_time */
+ sampler->lb = sampler->firstblock;
+ INSTR_TIME_SET_CURRENT(sampler->start_time);
}
- relpages = baserel->pages;
- reltuples = baserel->tuples;
+ /* If we've read all blocks in relation, we're done */
+ if (++sampler->doneblocks > sampler->nblocks)
+ return InvalidBlockNumber;
- /* estimate the tuple density */
- if (relpages > 0)
- density = reltuples / (double) relpages;
- else
- density = (BLCKSZ - SizeOfPageHeaderData) / baserel->width;
+ /* If we've used up all the allotted time, we're done */
+ INSTR_TIME_SET_CURRENT(cur_time);
+ INSTR_TIME_SUBTRACT(cur_time, sampler->start_time); /* cur_time now holds the elapsed time */
+ if (INSTR_TIME_GET_MILLISEC(cur_time) >= sampler->millis)
+ return InvalidBlockNumber;
/*
- * We equal random page cost value to number of ms it takes to read the
- * random page here which is far from accurate but we don't have anything
- * better to base our predicted page reads.
+ * It's probably impossible for scan->rs_nblocks to decrease between scans
+ * within a query; but just in case, loop until we select a block number
+ * less than scan->rs_nblocks. We don't care if scan->rs_nblocks has
+ * increased since the first scan.
+ *
+ * NOTE(review): the emptiness test above runs only while step is still
+ * zero. If scan->rs_nblocks could be 0 on a later scan, no value of lb
+ * would satisfy the loop condition and the loop would not terminate;
+ * confirm that this cannot happen.
*/
- get_tablespace_page_costs(baserel->reltablespace,
- &spc_random_page_cost,
- NULL);
+ do
+ {
+ /* Advance lb, using uint64 arithmetic to forestall overflow */
+ sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks;
+ } while (sampler->lb >= scan->rs_nblocks);
- /*
- * Assumption here is that we'll never read less than 1% of table pages,
- * this is here mainly because it is much less bad to overestimate than
- * underestimate and using just spc_random_page_cost will probably lead to
- * underestimations in general.
- */
- *pages = Min(baserel->pages, Max(time / spc_random_page_cost, baserel->pages / 100));
- *tuples = rint(density * (double) *pages * path->rows / baserel->tuples);
- path->rows = *tuples;
+ return sampler->lb;
+}
+
+/*
+ * Select next sampled tuple in current block.
+ *
+ * In block sampling, we just want to sample all the tuples in each selected
+ * block.
+ *
+ * When we reach end of the block, return InvalidOffsetNumber which tells
+ * SampleScan to go to next block.
+ *
+ * Offsets are handed out in increasing order from FirstOffsetNumber up to
+ * maxoffset; blockno is not examined. The returned value is also saved in
+ * sampler->lt, so the InvalidOffsetNumber stored at the end of one block
+ * makes the next block start again at FirstOffsetNumber.
+ */
+static OffsetNumber
+system_time_nextsampletuple(SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset)
+{
+ SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state;
+ OffsetNumber tupoffset = sampler->lt;
+
+ /* Advance to next possible offset on page */
+ if (tupoffset == InvalidOffsetNumber)
+ tupoffset = FirstOffsetNumber;
+ else
+ tupoffset++;
+
+ /* Done? */
+ if (tupoffset > maxoffset)
+ tupoffset = InvalidOffsetNumber;
+
+ sampler->lt = tupoffset;
- PG_RETURN_VOID();
+ return tupoffset;
}
+/*
+ * Compute greatest common divisor of two uint32's.
+ */
static uint32
gcd(uint32 a, uint32 b)
{
return b;
}
+/*
+ * Pick a random value less than and relatively prime to n, if possible
+ * (else return 1).
+ *
+ * Works by rejection: a candidate is computed as sampler_random_fract()
+ * times n, truncated to uint32, and is redrawn for as long as it is zero or
+ * gcd(candidate, n) exceeds 1. For n <= 1 no candidate is drawn and 1 is
+ * returned directly.
+ */
static uint32
random_relative_prime(uint32 n, SamplerRandomState randstate)
{
- /* Pick random starting number, with some limits on what it can be. */
- uint32 r = (uint32) sampler_random_fract(randstate) * n / 2 + n / 4,
- t;
+ uint32 r;
+
+ /* Safety check to avoid infinite loop or zero result for small n. */
+ if (n <= 1)
+ return 1;
/*
* This should only take 2 or 3 iterations as the probability of 2 numbers
- * being relatively prime is ~61%.
+ * being relatively prime is ~61%; but just in case, we'll include a
+ * CHECK_FOR_INTERRUPTS in the loop.
*/
- while ((t = gcd(r, n)) > 1)
+ do
{
CHECK_FOR_INTERRUPTS();
- r /= t;
- }
+ r = (uint32) (sampler_random_fract(randstate) * n);
+ } while (r == 0 || gcd(r, n) > 1); /* reject zero and anything sharing a factor with n */
return r;
}
# tsm_system_time extension
-comment = 'SYSTEM TABLESAMPLE method which accepts time in milliseconds as a limit'
+comment = 'TABLESAMPLE method which accepts time in milliseconds as a limit'
default_version = '1.0'
module_pathname = '$libdir/tsm_system_time'
relocatable = true
<entry>planner statistics</entry>
</row>
- <row>
- <entry><link linkend="catalog-pg-tablesample-method"><structname>pg_tablesample_method</structname></link></entry>
- <entry>table sampling methods</entry>
- </row>
-
<row>
<entry><link linkend="catalog-pg-tablespace"><structname>pg_tablespace</structname></link></entry>
<entry>tablespaces within this database cluster</entry>
</sect1>
- <sect1 id="catalog-pg-tablesample-method">
- <title><structname>pg_tabesample_method</structname></title>
-
- <indexterm zone="catalog-pg-tablesample-method">
- <primary>pg_am</primary>
- </indexterm>
-
- <para>
- The catalog <structname>pg_tablesample_method</structname> stores
- information about table sampling methods which can be used in
- <command>TABLESAMPLE</command> clause of a <command>SELECT</command>
- statement.
- </para>
-
- <table>
- <title><structname>pg_tablesample_method</> Columns</title>
-
- <tgroup cols="4">
- <thead>
- <row>
- <entry>Name</entry>
- <entry>Type</entry>
- <entry>References</entry>
- <entry>Description</entry>
- </row>
- </thead>
- <tbody>
-
- <row>
- <entry><structfield>oid</structfield></entry>
- <entry><type>oid</type></entry>
- <entry></entry>
- <entry>Row identifier (hidden attribute; must be explicitly selected)</entry>
- </row>
-
- <row>
- <entry><structfield>tsmname</structfield></entry>
- <entry><type>name</type></entry>
- <entry></entry>
- <entry>Name of the sampling method</entry>
- </row>
-
- <row>
- <entry><structfield>tsmseqscan</structfield></entry>
- <entry><type>bool</type></entry>
- <entry></entry>
- <entry>If true, the sampling method scans the whole table sequentially.
- </entry>
- </row>
-
- <row>
- <entry><structfield>tsmpagemode</structfield></entry>
- <entry><type>bool</type></entry>
- <entry></entry>
- <entry>If true, the sampling method always reads the pages completely.
- </entry>
- </row>
-
- <row>
- <entry><structfield>tsminit</structfield></entry>
- <entry><type>regproc</type></entry>
- <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
- <entry><quote>Initialize the sampling scan</quote> function</entry>
- </row>
-
- <row>
- <entry><structfield>tsmnextblock</structfield></entry>
- <entry><type>regproc</type></entry>
- <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
- <entry><quote>Get next block number</quote> function</entry>
- </row>
-
- <row>
- <entry><structfield>tsmnexttuple</structfield></entry>
- <entry><type>regproc</type></entry>
- <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
- <entry><quote>Get next tuple offset</quote> function</entry>
- </row>
-
- <row>
- <entry><structfield>tsmexaminetuple</structfield></entry>
- <entry><type>regproc</type></entry>
- <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
- <entry>Function which examines the tuple contents and decides if to
- return it, or zero if none</entry>
- </row>
-
- <row>
- <entry><structfield>tsmend</structfield></entry>
- <entry><type>regproc</type></entry>
- <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
- <entry><quote>End the sampling scan</quote> function</entry>
- </row>
-
- <row>
- <entry><structfield>tsmreset</structfield></entry>
- <entry><type>regproc</type></entry>
- <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
- <entry><quote>Restart the state of sampling scan</quote> function</entry>
- </row>
-
- <row>
- <entry><structfield>tsmcost</structfield></entry>
- <entry><type>regproc</type></entry>
- <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
- <entry>Costing function</entry>
- </row>
-
- </tbody>
- </tgroup>
- </table>
-
- </sect1>
-
-
<sect1 id="catalog-pg-tablespace">
<title><structname>pg_tablespace</structname></title>
an object identifier. There are also several alias types for
<type>oid</>: <type>regproc</>, <type>regprocedure</>,
<type>regoper</>, <type>regoperator</>, <type>regclass</>,
- <type>regtype</>, <type>regrole</>, <type>regnamespace</>,
+ <type>regtype</>, <type>regrole</>, <type>regnamespace</>,
<type>regconfig</>, and <type>regdictionary</>.
<xref linkend="datatype-oid-table"> shows an overview.
</para>
<primary>fdw_handler</primary>
</indexterm>
+ <indexterm zone="datatype-pseudo">
+ <primary>tsm_handler</primary>
+ </indexterm>
+
<indexterm zone="datatype-pseudo">
<primary>cstring</primary>
</indexterm>
<entry>A foreign-data wrapper handler is declared to return <type>fdw_handler</>.</entry>
</row>
+ <row>
+ <entry><type>tsm_handler</></entry>
+ <entry>A tablesample method handler is declared to return <type>tsm_handler</>.</entry>
+ </row>
+
<row>
<entry><type>record</></entry>
<entry>Identifies a function returning an unspecified row type.</entry>
&nls;
&plhandler;
&fdwhandler;
+ &tablesample-method;
&custom-scan;
&geqo;
&indexam;
&spgist;
&gin;
&brin;
- &tablesample-method;
&storage;
&bki;
&planstats;
<phrase>where <replaceable class="parameter">from_item</replaceable> can be one of:</phrase>
- [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ] [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ] [ TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ] ]
+ [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ] [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ]
+ [ TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ] ]
[ LATERAL ] ( <replaceable class="parameter">select</replaceable> ) [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ]
<replaceable class="parameter">with_query_name</replaceable> [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ]
[ LATERAL ] <replaceable class="parameter">function_name</replaceable> ( [ <replaceable class="parameter">argument</replaceable> [, ...] ] )
</listitem>
</varlistentry>
- <varlistentry>
- <term>TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ]</term>
- <listitem>
- <para>
- Table sample clause after
- <replaceable class="parameter">table_name</replaceable> indicates that
- a <replaceable class="parameter">sampling_method</replaceable> should
- be used to retrieve subset of rows in the table.
- The <replaceable class="parameter">sampling_method</replaceable> can be
- any sampling method installed in the database. There are currently two
- sampling methods available in the standard
- <productname>PostgreSQL</productname> distribution:
- <itemizedlist>
- <listitem>
- <para><literal>SYSTEM</literal></para>
- </listitem>
- <listitem>
- <para><literal>BERNOULLI</literal></para>
- </listitem>
- </itemizedlist>
- Both of these sampling methods currently accept only single argument
- which is the percent (floating point from 0 to 100) of the rows to
- be returned.
- The <literal>SYSTEM</literal> sampling method does block level
- sampling with each block having the same chance of being selected and
- returns all rows from each selected block.
- The <literal>BERNOULLI</literal> scans whole table and returns
- individual rows with equal probability. Additional sampling methods
- may be installed in the database via extensions.
- </para>
- <para>
- The optional parameter <literal>REPEATABLE</literal> uses the seed
- parameter, which can be a number or expression producing a number, as
- a random seed for sampling. Note that subsequent commands may return
- different results even if same <literal>REPEATABLE</literal> clause was
- specified. This happens because <acronym>DML</acronym> statements and
- maintenance operations such as <command>VACUUM</> may affect physical
- distribution of data. The <function>setseed()</> function will not
- affect the sampling result when the <literal>REPEATABLE</literal>
- parameter is used.
- </para>
- </listitem>
- </varlistentry>
-
<varlistentry>
<term><replaceable class="parameter">alias</replaceable></term>
<listitem>
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><literal>TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ]</literal></term>
+ <listitem>
+ <para>
+ A <literal>TABLESAMPLE</> clause after
+ a <replaceable class="parameter">table_name</> indicates that the
+ specified <replaceable class="parameter">sampling_method</replaceable>
+ should be used to retrieve a subset of the rows in that table.
+ This sampling precedes the application of any other filters such
+ as <literal>WHERE</> clauses.
+ The standard <productname>PostgreSQL</productname> distribution
+ includes two sampling methods, <literal>BERNOULLI</literal>
+ and <literal>SYSTEM</literal>, and other sampling methods can be
+ installed in the database via extensions.
+ </para>
+
+ <para>
+ The <literal>BERNOULLI</> and <literal>SYSTEM</> sampling methods
+ each accept a single <replaceable class="parameter">argument</>
+ which is the fraction of the table to sample, expressed as a
+ percentage between 0 and 100. This argument can be
+ any <type>real</>-valued expression. (Other sampling methods might
+ accept more or different arguments.) These two methods each return
+ a randomly-chosen sample of the table that will contain
+ approximately the specified percentage of the table's rows.
+ The <literal>BERNOULLI</literal> method scans the whole table and
+ selects or ignores individual rows independently with the specified
+ probability.
+ The <literal>SYSTEM</literal> method does block-level sampling with
+ each block having the specified chance of being selected; all rows
+ in each selected block are returned.
+ The <literal>SYSTEM</literal> method is significantly faster than
+ the <literal>BERNOULLI</literal> method when small sampling
+ percentages are specified, but it may return a less-random sample of
+ the table as a result of clustering effects.
+ </para>
+
+ <para>
+ The optional <literal>REPEATABLE</literal> clause specifies
+ a <replaceable class="parameter">seed</> number or expression to use
+ for generating random numbers within the sampling method. The seed
+ value can be any non-null floating-point value. Two queries that
+ specify the same seed and <replaceable class="parameter">argument</>
+ values will select the same sample of the table, if the table has
+ not been changed meanwhile. But different seed values will usually
+ produce different samples.
+ If <literal>REPEATABLE</literal> is not given then a new random
+ sample is selected for each query.
+ Note that some add-on sampling methods do not
+ accept <literal>REPEATABLE</literal>, and will always produce new
+ samples on each use.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term><replaceable class="parameter">select</replaceable></term>
<listitem>
</para>
</refsect2>
+ <refsect2>
+ <title><literal>TABLESAMPLE</literal> Clause Restrictions</title>
+
+ <para>
+ The <literal>TABLESAMPLE</> clause is currently accepted only on
+ regular tables and materialized views. According to the SQL standard
+ it should be possible to apply it to any <literal>FROM</> item.
+ </para>
+ </refsect2>
+
<refsect2>
<title>Function Calls in <literal>FROM</literal></title>
</para>
</refsect2>
- <refsect2>
- <title><literal>TABLESAMPLE</literal> clause</title>
-
- <para>
- The <literal>TABLESAMPLE</> clause is currently accepted only on physical
- relations and materialized views.
- </para>
-
- <para>
- Additional modules allow you to install custom sampling methods and use
- them instead of the SQL standard methods.
- </para>
- </refsect2>
-
</refsect1>
</refentry>
<!-- doc/src/sgml/tablesample-method.sgml -->
<chapter id="tablesample-method">
- <title>Writing A TABLESAMPLE Sampling Method</title>
+ <title>Writing A Table Sampling Method</title>
<indexterm zone="tablesample-method">
- <primary>tablesample method</primary>
+ <primary>table sampling method</primary>
+ </indexterm>
+
+ <indexterm zone="tablesample-method">
+ <primary><literal>TABLESAMPLE</literal> method</primary>
</indexterm>
<para>
- The <command>TABLESAMPLE</command> clause implementation in
- <productname>PostgreSQL</> supports creating a custom sampling methods.
- These methods control what sample of the table will be returned when the
- <command>TABLESAMPLE</command> clause is used.
+ <productname>PostgreSQL</>'s implementation of the <literal>TABLESAMPLE</>
+ clause supports custom table sampling methods, in addition to
+ the <literal>BERNOULLI</> and <literal>SYSTEM</> methods that are required
+ by the SQL standard. The sampling method determines which rows of the
+ table will be selected when the <literal>TABLESAMPLE</> clause is used.
</para>
- <sect1 id="tablesample-method-functions">
- <title>Tablesample Method Functions</title>
+ <para>
+ At the SQL level, a table sampling method is represented by a single SQL
+ function, typically implemented in C, having the signature
+<programlisting>
+method_name(internal) RETURNS tsm_handler
+</programlisting>
+ The name of the function is the same as the method name appearing in the
+ <literal>TABLESAMPLE</> clause. The <type>internal</> argument is a dummy
+ (always having value zero) that simply serves to prevent this function from
+ being called directly from a SQL command.
+ The result of the function must be a palloc'd struct of
+ type <type>TsmRoutine</>, which contains pointers to support functions for
+ the sampling method. These support functions are plain C functions and
+ are not visible or callable at the SQL level. The support functions are
+ described in <xref linkend="tablesample-support-functions">.
+ </para>
+
+ <para>
+ In addition to function pointers, the <type>TsmRoutine</> struct must
+ provide these additional fields:
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term><literal>List *parameterTypes</literal></term>
+ <listitem>
+ <para>
+ This is an OID list containing the data type OIDs of the parameter(s)
+ that will be accepted by the <literal>TABLESAMPLE</> clause when this
+ sampling method is used. For example, for the built-in methods, this
+ list contains a single item with value <literal>FLOAT4OID</>, which
+ represents the sampling percentage. Custom sampling methods can have
+ more or different parameters.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>bool repeatable_across_queries</literal></term>
+ <listitem>
+ <para>
+ If <literal>true</>, the sampling method can deliver identical samples
+ across successive queries, if the same parameters
+ and <literal>REPEATABLE</> seed value are supplied each time and the
+ table contents have not changed. When this is <literal>false</>,
+ the <literal>REPEATABLE</> clause is not accepted for use with the
+ sampling method.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>bool repeatable_across_scans</literal></term>
+ <listitem>
+ <para>
+ If <literal>true</>, the sampling method can deliver identical samples
+ across successive scans in the same query (assuming unchanging
+ parameters, seed value, and snapshot).
+ When this is <literal>false</>, the planner will not select plans that
+ would require scanning the sampled table more than once, since that
+ might result in inconsistent query output.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ The <type>TsmRoutine</> struct type is declared
+ in <filename>src/include/access/tsmapi.h</>, which see for additional
+ details.
+ </para>
+
+ <para>
+ The table sampling methods included in the standard distribution are good
+ references when trying to write your own. Look into
+ the <filename>src/backend/access/tablesample</> subdirectory of the source
+ tree for the built-in sampling methods, and into the <filename>contrib</>
+ subdirectory for add-on methods.
+ </para>
+
+ <sect1 id="tablesample-support-functions">
+ <title>Sampling Method Support Functions</title>
<para>
- The tablesample method must provide following set of functions:
+ The TSM handler function returns a palloc'd <type>TsmRoutine</> struct
+ containing pointers to the support functions described below. Most of
+ the functions are required, but some are optional, and those pointers can
+ be NULL.
</para>
<para>
<programlisting>
void
-tsm_init (TableSampleDesc *desc,
- uint32 seed, ...);
+SampleScanGetSampleSize (PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples);
</programlisting>
- Initialize the tablesample scan. The function is called at the beginning
- of each relation scan.
+
+ This function is called during planning. It must estimate the number of
+ relation pages that will be read during a sample scan, and the number of
+ tuples that will be selected by the scan. (For example, these might be
+ determined by estimating the sampling fraction, and then multiplying
+ the <literal>baserel->pages</> and <literal>baserel->tuples</>
+ numbers by that, being sure to round the results to integral values.)
+ The <literal>paramexprs</> list holds the expression(s) that are
+ parameters to the <literal>TABLESAMPLE</> clause. It is recommended to
+ use <function>estimate_expression_value()</> to try to reduce these
+ expressions to constants, if their values are needed for estimation
+ purposes; but the function must provide size estimates even if they cannot
+ be reduced, and it should not fail even if the values appear invalid
+ (remember that they're only estimates of what the run-time values will be).
+ The <literal>pages</> and <literal>tuples</> parameters are outputs.
</para>
+
<para>
- Note that the first two parameters are required but you can specify
- additional parameters which then will be used by the <command>TABLESAMPLE</>
- clause to determine the required user input in the query itself.
- This means that if your function will specify additional float4 parameter
- named percent, the user will have to call the tablesample method with
- expression which evaluates (or can be coerced) to float4.
- For example this definition:
<programlisting>
-tsm_init (TableSampleDesc *desc,
- uint32 seed, float4 pct);
-</programlisting>
-Will lead to SQL call like this:
-<programlisting>
-... TABLESAMPLE yourmethod(0.5) ...
+void
+InitSampleScan (SampleScanState *node,
+ int eflags);
</programlisting>
+
+ Initialize for execution of a SampleScan plan node.
+ This is called during executor startup.
+ It should perform any initialization needed before processing can start.
+ The <structname>SampleScanState</> node has already been created, but
+ its <structfield>tsm_state</> field is NULL.
+ The <function>InitSampleScan</> function can palloc whatever internal
+ state data is needed by the sampling method, and store a pointer to
+ it in <literal>node->tsm_state</>.
+ Information about the table to scan is accessible through other fields
+ of the <structname>SampleScanState</> node (but note that the
+ <literal>node->ss.ss_currentScanDesc</> scan descriptor is not set
+ up yet).
+ <literal>eflags</> contains flag bits describing the executor's
+ operating mode for this plan node.
</para>
<para>
-<programlisting>
-BlockNumber
-tsm_nextblock (TableSampleDesc *desc);
-</programlisting>
- Returns the block number of next page to be scanned. InvalidBlockNumber
- should be returned if the sampling has reached end of the relation.
+ When <literal>(eflags &amp; EXEC_FLAG_EXPLAIN_ONLY)</> is true,
+ the scan will not actually be performed, so this function should only do
+ the minimum required to make the node state valid for <command>EXPLAIN</>
+ and <function>EndSampleScan</>.
</para>
<para>
-<programlisting>
-OffsetNumber
-tsm_nexttuple (TableSampleDesc *desc, BlockNumber blockno,
- OffsetNumber maxoffset);
-</programlisting>
- Return next tuple offset for the current page. InvalidOffsetNumber should
- be returned if the sampling has reached end of the page.
+ This function can be omitted (set the pointer to NULL), in which case
+ <function>BeginSampleScan</> must perform all initialization needed
+ by the sampling method.
</para>
<para>
<programlisting>
void
-tsm_end (TableSampleDesc *desc);
+BeginSampleScan (SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed);
</programlisting>
- The scan has finished, cleanup any left over state.
+
+ Begin execution of a sampling scan.
+ This is called just before the first attempt to fetch a tuple, and
+ may be called again if the scan needs to be restarted.
+ Information about the table to scan is accessible through fields
+ of the <structname>SampleScanState</> node (but note that the
+ <literal>node->ss.ss_currentScanDesc</> scan descriptor is not set
+ up yet).
+ The <literal>params</> array, of length <literal>nparams</>, contains the
+ values of the parameters supplied in the <literal>TABLESAMPLE</> clause.
+ These will have the number and types specified in the sampling
+ method's <literal>parameterTypes</literal> list, and have been checked
+ to not be null.
+ <literal>seed</> contains a seed to use for any random numbers generated
+ within the sampling method; it is either a hash derived from the
+ <literal>REPEATABLE</> value if one was given, or the result
+ of <literal>random()</> if not.
</para>
<para>
-<programlisting>
-void
-tsm_reset (TableSampleDesc *desc);
-</programlisting>
- The scan needs to rescan the relation again, reset any tablesample method
- state.
+ This function may adjust the fields <literal>node->use_bulkread</>
+ and <literal>node->use_pagemode</>.
+ If <literal>node->use_bulkread</> is <literal>true</>, which it is by
+ default, the scan will use a buffer access strategy that encourages
+ recycling buffers after use. It might be reasonable to set this
+ to <literal>false</> if the scan will visit only a small fraction of the
+ table's pages.
+ If <literal>node->use_pagemode</> is <literal>true</>, which it is by
+ default, the scan will perform visibility checking in a single pass for
+ all tuples on each visited page. It might be reasonable to set this
+ to <literal>false</> if the scan will select only a small fraction of the
+ tuples on each visited page. That will result in fewer tuple visibility
+ checks being performed, though each one will be more expensive because it
+ will require more locking.
+ </para>
+
+ <para>
+ If the sampling method is
+ marked <literal>repeatable_across_scans</literal>, it must be able to
+ select the same set of tuples during a rescan as it did originally; that is,
+ a fresh call of <function>BeginSampleScan</> must lead to selecting the
+ same tuples as before (if the <literal>TABLESAMPLE</> parameters
+ and seed don't change).
</para>
<para>
<programlisting>
-void
-tsm_cost (PlannerInfo *root, Path *path, RelOptInfo *baserel,
- List *args, BlockNumber *pages, double *tuples);
+BlockNumber
+NextSampleBlock (SampleScanState *node);
</programlisting>
- This function is used by optimizer to decide best plan and is also used
- for output of <command>EXPLAIN</>.
+
+ Returns the block number of the next page to be scanned, or
+ <literal>InvalidBlockNumber</> if no pages remain to be scanned.
</para>
<para>
- There is one more function which tablesampling method can implement in order
- to gain more fine grained control over sampling. This function is optional:
+ This function can be omitted (set the pointer to NULL), in which case
+ the core code will perform a sequential scan of the entire relation.
+ Such a scan can use synchronized scanning, so the sampling method
+ cannot assume that the relation pages are visited in the same order on
+ each scan.
</para>
<para>
<programlisting>
-bool
-tsm_examinetuple (TableSampleDesc *desc, BlockNumber blockno,
- HeapTuple tuple, bool visible);
+OffsetNumber
+NextSampleTuple (SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset);
</programlisting>
- Function that enables the sampling method to examine contents of the tuple
- (for example to collect some internal statistics). The return value of this
- function is used to determine if the tuple should be returned to client.
- Note that this function will receive even invisible tuples but it is not
- allowed to return true for such tuple (if it does,
- <productname>PostgreSQL</> will raise an error).
+
+ Returns the offset number of the next tuple to be sampled on the
+ specified page, or <literal>InvalidOffsetNumber</> if no tuples remain to
+ be sampled. <literal>maxoffset</> is the largest offset number in use
+ on the page.
</para>
+ <note>
+ <para>
+ <function>NextSampleTuple</> is not explicitly told which of the offset
+ numbers in the range <literal>1 .. maxoffset</> actually contain valid
+ tuples. This is not normally a problem since the core code ignores
+ requests to sample missing or invisible tuples; that should not result in
+ any bias in the sample. However, if necessary, the function can
+ examine <literal>node->ss.ss_currentScanDesc->rs_vistuples[]</>
+ to identify which tuples are valid and visible. (This
+ requires <literal>node->use_pagemode</> to be <literal>true</>.)
+ </para>
+ </note>
+
+ <note>
+ <para>
+ <function>NextSampleTuple</> must <emphasis>not</> assume
+ that <literal>blockno</> is the same page number returned by the most
+ recent <function>NextSampleBlock</> call. It was returned by some
+ previous <function>NextSampleBlock</> call, but the core code is allowed
+ to call <function>NextSampleBlock</> in advance of actually scanning
+ pages, so as to support prefetching. It is OK to assume that once
+ sampling of a given page begins, successive <function>NextSampleTuple</>
+ calls all refer to the same page until <literal>InvalidOffsetNumber</> is
+ returned.
+ </para>
+ </note>
+
<para>
- As you can see most of the tablesample method interfaces get the
- <structname>TableSampleDesc</> as a first parameter. This structure holds
- state of the current scan and also provides storage for the tablesample
- method's state. It is defined as following:
<programlisting>
-typedef struct TableSampleDesc {
- HeapScanDesc heapScan;
- TupleDesc tupDesc;
-
- void *tsmdata;
-} TableSampleDesc;
+void
+EndSampleScan (SampleScanState *node);
</programlisting>
- Where <structfield>heapScan</> is the descriptor of the physical table scan.
- It's possible to get table size info from it. The <structfield>tupDesc</>
- represents the tuple descriptor of the tuples returned by the scan and passed
- to the <function>tsm_examinetuple()</> interface. The <structfield>tsmdata</>
- can be used by tablesample method itself to store any state info it might
- need during the scan. If used by the method, it should be <function>pfree</>d
- in <function>tsm_end()</> function.
+
+ End the scan and release resources. It is normally not important
+ to release palloc'd memory, but any externally-visible resources
+ should be cleaned up.
+ This function can be omitted (set the pointer to NULL) in the common
+ case where no such resources exist.
</para>
+
</sect1>
</chapter>
</indexterm>
<para>
- The <filename>tsm_system_rows</> module provides the tablesample method
- <literal>SYSTEM_ROWS</literal>, which can be used inside the
- <command>TABLESAMPLE</command> clause of a <command>SELECT</command>.
+ The <filename>tsm_system_rows</> module provides the table sampling method
+ <literal>SYSTEM_ROWS</literal>, which can be used in
+ the <literal>TABLESAMPLE</> clause of a <xref linkend="sql-select">
+ command.
</para>
<para>
- This tablesample method uses a linear probing algorithm to read sample
- of a table and uses actual number of rows as limit (unlike the
- <literal>SYSTEM</literal> tablesample method which limits by percentage
- of a table).
+ This table sampling method accepts a single integer argument that is the
+ maximum number of rows to read. The resulting sample will always contain
+ exactly that many rows, unless the table does not contain enough rows, in
+ which case the whole table is selected.
+ </para>
+
+ <para>
+ Like the built-in <literal>SYSTEM</literal> sampling
+ method, <literal>SYSTEM_ROWS</literal> performs block-level sampling, so
+ that the sample is not completely random but may be subject to clustering
+ effects, especially if only a small number of rows are requested.
+ </para>
+
+ <para>
+ <literal>SYSTEM_ROWS</literal> does not support
+ the <literal>REPEATABLE</literal> clause.
</para>
<sect2>
<title>Examples</title>
<para>
- Here is an example of selecting sample of a table with
- <literal>SYSTEM_ROWS</>. First install the extension:
+ Here is an example of selecting a sample of a table with
+ <literal>SYSTEM_ROWS</>. First install the extension:
</para>
<programlisting>
</programlisting>
<para>
- Then you can use it in <command>SELECT</command> command same way as other
- tablesample methods:
+ Then you can use it in a <command>SELECT</command> command, for instance:
<programlisting>
SELECT * FROM my_table TABLESAMPLE SYSTEM_ROWS(100);
</para>
<para>
- The above command will return a sample of 100 rows from the table my_table
- (less if the table does not have 100 visible rows).
+ This command will return a sample of 100 rows from the
+ table <structname>my_table</> (unless the table does not have 100
+ visible rows, in which case all its rows are returned).
</para>
</sect2>
</indexterm>
<para>
- The <filename>tsm_system_time</> module provides the tablesample method
- <literal>SYSTEM_TIME</literal>, which can be used inside the
- <command>TABLESAMPLE</command> clause of a <command>SELECT</command>.
+ The <filename>tsm_system_time</> module provides the table sampling method
+ <literal>SYSTEM_TIME</literal>, which can be used in
+ the <literal>TABLESAMPLE</> clause of a <xref linkend="sql-select">
+ command.
</para>
<para>
- This tablesample method uses a linear probing algorithm to read sample
- of a table and uses time in milliseconds as limit (unlike the
- <literal>SYSTEM</literal> tablesample method which limits by percentage
- of a table). This gives you some control over the length of execution
- of your query.
+ This table sampling method accepts a single floating-point argument that
+ is the maximum number of milliseconds to spend reading the table. This
+ gives you direct control over how long the query takes, at the price that
+ the size of the sample becomes hard to predict. The resulting sample will
+ contain as many rows as could be read in the specified time, unless the
+ whole table has been read first.
+ </para>
+
+ <para>
+ Like the built-in <literal>SYSTEM</literal> sampling
+ method, <literal>SYSTEM_TIME</literal> performs block-level sampling, so
+ that the sample is not completely random but may be subject to clustering
+ effects, especially if only a small number of rows are selected.
+ </para>
+
+ <para>
+ <literal>SYSTEM_TIME</literal> does not support
+ the <literal>REPEATABLE</literal> clause.
</para>
<sect2>
<title>Examples</title>
<para>
- Here is an example of selecting sample of a table with
- <literal>SYSTEM_TIME</>. First install the extension:
+ Here is an example of selecting a sample of a table with
+ <literal>SYSTEM_TIME</>. First install the extension:
</para>
<programlisting>
</programlisting>
<para>
- Then you can use it in a <command>SELECT</command> command the same way as
- other tablesample methods:
+ Then you can use it in a <command>SELECT</command> command, for instance:
<programlisting>
SELECT * FROM my_table TABLESAMPLE SYSTEM_TIME(1000);
</para>
<para>
- The above command will return as large a sample of my_table as it can read in
- 1 second (or less if it reads whole table faster).
+ This command will return as large a sample of <structname>my_table</> as
+ it can read in 1 second (1000 milliseconds). Of course, if the whole
+ table can be read in under 1 second, all its rows will be returned.
</para>
</sect2>
static HeapScanDesc heap_beginscan_internal(Relation relation,
Snapshot snapshot,
int nkeys, ScanKey key,
- bool allow_strat, bool allow_sync, bool allow_pagemode,
- bool is_bitmapscan, bool is_samplescan,
+ bool allow_strat,
+ bool allow_sync,
+ bool allow_pagemode,
+ bool is_bitmapscan,
+ bool is_samplescan,
bool temp_snap);
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
TransactionId xid, CommandId cid, int options);
* ----------------
*/
static void
-initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
+initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
{
bool allow_strat;
bool allow_sync;
scan->rs_strategy = NULL;
}
- if (is_rescan)
+ if (keep_startblock)
{
/*
- * If rescan, keep the previous startblock setting so that rewinding a
- * cursor doesn't generate surprising results. Reset the syncscan
- * setting, though.
+ * When rescanning, we want to keep the previous startblock setting,
+ * so that rewinding a cursor doesn't generate surprising results.
+ * Reset the active syncscan setting, though.
*/
scan->rs_syncscan = (allow_sync && synchronize_seqscans);
}
/* ----------------
* heap_beginscan - begin relation scan
*
+ * heap_beginscan is the "standard" case.
+ *
+ * heap_beginscan_catalog differs in setting up its own temporary snapshot.
+ *
* heap_beginscan_strat offers an extended API that lets the caller control
* whether a nondefault buffer access strategy can be used, and whether
* syncscan can be chosen (possibly resulting in the scan not starting from
* really quite unlike a standard seqscan, there is just enough commonality
* to make it worth using the same data structure.
*
- * heap_beginscan_samplingscan is alternate entry point for setting up a
- * HeapScanDesc for a TABLESAMPLE scan.
+ * heap_beginscan_sampling is an alternative entry point for setting up a
+ * HeapScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth
+ * using the same data structure although the behavior is rather different.
+ * In addition to the options offered by heap_beginscan_strat, this call
+ * also allows control of whether page-mode visibility checking is used.
* ----------------
*/
HeapScanDesc
HeapScanDesc
heap_beginscan_sampling(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key,
- bool allow_strat, bool allow_pagemode)
+ bool allow_strat, bool allow_sync, bool allow_pagemode)
{
return heap_beginscan_internal(relation, snapshot, nkeys, key,
- allow_strat, false, allow_pagemode,
+ allow_strat, allow_sync, allow_pagemode,
false, true, false);
}
static HeapScanDesc
heap_beginscan_internal(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key,
- bool allow_strat, bool allow_sync, bool allow_pagemode,
- bool is_bitmapscan, bool is_samplescan, bool temp_snap)
+ bool allow_strat,
+ bool allow_sync,
+ bool allow_pagemode,
+ bool is_bitmapscan,
+ bool is_samplescan,
+ bool temp_snap)
{
HeapScanDesc scan;
initscan(scan, key, true);
}
+/* ----------------
+ * heap_rescan_set_params - restart a relation scan after changing params
+ *
+ * This call allows changing the buffer strategy, syncscan, and pagemode
+ * options before starting a fresh scan. Note that although the actual use
+ * of syncscan might change (effectively, enabling or disabling reporting),
+ * the previously selected startblock will be kept.
+ *
+ * allow_strat and allow_sync are stored into the scan descriptor as given.
+ * allow_pagemode takes effect only when the scan's snapshot is an MVCC
+ * snapshot; otherwise page-at-a-time mode is turned off. The scan is then
+ * restarted through heap_rescan() using the supplied key.
+ * ----------------
+ */
+void
+heap_rescan_set_params(HeapScanDesc scan, ScanKey key,
+ bool allow_strat, bool allow_sync, bool allow_pagemode)
+{
+ /* adjust parameters */
+ scan->rs_allow_strat = allow_strat;
+ scan->rs_allow_sync = allow_sync;
+ scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(scan->rs_snapshot);
+ /* ... and rescan */
+ heap_rescan(scan, key);
+}
+
/* ----------------
* heap_endscan - end relation scan
*
#-------------------------------------------------------------------------
#
# Makefile--
-# Makefile for utils/tablesample
+# Makefile for access/tablesample
#
# IDENTIFICATION
-# src/backend/utils/tablesample/Makefile
+# src/backend/access/tablesample/Makefile
#
#-------------------------------------------------------------------------
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = tablesample.o system.o bernoulli.o
+OBJS = bernoulli.o system.o tablesample.o
include $(top_srcdir)/src/backend/common.mk
/*-------------------------------------------------------------------------
*
* bernoulli.c
- * interface routines for BERNOULLI tablesample method
+ * support routines for BERNOULLI tablesample method
*
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * To ensure repeatability of samples, it is necessary that selection of a
+ * given tuple be history-independent; otherwise syncscanning would break
+ * repeatability, to say nothing of logically-irrelevant maintenance such
+ * as physical extension or shortening of the relation.
+ *
+ * To achieve that, we proceed by hashing each candidate TID together with
+ * the active seed, and then selecting it if the hash is less than the
+ * cutoff value computed from the selection probability by BeginSampleScan.
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * src/backend/utils/tablesample/bernoulli.c
+ * src/backend/access/tablesample/bernoulli.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
-#include "fmgr.h"
+#ifdef _MSC_VER
+#include <float.h> /* for _isnan */
+#endif
+#include <math.h>
-#include "access/tablesample.h"
-#include "access/relscan.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
+#include "access/hash.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
#include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
-#include "utils/sampling.h"
+#include "optimizer/cost.h"
+#include "utils/builtins.h"
-/* tsdesc */
+/* Private state */
typedef struct
{
+ uint64 cutoff; /* select tuples with hash less than this */
uint32 seed; /* random seed */
- BlockNumber startblock; /* starting block, we use ths for syncscan
- * support */
- BlockNumber nblocks; /* number of blocks */
- BlockNumber blockno; /* current block */
- float4 probability; /* probabilty that tuple will be returned
- * (0.0-1.0) */
OffsetNumber lt; /* last tuple returned from current block */
- SamplerRandomState randstate; /* random generator tsdesc */
} BernoulliSamplerData;
+
+static void bernoulli_samplescangetsamplesize(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples);
+static void bernoulli_initsamplescan(SampleScanState *node,
+ int eflags);
+static void bernoulli_beginsamplescan(SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed);
+static OffsetNumber bernoulli_nextsampletuple(SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset);
+
+
/*
- * Initialize the state.
+ * Create a TsmRoutine descriptor for the BERNOULLI method.
*/
Datum
-tsm_bernoulli_init(PG_FUNCTION_ARGS)
+tsm_bernoulli_handler(PG_FUNCTION_ARGS)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- uint32 seed = PG_GETARG_UINT32(1);
- float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
- HeapScanDesc scan = tsdesc->heapScan;
- BernoulliSamplerData *sampler;
+ TsmRoutine *tsm = makeNode(TsmRoutine);
+
+ tsm->parameterTypes = list_make1_oid(FLOAT4OID);
+ tsm->repeatable_across_queries = true;
+ tsm->repeatable_across_scans = true;
+ tsm->SampleScanGetSampleSize = bernoulli_samplescangetsamplesize;
+ tsm->InitSampleScan = bernoulli_initsamplescan;
+ tsm->BeginSampleScan = bernoulli_beginsamplescan;
+ tsm->NextSampleBlock = NULL;
+ tsm->NextSampleTuple = bernoulli_nextsampletuple;
+ tsm->EndSampleScan = NULL;
+
+ PG_RETURN_POINTER(tsm);
+}
- if (percent < 0 || percent > 100)
- ereport(ERROR,
- (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("invalid sample size"),
- errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
+/*
+ * Sample size estimation.
+ */
+static void
+bernoulli_samplescangetsamplesize(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples)
+{
+ Node *pctnode;
+ float4 samplefract;
- sampler = palloc0(sizeof(BernoulliSamplerData));
+ /* Try to extract an estimate for the sample percentage */
+ pctnode = (Node *) linitial(paramexprs);
+ pctnode = estimate_expression_value(root, pctnode);
- /* Remember initial values for reinit */
- sampler->seed = seed;
- sampler->startblock = scan->rs_startblock;
- sampler->nblocks = scan->rs_nblocks;
- sampler->blockno = InvalidBlockNumber;
- sampler->probability = percent / 100;
- sampler->lt = InvalidOffsetNumber;
- sampler_random_init_state(sampler->seed, sampler->randstate);
+ if (IsA(pctnode, Const) &&
+ !((Const *) pctnode)->constisnull)
+ {
+ samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
+ if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
+ samplefract /= 100.0f;
+ else
+ {
+ /* Default samplefract if the value is bogus */
+ samplefract = 0.1f;
+ }
+ }
+ else
+ {
+ /* Default samplefract if we didn't obtain a non-null Const */
+ samplefract = 0.1f;
+ }
+
+ /* We'll visit all pages of the baserel */
+ *pages = baserel->pages;
- tsdesc->tsmdata = (void *) sampler;
+ *tuples = clamp_row_est(baserel->tuples * samplefract);
+}
- PG_RETURN_VOID();
+/*
+ * Initialize during executor setup.
+ *
+ * Allocates the private BernoulliSamplerData state with palloc0 and stores
+ * the pointer in node->tsm_state. The eflags argument is not examined here.
+ */
+static void
+bernoulli_initsamplescan(SampleScanState *node, int eflags)
+{
+ node->tsm_state = palloc0(sizeof(BernoulliSamplerData));
}
/*
- * Get next block number to read or InvalidBlockNumber if we are at the
- * end of the relation.
+ * Examine parameters and prepare for a sample scan.
*/
-Datum
-tsm_bernoulli_nextblock(PG_FUNCTION_ARGS)
+static void
+bernoulli_beginsamplescan(SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata;
+ BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state;
+ double percent = DatumGetFloat4(params[0]);
+
+ if (percent < 0 || percent > 100 || isnan(percent))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+ errmsg("sample percentage must be between 0 and 100")));
/*
- * Bernoulli sampling scans all blocks on the table and supports syncscan
- * so loop from startblock to startblock instead of from 0 to nblocks.
+ * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
+ * store that as a uint64, of course. Note that this gives strictly
+ * correct behavior at the limits of zero or one probability.
*/
- if (sampler->blockno == InvalidBlockNumber)
- sampler->blockno = sampler->startblock;
- else
- {
- sampler->blockno++;
-
- if (sampler->blockno >= sampler->nblocks)
- sampler->blockno = 0;
-
- if (sampler->blockno == sampler->startblock)
- PG_RETURN_UINT32(InvalidBlockNumber);
- }
+ sampler->cutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
+ sampler->seed = seed;
+ sampler->lt = InvalidOffsetNumber;
- PG_RETURN_UINT32(sampler->blockno);
+ /*
+ * Use bulkread, since we're scanning all pages. But pagemode visibility
+ * checking is a win only at larger sampling fractions. The 25% cutoff
+ * here is based on very limited experimentation.
+ */
+ node->use_bulkread = true;
+ node->use_pagemode = (percent >= 25);
}
/*
- * Get next tuple from current block.
- *
- * This method implements the main logic in bernoulli sampling.
- * The algorithm simply generates new random number (in 0.0-1.0 range) and if
- * it falls within user specified probability (in the same range) return the
- * tuple offset.
- *
- * It is ok here to return tuple offset without knowing if tuple is visible
- * and not check it via examinetuple. The reason for that is that we do the
- * coinflip (random number generation) for every tuple in the table. Since all
- * tuples have same probability of being returned the visible and invisible
- * tuples will be returned in same ratio as they have in the actual table.
- * This means that there is no skew towards either visible or invisible tuples
- * and the number of visible tuples returned from the executor node should
- * match the fraction of visible tuples which was specified by user.
+ * Select next sampled tuple in current block.
*
- * This is faster than doing the coinflip in examinetuple because we don't
- * have to do visibility checks on uninteresting tuples.
+ * It is OK here to return an offset without knowing if the tuple is visible
+ * (or even exists). The reason is that we do the coinflip for every tuple
+ * offset in the table. Since all tuples have the same probability of being
+ * returned, it doesn't matter if we do extra coinflips for invisible tuples.
*
- * If we reach end of the block return InvalidOffsetNumber which tells
+ * When we reach end of the block, return InvalidOffsetNumber which tells
* SampleScan to go to next block.
*/
-Datum
-tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS)
+static OffsetNumber
+bernoulli_nextsampletuple(SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- OffsetNumber maxoffset = PG_GETARG_UINT16(2);
- BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata;
+ BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state;
OffsetNumber tupoffset = sampler->lt;
- float4 probability = sampler->probability;
+ uint32 hashinput[3];
+ /* Advance to first/next tuple in block */
if (tupoffset == InvalidOffsetNumber)
tupoffset = FirstOffsetNumber;
else
tupoffset++;
/*
- * Loop over tuple offsets until the random generator returns value that
- * is within the probability of returning the tuple or until we reach end
- * of the block.
+ * We compute the hash by applying hash_any to an array of 3 uint32's
+ * containing the block, offset, and seed. This is efficient to set up,
+ * and with the current implementation of hash_any, it gives
+ * machine-independent results, which is a nice property for regression
+ * testing.
*
- * (This is our implementation of bernoulli trial)
+ * These words in the hash input are the same throughout the block:
*/
- while (sampler_random_fract(sampler->randstate) > probability)
+ hashinput[0] = blockno;
+ hashinput[2] = sampler->seed;
+
+ /*
+ * Loop over tuple offsets until finding suitable TID or reaching end of
+ * block.
+ */
+ for (; tupoffset <= maxoffset; tupoffset++)
{
- tupoffset++;
+ uint32 hash;
- if (tupoffset > maxoffset)
+ hashinput[1] = tupoffset;
+
+ hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
+ (int) sizeof(hashinput)));
+ if (hash < sampler->cutoff)
break;
}
if (tupoffset > maxoffset)
- /* Tell SampleScan that we want next block. */
tupoffset = InvalidOffsetNumber;
sampler->lt = tupoffset;
- PG_RETURN_UINT16(tupoffset);
-}
-
-/*
- * Cleanup method.
- */
-Datum
-tsm_bernoulli_end(PG_FUNCTION_ARGS)
-{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-
- pfree(tsdesc->tsmdata);
-
- PG_RETURN_VOID();
-}
-
-/*
- * Reset tsdesc (called by ReScan).
- */
-Datum
-tsm_bernoulli_reset(PG_FUNCTION_ARGS)
-{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata;
-
- sampler->blockno = InvalidBlockNumber;
- sampler->lt = InvalidOffsetNumber;
- sampler_random_init_state(sampler->seed, sampler->randstate);
-
- PG_RETURN_VOID();
-}
-
-/*
- * Costing function.
- */
-Datum
-tsm_bernoulli_cost(PG_FUNCTION_ARGS)
-{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- Path *path = (Path *) PG_GETARG_POINTER(1);
- RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
- List *args = (List *) PG_GETARG_POINTER(3);
- BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
- double *tuples = (double *) PG_GETARG_POINTER(5);
- Node *pctnode;
- float4 samplesize;
-
- *pages = baserel->pages;
-
- pctnode = linitial(args);
- pctnode = estimate_expression_value(root, pctnode);
-
- if (IsA(pctnode, RelabelType))
- pctnode = (Node *) ((RelabelType *) pctnode)->arg;
-
- if (IsA(pctnode, Const))
- {
- samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
- samplesize /= 100.0;
- }
- else
- {
- /* Default samplesize if the estimation didn't return Const. */
- samplesize = 0.1f;
- }
-
- *tuples = path->rows * samplesize;
- path->rows = *tuples;
-
- PG_RETURN_VOID();
+ return tupoffset;
}
/*-------------------------------------------------------------------------
*
* system.c
- * interface routines for system tablesample method
+ * support routines for SYSTEM tablesample method
*
+ * To ensure repeatability of samples, it is necessary that selection of a
+ * given tuple be history-independent; otherwise syncscanning would break
+ * repeatability, to say nothing of logically-irrelevant maintenance such
+ * as physical extension or shortening of the relation.
*
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * To achieve that, we proceed by hashing each candidate block number together
+ * with the active seed, and then selecting it if the hash is less than the
+ * cutoff value computed from the selection probability by BeginSampleScan.
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * src/backend/utils/tablesample/system.c
+ * src/backend/access/tablesample/system.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
-#include "fmgr.h"
+#ifdef _MSC_VER
+#include <float.h> /* for _isnan */
+#endif
+#include <math.h>
-#include "access/tablesample.h"
+#include "access/hash.h"
#include "access/relscan.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
#include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
-#include "utils/sampling.h"
+#include "optimizer/cost.h"
+#include "utils/builtins.h"
-/*
- * State
- */
+/* Private state for one SYSTEM sample scan (allocated by system_initsamplescan) */
typedef struct
{
- BlockSamplerData bs;
+ uint64 cutoff; /* select blocks with hash less than this (may be PG_UINT32_MAX + 1, hence uint64) */
uint32 seed; /* random seed */
- BlockNumber nblocks; /* number of block in relation */
- int samplesize; /* number of blocks to return */
+ BlockNumber nextblock; /* next block to consider sampling */
OffsetNumber lt; /* last tuple returned from current block */
} SystemSamplerData;
-/*
- * Initializes the state.
- */
-Datum
-tsm_system_init(PG_FUNCTION_ARGS)
-{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- uint32 seed = PG_GETARG_UINT32(1);
- float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
- HeapScanDesc scan = tsdesc->heapScan;
- SystemSamplerData *sampler;
+static void system_samplescangetsamplesize(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples);
+static void system_initsamplescan(SampleScanState *node,
+ int eflags);
+static void system_beginsamplescan(SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed);
+static BlockNumber system_nextsampleblock(SampleScanState *node);
+static OffsetNumber system_nextsampletuple(SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset);
- if (percent < 0 || percent > 100)
- ereport(ERROR,
- (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("invalid sample size"),
- errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
-
- sampler = palloc0(sizeof(SystemSamplerData));
-
- /* Remember initial values for reinit */
- sampler->seed = seed;
- sampler->nblocks = scan->rs_nblocks;
- sampler->samplesize = 1 + (int) (sampler->nblocks * (percent / 100.0));
- sampler->lt = InvalidOffsetNumber;
-
- BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
- sampler->seed);
-
- tsdesc->tsmdata = (void *) sampler;
-
- PG_RETURN_VOID();
-}
/*
- * Get next block number or InvalidBlockNumber when we're done.
- *
- * Uses the same logic as ANALYZE for picking the random blocks.
+ * Create a TsmRoutine descriptor for the SYSTEM method.
+ *
+ * The descriptor declares a single float4 parameter (the sample percentage),
+ * marks the method as repeatable across queries and across scans, and wires
+ * up the static callbacks defined in this file. No EndSampleScan callback
+ * is installed.
*/
Datum
-tsm_system_nextblock(PG_FUNCTION_ARGS)
+tsm_system_handler(PG_FUNCTION_ARGS)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
- BlockNumber blockno;
-
- if (!BlockSampler_HasMore(&sampler->bs))
- PG_RETURN_UINT32(InvalidBlockNumber);
-
- blockno = BlockSampler_Next(&sampler->bs);
-
- PG_RETURN_UINT32(blockno);
+ TsmRoutine *tsm = makeNode(TsmRoutine);
+
+ tsm->parameterTypes = list_make1_oid(FLOAT4OID);
+ tsm->repeatable_across_queries = true;
+ tsm->repeatable_across_scans = true;
+ tsm->SampleScanGetSampleSize = system_samplescangetsamplesize;
+ tsm->InitSampleScan = system_initsamplescan;
+ tsm->BeginSampleScan = system_beginsamplescan;
+ tsm->NextSampleBlock = system_nextsampleblock;
+ tsm->NextSampleTuple = system_nextsampletuple;
+ tsm->EndSampleScan = NULL;
+
+ PG_RETURN_POINTER(tsm);
}
/*
- * Get next tuple offset in current block or InvalidOffsetNumber if we are done
- * with this block.
+ * Sample size estimation.
+ *
+ * Sets *pages and *tuples to the relation's page and tuple counts scaled by
+ * the sampling fraction, each passed through clamp_row_est. The fraction
+ * is taken from the first parameter expression when that can be reduced to
+ * a non-null constant between 0 and 100 (a percentage); in every other case
+ * a default fraction of 0.1 is assumed.
*/
-Datum
-tsm_system_nexttuple(PG_FUNCTION_ARGS)
+static void
+system_samplescangetsamplesize(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- OffsetNumber maxoffset = PG_GETARG_UINT16(2);
- SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
- OffsetNumber tupoffset = sampler->lt;
+ Node *pctnode;
+ float4 samplefract;
- if (tupoffset == InvalidOffsetNumber)
- tupoffset = FirstOffsetNumber;
- else
- tupoffset++;
+ /* Try to extract an estimate for the sample percentage */
+ pctnode = (Node *) linitial(paramexprs);
+ pctnode = estimate_expression_value(root, pctnode);
- if (tupoffset > maxoffset)
- tupoffset = InvalidOffsetNumber;
+ if (IsA(pctnode, Const) &&
+ !((Const *) pctnode)->constisnull)
+ {
+ samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
+ if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
+ samplefract /= 100.0f;
+ else
+ {
+ /* Default samplefract if the value is bogus */
+ samplefract = 0.1f;
+ }
+ }
+ else
+ {
+ /* Default samplefract if we didn't obtain a non-null Const */
+ samplefract = 0.1f;
+ }
- sampler->lt = tupoffset;
+ /* We'll visit a sample of the pages ... */
+ *pages = clamp_row_est(baserel->pages * samplefract);
- PG_RETURN_UINT16(tupoffset);
+ /* ... and hopefully get a representative number of tuples from them */
+ *tuples = clamp_row_est(baserel->tuples * samplefract);
}
/*
- * Cleanup method.
+ * Initialize during executor setup.
+ *
+ * This only allocates the zero-filled private state and hangs it on the scan
+ * node; eflags is not consulted, and the sampling parameters are not looked
+ * at until system_beginsamplescan.
*/
-Datum
-tsm_system_end(PG_FUNCTION_ARGS)
+static void
+system_initsamplescan(SampleScanState *node, int eflags)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-
- pfree(tsdesc->tsmdata);
-
- PG_RETURN_VOID();
+ node->tsm_state = palloc0(sizeof(SystemSamplerData));
}
/*
- * Reset state (called by ReScan).
+ * Examine parameters and prepare for a sample scan.
+ *
+ * Only params[0] (the sample percentage) is read; nparams is not checked
+ * here. An error is raised if the percentage is NaN or outside 0..100.
+ * Otherwise the hash cutoff and seed are recorded, the block and tuple
+ * positions are rewound to the start, and the node's use_bulkread and
+ * use_pagemode flags are set.
*/
-Datum
-tsm_system_reset(PG_FUNCTION_ARGS)
+static void
+system_beginsamplescan(SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed)
{
- TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
- SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+ SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+ double percent = DatumGetFloat4(params[0]);
+ if (percent < 0 || percent > 100 || isnan(percent))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+ errmsg("sample percentage must be between 0 and 100")));
+
+ /*
+ * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
+ * store that as a uint64, of course. Note that this gives strictly
+ * correct behavior at the limits of zero or one probability.
+ */
+ sampler->cutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
+ sampler->seed = seed;
+ sampler->nextblock = 0;
sampler->lt = InvalidOffsetNumber;
- BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
- sampler->seed);
- PG_RETURN_VOID();
+ /*
+ * Bulkread buffer access strategy probably makes sense unless we're
+ * scanning a very small fraction of the table. The 1% cutoff here is a
+ * guess. We should use pagemode visibility checking, since we scan all
+ * tuples on each selected page.
+ */
+ node->use_bulkread = (percent >= 1);
+ node->use_pagemode = true;
}
/*
- * Costing function.
+ * Select next block to sample.
+ *
+ * Starting from the block number saved in the private state, returns the
+ * first block whose hash (of block number and seed) is below the cutoff,
+ * and remembers the following block as the next starting point. Returns
+ * InvalidBlockNumber once scan->rs_nblocks is reached without a match.
*/
-Datum
-tsm_system_cost(PG_FUNCTION_ARGS)
+static BlockNumber
+system_nextsampleblock(SampleScanState *node)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- Path *path = (Path *) PG_GETARG_POINTER(1);
- RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
- List *args = (List *) PG_GETARG_POINTER(3);
- BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
- double *tuples = (double *) PG_GETARG_POINTER(5);
- Node *pctnode;
- float4 samplesize;
+ SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+ HeapScanDesc scan = node->ss.ss_currentScanDesc;
+ BlockNumber nextblock = sampler->nextblock;
+ uint32 hashinput[2];
+
+ /*
+ * We compute the hash by applying hash_any to an array of 2 uint32's
+ * containing the block number and seed. This is efficient to set up, and
+ * with the current implementation of hash_any, it gives
+ * machine-independent results, which is a nice property for regression
+ * testing.
+ *
+ * The seed word of the hash input is the same for every block we consider,
+ * so set it once here; the block-number word is filled in inside the loop:
+ */
+ hashinput[1] = sampler->seed;
+
+ /*
+ * Loop over block numbers until finding suitable block or reaching end of
+ * relation.
+ */
+ for (; nextblock < scan->rs_nblocks; nextblock++)
+ {
+ uint32 hash;
- pctnode = linitial(args);
- pctnode = estimate_expression_value(root, pctnode);
+ hashinput[0] = nextblock;
- if (IsA(pctnode, RelabelType))
- pctnode = (Node *) ((RelabelType *) pctnode)->arg;
+ hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
+ (int) sizeof(hashinput)));
+ if (hash < sampler->cutoff)
+ break;
+ }
- if (IsA(pctnode, Const))
+ if (nextblock < scan->rs_nblocks)
{
- samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
- samplesize /= 100.0;
+ /* Found a suitable block; remember where we should start next time */
+ sampler->nextblock = nextblock + 1;
+ return nextblock;
}
+
+ /* Done, but let's reset nextblock to 0 for safety. */
+ sampler->nextblock = 0;
+ return InvalidBlockNumber;
+}
+
+/*
+ * Select next sampled tuple in current block.
+ *
+ * In block sampling, we just want to sample all the tuples in each selected
+ * block.
+ *
+ * It is OK here to return an offset without knowing if the tuple is visible
+ * (or even exists); nodeSamplescan.c will deal with that.
+ *
+ * When we reach end of the block, return InvalidOffsetNumber which tells
+ * SampleScan to go to next block.
+ *
+ * blockno is not consulted; the position within the block is tracked in the
+ * private state's lt field. Because the end-of-block result is stored back
+ * into lt, the call after that starts again at FirstOffsetNumber.
+ */
+static OffsetNumber
+system_nextsampletuple(SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset)
+{
+ SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+ OffsetNumber tupoffset = sampler->lt;
+
+ /* Advance to next possible offset on page */
+ if (tupoffset == InvalidOffsetNumber)
+ tupoffset = FirstOffsetNumber;
else
- {
- /* Default samplesize if the estimation didn't return Const. */
- samplesize = 0.1f;
- }
+ tupoffset++;
- *pages = baserel->pages * samplesize;
- *tuples = path->rows * samplesize;
- path->rows = *tuples;
+ /* Done? */
+ if (tupoffset > maxoffset)
+ tupoffset = InvalidOffsetNumber;
+
+ sampler->lt = tupoffset;
- PG_RETURN_VOID();
+ return tupoffset;
}
/*-------------------------------------------------------------------------
*
* tablesample.c
- * TABLESAMPLE internal API
+ * Support functions for TABLESAMPLE feature
*
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
* IDENTIFICATION
* src/backend/access/tablesample/tablesample.c
*
- * TABLESAMPLE is the SQL standard clause for sampling the relations.
- *
- * The API is interface between the Executor and the TABLESAMPLE Methods.
- *
- * TABLESAMPLE Methods are implementations of actual sampling algorithms which
- * can be used for returning a sample of the source relation.
- * Methods don't read the table directly but are asked for block number and
- * tuple offset which they want to examine (or return) and the tablesample
- * interface implemented here does the reading for them.
- *
- * We currently only support sampling of the physical relations, but in the
- * future we might extend the API to support subqueries as well.
- *
* -------------------------------------------------------------------------
*/
#include "postgres.h"
-#include "access/tablesample.h"
-
-#include "catalog/pg_tablesample_method.h"
-#include "miscadmin.h"
-#include "pgstat.h"
-#include "storage/bufmgr.h"
-#include "storage/predicate.h"
-#include "utils/rel.h"
-#include "utils/tqual.h"
-
-
-static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan);
-
-
-/*
- * Initialize the TABLESAMPLE Descriptor and the TABLESAMPLE Method.
- */
-TableSampleDesc *
-tablesample_init(SampleScanState *scanstate, TableSampleClause *tablesample)
-{
- FunctionCallInfoData fcinfo;
- int i;
- List *args = tablesample->args;
- ListCell *arg;
- ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
- TableSampleDesc *tsdesc = (TableSampleDesc *) palloc0(sizeof(TableSampleDesc));
-
- /* Load functions */
- fmgr_info(tablesample->tsminit, &(tsdesc->tsminit));
- fmgr_info(tablesample->tsmnextblock, &(tsdesc->tsmnextblock));
- fmgr_info(tablesample->tsmnexttuple, &(tsdesc->tsmnexttuple));
- if (OidIsValid(tablesample->tsmexaminetuple))
- fmgr_info(tablesample->tsmexaminetuple, &(tsdesc->tsmexaminetuple));
- else
- tsdesc->tsmexaminetuple.fn_oid = InvalidOid;
- fmgr_info(tablesample->tsmreset, &(tsdesc->tsmreset));
- fmgr_info(tablesample->tsmend, &(tsdesc->tsmend));
-
- InitFunctionCallInfoData(fcinfo, &tsdesc->tsminit,
- list_length(args) + 2,
- InvalidOid, NULL, NULL);
-
- tsdesc->tupDesc = scanstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
- tsdesc->heapScan = scanstate->ss.ss_currentScanDesc;
-
- /* First argument for init function is always TableSampleDesc */
- fcinfo.arg[0] = PointerGetDatum(tsdesc);
- fcinfo.argnull[0] = false;
+#include "access/tsmapi.h"
- /*
- * Second arg for init function is always REPEATABLE.
- *
- * If tablesample->repeatable is NULL then REPEATABLE clause was not
- * specified, and we insert a random value as default.
- *
- * When specified, the expression cannot evaluate to NULL.
- */
- if (tablesample->repeatable)
- {
- ExprState *argstate = ExecInitExpr((Expr *) tablesample->repeatable,
- (PlanState *) scanstate);
-
- fcinfo.arg[1] = ExecEvalExpr(argstate, econtext,
- &fcinfo.argnull[1], NULL);
- if (fcinfo.argnull[1])
- ereport(ERROR,
- (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
- errmsg("REPEATABLE clause must be NOT NULL numeric value")));
- }
- else
- {
- fcinfo.arg[1] = UInt32GetDatum(random());
- fcinfo.argnull[1] = false;
- }
-
- /* Rest of the arguments come from user. */
- i = 2;
- foreach(arg, args)
- {
- Expr *argexpr = (Expr *) lfirst(arg);
- ExprState *argstate = ExecInitExpr(argexpr, (PlanState *) scanstate);
-
- fcinfo.arg[i] = ExecEvalExpr(argstate, econtext,
- &fcinfo.argnull[i], NULL);
- i++;
- }
- Assert(i == fcinfo.nargs);
-
- (void) FunctionCallInvoke(&fcinfo);
-
- return tsdesc;
-}
/*
- * Get next tuple from TABLESAMPLE Method.
- */
-HeapTuple
-tablesample_getnext(TableSampleDesc *desc)
-{
- HeapScanDesc scan = desc->heapScan;
- HeapTuple tuple = &(scan->rs_ctup);
- bool pagemode = scan->rs_pageatatime;
- BlockNumber blockno;
- Page page;
- bool page_all_visible;
- ItemId itemid;
- OffsetNumber tupoffset,
- maxoffset;
-
- if (!scan->rs_inited)
- {
- /*
- * return null immediately if relation is empty
- */
- if (scan->rs_nblocks == 0)
- {
- Assert(!BufferIsValid(scan->rs_cbuf));
- tuple->t_data = NULL;
- return NULL;
- }
- blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
- PointerGetDatum(desc)));
- if (!BlockNumberIsValid(blockno))
- {
- tuple->t_data = NULL;
- return NULL;
- }
-
- heapgetpage(scan, blockno);
- scan->rs_inited = true;
- }
- else
- {
- /* continue from previously returned page/tuple */
- blockno = scan->rs_cblock; /* current page */
- }
-
- /*
- * When pagemode is disabled, the scan will do visibility checks for each
- * tuple it finds so the buffer needs to be locked.
- */
- if (!pagemode)
- LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-
- page = (Page) BufferGetPage(scan->rs_cbuf);
- page_all_visible = PageIsAllVisible(page);
- maxoffset = PageGetMaxOffsetNumber(page);
-
- for (;;)
- {
- CHECK_FOR_INTERRUPTS();
-
- tupoffset = DatumGetUInt16(FunctionCall3(&desc->tsmnexttuple,
- PointerGetDatum(desc),
- UInt32GetDatum(blockno),
- UInt16GetDatum(maxoffset)));
-
- if (OffsetNumberIsValid(tupoffset))
- {
- bool visible;
- bool found;
-
- /* Skip invalid tuple pointers. */
- itemid = PageGetItemId(page, tupoffset);
- if (!ItemIdIsNormal(itemid))
- continue;
-
- tuple->t_data = (HeapTupleHeader) PageGetItem((Page) page, itemid);
- tuple->t_len = ItemIdGetLength(itemid);
- ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
-
- if (page_all_visible)
- visible = true;
- else
- visible = SampleTupleVisible(tuple, tupoffset, scan);
-
- /*
- * Let the sampling method examine the actual tuple and decide if
- * we should return it.
- *
- * Note that we let it examine even invisible tuples for
- * statistical purposes, but not return them since user should
- * never see invisible tuples.
- */
- if (OidIsValid(desc->tsmexaminetuple.fn_oid))
- {
- found = DatumGetBool(FunctionCall4(&desc->tsmexaminetuple,
- PointerGetDatum(desc),
- UInt32GetDatum(blockno),
- PointerGetDatum(tuple),
- BoolGetDatum(visible)));
- /* Should not happen if sampling method is well written. */
- if (found && !visible)
- elog(ERROR, "Sampling method wanted to return invisible tuple");
- }
- else
- found = visible;
-
- /* Found visible tuple, return it. */
- if (found)
- {
- if (!pagemode)
- LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
- break;
- }
- else
- {
- /* Try next tuple from same page. */
- continue;
- }
- }
-
-
- if (!pagemode)
- LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-
- blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
- PointerGetDatum(desc)));
-
- /*
- * Report our new scan position for synchronization purposes. We don't
- * do that when moving backwards, however. That would just mess up any
- * other forward-moving scanners.
- *
- * Note: we do this before checking for end of scan so that the final
- * state of the position hint is back at the start of the rel. That's
- * not strictly necessary, but otherwise when you run the same query
- * multiple times the starting position would shift a little bit
- * backwards on every invocation, which is confusing. We don't
- * guarantee any specific ordering in general, though.
- */
- if (scan->rs_syncscan)
- ss_report_location(scan->rs_rd, BlockNumberIsValid(blockno) ?
- blockno : scan->rs_startblock);
-
- /*
- * Reached end of scan.
- */
- if (!BlockNumberIsValid(blockno))
- {
- if (BufferIsValid(scan->rs_cbuf))
- ReleaseBuffer(scan->rs_cbuf);
- scan->rs_cbuf = InvalidBuffer;
- scan->rs_cblock = InvalidBlockNumber;
- tuple->t_data = NULL;
- scan->rs_inited = false;
- return NULL;
- }
-
- heapgetpage(scan, blockno);
-
- if (!pagemode)
- LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-
- page = (Page) BufferGetPage(scan->rs_cbuf);
- page_all_visible = PageIsAllVisible(page);
- maxoffset = PageGetMaxOffsetNumber(page);
- }
-
- pgstat_count_heap_getnext(scan->rs_rd);
-
- return &(scan->rs_ctup);
-}
-
-/*
- * Reset the sampling to starting state
- */
-void
-tablesample_reset(TableSampleDesc *desc)
-{
- (void) FunctionCall1(&desc->tsmreset, PointerGetDatum(desc));
-}
-
-/*
- * Signal the sampling method that the scan has finished.
- */
-void
-tablesample_end(TableSampleDesc *desc)
-{
- (void) FunctionCall1(&desc->tsmend, PointerGetDatum(desc));
-}
-
-/*
- * Check visibility of the tuple.
+ * GetTsmRoutine --- get a TsmRoutine struct by invoking the handler.
+ *
+ * This is a convenience routine that's just meant to check for errors.
+ *
+ * The handler function identified by tsmhandler is called with a single
+ * NULL pointer argument; an error is raised unless it returns a non-NULL
+ * node tagged as TsmRoutine. On success that node is returned as-is.
*/
-static bool
-SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
+TsmRoutine *
+GetTsmRoutine(Oid tsmhandler)
{
- /*
- * If this scan is reading whole pages at a time, there is already
- * visibility info present in rs_vistuples so we can just search it for
- * the tupoffset.
- */
- if (scan->rs_pageatatime)
- {
- int start = 0,
- end = scan->rs_ntuples - 1;
-
- /*
- * Do the binary search over rs_vistuples, it's already sorted by
- * OffsetNumber so we don't need to do any sorting ourselves here.
- *
- * We could use bsearch() here but it's slower for integers because of
- * the function call overhead and because it needs boiler plate code
- * it would not save us anything code-wise anyway.
- */
- while (start <= end)
- {
- int mid = start + (end - start) / 2;
- OffsetNumber curoffset = scan->rs_vistuples[mid];
-
- if (curoffset == tupoffset)
- return true;
- else if (curoffset > tupoffset)
- end = mid - 1;
- else
- start = mid + 1;
- }
-
- return false;
- }
- else
- {
- /* No pagemode, we have to check the tuple itself. */
- Snapshot snapshot = scan->rs_snapshot;
- Buffer buffer = scan->rs_cbuf;
+ Datum datum;
+ TsmRoutine *routine;
- bool visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
+ datum = OidFunctionCall1(tsmhandler, PointerGetDatum(NULL));
+ routine = (TsmRoutine *) DatumGetPointer(datum);
- CheckForSerializableConflictOut(visible, scan->rs_rd, tuple, buffer,
- snapshot);
+ if (routine == NULL || !IsA(routine, TsmRoutine))
+ elog(ERROR, "tablesample handler function %u did not return a TsmRoutine struct",
+ tsmhandler);
- return visible;
- }
+ return routine;
}
pg_ts_parser.h pg_ts_template.h pg_extension.h \
pg_foreign_data_wrapper.h pg_foreign_server.h pg_user_mapping.h \
pg_foreign_table.h pg_policy.h pg_replication_origin.h \
- pg_tablesample_method.h pg_default_acl.h pg_seclabel.h pg_shseclabel.h \
- pg_collation.h pg_range.h pg_transform.h toasting.h indexing.h \
+ pg_default_acl.h pg_seclabel.h pg_shseclabel.h \
+ pg_collation.h pg_range.h pg_transform.h \
+ toasting.h indexing.h \
)
# location of Catalog.pm
context->addrs);
}
}
+ else if (IsA(node, TableSampleClause))
+ {
+ TableSampleClause *tsc = (TableSampleClause *) node;
+
+ add_object_address(OCLASS_PROC, tsc->tsmhandler, 0,
+ context->addrs);
+ /* fall through to examine arguments */
+ }
return expression_tree_walker(node, find_expr_references_walker,
(void *) context);
List *ancestors, ExplainState *es);
static void show_sortorder_options(StringInfo buf, Node *sortexpr,
Oid sortOperator, Oid collation, bool nullsFirst);
+static void show_tablesample(TableSampleClause *tsc, PlanState *planstate,
+ List *ancestors, ExplainState *es);
static void show_sort_info(SortState *sortstate, ExplainState *es);
static void show_hash_info(HashState *hashstate, ExplainState *es);
static void show_tidbitmap_info(BitmapHeapScanState *planstate,
static void ExplainSubPlans(List *plans, List *ancestors,
const char *relationship, ExplainState *es);
static void ExplainCustomChildren(CustomScanState *css,
- List *ancestors, ExplainState *es);
+ List *ancestors, ExplainState *es);
static void ExplainProperty(const char *qlabel, const char *value,
bool numeric, ExplainState *es);
static void ExplainOpenGroup(const char *objtype, const char *labelname,
switch (nodeTag(plan))
{
case T_SeqScan:
+ case T_SampleScan:
case T_IndexScan:
case T_IndexOnlyScan:
case T_BitmapHeapScan:
case T_ValuesScan:
case T_CteScan:
case T_WorkTableScan:
- case T_SampleScan:
*rels_used = bms_add_member(*rels_used,
((Scan *) plan)->scanrelid);
break;
case T_SeqScan:
pname = sname = "Seq Scan";
break;
+ case T_SampleScan:
+ pname = sname = "Sample Scan";
+ break;
case T_IndexScan:
pname = sname = "Index Scan";
break;
else
pname = sname;
break;
- case T_SampleScan:
- {
- /*
- * Fetch the tablesample method name from RTE.
- *
- * It would be nice to also show parameters, but since we
- * support arbitrary expressions as parameter it might get
- * quite messy.
- */
- RangeTblEntry *rte;
-
- rte = rt_fetch(((SampleScan *) plan)->scanrelid, es->rtable);
- custom_name = get_tablesample_method_name(rte->tablesample->tsmid);
- pname = psprintf("Sample Scan (%s)", custom_name);
- sname = "Sample Scan";
- }
- break;
case T_Material:
pname = sname = "Materialize";
break;
switch (nodeTag(plan))
{
case T_SeqScan:
+ case T_SampleScan:
case T_BitmapHeapScan:
case T_TidScan:
case T_SubqueryScan:
if (((Scan *) plan)->scanrelid > 0)
ExplainScanTarget((Scan *) plan, es);
break;
- case T_SampleScan:
- ExplainScanTarget((Scan *) plan, es);
- break;
case T_IndexScan:
{
IndexScan *indexscan = (IndexScan *) plan;
if (es->analyze)
show_tidbitmap_info((BitmapHeapScanState *) planstate, es);
break;
+ case T_SampleScan:
+ show_tablesample(((SampleScan *) plan)->tablesample,
+ planstate, ancestors, es);
+ /* FALL THRU to print additional fields the same as SeqScan */
case T_SeqScan:
case T_ValuesScan:
case T_CteScan:
case T_WorkTableScan:
case T_SubqueryScan:
- case T_SampleScan:
show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
if (plan->qual)
show_instrumentation_count("Rows Removed by Filter", 1,
}
}
+/*
+ * Show TABLESAMPLE properties
+ *
+ * The sampling method is identified by the name of its handler function.
+ * The parameter expressions and the REPEATABLE expression (if present) are
+ * deparsed using a context built from the plan state and its ancestors.
+ *
+ * In text format everything is printed on one indented line of the form
+ * "Sampling: method (arg, ...) REPEATABLE (seed)", with the REPEATABLE part
+ * omitted when there is no seed expression. In the other formats the
+ * method name, parameter list, and seed are emitted as separate properties.
+ */
+static void
+show_tablesample(TableSampleClause *tsc, PlanState *planstate,
+ List *ancestors, ExplainState *es)
+{
+ List *context;
+ bool useprefix;
+ char *method_name;
+ List *params = NIL;
+ char *repeatable;
+ ListCell *lc;
+
+ /* Set up deparsing context */
+ context = set_deparse_context_planstate(es->deparse_cxt,
+ (Node *) planstate,
+ ancestors);
+ useprefix = list_length(es->rtable) > 1;
+
+ /* Get the tablesample method name */
+ method_name = get_func_name(tsc->tsmhandler);
+
+ /* Deparse parameter expressions */
+ foreach(lc, tsc->args)
+ {
+ Node *arg = (Node *) lfirst(lc);
+
+ params = lappend(params,
+ deparse_expression(arg, context,
+ useprefix, false));
+ }
+ if (tsc->repeatable)
+ repeatable = deparse_expression((Node *) tsc->repeatable, context,
+ useprefix, false);
+ else
+ repeatable = NULL;
+
+ /* Print results */
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ {
+ bool first = true;
+
+ appendStringInfoSpaces(es->str, es->indent * 2);
+ appendStringInfo(es->str, "Sampling: %s (", method_name);
+ foreach(lc, params)
+ {
+ if (!first)
+ appendStringInfoString(es->str, ", ");
+ appendStringInfoString(es->str, (const char *) lfirst(lc));
+ first = false;
+ }
+ appendStringInfoChar(es->str, ')');
+ if (repeatable)
+ appendStringInfo(es->str, " REPEATABLE (%s)", repeatable);
+ appendStringInfoChar(es->str, '\n');
+ }
+ else
+ {
+ ExplainPropertyText("Sampling Method", method_name, es);
+ ExplainPropertyList("Sampling Parameters", params, es);
+ if (repeatable)
+ ExplainPropertyText("Repeatable Seed", repeatable, es);
+ }
+}
+
/*
* If it's EXPLAIN ANALYZE, show tuplesort stats for a sort node
*/
switch (nodeTag(plan))
{
case T_SeqScan:
+ case T_SampleScan:
case T_IndexScan:
case T_IndexOnlyScan:
case T_BitmapHeapScan:
case T_TidScan:
case T_ForeignScan:
case T_CustomScan:
- case T_SampleScan:
case T_ModifyTable:
/* Assert it's on a real relation */
Assert(rte->rtekind == RTE_RELATION);
{
ListCell *cell;
const char *label =
- (list_length(css->custom_ps) != 1 ? "children" : "child");
+ (list_length(css->custom_ps) != 1 ? "children" : "child");
- foreach (cell, css->custom_ps)
+ foreach(cell, css->custom_ps)
ExplainNode((PlanState *) lfirst(cell), ancestors, label, NULL, es);
}
case T_CteScan:
return TargetListSupportsBackwardScan(node->targetlist);
+ case T_SampleScan:
+ /* Simplify life for tablesample methods by disallowing this */
+ return false;
+
case T_IndexScan:
return IndexSupportsBackwardScan(((IndexScan *) node)->indexid) &&
TargetListSupportsBackwardScan(node->targetlist);
}
return false;
- case T_SampleScan:
- return false;
-
case T_Material:
case T_Sort:
/* these don't evaluate tlist */
* nodeSamplescan.c
* Support routines for sample scans of relations (table sampling).
*
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
*/
#include "postgres.h"
-#include "access/tablesample.h"
+#include "access/hash.h"
+#include "access/relscan.h"
+#include "access/tsmapi.h"
#include "executor/executor.h"
#include "executor/nodeSamplescan.h"
#include "miscadmin.h"
-#include "parser/parsetree.h"
#include "pgstat.h"
-#include "storage/bufmgr.h"
#include "storage/predicate.h"
#include "utils/rel.h"
-#include "utils/syscache.h"
#include "utils/tqual.h"
-static void InitScanRelation(SampleScanState *node, EState *estate,
- int eflags, TableSampleClause *tablesample);
+static void InitScanRelation(SampleScanState *node, EState *estate, int eflags);
static TupleTableSlot *SampleNext(SampleScanState *node);
-
+static void tablesample_init(SampleScanState *scanstate);
+static HeapTuple tablesample_getnext(SampleScanState *scanstate);
+static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
+ HeapScanDesc scan);
/* ----------------------------------------------------------------
* Scan Support
static TupleTableSlot *
SampleNext(SampleScanState *node)
{
- TupleTableSlot *slot;
- TableSampleDesc *tsdesc;
HeapTuple tuple;
+ TupleTableSlot *slot;
/*
- * get information from the scan state
+ * if this is first call within a scan, initialize
*/
- slot = node->ss.ss_ScanTupleSlot;
- tsdesc = node->tsdesc;
+ if (!node->begun)
+ tablesample_init(node);
+
+ /*
+ * get the next tuple, and store it in our result slot
+ */
+ tuple = tablesample_getnext(node);
- tuple = tablesample_getnext(tsdesc);
+ slot = node->ss.ss_ScanTupleSlot;
if (tuple)
ExecStoreTuple(tuple, /* tuple to store */
slot, /* slot to store in */
- tsdesc->heapScan->rs_cbuf, /* buffer associated
- * with this tuple */
+ node->ss.ss_currentScanDesc->rs_cbuf, /* tuple's buffer */
false); /* don't pfree this pointer */
else
ExecClearTuple(slot);
static bool
SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
{
- /* No need to recheck for SampleScan */
+ /*
+ * No need to recheck for SampleScan, since like SeqScan we don't pass any
+ * checkable keys to heap_beginscan.
+ *
+ * Neither argument is examined; every tuple is reported as passing.
+ */
return true;
}
* ----------------------------------------------------------------
*/
static void
-InitScanRelation(SampleScanState *node, EState *estate, int eflags,
- TableSampleClause *tablesample)
+InitScanRelation(SampleScanState *node, EState *estate, int eflags)
{
Relation currentRelation;
* open that relation and acquire appropriate lock on it.
*/
currentRelation = ExecOpenScanRelation(estate,
- ((SampleScan *) node->ss.ps.plan)->scanrelid,
+ ((SampleScan *) node->ss.ps.plan)->scan.scanrelid,
eflags);
node->ss.ss_currentRelation = currentRelation;
- /*
- * Even though we aren't going to do a conventional seqscan, it is useful
- * to create a HeapScanDesc --- many of the fields in it are usable.
- */
- node->ss.ss_currentScanDesc =
- heap_beginscan_sampling(currentRelation, estate->es_snapshot, 0, NULL,
- tablesample->tsmseqscan,
- tablesample->tsmpagemode);
+ /* we won't set up the HeapScanDesc till later */
+ node->ss.ss_currentScanDesc = NULL;
/* and report the scan tuple slot's rowtype */
ExecAssignScanType(&node->ss, RelationGetDescr(currentRelation));
ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
{
SampleScanState *scanstate;
- RangeTblEntry *rte = rt_fetch(node->scanrelid,
- estate->es_range_table);
+ TableSampleClause *tsc = node->tablesample;
+ TsmRoutine *tsm;
Assert(outerPlan(node) == NULL);
Assert(innerPlan(node) == NULL);
- Assert(rte->tablesample != NULL);
/*
* create state structure
* initialize child expressions
*/
scanstate->ss.ps.targetlist = (List *)
- ExecInitExpr((Expr *) node->plan.targetlist,
+ ExecInitExpr((Expr *) node->scan.plan.targetlist,
(PlanState *) scanstate);
scanstate->ss.ps.qual = (List *)
- ExecInitExpr((Expr *) node->plan.qual,
+ ExecInitExpr((Expr *) node->scan.plan.qual,
+ (PlanState *) scanstate);
+
+ scanstate->args = (List *)
+ ExecInitExpr((Expr *) tsc->args,
+ (PlanState *) scanstate);
+ scanstate->repeatable =
+ ExecInitExpr(tsc->repeatable,
(PlanState *) scanstate);
/*
/*
* initialize scan relation
*/
- InitScanRelation(scanstate, estate, eflags, rte->tablesample);
+ InitScanRelation(scanstate, estate, eflags);
scanstate->ss.ps.ps_TupFromTlist = false;
ExecAssignResultTypeFromTL(&scanstate->ss.ps);
ExecAssignScanProjectionInfo(&scanstate->ss);
- scanstate->tsdesc = tablesample_init(scanstate, rte->tablesample);
+ /*
+ * If we don't have a REPEATABLE clause, select a random seed. We want to
+ * do this just once, since the seed shouldn't change over rescans.
+ */
+ if (tsc->repeatable == NULL)
+ scanstate->seed = random();
+
+ /*
+ * Finally, initialize the TABLESAMPLE method handler.
+ */
+ tsm = GetTsmRoutine(tsc->tsmhandler);
+ scanstate->tsmroutine = tsm;
+ scanstate->tsm_state = NULL;
+
+ if (tsm->InitSampleScan)
+ tsm->InitSampleScan(scanstate, eflags);
+
+ /* We'll do BeginSampleScan later; we can't evaluate params yet */
+ scanstate->begun = false;
return scanstate;
}
/*
* Tell sampling function that we finished the scan.
*/
- tablesample_end(node->tsdesc);
+ if (node->tsmroutine->EndSampleScan)
+ node->tsmroutine->EndSampleScan(node);
/*
* Free the exprcontext
/*
* close heap scan
*/
- heap_endscan(node->ss.ss_currentScanDesc);
+ if (node->ss.ss_currentScanDesc)
+ heap_endscan(node->ss.ss_currentScanDesc);
/*
* close the heap relation.
ExecCloseScanRelation(node->ss.ss_currentRelation);
}
-/* ----------------------------------------------------------------
- * Join Support
- * ----------------------------------------------------------------
- */
-
/* ----------------------------------------------------------------
* ExecReScanSampleScan
*
void
ExecReScanSampleScan(SampleScanState *node)
{
- heap_rescan(node->ss.ss_currentScanDesc, NULL);
+ /* Remember we need to do BeginSampleScan again (if we did it at all) */
+ node->begun = false;
+
+ ExecScanReScan(&node->ss);
+}
+
+
+/*
+ * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
+ *
+ * SampleNext calls this whenever scanstate->begun is false, i.e. on the
+ * first fetch of a scan and on the first fetch after ExecReScanSampleScan
+ * has cleared the flag.
+ *
+ * Steps, in order:
+ *
+ * 1. Evaluate each TABLESAMPLE argument expression into a Datum array.
+ *    A null result raises ERRCODE_INVALID_TABLESAMPLE_ARGUMENT.
+ * 2. Pick the seed.  If a REPEATABLE expression exists it is evaluated
+ *    (null raises ERRCODE_INVALID_TABLESAMPLE_REPEAT) and hashed with
+ *    hashfloat8; otherwise scanstate->seed is used.
+ * 3. Reset use_bulkread and use_pagemode to true, then call the method's
+ *    BeginSampleScan with the argument array, its length, and the seed.
+ * 4. Create the HeapScanDesc if the scan state has none yet, else reset
+ *    the existing one, passing use_bulkread, allow_sync and use_pagemode.
+ *
+ * The argument array is freed before returning, and scanstate->begun is set
+ * to true.
+ */
+static void
+tablesample_init(SampleScanState *scanstate)
+{
+ TsmRoutine *tsm = scanstate->tsmroutine;
+ ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
+ Datum *params;
+ Datum datum;
+ bool isnull;
+ uint32 seed;
+ bool allow_sync;
+ int i;
+ ListCell *arg;
+
+ params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
+
+ i = 0;
+ foreach(arg, scanstate->args)
+ {
+ ExprState *argstate = (ExprState *) lfirst(arg);
+
+ params[i] = ExecEvalExprSwitchContext(argstate,
+ econtext,
+ &isnull,
+ NULL);
+ if (isnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+ errmsg("TABLESAMPLE parameter cannot be null")));
+ i++;
+ }
+
+ if (scanstate->repeatable)
+ {
+ datum = ExecEvalExprSwitchContext(scanstate->repeatable,
+ econtext,
+ &isnull,
+ NULL);
+ if (isnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
+ errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
+
+ /*
+ * The REPEATABLE parameter has been coerced to float8 by the parser.
+ * The reason for using float8 at the SQL level is that it will
+ * produce unsurprising results both for users used to databases that
+ * accept only integers in the REPEATABLE clause and for those who
+ * might expect that REPEATABLE works like setseed() (a float in the
+ * range from -1 to 1).
+ *
+ * We use hashfloat8() to convert the supplied value into a suitable
+ * seed. For regression-testing purposes, that has the convenient
+ * property that REPEATABLE(0) gives a machine-independent result.
+ */
+ seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
+ }
+ else
+ {
+ /* Use the seed selected by ExecInitSampleScan */
+ seed = scanstate->seed;
+ }
+
+ /* Set default values for params that BeginSampleScan can adjust */
+ scanstate->use_bulkread = true;
+ scanstate->use_pagemode = true;
+
+ /* Let tablesample method do its thing */
+ tsm->BeginSampleScan(scanstate,
+ params,
+ list_length(scanstate->args),
+ seed);
+
+ /* We'll use syncscan if there's no NextSampleBlock function */
+ allow_sync = (tsm->NextSampleBlock == NULL);
+
+ /* Now we can create or reset the HeapScanDesc */
+ if (scanstate->ss.ss_currentScanDesc == NULL)
+ {
+ scanstate->ss.ss_currentScanDesc =
+ heap_beginscan_sampling(scanstate->ss.ss_currentRelation,
+ scanstate->ss.ps.state->es_snapshot,
+ 0, NULL,
+ scanstate->use_bulkread,
+ allow_sync,
+ scanstate->use_pagemode);
+ }
+ else
+ {
+ heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
+ scanstate->use_bulkread,
+ allow_sync,
+ scanstate->use_pagemode);
+ }
+
+ pfree(params);
+
+ /* And we're initialized. */
+ scanstate->begun = true;
+}
+
+/*
+ * Get next tuple from TABLESAMPLE method.
+ *
+ * Returns a pointer to the scan descriptor's rs_ctup for the next tuple that
+ * is both chosen by the sampling method and visible, or NULL when the scan
+ * is finished (including the case where the relation has zero blocks).
+ *
+ * Block selection: if the method supplies NextSampleBlock, that callback
+ * picks every block to read and an invalid block number ends the scan.
+ * Otherwise blocks are read in increasing order starting at rs_startblock,
+ * wrapping to block 0 after the last block, and the scan ends when the
+ * start block comes up again.
+ *
+ * Tuple selection: on the current page NextSampleTuple is called repeatedly.
+ * A valid offset whose line pointer is not normal, or whose tuple is not
+ * visible, is skipped; an invalid offset means the page is exhausted and
+ * the next block is fetched.
+ *
+ * When the scan is not in page-at-a-time mode, the buffer is share-locked
+ * while tuples on it are examined and unlocked before returning or moving
+ * to another page.
+ *
+ * Note: an awful lot of this is copied-and-pasted from heapam.c. It would
+ * perhaps be better to refactor to share more code.
+ */
+static HeapTuple
+tablesample_getnext(SampleScanState *scanstate)
+{
+ TsmRoutine *tsm = scanstate->tsmroutine;
+ HeapScanDesc scan = scanstate->ss.ss_currentScanDesc;
+ HeapTuple tuple = &(scan->rs_ctup);
+ Snapshot snapshot = scan->rs_snapshot;
+ bool pagemode = scan->rs_pageatatime;
+ BlockNumber blockno;
+ Page page;
+ bool all_visible;
+ OffsetNumber maxoffset;
+
+ if (!scan->rs_inited)
+ {
+ /*
+ * return null immediately if relation is empty
+ */
+ if (scan->rs_nblocks == 0)
+ {
+ Assert(!BufferIsValid(scan->rs_cbuf));
+ tuple->t_data = NULL;
+ return NULL;
+ }
+ if (tsm->NextSampleBlock)
+ {
+ blockno = tsm->NextSampleBlock(scanstate);
+ if (!BlockNumberIsValid(blockno))
+ {
+ tuple->t_data = NULL;
+ return NULL;
+ }
+ }
+ else
+ blockno = scan->rs_startblock;
+ Assert(blockno < scan->rs_nblocks);
+ heapgetpage(scan, blockno);
+ scan->rs_inited = true;
+ }
+ else
+ {
+ /* continue from previously returned page/tuple */
+ blockno = scan->rs_cblock; /* current page */
+ }
/*
- * Tell sampling function to reset its state for rescan.
+ * When not using pagemode, we must lock the buffer during tuple
+ * visibility checks.
*/
- tablesample_reset(node->tsdesc);
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+ page = (Page) BufferGetPage(scan->rs_cbuf);
+ all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
+ maxoffset = PageGetMaxOffsetNumber(page);
+
+ for (;;)
+ {
+ OffsetNumber tupoffset;
+ bool finished;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* Ask the tablesample method which tuples to check on this page. */
+ tupoffset = tsm->NextSampleTuple(scanstate,
+ blockno,
+ maxoffset);
+
+ if (OffsetNumberIsValid(tupoffset))
+ {
+ ItemId itemid;
+ bool visible;
+
+ /* Skip invalid tuple pointers. */
+ itemid = PageGetItemId(page, tupoffset);
+ if (!ItemIdIsNormal(itemid))
+ continue;
+
+ tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+ tuple->t_len = ItemIdGetLength(itemid);
+ ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
+
+ if (all_visible)
+ visible = true;
+ else
+ visible = SampleTupleVisible(tuple, tupoffset, scan);
+
+ /* in pagemode, heapgetpage did this for us */
+ if (!pagemode)
+ CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
+ scan->rs_cbuf, snapshot);
+
+ if (visible)
+ {
+ /* Found visible tuple, return it. */
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+ break;
+ }
+ else
+ {
+ /* Try next tuple from same page. */
+ continue;
+ }
+ }
+
+ /*
+ * if we get here, it means we've exhausted the items on this page and
+ * it's time to move to the next.
+ */
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+ if (tsm->NextSampleBlock)
+ {
+ blockno = tsm->NextSampleBlock(scanstate);
+ Assert(!scan->rs_syncscan);
+ finished = !BlockNumberIsValid(blockno);
+ }
+ else
+ {
+ /* Without NextSampleBlock, just do a plain forward seqscan. */
+ blockno++;
+ if (blockno >= scan->rs_nblocks)
+ blockno = 0;
+
+ /*
+ * Report our new scan position for synchronization purposes.
+ *
+ * Note: we do this before checking for end of scan so that the
+ * final state of the position hint is back at the start of the
+ * rel. That's not strictly necessary, but otherwise when you run
+ * the same query multiple times the starting position would shift
+ * a little bit backwards on every invocation, which is confusing.
+ * We don't guarantee any specific ordering in general, though.
+ */
+ if (scan->rs_syncscan)
+ ss_report_location(scan->rs_rd, blockno);
+
+ finished = (blockno == scan->rs_startblock);
+ }
+
+ /*
+ * Reached end of scan?
+ */
+ if (finished)
+ {
+ if (BufferIsValid(scan->rs_cbuf))
+ ReleaseBuffer(scan->rs_cbuf);
+ scan->rs_cbuf = InvalidBuffer;
+ scan->rs_cblock = InvalidBlockNumber;
+ tuple->t_data = NULL;
+ scan->rs_inited = false;
+ return NULL;
+ }
+
+ Assert(blockno < scan->rs_nblocks);
+ heapgetpage(scan, blockno);
+
+ /* Re-establish state for new page */
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+ page = (Page) BufferGetPage(scan->rs_cbuf);
+ all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
+ maxoffset = PageGetMaxOffsetNumber(page);
+ }
+
+ /* Count successfully-fetched tuples as heap fetches */
+ pgstat_count_heap_getnext(scan->rs_rd);
+
+ return &(scan->rs_ctup);
+}
- ExecScanReScan(&node->ss);
+/*
+ * Check visibility of the tuple.
+ *
+ * 'tuple' is the candidate tuple, 'tupoffset' is its offset number on the
+ * scan's current page, and 'scan' is the heap scan descriptor.
+ *
+ * Returns true if the tuple is visible.  In page-at-a-time mode only
+ * 'tupoffset' is consulted (it is looked up in scan->rs_vistuples[]);
+ * otherwise 'tuple' is tested against scan->rs_snapshot.
+ */
+static bool
+SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
+{
+ if (scan->rs_pageatatime)
+ {
+ /*
+ * In pageatatime mode, heapgetpage() already did visibility checks,
+ * so just look at the info it left in rs_vistuples[].
+ *
+ * We use a binary search over the known-sorted array. Note: we could
+ * save some effort if we insisted that NextSampleTuple select tuples
+ * in increasing order, but it's not clear that there would be enough
+ * gain to justify the restriction.
+ */
+ int start = 0,
+ end = scan->rs_ntuples - 1;
+
+ while (start <= end)
+ {
+ int mid = (start + end) / 2;
+ OffsetNumber curoffset = scan->rs_vistuples[mid];
+
+ if (tupoffset == curoffset)
+ return true;
+ else if (tupoffset < curoffset)
+ end = mid - 1;
+ else
+ start = mid + 1;
+ }
+
+ return false;
+ }
+ else
+ {
+ /* Otherwise, we have to check the tuple individually. */
+ return HeapTupleSatisfiesVisibility(tuple,
+ scan->rs_snapshot,
+ scan->rs_cbuf);
+ }
}
return newnode;
}
+/*
+ * _copySampleScan
+ *
+ * Returns a newly made SampleScan node copied from 'from': the Scan
+ * superclass fields via CopyScanFields, plus the tablesample field.
+ */
+static SampleScan *
+_copySampleScan(const SampleScan *from)
+{
+ SampleScan *newnode = makeNode(SampleScan);
+
+ /*
+ * copy node superclass fields
+ */
+ CopyScanFields((const Scan *) from, (Scan *) newnode);
+
+ /*
+ * copy remainder of node
+ */
+ COPY_NODE_FIELD(tablesample);
+
+ return newnode;
+}
+
/*
* _copyIndexScan
*/
return newnode;
}
-/*
- * _copySampleScan
- */
-static SampleScan *
-_copySampleScan(const SampleScan *from)
-{
- SampleScan *newnode = makeNode(SampleScan);
-
- /*
- * copy node superclass fields
- */
- CopyScanFields((const Scan *) from, (Scan *) newnode);
-
- return newnode;
-}
-
/*
* CopyJoinFields
*
return newnode;
}
+static TableSampleClause *
+_copyTableSampleClause(const TableSampleClause *from)
+{
+ TableSampleClause *newnode = makeNode(TableSampleClause);
+
+ COPY_SCALAR_FIELD(tsmhandler); /* OID handed to GetTsmRoutine() */
+ COPY_NODE_FIELD(args); /* list of TABLESAMPLE argument expressions */
+ COPY_NODE_FIELD(repeatable); /* REPEATABLE expression; may be NULL */
+
+ return newnode;
+}
+}
+
static WithCheckOption *
_copyWithCheckOption(const WithCheckOption *from)
{
return newnode;
}
-static RangeTableSample *
-_copyRangeTableSample(const RangeTableSample *from)
-{
- RangeTableSample *newnode = makeNode(RangeTableSample);
-
- COPY_NODE_FIELD(relation);
- COPY_STRING_FIELD(method);
- COPY_NODE_FIELD(repeatable);
- COPY_NODE_FIELD(args);
-
- return newnode;
-}
-
-static TableSampleClause *
-_copyTableSampleClause(const TableSampleClause *from)
-{
- TableSampleClause *newnode = makeNode(TableSampleClause);
-
- COPY_SCALAR_FIELD(tsmid);
- COPY_SCALAR_FIELD(tsmseqscan);
- COPY_SCALAR_FIELD(tsmpagemode);
- COPY_SCALAR_FIELD(tsminit);
- COPY_SCALAR_FIELD(tsmnextblock);
- COPY_SCALAR_FIELD(tsmnexttuple);
- COPY_SCALAR_FIELD(tsmexaminetuple);
- COPY_SCALAR_FIELD(tsmend);
- COPY_SCALAR_FIELD(tsmreset);
- COPY_SCALAR_FIELD(tsmcost);
- COPY_NODE_FIELD(repeatable);
- COPY_NODE_FIELD(args);
-
- return newnode;
-}
-
static A_Expr *
_copyAExpr(const A_Expr *from)
{
return newnode;
}
+static RangeTableSample *
+_copyRangeTableSample(const RangeTableSample *from)
+{
+ RangeTableSample *newnode = makeNode(RangeTableSample);
+
+ COPY_NODE_FIELD(relation); /* the relation being sampled */
+ COPY_NODE_FIELD(method); /* sampling method name, copied as a node */
+ COPY_NODE_FIELD(args); /* TABLESAMPLE arguments */
+ COPY_NODE_FIELD(repeatable); /* REPEATABLE expression */
+ COPY_LOCATION_FIELD(location); /* value reported by exprLocation() */
+
+ return newnode;
+}
+}
+
static TypeCast *
_copyTypeCast(const TypeCast *from)
{
case T_SeqScan:
retval = _copySeqScan(from);
break;
+ case T_SampleScan:
+ retval = _copySampleScan(from);
+ break;
case T_IndexScan:
retval = _copyIndexScan(from);
break;
case T_CustomScan:
retval = _copyCustomScan(from);
break;
- case T_SampleScan:
- retval = _copySampleScan(from);
- break;
case T_Join:
retval = _copyJoin(from);
break;
case T_RangeFunction:
retval = _copyRangeFunction(from);
break;
+ case T_RangeTableSample:
+ retval = _copyRangeTableSample(from);
+ break;
case T_TypeName:
retval = _copyTypeName(from);
break;
case T_RangeTblFunction:
retval = _copyRangeTblFunction(from);
break;
+ case T_TableSampleClause:
+ retval = _copyTableSampleClause(from);
+ break;
case T_WithCheckOption:
retval = _copyWithCheckOption(from);
break;
case T_CommonTableExpr:
retval = _copyCommonTableExpr(from);
break;
- case T_RangeTableSample:
- retval = _copyRangeTableSample(from);
- break;
- case T_TableSampleClause:
- retval = _copyTableSampleClause(from);
- break;
case T_FuncWithArgs:
retval = _copyFuncWithArgs(from);
break;
return true;
}
+static bool
+_equalRangeTableSample(const RangeTableSample *a, const RangeTableSample *b)
+{
+ COMPARE_NODE_FIELD(relation); /* the relation being sampled */
+ COMPARE_NODE_FIELD(method); /* sampling method name, compared as a node */
+ COMPARE_NODE_FIELD(args); /* TABLESAMPLE arguments */
+ COMPARE_NODE_FIELD(repeatable); /* REPEATABLE expression */
+ COMPARE_LOCATION_FIELD(location);
+
+ return true; /* reached only if no COMPARE_* macro above returned first (assumed macro behavior) */
+}
+
static bool
_equalIndexElem(const IndexElem *a, const IndexElem *b)
{
return true;
}
+static bool
+_equalTableSampleClause(const TableSampleClause *a, const TableSampleClause *b)
+{
+ COMPARE_SCALAR_FIELD(tsmhandler); /* OID handed to GetTsmRoutine() */
+ COMPARE_NODE_FIELD(args); /* list of TABLESAMPLE argument expressions */
+ COMPARE_NODE_FIELD(repeatable); /* REPEATABLE expression; may be NULL */
+
+ return true; /* reached only if no COMPARE_* macro above returned first (assumed macro behavior) */
+}
+
static bool
_equalWithCheckOption(const WithCheckOption *a, const WithCheckOption *b)
{
return true;
}
-static bool
-_equalRangeTableSample(const RangeTableSample *a, const RangeTableSample *b)
-{
- COMPARE_NODE_FIELD(relation);
- COMPARE_STRING_FIELD(method);
- COMPARE_NODE_FIELD(repeatable);
- COMPARE_NODE_FIELD(args);
-
- return true;
-}
-
-static bool
-_equalTableSampleClause(const TableSampleClause *a, const TableSampleClause *b)
-{
- COMPARE_SCALAR_FIELD(tsmid);
- COMPARE_SCALAR_FIELD(tsmseqscan);
- COMPARE_SCALAR_FIELD(tsmpagemode);
- COMPARE_SCALAR_FIELD(tsminit);
- COMPARE_SCALAR_FIELD(tsmnextblock);
- COMPARE_SCALAR_FIELD(tsmnexttuple);
- COMPARE_SCALAR_FIELD(tsmexaminetuple);
- COMPARE_SCALAR_FIELD(tsmend);
- COMPARE_SCALAR_FIELD(tsmreset);
- COMPARE_SCALAR_FIELD(tsmcost);
- COMPARE_NODE_FIELD(repeatable);
- COMPARE_NODE_FIELD(args);
-
- return true;
-}
-
static bool
_equalXmlSerialize(const XmlSerialize *a, const XmlSerialize *b)
{
case T_RangeFunction:
retval = _equalRangeFunction(a, b);
break;
+ case T_RangeTableSample:
+ retval = _equalRangeTableSample(a, b);
+ break;
case T_TypeName:
retval = _equalTypeName(a, b);
break;
case T_RangeTblFunction:
retval = _equalRangeTblFunction(a, b);
break;
+ case T_TableSampleClause:
+ retval = _equalTableSampleClause(a, b);
+ break;
case T_WithCheckOption:
retval = _equalWithCheckOption(a, b);
break;
case T_CommonTableExpr:
retval = _equalCommonTableExpr(a, b);
break;
- case T_RangeTableSample:
- retval = _equalRangeTableSample(a, b);
- break;
- case T_TableSampleClause:
- retval = _equalTableSampleClause(a, b);
- break;
case T_FuncWithArgs:
retval = _equalFuncWithArgs(a, b);
break;
case T_WindowDef:
loc = ((const WindowDef *) expr)->location;
break;
+ case T_RangeTableSample:
+ loc = ((const RangeTableSample *) expr)->location;
+ break;
case T_TypeName:
loc = ((const TypeName *) expr)->location;
break;
return walker(((PlaceHolderInfo *) node)->ph_var, context);
case T_RangeTblFunction:
return walker(((RangeTblFunction *) node)->funcexpr, context);
+ case T_TableSampleClause:
+ {
+ TableSampleClause *tsc = (TableSampleClause *) node;
+
+ if (expression_tree_walker((Node *) tsc->args,
+ walker, context))
+ return true;
+ if (walker((Node *) tsc->repeatable, context))
+ return true;
+ }
+ break;
default:
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(node));
switch (rte->rtekind)
{
case RTE_RELATION:
- if (rte->tablesample)
- {
- if (walker(rte->tablesample->args, context))
- return true;
- if (walker(rte->tablesample->repeatable, context))
- return true;
- }
+ if (walker(rte->tablesample, context))
+ return true;
break;
case RTE_CTE:
/* nothing to do */
return (Node *) newnode;
}
break;
+ case T_TableSampleClause:
+ {
+ TableSampleClause *tsc = (TableSampleClause *) node;
+ TableSampleClause *newnode;
+
+ FLATCOPY(newnode, tsc, TableSampleClause);
+ MUTATE(newnode->args, tsc->args, List *);
+ MUTATE(newnode->repeatable, tsc->repeatable, Expr *);
+ return (Node *) newnode;
+ }
+ break;
default:
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(node));
switch (rte->rtekind)
{
case RTE_RELATION:
- if (rte->tablesample)
- {
- CHECKFLATCOPY(newrte->tablesample, rte->tablesample,
- TableSampleClause);
- MUTATE(newrte->tablesample->args,
- newrte->tablesample->args,
- List *);
- MUTATE(newrte->tablesample->repeatable,
- newrte->tablesample->repeatable,
- Node *);
- }
+ MUTATE(newrte->tablesample, rte->tablesample,
+ TableSampleClause *);
+ /* we don't bother to copy eref, aliases, etc; OK? */
break;
case RTE_CTE:
- /* we don't bother to copy eref, aliases, etc; OK? */
+ /* nothing to do */
break;
case RTE_SUBQUERY:
if (!(flags & QTW_IGNORE_RT_SUBQUERIES))
return true;
}
break;
+ case T_RangeTableSample:
+ {
+ RangeTableSample *rts = (RangeTableSample *) node;
+
+ if (walker(rts->relation, context))
+ return true;
+ /* method name is deemed uninteresting */
+ if (walker(rts->args, context))
+ return true;
+ if (walker(rts->repeatable, context))
+ return true;
+ }
+ break;
case T_TypeName:
{
TypeName *tn = (TypeName *) node;
break;
case T_CommonTableExpr:
return walker(((CommonTableExpr *) node)->ctequery, context);
- case T_RangeTableSample:
- {
- RangeTableSample *rts = (RangeTableSample *) node;
-
- if (walker(rts->relation, context))
- return true;
- if (walker(rts->repeatable, context))
- return true;
- if (walker(rts->args, context))
- return true;
- }
- break;
default:
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(node));
_outScanInfo(str, (const Scan *) node);
}
+static void
+_outSampleScan(StringInfo str, const SampleScan *node)
+{
+ WRITE_NODE_TYPE("SAMPLESCAN");
+
+ _outScanInfo(str, (const Scan *) node); /* Scan superclass fields first */
+
+ WRITE_NODE_FIELD(tablesample); /* then the TableSampleClause */
+}
+
static void
_outIndexScan(StringInfo str, const IndexScan *node)
{
node->methods->TextOutCustomScan(str, node);
}
-static void
-_outSampleScan(StringInfo str, const SampleScan *node)
-{
- WRITE_NODE_TYPE("SAMPLESCAN");
-
- _outScanInfo(str, (const Scan *) node);
-}
-
static void
_outJoin(StringInfo str, const Join *node)
{
WRITE_NODE_FIELD(ctecolcollations);
}
-static void
-_outRangeTableSample(StringInfo str, const RangeTableSample *node)
-{
- WRITE_NODE_TYPE("RANGETABLESAMPLE");
-
- WRITE_NODE_FIELD(relation);
- WRITE_STRING_FIELD(method);
- WRITE_NODE_FIELD(repeatable);
- WRITE_NODE_FIELD(args);
-}
-
-static void
-_outTableSampleClause(StringInfo str, const TableSampleClause *node)
-{
- WRITE_NODE_TYPE("TABLESAMPLECLAUSE");
-
- WRITE_OID_FIELD(tsmid);
- WRITE_BOOL_FIELD(tsmseqscan);
- WRITE_BOOL_FIELD(tsmpagemode);
- WRITE_OID_FIELD(tsminit);
- WRITE_OID_FIELD(tsmnextblock);
- WRITE_OID_FIELD(tsmnexttuple);
- WRITE_OID_FIELD(tsmexaminetuple);
- WRITE_OID_FIELD(tsmend);
- WRITE_OID_FIELD(tsmreset);
- WRITE_OID_FIELD(tsmcost);
- WRITE_NODE_FIELD(repeatable);
- WRITE_NODE_FIELD(args);
-}
-
static void
_outSetOperationStmt(StringInfo str, const SetOperationStmt *node)
{
WRITE_BITMAPSET_FIELD(funcparams);
}
+static void
+_outTableSampleClause(StringInfo str, const TableSampleClause *node)
+{
+ WRITE_NODE_TYPE("TABLESAMPLECLAUSE"); /* label matched by parseNodeString */
+
+ /* field order here is the order _readTableSampleClause reads them */
+ WRITE_OID_FIELD(tsmhandler);
+ WRITE_NODE_FIELD(args);
+ WRITE_NODE_FIELD(repeatable);
+}
+
static void
_outAExpr(StringInfo str, const A_Expr *node)
{
WRITE_NODE_FIELD(coldeflist);
}
+static void
+_outRangeTableSample(StringInfo str, const RangeTableSample *node)
+{
+ WRITE_NODE_TYPE("RANGETABLESAMPLE");
+
+ WRITE_NODE_FIELD(relation); /* the relation being sampled */
+ WRITE_NODE_FIELD(method); /* sampling method name, written as a node */
+ WRITE_NODE_FIELD(args); /* TABLESAMPLE arguments */
+ WRITE_NODE_FIELD(repeatable); /* REPEATABLE expression */
+ WRITE_LOCATION_FIELD(location);
+}
+
static void
_outConstraint(StringInfo str, const Constraint *node)
{
case T_SeqScan:
_outSeqScan(str, obj);
break;
+ case T_SampleScan:
+ _outSampleScan(str, obj);
+ break;
case T_IndexScan:
_outIndexScan(str, obj);
break;
case T_CustomScan:
_outCustomScan(str, obj);
break;
- case T_SampleScan:
- _outSampleScan(str, obj);
- break;
case T_Join:
_outJoin(str, obj);
break;
case T_CommonTableExpr:
_outCommonTableExpr(str, obj);
break;
- case T_RangeTableSample:
- _outRangeTableSample(str, obj);
- break;
- case T_TableSampleClause:
- _outTableSampleClause(str, obj);
- break;
case T_SetOperationStmt:
_outSetOperationStmt(str, obj);
break;
case T_RangeTblFunction:
_outRangeTblFunction(str, obj);
break;
+ case T_TableSampleClause:
+ _outTableSampleClause(str, obj);
+ break;
case T_A_Expr:
_outAExpr(str, obj);
break;
case T_RangeFunction:
_outRangeFunction(str, obj);
break;
+ case T_RangeTableSample:
+ _outRangeTableSample(str, obj);
+ break;
case T_Constraint:
_outConstraint(str, obj);
break;
READ_DONE();
}
-/*
- * _readRangeTableSample
- */
-static RangeTableSample *
-_readRangeTableSample(void)
-{
- READ_LOCALS(RangeTableSample);
-
- READ_NODE_FIELD(relation);
- READ_STRING_FIELD(method);
- READ_NODE_FIELD(repeatable);
- READ_NODE_FIELD(args);
-
- READ_DONE();
-}
-
-/*
- * _readTableSampleClause
- */
-static TableSampleClause *
-_readTableSampleClause(void)
-{
- READ_LOCALS(TableSampleClause);
-
- READ_OID_FIELD(tsmid);
- READ_BOOL_FIELD(tsmseqscan);
- READ_BOOL_FIELD(tsmpagemode);
- READ_OID_FIELD(tsminit);
- READ_OID_FIELD(tsmnextblock);
- READ_OID_FIELD(tsmnexttuple);
- READ_OID_FIELD(tsmexaminetuple);
- READ_OID_FIELD(tsmend);
- READ_OID_FIELD(tsmreset);
- READ_OID_FIELD(tsmcost);
- READ_NODE_FIELD(repeatable);
- READ_NODE_FIELD(args);
-
- READ_DONE();
-}
-
/*
* _readSetOperationStmt
*/
READ_DONE();
}
+/*
+ * _readTableSampleClause
+ *
+ * Reads the fields tsmhandler, args and repeatable, in that order, which is
+ * the order in which _outTableSampleClause writes them.  parseNodeString
+ * dispatches here on the "TABLESAMPLECLAUSE" label.
+ */
+static TableSampleClause *
+_readTableSampleClause(void)
+{
+ READ_LOCALS(TableSampleClause);
+
+ READ_OID_FIELD(tsmhandler);
+ READ_NODE_FIELD(args);
+ READ_NODE_FIELD(repeatable);
+
+ READ_DONE();
+}
+
/*
* parseNodeString
return_value = _readRowMarkClause();
else if (MATCH("COMMONTABLEEXPR", 15))
return_value = _readCommonTableExpr();
- else if (MATCH("RANGETABLESAMPLE", 16))
- return_value = _readRangeTableSample();
- else if (MATCH("TABLESAMPLECLAUSE", 17))
- return_value = _readTableSampleClause();
else if (MATCH("SETOPERATIONSTMT", 16))
return_value = _readSetOperationStmt();
else if (MATCH("ALIAS", 5))
return_value = _readRangeTblEntry();
else if (MATCH("RANGETBLFUNCTION", 16))
return_value = _readRangeTblFunction();
+ else if (MATCH("TABLESAMPLECLAUSE", 17))
+ return_value = _readTableSampleClause();
else if (MATCH("NOTIFY", 6))
return_value = _readNotifyStmt();
else if (MATCH("DECLARECURSOR", 13))
#include <math.h>
#include "access/sysattr.h"
+#include "access/tsmapi.h"
#include "catalog/pg_class.h"
#include "catalog/pg_operator.h"
#include "foreign/fdwapi.h"
}
else if (rte->tablesample != NULL)
{
- /* Build sample scan on relation */
+ /* Sampled relation */
set_tablesample_rel_pathlist(root, rel, rte);
}
else
/*
* set_tablesample_rel_size
- * Set size estimates for a sampled relation.
+ * Set size estimates for a sampled relation
+ *
+ * The page and tuple counts come from the sampling method's
+ * SampleScanGetSampleSize callback (looked up through the clause's
+ * tsmhandler) and overwrite rel->pages and rel->tuples before
+ * set_baserel_size_estimates() is called.
*/
static void
set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
+ TableSampleClause *tsc = rte->tablesample;
+ TsmRoutine *tsm;
+ BlockNumber pages;
+ double tuples;
+
+ /*
+ * Test any partial indexes of rel for applicability. We must do this
+ * first since partial unique indexes can affect size estimates.
+ */
+ check_partial_indexes(root, rel);
+
+ /*
+ * Call the sampling method's estimation function to estimate the number
+ * of pages it will read and the number of tuples it will return. (Note:
+ * we assume the function returns sane values.)
+ */
+ tsm = GetTsmRoutine(tsc->tsmhandler);
+ tsm->SampleScanGetSampleSize(root, rel, tsc->args,
+ &pages, &tuples);
+
+ /*
+ * For the moment, because we will only consider a SampleScan path for the
+ * rel, it's okay to just overwrite the pages and tuples estimates for the
+ * whole relation. If we ever consider multiple path types for sampled
+ * rels, we'll need more complication.
+ */
+ rel->pages = pages;
+ rel->tuples = tuples;
+
/* Mark rel with estimated output rows, width, etc */
set_baserel_size_estimates(root, rel);
}
/*
* set_tablesample_rel_pathlist
* Build access paths for a sampled relation
- *
- * There is only one possible path - sampling scan
*/
static void
set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
Path *path;
/*
- * We don't support pushing join clauses into the quals of a seqscan, but
- * it could still have required parameterization due to LATERAL refs in
- * its tlist.
+ * We don't support pushing join clauses into the quals of a samplescan,
+ * but it could still have required parameterization due to LATERAL refs
+ * in its tlist or TABLESAMPLE arguments.
*/
required_outer = rel->lateral_relids;
- /* We only do sample scan if it was requested */
+ /* Consider sampled scan */
path = create_samplescan_path(root, rel, required_outer);
- rel->pathlist = list_make1(path);
+
+ /*
+ * If the sampling method does not support repeatable scans, we must avoid
+ * plans that would scan the rel multiple times. Ideally, we'd simply
+ * avoid putting the rel on the inside of a nestloop join; but adding such
+ * a consideration to the planner seems like a great deal of complication
+ * to support an uncommon usage of second-rate sampling methods. Instead,
+ * if there is a risk that the query might perform an unsafe join, just
+ * wrap the SampleScan in a Materialize node. We can check for joins by
+ * counting the membership of all_baserels (note that this correctly
+ * counts inheritance trees as single rels). If we're inside a subquery,
+ * we can't easily check whether a join might occur in the outer query, so
+ * just assume one is possible.
+ *
+ * GetTsmRoutine is relatively expensive compared to the other tests here,
+ * so check repeatable_across_scans last, even though that's a bit odd.
+ */
+ if ((root->query_level > 1 ||
+ bms_membership(root->all_baserels) != BMS_SINGLETON) &&
+ !(GetTsmRoutine(rte->tablesample->tsmhandler)->repeatable_across_scans))
+ {
+ path = (Path *) create_material_path(rel, path);
+ }
+
+ add_path(rel, path);
+
+ /* For the moment, at least, there are no other paths to consider */
}
/*
switch (nodeTag(path))
{
case T_Path:
- ptype = "SeqScan";
+ switch (path->pathtype)
+ {
+ case T_SeqScan:
+ ptype = "SeqScan";
+ break;
+ case T_SampleScan:
+ ptype = "SampleScan";
+ break;
+ case T_SubqueryScan:
+ ptype = "SubqueryScan";
+ break;
+ case T_FunctionScan:
+ ptype = "FunctionScan";
+ break;
+ case T_ValuesScan:
+ ptype = "ValuesScan";
+ break;
+ case T_CteScan:
+ ptype = "CteScan";
+ break;
+ case T_WorkTableScan:
+ ptype = "WorkTableScan";
+ break;
+ default:
+ ptype = "???Path";
+ break;
+ }
break;
case T_IndexPath:
ptype = "IdxScan";
#include <math.h>
#include "access/htup_details.h"
+#include "access/tsmapi.h"
#include "executor/executor.h"
#include "executor/nodeHash.h"
#include "miscadmin.h"
* cost_samplescan
* Determines and returns the cost of scanning a relation using sampling.
*
- * From planner/optimizer perspective, we don't care all that much about cost
- * itself since there is always only one scan path to consider when sampling
- * scan is present, but number of rows estimation is still important.
- *
* 'baserel' is the relation to be scanned
* 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
*/
void
-cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
+cost_samplescan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, ParamPathInfo *param_info)
{
Cost startup_cost = 0;
Cost run_cost = 0;
+ RangeTblEntry *rte;
+ TableSampleClause *tsc;
+ TsmRoutine *tsm;
double spc_seq_page_cost,
spc_random_page_cost,
spc_page_cost;
QualCost qpqual_cost;
Cost cpu_per_tuple;
- BlockNumber pages;
- double tuples;
- RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
- TableSampleClause *tablesample = rte->tablesample;
- /* Should only be applied to base relations */
+ /* Should only be applied to base relations with tablesample clauses */
Assert(baserel->relid > 0);
- Assert(baserel->rtekind == RTE_RELATION);
+ rte = planner_rt_fetch(baserel->relid, root);
+ Assert(rte->rtekind == RTE_RELATION);
+ tsc = rte->tablesample;
+ Assert(tsc != NULL);
+ tsm = GetTsmRoutine(tsc->tsmhandler);
/* Mark the path with the correct row estimate */
- if (path->param_info)
- path->rows = path->param_info->ppi_rows;
+ if (param_info)
+ path->rows = param_info->ppi_rows;
else
path->rows = baserel->rows;
- /* Call the sampling method's costing function. */
- OidFunctionCall6(tablesample->tsmcost, PointerGetDatum(root),
- PointerGetDatum(path), PointerGetDatum(baserel),
- PointerGetDatum(tablesample->args),
- PointerGetDatum(&pages), PointerGetDatum(&tuples));
-
/* fetch estimated page cost for tablespace containing table */
get_tablespace_page_costs(baserel->reltablespace,
&spc_random_page_cost,
&spc_seq_page_cost);
-
- spc_page_cost = tablesample->tsmseqscan ? spc_seq_page_cost :
- spc_random_page_cost;
+ /* if NextSampleBlock is used, assume random access, else sequential */
+ spc_page_cost = (tsm->NextSampleBlock != NULL) ?
+ spc_random_page_cost : spc_seq_page_cost;
/*
- * disk costs
+ * disk costs (recall that baserel->pages has already been set to the
+ * number of pages the sampling method will visit)
*/
- run_cost += spc_page_cost * pages;
+ run_cost += spc_page_cost * baserel->pages;
- /* CPU costs */
- get_restriction_qual_cost(root, baserel, path->param_info, &qpqual_cost);
+ /*
+ * CPU costs (recall that baserel->tuples has already been set to the
+ * number of tuples the sampling method will select). Note that we ignore
+ * execution cost of the TABLESAMPLE parameter expressions; they will be
+ * evaluated only once per scan, and in most usages they'll likely be
+ * simple constants anyway. We also don't charge anything for the
+ * calculations the sampling method might do internally.
+ */
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
startup_cost += qpqual_cost.startup;
cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
- run_cost += cpu_per_tuple * tuples;
+ run_cost += cpu_per_tuple * baserel->tuples;
path->startup_cost = startup_cost;
path->total_cost = startup_cost + run_cost;
static void copy_path_costsize(Plan *dest, Path *src);
static void copy_plan_costsize(Plan *dest, Plan *src);
static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid);
-static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid);
+static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid,
+ TableSampleClause *tsc);
static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid,
Oid indexid, List *indexqual, List *indexqualorig,
List *indexorderby, List *indexorderbyorig,
/*
* create_samplescan_plan
- * Returns a samplecan plan for the base relation scanned by 'best_path'
+ * Returns a samplescan plan for the base relation scanned by 'best_path'
* with restriction clauses 'scan_clauses' and targetlist 'tlist'.
*/
static SampleScan *
{
SampleScan *scan_plan;
Index scan_relid = best_path->parent->relid;
+ RangeTblEntry *rte;
+ TableSampleClause *tsc;
- /* it should be a base rel with tablesample clause... */
+ /* it should be a base rel with a tablesample clause... */
Assert(scan_relid > 0);
- Assert(best_path->parent->rtekind == RTE_RELATION);
- Assert(best_path->pathtype == T_SampleScan);
+ rte = planner_rt_fetch(scan_relid, root);
+ Assert(rte->rtekind == RTE_RELATION);
+ tsc = rte->tablesample;
+ Assert(tsc != NULL);
/* Sort clauses into best execution order */
scan_clauses = order_qual_clauses(root, scan_clauses);
{
scan_clauses = (List *)
replace_nestloop_params(root, (Node *) scan_clauses);
+ tsc = (TableSampleClause *)
+ replace_nestloop_params(root, (Node *) tsc);
}
scan_plan = make_samplescan(tlist,
scan_clauses,
- scan_relid);
+ scan_relid,
+ tsc);
- copy_path_costsize(&scan_plan->plan, best_path);
+ copy_path_costsize(&scan_plan->scan.plan, best_path);
return scan_plan;
}
ListCell *lc;
/* Recursively transform child paths. */
- foreach (lc, best_path->custom_paths)
+ foreach(lc, best_path->custom_paths)
{
- Plan *plan = create_plan_recurse(root, (Path *) lfirst(lc));
+ Plan *plan = create_plan_recurse(root, (Path *) lfirst(lc));
custom_plans = lappend(custom_plans, plan);
}
static SampleScan *
make_samplescan(List *qptlist,
List *qpqual,
- Index scanrelid)
+ Index scanrelid,
+ TableSampleClause *tsc) /* TABLESAMPLE clause to store in the new node */
{
SampleScan *node = makeNode(SampleScan);
- Plan *plan = &node->plan;
+ Plan *plan = &node->scan.plan; /* Plan fields are now reached through node->scan */
/* cost should be inserted by caller */
plan->targetlist = qptlist;
plan->qual = qpqual;
plan->lefttree = NULL;
plan->righttree = NULL;
- node->scanrelid = scanrelid;
+ node->scan.scanrelid = scanrelid;
+ node->tablesample = tsc; /* stored as passed; no copy is made here */
return node;
}
return;
/* Fetch the appropriate variables */
- if (rte->rtekind == RTE_SUBQUERY)
+ if (rte->rtekind == RTE_RELATION)
+ vars = pull_vars_of_level((Node *) rte->tablesample, 0);
+ else if (rte->rtekind == RTE_SUBQUERY)
vars = pull_vars_of_level((Node *) rte->subquery, 1);
else if (rte->rtekind == RTE_FUNCTION)
vars = pull_vars_of_level((Node *) rte->functions, 0);
if (rte->rtekind == RTE_RELATION)
{
if (rte->tablesample)
- {
- rte->tablesample->args = (List *)
- preprocess_expression(root, (Node *) rte->tablesample->args,
- EXPRKIND_TABLESAMPLE);
- rte->tablesample->repeatable = (Node *)
- preprocess_expression(root, rte->tablesample->repeatable,
+ rte->tablesample = (TableSampleClause *)
+ preprocess_expression(root,
+ (Node *) rte->tablesample,
EXPRKIND_TABLESAMPLE);
- }
}
else if (rte->rtekind == RTE_SUBQUERY)
{
* If the query has any join RTEs, replace join alias variables with
* base-relation variables. We must do this before sublink processing,
* else sublinks expanded out from join aliases would not get processed.
- * We can skip it in non-lateral RTE functions and VALUES lists, however,
- * since they can't contain any Vars of the current query level.
+ * We can skip it in non-lateral RTE functions, VALUES lists, and
+ * TABLESAMPLE clauses, however, since they can't contain any Vars of the
+ * current query level.
*/
if (root->hasJoinRTEs &&
- !(kind == EXPRKIND_RTFUNC || kind == EXPRKIND_VALUES))
+ !(kind == EXPRKIND_RTFUNC ||
+ kind == EXPRKIND_VALUES ||
+ kind == EXPRKIND_TABLESAMPLE))
expr = flatten_join_alias_vars(root, expr);
/*
*
* In the flat rangetable, we zero out substructure pointers that are not
* needed by the executor; this reduces the storage space and copying cost
- * for cached plans. We keep only the tablesample field (which we'd otherwise
- * have to put in the plan tree, anyway); the ctename, alias and eref Alias
- * fields, which are needed by EXPLAIN; and the selectedCols, insertedCols and
+ * for cached plans. We keep only the ctename, alias and eref Alias fields,
+ * which are needed by EXPLAIN, and the selectedCols, insertedCols and
* updatedCols bitmaps, which are needed for executor-startup permissions
* checking and for trigger event checking.
*/
memcpy(newrte, rte, sizeof(RangeTblEntry));
/* zap unneeded sub-structure */
+ newrte->tablesample = NULL;
newrte->subquery = NULL;
newrte->joinaliasvars = NIL;
newrte->functions = NIL;
{
SampleScan *splan = (SampleScan *) plan;
- splan->scanrelid += rtoffset;
- splan->plan.targetlist =
- fix_scan_list(root, splan->plan.targetlist, rtoffset);
- splan->plan.qual =
- fix_scan_list(root, splan->plan.qual, rtoffset);
+ splan->scan.scanrelid += rtoffset;
+ splan->scan.plan.targetlist =
+ fix_scan_list(root, splan->scan.plan.targetlist, rtoffset);
+ splan->scan.plan.qual =
+ fix_scan_list(root, splan->scan.plan.qual, rtoffset);
+ splan->tablesample = (TableSampleClause *)
+ fix_scan_expr(root, (Node *) splan->tablesample, rtoffset);
}
break;
case T_IndexScan:
break;
case T_SeqScan:
+ context.paramids = bms_add_members(context.paramids, scan_params);
+ break;
+
case T_SampleScan:
+ finalize_primnode((Node *) ((SampleScan *) plan)->tablesample,
+ &context);
context.paramids = bms_add_members(context.paramids, scan_params);
break;
bms_add_members(context.paramids, scan_params);
/* child nodes if any */
- foreach (lc, cscan->custom_plans)
+ foreach(lc, cscan->custom_plans)
{
context.paramids =
bms_add_members(context.paramids,
switch (child_rte->rtekind)
{
+ case RTE_RELATION:
+ if (child_rte->tablesample)
+ child_rte->lateral = true;
+ break;
case RTE_SUBQUERY:
case RTE_FUNCTION:
case RTE_VALUES:
child_rte->lateral = true;
break;
- case RTE_RELATION:
case RTE_JOIN:
case RTE_CTE:
/* these can't contain any lateral references */
{
switch (rte->rtekind)
{
+ case RTE_RELATION:
+ /* shouldn't be marked LATERAL unless tablesample */
+ Assert(rte->tablesample);
+ rte->tablesample = (TableSampleClause *)
+ pullup_replace_vars((Node *) rte->tablesample,
+ context);
+ break;
case RTE_SUBQUERY:
rte->subquery =
pullup_replace_vars_subquery(rte->subquery,
pullup_replace_vars((Node *) rte->values_lists,
context);
break;
- case RTE_RELATION:
case RTE_JOIN:
case RTE_CTE:
/* these shouldn't be marked LATERAL */
/*
* create_samplescan_path
- * Like seqscan but uses sampling function while scanning.
+ * Creates a path node for a sampled table scan.
*/
Path *
create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
required_outer);
pathnode->pathkeys = NIL; /* samplescan has unordered result */
- cost_samplescan(pathnode, root, rel);
+ cost_samplescan(pathnode, root, rel, pathnode->param_info);
return pathnode;
}
{
case T_SeqScan:
return create_seqscan_path(root, rel, required_outer);
+ case T_SampleScan:
+ return (Path *) create_samplescan_path(root, rel, required_outer);
case T_IndexScan:
case T_IndexOnlyScan:
{
case T_SubqueryScan:
return create_subqueryscan_path(root, rel, path->pathkeys,
required_outer);
- case T_SampleScan:
- return (Path *) create_samplescan_path(root, rel, required_outer);
default:
break;
}
%type <jexpr> joined_table
%type <range> relation_expr
%type <range> relation_expr_opt_alias
+%type <node> tablesample_clause opt_repeatable_clause
%type <target> target_el single_set_clause set_target insert_column_item
-%type <node> relation_expr_tablesample tablesample_clause opt_repeatable_clause
%type <str> generic_option_name
%type <node> generic_option_arg
$1->alias = $2;
$$ = (Node *) $1;
}
- | relation_expr_tablesample
+ | relation_expr opt_alias_clause tablesample_clause
{
- $$ = (Node *) $1;
+ RangeTableSample *n = (RangeTableSample *) $3;
+ $1->alias = $2;
+ /* relation_expr goes inside the RangeTableSample node */
+ n->relation = (Node *) $1;
+ $$ = (Node *) n;
}
| func_table func_alias_clause
{
}
;
-
-relation_expr_tablesample: relation_expr opt_alias_clause tablesample_clause
- {
- RangeTableSample *n = (RangeTableSample *) $3;
- n->relation = $1;
- n->relation->alias = $2;
- $$ = (Node *) n;
- }
- ;
-
+/*
+ * TABLESAMPLE decoration in a FROM item
+ *
+ * Builds a RangeTableSample node carrying the sampling method name, its
+ * argument list, the optional REPEATABLE expression, and the parse location
+ * of the method name.  The node's relation field is not set by this rule;
+ * the FROM-item production that uses this clause stores the relation there.
+ */
tablesample_clause:
- TABLESAMPLE ColId '(' expr_list ')' opt_repeatable_clause
+ TABLESAMPLE func_name '(' expr_list ')' opt_repeatable_clause
{
RangeTableSample *n = makeNode(RangeTableSample);
+ /* n->relation will be filled in later */
n->method = $2;
n->args = $4;
n->repeatable = $6;
+ n->location = @2;
$$ = (Node *) n;
}
;
#include "miscadmin.h"
#include "access/heapam.h"
+#include "access/tsmapi.h"
#include "catalog/catalog.h"
-#include "access/htup_details.h"
#include "catalog/heap.h"
#include "catalog/pg_constraint.h"
#include "catalog/pg_type.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
-#include "utils/syscache.h"
+
/* Convenience macro for the most common makeNamespaceItem() case */
#define makeDefaultNSItem(rte) makeNamespaceItem(rte, true, true, false, true)
RangeSubselect *r);
static RangeTblEntry *transformRangeFunction(ParseState *pstate,
RangeFunction *r);
+static TableSampleClause *transformRangeTableSample(ParseState *pstate,
+ RangeTableSample *rts);
static Node *transformFromClauseItem(ParseState *pstate, Node *n,
RangeTblEntry **top_rte, int *top_rti,
List **namespace);
return result;
}
-static RangeTblEntry *
-transformTableSampleEntry(ParseState *pstate, RangeTableSample *rv)
-{
- RangeTblEntry *rte = NULL;
- CommonTableExpr *cte = NULL;
- TableSampleClause *tablesample = NULL;
-
- /* if relation has an unqualified name, it might be a CTE reference */
- if (!rv->relation->schemaname)
- {
- Index levelsup;
-
- cte = scanNameSpaceForCTE(pstate, rv->relation->relname, &levelsup);
- }
-
- /* We first need to build a range table entry */
- if (!cte)
- rte = transformTableEntry(pstate, rv->relation);
-
- if (!rte ||
- (rte->relkind != RELKIND_RELATION &&
- rte->relkind != RELKIND_MATVIEW))
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("TABLESAMPLE clause can only be used on tables and materialized views"),
- parser_errposition(pstate, rv->relation->location)));
-
- tablesample = ParseTableSample(pstate, rv->method, rv->repeatable,
- rv->args, rv->relation->location);
- rte->tablesample = tablesample;
-
- return rte;
-}
-
/*
* transformTableEntry --- transform a RangeVar (simple relation reference)
*/
return rte;
}
+/*
+ * transformRangeTableSample --- transform a TABLESAMPLE clause
+ *
+ * Caller has already transformed rts->relation, we just have to validate
+ * the remaining fields and create a TableSampleClause node.
+ *
+ * The method name is resolved to a handler function that takes a single
+ * INTERNAL argument and returns tsm_handler.  The TsmRoutine obtained from
+ * that handler supplies the expected argument types.  Each TABLESAMPLE
+ * argument, and the REPEATABLE expression if one was given, is transformed,
+ * coerced to the required type, and has collations assigned here.
+ *
+ * Returns the new TableSampleClause; its repeatable field is NULL when no
+ * REPEATABLE clause was written.
+ *
+ * An error is reported at rts->location if the method does not exist, if
+ * the function found does not return tsm_handler, if the number of
+ * arguments is wrong, or if REPEATABLE is used with a method whose
+ * TsmRoutine does not set repeatable_across_queries.
+ */
+static TableSampleClause *
+transformRangeTableSample(ParseState *pstate, RangeTableSample *rts)
+{
+ TableSampleClause *tablesample;
+ Oid handlerOid;
+ Oid funcargtypes[1];
+ TsmRoutine *tsm;
+ List *fargs;
+ ListCell *larg,
+ *ltyp;
+
+ /*
+ * To validate the sample method name, look up the handler function, which
+ * has the same name, one dummy INTERNAL argument, and a result type of
+ * tsm_handler. (Note: tablesample method names are not schema-qualified
+ * in the SQL standard; but since they are just functions to us, we allow
+ * schema qualification to resolve any potential ambiguity.)
+ */
+ funcargtypes[0] = INTERNALOID;
+
+ handlerOid = LookupFuncName(rts->method, 1, funcargtypes, true);
+
+ /*
+ * we want error to complain about no-such-method, not no-such-function;
+ * so the lookup result is tested here and we raise our own error.
+ * NOTE(review): the final "true" passed to LookupFuncName above is
+ * presumably what suppresses its own not-found error -- confirm.
+ */
+ if (!OidIsValid(handlerOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("tablesample method %s does not exist",
+ NameListToString(rts->method)),
+ parser_errposition(pstate, rts->location)));
+
+ /* check that handler has correct return type */
+ if (get_func_rettype(handlerOid) != TSM_HANDLEROID)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("function %s must return type \"tsm_handler\"",
+ NameListToString(rts->method)),
+ parser_errposition(pstate, rts->location)));
+
+ /* OK, run the handler to get TsmRoutine, for argument type info */
+ tsm = GetTsmRoutine(handlerOid);
+
+ tablesample = makeNode(TableSampleClause);
+ tablesample->tsmhandler = handlerOid;
+
+ /*
+ * check user provided the expected number of arguments; this must pass
+ * before the forboth() loop below, which walks both lists in step
+ */
+ if (list_length(rts->args) != list_length(tsm->parameterTypes))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+ errmsg_plural("tablesample method %s requires %d argument, not %d",
+ "tablesample method %s requires %d arguments, not %d",
+ list_length(tsm->parameterTypes),
+ NameListToString(rts->method),
+ list_length(tsm->parameterTypes),
+ list_length(rts->args)),
+ parser_errposition(pstate, rts->location)));
+
+ /*
+ * Transform the arguments, typecasting them as needed. Note we must also
+ * assign collations now, because assign_query_collations() doesn't
+ * examine any substructure of RTEs.
+ */
+ fargs = NIL;
+ forboth(larg, rts->args, ltyp, tsm->parameterTypes)
+ {
+ Node *arg = (Node *) lfirst(larg);
+ Oid argtype = lfirst_oid(ltyp);
+
+ arg = transformExpr(pstate, arg, EXPR_KIND_FROM_FUNCTION);
+ arg = coerce_to_specific_type(pstate, arg, argtype, "TABLESAMPLE");
+ assign_expr_collations(pstate, arg);
+ fargs = lappend(fargs, arg);
+ }
+ tablesample->args = fargs;
+
+ /*
+ * Process REPEATABLE (seed): rejected unless the method declares
+ * repeatable_across_queries; otherwise coerced to float8
+ */
+ if (rts->repeatable != NULL)
+ {
+ Node *arg;
+
+ if (!tsm->repeatable_across_queries)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("tablesample method %s does not support REPEATABLE",
+ NameListToString(rts->method)),
+ parser_errposition(pstate, rts->location)));
+
+ arg = transformExpr(pstate, rts->repeatable, EXPR_KIND_FROM_FUNCTION);
+ arg = coerce_to_specific_type(pstate, arg, FLOAT8OID, "REPEATABLE");
+ assign_expr_collations(pstate, arg);
+ tablesample->repeatable = (Expr *) arg;
+ }
+ else
+ tablesample->repeatable = NULL;
+
+ return tablesample;
+}
+
/*
* transformFromClauseItem -
rtr->rtindex = rtindex;
return (Node *) rtr;
}
+ else if (IsA(n, RangeTableSample))
+ {
+ /* TABLESAMPLE clause (wrapping some other valid FROM node) */
+ RangeTableSample *rts = (RangeTableSample *) n;
+ Node *rel;
+ RangeTblRef *rtr;
+ RangeTblEntry *rte;
+
+ /* Recursively transform the contained relation */
+ rel = transformFromClauseItem(pstate, rts->relation,
+ top_rte, top_rti, namespace);
+ /* Currently, grammar could only return a RangeVar as contained rel */
+ Assert(IsA(rel, RangeTblRef));
+ rtr = (RangeTblRef *) rel;
+ rte = rt_fetch(rtr->rtindex, pstate->p_rtable);
+ /* We only support this on plain relations and matviews */
+ if (rte->relkind != RELKIND_RELATION &&
+ rte->relkind != RELKIND_MATVIEW)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("TABLESAMPLE clause can only be applied to tables and materialized views"),
+ parser_errposition(pstate, exprLocation(rts->relation))));
+
+ /* Transform TABLESAMPLE details and attach to the RTE */
+ rte->tablesample = transformRangeTableSample(pstate, rts);
+ return (Node *) rtr;
+ }
else if (IsA(n, JoinExpr))
{
/* A newfangled join expression */
return (Node *) j;
}
- else if (IsA(n, RangeTableSample))
- {
- /* Tablesample reference */
- RangeTableSample *rv = (RangeTableSample *) n;
- RangeTblRef *rtr;
- RangeTblEntry *rte = NULL;
- int rtindex;
-
- rte = transformTableSampleEntry(pstate, rv);
-
- /* assume new rte is at end */
- rtindex = list_length(pstate->p_rtable);
- Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
- *top_rte = rte;
- *top_rti = rtindex;
- *namespace = list_make1(makeDefaultNSItem(rte));
- rtr = makeNode(RangeTblRef);
- rtr->rtindex = rtindex;
- return (Node *) rtr;
- }
else
elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n));
return NULL; /* can't get here, keep compiler quiet */
#include "catalog/pg_aggregate.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
-#include "catalog/pg_tablesample_method.h"
#include "funcapi.h"
#include "lib/stringinfo.h"
#include "nodes/makefuncs.h"
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parse_func.h"
-#include "parser/parse_expr.h"
#include "parser/parse_relation.h"
#include "parser/parse_target.h"
#include "parser/parse_type.h"
}
-/*
- * ParseTableSample
- *
- * Parse TABLESAMPLE clause and process the arguments
- */
-TableSampleClause *
-ParseTableSample(ParseState *pstate, char *samplemethod, Node *repeatable,
- List *sampleargs, int location)
-{
- HeapTuple tuple;
- Form_pg_tablesample_method tsm;
- Form_pg_proc procform;
- TableSampleClause *tablesample;
- List *fargs;
- ListCell *larg;
- int nargs,
- initnargs;
- Oid init_arg_types[FUNC_MAX_ARGS];
-
- /* Load the tablesample method */
- tuple = SearchSysCache1(TABLESAMPLEMETHODNAME, PointerGetDatum(samplemethod));
- if (!HeapTupleIsValid(tuple))
- ereport(ERROR,
- (errcode(ERRCODE_UNDEFINED_OBJECT),
- errmsg("tablesample method \"%s\" does not exist",
- samplemethod),
- parser_errposition(pstate, location)));
-
- tablesample = makeNode(TableSampleClause);
- tablesample->tsmid = HeapTupleGetOid(tuple);
-
- tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple);
-
- tablesample->tsmseqscan = tsm->tsmseqscan;
- tablesample->tsmpagemode = tsm->tsmpagemode;
- tablesample->tsminit = tsm->tsminit;
- tablesample->tsmnextblock = tsm->tsmnextblock;
- tablesample->tsmnexttuple = tsm->tsmnexttuple;
- tablesample->tsmexaminetuple = tsm->tsmexaminetuple;
- tablesample->tsmend = tsm->tsmend;
- tablesample->tsmreset = tsm->tsmreset;
- tablesample->tsmcost = tsm->tsmcost;
-
- ReleaseSysCache(tuple);
-
- /* Validate the parameters against init function definition. */
- tuple = SearchSysCache1(PROCOID,
- ObjectIdGetDatum(tablesample->tsminit));
-
- if (!HeapTupleIsValid(tuple)) /* should not happen */
- elog(ERROR, "cache lookup failed for function %u",
- tablesample->tsminit);
-
- procform = (Form_pg_proc) GETSTRUCT(tuple);
- initnargs = procform->pronargs;
- Assert(initnargs >= 3);
-
- /*
- * First parameter is used to pass the SampleScanState, second is seed
- * (REPEATABLE), skip the processing for them here, just assert that the
- * types are correct.
- */
- Assert(procform->proargtypes.values[0] == INTERNALOID);
- Assert(procform->proargtypes.values[1] == INT4OID);
- initnargs -= 2;
- memcpy(init_arg_types, procform->proargtypes.values + 2,
- initnargs * sizeof(Oid));
-
- /* Now we are done with the catalog */
- ReleaseSysCache(tuple);
-
- /* Process repeatable (seed) */
- if (repeatable != NULL)
- {
- Node *arg = repeatable;
-
- if (arg && IsA(arg, A_Const))
- {
- A_Const *con = (A_Const *) arg;
-
- if (con->val.type == T_Null)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("REPEATABLE clause must be NOT NULL numeric value"),
- parser_errposition(pstate, con->location)));
-
- }
-
- arg = transformExpr(pstate, arg, EXPR_KIND_FROM_FUNCTION);
- arg = coerce_to_specific_type(pstate, arg, INT4OID, "REPEATABLE");
- tablesample->repeatable = arg;
- }
- else
- tablesample->repeatable = NULL;
-
- /* Check user provided expected number of arguments. */
- if (list_length(sampleargs) != initnargs)
- ereport(ERROR,
- (errcode(ERRCODE_TOO_MANY_ARGUMENTS),
- errmsg_plural("tablesample method \"%s\" expects %d argument got %d",
- "tablesample method \"%s\" expects %d arguments got %d",
- initnargs,
- samplemethod,
- initnargs, list_length(sampleargs)),
- parser_errposition(pstate, location)));
-
- /* Transform the arguments, typecasting them as needed. */
- fargs = NIL;
- nargs = 0;
- foreach(larg, sampleargs)
- {
- Node *inarg = (Node *) lfirst(larg);
- Node *arg = transformExpr(pstate, inarg, EXPR_KIND_FROM_FUNCTION);
- Oid argtype = exprType(arg);
-
- if (argtype != init_arg_types[nargs])
- {
- if (!can_coerce_type(1, &argtype, &init_arg_types[nargs],
- COERCION_IMPLICIT))
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("wrong parameter %d for tablesample method \"%s\"",
- nargs + 1, samplemethod),
- errdetail("Expected type %s got %s.",
- format_type_be(init_arg_types[nargs]),
- format_type_be(argtype)),
- parser_errposition(pstate, exprLocation(inarg))));
-
- arg = coerce_type(pstate, arg, argtype, init_arg_types[nargs], -1,
- COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, -1);
- }
-
- fargs = lappend(fargs, arg);
- nargs++;
- }
-
- /* Pass the arguments down */
- tablesample->args = fargs;
-
- return tablesample;
-}
-
/* func_match_argtypes()
*
* Given a list of candidate functions (having the right name and number
switch (rte->rtekind)
{
+ case RTE_RELATION:
+ sub_action->hasSubLinks =
+ checkExprHasSubLink((Node *) rte->tablesample);
+ break;
case RTE_FUNCTION:
sub_action->hasSubLinks =
checkExprHasSubLink((Node *) rte->functions);
}
+/*
+ * tsm_handler_in - input routine for pseudo-type TSM_HANDLER.
+ *
+ * Always reports ERRCODE_FEATURE_NOT_SUPPORTED: a tsm_handler value cannot
+ * be supplied as input.  The trailing PG_RETURN_VOID() exists only to keep
+ * the compiler quiet.
+ */
+Datum
+tsm_handler_in(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot accept a value of type tsm_handler")));
+
+ PG_RETURN_VOID(); /* keep compiler quiet */
+}
+
+/*
+ * tsm_handler_out - output routine for pseudo-type TSM_HANDLER.
+ *
+ * Always reports ERRCODE_FEATURE_NOT_SUPPORTED: a tsm_handler value cannot
+ * be displayed.  The trailing PG_RETURN_VOID() exists only to keep the
+ * compiler quiet.
+ */
+Datum
+tsm_handler_out(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot display a value of type tsm_handler")));
+
+ PG_RETURN_VOID(); /* keep compiler quiet */
+}
+
+
/*
* internal_in - input routine for pseudo-type INTERNAL.
*/
#include "catalog/pg_opclass.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
-#include "catalog/pg_tablesample_method.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
int prettyFlags);
static void make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
int prettyFlags, int wrapColumn);
-static void get_tablesample_def(TableSampleClause *tablesample,
- deparse_context *context);
static void get_query_def(Query *query, StringInfo buf, List *parentnamespace,
TupleDesc resultDesc,
int prettyFlags, int wrapColumn, int startIndent);
static void get_from_clause_coldeflist(RangeTblFunction *rtfunc,
deparse_columns *colinfo,
deparse_context *context);
+static void get_tablesample_def(TableSampleClause *tablesample,
+ deparse_context *context);
static void get_opclass_name(Oid opclass, Oid actual_datatype,
StringInfo buf);
static Node *processIndirection(Node *node, deparse_context *context,
heap_close(ev_relation, AccessShareLock);
}
-/* ----------
- * get_tablesample_def - Convert TableSampleClause back to SQL
- * ----------
- */
-static void
-get_tablesample_def(TableSampleClause *tablesample, deparse_context *context)
-{
- StringInfo buf = context->buf;
- HeapTuple tuple;
- Form_pg_tablesample_method tsm;
- char *tsmname;
- int nargs;
- ListCell *l;
-
- /* Load the tablesample method */
- tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tablesample->tsmid));
- if (!HeapTupleIsValid(tuple))
- ereport(ERROR,
- (errcode(ERRCODE_UNDEFINED_OBJECT),
- errmsg("cache lookup failed for tablesample method %u",
- tablesample->tsmid)));
-
- tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple);
- tsmname = NameStr(tsm->tsmname);
- appendStringInfo(buf, " TABLESAMPLE %s (", quote_identifier(tsmname));
-
- ReleaseSysCache(tuple);
-
- nargs = 0;
- foreach(l, tablesample->args)
- {
- if (nargs++ > 0)
- appendStringInfoString(buf, ", ");
- get_rule_expr((Node *) lfirst(l), context, true);
- }
- appendStringInfoChar(buf, ')');
-
- if (tablesample->repeatable != NULL)
- {
- appendStringInfoString(buf, " REPEATABLE (");
- get_rule_expr(tablesample->repeatable, context, true);
- appendStringInfoChar(buf, ')');
- }
-}
/* ----------
* get_query_def - Parse back one query parsetree
only_marker(rte),
generate_relation_name(rte->relid,
context->namespaces));
-
- if (rte->tablesample)
- get_tablesample_def(rte->tablesample, context);
break;
case RTE_SUBQUERY:
/* Subquery RTE */
/* Else print column aliases as needed */
get_column_alias_list(colinfo, context);
}
+
+ /* Tablesample clause must go after any alias */
+ if (rte->rtekind == RTE_RELATION && rte->tablesample)
+ get_tablesample_def(rte->tablesample, context);
}
else if (IsA(jtnode, JoinExpr))
{
appendStringInfoChar(buf, ')');
}
+/*
+ * get_tablesample_def - print a TableSampleClause
+ *
+ * Appends " TABLESAMPLE method (arg, ...)" to context->buf, followed by
+ * " REPEATABLE (expr)" when the clause has a repeatable expression.  The
+ * method is printed using the name of its handler function, and each
+ * argument is deparsed with get_rule_expr().
+ */
+static void
+get_tablesample_def(TableSampleClause *tablesample, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ Oid argtypes[1];
+ int nargs;
+ ListCell *l;
+
+ /*
+ * We should qualify the handler's function name if it wouldn't be
+ * resolved by lookup in the current search path.
+ *
+ * The handler is described to generate_function_name() as taking one
+ * INTERNAL argument, matching how the parser looks it up.
+ */
+ argtypes[0] = INTERNALOID;
+ appendStringInfo(buf, " TABLESAMPLE %s (",
+ generate_function_name(tablesample->tsmhandler, 1,
+ NIL, argtypes,
+ false, NULL, EXPR_KIND_NONE));
+
+ nargs = 0; /* counts arguments printed so far, to place separators */
+ foreach(l, tablesample->args)
+ {
+ if (nargs++ > 0)
+ appendStringInfoString(buf, ", ");
+ get_rule_expr((Node *) lfirst(l), context, false);
+ }
+ appendStringInfoChar(buf, ')');
+
+ if (tablesample->repeatable != NULL)
+ {
+ appendStringInfoString(buf, " REPEATABLE (");
+ get_rule_expr((Node *) tablesample->repeatable, context, false);
+ appendStringInfoChar(buf, ')');
+ }
+}
+
/*
* get_opclass_name - fetch name of an index operator class
*
#include "catalog/pg_range.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_transform.h"
-#include "catalog/pg_tablesample_method.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
else
return InvalidOid;
}
-
-/* ---------- PG_TABLESAMPLE_METHOD CACHE ---------- */
-
-/*
- * get_tablesample_method_name - given a tablesample method OID,
- * look up the name or NULL if not found
- */
-char *
-get_tablesample_method_name(Oid tsmid)
-{
- HeapTuple tuple;
-
- tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tsmid));
- if (HeapTupleIsValid(tuple))
- {
- Form_pg_tablesample_method tup =
- (Form_pg_tablesample_method) GETSTRUCT(tuple);
- char *result;
-
- result = pstrdup(NameStr(tup->tsmname));
- ReleaseSysCache(tuple);
- return result;
- }
- else
- return NULL;
-}
#include "catalog/pg_shseclabel.h"
#include "catalog/pg_replication_origin.h"
#include "catalog/pg_statistic.h"
-#include "catalog/pg_tablesample_method.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_transform.h"
#include "catalog/pg_ts_config.h"
},
128
},
- {TableSampleMethodRelationId, /* TABLESAMPLEMETHODNAME */
- TableSampleMethodNameIndexId,
- 1,
- {
- Anum_pg_tablesample_method_tsmname,
- 0,
- 0,
- 0,
- },
- 2
- },
- {TableSampleMethodRelationId, /* TABLESAMPLEMETHODOID */
- TableSampleMethodOidIndexId,
- 1,
- {
- ObjectIdAttributeNumber,
- 0,
- 0,
- 0,
- },
- 2
- },
{TableSpaceRelationId, /* TABLESPACEOID */
TablespaceOidIndexId,
1,
2201B E ERRCODE_INVALID_REGULAR_EXPRESSION invalid_regular_expression
2201W E ERRCODE_INVALID_ROW_COUNT_IN_LIMIT_CLAUSE invalid_row_count_in_limit_clause
2201X E ERRCODE_INVALID_ROW_COUNT_IN_RESULT_OFFSET_CLAUSE invalid_row_count_in_result_offset_clause
+2202H E ERRCODE_INVALID_TABLESAMPLE_ARGUMENT invalid_tablesample_argument
+2202G E ERRCODE_INVALID_TABLESAMPLE_REPEAT invalid_tablesample_repeat
22009 E ERRCODE_INVALID_TIME_ZONE_DISPLACEMENT_VALUE invalid_time_zone_displacement_value
2200C E ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER invalid_use_of_escape_character
2200G E ERRCODE_MOST_SPECIFIC_TYPE_MISMATCH most_specific_type_mismatch
void
sampler_random_init_state(long seed, SamplerRandomState randstate)
{
- randstate[0] = RAND48_SEED_0;
+ randstate[0] = 0x330e; /* literal replaces RAND48_SEED_0; same as pg_erand48, but could be anything */
randstate[1] = (unsigned short) seed;
randstate[2] = (unsigned short) (seed >> 16);
}
" WHERE substring(pg_catalog.quote_ident(evtname),1,%d)='%s'"
#define Query_for_list_of_tablesample_methods \
-" SELECT pg_catalog.quote_ident(tsmname) "\
-" FROM pg_catalog.pg_tablesample_method "\
-" WHERE substring(pg_catalog.quote_ident(tsmname),1,%d)='%s'"
+" SELECT pg_catalog.quote_ident(proname) "\
+" FROM pg_catalog.pg_proc "\
+" WHERE prorettype = 'pg_catalog.tsm_handler'::pg_catalog.regtype AND "\
+" proargtypes[0] = 'pg_catalog.internal'::pg_catalog.regtype AND "\
+" substring(pg_catalog.quote_ident(proname),1,%d)='%s'"
#define Query_for_list_of_policies \
" SELECT pg_catalog.quote_ident(polname) "\
-" FROM pg_catalog.pg_policy " \
+" FROM pg_catalog.pg_policy "\
" WHERE substring(pg_catalog.quote_ident(polname),1,%d)='%s'"
#define Query_for_list_of_tables_for_policy \
int nkeys, ScanKey key);
extern HeapScanDesc heap_beginscan_sampling(Relation relation,
Snapshot snapshot, int nkeys, ScanKey key,
- bool allow_strat, bool allow_pagemode);
+ bool allow_strat, bool allow_sync, bool allow_pagemode);
extern void heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk,
BlockNumber endBlk);
extern void heapgetpage(HeapScanDesc scan, BlockNumber page);
extern void heap_rescan(HeapScanDesc scan, ScanKey key);
+extern void heap_rescan_set_params(HeapScanDesc scan, ScanKey key,
+ bool allow_strat, bool allow_sync, bool allow_pagemode);
extern void heap_endscan(HeapScanDesc scan);
extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction);
+++ /dev/null
-/*-------------------------------------------------------------------------
- *
- * tablesample.h
- * Public header file for TABLESAMPLE clause interface
- *
- *
- * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/include/access/tablesample.h
- *
- *-------------------------------------------------------------------------
- */
-#ifndef TABLESAMPLE_H
-#define TABLESAMPLE_H
-
-#include "access/relscan.h"
-#include "executor/executor.h"
-
-typedef struct TableSampleDesc
-{
- HeapScanDesc heapScan;
- TupleDesc tupDesc; /* Mostly useful for tsmexaminetuple */
-
- void *tsmdata; /* private method data */
-
- /* These point to he function of the TABLESAMPLE Method. */
- FmgrInfo tsminit;
- FmgrInfo tsmnextblock;
- FmgrInfo tsmnexttuple;
- FmgrInfo tsmexaminetuple;
- FmgrInfo tsmreset;
- FmgrInfo tsmend;
-} TableSampleDesc;
-
-
-extern TableSampleDesc *tablesample_init(SampleScanState *scanstate,
- TableSampleClause *tablesample);
-extern HeapTuple tablesample_getnext(TableSampleDesc *desc);
-extern void tablesample_reset(TableSampleDesc *desc);
-extern void tablesample_end(TableSampleDesc *desc);
-extern HeapTuple tablesample_source_getnext(TableSampleDesc *desc);
-extern HeapTuple tablesample_source_gettup(TableSampleDesc *desc, ItemPointer tid,
- bool *visible);
-
-extern Datum tsm_system_init(PG_FUNCTION_ARGS);
-extern Datum tsm_system_nextblock(PG_FUNCTION_ARGS);
-extern Datum tsm_system_nexttuple(PG_FUNCTION_ARGS);
-extern Datum tsm_system_end(PG_FUNCTION_ARGS);
-extern Datum tsm_system_reset(PG_FUNCTION_ARGS);
-extern Datum tsm_system_cost(PG_FUNCTION_ARGS);
-
-extern Datum tsm_bernoulli_init(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_nextblock(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_end(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_reset(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_cost(PG_FUNCTION_ARGS);
-
-
-#endif
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * tsmapi.h
+ * API for tablesample methods
+ *
+ * Copyright (c) 2015, PostgreSQL Global Development Group
+ *
+ * src/include/access/tsmapi.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TSMAPI_H
+#define TSMAPI_H
+
+#include "nodes/execnodes.h"
+#include "nodes/relation.h"
+
+
+/*
+ * Callback function signatures --- see tablesample-method.sgml for more info.
+ */
+
+typedef void (*SampleScanGetSampleSize_function) (PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples);
+
+typedef void (*InitSampleScan_function) (SampleScanState *node,
+ int eflags);
+
+typedef void (*BeginSampleScan_function) (SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed);
+
+typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node);
+
+typedef OffsetNumber (*NextSampleTuple_function) (SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset);
+
+typedef void (*EndSampleScan_function) (SampleScanState *node);
+
+/*
+ * TsmRoutine is the struct returned by a tablesample method's handler
+ * function. It provides pointers to the callback functions needed by the
+ * planner and executor, as well as additional information about the method.
+ *
+ * More function pointers are likely to be added in the future.
+ * Therefore it's recommended that the handler initialize the struct with
+ * makeNode(TsmRoutine) so that all fields are set to NULL. This will
+ * ensure that no fields are accidentally left undefined.
+ *
+ * The struct is obtained from a handler function's OID via GetTsmRoutine().
+ */
+typedef struct TsmRoutine
+{
+ NodeTag type;
+
+ /*
+ * List of datatype OIDs for the arguments of the TABLESAMPLE clause.
+ * The parser requires exactly this many arguments and coerces each one
+ * to the corresponding type.
+ */
+ List *parameterTypes;
+
+ /*
+ * Can method produce repeatable samples across, or even within, queries?
+ * The parser rejects a REPEATABLE clause unless repeatable_across_queries
+ * is true.
+ */
+ bool repeatable_across_queries;
+ bool repeatable_across_scans;
+
+ /* Functions for planning a SampleScan on a physical table */
+ SampleScanGetSampleSize_function SampleScanGetSampleSize;
+
+ /* Functions for executing a SampleScan on a physical table */
+ InitSampleScan_function InitSampleScan; /* can be NULL */
+ BeginSampleScan_function BeginSampleScan;
+ NextSampleBlock_function NextSampleBlock; /* can be NULL; sample-scan costing then assumes sequential, not random, page access */
+ NextSampleTuple_function NextSampleTuple;
+ EndSampleScan_function EndSampleScan; /* can be NULL */
+} TsmRoutine;
+
+
+/* Functions in access/tablesample/tablesample.c */
+extern TsmRoutine *GetTsmRoutine(Oid tsmhandler);
+
+#endif /* TSMAPI_H */
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201507171
+#define CATALOG_VERSION_NO 201507252
#endif
DECLARE_UNIQUE_INDEX(pg_replication_origin_roname_index, 6002, on pg_replication_origin using btree(roname text_pattern_ops));
#define ReplicationOriginNameIndex 6002
-DECLARE_UNIQUE_INDEX(pg_tablesample_method_name_index, 3331, on pg_tablesample_method using btree(tsmname name_ops));
-#define TableSampleMethodNameIndexId 3331
-DECLARE_UNIQUE_INDEX(pg_tablesample_method_oid_index, 3332, on pg_tablesample_method using btree(oid oid_ops));
-#define TableSampleMethodOidIndexId 3332
-
/* last step of initialization script: build the indexes declared above */
BUILD_INDICES
DESCR("I/O");
DATA(insert OID = 3117 ( fdw_handler_out PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2275 "3115" _null_ _null_ _null_ _null_ _null_ fdw_handler_out _null_ _null_ _null_ ));
DESCR("I/O");
+DATA(insert OID = 3311 ( tsm_handler_in PGNSP PGUID 12 1 0 0 0 f f f f f f i 1 0 3310 "2275" _null_ _null_ _null_ _null_ _null_ tsm_handler_in _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 3312 ( tsm_handler_out PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2275 "3310" _null_ _null_ _null_ _null_ _null_ tsm_handler_out _null_ _null_ _null_ ));
+DESCR("I/O");
+
+/* tablesample method handlers */
+DATA(insert OID = 3313 ( bernoulli PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 3310 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_handler _null_ _null_ _null_ ));
+DESCR("BERNOULLI tablesample method handler");
+DATA(insert OID = 3314 ( system PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 3310 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_handler _null_ _null_ _null_ ));
+DESCR("SYSTEM tablesample method handler");
/* cryptographic */
DATA(insert OID = 2311 ( md5 PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ md5_text _null_ _null_ _null_ ));
DATA(insert OID = 6014 ( pg_show_replication_origin_status PGNSP PGUID 12 1 100 0 0 f f f f f t v 0 0 2249 "" "{26,25,3220,3220}" "{o,o,o,o}" "{local_id, external_id, remote_lsn, local_lsn}" _null_ _null_ pg_show_replication_origin_status _null_ _null_ _null_ ));
DESCR("get progress for all replication origins");
-/* tablesample */
-DATA(insert OID = 3335 ( tsm_system_init PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_system_init _null_ _null_ _null_ ));
-DESCR("tsm_system_init(internal)");
-DATA(insert OID = 3336 ( tsm_system_nextblock PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nextblock _null_ _null_ _null_ ));
-DESCR("tsm_system_nextblock(internal)");
-DATA(insert OID = 3337 ( tsm_system_nexttuple PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nexttuple _null_ _null_ _null_ ));
-DESCR("tsm_system_nexttuple(internal)");
-DATA(insert OID = 3338 ( tsm_system_end PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_end _null_ _null_ _null_ ));
-DESCR("tsm_system_end(internal)");
-DATA(insert OID = 3339 ( tsm_system_reset PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_reset _null_ _null_ _null_ ));
-DESCR("tsm_system_reset(internal)");
-DATA(insert OID = 3340 ( tsm_system_cost PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_system_cost _null_ _null_ _null_ ));
-DESCR("tsm_system_cost(internal)");
-
-DATA(insert OID = 3341 ( tsm_bernoulli_init PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_init _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_init(internal)");
-DATA(insert OID = 3342 ( tsm_bernoulli_nextblock PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nextblock _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_nextblock(internal)");
-DATA(insert OID = 3343 ( tsm_bernoulli_nexttuple PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nexttuple _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_nexttuple(internal)");
-DATA(insert OID = 3344 ( tsm_bernoulli_end PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_end _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_end(internal)");
-DATA(insert OID = 3345 ( tsm_bernoulli_reset PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_reset _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_reset(internal)");
-DATA(insert OID = 3346 ( tsm_bernoulli_cost PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_cost _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_cost(internal)");
-
/*
* Symbolic values for provolatile column: these indicate whether the result
* of a function is dependent *only* on the values of its explicit arguments,
+++ /dev/null
-/*-------------------------------------------------------------------------
- *
- * pg_tablesample_method.h
- * definition of the table scan methods.
- *
- *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/include/catalog/pg_tablesample_method.h
- *
- *
- *-------------------------------------------------------------------------
- */
-#ifndef PG_TABLESAMPLE_METHOD_H
-#define PG_TABLESAMPLE_METHOD_H
-
-#include "catalog/genbki.h"
-#include "catalog/objectaddress.h"
-
-/* ----------------
- * pg_tablesample_method definition. cpp turns this into
- * typedef struct FormData_pg_tablesample_method
- * ----------------
- */
-#define TableSampleMethodRelationId 3330
-
-CATALOG(pg_tablesample_method,3330)
-{
- NameData tsmname; /* tablesample method name */
- bool tsmseqscan; /* does this method scan whole table
- * sequentially? */
- bool tsmpagemode; /* does this method scan page at a time? */
- regproc tsminit; /* init scan function */
- regproc tsmnextblock; /* function returning next block to sample or
- * InvalidBlockOffset if finished */
- regproc tsmnexttuple; /* function returning next tuple offset from
- * current block or InvalidOffsetNumber if end
- * of the block was reacher */
- regproc tsmexaminetuple;/* optional function which can examine tuple
- * contents and decide if tuple should be
- * returned or not */
- regproc tsmend; /* end scan function */
- regproc tsmreset; /* reset state - used by rescan */
- regproc tsmcost; /* costing function */
-} FormData_pg_tablesample_method;
-
-/* ----------------
- * Form_pg_tablesample_method corresponds to a pointer to a tuple with
- * the format of pg_tablesample_method relation.
- * ----------------
- */
-typedef FormData_pg_tablesample_method *Form_pg_tablesample_method;
-
-/* ----------------
- * compiler constants for pg_tablesample_method
- * ----------------
- */
-#define Natts_pg_tablesample_method 10
-#define Anum_pg_tablesample_method_tsmname 1
-#define Anum_pg_tablesample_method_tsmseqscan 2
-#define Anum_pg_tablesample_method_tsmpagemode 3
-#define Anum_pg_tablesample_method_tsminit 4
-#define Anum_pg_tablesample_method_tsmnextblock 5
-#define Anum_pg_tablesample_method_tsmnexttuple 6
-#define Anum_pg_tablesample_method_tsmexaminetuple 7
-#define Anum_pg_tablesample_method_tsmend 8
-#define Anum_pg_tablesample_method_tsmreset 9
-#define Anum_pg_tablesample_method_tsmcost 10
-
-/* ----------------
- * initial contents of pg_tablesample_method
- * ----------------
- */
-
-DATA(insert OID = 3333 ( system false true tsm_system_init tsm_system_nextblock tsm_system_nexttuple - tsm_system_end tsm_system_reset tsm_system_cost ));
-DESCR("SYSTEM table sampling method");
-DATA(insert OID = 3334 ( bernoulli true false tsm_bernoulli_init tsm_bernoulli_nextblock tsm_bernoulli_nexttuple - tsm_bernoulli_end tsm_bernoulli_reset tsm_bernoulli_cost ));
-DESCR("BERNOULLI table sampling method");
-
-#endif /* PG_TABLESAMPLE_METHOD_H */
#define ANYENUMOID 3500
DATA(insert OID = 3115 ( fdw_handler PGNSP PGUID 4 t p P f t \054 0 0 0 fdw_handler_in fdw_handler_out - - - - - i p f 0 -1 0 0 _null_ _null_ _null_ ));
#define FDW_HANDLEROID 3115
+DATA(insert OID = 3310 ( tsm_handler PGNSP PGUID 4 t p P f t \054 0 0 0 tsm_handler_in tsm_handler_out - - - - - i p f 0 -1 0 0 _null_ _null_ _null_ ));
+#define TSM_HANDLEROID 3310
DATA(insert OID = 3831 ( anyrange PGNSP PGUID -1 f p P f t \054 0 0 0 anyrange_in anyrange_out - - - - - d x f 0 -1 0 0 _null_ _null_ _null_ ));
#define ANYRANGEOID 3831
*
*
*
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/executor/nodeSamplescan.h
*/
typedef ScanState SeqScanState;
-/*
- * SampleScan
+/* ----------------
+ * SampleScanState information
+ * ----------------
*/
typedef struct SampleScanState
{
ScanState ss;
- struct TableSampleDesc *tsdesc;
+ List *args; /* expr states for TABLESAMPLE params */
+ ExprState *repeatable; /* expr state for REPEATABLE expr */
+ /* use struct pointer to avoid including tsmapi.h here */
+ struct TsmRoutine *tsmroutine; /* descriptor for tablesample method */
+ void *tsm_state; /* tablesample method can keep state here */
+ bool use_bulkread; /* use bulkread buffer access strategy? */
+ bool use_pagemode; /* use page-at-a-time visibility checking? */
+ bool begun; /* false means need to call BeginSampleScan */
+ uint32 seed; /* random seed */
} SampleScanState;
/*
T_BitmapOr,
T_Scan,
T_SeqScan,
+ T_SampleScan,
T_IndexScan,
T_IndexOnlyScan,
T_BitmapIndexScan,
T_ValuesScan,
T_CteScan,
T_WorkTableScan,
- T_SampleScan,
T_ForeignScan,
T_CustomScan,
T_Join,
T_WindowDef,
T_RangeSubselect,
T_RangeFunction,
+ T_RangeTableSample,
T_TypeName,
T_ColumnDef,
T_IndexElem,
T_DefElem,
T_RangeTblEntry,
T_RangeTblFunction,
+ T_TableSampleClause,
T_WithCheckOption,
T_SortGroupClause,
T_GroupingSet,
T_OnConflictClause,
T_CommonTableExpr,
T_RoleSpec,
- T_RangeTableSample,
- T_TableSampleClause,
/*
* TAGS FOR REPLICATION GRAMMAR PARSE NODES (replnodes.h)
T_WindowObjectData, /* private in nodeWindowAgg.c */
T_TIDBitmap, /* in nodes/tidbitmap.h */
T_InlineCodeBlock, /* in nodes/parsenodes.h */
- T_FdwRoutine /* in foreign/fdwapi.h */
+ T_FdwRoutine, /* in foreign/fdwapi.h */
+ T_TsmRoutine /* in access/tsmapi.h */
} NodeTag;
/*
int location; /* token location, or -1 if unknown */
} FuncCall;
-/*
- * TableSampleClause - a sampling method information
- */
-typedef struct TableSampleClause
-{
- NodeTag type;
- Oid tsmid;
- bool tsmseqscan;
- bool tsmpagemode;
- Oid tsminit;
- Oid tsmnextblock;
- Oid tsmnexttuple;
- Oid tsmexaminetuple;
- Oid tsmend;
- Oid tsmreset;
- Oid tsmcost;
- Node *repeatable;
- List *args;
-} TableSampleClause;
-
/*
* A_Star - '*' representing all columns of a table or compound field
*
} RangeFunction;
/*
- * RangeTableSample - represents <table> TABLESAMPLE <method> (<params>) REPEATABLE (<num>)
+ * RangeTableSample - TABLESAMPLE appearing in a raw FROM clause
*
- * SQL Standard specifies only one parameter which is percentage. But we allow
- * custom tablesample methods which may need different input arguments so we
- * accept list of arguments.
+ * This node, appearing only in raw parse trees, represents
+ * <relation> TABLESAMPLE <method> (<params>) REPEATABLE (<num>)
+ * Currently, the <relation> can only be a RangeVar, but we might in future
+ * allow RangeSubselect and other options. Note that the RangeTableSample
+ * is wrapped around the node representing the <relation>, rather than being
+ * a subfield of it.
*/
typedef struct RangeTableSample
{
NodeTag type;
- RangeVar *relation;
- char *method; /* sampling method */
- Node *repeatable;
- List *args; /* arguments for sampling method */
+ Node *relation; /* relation to be sampled */
+ List *method; /* sampling method name (possibly qualified) */
+ List *args; /* argument(s) for sampling method */
+ Node *repeatable; /* REPEATABLE expression, or NULL if none */
+ int location; /* method name location, or -1 if unknown */
} RangeTableSample;
/*
*/
Oid relid; /* OID of the relation */
char relkind; /* relation kind (see pg_class.relkind) */
- TableSampleClause *tablesample; /* sampling method and parameters */
+ struct TableSampleClause *tablesample; /* sampling info, or NULL */
/*
* Fields valid for a subquery RTE (else NULL):
Bitmapset *funcparams; /* PARAM_EXEC Param IDs affecting this func */
} RangeTblFunction;
+/*
+ * TableSampleClause - TABLESAMPLE appearing in a transformed FROM clause
+ *
+ * Unlike RangeTableSample, this is a subnode of the relevant RangeTblEntry.
+ * It is referenced from RangeTblEntry.tablesample (NULL when the relation is
+ * not sampled) and from the SampleScan plan node's tablesample field.
+ */
+typedef struct TableSampleClause
+{
+ NodeTag type;
+ Oid tsmhandler; /* OID of the tablesample handler function;
+ * GetTsmRoutine() takes this OID */
+ List *args; /* tablesample argument expression(s) */
+ Expr *repeatable; /* REPEATABLE expression, or NULL if none */
+} TableSampleClause;
+
/*
* WithCheckOption -
* representation of WITH CHECK OPTION checks to be applied to new tuples
typedef struct AlterObjectSchemaStmt
{
NodeTag type;
- ObjectType objectType; /* OBJECT_TABLE, OBJECT_TYPE, etc */
+ ObjectType objectType; /* OBJECT_TABLE, OBJECT_TYPE, etc */
RangeVar *relation; /* in case it's a table */
List *object; /* in case it's some other object */
List *objarg; /* argument types, if applicable */
typedef struct AlterOwnerStmt
{
NodeTag type;
- ObjectType objectType; /* OBJECT_TABLE, OBJECT_TYPE, etc */
+ ObjectType objectType; /* OBJECT_TABLE, OBJECT_TYPE, etc */
RangeVar *relation; /* in case it's a table */
List *object; /* in case it's some other object */
List *objarg; /* argument types, if applicable */
* table sample scan node
* ----------------
*/
-typedef Scan SampleScan;
+typedef struct SampleScan
+{
+ Scan scan;
+ /* use struct pointer to avoid including parsenodes.h here */
+ struct TableSampleClause *tablesample;
+} SampleScan;
/* ----------------
* index scan node
double index_pages, PlannerInfo *root);
extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
ParamPathInfo *param_info);
-extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel);
+extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
+ ParamPathInfo *param_info);
extern void cost_index(IndexPath *path, PlannerInfo *root,
double loop_count);
extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
extern Node *ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
FuncCall *fn, int location);
-extern TableSampleClause *ParseTableSample(ParseState *pstate,
- char *samplemethod,
- Node *repeatable, List *args,
- int location);
-
extern FuncDetailCode func_get_detail(List *funcname,
List *fargs, List *fargnames,
int nargs, Oid *argtypes,
#endif
#endif
-#define RAND48_SEED_0 (0x330e)
-#define RAND48_SEED_1 (0xabcd)
-#define RAND48_SEED_2 (0x1234)
-
extern double pg_erand48(unsigned short xseed[3]);
extern long pg_lrand48(void);
extern void pg_srand48(long seed);
extern Datum language_handler_out(PG_FUNCTION_ARGS);
extern Datum fdw_handler_in(PG_FUNCTION_ARGS);
extern Datum fdw_handler_out(PG_FUNCTION_ARGS);
+extern Datum tsm_handler_in(PG_FUNCTION_ARGS);
+extern Datum tsm_handler_out(PG_FUNCTION_ARGS);
extern Datum internal_in(PG_FUNCTION_ARGS);
extern Datum internal_out(PG_FUNCTION_ARGS);
extern Datum opaque_in(PG_FUNCTION_ARGS);
extern Datum ginarrayconsistent(PG_FUNCTION_ARGS);
extern Datum ginarraytriconsistent(PG_FUNCTION_ARGS);
+/* access/tablesample/bernoulli.c */
+extern Datum tsm_bernoulli_handler(PG_FUNCTION_ARGS);
+
+/* access/tablesample/system.c */
+extern Datum tsm_system_handler(PG_FUNCTION_ARGS);
+
/* access/transam/twophase.c */
extern Datum pg_prepared_xact(PG_FUNCTION_ARGS);
extern char *get_namespace_name(Oid nspid);
extern char *get_namespace_name_or_temp(Oid nspid);
extern Oid get_range_subtype(Oid rangeOid);
-extern char *get_tablesample_method_name(Oid tsmid);
#define type_is_array(typid) (get_element_type(typid) != InvalidOid)
/* type_is_array_domain accepts both plain arrays and domains over arrays */
REPLORIGNAME,
RULERELNAME,
STATRELATTINH,
- TABLESAMPLEMETHODNAME,
- TABLESAMPLEMETHODOID,
TABLESPACEOID,
TRFOID,
TRFTYPELANG,
#include <math.h>
+#define RAND48_SEED_0 (0x330e)
+#define RAND48_SEED_1 (0xabcd)
+#define RAND48_SEED_2 (0x1234)
#define RAND48_MULT_0 (0xe66d)
#define RAND48_MULT_1 (0xdeec)
#define RAND48_MULT_2 (0x0005)
44 | 8 | 1 | rls_regress_user2 | great manga | manga
(4 rows)
-SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
-NOTICE: f_leak => my first novel
+-- try a sampled version
+SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
+ WHERE f_leak(dtitle) ORDER BY did;
NOTICE: f_leak => my first manga
NOTICE: f_leak => great science fiction
+NOTICE: f_leak => great manga
did | cid | dlevel | dauthor | dtitle
-----+-----+--------+-------------------+-----------------------
- 1 | 11 | 1 | rls_regress_user1 | my first novel
4 | 44 | 1 | rls_regress_user1 | my first manga
6 | 22 | 1 | rls_regress_user2 | great science fiction
+ 8 | 44 | 1 | rls_regress_user2 | great manga
(3 rows)
-- viewpoint from rls_regress_user2
44 | 8 | 1 | rls_regress_user2 | great manga | manga
(8 rows)
-SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
-NOTICE: f_leak => my first novel
-NOTICE: f_leak => my second novel
+-- try a sampled version
+SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
+ WHERE f_leak(dtitle) ORDER BY did;
NOTICE: f_leak => my first manga
+NOTICE: f_leak => my second manga
NOTICE: f_leak => great science fiction
-NOTICE: f_leak => great technology book
+NOTICE: f_leak => great manga
did | cid | dlevel | dauthor | dtitle
-----+-----+--------+-------------------+-----------------------
- 1 | 11 | 1 | rls_regress_user1 | my first novel
- 2 | 11 | 2 | rls_regress_user1 | my second novel
4 | 44 | 1 | rls_regress_user1 | my first manga
+ 5 | 44 | 2 | rls_regress_user1 | my second manga
6 | 22 | 1 | rls_regress_user2 | great science fiction
- 7 | 33 | 2 | rls_regress_user2 | great technology book
-(5 rows)
+ 8 | 44 | 1 | rls_regress_user2 | great manga
+(4 rows)
EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
QUERY PLAN
FROM ONLY road r,
real_city c
WHERE (c.outline ## r.thepath);
+test_tablesample_v1| SELECT test_tablesample.id
+ FROM test_tablesample TABLESAMPLE system ((10 * 2)) REPEATABLE (2);
+test_tablesample_v2| SELECT test_tablesample.id
+ FROM test_tablesample TABLESAMPLE system (99);
toyemp| SELECT emp.name,
emp.age,
emp.location,
pg_shdescription|t
pg_shseclabel|t
pg_statistic|t
-pg_tablesample_method|t
pg_tablespace|t
pg_transform|t
pg_trigger|t
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i;
-SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10);
+-- use fillfactor so we don't have to load too much data to get multiple pages
+INSERT INTO test_tablesample
+ SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i);
+SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (0);
id
----
- 0
- 1
- 2
3
4
5
- 9
-(7 rows)
-
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999);
- id
-----
6
7
8
-(3 rows)
+(6 rows)
-SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
- count
--------
- 10
-(1 row)
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (0);
+ id
+----
+(0 rows)
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
id
----
- 0
- 1
- 2
+ 3
+ 4
+ 5
6
7
8
- 9
-(7 rows)
+(6 rows)
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (0);
id
----
- 0
- 1
- 3
4
5
+ 6
+ 7
+ 8
(5 rows)
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (0);
id
----
- 0
- 5
-(2 rows)
+ 7
+(1 row)
-CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
-CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
-SELECT pg_get_viewdef('test_tablesample_v1'::regclass);
- pg_get_viewdef
---------------------------------------------------------------------------------
- SELECT test_tablesample.id +
- FROM test_tablesample TABLESAMPLE system (((10 * 2))::real) REPEATABLE (2);
+-- 100% should give repeatable count results (ie, all rows) in any case
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
+ count
+-------
+ 10
(1 row)
-SELECT pg_get_viewdef('test_tablesample_v2'::regclass);
- pg_get_viewdef
------------------------------------------------------------
- SELECT test_tablesample.id +
- FROM test_tablesample TABLESAMPLE system ((99)::real);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (1+2);
+ count
+-------
+ 10
+(1 row)
+
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (0.4);
+ count
+-------
+ 10
(1 row)
+CREATE VIEW test_tablesample_v1 AS
+ SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
+CREATE VIEW test_tablesample_v2 AS
+ SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
+\d+ test_tablesample_v1
+ View "public.test_tablesample_v1"
+ Column | Type | Modifiers | Storage | Description
+--------+---------+-----------+---------+-------------
+ id | integer | | plain |
+View definition:
+ SELECT test_tablesample.id
+ FROM test_tablesample TABLESAMPLE system ((10 * 2)) REPEATABLE (2);
+
+\d+ test_tablesample_v2
+ View "public.test_tablesample_v2"
+ Column | Type | Modifiers | Storage | Description
+--------+---------+-----------+---------+-------------
+ id | integer | | plain |
+View definition:
+ SELECT test_tablesample.id
+ FROM test_tablesample TABLESAMPLE system (99);
+
+-- check a sampled query doesn't affect cursor in progress
BEGIN;
-DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+DECLARE tablesample_cur CURSOR FOR
+ SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
FETCH FIRST FROM tablesample_cur;
id
----
- 0
+ 3
(1 row)
FETCH NEXT FROM tablesample_cur;
id
----
- 1
+ 4
(1 row)
FETCH NEXT FROM tablesample_cur;
id
----
- 2
+ 5
(1 row)
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
id
----
- 0
- 1
- 2
3
4
5
- 9
-(7 rows)
+ 6
+ 7
+ 8
+(6 rows)
FETCH NEXT FROM tablesample_cur;
id
FETCH FIRST FROM tablesample_cur;
id
----
- 0
+ 3
(1 row)
FETCH NEXT FROM tablesample_cur;
id
----
- 1
+ 4
(1 row)
FETCH NEXT FROM tablesample_cur;
id
----
- 2
+ 5
(1 row)
FETCH NEXT FROM tablesample_cur;
CLOSE tablesample_cur;
END;
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
- QUERY PLAN
--------------------------------------------------------------------------------
- Sample Scan (system) on test_tablesample (cost=0.00..26.35 rows=635 width=4)
+EXPLAIN (COSTS OFF)
+ SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (2);
+ QUERY PLAN
+--------------------------------------------------------------------
+ Sample Scan on test_tablesample
+ Sampling: system ('50'::real) REPEATABLE ('2'::double precision)
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM test_tablesample_v1;
+ QUERY PLAN
+--------------------------------------------------------------------
+ Sample Scan on test_tablesample
+ Sampling: system ('20'::real) REPEATABLE ('2'::double precision)
+(2 rows)
+
+-- check inheritance behavior
+explain (costs off)
+ select count(*) from person tablesample bernoulli (100);
+ QUERY PLAN
+-------------------------------------------------
+ Aggregate
+ -> Append
+ -> Sample Scan on person
+ Sampling: bernoulli ('100'::real)
+ -> Sample Scan on emp
+ Sampling: bernoulli ('100'::real)
+ -> Sample Scan on student
+ Sampling: bernoulli ('100'::real)
+ -> Sample Scan on stud_emp
+ Sampling: bernoulli ('100'::real)
+(10 rows)
+
+select count(*) from person tablesample bernoulli (100);
+ count
+-------
+ 58
(1 row)
-EXPLAIN SELECT * FROM test_tablesample_v1;
- QUERY PLAN
--------------------------------------------------------------------------------
- Sample Scan (system) on test_tablesample (cost=0.00..10.54 rows=254 width=4)
+select count(*) from person;
+ count
+-------
+ 58
+(1 row)
+
+-- check that collations get assigned within the tablesample arguments
+SELECT count(*) FROM test_tablesample TABLESAMPLE bernoulli (('1'::text < '0'::text)::int);
+ count
+-------
+ 0
+(1 row)
+
+-- check behavior during rescans, as well as correct handling of min/max pct
+select * from
+ (values (0),(100)) v(pct),
+ lateral (select count(*) from tenk1 tablesample bernoulli (pct)) ss;
+ pct | count
+-----+-------
+ 0 | 0
+ 100 | 10000
+(2 rows)
+
+select * from
+ (values (0),(100)) v(pct),
+ lateral (select count(*) from tenk1 tablesample system (pct)) ss;
+ pct | count
+-----+-------
+ 0 | 0
+ 100 | 10000
+(2 rows)
+
+explain (costs off)
+select pct, count(unique1) from
+ (values (0),(100)) v(pct),
+ lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+ group by pct;
+ QUERY PLAN
+--------------------------------------------------------
+ HashAggregate
+ Group Key: "*VALUES*".column1
+ -> Nested Loop
+ -> Values Scan on "*VALUES*"
+ -> Sample Scan on tenk1
+ Sampling: bernoulli ("*VALUES*".column1)
+(6 rows)
+
+select pct, count(unique1) from
+ (values (0),(100)) v(pct),
+ lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+ group by pct;
+ pct | count
+-----+-------
+ 100 | 10000
+(1 row)
+
+select pct, count(unique1) from
+ (values (0),(100)) v(pct),
+ lateral (select * from tenk1 tablesample system (pct)) ss
+ group by pct;
+ pct | count
+-----+-------
+ 100 | 10000
(1 row)
-- errors
SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
-ERROR: tablesample method "foobar" does not exist
+ERROR: tablesample method foobar does not exist
LINE 1: SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
- ^
+ ^
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (NULL);
+ERROR: TABLESAMPLE parameter cannot be null
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
-ERROR: REPEATABLE clause must be NOT NULL numeric value
-LINE 1: ... test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
- ^
+ERROR: TABLESAMPLE REPEATABLE parameter cannot be null
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1);
-ERROR: invalid sample size
-HINT: Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR: sample percentage must be between 0 and 100
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (200);
-ERROR: invalid sample size
-HINT: Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR: sample percentage must be between 0 and 100
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (-1);
-ERROR: invalid sample size
-HINT: Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR: sample percentage must be between 0 and 100
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (200);
-ERROR: invalid sample size
-HINT: Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR: sample percentage must be between 0 and 100
SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1);
-ERROR: TABLESAMPLE clause can only be used on tables and materialized views
+ERROR: TABLESAMPLE clause can only be applied to tables and materialized views
LINE 1: SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1)...
^
INSERT INTO test_tablesample_v1 VALUES(1);
HINT: To enable inserting into the view, provide an INSTEAD OF INSERT trigger or an unconditional ON INSERT DO INSTEAD rule.
WITH query_select AS (SELECT * FROM test_tablesample)
SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
-ERROR: TABLESAMPLE clause can only be used on tables and materialized views
+ERROR: TABLESAMPLE clause can only be applied to tables and materialized views
LINE 2: SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEA...
^
SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5);
ERROR: syntax error at or near "TABLESAMPLE"
LINE 1: ...CT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPL...
^
--- catalog sanity
-SELECT *
-FROM pg_tablesample_method
-WHERE tsminit IS NULL
- OR tsmseqscan IS NULL
- OR tsmpagemode IS NULL
- OR tsmnextblock IS NULL
- OR tsmnexttuple IS NULL
- OR tsmend IS NULL
- OR tsmreset IS NULL
- OR tsmcost IS NULL;
- tsmname | tsmseqscan | tsmpagemode | tsminit | tsmnextblock | tsmnexttuple | tsmexaminetuple | tsmend | tsmreset | tsmcost
----------+------------+-------------+---------+--------------+--------------+-----------------+--------+----------+---------
-(0 rows)
-
--- done
-DROP TABLE test_tablesample CASCADE;
-NOTICE: drop cascades to 2 other objects
-DETAIL: drop cascades to view test_tablesample_v1
-drop cascades to view test_tablesample_v2
test_range_excl
test_range_gist
test_range_spgist
+ test_tablesample
+ test_tablesample_v1
+ test_tablesample_v2
test_tsvector
testjsonb
text_tbl
tvvmv
varchar_tbl
xacttest
-(127 rows)
+(130 rows)
SELECT name(equipment(hobby_construct(text 'skywalking', text 'mer')));
name
test: replica_identity
test: rowsecurity
test: object_address
+test: tablesample
test: alter_generic
test: alter_operator
test: misc
test: xml
test: event_trigger
test: stats
-test: tablesample
SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
-SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
+-- try a sampled version
+SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
+ WHERE f_leak(dtitle) ORDER BY did;
-- viewpoint from rls_regress_user2
SET SESSION AUTHORIZATION rls_regress_user2;
SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
-SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
+-- try a sampled version
+SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
+ WHERE f_leak(dtitle) ORDER BY did;
EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle);
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
+CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10);
+-- use fillfactor so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i;
+INSERT INTO test_tablesample
+ SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i);
-SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10);
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999);
+SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (0);
+
+-- 100% should give repeatable count results (ie, all rows) in any case
SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100);
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (1+2);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (0.4);
-CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
-CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
-SELECT pg_get_viewdef('test_tablesample_v1'::regclass);
-SELECT pg_get_viewdef('test_tablesample_v2'::regclass);
+CREATE VIEW test_tablesample_v1 AS
+ SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
+CREATE VIEW test_tablesample_v2 AS
+ SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
+\d+ test_tablesample_v1
+\d+ test_tablesample_v2
+-- check a sampled query doesn't affect cursor in progress
BEGIN;
-DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+DECLARE tablesample_cur CURSOR FOR
+ SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
+
FETCH FIRST FROM tablesample_cur;
FETCH NEXT FROM tablesample_cur;
FETCH NEXT FROM tablesample_cur;
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
FETCH NEXT FROM tablesample_cur;
FETCH NEXT FROM tablesample_cur;
CLOSE tablesample_cur;
END;
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
-EXPLAIN SELECT * FROM test_tablesample_v1;
+EXPLAIN (COSTS OFF)
+ SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (2);
+EXPLAIN (COSTS OFF)
+ SELECT * FROM test_tablesample_v1;
+
+-- check inheritance behavior
+explain (costs off)
+ select count(*) from person tablesample bernoulli (100);
+select count(*) from person tablesample bernoulli (100);
+select count(*) from person;
+
+-- check that collations get assigned within the tablesample arguments
+SELECT count(*) FROM test_tablesample TABLESAMPLE bernoulli (('1'::text < '0'::text)::int);
+
+-- check behavior during rescans, as well as correct handling of min/max pct
+select * from
+ (values (0),(100)) v(pct),
+ lateral (select count(*) from tenk1 tablesample bernoulli (pct)) ss;
+select * from
+ (values (0),(100)) v(pct),
+ lateral (select count(*) from tenk1 tablesample system (pct)) ss;
+explain (costs off)
+select pct, count(unique1) from
+ (values (0),(100)) v(pct),
+ lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+ group by pct;
+select pct, count(unique1) from
+ (values (0),(100)) v(pct),
+ lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+ group by pct;
+select pct, count(unique1) from
+ (values (0),(100)) v(pct),
+ lateral (select * from tenk1 tablesample system (pct)) ss
+ group by pct;
-- errors
SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (NULL);
SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1);
SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5);
-
--- catalog sanity
-
-SELECT *
-FROM pg_tablesample_method
-WHERE tsminit IS NULL
- OR tsmseqscan IS NULL
- OR tsmpagemode IS NULL
- OR tsmnextblock IS NULL
- OR tsmnexttuple IS NULL
- OR tsmend IS NULL
- OR tsmreset IS NULL
- OR tsmcost IS NULL;
-
--- done
-DROP TABLE test_tablesample CASCADE;