From f6d208d6e51810c73f0e02c477984a6b44627f11 Mon Sep 17 00:00:00 2001 From: Simon Riggs Date: Fri, 15 May 2015 14:37:10 -0400 Subject: TABLESAMPLE, SQL Standard and extensible Add a TABLESAMPLE clause to SELECT statements that allows the user to specify random BERNOULLI sampling or block-level SYSTEM sampling. Implementation allows for extensible sampling functions to be written, using a standard API. Basic version follows the SQL Standard exactly. Usable concrete use cases for the sampling API follow in later commits. Petr Jelinek Reviewed by Michael Paquier and Simon Riggs --- src/include/access/heapam.h | 4 +++ src/include/access/relscan.h | 1 + src/include/access/tablesample.h | 60 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) create mode 100644 src/include/access/tablesample.h (limited to 'src/include/access') diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 49c8ca4d66e..eec7c95b218 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -114,8 +114,12 @@ extern HeapScanDesc heap_beginscan_strat(Relation relation, Snapshot snapshot, bool allow_strat, bool allow_sync); extern HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot, int nkeys, ScanKey key); +extern HeapScanDesc heap_beginscan_sampling(Relation relation, + Snapshot snapshot, int nkeys, ScanKey key, + bool allow_strat, bool allow_pagemode); extern void heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber endBlk); +extern void heapgetpage(HeapScanDesc scan, BlockNumber page); extern void heap_rescan(HeapScanDesc scan, ScanKey key); extern void heap_endscan(HeapScanDesc scan); extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction); diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 5a0d724aca5..1b9b2993957 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -29,6 +29,7 @@ typedef struct HeapScanDescData int rs_nkeys; /* 
number of scan keys */ ScanKey rs_key; /* array of scan key descriptors */ bool rs_bitmapscan; /* true if this is really a bitmap scan */ + bool rs_samplescan; /* true if this is really a sample scan */ bool rs_pageatatime; /* verify visibility page-at-a-time? */ bool rs_allow_strat; /* allow or disallow use of access strategy */ bool rs_allow_sync; /* allow or disallow use of syncscan */ diff --git a/src/include/access/tablesample.h b/src/include/access/tablesample.h new file mode 100644 index 00000000000..222fa8d5561 --- /dev/null +++ b/src/include/access/tablesample.h @@ -0,0 +1,60 @@ +/*------------------------------------------------------------------------- + * + * tablesample.h + * Public header file for TABLESAMPLE clause interface + * + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/tablesample.h + * + *------------------------------------------------------------------------- + */ +#ifndef TABLESAMPLE_H +#define TABLESAMPLE_H + +#include "access/relscan.h" +#include "executor/executor.h" + +typedef struct TableSampleDesc { + HeapScanDesc heapScan; + TupleDesc tupDesc; /* Mostly useful for tsmexaminetuple */ + + void *tsmdata; /* private method data */ + + /* These point to the functions of the TABLESAMPLE Method. 
*/ + FmgrInfo tsminit; + FmgrInfo tsmnextblock; + FmgrInfo tsmnexttuple; + FmgrInfo tsmexaminetuple; + FmgrInfo tsmreset; + FmgrInfo tsmend; +} TableSampleDesc; + + +extern TableSampleDesc *tablesample_init(SampleScanState *scanstate, + TableSampleClause *tablesample); +extern HeapTuple tablesample_getnext(TableSampleDesc *desc); +extern void tablesample_reset(TableSampleDesc *desc); +extern void tablesample_end(TableSampleDesc *desc); +extern HeapTuple tablesample_source_getnext(TableSampleDesc *desc); +extern HeapTuple tablesample_source_gettup(TableSampleDesc *desc, ItemPointer tid, + bool *visible); + +extern Datum tsm_system_init(PG_FUNCTION_ARGS); +extern Datum tsm_system_nextblock(PG_FUNCTION_ARGS); +extern Datum tsm_system_nexttuple(PG_FUNCTION_ARGS); +extern Datum tsm_system_end(PG_FUNCTION_ARGS); +extern Datum tsm_system_reset(PG_FUNCTION_ARGS); +extern Datum tsm_system_cost(PG_FUNCTION_ARGS); + +extern Datum tsm_bernoulli_init(PG_FUNCTION_ARGS); +extern Datum tsm_bernoulli_nextblock(PG_FUNCTION_ARGS); +extern Datum tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS); +extern Datum tsm_bernoulli_end(PG_FUNCTION_ARGS); +extern Datum tsm_bernoulli_reset(PG_FUNCTION_ARGS); +extern Datum tsm_bernoulli_cost(PG_FUNCTION_ARGS); + + +#endif -- cgit v1.2.3