Fix possible failures when a tuplestore switches from in-memory to on-disk
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 27 Mar 2009 18:30:21 +0000 (18:30 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 27 Mar 2009 18:30:21 +0000 (18:30 +0000)
mode while callers hold pointers to in-memory tuples.  I reported this for
the case of nodeWindowAgg's primary scan tuple, but inspection of the code
shows that all of the calls in nodeWindowAgg and nodeCtescan are at risk.
For the moment, fix it with a rather brute-force approach of copying
whenever one of the at-risk callers requests a tuple.  Later we might
think of some sort of reference-count approach to reduce tuple copying.

src/backend/executor/execQual.c
src/backend/executor/functions.c
src/backend/executor/nodeCtescan.c
src/backend/executor/nodeFunctionscan.c
src/backend/executor/nodeMaterial.c
src/backend/executor/nodeWindowAgg.c
src/backend/executor/nodeWorktablescan.c
src/backend/tcop/pquery.c
src/backend/utils/sort/tuplestore.c
src/include/utils/tuplestore.h

index 8d0a3a285aad3ed1c7b10d42d47b3a68e9ea153b..6eb6a803ff33fd5b64e2dfaf9ce968dde1a7f191 100644 (file)
@@ -1385,7 +1385,7 @@ restart:
        if (fcache->funcResultStore)
        {
                Assert(isDone);                         /* it was provided before ... */
-               if (tuplestore_gettupleslot(fcache->funcResultStore, true,
+               if (tuplestore_gettupleslot(fcache->funcResultStore, true, false,
                                                                        fcache->funcResultSlot))
                {
                        *isDone = ExprMultipleResult;
index 3aa7d0de2751dae17bf2705aa547f93341511092..c0261fe66b5f3277b452700178d4b0e21ac9b80c 100644 (file)
@@ -736,7 +736,7 @@ fmgr_sql(PG_FUNCTION_ARGS)
                        /* Re-use the junkfilter's output slot to fetch back the tuple */
                        Assert(fcache->junkFilter);
                        slot = fcache->junkFilter->jf_resultSlot;
-                       if (!tuplestore_gettupleslot(fcache->tstore, true, slot))
+                       if (!tuplestore_gettupleslot(fcache->tstore, true, false, slot))
                                elog(ERROR, "failed to fetch lazy-eval tuple");
                        /* Extract the result as a datum, and copy out from the slot */
                        result = postquel_get_single_result(slot, fcinfo,
@@ -822,7 +822,7 @@ fmgr_sql(PG_FUNCTION_ARGS)
                {
                        /* Re-use the junkfilter's output slot to fetch back the tuple */
                        slot = fcache->junkFilter->jf_resultSlot;
-                       if (tuplestore_gettupleslot(fcache->tstore, true, slot))
+                       if (tuplestore_gettupleslot(fcache->tstore, true, false, slot))
                                result = postquel_get_single_result(slot, fcinfo,
                                                                                                        fcache, oldcontext);
                        else
index f54066ec886f3b8397a708cfb4aad4f35abb01b4..43de33f799e3f55886c5c66f1d92cf6a93f13a5f 100644 (file)
@@ -71,10 +71,14 @@ CteScanNext(CteScanState *node)
 
        /*
         * If we can fetch another tuple from the tuplestore, return it.
+        *
+        * Note: we have to use copy=true in the tuplestore_gettupleslot call,
+        * because we are sharing the tuplestore with other nodes that might
+        * write into the tuplestore before we get called again.
         */
        if (!eof_tuplestore)
        {
-               if (tuplestore_gettupleslot(tuplestorestate, forward, slot))
+               if (tuplestore_gettupleslot(tuplestorestate, forward, true, slot))
                        return slot;
                if (forward)
                        eof_tuplestore = true;
index 5100f081e17a7466f1a88aa95044a0efc02ebb1f..c74c27356e8523dc9d09d1c2f4ab6fdbc5f2788f 100644 (file)
@@ -74,6 +74,7 @@ FunctionNext(FunctionScanState *node)
        slot = node->ss.ss_ScanTupleSlot;
        (void) tuplestore_gettupleslot(tuplestorestate,
                                                                   ScanDirectionIsForward(direction),
+                                                                  false,
                                                                   slot);
        return slot;
 }
index 66971d2018a305f29c4bcea88f77961a6413e449..e8770eadca713fb4cf39d7f6129b00cb255bc4d7 100644 (file)
@@ -104,7 +104,7 @@ ExecMaterial(MaterialState *node)
        slot = node->ss.ps.ps_ResultTupleSlot;
        if (!eof_tuplestore)
        {
-               if (tuplestore_gettupleslot(tuplestorestate, forward, slot))
+               if (tuplestore_gettupleslot(tuplestorestate, forward, false, slot))
                        return slot;
                if (forward)
                        eof_tuplestore = true;
index 8db779407784268ea0a3eca3fda84e9932c1f70e..f862b48695d5a3d67a6c588e627a208c0f3cfb03 100644 (file)
@@ -480,7 +480,8 @@ eval_windowaggregates(WindowAggState *winstate)
                        spool_tuples(winstate, winstate->aggregatedupto);
                        tuplestore_select_read_pointer(winstate->buffer,
                                                                                   winstate->agg_ptr);
-                       if (!tuplestore_gettupleslot(winstate->buffer, true, agg_row_slot))
+                       if (!tuplestore_gettupleslot(winstate->buffer, true, true,
+                                                                                agg_row_slot))
                                break;                  /* must be end of partition */
                }
 
@@ -1001,12 +1002,14 @@ restart:
        /*
         * Read the current row from the tuplestore, and save in ScanTupleSlot.
         * (We can't rely on the outerplan's output slot because we may have to
-        * read beyond the current row.)
+        * read beyond the current row.  Also, we have to actually copy the row
+        * out of the tuplestore, since window function evaluation might cause
+        * the tuplestore to dump its state to disk.)
         *
         * Current row must be in the tuplestore, since we spooled it above.
         */
        tuplestore_select_read_pointer(winstate->buffer, winstate->current_ptr);
-       if (!tuplestore_gettupleslot(winstate->buffer, true,
+       if (!tuplestore_gettupleslot(winstate->buffer, true, true,
                                                                 winstate->ss.ss_ScanTupleSlot))
                elog(ERROR, "unexpected end of tuplestore");
 
@@ -1589,14 +1592,14 @@ window_gettupleslot(WindowObject winobj, int64 pos, TupleTableSlot *slot)
 
        while (winobj->seekpos > pos)
        {
-               if (!tuplestore_gettupleslot(winstate->buffer, false, slot))
+               if (!tuplestore_gettupleslot(winstate->buffer, false, true, slot))
                        elog(ERROR, "unexpected end of tuplestore");
                winobj->seekpos--;
        }
 
        while (winobj->seekpos < pos)
        {
-               if (!tuplestore_gettupleslot(winstate->buffer, true, slot))
+               if (!tuplestore_gettupleslot(winstate->buffer, true, true, slot))
                        elog(ERROR, "unexpected end of tuplestore");
                winobj->seekpos++;
        }
index a1e0553be789cdba4b1dab03f7847f96c8908cd2..c4236f0177a2ac28dbea3b54f1088eb25f4e2aad 100644 (file)
@@ -43,6 +43,10 @@ WorkTableScanNext(WorkTableScanState *node)
         * worktable plan node, since it cannot appear high enough in the plan
         * tree of a scrollable cursor to be exposed to a backward-scan
         * requirement.  So it's not worth expending effort to support it.
+        *
+        * Note: we are also assuming that this node is the only reader of the
+        * worktable.  Therefore, we don't need a private read pointer for the
+        * tuplestore, nor do we need to tell tuplestore_gettupleslot to copy.
         */
        estate = node->ss.ps.state;
        Assert(ScanDirectionIsForward(estate->es_direction));
@@ -53,7 +57,7 @@ WorkTableScanNext(WorkTableScanState *node)
         * Get the next tuple from tuplestore. Return NULL if no more tuples.
         */
        slot = node->ss.ss_ScanTupleSlot;
-       (void) tuplestore_gettupleslot(tuplestorestate, true, slot);
+       (void) tuplestore_gettupleslot(tuplestorestate, true, false, slot);
        return slot;
 }
 
index 55a3ada7aa5fdb43fd5ede9570ed755b8981267a..28e6e59e80b9f612a84d3415eda1d23367f55f03 100644 (file)
@@ -1118,7 +1118,8 @@ RunFromStore(Portal portal, ScanDirection direction, long count,
 
                        oldcontext = MemoryContextSwitchTo(portal->holdContext);
 
-                       ok = tuplestore_gettupleslot(portal->holdStore, forward, slot);
+                       ok = tuplestore_gettupleslot(portal->holdStore, forward, false,
+                                                                                slot);
 
                        MemoryContextSwitchTo(oldcontext);
 
index fe090d56a1bf87b1ac7aa97f7d4341c167de7471..4367236503234e383ce35dc2cf66e0674e93f353 100644 (file)
@@ -871,10 +871,17 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward,
  *
  * If successful, put tuple in slot and return TRUE; else, clear the slot
  * and return FALSE.
+ *
+ * If copy is TRUE, the slot receives a copied tuple (allocated in current
+ * memory context) that will stay valid regardless of future manipulations of
+ * the tuplestore's state.  If copy is FALSE, the slot may just receive a
+ * pointer to a tuple held within the tuplestore.  The latter is more
+ * efficient but the slot contents may be corrupted if additional writes to
+ * the tuplestore occur.  (If using tuplestore_trim, see comments therein.)
  */
 bool
 tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
-                                               TupleTableSlot *slot)
+                                               bool copy, TupleTableSlot *slot)
 {
        MinimalTuple tuple;
        bool            should_free;
@@ -883,6 +890,11 @@ tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
 
        if (tuple)
        {
+               if (copy && !should_free)
+               {
+                       tuple = heap_copy_minimal_tuple(tuple);
+                       should_free = true;
+               }
                ExecStoreMinimalTuple(tuple, slot, should_free);
                return true;
        }
@@ -1107,7 +1119,10 @@ tuplestore_trim(Tuplestorestate *state)
         * since tuplestore_gettuple returns a direct pointer to our
         * internal copy of the tuple, it's likely that the caller has
         * still got the tuple just before "current" referenced in a slot.
-        * So we keep one extra tuple before the oldest "current".
+        * So we keep one extra tuple before the oldest "current".  (Strictly
+        * speaking, we could require such callers to use the "copy" flag to
+        * tuplestore_gettupleslot, but for efficiency we allow this one case
+        * to not use "copy".)
         */
        nremove = oldest - 1;
        if (nremove <= 0)
index 001cab5c3f0317e2b9d49bedd4bb68b100e69de5..4a5d5b009f7372d24726b5f6d717f9bfb7b05312 100644 (file)
@@ -71,7 +71,7 @@ extern void tuplestore_trim(Tuplestorestate *state);
 extern bool tuplestore_in_memory(Tuplestorestate *state);
 
 extern bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
-                                               TupleTableSlot *slot);
+                                               bool copy, TupleTableSlot *slot);
 extern bool tuplestore_advance(Tuplestorestate *state, bool forward);
 
 extern bool tuplestore_ateof(Tuplestorestate *state);