* using Algorithm D.
*
* When merging runs, we use a heap containing just the frontmost tuple from
- * each source run; we repeatedly output the smallest tuple and insert the
- * next tuple from its source tape (if any). When the heap empties, the merge
- * is complete. The basic merge algorithm thus needs very little memory ---
- * only M tuples for an M-way merge, and M is constrained to a small number.
- * However, we can still make good use of our full workMem allocation by
- * pre-reading additional tuples from each source tape. Without prereading,
- * our access pattern to the temporary file would be very erratic; on average
- * we'd read one block from each of M source tapes during the same time that
- * we're writing M blocks to the output tape, so there is no sequentiality of
- * access at all, defeating the read-ahead methods used by most Unix kernels.
- * Worse, the output tape gets written into a very random sequence of blocks
- * of the temp file, ensuring that things will be even worse when it comes
- * time to read that tape. A straightforward merge pass thus ends up doing a
- * lot of waiting for disk seeks. We can improve matters by prereading from
- * each source tape sequentially, loading about workMem/M bytes from each tape
- * in turn. Then we run the merge algorithm, writing but not reading until
- * one of the preloaded tuple series runs out. Then we switch back to preread
- * mode, fill memory again, and repeat. This approach helps to localize both
- * read and write accesses.
+ * each source run; we repeatedly output the smallest tuple and replace it
+ * with the next tuple from its source tape (if any). When the heap empties,
+ * the merge is complete. The basic merge algorithm thus needs very little
+ * memory --- only M tuples for an M-way merge, and M is constrained to a
+ * small number. However, we can still make good use of our full workMem
+ * allocation by pre-reading additional tuples from each source tape. Without
+ * prereading, our access pattern to the temporary file would be very erratic;
+ * on average we'd read one block from each of M source tapes during the same
+ * time that we're writing M blocks to the output tape, so there is no
+ * sequentiality of access at all, defeating the read-ahead methods used by
+ * most Unix kernels. Worse, the output tape gets written into a very random
+ * sequence of blocks of the temp file, ensuring that things will be even
+ * worse when it comes time to read that tape. A straightforward merge pass
+ * thus ends up doing a lot of waiting for disk seeks. We can improve matters
+ * by prereading from each source tape sequentially, loading about workMem/M
+ * bytes from each tape in turn. Then we run the merge algorithm, writing but
+ * not reading until one of the preloaded tuple series runs out. Then we
+ * switch back to preread mode, fill memory again, and repeat. This approach
+ * helps to localize both read and write accesses.
*
* When the caller requests random access to the sort result, we form
* the final sorted run on a logical tape which is then "frozen", so
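To make the merge strategy above concrete, here is a minimal, self-contained sketch of a heap-based M-way merge. It is illustrative only, not PostgreSQL code: the names (HeapItem, sift_down, NRUNS, RUNLEN) are hypothetical, and the real implementation additionally manages logical tapes, prereading, and memory accounting.

#include <stdio.h>

#define NRUNS	3
#define RUNLEN	4

typedef struct
{
	int			value;			/* frontmost value of this source run */
	int			run;			/* which source run it came from */
} HeapItem;

/* Sift the entry at position i down until the min-heap property holds. */
static void
sift_down(HeapItem *heap, int n, int i)
{
	for (;;)
	{
		int			child = 2 * i + 1;
		HeapItem	tmp;

		if (child >= n)
			break;
		if (child + 1 < n && heap[child + 1].value < heap[child].value)
			child++;
		if (heap[i].value <= heap[child].value)
			break;
		tmp = heap[i];
		heap[i] = heap[child];
		heap[child] = tmp;
		i = child;
	}
}

int
main(void)
{
	int			runs[NRUNS][RUNLEN] = {
		{1, 4, 7, 10}, {2, 5, 8, 11}, {3, 6, 9, 12}
	};
	int			pos[NRUNS] = {0, 0, 0};
	HeapItem	heap[NRUNS];
	int			n = NRUNS;
	int			r;

	/* Load the frontmost element of each run, then heapify. */
	for (r = 0; r < n; r++)
	{
		heap[r].value = runs[r][pos[r]++];
		heap[r].run = r;
	}
	for (r = n / 2 - 1; r >= 0; r--)
		sift_down(heap, n, r);

	/* Repeatedly output the smallest element until the heap empties. */
	while (n > 0)
	{
		int			src = heap[0].run;

		printf("%d ", heap[0].value);
		if (pos[src] < RUNLEN)
			heap[0].value = runs[src][pos[src]++];	/* replace the top */
		else
			heap[0] = heap[--n];	/* run exhausted: delete the top */
		sift_down(heap, n, 0);
	}
	putchar('\n');				/* prints 1 2 3 ... 12 */
	return 0;
}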
static void sort_bounded_heap(Tuplesortstate *state);
static void tuplesort_sort_memtuples(Tuplesortstate *state);
static void tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple,
- int tupleindex, bool checkIndex);
-static void tuplesort_heap_siftup(Tuplesortstate *state, bool checkIndex);
+ bool checkIndex);
+static void tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple,
+ bool checkIndex);
+static void tuplesort_heap_delete_top(Tuplesortstate *state, bool checkIndex);
static void reversedirection(Tuplesortstate *state);
static unsigned int getlen(Tuplesortstate *state, int tapenum, bool eofOK);
static void markrunend(Tuplesortstate *state, int tapenum);
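The prototype changes above are the heart of this patch: tuplesort_heap_siftup is split into a delete-top and a replace-top operation. The following standalone sketch (hypothetical names, a plain int min-heap instead of SortTuples) shows why replace-top pays off: deleting the top and then inserting a replacement walks the heap twice, one sift-down plus one sift-up, while overwriting the top and sifting down once does the same job in a single O(log n) pass.

#include <stdio.h>

/* Sift the entry at position i down until the min-heap property holds. */
static void
sift_down(int *heap, int n, int i)
{
	for (;;)
	{
		int			child = 2 * i + 1;
		int			tmp;

		if (child >= n)
			break;
		if (child + 1 < n && heap[child + 1] < heap[child])
			child++;
		if (heap[i] <= heap[child])
			break;
		tmp = heap[i];
		heap[i] = heap[child];
		heap[child] = tmp;
		i = child;
	}
}

/* Old approach: two O(log n) passes over the heap. */
static void
delete_top_then_insert(int *heap, int n, int v)
{
	int			i = n - 1;

	heap[0] = heap[n - 1];		/* delete top: move last entry into the hole */
	sift_down(heap, n - 1, 0);	/* pass 1: sift down */
	heap[i] = v;				/* insert the new entry at the end */
	while (i > 0 && heap[(i - 1) / 2] > heap[i])	/* pass 2: sift up */
	{
		int			tmp = heap[i];

		heap[i] = heap[(i - 1) / 2];
		heap[(i - 1) / 2] = tmp;
		i = (i - 1) / 2;
	}
}

/* New approach: overwrite the top, fix the heap in one pass. */
static void
replace_top(int *heap, int n, int v)
{
	heap[0] = v;
	sift_down(heap, n, 0);
}

int
main(void)
{
	int			a[] = {1, 3, 2, 7, 4};	/* a valid min-heap */
	int			b[] = {1, 3, 2, 7, 4};
	int			i;

	delete_top_then_insert(a, 5, 5);
	replace_top(b, 5, 5);
	for (i = 0; i < 5; i++)
		printf("%d/%d ", a[i], b[i]);	/* both are valid heaps over {2,3,4,5,7} */
	putchar('\n');
	return 0;
}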
}
else
{
- /* discard top of heap, sift up, insert new tuple */
+ /* discard top of heap, replacing it with the new tuple */
free_sort_tuple(state, &state->memtuples[0]);
- tuplesort_heap_siftup(state, false);
- tuplesort_heap_insert(state, tuple, 0, false);
+ tuple->tupindex = 0; /* not used */
+ tuplesort_heap_replace_top(state, tuple, false);
}
break;
* initial COMPARETUP() call is required for the tuple, to
* determine that the tuple does not belong in RUN_FIRST).
*/
- tuplesort_heap_insert(state, tuple, state->currentRun, true);
+ tuple->tupindex = state->currentRun;
+ tuplesort_heap_insert(state, tuple, true);
}
else
{
* more generally.
*/
*stup = state->memtuples[0];
- tuplesort_heap_siftup(state, false);
if ((tupIndex = state->mergenext[srcTape]) == 0)
{
/*
*/
if ((tupIndex = state->mergenext[srcTape]) == 0)
{
+ /* Remove the top node from the heap */
+ tuplesort_heap_delete_top(state, false);
/* Free tape's buffer, avoiding dangling pointer */
if (state->batchUsed)
mergebatchfreetape(state, srcTape, stup, should_free);
return true;
}
}
- /* pull next preread tuple from list, insert in heap */
+
+ /*
+			 * Pull the next preread tuple from the list, and replace the
+			 * tuple we just returned at the top of the heap with it.
+ */
newtup = &state->memtuples[tupIndex];
state->mergenext[srcTape] = newtup->tupindex;
if (state->mergenext[srcTape] == 0)
state->mergelast[srcTape] = 0;
- tuplesort_heap_insert(state, newtup, srcTape, false);
+ newtup->tupindex = srcTape;
+ tuplesort_heap_replace_top(state, newtup, false);
/* put the now-unused memtuples entry on the freelist */
newtup->tupindex = state->mergefreelist;
state->mergefreelist = tupIndex;
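In miniature, and assuming the HeapItem, sift_down, RUNLEN, runs, and pos definitions from the hypothetical merge sketch earlier, the control flow of the code above looks like this helper; the caller must ensure the heap is non-empty.

static int
merge_next(HeapItem *heap, int *n, int runs[][RUNLEN], int *pos)
{
	int			result = heap[0].value;
	int			src = heap[0].run;

	if (pos[src] < RUNLEN)
	{
		/* source still has data: replace the top, one sift-down */
		heap[0].value = runs[src][pos[src]++];
		sift_down(heap, *n, 0);
	}
	else
	{
		/* source exhausted: delete the top, the heap shrinks */
		heap[0] = heap[--(*n)];
		sift_down(heap, *n, 0);
	}
	return result;
}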
/* Must copy source tuple to avoid possible overwrite */
SortTuple stup = state->memtuples[j];
- tuplesort_heap_insert(state, &stup, 0, false);
+ stup.tupindex = RUN_FIRST;
+ tuplesort_heap_insert(state, &stup, false);
}
Assert(state->memtupcount == ntuples);
}
/* writetup adjusted total free space, now fix per-tape space */
spaceFreed = state->availMem - priorAvail;
state->mergeavailmem[srcTape] += spaceFreed;
- /* compact the heap */
- tuplesort_heap_siftup(state, false);
if ((tupIndex = state->mergenext[srcTape]) == 0)
{
/* out of preloaded data on this tape, try to read more */
mergepreread(state);
/* if still no data, we've reached end of run on this tape */
if ((tupIndex = state->mergenext[srcTape]) == 0)
+ {
+ /* remove the written-out tuple from the heap */
+ tuplesort_heap_delete_top(state, false);
continue;
+ }
}
- /* pull next preread tuple from list, insert in heap */
+
+ /*
+		 * Pull the next preread tuple from the list, and replace the
+		 * written-out tuple at the top of the heap with it.
+ */
tup = &state->memtuples[tupIndex];
state->mergenext[srcTape] = tup->tupindex;
if (state->mergenext[srcTape] == 0)
state->mergelast[srcTape] = 0;
- tuplesort_heap_insert(state, tup, srcTape, false);
+ tup->tupindex = srcTape;
+ tuplesort_heap_replace_top(state, tup, false);
/* put the now-unused memtuples entry on the freelist */
tup->tupindex = state->mergefreelist;
state->mergefreelist = tupIndex;
state->mergenext[srcTape] = tup->tupindex;
if (state->mergenext[srcTape] == 0)
state->mergelast[srcTape] = 0;
- tuplesort_heap_insert(state, tup, srcTape, false);
+ tup->tupindex = srcTape;
+ tuplesort_heap_insert(state, tup, false);
/* put the now-unused memtuples entry on the freelist */
tup->tupindex = state->mergefreelist;
state->mergefreelist = tupIndex;
* Still holding out for a case favorable to replacement
* selection. Still incrementally spilling using heap.
*
- * Dump the heap's frontmost entry, and sift up to remove it from
- * the heap.
+ * Dump the heap's frontmost entry, and remove it from the heap.
*/
Assert(state->memtupcount > 0);
WRITETUP(state, state->tp_tapenum[state->destTape],
&state->memtuples[0]);
- tuplesort_heap_siftup(state, true);
+ tuplesort_heap_delete_top(state, true);
}
else
{
state->memtupcount = 0; /* make the heap empty */
for (i = 0; i < tupcount; i++)
{
- if (state->memtupcount >= state->bound &&
- COMPARETUP(state, &state->memtuples[i], &state->memtuples[0]) <= 0)
- {
- /* New tuple would just get thrown out, so skip it */
- free_sort_tuple(state, &state->memtuples[i]);
- CHECK_FOR_INTERRUPTS();
- }
- else
+ if (state->memtupcount < state->bound)
{
/* Insert next tuple into heap */
/* Must copy source tuple to avoid possible overwrite */
SortTuple stup = state->memtuples[i];
- tuplesort_heap_insert(state, &stup, 0, false);
-
- /* If heap too full, discard largest entry */
- if (state->memtupcount > state->bound)
+ stup.tupindex = 0; /* not used */
+ tuplesort_heap_insert(state, &stup, false);
+ }
+ else
+ {
+ /*
+ * The heap is full. Replace the largest entry with the new
+				 * tuple, or discard the new tuple if it's larger than
+				 * anything already in the heap.
+ */
+ if (COMPARETUP(state, &state->memtuples[i], &state->memtuples[0]) <= 0)
{
- free_sort_tuple(state, &state->memtuples[0]);
- tuplesort_heap_siftup(state, false);
+ free_sort_tuple(state, &state->memtuples[i]);
+ CHECK_FOR_INTERRUPTS();
}
+ else
+ tuplesort_heap_replace_top(state, &state->memtuples[i], false);
}
}
Assert(tupcount == state->bound);
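The loop above fills a bounded heap and then filters the remaining input against it. A simplified, self-contained sketch of the same idea, with hypothetical names: a plain int max-heap keeps the 3 smallest values seen (tuplesort gets the same effect for SortTuples by reversing its comparator), inserting with a sift-up until the bound is reached, then replacing the top only when a new value beats the current worst candidate.

#include <stdio.h>

#define BOUND	3

/* Sift the entry at position i down until the max-heap property holds. */
static void
max_sift_down(int *heap, int n, int i)
{
	for (;;)
	{
		int			child = 2 * i + 1;
		int			tmp;

		if (child >= n)
			break;
		if (child + 1 < n && heap[child + 1] > heap[child])
			child++;
		if (heap[i] >= heap[child])
			break;
		tmp = heap[i];
		heap[i] = heap[child];
		heap[child] = tmp;
		i = child;
	}
}

int
main(void)
{
	int			input[] = {9, 1, 8, 2, 7, 3, 6};
	int			heap[BOUND];
	int			n = 0;
	int			i;

	for (i = 0; i < 7; i++)
	{
		if (n < BOUND)
		{
			/* Heap not full yet: insert, sifting up from the bottom. */
			int			j = n++;

			heap[j] = input[i];
			while (j > 0 && heap[(j - 1) / 2] < heap[j])
			{
				int			tmp = heap[j];

				heap[j] = heap[(j - 1) / 2];
				heap[(j - 1) / 2] = tmp;
				j = (j - 1) / 2;
			}
		}
		else if (input[i] < heap[0])
		{
			/* Better than the current worst candidate: replace the top. */
			heap[0] = input[i];
			max_sift_down(heap, n, 0);
		}
		/* else: worse than everything we keep, discard it */
	}
	printf("%d %d %d\n", heap[0], heap[1], heap[2]);	/* 3 1 2: the three smallest */
	return 0;
}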
/*
- * We can unheapify in place because each sift-up will remove the largest
- * entry, which we can promptly store in the newly freed slot at the end.
- * Once we're down to a single-entry heap, we're done.
+ * We can unheapify in place because each delete-top call will remove the
+ * largest entry, which we can promptly store in the newly freed slot at
+ * the end. Once we're down to a single-entry heap, we're done.
*/
while (state->memtupcount > 1)
{
SortTuple stup = state->memtuples[0];
/* this sifts-up the next-largest entry and decreases memtupcount */
- tuplesort_heap_siftup(state, false);
+ tuplesort_heap_delete_top(state, false);
state->memtuples[state->memtupcount] = stup;
}
state->memtupcount = tupcount;
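The in-place unheapify described above is the selection phase of classic heapsort. A self-contained sketch with hypothetical names: each iteration removes the largest remaining entry of an int max-heap (max_sift_down as in the previous sketch) and drops it into the slot the shrinking heap just vacated, leaving the array sorted ascending without any extra memory.

#include <stdio.h>

/* Sift the entry at position i down until the max-heap property holds. */
static void
max_sift_down(int *heap, int n, int i)
{
	for (;;)
	{
		int			child = 2 * i + 1;
		int			tmp;

		if (child >= n)
			break;
		if (child + 1 < n && heap[child + 1] > heap[child])
			child++;
		if (heap[i] >= heap[child])
			break;
		tmp = heap[i];
		heap[i] = heap[child];
		heap[child] = tmp;
		i = child;
	}
}

int
main(void)
{
	int			heap[] = {7, 4, 6, 1, 3, 2};	/* a valid max-heap */
	int			n = 6;
	int			i;

	while (n > 1)
	{
		int			top = heap[0];

		heap[0] = heap[n - 1];	/* delete top: move last entry into the hole */
		max_sift_down(heap, --n, 0);
		heap[n] = top;			/* store it in the slot that just freed up */
	}
	for (i = 0; i < 6; i++)
		printf("%d ", heap[i]);	/* prints 1 2 3 4 6 7 */
	putchar('\n');
	return 0;
}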
* Insert a new tuple into an empty or existing heap, maintaining the
* heap invariant. Caller is responsible for ensuring there's room.
*
- * Note: we assume *tuple is a temporary variable that can be scribbled on.
- * For some callers, tuple actually points to a memtuples[] entry above the
+ * Note: For some callers, tuple points to a memtuples[] entry above the
* end of the heap. This is safe as long as it's not immediately adjacent
* to the end of the heap (ie, in the [memtupcount] array entry) --- if it
* is, it might get overwritten before being moved into the heap!
*/
static void
tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple,
- int tupleindex, bool checkIndex)
+ bool checkIndex)
{
SortTuple *memtuples;
int j;
- /*
- * Save the tupleindex --- see notes above about writing on *tuple. It's a
- * historical artifact that tupleindex is passed as a separate argument
- * and not in *tuple, but it's notationally convenient so let's leave it
- * that way.
- */
- tuple->tupindex = tupleindex;
-
memtuples = state->memtuples;
Assert(state->memtupcount < state->memtupsize);
- Assert(!checkIndex || tupleindex == RUN_FIRST);
+ Assert(!checkIndex || tuple->tupindex == RUN_FIRST);
CHECK_FOR_INTERRUPTS();
}
/*
- * The tuple at state->memtuples[0] has been removed from the heap.
- * Decrement memtupcount, and sift up to maintain the heap invariant.
+ * Remove the tuple at state->memtuples[0] from the heap. Decrement
+ * memtupcount, and sift up to maintain the heap invariant.
+ *
+ * The caller has already freed the tuple the top node points to,
+ * if necessary.
*/
static void
-tuplesort_heap_siftup(Tuplesortstate *state, bool checkIndex)
+tuplesort_heap_delete_top(Tuplesortstate *state, bool checkIndex)
{
SortTuple *memtuples = state->memtuples;
SortTuple *tuple;
- int i,
- n;
Assert(!checkIndex || state->currentRun == RUN_FIRST);
if (--state->memtupcount <= 0)
return;
+ /*
+	 * Remove the last tuple in the heap, and re-insert it by replacing the
+	 * current top node with it.
+ */
+ tuple = &memtuples[state->memtupcount];
+ tuplesort_heap_replace_top(state, tuple, checkIndex);
+}
+
+/*
+ * Replace the tuple at state->memtuples[0] with a new tuple. Sift up to
+ * maintain the heap invariant.
+ *
+ * This corresponds to the "siftup" operation in Knuth's Algorithm 5.2.3H
+ * (Heapsort), steps H3-H8.
+ */
+static void
+tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple,
+ bool checkIndex)
+{
+ SortTuple *memtuples = state->memtuples;
+ int i,
+ n;
+
+ Assert(!checkIndex || state->currentRun == RUN_FIRST);
+ Assert(state->memtupcount >= 1);
+
CHECK_FOR_INTERRUPTS();
n = state->memtupcount;
- tuple = &memtuples[n]; /* tuple that must be reinserted */
i = 0; /* i is where the "hole" is */
for (;;)
{