Put abbreviation logic into puttuple_common()
authorAlexander Korotkov <akorotkov@postgresql.org>
Wed, 27 Jul 2022 05:27:46 +0000 (08:27 +0300)
committerAlexander Korotkov <akorotkov@postgresql.org>
Wed, 27 Jul 2022 05:27:46 +0000 (08:27 +0300)
Abbreviation code is very similar along tuplesort_put*() functions.  This
commit unifies that code and puts it into puttuple_common().  tuplesort_put*()
functions differs in the abbreviation condition, so it has been added as an
argument to the puttuple_common() function.

Discussion: https://postgr.es/m/CAPpHfdvjix0Ahx-H3Jp1M2R%2B_74P-zKnGGygx4OWr%3DbUQ8BNdw%40mail.gmail.com
Author: Alexander Korotkov
Reviewed-by: Pavel Borisov, Maxim Orlov, Matthias van de Meent
Reviewed-by: Andres Freund, John Naylor
src/backend/utils/sort/tuplesort.c

index 7e6b2b8c888f268c39e5a4cdb706192a63b32963..828efe701e55c1070d60a6d50f1f2ddff6e65a81 100644 (file)
@@ -616,7 +616,8 @@ static Tuplesortstate *tuplesort_begin_common(int workMem,
                                                                                          SortCoordinate coordinate,
                                                                                          int sortopt);
 static void tuplesort_begin_batch(Tuplesortstate *state);
-static void puttuple_common(Tuplesortstate *state, SortTuple *tuple);
+static void puttuple_common(Tuplesortstate *state, SortTuple *tuple,
+                                                       bool useAbbrev);
 static bool consider_abort_common(Tuplesortstate *state);
 static void inittapes(Tuplesortstate *state, bool mergeruns);
 static void inittapestate(Tuplesortstate *state, int maxTapes);
@@ -1841,7 +1842,6 @@ tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
 {
        MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext);
        SortTuple       stup;
-       Datum           original;
        MinimalTuple tuple;
        HeapTupleData htup;
 
@@ -1852,49 +1852,15 @@ tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
        /* set up first-column key value */
        htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET;
        htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET);
-       original = heap_getattr(&htup,
-                                                       state->sortKeys[0].ssup_attno,
-                                                       state->tupDesc,
-                                                       &stup.isnull1);
+       stup.datum1 = heap_getattr(&htup,
+                                                          state->sortKeys[0].ssup_attno,
+                                                          state->tupDesc,
+                                                          &stup.isnull1);
 
        MemoryContextSwitchTo(state->sortcontext);
 
-       if (!state->sortKeys->abbrev_converter || stup.isnull1)
-       {
-               /*
-                * Store ordinary Datum representation, or NULL value.  If there is a
-                * converter it won't expect NULL values, and cost model is not
-                * required to account for NULL, so in that case we avoid calling
-                * converter and just set datum1 to zeroed representation (to be
-                * consistent, and to support cheap inequality tests for NULL
-                * abbreviated keys).
-                */
-               stup.datum1 = original;
-       }
-       else if (!consider_abort_common(state))
-       {
-               /* Store abbreviated key representation */
-               stup.datum1 = state->sortKeys->abbrev_converter(original,
-                                                                                                               state->sortKeys);
-       }
-       else
-       {
-               /* Abort abbreviation */
-               stup.datum1 = original;
-
-               /*
-                * Set state to be consistent with never trying abbreviation.
-                *
-                * Alter datum1 representation in already-copied tuples, so as to
-                * ensure a consistent representation (current tuple was just
-                * handled).  It does not matter if some dumped tuples are already
-                * sorted on tape, since serialized tuples lack abbreviated keys
-                * (TSS_BUILDRUNS state prevents control reaching here in any case).
-                */
-               REMOVEABBREV(state, state->memtuples, state->memtupcount);
-       }
-
-       puttuple_common(state, &stup);
+       puttuple_common(state, &stup,
+                                       state->sortKeys->abbrev_converter && !stup.isnull1);
 
        MemoryContextSwitchTo(oldcontext);
 }
@@ -1908,7 +1874,6 @@ void
 tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
 {
        SortTuple       stup;
-       Datum           original;
        MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext);
 
        /* copy the tuple into sort storage */
@@ -1924,49 +1889,14 @@ tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
         */
        if (state->haveDatum1)
        {
-               original = heap_getattr(tup,
-                                                               state->indexInfo->ii_IndexAttrNumbers[0],
-                                                               state->tupDesc,
-                                                               &stup.isnull1);
-
-               if (!state->sortKeys->abbrev_converter || stup.isnull1)
-               {
-                       /*
-                        * Store ordinary Datum representation, or NULL value.  If there
-                        * is a converter it won't expect NULL values, and cost model is
-                        * not required to account for NULL, so in that case we avoid
-                        * calling converter and just set datum1 to zeroed representation
-                        * (to be consistent, and to support cheap inequality tests for
-                        * NULL abbreviated keys).
-                        */
-                       stup.datum1 = original;
-               }
-               else if (!consider_abort_common(state))
-               {
-                       /* Store abbreviated key representation */
-                       stup.datum1 = state->sortKeys->abbrev_converter(original,
-                                                                                                                       state->sortKeys);
-               }
-               else
-               {
-                       /* Abort abbreviation */
-                       stup.datum1 = original;
-
-                       /*
-                        * Set state to be consistent with never trying abbreviation.
-                        *
-                        * Alter datum1 representation in already-copied tuples, so as to
-                        * ensure a consistent representation (current tuple was just
-                        * handled).  It does not matter if some dumped tuples are already
-                        * sorted on tape, since serialized tuples lack abbreviated keys
-                        * (TSS_BUILDRUNS state prevents control reaching here in any
-                        * case).
-                        */
-                       REMOVEABBREV(state, state->memtuples, state->memtupcount);
-               }
+               stup.datum1 = heap_getattr(tup,
+                                                                  state->indexInfo->ii_IndexAttrNumbers[0],
+                                                                  state->tupDesc,
+                                                                  &stup.isnull1);
        }
 
-       puttuple_common(state, &stup);
+       puttuple_common(state, &stup,
+                                       state->haveDatum1 && state->sortKeys->abbrev_converter && !stup.isnull1);
 
        MemoryContextSwitchTo(oldcontext);
 }
@@ -1982,7 +1912,6 @@ tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
 {
        MemoryContext oldcontext;
        SortTuple       stup;
-       Datum           original;
        IndexTuple      tuple;
 
        stup.tuple = index_form_tuple_context(RelationGetDescr(rel), values,
@@ -1991,49 +1920,15 @@ tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
        tuple->t_tid = *self;
        USEMEM(state, GetMemoryChunkSpace(stup.tuple));
        /* set up first-column key value */
-       original = index_getattr(tuple,
-                                                        1,
-                                                        RelationGetDescr(state->indexRel),
-                                                        &stup.isnull1);
+       stup.datum1 = index_getattr(tuple,
+                                                               1,
+                                                               RelationGetDescr(state->indexRel),
+                                                               &stup.isnull1);
 
        oldcontext = MemoryContextSwitchTo(state->sortcontext);
 
-       if (!state->sortKeys || !state->sortKeys->abbrev_converter || stup.isnull1)
-       {
-               /*
-                * Store ordinary Datum representation, or NULL value.  If there is a
-                * converter it won't expect NULL values, and cost model is not
-                * required to account for NULL, so in that case we avoid calling
-                * converter and just set datum1 to zeroed representation (to be
-                * consistent, and to support cheap inequality tests for NULL
-                * abbreviated keys).
-                */
-               stup.datum1 = original;
-       }
-       else if (!consider_abort_common(state))
-       {
-               /* Store abbreviated key representation */
-               stup.datum1 = state->sortKeys->abbrev_converter(original,
-                                                                                                               state->sortKeys);
-       }
-       else
-       {
-               /* Abort abbreviation */
-               stup.datum1 = original;
-
-               /*
-                * Set state to be consistent with never trying abbreviation.
-                *
-                * Alter datum1 representation in already-copied tuples, so as to
-                * ensure a consistent representation (current tuple was just
-                * handled).  It does not matter if some dumped tuples are already
-                * sorted on tape, since serialized tuples lack abbreviated keys
-                * (TSS_BUILDRUNS state prevents control reaching here in any case).
-                */
-               REMOVEABBREV(state, state->memtuples, state->memtupcount);
-       }
-
-       puttuple_common(state, &stup);
+       puttuple_common(state, &stup,
+                                       state->sortKeys && state->sortKeys->abbrev_converter && !stup.isnull1);
 
        MemoryContextSwitchTo(oldcontext);
 }
@@ -2074,43 +1969,15 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
        }
        else
        {
-               Datum           original = datumCopy(val, false, state->datumTypeLen);
-
                stup.isnull1 = false;
-               stup.tuple = DatumGetPointer(original);
+               stup.datum1 = datumCopy(val, false, state->datumTypeLen);
+               stup.tuple = DatumGetPointer(stup.datum1);
                USEMEM(state, GetMemoryChunkSpace(stup.tuple));
                MemoryContextSwitchTo(state->sortcontext);
-
-               if (!state->sortKeys->abbrev_converter)
-               {
-                       stup.datum1 = original;
-               }
-               else if (!consider_abort_common(state))
-               {
-                       /* Store abbreviated key representation */
-                       stup.datum1 = state->sortKeys->abbrev_converter(original,
-                                                                                                                       state->sortKeys);
-               }
-               else
-               {
-                       /* Abort abbreviation */
-                       stup.datum1 = original;
-
-                       /*
-                        * Set state to be consistent with never trying abbreviation.
-                        *
-                        * Alter datum1 representation in already-copied tuples, so as to
-                        * ensure a consistent representation (current tuple was just
-                        * handled).  It does not matter if some dumped tuples are already
-                        * sorted on tape, since serialized tuples lack abbreviated keys
-                        * (TSS_BUILDRUNS state prevents control reaching here in any
-                        * case).
-                        */
-                       REMOVEABBREV(state, state->memtuples, state->memtupcount);
-               }
        }
 
-       puttuple_common(state, &stup);
+       puttuple_common(state, &stup,
+                                       state->tuples && !isNull && state->sortKeys->abbrev_converter);
 
        MemoryContextSwitchTo(oldcontext);
 }
@@ -2119,10 +1986,41 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
  * Shared code for tuple and datum cases.
  */
 static void
-puttuple_common(Tuplesortstate *state, SortTuple *tuple)
+puttuple_common(Tuplesortstate *state, SortTuple *tuple, bool useAbbrev)
 {
        Assert(!LEADER(state));
 
+       if (!useAbbrev)
+       {
+               /*
+                * Leave ordinary Datum representation, or NULL value.  If there is a
+                * converter it won't expect NULL values, and cost model is not
+                * required to account for NULL, so in that case we avoid calling
+                * converter and just set datum1 to zeroed representation (to be
+                * consistent, and to support cheap inequality tests for NULL
+                * abbreviated keys).
+                */
+       }
+       else if (!consider_abort_common(state))
+       {
+               /* Store abbreviated key representation */
+               tuple->datum1 = state->sortKeys->abbrev_converter(tuple->datum1,
+                                                                                                                 state->sortKeys);
+       }
+       else
+       {
+               /*
+                * Set state to be consistent with never trying abbreviation.
+                *
+                * Alter datum1 representation in already-copied tuples, so as to
+                * ensure a consistent representation (current tuple was just
+                * handled).  It does not matter if some dumped tuples are already
+                * sorted on tape, since serialized tuples lack abbreviated keys
+                * (TSS_BUILDRUNS state prevents control reaching here in any case).
+                */
+               REMOVEABBREV(state, state->memtuples, state->memtupcount);
+       }
+
        switch (state->status)
        {
                case TSS_INITIAL: