Put abbreviation logic into puttuple_common()
author Alexander Korotkov <akorotkov@postgresql.org>
Wed, 27 Jul 2022 05:27:46 +0000 (08:27 +0300)
committer Alexander Korotkov <akorotkov@postgresql.org>
Wed, 27 Jul 2022 05:27:46 +0000 (08:27 +0300)
The abbreviation code is very similar across the tuplesort_put*() functions.
This commit unifies that code and puts it into puttuple_common().  The
tuplesort_put*() functions differ in the abbreviation condition, so it has
been added as an argument to the puttuple_common() function.

Discussion: https://postgr.es/m/CAPpHfdvjix0Ahx-H3Jp1M2R%2B_74P-zKnGGygx4OWr%3DbUQ8BNdw%40mail.gmail.com
Author: Alexander Korotkov
Reviewed-by: Pavel Borisov, Maxim Orlov, Matthias van de Meent
Reviewed-by: Andres Freund, John Naylor
src/backend/utils/sort/tuplesort.c

index 7e6b2b8c888f268c39e5a4cdb706192a63b32963..828efe701e55c1070d60a6d50f1f2ddff6e65a81 100644 (file)
@@ -616,7 +616,8 @@ static Tuplesortstate *tuplesort_begin_common(int workMem,
                                              SortCoordinate coordinate,
                                              int sortopt);
 static void tuplesort_begin_batch(Tuplesortstate *state);
-static void puttuple_common(Tuplesortstate *state, SortTuple *tuple);
+static void puttuple_common(Tuplesortstate *state, SortTuple *tuple,
+                           bool useAbbrev);
 static bool consider_abort_common(Tuplesortstate *state);
 static void inittapes(Tuplesortstate *state, bool mergeruns);
 static void inittapestate(Tuplesortstate *state, int maxTapes);
@@ -1841,7 +1842,6 @@ tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
 {
    MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext);
    SortTuple   stup;
-   Datum       original;
    MinimalTuple tuple;
    HeapTupleData htup;
 
@@ -1852,49 +1852,15 @@ tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
    /* set up first-column key value */
    htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET;
    htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET);
-   original = heap_getattr(&htup,
-                           state->sortKeys[0].ssup_attno,
-                           state->tupDesc,
-                           &stup.isnull1);
+   stup.datum1 = heap_getattr(&htup,
+                              state->sortKeys[0].ssup_attno,
+                              state->tupDesc,
+                              &stup.isnull1);
 
    MemoryContextSwitchTo(state->sortcontext);
 
-   if (!state->sortKeys->abbrev_converter || stup.isnull1)
-   {
-       /*
-        * Store ordinary Datum representation, or NULL value.  If there is a
-        * converter it won't expect NULL values, and cost model is not
-        * required to account for NULL, so in that case we avoid calling
-        * converter and just set datum1 to zeroed representation (to be
-        * consistent, and to support cheap inequality tests for NULL
-        * abbreviated keys).
-        */
-       stup.datum1 = original;
-   }
-   else if (!consider_abort_common(state))
-   {
-       /* Store abbreviated key representation */
-       stup.datum1 = state->sortKeys->abbrev_converter(original,
-                                                       state->sortKeys);
-   }
-   else
-   {
-       /* Abort abbreviation */
-       stup.datum1 = original;
-
-       /*
-        * Set state to be consistent with never trying abbreviation.
-        *
-        * Alter datum1 representation in already-copied tuples, so as to
-        * ensure a consistent representation (current tuple was just
-        * handled).  It does not matter if some dumped tuples are already
-        * sorted on tape, since serialized tuples lack abbreviated keys
-        * (TSS_BUILDRUNS state prevents control reaching here in any case).
-        */
-       REMOVEABBREV(state, state->memtuples, state->memtupcount);
-   }
-
-   puttuple_common(state, &stup);
+   puttuple_common(state, &stup,
+                   state->sortKeys->abbrev_converter && !stup.isnull1);
 
    MemoryContextSwitchTo(oldcontext);
 }
@@ -1908,7 +1874,6 @@ void
 tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
 {
    SortTuple   stup;
-   Datum       original;
    MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext);
 
    /* copy the tuple into sort storage */
@@ -1924,49 +1889,14 @@ tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
     */
    if (state->haveDatum1)
    {
-       original = heap_getattr(tup,
-                               state->indexInfo->ii_IndexAttrNumbers[0],
-                               state->tupDesc,
-                               &stup.isnull1);
-
-       if (!state->sortKeys->abbrev_converter || stup.isnull1)
-       {
-           /*
-            * Store ordinary Datum representation, or NULL value.  If there
-            * is a converter it won't expect NULL values, and cost model is
-            * not required to account for NULL, so in that case we avoid
-            * calling converter and just set datum1 to zeroed representation
-            * (to be consistent, and to support cheap inequality tests for
-            * NULL abbreviated keys).
-            */
-           stup.datum1 = original;
-       }
-       else if (!consider_abort_common(state))
-       {
-           /* Store abbreviated key representation */
-           stup.datum1 = state->sortKeys->abbrev_converter(original,
-                                                           state->sortKeys);
-       }
-       else
-       {
-           /* Abort abbreviation */
-           stup.datum1 = original;
-
-           /*
-            * Set state to be consistent with never trying abbreviation.
-            *
-            * Alter datum1 representation in already-copied tuples, so as to
-            * ensure a consistent representation (current tuple was just
-            * handled).  It does not matter if some dumped tuples are already
-            * sorted on tape, since serialized tuples lack abbreviated keys
-            * (TSS_BUILDRUNS state prevents control reaching here in any
-            * case).
-            */
-           REMOVEABBREV(state, state->memtuples, state->memtupcount);
-       }
+       stup.datum1 = heap_getattr(tup,
+                                  state->indexInfo->ii_IndexAttrNumbers[0],
+                                  state->tupDesc,
+                                  &stup.isnull1);
    }
 
-   puttuple_common(state, &stup);
+   puttuple_common(state, &stup,
+                   state->haveDatum1 && state->sortKeys->abbrev_converter && !stup.isnull1);
 
    MemoryContextSwitchTo(oldcontext);
 }
@@ -1982,7 +1912,6 @@ tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
 {
    MemoryContext oldcontext;
    SortTuple   stup;
-   Datum       original;
    IndexTuple  tuple;
 
    stup.tuple = index_form_tuple_context(RelationGetDescr(rel), values,
@@ -1991,49 +1920,15 @@ tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
    tuple->t_tid = *self;
    USEMEM(state, GetMemoryChunkSpace(stup.tuple));
    /* set up first-column key value */
-   original = index_getattr(tuple,
-                            1,
-                            RelationGetDescr(state->indexRel),
-                            &stup.isnull1);
+   stup.datum1 = index_getattr(tuple,
+                               1,
+                               RelationGetDescr(state->indexRel),
+                               &stup.isnull1);
 
    oldcontext = MemoryContextSwitchTo(state->sortcontext);
 
-   if (!state->sortKeys || !state->sortKeys->abbrev_converter || stup.isnull1)
-   {
-       /*
-        * Store ordinary Datum representation, or NULL value.  If there is a
-        * converter it won't expect NULL values, and cost model is not
-        * required to account for NULL, so in that case we avoid calling
-        * converter and just set datum1 to zeroed representation (to be
-        * consistent, and to support cheap inequality tests for NULL
-        * abbreviated keys).
-        */
-       stup.datum1 = original;
-   }
-   else if (!consider_abort_common(state))
-   {
-       /* Store abbreviated key representation */
-       stup.datum1 = state->sortKeys->abbrev_converter(original,
-                                                       state->sortKeys);
-   }
-   else
-   {
-       /* Abort abbreviation */
-       stup.datum1 = original;
-
-       /*
-        * Set state to be consistent with never trying abbreviation.
-        *
-        * Alter datum1 representation in already-copied tuples, so as to
-        * ensure a consistent representation (current tuple was just
-        * handled).  It does not matter if some dumped tuples are already
-        * sorted on tape, since serialized tuples lack abbreviated keys
-        * (TSS_BUILDRUNS state prevents control reaching here in any case).
-        */
-       REMOVEABBREV(state, state->memtuples, state->memtupcount);
-   }
-
-   puttuple_common(state, &stup);
+   puttuple_common(state, &stup,
+                   state->sortKeys && state->sortKeys->abbrev_converter && !stup.isnull1);
 
    MemoryContextSwitchTo(oldcontext);
 }
@@ -2074,43 +1969,15 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
    }
    else
    {
-       Datum       original = datumCopy(val, false, state->datumTypeLen);
-
        stup.isnull1 = false;
-       stup.tuple = DatumGetPointer(original);
+       stup.datum1 = datumCopy(val, false, state->datumTypeLen);
+       stup.tuple = DatumGetPointer(stup.datum1);
        USEMEM(state, GetMemoryChunkSpace(stup.tuple));
        MemoryContextSwitchTo(state->sortcontext);
-
-       if (!state->sortKeys->abbrev_converter)
-       {
-           stup.datum1 = original;
-       }
-       else if (!consider_abort_common(state))
-       {
-           /* Store abbreviated key representation */
-           stup.datum1 = state->sortKeys->abbrev_converter(original,
-                                                           state->sortKeys);
-       }
-       else
-       {
-           /* Abort abbreviation */
-           stup.datum1 = original;
-
-           /*
-            * Set state to be consistent with never trying abbreviation.
-            *
-            * Alter datum1 representation in already-copied tuples, so as to
-            * ensure a consistent representation (current tuple was just
-            * handled).  It does not matter if some dumped tuples are already
-            * sorted on tape, since serialized tuples lack abbreviated keys
-            * (TSS_BUILDRUNS state prevents control reaching here in any
-            * case).
-            */
-           REMOVEABBREV(state, state->memtuples, state->memtupcount);
-       }
    }
 
-   puttuple_common(state, &stup);
+   puttuple_common(state, &stup,
+                   state->tuples && !isNull && state->sortKeys->abbrev_converter);
 
    MemoryContextSwitchTo(oldcontext);
 }
@@ -2119,10 +1986,41 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
  * Shared code for tuple and datum cases.
  */
 static void
-puttuple_common(Tuplesortstate *state, SortTuple *tuple)
+puttuple_common(Tuplesortstate *state, SortTuple *tuple, bool useAbbrev)
 {
    Assert(!LEADER(state));
 
+   if (!useAbbrev)
+   {
+       /*
+        * Leave ordinary Datum representation, or NULL value.  If there is a
+        * converter it won't expect NULL values, and cost model is not
+        * required to account for NULL, so in that case we avoid calling
+        * converter and just set datum1 to zeroed representation (to be
+        * consistent, and to support cheap inequality tests for NULL
+        * abbreviated keys).
+        */
+   }
+   else if (!consider_abort_common(state))
+   {
+       /* Store abbreviated key representation */
+       tuple->datum1 = state->sortKeys->abbrev_converter(tuple->datum1,
+                                                         state->sortKeys);
+   }
+   else
+   {
+       /*
+        * Set state to be consistent with never trying abbreviation.
+        *
+        * Alter datum1 representation in already-copied tuples, so as to
+        * ensure a consistent representation (current tuple was just
+        * handled).  It does not matter if some dumped tuples are already
+        * sorted on tape, since serialized tuples lack abbreviated keys
+        * (TSS_BUILDRUNS state prevents control reaching here in any case).
+        */
+       REMOVEABBREV(state, state->memtuples, state->memtupcount);
+   }
+
    switch (state->status)
    {
        case TSS_INITIAL: