Add reusable routine for making arrays unique.
authorThomas Munro <tmunro@postgresql.org>
Thu, 7 Nov 2019 03:51:04 +0000 (16:51 +1300)
committerThomas Munro <tmunro@postgresql.org>
Thu, 7 Nov 2019 04:00:48 +0000 (17:00 +1300)
Introduce qunique() and qunique_arg(), which can be used after qsort()
and qsort_arg() respectively to remove duplicate values.  Use it where
appropriate.

Author: Thomas Munro
Reviewed-by: Tom Lane (in an earlier version)
Discussion: https://postgr.es/m/CAEepm%3D2vmFTNpAmwbGGD2WaryM6T3hSDVKQPfUwjdD_5XY6vAA%40mail.gmail.com

13 files changed:
contrib/hstore/hstore_io.c
contrib/intarray/_int_tool.c
contrib/pg_trgm/trgm_op.c
src/backend/access/nbtree/nbtutils.c
src/backend/executor/nodeTidscan.c
src/backend/utils/adt/acl.c
src/backend/utils/adt/tsgistidx.c
src/backend/utils/adt/tsquery_op.c
src/backend/utils/adt/tsvector.c
src/backend/utils/adt/tsvector_op.c
src/backend/utils/adt/txid.c
src/backend/utils/cache/syscache.c
src/include/lib/qunique.h [new file with mode: 0644]

index be3cce4f74000d9bbea8c90090d028912ee482c4..10ec392775ecfda3a876cbd63f446ef32755eb7d 100644 (file)
@@ -322,6 +322,11 @@ hstoreUniquePairs(Pairs *a, int32 l, int32 *buflen)
        }
 
        qsort((void *) a, l, sizeof(Pairs), comparePairs);
+
+       /*
+        * We can't use qunique here because we have some clean-up code to run on
+        * removed elements.
+        */
        ptr = a + 1;
        res = a;
        while (ptr - a < l)
index efff81d77dcd28d4304f38a83373a7a6d90f7c13..e5f4bd4064353bbb802884bc28b275d20058c302 100644 (file)
@@ -7,6 +7,7 @@
 
 #include "_int.h"
 #include "catalog/pg_type.h"
+#include "lib/qunique.h"
 
 /* arguments are assumed sorted & unique-ified */
 bool
@@ -308,23 +309,13 @@ internal_size(int *a, int len)
 ArrayType *
 _int_unique(ArrayType *r)
 {
-       int                *tmp,
-                          *dr,
-                          *data;
        int                     num = ARRNELEMS(r);
+       bool            duplicates_found;       /* not used */
 
-       if (num < 2)
-               return r;
+       num = qunique_arg(ARRPTR(r), num, sizeof(int), isort_cmp,
+                                         &duplicates_found);
 
-       data = tmp = dr = ARRPTR(r);
-       while (tmp - data < num)
-       {
-               if (*tmp != *dr)
-                       *(++dr) = *tmp++;
-               else
-                       tmp++;
-       }
-       return resize_intArrayType(r, dr + 1 - ARRPTR(r));
+       return resize_intArrayType(r, num);
 }
 
 void
index 07a31803786f3792358dc56e77030e5653f9f6df..c9c8cbc73423aac60d2f1747acf58a5388c3572f 100644 (file)
@@ -6,6 +6,7 @@
 #include <ctype.h>
 
 #include "catalog/pg_type.h"
+#include "lib/qunique.h"
 #include "trgm.h"
 #include "tsearch/ts_locale.h"
 #include "utils/lsyscache.h"
@@ -162,26 +163,6 @@ comp_trgm(const void *a, const void *b)
        return CMPTRGM(a, b);
 }
 
-static int
-unique_array(trgm *a, int len)
-{
-       trgm       *curend,
-                          *tmp;
-
-       curend = tmp = a;
-       while (tmp - a < len)
-               if (CMPTRGM(tmp, curend))
-               {
-                       curend++;
-                       CPTRGM(curend, tmp);
-                       tmp++;
-               }
-               else
-                       tmp++;
-
-       return curend + 1 - a;
-}
-
 /*
  * Finds first word in string, returns pointer to the word,
  * endword points to the character after word
@@ -394,7 +375,7 @@ generate_trgm(char *str, int slen)
        if (len > 1)
        {
                qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
-               len = unique_array(GETARR(trg), len);
+               len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
        }
 
        SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
@@ -942,7 +923,7 @@ generate_wildcard_trgm(const char *str, int slen)
        if (len > 1)
        {
                qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
-               len = unique_array(GETARR(trg), len);
+               len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
        }
 
        SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
index bc855dd25dc79b02a6704bc38f2078792caeef76..6a3008dd48d8e7a471157441a300b90d2aeee486 100644 (file)
@@ -21,6 +21,7 @@
 #include "access/reloptions.h"
 #include "access/relscan.h"
 #include "commands/progress.h"
+#include "lib/qunique.h"
 #include "miscadmin.h"
 #include "utils/array.h"
 #include "utils/datum.h"
@@ -435,8 +436,6 @@ _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
        Oid                     elemtype;
        RegProcedure cmp_proc;
        BTSortArrayContext cxt;
-       int                     last_non_dup;
-       int                     i;
 
        if (nelems <= 1)
                return nelems;                  /* no work to do */
@@ -475,20 +474,8 @@ _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
                          _bt_compare_array_elements, (void *) &cxt);
 
        /* Now scan the sorted elements and remove duplicates */
-       last_non_dup = 0;
-       for (i = 1; i < nelems; i++)
-       {
-               int32           compare;
-
-               compare = DatumGetInt32(FunctionCall2Coll(&cxt.flinfo,
-                                                                                                 cxt.collation,
-                                                                                                 elems[last_non_dup],
-                                                                                                 elems[i]));
-               if (compare != 0)
-                       elems[++last_non_dup] = elems[i];
-       }
-
-       return last_non_dup + 1;
+       return qunique_arg(elems, nelems, sizeof(Datum),
+                                          _bt_compare_array_elements, &cxt);
 }
 
 /*
index 8cf22d5bf00a7fc4816f1fe57a04358871067354..a59480f90680991ecb2d850d4a7667a478ec73b6 100644 (file)
@@ -27,6 +27,7 @@
 #include "catalog/pg_type.h"
 #include "executor/execdebug.h"
 #include "executor/nodeTidscan.h"
+#include "lib/qunique.h"
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
 #include "storage/bufmgr.h"
@@ -260,21 +261,13 @@ TidListEval(TidScanState *tidstate)
         */
        if (numTids > 1)
        {
-               int                     lastTid;
-               int                     i;
-
                /* CurrentOfExpr could never appear OR'd with something else */
                Assert(!tidstate->tss_isCurrentOf);
 
                qsort((void *) tidList, numTids, sizeof(ItemPointerData),
                          itemptr_comparator);
-               lastTid = 0;
-               for (i = 1; i < numTids; i++)
-               {
-                       if (!ItemPointerEquals(&tidList[lastTid], &tidList[i]))
-                               tidList[++lastTid] = tidList[i];
-               }
-               numTids = lastTid + 1;
+               numTids = qunique(tidList, numTids, sizeof(ItemPointerData),
+                                                 itemptr_comparator);
        }
 
        tidstate->tss_TidList = tidList;
index d7e6100ccbf8c8f3b59348ed2e0df8b4e181ba71..79bc750d7754a3fccdaeb1aa251e5780edde71e7 100644 (file)
@@ -28,6 +28,7 @@
 #include "commands/tablespace.h"
 #include "foreign/foreign.h"
 #include "funcapi.h"
+#include "lib/qunique.h"
 #include "miscadmin.h"
 #include "utils/acl.h"
 #include "utils/array.h"
@@ -1475,8 +1476,7 @@ aclmembers(const Acl *acl, Oid **roleids)
        Oid                *list;
        const AclItem *acldat;
        int                     i,
-                               j,
-                               k;
+                               j;
 
        if (acl == NULL || ACL_NUM(acl) == 0)
        {
@@ -1508,21 +1508,14 @@ aclmembers(const Acl *acl, Oid **roleids)
        /* Sort the array */
        qsort(list, j, sizeof(Oid), oid_cmp);
 
-       /* Remove duplicates from the array */
-       k = 0;
-       for (i = 1; i < j; i++)
-       {
-               if (list[k] != list[i])
-                       list[++k] = list[i];
-       }
-
        /*
         * We could repalloc the array down to minimum size, but it's hardly worth
         * it since it's only transient memory.
         */
        *roleids = list;
 
-       return k + 1;
+       /* Remove duplicates from the array */
+       return qunique(list, j, sizeof(Oid), oid_cmp);
 }
 
 
index 6ff71a49b8fa35d6da194fa4a364ae03edbc8ed1..af5b6809c50b1fca300c66eb3d5de4059ef69e04 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "access/gist.h"
 #include "access/heaptoast.h"
+#include "lib/qunique.h"
 #include "port/pg_bitutils.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
@@ -122,31 +123,6 @@ compareint(const void *va, const void *vb)
        return (a > b) ? 1 : -1;
 }
 
-/*
- * Removes duplicates from an array of int32. 'l' is
- * size of the input array. Returns the new size of the array.
- */
-static int
-uniqueint(int32 *a, int32 l)
-{
-       int32      *ptr,
-                          *res;
-
-       if (l <= 1)
-               return l;
-
-       ptr = res = a;
-
-       qsort((void *) a, l, sizeof(int32), compareint);
-
-       while (ptr - a < l)
-               if (*ptr != *res)
-                       *(++res) = *ptr++;
-               else
-                       ptr++;
-       return res + 1 - a;
-}
-
 static void
 makesign(BITVECP sign, SignTSVector *a)
 {
@@ -193,7 +169,8 @@ gtsvector_compress(PG_FUNCTION_ARGS)
                        ptr++;
                }
 
-               len = uniqueint(GETARR(res), val->size);
+               qsort(GETARR(res), val->size, sizeof(int), compareint);
+               len = qunique(GETARR(res), val->size, sizeof(int), compareint);
                if (len != val->size)
                {
                        /*
index 1f63d9b6a960f7571314b2fafa9009a37bb5b97a..e6816940ad208b299727cd2787821c6c88330f1f 100644 (file)
@@ -14,6 +14,7 @@
 
 #include "postgres.h"
 
+#include "lib/qunique.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
 
@@ -302,29 +303,6 @@ cmp_string(const void *a, const void *b)
        return strcmp(sa, sb);
 }
 
-static int
-remove_duplicates(char **strings, int n)
-{
-       if (n <= 1)
-               return n;
-       else
-       {
-               int                     i;
-               char       *prev = strings[0];
-               int                     new_n = 1;
-
-               for (i = 1; i < n; i++)
-               {
-                       if (strcmp(strings[i], prev) != 0)
-                       {
-                               strings[new_n++] = strings[i];
-                               prev = strings[i];
-                       }
-               }
-               return new_n;
-       }
-}
-
 Datum
 tsq_mcontains(PG_FUNCTION_ARGS)
 {
@@ -342,9 +320,10 @@ tsq_mcontains(PG_FUNCTION_ARGS)
 
        /* Sort and remove duplicates from both arrays */
        qsort(query_values, query_nvalues, sizeof(char *), cmp_string);
-       query_nvalues = remove_duplicates(query_values, query_nvalues);
+       query_nvalues = qunique(query_values, query_nvalues, sizeof(char *),
+                                                       cmp_string);
        qsort(ex_values, ex_nvalues, sizeof(char *), cmp_string);
-       ex_nvalues = remove_duplicates(ex_values, ex_nvalues);
+       ex_nvalues = qunique(ex_values, ex_nvalues, sizeof(char *), cmp_string);
 
        if (ex_nvalues > query_nvalues)
                result = false;
index ccfc4147fab9033446508e8b561135c4ad4b5876..098eaed3e5e1080e75f5146a7332a2b65321a15c 100644 (file)
@@ -41,8 +41,9 @@ compareWordEntryPos(const void *a, const void *b)
 }
 
 /*
- * Removes duplicate pos entries. If there's two entries with same pos
- * but different weight, the higher weight is retained.
+ * Removes duplicate pos entries. If there's two entries with same pos but
+ * different weight, the higher weight is retained, so we can't use
+ * qunique here.
  *
  * Returns new length.
  */
index 28d042273e3b572dd4afe5ada859931a8381228a..f483c4b228baee3e174c560043e8666bd55000e0 100644 (file)
@@ -21,6 +21,7 @@
 #include "commands/trigger.h"
 #include "executor/spi.h"
 #include "funcapi.h"
+#include "lib/qunique.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "parser/parse_coerce.h"
@@ -475,16 +476,9 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
         */
        if (indices_count > 1)
        {
-               int                     kp;
-
                qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
-               kp = 0;
-               for (k = 1; k < indices_count; k++)
-               {
-                       if (indices_to_delete[k] != indices_to_delete[kp])
-                               indices_to_delete[++kp] = indices_to_delete[k];
-               }
-               indices_count = ++kp;
+               indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
+                                                               compare_int);
        }
 
        /*
@@ -761,7 +755,6 @@ array_to_tsvector(PG_FUNCTION_ARGS)
        bool       *nulls;
        int                     nitems,
                                i,
-                               j,
                                tslen,
                                datalen = 0;
        char       *cur;
@@ -781,13 +774,8 @@ array_to_tsvector(PG_FUNCTION_ARGS)
        if (nitems > 1)
        {
                qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
-               j = 0;
-               for (i = 1; i < nitems; i++)
-               {
-                       if (compare_text_lexemes(&dlexemes[j], &dlexemes[i]) < 0)
-                               dlexemes[++j] = dlexemes[i];
-               }
-               nitems = ++j;
+               nitems = qunique(dlexemes, nitems, sizeof(Datum),
+                                                compare_text_lexemes);
        }
 
        /* Calculate space needed for surviving lexemes. */
@@ -1270,39 +1258,6 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
        return result;
 }
 
-/*
- * Removes duplicate pos entries. We can't use uniquePos() from
- * tsvector.c because array might be longer than MAXENTRYPOS
- *
- * Returns new length.
- */
-static int
-uniqueLongPos(WordEntryPos *pos, int npos)
-{
-       WordEntryPos *pos_iter,
-                          *result;
-
-       if (npos <= 1)
-               return npos;
-
-       qsort((void *) pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
-
-       result = pos;
-       pos_iter = pos + 1;
-       while (pos_iter < pos + npos)
-       {
-               if (WEP_GETPOS(*pos_iter) != WEP_GETPOS(*result))
-               {
-                       result++;
-                       *result = WEP_GETPOS(*pos_iter);
-               }
-
-               pos_iter++;
-       }
-
-       return result + 1 - pos;
-}
-
 /*
  * is there value 'val' in array or not ?
  */
@@ -1397,7 +1352,9 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
                {
                        /* Sort and make unique array of found positions */
                        data->pos = allpos;
-                       data->npos = uniqueLongPos(allpos, npos);
+                       qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
+                       data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
+                                                                compareWordEntryPos);
                        data->allocated = true;
                }
        }
index 90b2c9b69488afe458db1643d3690d0c96d89bcf..e220c5f1364f92f394c1c2bd9cbbadaea59d6378 100644 (file)
@@ -27,6 +27,7 @@
 #include "access/xlog.h"
 #include "funcapi.h"
 #include "miscadmin.h"
+#include "lib/qunique.h"
 #include "libpq/pqformat.h"
 #include "postmaster/postmaster.h"
 #include "storage/lwlock.h"
@@ -213,26 +214,10 @@ cmp_txid(const void *aa, const void *bb)
 static void
 sort_snapshot(TxidSnapshot *snap)
 {
-       txid            last = 0;
-       int                     nxip,
-                               idx1,
-                               idx2;
-
        if (snap->nxip > 1)
        {
                qsort(snap->xip, snap->nxip, sizeof(txid), cmp_txid);
-
-               /* remove duplicates */
-               nxip = snap->nxip;
-               idx1 = idx2 = 0;
-               while (idx1 < nxip)
-               {
-                       if (snap->xip[idx1] != last)
-                               last = snap->xip[idx2++] = snap->xip[idx1];
-                       else
-                               snap->nxip--;
-                       idx1++;
-               }
+               snap->nxip = qunique(snap->xip, snap->nxip, sizeof(txid), cmp_txid);
        }
 }
 
index 16297a52a1906a42582cbfdebe2da25cbc51dfa5..36aee74ab0220781dfab5c057a68f5fad3551edc 100644 (file)
@@ -74,6 +74,7 @@
 #include "catalog/pg_ts_template.h"
 #include "catalog/pg_type.h"
 #include "catalog/pg_user_mapping.h"
+#include "lib/qunique.h"
 #include "utils/rel.h"
 #include "utils/catcache.h"
 #include "utils/syscache.h"
@@ -1010,8 +1011,6 @@ void
 InitCatalogCache(void)
 {
        int                     cacheId;
-       int                     i,
-                               j;
 
        StaticAssertStmt(SysCacheSize == (int) lengthof(cacheinfo),
                                         "SysCacheSize does not match syscache.c's array");
@@ -1048,21 +1047,15 @@ InitCatalogCache(void)
        /* Sort and de-dup OID arrays, so we can use binary search. */
        pg_qsort(SysCacheRelationOid, SysCacheRelationOidSize,
                         sizeof(Oid), oid_compare);
-       for (i = 1, j = 0; i < SysCacheRelationOidSize; i++)
-       {
-               if (SysCacheRelationOid[i] != SysCacheRelationOid[j])
-                       SysCacheRelationOid[++j] = SysCacheRelationOid[i];
-       }
-       SysCacheRelationOidSize = j + 1;
+       SysCacheRelationOidSize =
+               qunique(SysCacheRelationOid, SysCacheRelationOidSize, sizeof(Oid),
+                               oid_compare);
 
        pg_qsort(SysCacheSupportingRelOid, SysCacheSupportingRelOidSize,
                         sizeof(Oid), oid_compare);
-       for (i = 1, j = 0; i < SysCacheSupportingRelOidSize; i++)
-       {
-               if (SysCacheSupportingRelOid[i] != SysCacheSupportingRelOid[j])
-                       SysCacheSupportingRelOid[++j] = SysCacheSupportingRelOid[i];
-       }
-       SysCacheSupportingRelOidSize = j + 1;
+       SysCacheSupportingRelOidSize =
+               qunique(SysCacheSupportingRelOid, SysCacheSupportingRelOidSize,
+                               sizeof(Oid), oid_compare);
 
        CacheInitialized = true;
 }
diff --git a/src/include/lib/qunique.h b/src/include/lib/qunique.h
new file mode 100644 (file)
index 0000000..4d620f8
--- /dev/null
@@ -0,0 +1,65 @@
+/*-------------------------------------------------------------------------
+ *
+ * qunique.h
+ *             inline array unique functions
+ * Portions Copyright (c) 2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *             src/include/lib/qunique.h
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef QUNIQUE_H
+#define QUNIQUE_H
+
+/*
+ * Remove duplicates from a pre-sorted array, according to a user-supplied
+ * comparator.  Usually the array should have been sorted with qsort() using
+ * the same arguments.  Return the new size.
+ */
+static inline size_t
+qunique(void *array, size_t elements, size_t width,
+               int (*compare) (const void *, const void *))
+{
+       char       *bytes = (char *) array;
+       size_t          i,
+                               j;
+
+       if (elements <= 1)
+               return elements;
+
+       for (i = 1, j = 0; i < elements; ++i)
+       {
+               if (compare(bytes + i * width, bytes + j * width) != 0)
+                       memcpy(bytes + ++j * width, bytes + i * width, width);
+       }
+
+       return j + 1;
+}
+
+/*
+ * Like qunique(), but takes a comparator with an extra user data argument
+ * which is passed through, for compatibility with qsort_arg().
+ */
+static inline size_t
+qunique_arg(void *array, size_t elements, size_t width,
+                       int (*compare) (const void *, const void *, void *),
+                       void *arg)
+{
+       char       *bytes = (char *) array;
+       size_t          i,
+                               j;
+
+       if (elements <= 1)
+               return elements;
+
+       for (i = 1, j = 0; i < elements; ++i)
+       {
+               if (compare(bytes + i * width, bytes + j * width, arg) != 0)
+                       memcpy(bytes + ++j * width, bytes + i * width, width);
+       }
+
+       return j + 1;
+}
+
+#endif                                                 /* QUNIQUE_H */