Some improvements to page split in multicolumn GiST index.
authorTeodor Sigaev <teodor@sigaev.ru>
Mon, 29 May 2006 12:50:06 +0000 (12:50 +0000)
committerTeodor Sigaev <teodor@sigaev.ru>
Mon, 29 May 2006 12:50:06 +0000 (12:50 +0000)
If the user picksplit on the n-th column generates equal
left and right unions, then picksplit is called on the (n+1)-th
column.

src/backend/access/gist/gist.c
src/backend/access/gist/gistutil.c
src/include/access/gist_private.h

index 54ac45ee2fea85dedd9df2a7ecacd800c2c04e2e..326e87e78894e817b555c62d412cc863db7c2929 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.137 2006/05/24 11:01:39 teodor Exp $
+ *   $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.138 2006/05/29 12:50:06 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1033,7 +1033,13 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *gist
        /*
         * all keys are not-null
         */
-       gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate);
+       if ( gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate) && attno+1 != r->rd_att->natts )
+           /*
+            * Splitting on attno column is not optimal: unions of left and right
+            * pages are the same, so we will try to split the page by the
+            * following columns
+            */
+           gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno+1);
    }
 }
 
index 3db72aa199d8941d459573f32169390c70166fd6..987aed89f71de018c1b8fb1be2e5437482b0d288 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *         $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.14 2006/05/24 11:01:39 teodor Exp $
+ *         $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.15 2006/05/29 12:50:06 teodor Exp $
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
@@ -262,6 +262,16 @@ gistMakeUnionKey( GISTSTATE *giststate, int attno,
    }
 }
 
+static bool
+gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b) {
+   bool result;
+
+   FunctionCall3(&giststate->equalFn[attno],
+                               a, b,
+                               PointerGetDatum(&result));
+   return result;
+}
+
 /*
  * Forms union of oldtup and addtup, if union == oldtup then return NULL
  */
@@ -300,19 +310,8 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
            continue;
 
        if ( !addisnull[i] ) {
-           if ( oldisnull[i] )
+           if ( oldisnull[i] || gistKeyIsEQ(giststate, i, oldentries[i].key, attrS[i])==false )
                neednew = true;
-           else {
-               bool        result;
-
-               FunctionCall3(&giststate->equalFn[i],
-                               oldentries[i].key,
-                               attrS[i],
-                               PointerGetDatum(&result));
-
-               if (!result)
-                   neednew = true;
-           }
        }
    }
 
@@ -395,7 +394,6 @@ gistfindgroup(GISTSTATE *giststate, GISTENTRY *valvec, GIST_SPLITVEC *spl, int a
    {
        int         j;
        int         len;
-       bool        result;
 
        if (spl->spl_idgrp[spl->spl_left[i]])
            continue;
@@ -405,11 +403,7 @@ gistfindgroup(GISTSTATE *giststate, GISTENTRY *valvec, GIST_SPLITVEC *spl, int a
        {
            if (spl->spl_idgrp[spl->spl_right[j]])
                continue;
-           FunctionCall3(&giststate->equalFn[attno],
-                         valvec[spl->spl_left[i]].key,
-                         valvec[spl->spl_right[j]].key,
-                         PointerGetDatum(&result));
-           if (result)
+           if (gistKeyIsEQ(giststate, attno, valvec[spl->spl_left[i]].key, valvec[spl->spl_right[j]].key))
            {
                spl->spl_idgrp[spl->spl_right[j]] = curid;
                len++;
@@ -425,11 +419,7 @@ gistfindgroup(GISTSTATE *giststate, GISTENTRY *valvec, GIST_SPLITVEC *spl, int a
            {
                if (spl->spl_idgrp[spl->spl_left[j]])
                    continue;
-               FunctionCall3(&giststate->equalFn[attno],
-                             valvec[spl->spl_left[i]].key,
-                             valvec[spl->spl_left[j]].key,
-                             PointerGetDatum(&result));
-               if (result)
+               if (gistKeyIsEQ(giststate, attno, valvec[spl->spl_left[i]].key, valvec[spl->spl_left[j]].key))
                {
                    spl->spl_idgrp[spl->spl_left[j]] = curid;
                    len++;
@@ -758,7 +748,14 @@ gistpenalty(GISTSTATE *giststate, int attno,
    return penalty;
 }
 
-void
+/*
+ * Calls user picksplit method for attno columns to split vector to
+ * two vectors. May use attno+n columns data to
+ * get better split.
+ * Returns TRUE if left and right unions of attno columns are the same,
+ * so caller may find better split
+ */
+bool
 gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GIST_SPLITVEC *v,
                  IndexTuple *itup, int len, GISTSTATE *giststate)
 {
@@ -787,24 +784,36 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GIST_SPLITVE
     */
    if (giststate->tupdesc->natts > 1 && attno+1 != giststate->tupdesc->natts)
    {
-       int         MaxGrpId;
+       if ( gistKeyIsEQ(giststate, attno, v->spl_ldatum, v->spl_rdatum) ) {
+           /*
+            * Left and right keys' unions are equal, so
+            * we can get a better split by the following columns. Note,
+            * unions for attno columns are already done.
+            */
+
+           return true;
+       } else {
+           int         MaxGrpId;
 
-       v->spl_idgrp = (int *) palloc0(sizeof(int) * entryvec->n);
-       v->spl_grpflag = (char *) palloc0(sizeof(char) * entryvec->n);
-       v->spl_ngrp = (int *) palloc(sizeof(int) * entryvec->n);
+           v->spl_idgrp = (int *) palloc0(sizeof(int) * entryvec->n);
+           v->spl_grpflag = (char *) palloc0(sizeof(char) * entryvec->n);
+           v->spl_ngrp = (int *) palloc(sizeof(int) * entryvec->n);
 
-       MaxGrpId = gistfindgroup(giststate, entryvec->vector, v, attno);
+           MaxGrpId = gistfindgroup(giststate, entryvec->vector, v, attno);
 
-       /* form union of sub keys for each page (l,p) */
-       gistunionsubkey(giststate, itup, v, attno + 1);
+           /* form union of sub keys for each page (l,p) */
+           gistunionsubkey(giststate, itup, v, attno + 1);
 
-       /*
-        * if possible, we insert equivalent tuples with control by penalty
-        * for a subkey(s)
-        */
-       if (MaxGrpId > 1)
-           gistadjsubkey(r, itup, len, v, giststate, attno);
+           /*
+            * if possible, we insert equivalent tuples with control by penalty
+           * for a subkey(s)
+           */
+           if (MaxGrpId > 1)
+               gistadjsubkey(r, itup, len, v, giststate, attno);
+       }
    }
+
+   return false;
 }
 
 /*
index 43a6f62943ab511da38a7524c68e1f0e515f9400..f75682847a66e11f7e0898bec3a59a603292beb5 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/gist_private.h,v 1.16 2006/05/24 11:01:39 teodor Exp $
+ * $PostgreSQL: pgsql/src/include/access/gist_private.h,v 1.17 2006/05/29 12:50:06 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -312,7 +312,7 @@ extern void GISTInitBuffer(Buffer b, uint32 f);
 extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
               Datum k, Relation r, Page pg, OffsetNumber o,
               int b, bool l, bool isNull);
-void gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GIST_SPLITVEC *v,
+bool gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GIST_SPLITVEC *v,
                  IndexTuple *itup, int len, GISTSTATE *giststate);
 
 /* gistvacuum.c */