LCOV - differential code coverage report
Current view: top level - src/backend/access/gist - gistsplit.c (source / functions) Coverage Total Hit UBC CBC
Current: Differential Code Coverage HEAD vs 15 Lines: 78.4 % 236 185 51 185
Current Date: 2023-04-08 15:15:32 Functions: 80.0 % 10 8 2 8
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * gistsplit.c
       4                 :  *    Multi-column page splitting algorithm
       5                 :  *
       6                 :  * This file is concerned with making good page-split decisions in multi-column
       7                 :  * GiST indexes.  The opclass-specific picksplit functions can only be expected
       8                 :  * to produce answers based on a single column.  We first run the picksplit
       9                 :  * function for column 1; then, if there are more columns, we check if any of
      10                 :  * the tuples are "don't cares" so far as the column 1 split is concerned
      11                 :  * (that is, they could go to either side for no additional penalty).  If so,
      12                 :  * we try to redistribute those tuples on the basis of the next column.
      13                 :  * Repeat till we're out of columns.
      14                 :  *
      15                 :  * gistSplitByKey() is the entry point to this file.
      16                 :  *
      17                 :  *
      18                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      19                 :  * Portions Copyright (c) 1994, Regents of the University of California
      20                 :  *
      21                 :  * IDENTIFICATION
      22                 :  *    src/backend/access/gist/gistsplit.c
      23                 :  *
      24                 :  *-------------------------------------------------------------------------
      25                 :  */
      26                 : #include "postgres.h"
      27                 : 
      28                 : #include "access/gist_private.h"
      29                 : #include "utils/rel.h"
      30                 : 
      31                 : typedef struct
      32                 : {
      33                 :     OffsetNumber *entries;
      34                 :     int         len;
      35                 :     Datum      *attr;
      36                 :     bool       *isnull;
      37                 :     bool       *dontcare;
      38                 : } GistSplitUnion;
      39                 : 
      40                 : 
      41                 : /*
      42                 :  * Form unions of subkeys in itvec[] entries listed in gsvp->entries[],
      43                 :  * ignoring any tuples that are marked in gsvp->dontcare[].  Subroutine for
      44                 :  * gistunionsubkey.
      45                 :  */
      46                 : static void
      47 CBC        2742 : gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
      48                 :                    GistSplitUnion *gsvp)
      49                 : {
      50                 :     IndexTuple *cleanedItVec;
      51                 :     int         i,
      52            2742 :                 cleanedLen = 0;
      53                 : 
      54            2742 :     cleanedItVec = (IndexTuple *) palloc(sizeof(IndexTuple) * gsvp->len);
      55                 : 
      56          139801 :     for (i = 0; i < gsvp->len; i++)
      57                 :     {
      58          137059 :         if (gsvp->dontcare && gsvp->dontcare[gsvp->entries[i]])
      59 UBC           0 :             continue;
      60                 : 
      61 CBC      137059 :         cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1];
      62                 :     }
      63                 : 
      64            2742 :     gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen,
      65                 :                        gsvp->attr, gsvp->isnull);
      66                 : 
      67            2742 :     pfree(cleanedItVec);
      68            2742 : }
      69                 : 
      70                 : /*
      71                 :  * Recompute unions of left- and right-side subkeys after a page split,
      72                 :  * ignoring any tuples that are marked in spl->spl_dontcare[].
      73                 :  *
      74                 :  * Note: we always recompute union keys for all index columns.  In some cases
      75                 :  * this might represent duplicate work for the leftmost column(s), but it's
      76                 :  * not safe to assume that "zero penalty to move a tuple" means "the union
      77                 :  * key doesn't change at all".  Penalty functions aren't 100% accurate.
      78                 :  */
      79                 : static void
      80            1371 : gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl)
      81                 : {
      82                 :     GistSplitUnion gsvp;
      83                 : 
      84            1371 :     gsvp.dontcare = spl->spl_dontcare;
      85                 : 
      86            1371 :     gsvp.entries = spl->splitVector.spl_left;
      87            1371 :     gsvp.len = spl->splitVector.spl_nleft;
      88            1371 :     gsvp.attr = spl->spl_lattr;
      89            1371 :     gsvp.isnull = spl->spl_lisnull;
      90                 : 
      91            1371 :     gistunionsubkeyvec(giststate, itvec, &gsvp);
      92                 : 
      93            1371 :     gsvp.entries = spl->splitVector.spl_right;
      94            1371 :     gsvp.len = spl->splitVector.spl_nright;
      95            1371 :     gsvp.attr = spl->spl_rattr;
      96            1371 :     gsvp.isnull = spl->spl_risnull;
      97                 : 
      98            1371 :     gistunionsubkeyvec(giststate, itvec, &gsvp);
      99            1371 : }
     100                 : 
     101                 : /*
     102                 :  * Find tuples that are "don't cares", that is could be moved to the other
     103                 :  * side of the split with zero penalty, so far as the attno column is
     104                 :  * concerned.
     105                 :  *
     106                 :  * Don't-care tuples are marked by setting the corresponding entry in
     107                 :  * spl->spl_dontcare[] to "true".  Caller must have initialized that array
     108                 :  * to zeroes.
     109                 :  *
     110                 :  * Returns number of don't-cares found.
     111                 :  */
     112                 : static int
     113            1265 : findDontCares(Relation r, GISTSTATE *giststate, GISTENTRY *valvec,
     114                 :               GistSplitVector *spl, int attno)
     115                 : {
     116                 :     int         i;
     117                 :     GISTENTRY   entry;
     118            1265 :     int         NumDontCare = 0;
     119                 : 
     120                 :     /*
     121                 :      * First, search the left-side tuples to see if any have zero penalty to
     122                 :      * be added to the right-side union key.
     123                 :      *
     124                 :      * attno column is known all-not-null (see gistSplitByKey), so we need not
     125                 :      * check for nulls
     126                 :      */
     127            1265 :     gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL,
     128                 :                   (OffsetNumber) 0, false);
     129           62237 :     for (i = 0; i < spl->splitVector.spl_nleft; i++)
     130                 :     {
     131           60972 :         int         j = spl->splitVector.spl_left[i];
     132           60972 :         float       penalty = gistpenalty(giststate, attno, &entry, false,
     133           60972 :                                           &valvec[j], false);
     134                 : 
     135           60972 :         if (penalty == 0.0)
     136                 :         {
     137             184 :             spl->spl_dontcare[j] = true;
     138             184 :             NumDontCare++;
     139                 :         }
     140                 :     }
     141                 : 
     142                 :     /* And conversely for the right-side tuples */
     143            1265 :     gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL,
     144                 :                   (OffsetNumber) 0, false);
     145           63497 :     for (i = 0; i < spl->splitVector.spl_nright; i++)
     146                 :     {
     147           62232 :         int         j = spl->splitVector.spl_right[i];
     148           62232 :         float       penalty = gistpenalty(giststate, attno, &entry, false,
     149           62232 :                                           &valvec[j], false);
     150                 : 
     151           62232 :         if (penalty == 0.0)
     152                 :         {
     153             185 :             spl->spl_dontcare[j] = true;
     154             185 :             NumDontCare++;
     155                 :         }
     156                 :     }
     157                 : 
     158            1265 :     return NumDontCare;
     159                 : }
     160                 : 
     161                 : /*
     162                 :  * Remove tuples that are marked don't-cares from the tuple index array a[]
     163                 :  * of length *len.  This is applied separately to the spl_left and spl_right
     164                 :  * arrays.
     165                 :  */
     166                 : static void
     167               4 : removeDontCares(OffsetNumber *a, int *len, const bool *dontcare)
     168                 : {
     169                 :     int         origlen,
     170                 :                 newlen,
     171                 :                 i;
     172                 :     OffsetNumber *curwpos;
     173                 : 
     174               4 :     origlen = newlen = *len;
     175               4 :     curwpos = a;
     176             376 :     for (i = 0; i < origlen; i++)
     177                 :     {
     178             372 :         OffsetNumber ai = a[i];
     179                 : 
     180             372 :         if (dontcare[ai] == false)
     181                 :         {
     182                 :             /* re-emit item into a[] */
     183               3 :             *curwpos = ai;
     184               3 :             curwpos++;
     185                 :         }
     186                 :         else
     187             369 :             newlen--;
     188                 :     }
     189                 : 
     190               4 :     *len = newlen;
     191               4 : }
     192                 : 
     193                 : /*
     194                 :  * Place a single don't-care tuple into either the left or right side of the
     195                 :  * split, according to which has least penalty for merging the tuple into
     196                 :  * the previously-computed union keys.  We need consider only columns starting
     197                 :  * at attno.
     198                 :  */
     199                 : static void
     200 UBC           0 : placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v,
     201                 :          IndexTuple itup, OffsetNumber off, int attno)
     202                 : {
     203                 :     GISTENTRY   identry[INDEX_MAX_KEYS];
     204                 :     bool        isnull[INDEX_MAX_KEYS];
     205               0 :     bool        toLeft = true;
     206                 : 
     207               0 :     gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0,
     208                 :                       identry, isnull);
     209                 : 
     210               0 :     for (; attno < giststate->nonLeafTupdesc->natts; attno++)
     211                 :     {
     212                 :         float       lpenalty,
     213                 :                     rpenalty;
     214                 :         GISTENTRY   entry;
     215                 : 
     216               0 :         gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, false);
     217               0 :         lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno],
     218               0 :                                identry + attno, isnull[attno]);
     219               0 :         gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, false);
     220               0 :         rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno],
     221               0 :                                identry + attno, isnull[attno]);
     222                 : 
     223               0 :         if (lpenalty != rpenalty)
     224                 :         {
     225               0 :             if (lpenalty > rpenalty)
     226               0 :                 toLeft = false;
     227               0 :             break;
     228                 :         }
     229                 :     }
     230                 : 
     231               0 :     if (toLeft)
     232               0 :         v->splitVector.spl_left[v->splitVector.spl_nleft++] = off;
     233                 :     else
     234               0 :         v->splitVector.spl_right[v->splitVector.spl_nright++] = off;
     235               0 : }
     236                 : 
     237                 : #define SWAPVAR( s, d, t ) \
     238                 : do {    \
     239                 :     (t) = (s); \
     240                 :     (s) = (d); \
     241                 :     (d) = (t); \
     242                 : } while(0)
     243                 : 
     244                 : /*
     245                 :  * Clean up when we did a secondary split but the user-defined PickSplit
     246                 :  * method didn't support it (leaving spl_ldatum_exists or spl_rdatum_exists
     247                 :  * true).
     248                 :  *
     249                 :  * We consider whether to swap the left and right outputs of the secondary
     250                 :  * split; this can be worthwhile if the penalty for merging those tuples into
     251                 :  * the previously chosen sets is less that way.
     252                 :  *
     253                 :  * In any case we must update the union datums for the current column by
     254                 :  * adding in the previous union keys (oldL/oldR), since the user-defined
     255                 :  * PickSplit method didn't do so.
     256                 :  */
     257                 : static void
     258 CBC           1 : supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno,
     259                 :                       GIST_SPLITVEC *sv, Datum oldL, Datum oldR)
     260                 : {
     261               1 :     bool        leaveOnLeft = true,
     262                 :                 tmpBool;
     263                 :     GISTENTRY   entryL,
     264                 :                 entryR,
     265                 :                 entrySL,
     266                 :                 entrySR;
     267                 : 
     268               1 :     gistentryinit(entryL, oldL, r, NULL, 0, false);
     269               1 :     gistentryinit(entryR, oldR, r, NULL, 0, false);
     270               1 :     gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false);
     271               1 :     gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false);
     272                 : 
     273               1 :     if (sv->spl_ldatum_exists && sv->spl_rdatum_exists)
     274               1 :     {
     275                 :         float       penalty1,
     276                 :                     penalty2;
     277                 : 
     278               1 :         penalty1 = gistpenalty(giststate, attno, &entryL, false, &entrySL, false) +
     279               1 :             gistpenalty(giststate, attno, &entryR, false, &entrySR, false);
     280               1 :         penalty2 = gistpenalty(giststate, attno, &entryL, false, &entrySR, false) +
     281               1 :             gistpenalty(giststate, attno, &entryR, false, &entrySL, false);
     282                 : 
     283               1 :         if (penalty1 > penalty2)
     284 UBC           0 :             leaveOnLeft = false;
     285                 :     }
     286                 :     else
     287                 :     {
     288               0 :         GISTENTRY  *entry1 = (sv->spl_ldatum_exists) ? &entryL : &entryR;
     289                 :         float       penalty1,
     290                 :                     penalty2;
     291                 : 
     292                 :         /*
     293                 :          * There is only one previously defined union, so we just choose swap
     294                 :          * or not by lowest penalty for that side.  We can only get here if a
     295                 :          * secondary split happened to have all NULLs in its column in the
     296                 :          * tuples that the outer recursion level had assigned to one side.
     297                 :          * (Note that the null checks in gistSplitByKey don't prevent the
     298                 :          * case, because they'll only be checking tuples that were considered
     299                 :          * don't-cares at the outer recursion level, not the tuples that went
     300                 :          * into determining the passed-down left and right union keys.)
     301                 :          */
     302               0 :         penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false);
     303               0 :         penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false);
     304                 : 
     305               0 :         if (penalty1 < penalty2)
     306               0 :             leaveOnLeft = sv->spl_ldatum_exists;
     307                 :         else
     308               0 :             leaveOnLeft = sv->spl_rdatum_exists;
     309                 :     }
     310                 : 
     311 CBC           1 :     if (leaveOnLeft == false)
     312                 :     {
     313                 :         /*
     314                 :          * swap left and right
     315                 :          */
     316                 :         OffsetNumber *off,
     317                 :                     noff;
     318                 :         Datum       datum;
     319                 : 
     320 UBC           0 :         SWAPVAR(sv->spl_left, sv->spl_right, off);
     321               0 :         SWAPVAR(sv->spl_nleft, sv->spl_nright, noff);
     322               0 :         SWAPVAR(sv->spl_ldatum, sv->spl_rdatum, datum);
     323               0 :         gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false);
     324               0 :         gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false);
     325                 :     }
     326                 : 
     327 CBC           1 :     if (sv->spl_ldatum_exists)
     328               1 :         gistMakeUnionKey(giststate, attno, &entryL, false, &entrySL, false,
     329                 :                          &sv->spl_ldatum, &tmpBool);
     330                 : 
     331               1 :     if (sv->spl_rdatum_exists)
     332               1 :         gistMakeUnionKey(giststate, attno, &entryR, false, &entrySR, false,
     333                 :                          &sv->spl_rdatum, &tmpBool);
     334                 : 
     335               1 :     sv->spl_ldatum_exists = sv->spl_rdatum_exists = false;
     336               1 : }
     337                 : 
     338                 : /*
     339                 :  * Trivial picksplit implementation. Function called only
     340                 :  * if user-defined picksplit puts all keys on the same side of the split.
     341                 :  * That is a bug of user-defined picksplit but we don't want to fail.
     342                 :  */
     343                 : static void
     344              45 : genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC *v, int attno)
     345                 : {
     346                 :     OffsetNumber i,
     347                 :                 maxoff;
     348                 :     int         nbytes;
     349                 :     GistEntryVector *evec;
     350                 : 
     351              45 :     maxoff = entryvec->n - 1;
     352                 : 
     353              45 :     nbytes = (maxoff + 2) * sizeof(OffsetNumber);
     354                 : 
     355              45 :     v->spl_left = (OffsetNumber *) palloc(nbytes);
     356              45 :     v->spl_right = (OffsetNumber *) palloc(nbytes);
     357              45 :     v->spl_nleft = v->spl_nright = 0;
     358                 : 
     359           12555 :     for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
     360                 :     {
     361           12510 :         if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
     362                 :         {
     363            6255 :             v->spl_left[v->spl_nleft] = i;
     364            6255 :             v->spl_nleft++;
     365                 :         }
     366                 :         else
     367                 :         {
     368            6255 :             v->spl_right[v->spl_nright] = i;
     369            6255 :             v->spl_nright++;
     370                 :         }
     371                 :     }
     372                 : 
     373                 :     /*
     374                 :      * Form union datums for each side
     375                 :      */
     376              45 :     evec = palloc(sizeof(GISTENTRY) * entryvec->n + GEVHDRSZ);
     377                 : 
     378              45 :     evec->n = v->spl_nleft;
     379              45 :     memcpy(evec->vector, entryvec->vector + FirstOffsetNumber,
     380              45 :            sizeof(GISTENTRY) * evec->n);
     381              45 :     v->spl_ldatum = FunctionCall2Coll(&giststate->unionFn[attno],
     382                 :                                       giststate->supportCollation[attno],
     383                 :                                       PointerGetDatum(evec),
     384                 :                                       PointerGetDatum(&nbytes));
     385                 : 
     386              45 :     evec->n = v->spl_nright;
     387              45 :     memcpy(evec->vector, entryvec->vector + FirstOffsetNumber + v->spl_nleft,
     388              45 :            sizeof(GISTENTRY) * evec->n);
     389              45 :     v->spl_rdatum = FunctionCall2Coll(&giststate->unionFn[attno],
     390                 :                                       giststate->supportCollation[attno],
     391                 :                                       PointerGetDatum(evec),
     392                 :                                       PointerGetDatum(&nbytes));
     393              45 : }
     394                 : 
     395                 : /*
     396                 :  * Calls user picksplit method for attno column to split tuples into
     397                 :  * two vectors.
     398                 :  *
     399                 :  * Returns false if split is complete (there are no more index columns, or
     400                 :  * there is no need to consider them because split is optimal already).
     401                 :  *
     402                 :  * Returns true and v->spl_dontcare = NULL if the picksplit result is
     403                 :  * degenerate (all tuples seem to be don't-cares), so we should just
     404                 :  * disregard this column and split on the next column(s) instead.
     405                 :  *
     406                 :  * Returns true and v->spl_dontcare != NULL if there are don't-care tuples
     407                 :  * that could be relocated based on the next column(s).  The don't-care
     408                 :  * tuples have been removed from the split and must be reinserted by caller.
     409                 :  * There is at least one non-don't-care tuple on each side of the split,
     410                 :  * and union keys for all columns are updated to include just those tuples.
     411                 :  *
     412                 :  * A true result implies there is at least one more index column.
     413                 :  */
     414                 : static bool
     415           12815 : gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v,
     416                 :                   IndexTuple *itup, int len, GISTSTATE *giststate)
     417                 : {
     418           12815 :     GIST_SPLITVEC *sv = &v->splitVector;
     419                 : 
     420                 :     /*
     421                 :      * Prepare spl_ldatum/spl_rdatum/spl_ldatum_exists/spl_rdatum_exists in
     422                 :      * case we are doing a secondary split (see comments in gist.h).
     423                 :      */
     424           12815 :     sv->spl_ldatum_exists = !(v->spl_lisnull[attno]);
     425           12815 :     sv->spl_rdatum_exists = !(v->spl_risnull[attno]);
     426           12815 :     sv->spl_ldatum = v->spl_lattr[attno];
     427           12815 :     sv->spl_rdatum = v->spl_rattr[attno];
     428                 : 
     429                 :     /*
     430                 :      * Let the opclass-specific PickSplit method do its thing.  Note that at
     431                 :      * this point we know there are no null keys in the entryvec.
     432                 :      */
     433           12815 :     FunctionCall2Coll(&giststate->picksplitFn[attno],
     434                 :                       giststate->supportCollation[attno],
     435                 :                       PointerGetDatum(entryvec),
     436                 :                       PointerGetDatum(sv));
     437                 : 
     438           12815 :     if (sv->spl_nleft == 0 || sv->spl_nright == 0)
     439                 :     {
     440                 :         /*
     441                 :          * User-defined picksplit failed to create an actual split, ie it put
     442                 :          * everything on the same side.  Complain but cope.
     443                 :          */
     444              45 :         ereport(DEBUG1,
     445                 :                 (errcode(ERRCODE_INTERNAL_ERROR),
     446                 :                  errmsg("picksplit method for column %d of index \"%s\" failed",
     447                 :                         attno + 1, RelationGetRelationName(r)),
     448                 :                  errhint("The index is not optimal. To optimize it, contact a developer, or try to use the column as the second one in the CREATE INDEX command.")));
     449                 : 
     450                 :         /*
     451                 :          * Reinit GIST_SPLITVEC. Although these fields are not used by
     452                 :          * genericPickSplit(), set them up for further processing
     453                 :          */
     454              45 :         sv->spl_ldatum_exists = !(v->spl_lisnull[attno]);
     455              45 :         sv->spl_rdatum_exists = !(v->spl_risnull[attno]);
     456              45 :         sv->spl_ldatum = v->spl_lattr[attno];
     457              45 :         sv->spl_rdatum = v->spl_rattr[attno];
     458                 : 
     459                 :         /* Do a generic split */
     460              45 :         genericPickSplit(giststate, entryvec, sv, attno);
     461                 :     }
     462                 :     else
     463                 :     {
     464                 :         /* hack for compatibility with old picksplit API */
     465           12770 :         if (sv->spl_left[sv->spl_nleft - 1] == InvalidOffsetNumber)
     466 UBC           0 :             sv->spl_left[sv->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1);
     467 CBC       12770 :         if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber)
     468 UBC           0 :             sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1);
     469                 :     }
     470                 : 
     471                 :     /* Clean up if PickSplit didn't take care of a secondary split */
     472 CBC       12815 :     if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
     473               1 :         supportSecondarySplit(r, giststate, attno, sv,
     474                 :                               v->spl_lattr[attno], v->spl_rattr[attno]);
     475                 : 
     476                 :     /* emit union datums computed by PickSplit back to v arrays */
     477           12815 :     v->spl_lattr[attno] = sv->spl_ldatum;
     478           12815 :     v->spl_rattr[attno] = sv->spl_rdatum;
     479           12815 :     v->spl_lisnull[attno] = false;
     480           12815 :     v->spl_risnull[attno] = false;
     481                 : 
     482                 :     /*
     483                 :      * If index columns remain, then consider whether we can improve the split
     484                 :      * by using them.
     485                 :      */
     486           12815 :     v->spl_dontcare = NULL;
     487                 : 
     488           12815 :     if (attno + 1 < giststate->nonLeafTupdesc->natts)
     489                 :     {
     490                 :         int         NumDontCare;
     491                 : 
     492                 :         /*
     493                 :          * Make a quick check to see if left and right union keys are equal;
     494                 :          * if so, the split is certainly degenerate, so tell caller to
     495                 :          * re-split with the next column.
     496                 :          */
     497            1282 :         if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum))
     498              17 :             return true;
     499                 : 
     500                 :         /*
     501                 :          * Locate don't-care tuples, if any.  If there are none, the split is
     502                 :          * optimal, so just fall out and return false.
     503                 :          */
     504            1265 :         v->spl_dontcare = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1));
     505                 : 
     506            1265 :         NumDontCare = findDontCares(r, giststate, entryvec->vector, v, attno);
     507                 : 
     508            1265 :         if (NumDontCare > 0)
     509                 :         {
     510                 :             /*
     511                 :              * Remove don't-cares from spl_left[] and spl_right[].
     512                 :              */
     513               2 :             removeDontCares(sv->spl_left, &sv->spl_nleft, v->spl_dontcare);
     514               2 :             removeDontCares(sv->spl_right, &sv->spl_nright, v->spl_dontcare);
     515                 : 
     516                 :             /*
     517                 :              * If all tuples on either side were don't-cares, the split is
     518                 :              * degenerate, and we're best off to ignore it and split on the
     519                 :              * next column.  (We used to try to press on with a secondary
     520                 :              * split by forcing a random tuple on each side to be treated as
     521                 :              * non-don't-care, but it seems unlikely that that technique
     522                 :              * really gives a better result.  Note that we don't want to try a
     523                 :              * secondary split with empty left or right primary split sides,
     524                 :              * because then there is no union key on that side for the
     525                 :              * PickSplit function to try to expand, so it can have no good
     526                 :              * figure of merit for what it's doing.  Also note that this check
     527                 :              * ensures we can't produce a bogus one-side-only split in the
     528                 :              * NumDontCare == 1 special case below.)
     529                 :              */
     530               2 :             if (sv->spl_nleft == 0 || sv->spl_nright == 0)
     531                 :             {
     532               1 :                 v->spl_dontcare = NULL;
     533               1 :                 return true;
     534                 :             }
     535                 : 
     536                 :             /*
     537                 :              * Recompute union keys, considering only non-don't-care tuples.
     538                 :              * NOTE: this will set union keys for remaining index columns,
     539                 :              * which will cause later calls of gistUserPicksplit to pass those
     540                 :              * values down to user-defined PickSplit methods with
     541                 :              * spl_ldatum_exists/spl_rdatum_exists set true.
     542                 :              */
     543               1 :             gistunionsubkey(giststate, itup, v);
     544                 : 
     545               1 :             if (NumDontCare == 1)
     546                 :             {
     547                 :                 /*
     548                 :                  * If there's only one don't-care tuple then we can't do a
     549                 :                  * PickSplit on it, so just choose whether to send it left or
     550                 :                  * right by comparing penalties.  We needed the
     551                 :                  * gistunionsubkey step anyway so that we have appropriate
     552                 :                  * union keys for figuring the penalties.
     553                 :                  */
     554                 :                 OffsetNumber toMove;
     555                 : 
     556                 :                 /* find it ... */
     557 UBC           0 :                 for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++)
     558                 :                 {
     559               0 :                     if (v->spl_dontcare[toMove])
     560               0 :                         break;
     561                 :                 }
     562               0 :                 Assert(toMove < entryvec->n);
     563                 : 
     564                 :                 /* ... and assign it to cheaper side */
     565               0 :                 placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1);
     566                 : 
     567                 :                 /*
     568                 :                  * At this point the union keys are wrong, but we don't care
     569                 :                  * because we're done splitting.  The outermost recursion
     570                 :                  * level of gistSplitByKey will fix things before returning.
     571                 :                  */
     572                 :             }
     573                 :             else
     574 CBC           1 :                 return true;
     575                 :         }
     576                 :     }
     577                 : 
     578           12796 :     return false;
     579                 : }
     580                 : 
     581                 : /*
     582                 :  * simply split page in half
     583                 :  */
     584                 : static void
     585 UBC           0 : gistSplitHalf(GIST_SPLITVEC *v, int len)
     586                 : {
     587                 :     int         i;
     588                 : 
     589               0 :     v->spl_nright = v->spl_nleft = 0;
     590               0 :     v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
     591               0 :     v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
     592               0 :     for (i = 1; i <= len; i++)
     593               0 :         if (i < len / 2)
     594               0 :             v->spl_right[v->spl_nright++] = i;
     595                 :         else
     596               0 :             v->spl_left[v->spl_nleft++] = i;
     597                 : 
     598                 :     /* we need not compute union keys, caller took care of it */
     599               0 : }
     600                 : 
     601                 : /*
     602                 :  * gistSplitByKey: main entry point for page-splitting algorithm
     603                 :  *
     604                 :  * r: index relation
     605                 :  * page: page being split
     606                 :  * itup: array of IndexTuples to be processed
     607                 :  * len: number of IndexTuples to be processed (must be at least 2)
     608                 :  * giststate: additional info about index
     609                 :  * v: working state and output area
     610                 :  * attno: column we are working on (zero-based index)
     611                 :  *
     612                 :  * Outside caller must initialize v->spl_lisnull and v->spl_risnull arrays
     613                 :  * to all-true.  On return, spl_left/spl_nleft contain indexes of tuples
     614                 :  * to go left, spl_right/spl_nright contain indexes of tuples to go right,
     615                 :  * spl_lattr/spl_lisnull contain left-side union key values, and
     616                 :  * spl_rattr/spl_risnull contain right-side union key values.  Other fields
     617                 :  * in this struct are workspace for this file.
     618                 :  *
     619                 :  * Outside caller must pass zero for attno.  The function may internally
     620                 :  * recurse to the next column by passing attno+1.
     621                 :  */
     622                 : void
     623 CBC       12903 : gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
     624                 :                GISTSTATE *giststate, GistSplitVector *v, int attno)
     625                 : {
     626                 :     GistEntryVector *entryvec;
     627                 :     OffsetNumber *offNullTuples;
     628           12903 :     int         nOffNullTuples = 0;
     629                 :     int         i;
     630                 : 
     631                 :     /* generate the item array, and identify tuples with null keys */
     632                 :     /* note that entryvec->vector[0] goes unused in this code */
     633           12903 :     entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
     634           12903 :     entryvec->n = len + 1;
     635           12903 :     offNullTuples = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
     636                 : 
     637         1137313 :     for (i = 1; i <= len; i++)
     638                 :     {
     639                 :         Datum       datum;
     640                 :         bool        IsNull;
     641                 : 
     642         1124410 :         datum = index_getattr(itup[i - 1], attno + 1, giststate->leafTupdesc,
     643                 :                               &IsNull);
     644         1124410 :         gistdentryinit(giststate, attno, &(entryvec->vector[i]),
     645                 :                        datum, r, page, i,
     646                 :                        false, IsNull);
     647         1124410 :         if (IsNull)
     648             848 :             offNullTuples[nOffNullTuples++] = i;
     649                 :     }
     650                 : 
     651           12903 :     if (nOffNullTuples == len)
     652                 :     {
     653                 :         /*
     654                 :          * Corner case: All keys in attno column are null, so just transfer
     655                 :          * our attention to the next column.  If there's no next column, just
     656                 :          * split page in half.
     657                 :          */
     658 UBC           0 :         v->spl_risnull[attno] = v->spl_lisnull[attno] = true;
     659                 : 
     660               0 :         if (attno + 1 < giststate->nonLeafTupdesc->natts)
     661               0 :             gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
     662                 :         else
     663               0 :             gistSplitHalf(&v->splitVector, len);
     664                 :     }
     665 CBC       12903 :     else if (nOffNullTuples > 0)
     666                 :     {
     667              88 :         int         j = 0;
     668                 : 
     669                 :         /*
     670                 :          * We don't want to mix NULL and not-NULL keys on one page, so split
     671                 :          * nulls to right page and not-nulls to left.
     672                 :          */
     673              88 :         v->splitVector.spl_right = offNullTuples;
     674              88 :         v->splitVector.spl_nright = nOffNullTuples;
     675              88 :         v->spl_risnull[attno] = true;
     676                 : 
     677              88 :         v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
     678              88 :         v->splitVector.spl_nleft = 0;
     679           10779 :         for (i = 1; i <= len; i++)
     680           10691 :             if (j < v->splitVector.spl_nright && offNullTuples[j] == i)
     681             848 :                 j++;
     682                 :             else
     683            9843 :                 v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
     684                 : 
     685                 :         /* Compute union keys, unless outer recursion level will handle it */
     686              88 :         if (attno == 0 && giststate->nonLeafTupdesc->natts == 1)
     687                 :         {
     688              87 :             v->spl_dontcare = NULL;
     689              87 :             gistunionsubkey(giststate, itup, v);
     690                 :         }
     691                 :     }
     692                 :     else
     693                 :     {
     694                 :         /*
     695                 :          * All keys are not-null, so apply user-defined PickSplit method
     696                 :          */
     697           12815 :         if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate))
     698                 :         {
     699                 :             /*
     700                 :              * Splitting on attno column is not optimal, so consider
     701                 :              * redistributing don't-care tuples according to the next column
     702                 :              */
     703              19 :             Assert(attno + 1 < giststate->nonLeafTupdesc->natts);
     704                 : 
     705              19 :             if (v->spl_dontcare == NULL)
     706                 :             {
     707                 :                 /*
     708                 :                  * This split was actually degenerate, so ignore it altogether
     709                 :                  * and just split according to the next column.
     710                 :                  */
     711              18 :                 gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
     712                 :             }
     713                 :             else
     714                 :             {
     715                 :                 /*
     716                 :                  * Form an array of just the don't-care tuples to pass to a
     717                 :                  * recursive invocation of this function for the next column.
     718                 :                  */
     719               1 :                 IndexTuple *newitup = (IndexTuple *) palloc(len * sizeof(IndexTuple));
     720               1 :                 OffsetNumber *map = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
     721               1 :                 int         newlen = 0;
     722                 :                 GIST_SPLITVEC backupSplit;
     723                 : 
     724             187 :                 for (i = 0; i < len; i++)
     725                 :                 {
     726             186 :                     if (v->spl_dontcare[i + 1])
     727                 :                     {
     728             184 :                         newitup[newlen] = itup[i];
     729             184 :                         map[newlen] = i + 1;
     730             184 :                         newlen++;
     731                 :                     }
     732                 :                 }
     733                 : 
     734               1 :                 Assert(newlen > 0);
     735                 : 
     736                 :                 /*
     737                 :                  * Make a backup copy of v->splitVector, since the recursive
     738                 :                  * call will overwrite that with its own result.
     739                 :                  */
     740               1 :                 backupSplit = v->splitVector;
     741               1 :                 backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
     742               1 :                 memcpy(backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber) * v->splitVector.spl_nleft);
     743               1 :                 backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
     744               1 :                 memcpy(backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber) * v->splitVector.spl_nright);
     745                 : 
     746                 :                 /* Recursively decide how to split the don't-care tuples */
     747               1 :                 gistSplitByKey(r, page, newitup, newlen, giststate, v, attno + 1);
     748                 : 
     749                 :                 /* Merge result of subsplit with non-don't-care tuples */
     750              93 :                 for (i = 0; i < v->splitVector.spl_nleft; i++)
     751              92 :                     backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1];
     752              93 :                 for (i = 0; i < v->splitVector.spl_nright; i++)
     753              92 :                     backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1];
     754                 : 
     755               1 :                 v->splitVector = backupSplit;
     756                 :             }
     757                 :         }
     758                 :     }
     759                 : 
     760                 :     /*
     761                 :      * If we're handling a multicolumn index, at the end of the recursion
     762                 :      * recompute the left and right union datums for all index columns.  This
     763                 :      * makes sure we hand back correct union datums in all corner cases,
     764                 :      * including when we haven't processed all columns to start with, or when
     765                 :      * a secondary split moved "don't care" tuples from one side to the other
     766                 :      * (we really shouldn't assume that that didn't change the union datums).
     767                 :      *
     768                 :      * Note: when we're in an internal recursion (attno > 0), we do not worry
     769                 :      * about whether the union datums we return with are sensible, since
     770                 :      * calling levels won't care.  Also, in a single-column index, we expect
     771                 :      * that PickSplit (or the special cases above) produced correct union
     772                 :      * datums.
     773                 :      */
     774           12903 :     if (attno == 0 && giststate->nonLeafTupdesc->natts > 1)
     775                 :     {
     776            1283 :         v->spl_dontcare = NULL;
     777            1283 :         gistunionsubkey(giststate, itup, v);
     778                 :     }
     779           12903 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a