LCOV - differential code coverage report
Current view: top level - src/backend/access/nbtree - nbtutils.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 78.7 % 779 613 7 24 135 4 133 4 472 26 127 1 7
Current Date: 2023-04-08 17:13:01 Functions: 93.8 % 32 30 2 13 3 14 2 13 1
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 [..60] days: 100.0 % 3 3 3
Legend: Lines: hit not hit (60,120] days: 100.0 % 1 1 1
(240..) days: 78.6 % 775 609 7 24 135 4 133 472 26 127
Function coverage date bins:
[..60] days: 100.0 % 1 1 1
(240..) days: 63.0 % 46 29 2 13 2 14 2 13

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * nbtutils.c
                                  4                 :  *    Utility code for Postgres btree implementation.
                                  5                 :  *
                                  6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                  7                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                  8                 :  *
                                  9                 :  *
                                 10                 :  * IDENTIFICATION
                                 11                 :  *    src/backend/access/nbtree/nbtutils.c
                                 12                 :  *
                                 13                 :  *-------------------------------------------------------------------------
                                 14                 :  */
                                 15                 : 
                                 16                 : #include "postgres.h"
                                 17                 : 
                                 18                 : #include <time.h>
                                 19                 : 
                                 20                 : #include "access/nbtree.h"
                                 21                 : #include "access/reloptions.h"
                                 22                 : #include "access/relscan.h"
                                 23                 : #include "catalog/catalog.h"
                                 24                 : #include "commands/progress.h"
                                 25                 : #include "lib/qunique.h"
                                 26                 : #include "miscadmin.h"
                                 27                 : #include "utils/array.h"
                                 28                 : #include "utils/datum.h"
                                 29                 : #include "utils/lsyscache.h"
                                 30                 : #include "utils/memutils.h"
                                 31                 : #include "utils/rel.h"
                                 32                 : 
                                 33                 : 
                                 34                 : typedef struct BTSortArrayContext
                                 35                 : {
                                 36                 :     FmgrInfo    flinfo;
                                 37                 :     Oid         collation;
                                 38                 :     bool        reverse;
                                 39                 : } BTSortArrayContext;
                                 40                 : 
                                 41                 : static Datum _bt_find_extreme_element(IndexScanDesc scan, ScanKey skey,
                                 42                 :                                       StrategyNumber strat,
                                 43                 :                                       Datum *elems, int nelems);
                                 44                 : static int  _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
                                 45                 :                                     bool reverse,
                                 46                 :                                     Datum *elems, int nelems);
                                 47                 : static int  _bt_compare_array_elements(const void *a, const void *b, void *arg);
                                 48                 : static bool _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
                                 49                 :                                      ScanKey leftarg, ScanKey rightarg,
                                 50                 :                                      bool *result);
                                 51                 : static bool _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption);
                                 52                 : static void _bt_mark_scankey_required(ScanKey skey);
                                 53                 : static bool _bt_check_rowcompare(ScanKey skey,
                                 54                 :                                  IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
                                 55                 :                                  ScanDirection dir, bool *continuescan);
                                 56                 : static int  _bt_keep_natts(Relation rel, IndexTuple lastleft,
                                 57                 :                            IndexTuple firstright, BTScanInsert itup_key);
                                 58                 : 
                                 59                 : 
                                 60                 : /*
                                 61                 :  * _bt_mkscankey
                                 62                 :  *      Build an insertion scan key that contains comparison data from itup
                                 63                 :  *      as well as comparator routines appropriate to the key datatypes.
                                 64                 :  *
                                 65                 :  *      When itup is a non-pivot tuple, the returned insertion scan key is
                                 66                 :  *      suitable for finding a place for it to go on the leaf level.  Pivot
                                 67                 :  *      tuples can be used to re-find leaf page with matching high key, but
                                 68                 :  *      then caller needs to set scan key's pivotsearch field to true.  This
                                 69                 :  *      allows caller to search for a leaf page with a matching high key,
                                 70                 :  *      which is usually to the left of the first leaf page a non-pivot match
                                 71                 :  *      might appear on.
                                 72                 :  *
                                 73                 :  *      The result is intended for use with _bt_compare() and _bt_truncate().
                                 74                 :  *      Callers that don't need to fill out the insertion scankey arguments
                                 75                 :  *      (e.g. they use an ad-hoc comparison routine, or only need a scankey
                                 76                 :  *      for _bt_truncate()) can pass a NULL index tuple.  The scankey will
                                 77                 :  *      be initialized as if an "all truncated" pivot tuple was passed
                                 78                 :  *      instead.
                                 79                 :  *
                                 80                 :  *      Note that we may occasionally have to share lock the metapage to
                                 81                 :  *      determine whether or not the keys in the index are expected to be
                                 82                 :  *      unique (i.e. if this is a "heapkeyspace" index).  We assume a
                                 83                 :  *      heapkeyspace index when caller passes a NULL tuple, allowing index
                                 84                 :  *      build callers to avoid accessing the non-existent metapage.  We
                                 85                 :  *      also assume that the index is _not_ allequalimage when a NULL tuple
                                 86                 :  *      is passed; CREATE INDEX callers call _bt_allequalimage() to set the
                                 87                 :  *      field themselves.
                                 88                 :  */
                                 89                 : BTScanInsert
    8 andres                     90 GNC     9135483 : _bt_mkscankey(Relation rel, Relation heaprel, IndexTuple itup)
                                 91                 : {
                                 92                 :     BTScanInsert key;
                                 93                 :     ScanKey     skey;
                                 94                 :     TupleDesc   itupdesc;
                                 95                 :     int         indnkeyatts;
                                 96                 :     int16      *indoption;
                                 97                 :     int         tupnatts;
                                 98                 :     int         i;
                                 99                 : 
 8986 bruce                     100 CBC     9135483 :     itupdesc = RelationGetDescr(rel);
 1828 teodor                    101         9135483 :     indnkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
 5934 tgl                       102         9135483 :     indoption = rel->rd_indoption;
 1481 pg                        103         9135483 :     tupnatts = itup ? BTreeTupleGetNAtts(itup, rel) : 0;
                                104                 : 
                                105         9135483 :     Assert(tupnatts <= IndexRelationGetNumberOfAttributes(rel));
                                106                 : 
                                107                 :     /*
                                108                 :      * We'll execute search using scan key constructed on key columns.
                                109                 :      * Truncated attributes and non-key attributes are omitted from the final
                                110                 :      * scan key.
                                111                 :      */
                                112         9135483 :     key = palloc(offsetof(BTScanInsertData, scankeys) +
                                113         9135483 :                  sizeof(ScanKeyData) * indnkeyatts);
 1138                           114         9135483 :     if (itup)
    8 andres                    115 GNC     8950316 :         _bt_metaversion(rel, heaprel, &key->heapkeyspace, &key->allequalimage);
                                116                 :     else
                                117                 :     {
                                118                 :         /* Utility statement callers can set these fields themselves */
 1138 pg                        119 CBC      185167 :         key->heapkeyspace = true;
                                120          185167 :         key->allequalimage = false;
                                121                 :     }
 1418 tgl                       122         9135483 :     key->anynullkeys = false;    /* initial assumption */
 1481 pg                        123         9135483 :     key->nextkey = false;
                                124         9135483 :     key->pivotsearch = false;
                                125         9135483 :     key->keysz = Min(indnkeyatts, tupnatts);
                                126         9135483 :     key->scantid = key->heapkeyspace && itup ?
                                127        18270966 :         BTreeTupleGetHeapTID(itup) : NULL;
                                128         9135483 :     skey = key->scankeys;
 1828 teodor                    129        27120508 :     for (i = 0; i < indnkeyatts; i++)
                                130                 :     {
                                131                 :         FmgrInfo   *procinfo;
                                132                 :         Datum       arg;
                                133                 :         bool        null;
                                134                 :         int         flags;
                                135                 : 
                                136                 :         /*
                                137                 :          * We can use the cached (default) support procs since no cross-type
                                138                 :          * comparison can be needed.
                                139                 :          */
 7855 tgl                       140        17985025 :         procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
                                141                 : 
                                142                 :         /*
                                143                 :          * Key arguments built from truncated attributes (or when caller
                                144                 :          * provides no tuple) are defensively represented as NULL values. They
                                145                 :          * should never be used.
                                146                 :          */
 1481 pg                        147        17985025 :         if (i < tupnatts)
                                148        17659875 :             arg = index_getattr(itup, i + 1, itupdesc, &null);
                                149                 :         else
                                150                 :         {
                                151          325150 :             arg = (Datum) 0;
                                152          325150 :             null = true;
                                153                 :         }
                                154        17985025 :         flags = (null ? SK_ISNULL : 0) | (indoption[i] << SK_BT_INDOPTION_SHIFT);
 7855 tgl                       155        17985025 :         ScanKeyEntryInitializeWithInfo(&skey[i],
                                156                 :                                        flags,
                                157        17985025 :                                        (AttrNumber) (i + 1),
                                158                 :                                        InvalidStrategy,
                                159                 :                                        InvalidOid,
 4380                           160        17985025 :                                        rel->rd_indcollation[i],
                                161                 :                                        procinfo,
                                162                 :                                        arg);
                                163                 :         /* Record if any key attribute is NULL (or truncated) */
 1447 pg                        164        17985025 :         if (null)
                                165          325247 :             key->anynullkeys = true;
                                166                 :     }
                                167                 : 
                                168                 :     /*
                                169                 :      * In NULLS NOT DISTINCT mode, we pretend that there are no null keys, so
                                170                 :      * that full uniqueness check is done.
                                171                 :      */
  430 peter                     172         9135483 :     if (rel->rd_index->indnullsnotdistinct)
                                173              84 :         key->anynullkeys = false;
                                174                 : 
 1481 pg                        175         9135483 :     return key;
                                176                 : }
                                177                 : 
                                178                 : /*
                                179                 :  * free a retracement stack made by _bt_search.
                                180                 :  */
                                181                 : void
 9770 scrappy                   182        14919966 : _bt_freestack(BTStack stack)
                                183                 : {
                                184                 :     BTStack     ostack;
                                185                 : 
 7032 neilc                     186        27190253 :     while (stack != NULL)
                                187                 :     {
 9345 bruce                     188        12270287 :         ostack = stack;
                                189        12270287 :         stack = stack->bts_parent;
                                190        12270287 :         pfree(ostack);
                                191                 :     }
 9770 scrappy                   192        14919966 : }
                                193                 : 
                                194                 : 
                                195                 : /*
                                196                 :  *  _bt_preprocess_array_keys() -- Preprocess SK_SEARCHARRAY scan keys
                                197                 :  *
                                198                 :  * If there are any SK_SEARCHARRAY scan keys, deconstruct the array(s) and
                                199                 :  * set up BTArrayKeyInfo info for each one that is an equality-type key.
                                200                 :  * Prepare modified scan keys in so->arrayKeyData, which will hold the current
                                201                 :  * array elements during each primitive indexscan operation.  For inequality
                                202                 :  * array keys, it's sufficient to find the extreme element value and replace
                                203                 :  * the whole array with that scalar value.
                                204                 :  *
                                205                 :  * Note: the reason we need so->arrayKeyData, rather than just scribbling
                                206                 :  * on scan->keyData, is that callers are permitted to call btrescan without
                                207                 :  * supplying a new set of scankey data.
                                208                 :  */
                                209                 : void
 4193 tgl                       210         9088299 : _bt_preprocess_array_keys(IndexScanDesc scan)
                                211                 : {
                                212         9088299 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
                                213         9088299 :     int         numberOfKeys = scan->numberOfKeys;
                                214         9088299 :     int16      *indoption = scan->indexRelation->rd_indoption;
                                215                 :     int         numArrayKeys;
                                216                 :     ScanKey     cur;
                                217                 :     int         i;
                                218                 :     MemoryContext oldContext;
                                219                 : 
                                220                 :     /* Quick check to see if there are any array keys */
                                221         9088299 :     numArrayKeys = 0;
                                222        24998875 :     for (i = 0; i < numberOfKeys; i++)
                                223                 :     {
                                224        15910576 :         cur = &scan->keyData[i];
                                225        15910576 :         if (cur->sk_flags & SK_SEARCHARRAY)
                                226                 :         {
                                227             353 :             numArrayKeys++;
                                228             353 :             Assert(!(cur->sk_flags & (SK_ROW_HEADER | SK_SEARCHNULL | SK_SEARCHNOTNULL)));
                                229                 :             /* If any arrays are null as a whole, we can quit right now. */
                                230             353 :             if (cur->sk_flags & SK_ISNULL)
                                231                 :             {
 4193 tgl                       232 UBC           0 :                 so->numArrayKeys = -1;
                                233               0 :                 so->arrayKeyData = NULL;
                                234               0 :                 return;
                                235                 :             }
                                236                 :         }
                                237                 :     }
                                238                 : 
                                239                 :     /* Quit if nothing to do. */
 4193 tgl                       240 CBC     9088299 :     if (numArrayKeys == 0)
                                241                 :     {
                                242         9087985 :         so->numArrayKeys = 0;
                                243         9087985 :         so->arrayKeyData = NULL;
                                244         9087985 :         return;
                                245                 :     }
                                246                 : 
                                247                 :     /*
                                248                 :      * Make a scan-lifespan context to hold array-associated data, or reset it
                                249                 :      * if we already have one from a previous rescan cycle.
                                250                 :      */
                                251             314 :     if (so->arrayContext == NULL)
                                252             314 :         so->arrayContext = AllocSetContextCreate(CurrentMemoryContext,
                                253                 :                                                  "BTree array context",
                                254                 :                                                  ALLOCSET_SMALL_SIZES);
                                255                 :     else
 4193 tgl                       256 UBC           0 :         MemoryContextReset(so->arrayContext);
                                257                 : 
 4193 tgl                       258 CBC         314 :     oldContext = MemoryContextSwitchTo(so->arrayContext);
                                259                 : 
                                260                 :     /* Create modifiable copy of scan->keyData in the workspace context */
                                261             314 :     so->arrayKeyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
                                262             314 :     memcpy(so->arrayKeyData,
                                263             314 :            scan->keyData,
                                264             314 :            scan->numberOfKeys * sizeof(ScanKeyData));
                                265                 : 
                                266                 :     /* Allocate space for per-array data in the workspace context */
                                267             314 :     so->arrayKeys = (BTArrayKeyInfo *) palloc0(numArrayKeys * sizeof(BTArrayKeyInfo));
                                268                 : 
                                269                 :     /* Now process each array key */
                                270             314 :     numArrayKeys = 0;
                                271             763 :     for (i = 0; i < numberOfKeys; i++)
                                272                 :     {
                                273                 :         ArrayType  *arrayval;
                                274                 :         int16       elmlen;
                                275                 :         bool        elmbyval;
                                276                 :         char        elmalign;
                                277                 :         int         num_elems;
                                278                 :         Datum      *elem_values;
                                279                 :         bool       *elem_nulls;
                                280                 :         int         num_nonnulls;
                                281                 :         int         j;
                                282                 : 
                                283             449 :         cur = &so->arrayKeyData[i];
                                284             449 :         if (!(cur->sk_flags & SK_SEARCHARRAY))
                                285              99 :             continue;
                                286                 : 
                                287                 :         /*
                                288                 :          * First, deconstruct the array into elements.  Anything allocated
                                289                 :          * here (including a possibly detoasted array value) is in the
                                290                 :          * workspace context.
                                291                 :          */
                                292             353 :         arrayval = DatumGetArrayTypeP(cur->sk_argument);
                                293                 :         /* We could cache this data, but not clear it's worth it */
                                294             353 :         get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
                                295                 :                              &elmlen, &elmbyval, &elmalign);
                                296             353 :         deconstruct_array(arrayval,
                                297                 :                           ARR_ELEMTYPE(arrayval),
                                298                 :                           elmlen, elmbyval, elmalign,
                                299                 :                           &elem_values, &elem_nulls, &num_elems);
                                300                 : 
                                301                 :         /*
                                302                 :          * Compress out any null elements.  We can ignore them since we assume
                                303                 :          * all btree operators are strict.
                                304                 :          */
                                305             353 :         num_nonnulls = 0;
                                306            1926 :         for (j = 0; j < num_elems; j++)
                                307                 :         {
                                308            1573 :             if (!elem_nulls[j])
                                309            1573 :                 elem_values[num_nonnulls++] = elem_values[j];
                                310                 :         }
                                311                 : 
                                312                 :         /* We could pfree(elem_nulls) now, but not worth the cycles */
                                313                 : 
                                314                 :         /* If there's no non-nulls, the scan qual is unsatisfiable */
                                315             353 :         if (num_nonnulls == 0)
                                316                 :         {
 4193 tgl                       317 UBC           0 :             numArrayKeys = -1;
                                318               0 :             break;
                                319                 :         }
                                320                 : 
                                321                 :         /*
                                322                 :          * If the comparison operator is not equality, then the array qual
                                323                 :          * degenerates to a simple comparison against the smallest or largest
                                324                 :          * non-null array element, as appropriate.
                                325                 :          */
 4193 tgl                       326 CBC         353 :         switch (cur->sk_strategy)
                                327                 :         {
 4193 tgl                       328 UBC           0 :             case BTLessStrategyNumber:
                                329                 :             case BTLessEqualStrategyNumber:
                                330               0 :                 cur->sk_argument =
                                331               0 :                     _bt_find_extreme_element(scan, cur,
                                332                 :                                              BTGreaterStrategyNumber,
                                333                 :                                              elem_values, num_nonnulls);
                                334               0 :                 continue;
 4193 tgl                       335 CBC         350 :             case BTEqualStrategyNumber:
                                336                 :                 /* proceed with rest of loop */
                                337             350 :                 break;
                                338               3 :             case BTGreaterEqualStrategyNumber:
                                339                 :             case BTGreaterStrategyNumber:
                                340               3 :                 cur->sk_argument =
                                341               3 :                     _bt_find_extreme_element(scan, cur,
                                342                 :                                              BTLessStrategyNumber,
                                343                 :                                              elem_values, num_nonnulls);
                                344               3 :                 continue;
 4193 tgl                       345 UBC           0 :             default:
                                346               0 :                 elog(ERROR, "unrecognized StrategyNumber: %d",
                                347                 :                      (int) cur->sk_strategy);
                                348                 :                 break;
                                349                 :         }
                                350                 : 
                                351                 :         /*
                                352                 :          * Sort the non-null elements and eliminate any duplicates.  We must
                                353                 :          * sort in the same ordering used by the index column, so that the
                                354                 :          * successive primitive indexscans produce data in index order.
                                355                 :          */
 4193 tgl                       356 CBC         700 :         num_elems = _bt_sort_array_elements(scan, cur,
 2118                           357             350 :                                             (indoption[cur->sk_attno - 1] & INDOPTION_DESC) != 0,
                                358                 :                                             elem_values, num_nonnulls);
                                359                 : 
                                360                 :         /*
                                361                 :          * And set up the BTArrayKeyInfo data.
                                362                 :          */
 4193                           363             350 :         so->arrayKeys[numArrayKeys].scan_key = i;
                                364             350 :         so->arrayKeys[numArrayKeys].num_elems = num_elems;
                                365             350 :         so->arrayKeys[numArrayKeys].elem_values = elem_values;
                                366             350 :         numArrayKeys++;
                                367                 :     }
                                368                 : 
                                369             314 :     so->numArrayKeys = numArrayKeys;
                                370                 : 
                                371             314 :     MemoryContextSwitchTo(oldContext);
                                372                 : }
                                373                 : 
                                374                 : /*
                                375                 :  * _bt_find_extreme_element() -- get least or greatest array element
                                376                 :  *
                                377                 :  * scan and skey identify the index column, whose opfamily determines the
                                378                 :  * comparison semantics.  strat should be BTLessStrategyNumber to get the
                                379                 :  * least element, or BTGreaterStrategyNumber to get the greatest.
                                380                 :  */
                                381                 : static Datum
                                382               3 : _bt_find_extreme_element(IndexScanDesc scan, ScanKey skey,
                                383                 :                          StrategyNumber strat,
                                384                 :                          Datum *elems, int nelems)
                                385                 : {
                                386               3 :     Relation    rel = scan->indexRelation;
                                387                 :     Oid         elemtype,
                                388                 :                 cmp_op;
                                389                 :     RegProcedure cmp_proc;
                                390                 :     FmgrInfo    flinfo;
                                391                 :     Datum       result;
                                392                 :     int         i;
                                393                 : 
                                394                 :     /*
                                395                 :      * Determine the nominal datatype of the array elements.  We have to
                                396                 :      * support the convention that sk_subtype == InvalidOid means the opclass
                                397                 :      * input type; this is a hack to simplify life for ScanKeyInit().
                                398                 :      */
                                399               3 :     elemtype = skey->sk_subtype;
                                400               3 :     if (elemtype == InvalidOid)
 4193 tgl                       401 UBC           0 :         elemtype = rel->rd_opcintype[skey->sk_attno - 1];
                                402                 : 
                                403                 :     /*
                                404                 :      * Look up the appropriate comparison operator in the opfamily.
                                405                 :      *
                                406                 :      * Note: it's possible that this would fail, if the opfamily is
                                407                 :      * incomplete, but it seems quite unlikely that an opfamily would omit
                                408                 :      * non-cross-type comparison operators for any datatype that it supports
                                409                 :      * at all.
                                410                 :      */
 4193 tgl                       411 CBC           3 :     cmp_op = get_opfamily_member(rel->rd_opfamily[skey->sk_attno - 1],
                                412                 :                                  elemtype,
                                413                 :                                  elemtype,
                                414                 :                                  strat);
                                415               3 :     if (!OidIsValid(cmp_op))
 4193 tgl                       416 UBC           0 :         elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
                                417                 :              strat, elemtype, elemtype,
                                418                 :              rel->rd_opfamily[skey->sk_attno - 1]);
 4193 tgl                       419 CBC           3 :     cmp_proc = get_opcode(cmp_op);
                                420               3 :     if (!RegProcedureIsValid(cmp_proc))
 4193 tgl                       421 UBC           0 :         elog(ERROR, "missing oprcode for operator %u", cmp_op);
                                422                 : 
 4193 tgl                       423 CBC           3 :     fmgr_info(cmp_proc, &flinfo);
                                424                 : 
                                425               3 :     Assert(nelems > 0);
                                426               3 :     result = elems[0];
                                427               6 :     for (i = 1; i < nelems; i++)
                                428                 :     {
                                429               3 :         if (DatumGetBool(FunctionCall2Coll(&flinfo,
                                430                 :                                            skey->sk_collation,
                                431               3 :                                            elems[i],
                                432                 :                                            result)))
 4193 tgl                       433 UBC           0 :             result = elems[i];
                                434                 :     }
                                435                 : 
 4193 tgl                       436 CBC           3 :     return result;
                                437                 : }
                                438                 : 
                                439                 : /*
                                440                 :  * _bt_sort_array_elements() -- sort and de-dup array elements
                                441                 :  *
                                442                 :  * The array elements are sorted in-place, and the new number of elements
                                443                 :  * after duplicate removal is returned.
                                444                 :  *
                                445                 :  * scan and skey identify the index column, whose opfamily determines the
                                446                 :  * comparison semantics.  If reverse is true, we sort in descending order.
                                447                 :  */
                                448                 : static int
                                449             350 : _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
                                450                 :                         bool reverse,
                                451                 :                         Datum *elems, int nelems)
                                452                 : {
                                453             350 :     Relation    rel = scan->indexRelation;
                                454                 :     Oid         elemtype;
                                455                 :     RegProcedure cmp_proc;
                                456                 :     BTSortArrayContext cxt;
                                457                 : 
                                458             350 :     if (nelems <= 1)
                                459               6 :         return nelems;          /* no work to do */
                                460                 : 
                                461                 :     /*
                                462                 :      * Determine the nominal datatype of the array elements.  We have to
                                463                 :      * support the convention that sk_subtype == InvalidOid means the opclass
                                464                 :      * input type; this is a hack to simplify life for ScanKeyInit().
                                465                 :      */
                                466             344 :     elemtype = skey->sk_subtype;
                                467             344 :     if (elemtype == InvalidOid)
 4193 tgl                       468 UBC           0 :         elemtype = rel->rd_opcintype[skey->sk_attno - 1];
                                469                 : 
                                470                 :     /*
                                471                 :      * Look up the appropriate comparison function in the opfamily.
                                472                 :      *
                                473                 :      * Note: it's possible that this would fail, if the opfamily is
                                474                 :      * incomplete, but it seems quite unlikely that an opfamily would omit
                                475                 :      * non-cross-type support functions for any datatype that it supports at
                                476                 :      * all.
                                477                 :      */
 4193 tgl                       478 CBC         344 :     cmp_proc = get_opfamily_proc(rel->rd_opfamily[skey->sk_attno - 1],
                                479                 :                                  elemtype,
                                480                 :                                  elemtype,
                                481                 :                                  BTORDER_PROC);
                                482             344 :     if (!RegProcedureIsValid(cmp_proc))
 4193 tgl                       483 UBC           0 :         elog(ERROR, "missing support function %d(%u,%u) in opfamily %u",
                                484                 :              BTORDER_PROC, elemtype, elemtype,
                                485                 :              rel->rd_opfamily[skey->sk_attno - 1]);
                                486                 : 
                                487                 :     /* Sort the array elements */
 4193 tgl                       488 CBC         344 :     fmgr_info(cmp_proc, &cxt.flinfo);
                                489             344 :     cxt.collation = skey->sk_collation;
                                490             344 :     cxt.reverse = reverse;
   61 peter                     491 GNC         344 :     qsort_arg(elems, nelems, sizeof(Datum),
                                492                 :               _bt_compare_array_elements, &cxt);
                                493                 : 
                                494                 :     /* Now scan the sorted elements and remove duplicates */
 1249 tmunro                    495 CBC         344 :     return qunique_arg(elems, nelems, sizeof(Datum),
                                496                 :                        _bt_compare_array_elements, &cxt);
                                497                 : }
                                498                 : 
                                499                 : /*
                                500                 :  * qsort_arg comparator for sorting array elements
                                501                 :  */
                                502                 : static int
 4193 tgl                       503            3475 : _bt_compare_array_elements(const void *a, const void *b, void *arg)
                                504                 : {
                                505            3475 :     Datum       da = *((const Datum *) a);
                                506            3475 :     Datum       db = *((const Datum *) b);
                                507            3475 :     BTSortArrayContext *cxt = (BTSortArrayContext *) arg;
                                508                 :     int32       compare;
                                509                 : 
                                510            3475 :     compare = DatumGetInt32(FunctionCall2Coll(&cxt->flinfo,
                                511                 :                                               cxt->collation,
                                512                 :                                               da, db));
                                513            3475 :     if (cxt->reverse)
 1647 tgl                       514 UBC           0 :         INVERT_COMPARE_RESULT(compare);
 4193 tgl                       515 CBC        3475 :     return compare;
                                516                 : }
                                517                 : 
                                518                 : /*
                                519                 :  * _bt_start_array_keys() -- Initialize array keys at start of a scan
                                520                 :  *
                                521                 :  * Set up the cur_elem counters and fill in the first sk_argument value for
                                522                 :  * each array scankey.  We can't do this until we know the scan direction.
                                523                 :  */
                                524                 : void
                                525             310 : _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
                                526                 : {
                                527             310 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
                                528                 :     int         i;
                                529                 : 
                                530             659 :     for (i = 0; i < so->numArrayKeys; i++)
                                531                 :     {
                                532             349 :         BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i];
                                533             349 :         ScanKey     skey = &so->arrayKeyData[curArrayKey->scan_key];
                                534                 : 
                                535             349 :         Assert(curArrayKey->num_elems > 0);
                                536             349 :         if (ScanDirectionIsBackward(dir))
 4193 tgl                       537 UBC           0 :             curArrayKey->cur_elem = curArrayKey->num_elems - 1;
                                538                 :         else
 4193 tgl                       539 CBC         349 :             curArrayKey->cur_elem = 0;
                                540             349 :         skey->sk_argument = curArrayKey->elem_values[curArrayKey->cur_elem];
                                541                 :     }
                                542             310 : }
                                543                 : 
                                544                 : /*
                                545                 :  * _bt_advance_array_keys() -- Advance to next set of array elements
                                546                 :  *
                                547                 :  * Returns true if there is another set of values to consider, false if not.
                                548                 :  * On true result, the scankeys are initialized with the next set of values.
                                549                 :  */
                                550                 : bool
                                551            1895 : _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir)
                                552                 : {
                                553            1895 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
                                554            1895 :     bool        found = false;
                                555                 :     int         i;
                                556                 : 
                                557                 :     /*
                                558                 :      * We must advance the last array key most quickly, since it will
                                559                 :      * correspond to the lowest-order index column among the available
                                560                 :      * qualifications. This is necessary to ensure correct ordering of output
                                561                 :      * when there are multiple array keys.
                                562                 :      */
                                563            2550 :     for (i = so->numArrayKeys - 1; i >= 0; i--)
                                564                 :     {
                                565            2243 :         BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i];
                                566            2243 :         ScanKey     skey = &so->arrayKeyData[curArrayKey->scan_key];
                                567            2243 :         int         cur_elem = curArrayKey->cur_elem;
                                568            2243 :         int         num_elems = curArrayKey->num_elems;
                                569                 : 
                                570            2243 :         if (ScanDirectionIsBackward(dir))
                                571                 :         {
 4193 tgl                       572 UBC           0 :             if (--cur_elem < 0)
                                573                 :             {
                                574               0 :                 cur_elem = num_elems - 1;
                                575               0 :                 found = false;  /* need to advance next array key */
                                576                 :             }
                                577                 :             else
                                578               0 :                 found = true;
                                579                 :         }
                                580                 :         else
                                581                 :         {
 4193 tgl                       582 CBC        2243 :             if (++cur_elem >= num_elems)
                                583                 :             {
                                584             655 :                 cur_elem = 0;
                                585             655 :                 found = false;  /* need to advance next array key */
                                586                 :             }
                                587                 :             else
                                588            1588 :                 found = true;
                                589                 :         }
                                590                 : 
                                591            2243 :         curArrayKey->cur_elem = cur_elem;
                                592            2243 :         skey->sk_argument = curArrayKey->elem_values[cur_elem];
                                593            2243 :         if (found)
                                594            1588 :             break;
                                595                 :     }
                                596                 : 
                                597                 :     /* advance parallel scan */
 2244 rhaas                     598            1895 :     if (scan->parallel_scan != NULL)
 2244 rhaas                     599 UBC           0 :         _bt_parallel_advance_array_keys(scan);
                                600                 : 
 4193 tgl                       601 CBC        1895 :     return found;
                                602                 : }
                                603                 : 
                                604                 : /*
                                605                 :  * _bt_mark_array_keys() -- Handle array keys during btmarkpos
                                606                 :  *
                                607                 :  * Save the current state of the array keys as the "mark" position.
                                608                 :  */
                                609                 : void
 3846                           610               3 : _bt_mark_array_keys(IndexScanDesc scan)
                                611                 : {
                                612               3 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
                                613                 :     int         i;
                                614                 : 
                                615               6 :     for (i = 0; i < so->numArrayKeys; i++)
                                616                 :     {
                                617               3 :         BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i];
                                618                 : 
                                619               3 :         curArrayKey->mark_elem = curArrayKey->cur_elem;
                                620                 :     }
                                621               3 : }
                                622                 : 
                                623                 : /*
                                624                 :  * _bt_restore_array_keys() -- Handle array keys during btrestrpos
                                625                 :  *
                                626                 :  * Restore the array keys to where they were when the mark was set.
                                627                 :  */
                                628                 : void
                                629               3 : _bt_restore_array_keys(IndexScanDesc scan)
                                630                 : {
                                631               3 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
                                632               3 :     bool        changed = false;
                                633                 :     int         i;
                                634                 : 
                                635                 :     /* Restore each array key to its position when the mark was set */
                                636               6 :     for (i = 0; i < so->numArrayKeys; i++)
                                637                 :     {
                                638               3 :         BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i];
                                639               3 :         ScanKey     skey = &so->arrayKeyData[curArrayKey->scan_key];
                                640               3 :         int         mark_elem = curArrayKey->mark_elem;
                                641                 : 
                                642               3 :         if (curArrayKey->cur_elem != mark_elem)
                                643                 :         {
 3846 tgl                       644 UBC           0 :             curArrayKey->cur_elem = mark_elem;
                                645               0 :             skey->sk_argument = curArrayKey->elem_values[mark_elem];
                                646               0 :             changed = true;
                                647                 :         }
                                648                 :     }
                                649                 : 
                                650                 :     /*
                                651                 :      * If we changed any keys, we must redo _bt_preprocess_keys.  That might
                                652                 :      * sound like overkill, but in cases with multiple keys per index column
                                653                 :      * it seems necessary to do the full set of pushups.
                                654                 :      */
 3846 tgl                       655 CBC           3 :     if (changed)
                                656                 :     {
 3846 tgl                       657 UBC           0 :         _bt_preprocess_keys(scan);
                                658                 :         /* The mark should have been set on a consistent set of keys... */
                                659               0 :         Assert(so->qual_ok);
                                660                 :     }
 3846 tgl                       661 CBC           3 : }
                                662                 : 
                                663                 : 
                                664                 : /*
                                665                 :  *  _bt_preprocess_keys() -- Preprocess scan keys
                                666                 :  *
                                667                 :  * The given search-type keys (in scan->keyData[] or so->arrayKeyData[])
                                668                 :  * are copied to so->keyData[] with possible transformation.
                                669                 :  * scan->numberOfKeys is the number of input keys, so->numberOfKeys gets
                                670                 :  * the number of output keys (possibly less, never greater).
                                671                 :  *
                                672                 :  * The output keys are marked with additional sk_flags bits beyond the
                                673                 :  * system-standard bits supplied by the caller.  The DESC and NULLS_FIRST
                                674                 :  * indoption bits for the relevant index attribute are copied into the flags.
                                675                 :  * Also, for a DESC column, we commute (flip) all the sk_strategy numbers
                                676                 :  * so that the index sorts in the desired direction.
                                677                 :  *
                                678                 :  * One key purpose of this routine is to discover which scan keys must be
                                679                 :  * satisfied to continue the scan.  It also attempts to eliminate redundant
                                680                 :  * keys and detect contradictory keys.  (If the index opfamily provides
                                681                 :  * incomplete sets of cross-type operators, we may fail to detect redundant
                                682                 :  * or contradictory keys, but we can survive that.)
                                683                 :  *
                                684                 :  * The output keys must be sorted by index attribute.  Presently we expect
                                685                 :  * (but verify) that the input keys are already so sorted --- this is done
                                686                 :  * by match_clauses_to_index() in indxpath.c.  Some reordering of the keys
                                687                 :  * within each attribute may be done as a byproduct of the processing here,
                                688                 :  * but no other code depends on that.
                                689                 :  *
                                690                 :  * The output keys are marked with flags SK_BT_REQFWD and/or SK_BT_REQBKWD
                                691                 :  * if they must be satisfied in order to continue the scan forward or backward
                                692                 :  * respectively.  _bt_checkkeys uses these flags.  For example, if the quals
                                693                 :  * are "x = 1 AND y < 4 AND z < 5", then _bt_checkkeys will reject a tuple
                                694                 :  * (1,2,7), but we must continue the scan in case there are tuples (1,3,z).
                                695                 :  * But once we reach tuples like (1,4,z) we can stop scanning because no
                                696                 :  * later tuples could match.  This is reflected by marking the x and y keys,
                                697                 :  * but not the z key, with SK_BT_REQFWD.  In general, the keys for leading
                                698                 :  * attributes with "=" keys are marked both SK_BT_REQFWD and SK_BT_REQBKWD.
                                699                 :  * For the first attribute without an "=" key, any "<" and "<=" keys are
                                700                 :  * marked SK_BT_REQFWD while any ">" and ">=" keys are marked SK_BT_REQBKWD.
                                701                 :  * This can be seen to be correct by considering the above example.  Note
                                702                 :  * in particular that if there are no keys for a given attribute, the keys for
                                703                 :  * subsequent attributes can never be required; for instance "WHERE y = 4"
                                704                 :  * requires a full-index scan.
                                705                 :  *
                                706                 :  * If possible, redundant keys are eliminated: we keep only the tightest
                                707                 :  * >/>= bound and the tightest </<= bound, and if there's an = key then
                                708                 :  * that's the only one returned.  (So, we return either a single = key,
                                709                 :  * or one or two boundary-condition keys for each attr.)  However, if we
                                710                 :  * cannot compare two keys for lack of a suitable cross-type operator,
                                711                 :  * we cannot eliminate either.  If there are two such keys of the same
                                712                 :  * operator strategy, the second one is just pushed into the output array
                                713                 :  * without further processing here.  We may also emit both >/>= or both
                                714                 :  * </<= keys if we can't compare them.  The logic about required keys still
                                715                 :  * works if we don't eliminate redundant keys.
                                716                 :  *
                                717                 :  * Note that one reason we need direction-sensitive required-key flags is
                                718                 :  * precisely that we may not be able to eliminate redundant keys.  Suppose
                                719                 :  * we have "x > 4::int AND x > 10::bigint", and we are unable to determine
                                720                 :  * which key is more restrictive for lack of a suitable cross-type operator.
                                721                 :  * _bt_first will arbitrarily pick one of the keys to do the initial
                                722                 :  * positioning with.  If it picks x > 4, then the x > 10 condition will fail
                                723                 :  * until we reach index entries > 10; but we can't stop the scan just because
                                724                 :  * x > 10 is failing.  On the other hand, if we are scanning backwards, then
                                725                 :  * failure of either key is indeed enough to stop the scan.  (In general, when
                                726                 :  * inequality keys are present, the initial-positioning code only promises to
                                727                 :  * position before the first possible match, not exactly at the first match,
                                728                 :  * for a forward scan; or after the last match for a backward scan.)
                                729                 :  *
                                730                 :  * As a byproduct of this work, we can detect contradictory quals such
                                731                 :  * as "x = 1 AND x > 2".  If we see that, we return so->qual_ok = false,
                                732                 :  * indicating the scan need not be run at all since no tuples can match.
                                733                 :  * (In this case we do not bother completing the output key array!)
                                734                 :  * Again, missing cross-type operators might cause us to fail to prove the
                                735                 :  * quals contradictory when they really are, but the scan will work correctly.
                                736                 :  *
                                737                 :  * Row comparison keys are currently also treated without any smarts:
                                738                 :  * we just transfer them into the preprocessed array without any
                                739                 :  * editorialization.  We can treat them the same as an ordinary inequality
                                740                 :  * comparison on the row's first index column, for the purposes of the logic
                                741                 :  * about required keys.
                                742                 :  *
                                743                 :  * Note: the reason we have to copy the preprocessed scan keys into private
                                744                 :  * storage is that we are modifying the array based on comparisons of the
                                745                 :  * key argument values, which could change on a rescan or after moving to
                                746                 :  * new elements of array keys.  Therefore we can't overwrite the source data.
                                747                 :  */
                                748                 : void
 7088                           749         9088140 : _bt_preprocess_keys(IndexScanDesc scan)
                                750                 : {
 7625                           751         9088140 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
 7088                           752         9088140 :     int         numberOfKeys = scan->numberOfKeys;
 5934                           753         9088140 :     int16      *indoption = scan->indexRelation->rd_indoption;
                                754                 :     int         new_numberOfKeys;
                                755                 :     int         numberOfEqualCols;
                                756                 :     ScanKey     inkeys;
                                757                 :     ScanKey     outkeys;
                                758                 :     ScanKey     cur;
                                759                 :     ScanKey     xform[BTMaxStrategyNumber];
                                760                 :     bool        test_result;
                                761                 :     int         i,
                                762                 :                 j;
                                763                 :     AttrNumber  attno;
                                764                 : 
                                765                 :     /* initialize result variables */
 8293                           766         9088140 :     so->qual_ok = true;
 7088                           767         9088140 :     so->numberOfKeys = 0;
                                768                 : 
 9345 bruce                     769         9088140 :     if (numberOfKeys < 1)
 8293 tgl                       770         4167301 :         return;                 /* done if qual-less scan */
                                771                 : 
                                772                 :     /*
                                773                 :      * Read so->arrayKeyData if array keys are present, else scan->keyData
                                774                 :      */
 4193                           775         9083232 :     if (so->arrayKeyData != NULL)
                                776            1901 :         inkeys = so->arrayKeyData;
                                777                 :     else
                                778         9081331 :         inkeys = scan->keyData;
                                779                 : 
 7088                           780         9083232 :     outkeys = so->keyData;
                                781         9083232 :     cur = &inkeys[0];
                                782                 :     /* we check that input keys are correctly ordered */
 6509                           783         9083232 :     if (cur->sk_attno < 1)
 6509 tgl                       784 UBC           0 :         elog(ERROR, "btree index keys must be ordered by attribute");
                                785                 : 
                                786                 :     /* We can short-circuit most of the work if there's just one key */
 9345 bruce                     787 CBC     9083232 :     if (numberOfKeys == 1)
                                788                 :     {
                                789                 :         /* Apply indoption to scankey (might change sk_strategy!) */
 4844 tgl                       790         4162381 :         if (!_bt_fix_scankey_strategy(cur, indoption))
                                791            1274 :             so->qual_ok = false;
 5934                           792         4162381 :         memcpy(outkeys, cur, sizeof(ScanKeyData));
 7088                           793         4162381 :         so->numberOfKeys = 1;
                                794                 :         /* We can mark the qual as required if it's for first index col */
 6283                           795         4162381 :         if (cur->sk_attno == 1)
                                796         4161125 :             _bt_mark_scankey_required(outkeys);
 9345 bruce                     797         4162381 :         return;
                                798                 :     }
                                799                 : 
                                800                 :     /*
                                801                 :      * Otherwise, do the full set of pushups.
                                802                 :      */
 8293 tgl                       803         4920851 :     new_numberOfKeys = 0;
 6689                           804         4920851 :     numberOfEqualCols = 0;
                                805                 : 
                                806                 :     /*
                                807                 :      * Initialize for processing of keys for attr 1.
                                808                 :      *
                                809                 :      * xform[i] points to the currently best scan key of strategy type i+1; it
                                810                 :      * is NULL if we haven't yet found such a key for this attr.
                                811                 :      */
 8293                           812         4920851 :     attno = 1;
 7088                           813         4920851 :     memset(xform, 0, sizeof(xform));
                                814                 : 
                                815                 :     /*
                                816                 :      * Loop iterates from 0 to numberOfKeys inclusive; we use the last pass to
                                817                 :      * handle after-last-key processing.  Actual exit from the loop is at the
                                818                 :      * "break" statement below.
                                819                 :      */
 8053 bruce                     820        16670534 :     for (i = 0;; cur++, i++)
                                821                 :     {
 9345                           822        16670534 :         if (i < numberOfKeys)
                                823                 :         {
                                824                 :             /* Apply indoption to scankey (might change sk_strategy!) */
 4844 tgl                       825        11749683 :             if (!_bt_fix_scankey_strategy(cur, indoption))
                                826                 :             {
                                827                 :                 /* NULL can't be matched, so give up */
 4844 tgl                       828 UBC           0 :                 so->qual_ok = false;
                                829               0 :                 return;
                                830                 :             }
                                831                 :         }
                                832                 : 
                                833                 :         /*
                                834                 :          * If we are at the end of the keys for a particular attr, finish up
                                835                 :          * processing and emit the cleaned-up keys.
                                836                 :          */
 9345 bruce                     837 CBC    16670534 :         if (i == numberOfKeys || cur->sk_attno != attno)
                                838                 :         {
 6689 tgl                       839        11748683 :             int         priorNumberOfEqualCols = numberOfEqualCols;
                                840                 : 
                                841                 :             /* check input keys are correctly ordered */
 6509                           842        11748683 :             if (i < numberOfKeys && cur->sk_attno < attno)
 6509 tgl                       843 UBC           0 :                 elog(ERROR, "btree index keys must be ordered by attribute");
                                844                 : 
                                845                 :             /*
                                846                 :              * If = has been specified, all other keys can be eliminated as
                                847                 :              * redundant.  If we have a case like key = 1 AND key > 2, we can
                                848                 :              * set qual_ok to false and abandon further processing.
                                849                 :              *
                                850                 :              * We also have to deal with the case of "key IS NULL", which is
                                851                 :              * unsatisfiable in combination with any other index condition. By
                                852                 :              * the time we get here, that's been classified as an equality
                                853                 :              * check, and we've rejected any combination of it with a regular
                                854                 :              * equality condition; but not with other types of conditions.
                                855                 :              */
 7088 tgl                       856 CBC    11748683 :             if (xform[BTEqualStrategyNumber - 1])
                                857                 :             {
                                858        11061315 :                 ScanKey     eq = xform[BTEqualStrategyNumber - 1];
                                859                 : 
 9345 bruce                     860        66367830 :                 for (j = BTMaxStrategyNumber; --j >= 0;)
                                861                 :                 {
 7088 tgl                       862        55306527 :                     ScanKey     chk = xform[j];
                                863                 : 
                                864        55306527 :                     if (!chk || j == (BTEqualStrategyNumber - 1))
 9345 bruce                     865        55306450 :                         continue;
                                866                 : 
 4302 tgl                       867              77 :                     if (eq->sk_flags & SK_SEARCHNULL)
                                868                 :                     {
                                869                 :                         /* IS NULL is contradictory to anything else */
                                870              12 :                         so->qual_ok = false;
                                871              12 :                         return;
                                872                 :                     }
                                873                 : 
 5946                           874              65 :                     if (_bt_compare_scankey_args(scan, chk, eq, chk,
                                875                 :                                                  &test_result))
                                876                 :                     {
                                877              65 :                         if (!test_result)
                                878                 :                         {
                                879                 :                             /* keys proven mutually contradictory */
 5946 tgl                       880 UBC           0 :                             so->qual_ok = false;
                                881               0 :                             return;
                                882                 :                         }
                                883                 :                         /* else discard the redundant non-equality key */
 5946 tgl                       884 CBC          65 :                         xform[j] = NULL;
                                885                 :                     }
                                886                 :                     /* else, cannot determine redundancy, keep both keys */
                                887                 :                 }
                                888                 :                 /* track number of attrs for which we have "=" keys */
 6689                           889        11061303 :                 numberOfEqualCols++;
                                890                 :             }
                                891                 : 
                                892                 :             /* try to keep only one of <, <= */
 7088                           893        11748671 :             if (xform[BTLessStrategyNumber - 1]
                                894             983 :                 && xform[BTLessEqualStrategyNumber - 1])
                                895                 :             {
 6797 bruce                     896 UBC           0 :                 ScanKey     lt = xform[BTLessStrategyNumber - 1];
                                897               0 :                 ScanKey     le = xform[BTLessEqualStrategyNumber - 1];
                                898                 : 
 5946 tgl                       899               0 :                 if (_bt_compare_scankey_args(scan, le, lt, le,
                                900                 :                                              &test_result))
                                901                 :                 {
                                902               0 :                     if (test_result)
                                903               0 :                         xform[BTLessEqualStrategyNumber - 1] = NULL;
                                904                 :                     else
                                905               0 :                         xform[BTLessStrategyNumber - 1] = NULL;
                                906                 :                 }
                                907                 :             }
                                908                 : 
                                909                 :             /* try to keep only one of >, >= */
 7088 tgl                       910 CBC    11748671 :             if (xform[BTGreaterStrategyNumber - 1]
                                911          685214 :                 && xform[BTGreaterEqualStrategyNumber - 1])
                                912                 :             {
 6797 bruce                     913 UBC           0 :                 ScanKey     gt = xform[BTGreaterStrategyNumber - 1];
                                914               0 :                 ScanKey     ge = xform[BTGreaterEqualStrategyNumber - 1];
                                915                 : 
 5946 tgl                       916               0 :                 if (_bt_compare_scankey_args(scan, ge, gt, ge,
                                917                 :                                              &test_result))
                                918                 :                 {
                                919               0 :                     if (test_result)
                                920               0 :                         xform[BTGreaterEqualStrategyNumber - 1] = NULL;
                                921                 :                     else
                                922               0 :                         xform[BTGreaterStrategyNumber - 1] = NULL;
                                923                 :                 }
                                924                 :             }
                                925                 : 
                                926                 :             /*
                                927                 :              * Emit the cleaned-up keys into the outkeys[] array, and then
                                928                 :              * mark them if they are required.  They are required (possibly
                                929                 :              * only in one direction) if all attrs before this one had "=".
                                930                 :              */
 9345 bruce                     931 CBC    70492026 :             for (j = BTMaxStrategyNumber; --j >= 0;)
                                932                 :             {
 7088 tgl                       933        58743355 :                 if (xform[j])
                                934                 :                 {
 6285                           935        11749561 :                     ScanKey     outkey = &outkeys[new_numberOfKeys++];
                                936                 : 
                                937        11749561 :                     memcpy(outkey, xform[j], sizeof(ScanKeyData));
                                938        11749561 :                     if (priorNumberOfEqualCols == attno - 1)
 6283                           939        11748725 :                         _bt_mark_scankey_required(outkey);
                                940                 :                 }
                                941                 :             }
                                942                 : 
                                943                 :             /*
                                944                 :              * Exit loop here if done.
                                945                 :              */
 9345 bruce                     946        11748671 :             if (i == numberOfKeys)
                                947         4920839 :                 break;
                                948                 : 
                                949                 :             /* Re-initialize for new attno */
                                950         6827832 :             attno = cur->sk_attno;
 7088 tgl                       951         6827832 :             memset(xform, 0, sizeof(xform));
                                952                 :         }
                                953                 : 
                                954                 :         /* check strategy this key's operator corresponds to */
 7091                           955        11749683 :         j = cur->sk_strategy - 1;
                                956                 : 
                                957                 :         /* if row comparison, push it directly to the output array */
 5946                           958        11749683 :         if (cur->sk_flags & SK_ROW_HEADER)
 7088 tgl                       959 UBC           0 :         {
 6283                           960               0 :             ScanKey     outkey = &outkeys[new_numberOfKeys++];
                                961                 : 
                                962               0 :             memcpy(outkey, cur, sizeof(ScanKeyData));
                                963               0 :             if (numberOfEqualCols == attno - 1)
                                964               0 :                 _bt_mark_scankey_required(outkey);
                                965                 : 
                                966                 :             /*
                                967                 :              * We don't support RowCompare using equality; such a qual would
                                968                 :              * mess up the numberOfEqualCols tracking.
                                969                 :              */
 5946                           970               0 :             Assert(j != (BTEqualStrategyNumber - 1));
 7088                           971               0 :             continue;
                                972                 :         }
                                973                 : 
                                974                 :         /* have we seen one of these before? */
 5946 tgl                       975 CBC    11749683 :         if (xform[j] == NULL)
                                976                 :         {
                                977                 :             /* nope, so remember this scankey */
                                978        11749650 :             xform[j] = cur;
                                979                 :         }
                                980                 :         else
                                981                 :         {
                                982                 :             /* yup, keep only the more restrictive key */
                                983              33 :             if (_bt_compare_scankey_args(scan, cur, cur, xform[j],
                                984                 :                                          &test_result))
                                985                 :             {
                                986              33 :                 if (test_result)
                                987              30 :                     xform[j] = cur;
                                988               3 :                 else if (j == (BTEqualStrategyNumber - 1))
                                989                 :                 {
                                990                 :                     /* key == a && key == b, but a != b */
 5946 tgl                       991 UBC           0 :                     so->qual_ok = false;
                                992               0 :                     return;
                                993                 :                 }
                                994                 :                 /* else old key is more restrictive, keep it */
                                995                 :             }
                                996                 :             else
                                997                 :             {
                                998                 :                 /*
                                999                 :                  * We can't determine which key is more restrictive.  Keep the
                               1000                 :                  * previous one in xform[j] and push this one directly to the
                               1001                 :                  * output array.
                               1002                 :                  */
                               1003               0 :                 ScanKey     outkey = &outkeys[new_numberOfKeys++];
                               1004                 : 
                               1005               0 :                 memcpy(outkey, cur, sizeof(ScanKeyData));
                               1006               0 :                 if (numberOfEqualCols == attno - 1)
                               1007               0 :                     _bt_mark_scankey_required(outkey);
                               1008                 :             }
                               1009                 :         }
                               1010                 :     }
                               1011                 : 
 9345 bruce                    1012 CBC     4920839 :     so->numberOfKeys = new_numberOfKeys;
                               1013                 : }
                               1014                 : 
                               1015                 : /*
                               1016                 :  * Compare two scankey values using a specified operator.
                               1017                 :  *
                               1018                 :  * The test we want to perform is logically "leftarg op rightarg", where
                               1019                 :  * leftarg and rightarg are the sk_argument values in those ScanKeys, and
                               1020                 :  * the comparison operator is the one in the op ScanKey.  However, in
                               1021                 :  * cross-data-type situations we may need to look up the correct operator in
                               1022                 :  * the index's opfamily: it is the one having amopstrategy = op->sk_strategy
                               1023                 :  * and amoplefttype/amoprighttype equal to the two argument datatypes.
                               1024                 :  *
                               1025                 :  * If the opfamily doesn't supply a complete set of cross-type operators we
                               1026                 :  * may not be able to make the comparison.  If we can make the comparison
                               1027                 :  * we store the operator result in *result and return true.  We return false
                               1028                 :  * if the comparison could not be made.
                               1029                 :  *
                               1030                 :  * Note: op always points at the same ScanKey as either leftarg or rightarg.
                               1031                 :  * Since we don't scribble on the scankeys, this aliasing should cause no
                               1032                 :  * trouble.
                               1033                 :  *
                               1034                 :  * Note: this routine needs to be insensitive to any DESC option applied
                               1035                 :  * to the index column.  For example, "x < 4" is a tighter constraint than
                               1036                 :  * "x < 5" regardless of which way the index is sorted.
                               1037                 :  */
                               1038                 : static bool
 5946 tgl                      1039              98 : _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
                               1040                 :                          ScanKey leftarg, ScanKey rightarg,
                               1041                 :                          bool *result)
                               1042                 : {
                               1043              98 :     Relation    rel = scan->indexRelation;
                               1044                 :     Oid         lefttype,
                               1045                 :                 righttype,
                               1046                 :                 optype,
                               1047                 :                 opcintype,
                               1048                 :                 cmp_op;
                               1049                 :     StrategyNumber strat;
                               1050                 : 
                               1051                 :     /*
                               1052                 :      * First, deal with cases where one or both args are NULL.  This should
                               1053                 :      * only happen when the scankeys represent IS NULL/NOT NULL conditions.
                               1054                 :      */
 4846                          1055              98 :     if ((leftarg->sk_flags | rightarg->sk_flags) & SK_ISNULL)
                               1056                 :     {
                               1057                 :         bool        leftnull,
                               1058                 :                     rightnull;
                               1059                 : 
                               1060              55 :         if (leftarg->sk_flags & SK_ISNULL)
                               1061                 :         {
                               1062               3 :             Assert(leftarg->sk_flags & (SK_SEARCHNULL | SK_SEARCHNOTNULL));
                               1063               3 :             leftnull = true;
                               1064                 :         }
                               1065                 :         else
                               1066              52 :             leftnull = false;
                               1067              55 :         if (rightarg->sk_flags & SK_ISNULL)
                               1068                 :         {
                               1069              52 :             Assert(rightarg->sk_flags & (SK_SEARCHNULL | SK_SEARCHNOTNULL));
                               1070              52 :             rightnull = true;
                               1071                 :         }
                               1072                 :         else
                               1073               3 :             rightnull = false;
                               1074                 : 
                               1075                 :         /*
                               1076                 :          * We treat NULL as either greater than or less than all other values.
                               1077                 :          * Since true > false, the tests below work correctly for NULLS LAST
                               1078                 :          * logic.  If the index is NULLS FIRST, we need to flip the strategy.
                               1079                 :          */
                               1080              55 :         strat = op->sk_strategy;
                               1081              55 :         if (op->sk_flags & SK_BT_NULLS_FIRST)
 4846 tgl                      1082 UBC           0 :             strat = BTCommuteStrategyNumber(strat);
                               1083                 : 
 4846 tgl                      1084 CBC          55 :         switch (strat)
                               1085                 :         {
                               1086              55 :             case BTLessStrategyNumber:
                               1087              55 :                 *result = (leftnull < rightnull);
                               1088              55 :                 break;
 4846 tgl                      1089 UBC           0 :             case BTLessEqualStrategyNumber:
                               1090               0 :                 *result = (leftnull <= rightnull);
                               1091               0 :                 break;
                               1092               0 :             case BTEqualStrategyNumber:
                               1093               0 :                 *result = (leftnull == rightnull);
                               1094               0 :                 break;
                               1095               0 :             case BTGreaterEqualStrategyNumber:
                               1096               0 :                 *result = (leftnull >= rightnull);
                               1097               0 :                 break;
                               1098               0 :             case BTGreaterStrategyNumber:
                               1099               0 :                 *result = (leftnull > rightnull);
                               1100               0 :                 break;
                               1101               0 :             default:
                               1102               0 :                 elog(ERROR, "unrecognized StrategyNumber: %d", (int) strat);
                               1103                 :                 *result = false;    /* keep compiler quiet */
                               1104                 :                 break;
                               1105                 :         }
 4846 tgl                      1106 CBC          55 :         return true;
                               1107                 :     }
                               1108                 : 
                               1109                 :     /*
                               1110                 :      * The opfamily we need to worry about is identified by the index column.
                               1111                 :      */
 5946                          1112              43 :     Assert(leftarg->sk_attno == rightarg->sk_attno);
                               1113                 : 
                               1114              43 :     opcintype = rel->rd_opcintype[leftarg->sk_attno - 1];
                               1115                 : 
                               1116                 :     /*
                               1117                 :      * Determine the actual datatypes of the ScanKey arguments.  We have to
                               1118                 :      * support the convention that sk_subtype == InvalidOid means the opclass
                               1119                 :      * input type; this is a hack to simplify life for ScanKeyInit().
                               1120                 :      */
                               1121              43 :     lefttype = leftarg->sk_subtype;
                               1122              43 :     if (lefttype == InvalidOid)
 5946 tgl                      1123 UBC           0 :         lefttype = opcintype;
 5946 tgl                      1124 CBC          43 :     righttype = rightarg->sk_subtype;
                               1125              43 :     if (righttype == InvalidOid)
 5946 tgl                      1126 UBC           0 :         righttype = opcintype;
 5946 tgl                      1127 CBC          43 :     optype = op->sk_subtype;
                               1128              43 :     if (optype == InvalidOid)
 5946 tgl                      1129 UBC           0 :         optype = opcintype;
                               1130                 : 
                               1131                 :     /*
                               1132                 :      * If leftarg and rightarg match the types expected for the "op" scankey,
                               1133                 :      * we can use its already-looked-up comparison function.
                               1134                 :      */
 5946 tgl                      1135 CBC          43 :     if (lefttype == opcintype && righttype == optype)
                               1136                 :     {
 4380                          1137              43 :         *result = DatumGetBool(FunctionCall2Coll(&op->sk_func,
                               1138                 :                                                  op->sk_collation,
                               1139                 :                                                  leftarg->sk_argument,
                               1140                 :                                                  rightarg->sk_argument));
 5946                          1141              43 :         return true;
                               1142                 :     }
                               1143                 : 
                               1144                 :     /*
                               1145                 :      * Otherwise, we need to go to the syscache to find the appropriate
                               1146                 :      * operator.  (This cannot result in infinite recursion, since no
                               1147                 :      * indexscan initiated by syscache lookup will use cross-data-type
                               1148                 :      * operators.)
                               1149                 :      *
                               1150                 :      * If the sk_strategy was flipped by _bt_fix_scankey_strategy, we have to
                               1151                 :      * un-flip it to get the correct opfamily member.
                               1152                 :      */
 5934 tgl                      1153 UBC           0 :     strat = op->sk_strategy;
                               1154               0 :     if (op->sk_flags & SK_BT_DESC)
                               1155               0 :         strat = BTCommuteStrategyNumber(strat);
                               1156                 : 
 5946                          1157               0 :     cmp_op = get_opfamily_member(rel->rd_opfamily[leftarg->sk_attno - 1],
                               1158                 :                                  lefttype,
                               1159                 :                                  righttype,
                               1160                 :                                  strat);
                               1161               0 :     if (OidIsValid(cmp_op))
                               1162                 :     {
                               1163               0 :         RegProcedure cmp_proc = get_opcode(cmp_op);
                               1164                 : 
                               1165               0 :         if (RegProcedureIsValid(cmp_proc))
                               1166                 :         {
 4380                          1167               0 :             *result = DatumGetBool(OidFunctionCall2Coll(cmp_proc,
                               1168                 :                                                         op->sk_collation,
                               1169                 :                                                         leftarg->sk_argument,
                               1170                 :                                                         rightarg->sk_argument));
 5946                          1171               0 :             return true;
                               1172                 :         }
                               1173                 :     }
                               1174                 : 
                               1175                 :     /* Can't make the comparison */
                               1176               0 :     *result = false;            /* suppress compiler warnings */
                               1177               0 :     return false;
                               1178                 : }
                               1179                 : 
                               1180                 : /*
                               1181                 :  * Adjust a scankey's strategy and flags setting as needed for indoptions.
                               1182                 :  *
                               1183                 :  * We copy the appropriate indoption value into the scankey sk_flags
                               1184                 :  * (shifting to avoid clobbering system-defined flag bits).  Also, if
                               1185                 :  * the DESC option is set, commute (flip) the operator strategy number.
                               1186                 :  *
                               1187                 :  * A secondary purpose is to check for IS NULL/NOT NULL scankeys and set up
                               1188                 :  * the strategy field correctly for them.
                               1189                 :  *
                               1190                 :  * Lastly, for ordinary scankeys (not IS NULL/NOT NULL), we check for a
                               1191                 :  * NULL comparison value.  Since all btree operators are assumed strict,
                               1192                 :  * a NULL means that the qual cannot be satisfied.  We return true if the
                               1193                 :  * comparison value isn't NULL, or false if the scan should be abandoned.
                               1194                 :  *
                               1195                 :  * This function is applied to the *input* scankey structure; therefore
                               1196                 :  * on a rescan we will be looking at already-processed scankeys.  Hence
                               1197                 :  * we have to be careful not to re-commute the strategy if we already did it.
                               1198                 :  * It's a bit ugly to modify the caller's copy of the scankey but in practice
                               1199                 :  * there shouldn't be any problem, since the index's indoptions are certainly
                               1200                 :  * not going to change while the scankey survives.
                               1201                 :  */
                               1202                 : static bool
 4844 tgl                      1203 CBC    15912064 : _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption)
                               1204                 : {
                               1205                 :     int         addflags;
                               1206                 : 
 5934                          1207        15912064 :     addflags = indoption[skey->sk_attno - 1] << SK_BT_INDOPTION_SHIFT;
                               1208                 : 
                               1209                 :     /*
                               1210                 :      * We treat all btree operators as strict (even if they're not so marked
                               1211                 :      * in pg_proc). This means that it is impossible for an operator condition
                               1212                 :      * with a NULL comparison constant to succeed, and we can reject it right
                               1213                 :      * away.
                               1214                 :      *
                               1215                 :      * However, we now also support "x IS NULL" clauses as search conditions,
                               1216                 :      * so in that case keep going. The planner has not filled in any
                               1217                 :      * particular strategy in this case, so set it to BTEqualStrategyNumber
                               1218                 :      * --- we can treat IS NULL as an equality operator for purposes of search
                               1219                 :      * strategy.
                               1220                 :      *
                               1221                 :      * Likewise, "x IS NOT NULL" is supported.  We treat that as either "less
                               1222                 :      * than NULL" in a NULLS LAST index, or "greater than NULL" in a NULLS
                               1223                 :      * FIRST index.
                               1224                 :      *
                               1225                 :      * Note: someday we might have to fill in sk_collation from the index
                               1226                 :      * column's collation.  At the moment this is a non-issue because we'll
                               1227                 :      * never actually call the comparison operator on a NULL.
                               1228                 :      */
 4844                          1229        15912064 :     if (skey->sk_flags & SK_ISNULL)
                               1230                 :     {
                               1231                 :         /* SK_ISNULL shouldn't be set in a row header scankey */
                               1232           48245 :         Assert(!(skey->sk_flags & SK_ROW_HEADER));
                               1233                 : 
                               1234                 :         /* Set indoption flags in scankey (might be done already) */
                               1235           48245 :         skey->sk_flags |= addflags;
                               1236                 : 
                               1237                 :         /* Set correct strategy for IS NULL or NOT NULL search */
                               1238           48245 :         if (skey->sk_flags & SK_SEARCHNULL)
                               1239                 :         {
                               1240              67 :             skey->sk_strategy = BTEqualStrategyNumber;
                               1241              67 :             skey->sk_subtype = InvalidOid;
 4380                          1242              67 :             skey->sk_collation = InvalidOid;
                               1243                 :         }
 4844                          1244           48178 :         else if (skey->sk_flags & SK_SEARCHNOTNULL)
                               1245                 :         {
                               1246           46904 :             if (skey->sk_flags & SK_BT_NULLS_FIRST)
                               1247              18 :                 skey->sk_strategy = BTGreaterStrategyNumber;
                               1248                 :             else
                               1249           46886 :                 skey->sk_strategy = BTLessStrategyNumber;
                               1250           46904 :             skey->sk_subtype = InvalidOid;
 4380                          1251           46904 :             skey->sk_collation = InvalidOid;
                               1252                 :         }
                               1253                 :         else
                               1254                 :         {
                               1255                 :             /* regular qual, so it cannot be satisfied */
 4844                          1256            1274 :             return false;
                               1257                 :         }
                               1258                 : 
                               1259                 :         /* Needn't do the rest */
                               1260           46971 :         return true;
                               1261                 :     }
                               1262                 : 
                               1263                 :     /* Adjust strategy for DESC, if we didn't already */
 5934                          1264        15863819 :     if ((addflags & SK_BT_DESC) && !(skey->sk_flags & SK_BT_DESC))
 5934 tgl                      1265 UBC           0 :         skey->sk_strategy = BTCommuteStrategyNumber(skey->sk_strategy);
 5934 tgl                      1266 CBC    15863819 :     skey->sk_flags |= addflags;
                               1267                 : 
                               1268                 :     /* If it's a row header, fix row member flags and strategies similarly */
                               1269        15863819 :     if (skey->sk_flags & SK_ROW_HEADER)
                               1270                 :     {
                               1271              18 :         ScanKey     subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
                               1272                 : 
                               1273                 :         for (;;)
                               1274                 :         {
                               1275              36 :             Assert(subkey->sk_flags & SK_ROW_MEMBER);
                               1276              36 :             addflags = indoption[subkey->sk_attno - 1] << SK_BT_INDOPTION_SHIFT;
                               1277              36 :             if ((addflags & SK_BT_DESC) && !(subkey->sk_flags & SK_BT_DESC))
 5934 tgl                      1278 UBC           0 :                 subkey->sk_strategy = BTCommuteStrategyNumber(subkey->sk_strategy);
 5934 tgl                      1279 CBC          36 :             subkey->sk_flags |= addflags;
                               1280              36 :             if (subkey->sk_flags & SK_ROW_END)
                               1281              18 :                 break;
                               1282              18 :             subkey++;
                               1283                 :         }
                               1284                 :     }
                               1285                 : 
 4844                          1286        15863819 :     return true;
                               1287                 : }
                               1288                 : 
                               1289                 : /*
                               1290                 :  * Mark a scankey as "required to continue the scan".
                               1291                 :  *
                               1292                 :  * Depending on the operator type, the key may be required for both scan
                               1293                 :  * directions or just one.  Also, if the key is a row comparison header,
                               1294                 :  * we have to mark its first subsidiary ScanKey as required.  (Subsequent
                               1295                 :  * subsidiary ScanKeys are normally for lower-order columns, and thus
                               1296                 :  * cannot be required, since they're after the first non-equality scankey.)
                               1297                 :  *
                               1298                 :  * Note: when we set required-key flag bits in a subsidiary scankey, we are
                               1299                 :  * scribbling on a data structure belonging to the index AM's caller, not on
                               1300                 :  * our private copy.  This should be OK because the marking will not change
                               1301                 :  * from scan to scan within a query, and so we'd just re-mark the same way
                               1302                 :  * anyway on a rescan.  Something to keep an eye on though.
                               1303                 :  */
                               1304                 : static void
 6283                          1305        15909850 : _bt_mark_scankey_required(ScanKey skey)
                               1306                 : {
                               1307                 :     int         addflags;
                               1308                 : 
                               1309        15909850 :     switch (skey->sk_strategy)
                               1310                 :     {
                               1311           48298 :         case BTLessStrategyNumber:
                               1312                 :         case BTLessEqualStrategyNumber:
                               1313           48298 :             addflags = SK_BT_REQFWD;
                               1314           48298 :             break;
                               1315        15173879 :         case BTEqualStrategyNumber:
                               1316        15173879 :             addflags = SK_BT_REQFWD | SK_BT_REQBKWD;
                               1317        15173879 :             break;
                               1318          687673 :         case BTGreaterEqualStrategyNumber:
                               1319                 :         case BTGreaterStrategyNumber:
                               1320          687673 :             addflags = SK_BT_REQBKWD;
                               1321          687673 :             break;
 6283 tgl                      1322 UBC           0 :         default:
                               1323               0 :             elog(ERROR, "unrecognized StrategyNumber: %d",
                               1324                 :                  (int) skey->sk_strategy);
                               1325                 :             addflags = 0;       /* keep compiler quiet */
                               1326                 :             break;
                               1327                 :     }
                               1328                 : 
 6283 tgl                      1329 CBC    15909850 :     skey->sk_flags |= addflags;
                               1330                 : 
                               1331        15909850 :     if (skey->sk_flags & SK_ROW_HEADER)
                               1332                 :     {
 6031 bruce                    1333              18 :         ScanKey     subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
                               1334                 : 
                               1335                 :         /* First subkey should be same column/operator as the header */
 2587 tgl                      1336              18 :         Assert(subkey->sk_flags & SK_ROW_MEMBER);
                               1337              18 :         Assert(subkey->sk_attno == skey->sk_attno);
                               1338              18 :         Assert(subkey->sk_strategy == skey->sk_strategy);
                               1339              18 :         subkey->sk_flags |= addflags;
                               1340                 :     }
 6283                          1341        15909850 : }
                               1342                 : 
                               1343                 : /*
                               1344                 :  * Test whether an indextuple satisfies all the scankey conditions.
                               1345                 :  *
                               1346                 :  * Return true if so, false if not.  If the tuple fails to pass the qual,
                               1347                 :  * we also determine whether there's any need to continue the scan beyond
                               1348                 :  * this tuple, and set *continuescan accordingly.  See comments for
                               1349                 :  * _bt_preprocess_keys(), above, about how this is done.
                               1350                 :  *
                               1351                 :  * Forward scan callers can pass a high key tuple in the hopes of having
                               1352                 :  * us set *continuescan to false, and avoiding an unnecessary visit to
                               1353                 :  * the page to the right.
                               1354                 :  *
                               1355                 :  * scan: index scan descriptor (containing a search-type scankey)
                               1356                 :  * tuple: index tuple to test
                               1357                 :  * tupnatts: number of attributes in tupnatts (high key may be truncated)
                               1358                 :  * dir: direction we are scanning in
                               1359                 :  * continuescan: output parameter (will be set correctly in all cases)
                               1360                 :  */
                               1361                 : bool
 1478 pg                       1362        30729387 : _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
                               1363                 :               ScanDirection dir, bool *continuescan)
                               1364                 : {
                               1365                 :     TupleDesc   tupdesc;
                               1366                 :     BTScanOpaque so;
                               1367                 :     int         keysz;
                               1368                 :     int         ikey;
                               1369                 :     ScanKey     key;
                               1370                 : 
                               1371        30729387 :     Assert(BTreeTupleGetNAtts(tuple, scan->indexRelation) == tupnatts);
                               1372                 : 
                               1373        30729387 :     *continuescan = true;       /* default assumption */
                               1374                 : 
 7088 tgl                      1375        30729387 :     tupdesc = RelationGetDescr(scan->indexRelation);
 6332                          1376        30729387 :     so = (BTScanOpaque) scan->opaque;
                               1377        30729387 :     keysz = so->numberOfKeys;
                               1378                 : 
 7088                          1379        60105367 :     for (key = so->keyData, ikey = 0; ikey < keysz; key++, ikey++)
                               1380                 :     {
                               1381                 :         Datum       datum;
                               1382                 :         bool        isNull;
                               1383                 :         Datum       test;
                               1384                 : 
 1478 pg                       1385        35695871 :         if (key->sk_attno > tupnatts)
                               1386                 :         {
                               1387                 :             /*
                               1388                 :              * This attribute is truncated (must be high key).  The value for
                               1389                 :              * this attribute in the first non-pivot tuple on the page to the
                               1390                 :              * right could be any possible value.  Assume that truncated
                               1391                 :              * attribute passes the qual.
                               1392                 :              */
                               1393             949 :             Assert(ScanDirectionIsForward(dir));
 1138                          1394             949 :             Assert(BTreeTupleIsPivot(tuple));
 1478                          1395         8580342 :             continue;
                               1396                 :         }
                               1397                 : 
                               1398                 :         /* row-comparison keys need special processing */
 6283 tgl                      1399        35694922 :         if (key->sk_flags & SK_ROW_HEADER)
                               1400                 :         {
 1478 pg                       1401             990 :             if (_bt_check_rowcompare(key, tuple, tupnatts, tupdesc, dir,
                               1402                 :                                      continuescan))
 6283 tgl                      1403             963 :                 continue;
 1478 pg                       1404         6319891 :             return false;
                               1405                 :         }
                               1406                 : 
 9345 bruce                    1407        35693932 :         datum = index_getattr(tuple,
 8293 tgl                      1408        35693932 :                               key->sk_attno,
                               1409                 :                               tupdesc,
                               1410                 :                               &isNull);
                               1411                 : 
                               1412        35693932 :         if (key->sk_flags & SK_ISNULL)
                               1413                 :         {
                               1414                 :             /* Handle IS NULL/NOT NULL tests */
 4846                          1415         8602466 :             if (key->sk_flags & SK_SEARCHNULL)
                               1416                 :             {
                               1417           24073 :                 if (isNull)
 4790 bruce                    1418              67 :                     continue;   /* tuple satisfies this qual */
                               1419                 :             }
                               1420                 :             else
                               1421                 :             {
 4846 tgl                      1422         8578393 :                 Assert(key->sk_flags & SK_SEARCHNOTNULL);
                               1423         8578393 :                 if (!isNull)
 4790 bruce                    1424         8578363 :                     continue;   /* tuple satisfies this qual */
                               1425                 :             }
                               1426                 : 
                               1427                 :             /*
                               1428                 :              * Tuple fails this qual.  If it's a required qual for the current
                               1429                 :              * scan direction, then we can conclude no further tuples will
                               1430                 :              * pass, either.
                               1431                 :              */
 4176 tgl                      1432           24036 :             if ((key->sk_flags & SK_BT_REQFWD) &&
                               1433                 :                 ScanDirectionIsForward(dir))
                               1434              12 :                 *continuescan = false;
                               1435           24024 :             else if ((key->sk_flags & SK_BT_REQBKWD) &&
                               1436                 :                      ScanDirectionIsBackward(dir))
 5847 tgl                      1437 UBC           0 :                 *continuescan = false;
                               1438                 : 
                               1439                 :             /*
                               1440                 :              * In any case, this indextuple doesn't match the qual.
                               1441                 :              */
 1478 pg                       1442 CBC       24036 :             return false;
                               1443                 :         }
                               1444                 : 
 8293 tgl                      1445        27091466 :         if (isNull)
                               1446                 :         {
 4176                          1447              66 :             if (key->sk_flags & SK_BT_NULLS_FIRST)
                               1448                 :             {
                               1449                 :                 /*
                               1450                 :                  * Since NULLs are sorted before non-NULLs, we know we have
                               1451                 :                  * reached the lower limit of the range of values for this
                               1452                 :                  * index attr.  On a backward scan, we can stop if this qual
                               1453                 :                  * is one of the "must match" subset.  We can stop regardless
                               1454                 :                  * of whether the qual is > or <, so long as it's required,
                               1455                 :                  * because it's not possible for any future tuples to pass. On
                               1456                 :                  * a forward scan, however, we must keep going, because we may
                               1457                 :                  * have initially positioned to the start of the index.
                               1458                 :                  */
 4176 tgl                      1459 UBC           0 :                 if ((key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
                               1460                 :                     ScanDirectionIsBackward(dir))
                               1461               0 :                     *continuescan = false;
                               1462                 :             }
                               1463                 :             else
                               1464                 :             {
                               1465                 :                 /*
                               1466                 :                  * Since NULLs are sorted after non-NULLs, we know we have
                               1467                 :                  * reached the upper limit of the range of values for this
                               1468                 :                  * index attr.  On a forward scan, we can stop if this qual is
                               1469                 :                  * one of the "must match" subset.  We can stop regardless of
                               1470                 :                  * whether the qual is > or <, so long as it's required,
                               1471                 :                  * because it's not possible for any future tuples to pass. On
                               1472                 :                  * a backward scan, however, we must keep going, because we
                               1473                 :                  * may have initially positioned to the end of the index.
                               1474                 :                  */
 4176 tgl                      1475 CBC          66 :                 if ((key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
                               1476                 :                     ScanDirectionIsForward(dir))
                               1477              42 :                     *continuescan = false;
                               1478                 :             }
                               1479                 : 
                               1480                 :             /*
                               1481                 :              * In any case, this indextuple doesn't match the qual.
                               1482                 :              */
 1478 pg                       1483              66 :             return false;
                               1484                 :         }
                               1485                 : 
 4380 tgl                      1486        27091400 :         test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
                               1487                 :                                  datum, key->sk_argument);
                               1488                 : 
 7091                          1489        27091400 :         if (!DatumGetBool(test))
                               1490                 :         {
                               1491                 :             /*
                               1492                 :              * Tuple fails this qual.  If it's a required qual for the current
                               1493                 :              * scan direction, then we can conclude no further tuples will
                               1494                 :              * pass, either.
                               1495                 :              *
                               1496                 :              * Note: because we stop the scan as soon as any required equality
                               1497                 :              * qual fails, it is critical that equality quals be used for the
                               1498                 :              * initial positioning in _bt_first() when they are available. See
                               1499                 :              * comments in _bt_first().
                               1500                 :              */
 6285                          1501         6295762 :             if ((key->sk_flags & SK_BT_REQFWD) &&
                               1502                 :                 ScanDirectionIsForward(dir))
                               1503         6147307 :                 *continuescan = false;
                               1504          148455 :             else if ((key->sk_flags & SK_BT_REQBKWD) &&
                               1505                 :                      ScanDirectionIsBackward(dir))
                               1506              42 :                 *continuescan = false;
                               1507                 : 
                               1508                 :             /*
                               1509                 :              * In any case, this indextuple doesn't match the qual.
                               1510                 :              */
 1478 pg                       1511         6295762 :             return false;
                               1512                 :         }
                               1513                 :     }
                               1514                 : 
                               1515                 :     /* If we get here, the tuple passes all index quals. */
                               1516        24409496 :     return true;
                               1517                 : }
                               1518                 : 
                               1519                 : /*
                               1520                 :  * Test whether an indextuple satisfies a row-comparison scan condition.
                               1521                 :  *
                               1522                 :  * Return true if so, false if not.  If not, also clear *continuescan if
                               1523                 :  * it's not possible for any future tuples in the current scan direction
                               1524                 :  * to pass the qual.
                               1525                 :  *
                               1526                 :  * This is a subroutine for _bt_checkkeys, which see for more info.
                               1527                 :  */
                               1528                 : static bool
                               1529             990 : _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
                               1530                 :                      TupleDesc tupdesc, ScanDirection dir, bool *continuescan)
                               1531                 : {
 6283 tgl                      1532             990 :     ScanKey     subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
                               1533             990 :     int32       cmpresult = 0;
                               1534                 :     bool        result;
                               1535                 : 
                               1536                 :     /* First subkey should be same as the header says */
                               1537             990 :     Assert(subkey->sk_attno == skey->sk_attno);
                               1538                 : 
                               1539                 :     /* Loop over columns of the row condition */
                               1540                 :     for (;;)
                               1541              78 :     {
                               1542                 :         Datum       datum;
                               1543                 :         bool        isNull;
                               1544                 : 
                               1545            1068 :         Assert(subkey->sk_flags & SK_ROW_MEMBER);
                               1546                 : 
 1478 pg                       1547            1068 :         if (subkey->sk_attno > tupnatts)
                               1548                 :         {
                               1549                 :             /*
                               1550                 :              * This attribute is truncated (must be high key).  The value for
                               1551                 :              * this attribute in the first non-pivot tuple on the page to the
                               1552                 :              * right could be any possible value.  Assume that truncated
                               1553                 :              * attribute passes the qual.
                               1554                 :              */
                               1555               3 :             Assert(ScanDirectionIsForward(dir));
 1138                          1556               3 :             Assert(BTreeTupleIsPivot(tuple));
 1478                          1557               3 :             cmpresult = 0;
 1470                          1558               3 :             if (subkey->sk_flags & SK_ROW_END)
                               1559               3 :                 break;
 1470 pg                       1560 UBC           0 :             subkey++;
 1478                          1561               0 :             continue;
                               1562                 :         }
                               1563                 : 
 6283 tgl                      1564 CBC        1065 :         datum = index_getattr(tuple,
                               1565            1065 :                               subkey->sk_attno,
                               1566                 :                               tupdesc,
                               1567                 :                               &isNull);
                               1568                 : 
                               1569            1065 :         if (isNull)
                               1570                 :         {
 4176                          1571              24 :             if (subkey->sk_flags & SK_BT_NULLS_FIRST)
                               1572                 :             {
                               1573                 :                 /*
                               1574                 :                  * Since NULLs are sorted before non-NULLs, we know we have
                               1575                 :                  * reached the lower limit of the range of values for this
                               1576                 :                  * index attr.  On a backward scan, we can stop if this qual
                               1577                 :                  * is one of the "must match" subset.  We can stop regardless
                               1578                 :                  * of whether the qual is > or <, so long as it's required,
                               1579                 :                  * because it's not possible for any future tuples to pass. On
                               1580                 :                  * a forward scan, however, we must keep going, because we may
                               1581                 :                  * have initially positioned to the start of the index.
                               1582                 :                  */
 4176 tgl                      1583 UBC           0 :                 if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
                               1584                 :                     ScanDirectionIsBackward(dir))
                               1585               0 :                     *continuescan = false;
                               1586                 :             }
                               1587                 :             else
                               1588                 :             {
                               1589                 :                 /*
                               1590                 :                  * Since NULLs are sorted after non-NULLs, we know we have
                               1591                 :                  * reached the upper limit of the range of values for this
                               1592                 :                  * index attr.  On a forward scan, we can stop if this qual is
                               1593                 :                  * one of the "must match" subset.  We can stop regardless of
                               1594                 :                  * whether the qual is > or <, so long as it's required,
                               1595                 :                  * because it's not possible for any future tuples to pass. On
                               1596                 :                  * a backward scan, however, we must keep going, because we
                               1597                 :                  * may have initially positioned to the end of the index.
                               1598                 :                  */
 4176 tgl                      1599 CBC          24 :                 if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
                               1600                 :                     ScanDirectionIsForward(dir))
 4176 tgl                      1601 UBC           0 :                     *continuescan = false;
                               1602                 :             }
                               1603                 : 
                               1604                 :             /*
                               1605                 :              * In any case, this indextuple doesn't match the qual.
                               1606                 :              */
 6283 tgl                      1607 CBC          24 :             return false;
                               1608                 :         }
                               1609                 : 
                               1610            1041 :         if (subkey->sk_flags & SK_ISNULL)
                               1611                 :         {
                               1612                 :             /*
                               1613                 :              * Unlike the simple-scankey case, this isn't a disallowed case.
                               1614                 :              * But it can never match.  If all the earlier row comparison
                               1615                 :              * columns are required for the scan direction, we can stop the
                               1616                 :              * scan, because there can't be another tuple that will succeed.
                               1617                 :              */
 6283 tgl                      1618 UBC           0 :             if (subkey != (ScanKey) DatumGetPointer(skey->sk_argument))
                               1619               0 :                 subkey--;
                               1620               0 :             if ((subkey->sk_flags & SK_BT_REQFWD) &&
                               1621                 :                 ScanDirectionIsForward(dir))
                               1622               0 :                 *continuescan = false;
                               1623               0 :             else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
                               1624                 :                      ScanDirectionIsBackward(dir))
                               1625               0 :                 *continuescan = false;
                               1626               0 :             return false;
                               1627                 :         }
                               1628                 : 
                               1629                 :         /* Perform the test --- three-way comparison not bool operator */
 4380 tgl                      1630 CBC        1041 :         cmpresult = DatumGetInt32(FunctionCall2Coll(&subkey->sk_func,
                               1631                 :                                                     subkey->sk_collation,
                               1632                 :                                                     datum,
                               1633                 :                                                     subkey->sk_argument));
                               1634                 : 
 5934                          1635            1041 :         if (subkey->sk_flags & SK_BT_DESC)
 1647 tgl                      1636 UBC           0 :             INVERT_COMPARE_RESULT(cmpresult);
                               1637                 : 
                               1638                 :         /* Done comparing if unequal, else advance to next column */
 6283 tgl                      1639 CBC        1041 :         if (cmpresult != 0)
                               1640             963 :             break;
                               1641                 : 
                               1642              78 :         if (subkey->sk_flags & SK_ROW_END)
 6283 tgl                      1643 UBC           0 :             break;
 6283 tgl                      1644 CBC          78 :         subkey++;
                               1645                 :     }
                               1646                 : 
                               1647                 :     /*
                               1648                 :      * At this point cmpresult indicates the overall result of the row
                               1649                 :      * comparison, and subkey points to the deciding column (or the last
                               1650                 :      * column if the result is "=").
                               1651                 :      */
                               1652             966 :     switch (subkey->sk_strategy)
                               1653                 :     {
                               1654                 :             /* EQ and NE cases aren't allowed here */
 6283 tgl                      1655 UBC           0 :         case BTLessStrategyNumber:
                               1656               0 :             result = (cmpresult < 0);
                               1657               0 :             break;
 6283 tgl                      1658 CBC         795 :         case BTLessEqualStrategyNumber:
                               1659             795 :             result = (cmpresult <= 0);
                               1660             795 :             break;
                               1661             120 :         case BTGreaterEqualStrategyNumber:
                               1662             120 :             result = (cmpresult >= 0);
                               1663             120 :             break;
                               1664              51 :         case BTGreaterStrategyNumber:
                               1665              51 :             result = (cmpresult > 0);
                               1666              51 :             break;
 6283 tgl                      1667 UBC           0 :         default:
                               1668               0 :             elog(ERROR, "unrecognized RowCompareType: %d",
                               1669                 :                  (int) subkey->sk_strategy);
                               1670                 :             result = 0;         /* keep compiler quiet */
                               1671                 :             break;
                               1672                 :     }
                               1673                 : 
 6283 tgl                      1674 CBC         966 :     if (!result)
                               1675                 :     {
                               1676                 :         /*
                               1677                 :          * Tuple fails this qual.  If it's a required qual for the current
                               1678                 :          * scan direction, then we can conclude no further tuples will pass,
                               1679                 :          * either.  Note we have to look at the deciding column, not
                               1680                 :          * necessarily the first or last column of the row condition.
                               1681                 :          */
                               1682               3 :         if ((subkey->sk_flags & SK_BT_REQFWD) &&
                               1683                 :             ScanDirectionIsForward(dir))
                               1684               3 :             *continuescan = false;
 6283 tgl                      1685 UBC           0 :         else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
                               1686                 :                  ScanDirectionIsBackward(dir))
                               1687               0 :             *continuescan = false;
                               1688                 :     }
                               1689                 : 
 6283 tgl                      1690 CBC         966 :     return result;
                               1691                 : }
                               1692                 : 
                               1693                 : /*
                               1694                 :  * _bt_killitems - set LP_DEAD state for items an indexscan caller has
                               1695                 :  * told us were killed
                               1696                 :  *
                               1697                 :  * scan->opaque, referenced locally through so, contains information about the
                               1698                 :  * current page and killed tuples thereon (generally, this should only be
                               1699                 :  * called if so->numKilled > 0).
                               1700                 :  *
                               1701                 :  * The caller does not have a lock on the page and may or may not have the
                               1702                 :  * page pinned in a buffer.  Note that read-lock is sufficient for setting
                               1703                 :  * LP_DEAD status (which is only a hint).
                               1704                 :  *
                               1705                 :  * We match items by heap TID before assuming they are the right ones to
                               1706                 :  * delete.  We cope with cases where items have moved right due to insertions.
                               1707                 :  * If an item has moved off the current page due to a split, we'll fail to
                               1708                 :  * find it and do nothing (this is not an error case --- we assume the item
                               1709                 :  * will eventually get marked in a future indexscan).
                               1710                 :  *
                               1711                 :  * Note that if we hold a pin on the target page continuously from initially
                               1712                 :  * reading the items until applying this function, VACUUM cannot have deleted
                               1713                 :  * any items from the page, and so there is no need to search left from the
                               1714                 :  * recorded offset.  (This observation also guarantees that the item is still
                               1715                 :  * the right one to delete, which might otherwise be questionable since heap
                               1716                 :  * TIDs can get recycled.)  This holds true even if the page has been modified
                               1717                 :  * by inserts and page splits, so there is no need to consult the LSN.
                               1718                 :  *
                               1719                 :  * If the pin was released after reading the page, then we re-read it.  If it
                               1720                 :  * has been modified since we read it (as determined by the LSN), we dare not
                               1721                 :  * flag any entries because it is possible that the old entry was vacuumed
                               1722                 :  * away and the TID was re-used by a completely different heap tuple.
                               1723                 :  */
                               1724                 : void
 2937 kgrittn                  1725          118090 : _bt_killitems(IndexScanDesc scan)
                               1726                 : {
 6181 tgl                      1727          118090 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
                               1728                 :     Page        page;
                               1729                 :     BTPageOpaque opaque;
                               1730                 :     OffsetNumber minoff;
                               1731                 :     OffsetNumber maxoff;
                               1732                 :     int         i;
 2937 kgrittn                  1733          118090 :     int         numKilled = so->numKilled;
 6181 tgl                      1734          118090 :     bool        killedsomething = false;
                               1735                 :     bool        droppedpin PG_USED_FOR_ASSERTS_ONLY;
                               1736                 : 
 2937 kgrittn                  1737          118090 :     Assert(BTScanPosIsValid(so->currPos));
                               1738                 : 
                               1739                 :     /*
                               1740                 :      * Always reset the scan state, so we don't look for same items on other
                               1741                 :      * pages.
                               1742                 :      */
                               1743          118090 :     so->numKilled = 0;
                               1744                 : 
                               1745          118090 :     if (BTScanPosIsPinned(so->currPos))
                               1746                 :     {
                               1747                 :         /*
                               1748                 :          * We have held the pin on this page since we read the index tuples,
                               1749                 :          * so all we need to do is lock it.  The pin will have prevented
                               1750                 :          * re-use of any TID on the page, so there is no need to check the
                               1751                 :          * LSN.
                               1752                 :          */
 1098 pg                       1753           17408 :         droppedpin = false;
  992                          1754           17408 :         _bt_lockbuf(scan->indexRelation, so->currPos.buf, BT_READ);
                               1755                 : 
 2545 kgrittn                  1756           17408 :         page = BufferGetPage(so->currPos.buf);
                               1757                 :     }
                               1758                 :     else
                               1759                 :     {
                               1760                 :         Buffer      buf;
                               1761                 : 
 1098 pg                       1762          100682 :         droppedpin = true;
                               1763                 :         /* Attempt to re-read the buffer, getting pin and lock. */
    8 andres                   1764 GNC      100682 :         buf = _bt_getbuf(scan->indexRelation, scan->heapRelation,
                               1765                 :                          so->currPos.currPage, BT_READ);
                               1766                 : 
 2545 kgrittn                  1767 GIC      100682 :         page = BufferGetPage(buf);
 1916 alvherre                 1768 CBC      100682 :         if (BufferGetLSNAtomic(buf) == so->currPos.lsn)
 2937 kgrittn                  1769          100648 :             so->currPos.buf = buf;
 2937 kgrittn                  1770 ECB             :         else
                               1771                 :         {
                               1772                 :             /* Modified while not pinned means hinting is not safe. */
 2937 kgrittn                  1773 GIC          34 :             _bt_relbuf(scan->indexRelation, buf);
 2937 kgrittn                  1774 CBC          34 :             return;
 2937 kgrittn                  1775 ECB             :         }
                               1776                 :     }
                               1777                 : 
  373 michael                  1778 GIC      118056 :     opaque = BTPageGetOpaque(page);
 6181 tgl                      1779 CBC      118056 :     minoff = P_FIRSTDATAKEY(opaque);
                               1780          118056 :     maxoff = PageGetMaxOffsetNumber(page);
 6181 tgl                      1781 ECB             : 
 2937 kgrittn                  1782 GIC      380939 :     for (i = 0; i < numKilled; i++)
 6181 tgl                      1783 ECB             :     {
 6031 bruce                    1784 GIC      262883 :         int         itemIndex = so->killedItems[i];
 6031 bruce                    1785 CBC      262883 :         BTScanPosItem *kitem = &so->currPos.items[itemIndex];
                               1786          262883 :         OffsetNumber offnum = kitem->indexOffset;
 6181 tgl                      1787 ECB             : 
 6181 tgl                      1788 GIC      262883 :         Assert(itemIndex >= so->currPos.firstItem &&
 6181 tgl                      1789 ECB             :                itemIndex <= so->currPos.lastItem);
 6181 tgl                      1790 GIC      262883 :         if (offnum < minoff)
 6181 tgl                      1791 LBC           0 :             continue;           /* pure paranoia */
 6181 tgl                      1792 GBC    10774381 :         while (offnum <= maxoff)
 6181 tgl                      1793 ECB             :         {
 6181 tgl                      1794 GIC    10707753 :             ItemId      iid = PageGetItemId(page, offnum);
 6181 tgl                      1795 CBC    10707753 :             IndexTuple  ituple = (IndexTuple) PageGetItem(page, iid);
 1138 pg                       1796        10707753 :             bool        killtuple = false;
 1138 pg                       1797 ECB             : 
 1138 pg                       1798 GIC    10707753 :             if (BTreeTupleIsPosting(ituple))
 1138 pg                       1799 ECB             :             {
 1138 pg                       1800 GIC     3297359 :                 int         pi = i + 1;
 1138 pg                       1801 CBC     3297359 :                 int         nposting = BTreeTupleGetNPosting(ituple);
 1138 pg                       1802 ECB             :                 int         j;
                               1803                 : 
                               1804                 :                 /*
                               1805                 :                  * We rely on the convention that heap TIDs in the scanpos
                               1806                 :                  * items array are stored in ascending heap TID order for a
                               1807                 :                  * group of TIDs that originally came from a posting list
                               1808                 :                  * tuple.  This convention even applies during backwards
                               1809                 :                  * scans, where returning the TIDs in descending order might
                               1810                 :                  * seem more natural.  This is about effectiveness, not
                               1811                 :                  * correctness.
                               1812                 :                  *
                               1813                 :                  * Note that the page may have been modified in almost any way
                               1814                 :                  * since we first read it (in the !droppedpin case), so it's
                               1815                 :                  * possible that this posting list tuple wasn't a posting list
                               1816                 :                  * tuple when we first encountered its heap TIDs.
                               1817                 :                  */
 1138 pg                       1818 GIC     3365637 :                 for (j = 0; j < nposting; j++)
 1138 pg                       1819 ECB             :                 {
 1138 pg                       1820 GIC     3364038 :                     ItemPointer item = BTreeTupleGetPostingN(ituple, j);
 1138 pg                       1821 ECB             : 
 1138 pg                       1822 GIC     3364038 :                     if (!ItemPointerEquals(item, &kitem->heapTid))
 1138 pg                       1823 CBC     3295760 :                         break;  /* out of posting list loop */
 1138 pg                       1824 ECB             : 
                               1825                 :                     /*
                               1826                 :                      * kitem must have matching offnum when heap TIDs match,
                               1827                 :                      * though only in the common case where the page can't
                               1828                 :                      * have been concurrently modified
                               1829                 :                      */
 1098 pg                       1830 GIC       68278 :                     Assert(kitem->indexOffset == offnum || !droppedpin);
 1138 pg                       1831 ECB             : 
                               1832                 :                     /*
                               1833                 :                      * Read-ahead to later kitems here.
                               1834                 :                      *
                               1835                 :                      * We rely on the assumption that not advancing kitem here
                               1836                 :                      * will prevent us from considering the posting list tuple
                               1837                 :                      * fully dead by not matching its next heap TID in next
                               1838                 :                      * loop iteration.
                               1839                 :                      *
                               1840                 :                      * If, on the other hand, this is the final heap TID in
                               1841                 :                      * the posting list tuple, then tuple gets killed
                               1842                 :                      * regardless (i.e. we handle the case where the last
                               1843                 :                      * kitem is also the last heap TID in the last index tuple
                               1844                 :                      * correctly -- posting tuple still gets killed).
                               1845                 :                      */
 1138 pg                       1846 GIC       68278 :                     if (pi < numKilled)
 1138 pg                       1847 CBC       22354 :                         kitem = &so->currPos.items[so->killedItems[pi++]];
 1138 pg                       1848 ECB             :                 }
                               1849                 : 
                               1850                 :                 /*
                               1851                 :                  * Don't bother advancing the outermost loop's int iterator to
                               1852                 :                  * avoid processing killed items that relate to the same
                               1853                 :                  * offnum/posting list tuple.  This micro-optimization hardly
                               1854                 :                  * seems worth it.  (Further iterations of the outermost loop
                               1855                 :                  * will fail to match on this same posting list's first heap
                               1856                 :                  * TID instead, so we'll advance to the next offnum/index
                               1857                 :                  * tuple pretty quickly.)
                               1858                 :                  */
 1138 pg                       1859 GIC     3297359 :                 if (j == nposting)
 1138 pg                       1860 CBC        1599 :                     killtuple = true;
 1138 pg                       1861 ECB             :             }
 1138 pg                       1862 GIC     7410394 :             else if (ItemPointerEquals(&ituple->t_tid, &kitem->heapTid))
 1138 pg                       1863 CBC      194890 :                 killtuple = true;
 6181 tgl                      1864 ECB             : 
                               1865                 :             /*
                               1866                 :              * Mark index item as dead, if it isn't already.  Since this
                               1867                 :              * happens while holding a buffer lock possibly in shared mode,
                               1868                 :              * it's possible that multiple processes attempt to do this
                               1869                 :              * simultaneously, leading to multiple full-page images being sent
                               1870                 :              * to WAL (if wal_log_hints or data checksums are enabled), which
                               1871                 :              * is undesirable.
                               1872                 :              */
 1059 alvherre                 1873 GIC    10707753 :             if (killtuple && !ItemIdIsDead(iid))
 6181 tgl                      1874 ECB             :             {
                               1875                 :                 /* found the item/all posting list items */
 5688 tgl                      1876 GIC      196255 :                 ItemIdMarkDead(iid);
 6181 tgl                      1877 CBC      196255 :                 killedsomething = true;
                               1878          196255 :                 break;          /* out of inner search loop */
 6181 tgl                      1879 ECB             :             }
 6181 tgl                      1880 GIC    10511498 :             offnum = OffsetNumberNext(offnum);
 6181 tgl                      1881 ECB             :         }
                               1882                 :     }
                               1883                 : 
                               1884                 :     /*
                               1885                 :      * Since this can be redone later if needed, mark as dirty hint.
                               1886                 :      *
                               1887                 :      * Whenever we mark anything LP_DEAD, we also set the page's
                               1888                 :      * BTP_HAS_GARBAGE flag, which is likewise just a hint.  (Note that we
                               1889                 :      * only rely on the page-level flag in !heapkeyspace indexes.)
                               1890                 :      */
 6181 tgl                      1891 GIC      118056 :     if (killedsomething)
 6102 tgl                      1892 ECB             :     {
 6102 tgl                      1893 GIC       69968 :         opaque->btpo_flags |= BTP_HAS_GARBAGE;
 3583 jdavis                   1894 CBC       69968 :         MarkBufferDirtyHint(so->currPos.buf, true);
 6102 tgl                      1895 ECB             :     }
                               1896                 : 
  992 pg                       1897 GIC      118056 :     _bt_unlockbuf(scan->indexRelation, so->currPos.buf);
 6181 tgl                      1898 ECB             : }
                               1899                 : 
                               1900                 : 
                               1901                 : /*
                               1902                 :  * The following routines manage a shared-memory area in which we track
                               1903                 :  * assignment of "vacuum cycle IDs" to currently-active btree vacuuming
                               1904                 :  * operations.  There is a single counter which increments each time we
                               1905                 :  * start a vacuum to assign it a cycle ID.  Since multiple vacuums could
                               1906                 :  * be active concurrently, we have to track the cycle ID for each active
                               1907                 :  * vacuum; this requires at most MaxBackends entries (usually far fewer).
                               1908                 :  * We assume at most one vacuum can be active for a given index.
                               1909                 :  *
                               1910                 :  * Access to the shared memory area is controlled by BtreeVacuumLock.
                               1911                 :  * In principle we could use a separate lmgr locktag for each index,
                               1912                 :  * but a single LWLock is much cheaper, and given the short time that
                               1913                 :  * the lock is ever held, the concurrency hit should be minimal.
                               1914                 :  */
                               1915                 : 
                               1916                 : typedef struct BTOneVacInfo
                               1917                 : {
                               1918                 :     LockRelId   relid;          /* global identifier of an index */
                               1919                 :     BTCycleId   cycleid;        /* cycle ID for its active VACUUM */
                               1920                 : } BTOneVacInfo;
                               1921                 : 
                               1922                 : typedef struct BTVacInfo
                               1923                 : {
                               1924                 :     BTCycleId   cycle_ctr;      /* cycle ID most recently assigned */
                               1925                 :     int         num_vacuums;    /* number of currently active VACUUMs */
                               1926                 :     int         max_vacuums;    /* allocated length of vacuums[] array */
                               1927                 :     BTOneVacInfo vacuums[FLEXIBLE_ARRAY_MEMBER];
                               1928                 : } BTVacInfo;
                               1929                 : 
                               1930                 : static BTVacInfo *btvacinfo;
                               1931                 : 
                               1932                 : 
                               1933                 : /*
                               1934                 :  * _bt_vacuum_cycleid --- get the active vacuum cycle ID for an index,
                               1935                 :  *      or zero if there is no active VACUUM
                               1936                 :  *
                               1937                 :  * Note: for correct interlocking, the caller must already hold pin and
                               1938                 :  * exclusive lock on each buffer it will store the cycle ID into.  This
                               1939                 :  * ensures that even if a VACUUM starts immediately afterwards, it cannot
                               1940                 :  * process those pages until the page split is complete.
                               1941                 :  */
                               1942                 : BTCycleId
 6180 tgl                      1943 GIC       23479 : _bt_vacuum_cycleid(Relation rel)
 6180 tgl                      1944 ECB             : {
 6180 tgl                      1945 GIC       23479 :     BTCycleId   result = 0;
 6180 tgl                      1946 ECB             :     int         i;
                               1947                 : 
                               1948                 :     /* Share lock is enough since this is a read-only operation */
 6180 tgl                      1949 GIC       23479 :     LWLockAcquire(BtreeVacuumLock, LW_SHARED);
 6180 tgl                      1950 ECB             : 
 6180 tgl                      1951 GIC       23479 :     for (i = 0; i < btvacinfo->num_vacuums; i++)
 6180 tgl                      1952 ECB             :     {
 6180 tgl                      1953 UIC           0 :         BTOneVacInfo *vac = &btvacinfo->vacuums[i];
 6180 tgl                      1954 EUB             : 
 6180 tgl                      1955 UIC           0 :         if (vac->relid.relId == rel->rd_lockInfo.lockRelId.relId &&
 6180 tgl                      1956 UBC           0 :             vac->relid.dbId == rel->rd_lockInfo.lockRelId.dbId)
 6180 tgl                      1957 EUB             :         {
 6180 tgl                      1958 UIC           0 :             result = vac->cycleid;
 6180 tgl                      1959 UBC           0 :             break;
 6180 tgl                      1960 EUB             :         }
                               1961                 :     }
                               1962                 : 
 6180 tgl                      1963 GIC       23479 :     LWLockRelease(BtreeVacuumLock);
 6180 tgl                      1964 CBC       23479 :     return result;
 6180 tgl                      1965 ECB             : }
                               1966                 : 
                               1967                 : /*
                               1968                 :  * _bt_start_vacuum --- assign a cycle ID to a just-starting VACUUM operation
                               1969                 :  *
                               1970                 :  * Note: the caller must guarantee that it will eventually call
                               1971                 :  * _bt_end_vacuum, else we'll permanently leak an array slot.  To ensure
                               1972                 :  * that this happens even in elog(FATAL) scenarios, the appropriate coding
                               1973                 :  * is not just a PG_TRY, but
                               1974                 :  *      PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel))
                               1975                 :  */
                               1976                 : BTCycleId
 6180 tgl                      1977 GIC        4011 : _bt_start_vacuum(Relation rel)
 6180 tgl                      1978 ECB             : {
                               1979                 :     BTCycleId   result;
                               1980                 :     int         i;
                               1981                 :     BTOneVacInfo *vac;
                               1982                 : 
 6180 tgl                      1983 GIC        4011 :     LWLockAcquire(BtreeVacuumLock, LW_EXCLUSIVE);
 6180 tgl                      1984 ECB             : 
                               1985                 :     /*
                               1986                 :      * Assign the next cycle ID, being careful to avoid zero as well as the
                               1987                 :      * reserved high values.
                               1988                 :      */
 5844 tgl                      1989 GIC        4011 :     result = ++(btvacinfo->cycle_ctr);
 5844 tgl                      1990 CBC        4011 :     if (result == 0 || result > MAX_BT_CYCLE_ID)
 5844 tgl                      1991 LBC           0 :         result = btvacinfo->cycle_ctr = 1;
 6180 tgl                      1992 EUB             : 
                               1993                 :     /* Let's just make sure there's no entry already for this index */
 6180 tgl                      1994 GIC        4012 :     for (i = 0; i < btvacinfo->num_vacuums; i++)
 6180 tgl                      1995 ECB             :     {
 6180 tgl                      1996 GIC           1 :         vac = &btvacinfo->vacuums[i];
 6180 tgl                      1997 CBC           1 :         if (vac->relid.relId == rel->rd_lockInfo.lockRelId.relId &&
 6180 tgl                      1998 LBC           0 :             vac->relid.dbId == rel->rd_lockInfo.lockRelId.dbId)
 5854 tgl                      1999 EUB             :         {
                               2000                 :             /*
                               2001                 :              * Unlike most places in the backend, we have to explicitly
                               2002                 :              * release our LWLock before throwing an error.  This is because
                               2003                 :              * we expect _bt_end_vacuum() to be called before transaction
                               2004                 :              * abort cleanup can run to release LWLocks.
                               2005                 :              */
 5854 tgl                      2006 UIC           0 :             LWLockRelease(BtreeVacuumLock);
 6180 tgl                      2007 UBC           0 :             elog(ERROR, "multiple active vacuums for index \"%s\"",
 6180 tgl                      2008 EUB             :                  RelationGetRelationName(rel));
                               2009                 :         }
                               2010                 :     }
                               2011                 : 
                               2012                 :     /* OK, add an entry */
 6180 tgl                      2013 GIC        4011 :     if (btvacinfo->num_vacuums >= btvacinfo->max_vacuums)
 5854 tgl                      2014 ECB             :     {
 5854 tgl                      2015 UIC           0 :         LWLockRelease(BtreeVacuumLock);
 6180 tgl                      2016 UBC           0 :         elog(ERROR, "out of btvacinfo slots");
 5854 tgl                      2017 EUB             :     }
 6180 tgl                      2018 GIC        4011 :     vac = &btvacinfo->vacuums[btvacinfo->num_vacuums];
 6180 tgl                      2019 CBC        4011 :     vac->relid = rel->rd_lockInfo.lockRelId;
                               2020            4011 :     vac->cycleid = result;
                               2021            4011 :     btvacinfo->num_vacuums++;
 6180 tgl                      2022 ECB             : 
 6180 tgl                      2023 GIC        4011 :     LWLockRelease(BtreeVacuumLock);
 6180 tgl                      2024 CBC        4011 :     return result;
 6180 tgl                      2025 ECB             : }
                               2026                 : 
                               2027                 : /*
                               2028                 :  * _bt_end_vacuum --- mark a btree VACUUM operation as done
                               2029                 :  *
                               2030                 :  * Note: this is deliberately coded not to complain if no entry is found;
                               2031                 :  * this allows the caller to put PG_TRY around the start_vacuum operation.
                               2032                 :  */
                               2033                 : void
 6180 tgl                      2034 GIC        4011 : _bt_end_vacuum(Relation rel)
 6180 tgl                      2035 ECB             : {
                               2036                 :     int         i;
                               2037                 : 
 6180 tgl                      2038 GIC        4011 :     LWLockAcquire(BtreeVacuumLock, LW_EXCLUSIVE);
 6180 tgl                      2039 ECB             : 
                               2040                 :     /* Find the array entry */
 6180 tgl                      2041 GIC        4011 :     for (i = 0; i < btvacinfo->num_vacuums; i++)
 6180 tgl                      2042 ECB             :     {
 6180 tgl                      2043 GIC        4011 :         BTOneVacInfo *vac = &btvacinfo->vacuums[i];
 6180 tgl                      2044 ECB             : 
 6180 tgl                      2045 GIC        4011 :         if (vac->relid.relId == rel->rd_lockInfo.lockRelId.relId &&
 6180 tgl                      2046 CBC        4011 :             vac->relid.dbId == rel->rd_lockInfo.lockRelId.dbId)
 6180 tgl                      2047 ECB             :         {
                               2048                 :             /* Remove it by shifting down the last entry */
 6180 tgl                      2049 GIC        4011 :             *vac = btvacinfo->vacuums[btvacinfo->num_vacuums - 1];
 6180 tgl                      2050 CBC        4011 :             btvacinfo->num_vacuums--;
                               2051            4011 :             break;
 6180 tgl                      2052 ECB             :         }
                               2053                 :     }
                               2054                 : 
 6180 tgl                      2055 GIC        4011 :     LWLockRelease(BtreeVacuumLock);
 6180 tgl                      2056 CBC        4011 : }
 6180 tgl                      2057 ECB             : 
                               2058                 : /*
                               2059                 :  * _bt_end_vacuum wrapped as an on_shmem_exit callback function
                               2060                 :  */
                               2061                 : void
 5471 tgl                      2062 UIC           0 : _bt_end_vacuum_callback(int code, Datum arg)
 5471 tgl                      2063 EUB             : {
 5471 tgl                      2064 UIC           0 :     _bt_end_vacuum((Relation) DatumGetPointer(arg));
 5471 tgl                      2065 UBC           0 : }
 5471 tgl                      2066 EUB             : 
                               2067                 : /*
                               2068                 :  * BTreeShmemSize --- report amount of shared memory space needed
                               2069                 :  */
                               2070                 : Size
 6180 tgl                      2071 GIC        4564 : BTreeShmemSize(void)
 6180 tgl                      2072 ECB             : {
                               2073                 :     Size        size;
                               2074                 : 
 2970 tgl                      2075 GIC        4564 :     size = offsetof(BTVacInfo, vacuums);
  362 rhaas                    2076 CBC        4564 :     size = add_size(size, mul_size(MaxBackends, sizeof(BTOneVacInfo)));
 6180 tgl                      2077            4564 :     return size;
 6180 tgl                      2078 ECB             : }
                               2079                 : 
                               2080                 : /*
                               2081                 :  * BTreeShmemInit --- initialize this module's shared memory
                               2082                 :  */
                               2083                 : void
 6180 tgl                      2084 GIC        1826 : BTreeShmemInit(void)
 6180 tgl                      2085 ECB             : {
                               2086                 :     bool        found;
                               2087                 : 
 6180 tgl                      2088 GIC        1826 :     btvacinfo = (BTVacInfo *) ShmemInitStruct("BTree Vacuum State",
 6180 tgl                      2089 ECB             :                                               BTreeShmemSize(),
                               2090                 :                                               &found);
                               2091                 : 
 6180 tgl                      2092 GIC        1826 :     if (!IsUnderPostmaster)
 6180 tgl                      2093 ECB             :     {
                               2094                 :         /* Initialize shared memory area */
 6180 tgl                      2095 GIC        1826 :         Assert(!found);
 6180 tgl                      2096 ECB             : 
                               2097                 :         /*
                               2098                 :          * It doesn't really matter what the cycle counter starts at, but
                               2099                 :          * having it always start the same doesn't seem good.  Seed with
                               2100                 :          * low-order bits of time() instead.
                               2101                 :          */
 6180 tgl                      2102 GIC        1826 :         btvacinfo->cycle_ctr = (BTCycleId) time(NULL);
 6180 tgl                      2103 ECB             : 
 6180 tgl                      2104 GIC        1826 :         btvacinfo->num_vacuums = 0;
  362 rhaas                    2105 CBC        1826 :         btvacinfo->max_vacuums = MaxBackends;
 6180 tgl                      2106 ECB             :     }
                               2107                 :     else
 6180 tgl                      2108 UIC           0 :         Assert(found);
 6180 tgl                      2109 GBC        1826 : }
 6125 bruce                    2110 ECB             : 
                               2111                 : bytea *
 2639 tgl                      2112 GIC         115 : btoptions(Datum reloptions, bool validate)
 6125 bruce                    2113 ECB             : {
                               2114                 :     static const relopt_parse_elt tab[] = {
                               2115                 :         {"fillfactor", RELOPT_TYPE_INT, offsetof(BTOptions, fillfactor)},
                               2116                 :         {"vacuum_cleanup_index_scale_factor", RELOPT_TYPE_REAL,
                               2117                 :         offsetof(BTOptions, vacuum_cleanup_index_scale_factor)},
                               2118                 :         {"deduplicate_items", RELOPT_TYPE_BOOL,
                               2119                 :         offsetof(BTOptions, deduplicate_items)}
                               2120                 :     };
                               2121                 : 
 1231 michael                  2122 GIC         115 :     return (bytea *) build_reloptions(reloptions, validate,
 1231 michael                  2123 ECB             :                                       RELOPT_KIND_BTREE,
                               2124                 :                                       sizeof(BTOptions),
                               2125                 :                                       tab, lengthof(tab));
                               2126                 : }
                               2127                 : 
                               2128                 : /*
                               2129                 :  *  btproperty() -- Check boolean properties of indexes.
                               2130                 :  *
                               2131                 :  * This is optional, but handling AMPROP_RETURNABLE here saves opening the rel
                               2132                 :  * to call btcanreturn.
                               2133                 :  */
                               2134                 : bool
 2430 tgl                      2135 GIC         378 : btproperty(Oid index_oid, int attno,
 2430 tgl                      2136 ECB             :            IndexAMProperty prop, const char *propname,
                               2137                 :            bool *res, bool *isnull)
                               2138                 : {
 2430 tgl                      2139 GIC         378 :     switch (prop)
 2430 tgl                      2140 ECB             :     {
 2430 tgl                      2141 GIC          21 :         case AMPROP_RETURNABLE:
 2430 tgl                      2142 ECB             :             /* answer only for columns, not AM or whole index */
 2430 tgl                      2143 GIC          21 :             if (attno == 0)
 2430 tgl                      2144 CBC           6 :                 return false;
 2430 tgl                      2145 ECB             :             /* otherwise, btree can always return data */
 2430 tgl                      2146 GIC          15 :             *res = true;
 2430 tgl                      2147 CBC          15 :             return true;
 2430 tgl                      2148 ECB             : 
 2430 tgl                      2149 GIC         357 :         default:
 2430 tgl                      2150 CBC         357 :             return false;       /* punt to generic code */
 2430 tgl                      2151 ECB             :     }
                               2152                 : }
                               2153                 : 
                               2154                 : /*
                               2155                 :  *  btbuildphasename() -- Return name of index build phase.
                               2156                 :  */
                               2157                 : char *
 1468 alvherre                 2158 UIC           0 : btbuildphasename(int64 phasenum)
 1468 alvherre                 2159 EUB             : {
 1468 alvherre                 2160 UIC           0 :     switch (phasenum)
 1468 alvherre                 2161 EUB             :     {
 1468 alvherre                 2162 UIC           0 :         case PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE:
 1468 alvherre                 2163 UBC           0 :             return "initializing";
                               2164               0 :         case PROGRESS_BTREE_PHASE_INDEXBUILD_TABLESCAN:
                               2165               0 :             return "scanning table";
                               2166               0 :         case PROGRESS_BTREE_PHASE_PERFORMSORT_1:
                               2167               0 :             return "sorting live tuples";
                               2168               0 :         case PROGRESS_BTREE_PHASE_PERFORMSORT_2:
                               2169               0 :             return "sorting dead tuples";
                               2170               0 :         case PROGRESS_BTREE_PHASE_LEAF_LOAD:
                               2171               0 :             return "loading tuples in tree";
                               2172               0 :         default:
                               2173               0 :             return NULL;
 1468 alvherre                 2174 EUB             :     }
                               2175                 : }
                               2176                 : 
                               2177                 : /*
                               2178                 :  *  _bt_truncate() -- create tuple without unneeded suffix attributes.
                               2179                 :  *
                               2180                 :  * Returns truncated pivot index tuple allocated in caller's memory context,
                               2181                 :  * with key attributes copied from caller's firstright argument.  If rel is
                               2182                 :  * an INCLUDE index, non-key attributes will definitely be truncated away,
                               2183                 :  * since they're not part of the key space.  More aggressive suffix
                               2184                 :  * truncation can take place when it's clear that the returned tuple does not
                               2185                 :  * need one or more suffix key attributes.  We only need to keep firstright
                               2186                 :  * attributes up to and including the first non-lastleft-equal attribute.
                               2187                 :  * Caller's insertion scankey is used to compare the tuples; the scankey's
                               2188                 :  * argument values are not considered here.
                               2189                 :  *
                               2190                 :  * Note that returned tuple's t_tid offset will hold the number of attributes
                               2191                 :  * present, so the original item pointer offset is not represented.  Caller
                               2192                 :  * should only change truncated tuple's downlink.  Note also that truncated
                               2193                 :  * key attributes are treated as containing "minus infinity" values by
                               2194                 :  * _bt_compare().
                               2195                 :  *
                               2196                 :  * In the worst case (when a heap TID must be appended to distinguish lastleft
                               2197                 :  * from firstright), the size of the returned tuple is the size of firstright
                               2198                 :  * plus the size of an additional MAXALIGN()'d item pointer.  This guarantee
                               2199                 :  * is important, since callers need to stay under the 1/3 of a page
                               2200                 :  * restriction on tuple size.  If this routine is ever taught to truncate
                               2201                 :  * within an attribute/datum, it will need to avoid returning an enlarged
                               2202                 :  * tuple to caller when truncation + TOAST compression ends up enlarging the
                               2203                 :  * final datum.
                               2204                 :  */
                               2205                 : IndexTuple
 1481 pg                       2206 GIC       62102 : _bt_truncate(Relation rel, IndexTuple lastleft, IndexTuple firstright,
 1481 pg                       2207 ECB             :              BTScanInsert itup_key)
                               2208                 : {
 1481 pg                       2209 GIC       62102 :     TupleDesc   itupdesc = RelationGetDescr(rel);
 1481 pg                       2210 CBC       62102 :     int16       nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
 1481 pg                       2211 ECB             :     int         keepnatts;
                               2212                 :     IndexTuple  pivot;
                               2213                 :     IndexTuple  tidpivot;
                               2214                 :     ItemPointer pivotheaptid;
                               2215                 :     Size        newsize;
                               2216                 : 
                               2217                 :     /*
                               2218                 :      * We should only ever truncate non-pivot tuples from leaf pages.  It's
                               2219                 :      * never okay to truncate when splitting an internal page.
                               2220                 :      */
 1138 pg                       2221 GIC       62102 :     Assert(!BTreeTupleIsPivot(lastleft) && !BTreeTupleIsPivot(firstright));
 1481 pg                       2222 ECB             : 
                               2223                 :     /* Determine how many attributes must be kept in truncated tuple */
 1481 pg                       2224 GIC       62102 :     keepnatts = _bt_keep_natts(rel, lastleft, firstright, itup_key);
 1481 pg                       2225 ECB             : 
                               2226                 : #ifdef DEBUG_NO_TRUNCATE
                               2227                 :     /* Force truncation to be ineffective for testing purposes */
                               2228                 :     keepnatts = nkeyatts + 1;
                               2229                 : #endif
                               2230                 : 
 1105 pg                       2231 GIC       62102 :     pivot = index_truncate_tuple(itupdesc, firstright,
 1105 pg                       2232 ECB             :                                  Min(keepnatts, nkeyatts));
                               2233                 : 
 1105 pg                       2234 GIC       62102 :     if (BTreeTupleIsPosting(pivot))
 1481 pg                       2235 ECB             :     {
                               2236                 :         /*
                               2237                 :          * index_truncate_tuple() just returns a straight copy of firstright
                               2238                 :          * when it has no attributes to truncate.  When that happens, we may
                               2239                 :          * need to truncate away a posting list here instead.
                               2240                 :          */
 1105 pg                       2241 GIC         880 :         Assert(keepnatts == nkeyatts || keepnatts == nkeyatts + 1);
 1105 pg                       2242 CBC         880 :         Assert(IndexRelationGetNumberOfAttributes(rel) == nkeyatts);
                               2243             880 :         pivot->t_info &= ~INDEX_SIZE_MASK;
                               2244             880 :         pivot->t_info |= MAXALIGN(BTreeTupleGetPostingOffset(firstright));
 1105 pg                       2245 ECB             :     }
                               2246                 : 
                               2247                 :     /*
                               2248                 :      * If there is a distinguishing key attribute within pivot tuple, we're
                               2249                 :      * done
                               2250                 :      */
 1105 pg                       2251 GIC       62102 :     if (keepnatts <= nkeyatts)
 1105 pg                       2252 ECB             :     {
 1097 pg                       2253 GIC       61586 :         BTreeTupleSetNAtts(pivot, keepnatts, false);
 1105 pg                       2254 CBC       61586 :         return pivot;
 1481 pg                       2255 ECB             :     }
                               2256                 : 
                               2257                 :     /*
                               2258                 :      * We have to store a heap TID in the new pivot tuple, since no non-TID
                               2259                 :      * key attribute value in firstright distinguishes the right side of the
                               2260                 :      * split from the left side.  nbtree conceptualizes this case as an
                               2261                 :      * inability to truncate away any key attributes, since heap TID is
                               2262                 :      * treated as just another key attribute (despite lacking a pg_attribute
                               2263                 :      * entry).
                               2264                 :      *
                               2265                 :      * Use enlarged space that holds a copy of pivot.  We need the extra space
                               2266                 :      * to store a heap TID at the end (using the special pivot tuple
                               2267                 :      * representation).  Note that the original pivot already has firstright's
                               2268                 :      * possible posting list/non-key attribute values removed at this point.
                               2269                 :      */
 1105 pg                       2270 GIC         516 :     newsize = MAXALIGN(IndexTupleSize(pivot)) + MAXALIGN(sizeof(ItemPointerData));
 1105 pg                       2271 CBC         516 :     tidpivot = palloc0(newsize);
                               2272             516 :     memcpy(tidpivot, pivot, MAXALIGN(IndexTupleSize(pivot)));
 1105 pg                       2273 ECB             :     /* Cannot leak memory here */
 1105 pg                       2274 GIC         516 :     pfree(pivot);
 1105 pg                       2275 ECB             : 
                               2276                 :     /*
                               2277                 :      * Store all of firstright's key attribute values plus a tiebreaker heap
                               2278                 :      * TID value in enlarged pivot tuple
                               2279                 :      */
 1105 pg                       2280 GIC         516 :     tidpivot->t_info &= ~INDEX_SIZE_MASK;
 1105 pg                       2281 CBC         516 :     tidpivot->t_info |= newsize;
 1097                          2282             516 :     BTreeTupleSetNAtts(tidpivot, nkeyatts, true);
 1105                          2283             516 :     pivotheaptid = BTreeTupleGetHeapTID(tidpivot);
 1481 pg                       2284 ECB             : 
                               2285                 :     /*
                               2286                 :      * Lehman & Yao use lastleft as the leaf high key in all cases, but don't
                               2287                 :      * consider suffix truncation.  It seems like a good idea to follow that
                               2288                 :      * example in cases where no truncation takes place -- use lastleft's heap
                               2289                 :      * TID.  (This is also the closest value to negative infinity that's
                               2290                 :      * legally usable.)
                               2291                 :      */
 1138 pg                       2292 GIC         516 :     ItemPointerCopy(BTreeTupleGetMaxHeapTID(lastleft), pivotheaptid);
 1481 pg                       2293 ECB             : 
                               2294                 :     /*
                               2295                 :      * We're done.  Assert() that heap TID invariants hold before returning.
                               2296                 :      *
                               2297                 :      * Lehman and Yao require that the downlink to the right page, which is to
                               2298                 :      * be inserted into the parent page in the second phase of a page split be
                               2299                 :      * a strict lower bound on items on the right page, and a non-strict upper
                               2300                 :      * bound for items on the left page.  Assert that heap TIDs follow these
                               2301                 :      * invariants, since a heap TID value is apparently needed as a
                               2302                 :      * tiebreaker.
                               2303                 :      */
                               2304                 : #ifndef DEBUG_NO_TRUNCATE
 1138 pg                       2305 GIC         516 :     Assert(ItemPointerCompare(BTreeTupleGetMaxHeapTID(lastleft),
 1138 pg                       2306 ECB             :                               BTreeTupleGetHeapTID(firstright)) < 0);
 1138 pg                       2307 GIC         516 :     Assert(ItemPointerCompare(pivotheaptid,
 1138 pg                       2308 ECB             :                               BTreeTupleGetHeapTID(lastleft)) >= 0);
 1138 pg                       2309 GIC         516 :     Assert(ItemPointerCompare(pivotheaptid,
 1138 pg                       2310 ECB             :                               BTreeTupleGetHeapTID(firstright)) < 0);
                               2311                 : #else
                               2312                 : 
                               2313                 :     /*
                               2314                 :      * Those invariants aren't guaranteed to hold for lastleft + firstright
                               2315                 :      * heap TID attribute values when they're considered here only because
                               2316                 :      * DEBUG_NO_TRUNCATE is defined (a heap TID is probably not actually
                               2317                 :      * needed as a tiebreaker).  DEBUG_NO_TRUNCATE must therefore use a heap
                               2318                 :      * TID value that always works as a strict lower bound for items to the
                               2319                 :      * right.  In particular, it must avoid using firstright's leading key
                               2320                 :      * attribute values along with lastleft's heap TID value when lastleft's
                               2321                 :      * TID happens to be greater than firstright's TID.
                               2322                 :      */
                               2323                 :     ItemPointerCopy(BTreeTupleGetHeapTID(firstright), pivotheaptid);
                               2324                 : 
                               2325                 :     /*
                               2326                 :      * Pivot heap TID should never be fully equal to firstright.  Note that
                               2327                 :      * the pivot heap TID will still end up equal to lastleft's heap TID when
                               2328                 :      * that's the only usable value.
                               2329                 :      */
                               2330                 :     ItemPointerSetOffsetNumber(pivotheaptid,
                               2331                 :                                OffsetNumberPrev(ItemPointerGetOffsetNumber(pivotheaptid)));
                               2332                 :     Assert(ItemPointerCompare(pivotheaptid,
                               2333                 :                               BTreeTupleGetHeapTID(firstright)) < 0);
                               2334                 : #endif
                               2335                 : 
 1105 pg                       2336 GIC         516 :     return tidpivot;
 1481 pg                       2337 ECB             : }
                               2338                 : 
                               2339                 : /*
                               2340                 :  * _bt_keep_natts - how many key attributes to keep when truncating.
                               2341                 :  *
                               2342                 :  * Caller provides two tuples that enclose a split point.  Caller's insertion
                               2343                 :  * scankey is used to compare the tuples; the scankey's argument values are
                               2344                 :  * not considered here.
                               2345                 :  *
                               2346                 :  * This can return a number of attributes that is one greater than the
                               2347                 :  * number of key attributes for the index relation.  This indicates that the
                               2348                 :  * caller must use a heap TID as a unique-ifier in new pivot tuple.
                               2349                 :  */
                               2350                 : static int
 1481 pg                       2351 GIC       62102 : _bt_keep_natts(Relation rel, IndexTuple lastleft, IndexTuple firstright,
 1481 pg                       2352 ECB             :                BTScanInsert itup_key)
                               2353                 : {
 1481 pg                       2354 GIC       62102 :     int         nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
 1481 pg                       2355 CBC       62102 :     TupleDesc   itupdesc = RelationGetDescr(rel);
 1481 pg                       2356 ECB             :     int         keepnatts;
                               2357                 :     ScanKey     scankey;
                               2358                 : 
                               2359                 :     /*
                               2360                 :      * _bt_compare() treats truncated key attributes as having the value minus
                               2361                 :      * infinity, which would break searches within !heapkeyspace indexes.  We
                               2362                 :      * must still truncate away non-key attribute values, though.
                               2363                 :      */
 1481 pg                       2364 GIC       62102 :     if (!itup_key->heapkeyspace)
 1481 pg                       2365 LBC           0 :         return nkeyatts;
 1481 pg                       2366 EUB             : 
 1481 pg                       2367 GIC       62102 :     scankey = itup_key->scankeys;
 1481 pg                       2368 CBC       62102 :     keepnatts = 1;
                               2369           78658 :     for (int attnum = 1; attnum <= nkeyatts; attnum++, scankey++)
 1481 pg                       2370 ECB             :     {
                               2371                 :         Datum       datum1,
                               2372                 :                     datum2;
                               2373                 :         bool        isNull1,
                               2374                 :                     isNull2;
                               2375                 : 
 1481 pg                       2376 GIC       78142 :         datum1 = index_getattr(lastleft, attnum, itupdesc, &isNull1);
 1481 pg                       2377 CBC       78142 :         datum2 = index_getattr(firstright, attnum, itupdesc, &isNull2);
 1481 pg                       2378 ECB             : 
 1481 pg                       2379 GIC       78142 :         if (isNull1 != isNull2)
 1481 pg                       2380 CBC       61586 :             break;
 1481 pg                       2381 ECB             : 
 1481 pg                       2382 GIC      156284 :         if (!isNull1 &&
 1481 pg                       2383 CBC       78142 :             DatumGetInt32(FunctionCall2Coll(&scankey->sk_func,
 1481 pg                       2384 ECB             :                                             scankey->sk_collation,
                               2385                 :                                             datum1,
                               2386                 :                                             datum2)) != 0)
 1481 pg                       2387 GIC       61586 :             break;
 1481 pg                       2388 ECB             : 
 1481 pg                       2389 GIC       16556 :         keepnatts++;
 1481 pg                       2390 ECB             :     }
                               2391                 : 
                               2392                 :     /*
                               2393                 :      * Assert that _bt_keep_natts_fast() agrees with us in passing.  This is
                               2394                 :      * expected in an allequalimage index.
                               2395                 :      */
 1138 pg                       2396 GIC       62102 :     Assert(!itup_key->allequalimage ||
 1138 pg                       2397 ECB             :            keepnatts == _bt_keep_natts_fast(rel, lastleft, firstright));
                               2398                 : 
 1481 pg                       2399 GIC       62102 :     return keepnatts;
 1481 pg                       2400 ECB             : }
                               2401                 : 
                               2402                 : /*
                               2403                 :  * _bt_keep_natts_fast - fast bitwise variant of _bt_keep_natts.
                               2404                 :  *
                               2405                 :  * This is exported so that a candidate split point can have its effect on
                               2406                 :  * suffix truncation inexpensively evaluated ahead of time when finding a
                               2407                 :  * split location.  A naive bitwise approach to datum comparisons is used to
                               2408                 :  * save cycles.
                               2409                 :  *
                               2410                 :  * The approach taken here usually provides the same answer as _bt_keep_natts
                               2411                 :  * will (for the same pair of tuples from a heapkeyspace index), since the
                               2412                 :  * majority of btree opclasses can never indicate that two datums are equal
                               2413                 :  * unless they're bitwise equal after detoasting.  When an index only has
                               2414                 :  * "equal image" columns, routine is guaranteed to give the same result as
                               2415                 :  * _bt_keep_natts would.
                               2416                 :  *
                               2417                 :  * Callers can rely on the fact that attributes considered equal here are
                               2418                 :  * definitely also equal according to _bt_keep_natts, even when the index uses
                               2419                 :  * an opclass or collation that is not "allequalimage"/deduplication-safe.
                               2420                 :  * This weaker guarantee is good enough for nbtsplitloc.c caller, since false
                               2421                 :  * negatives generally only have the effect of making leaf page splits use a
                               2422                 :  * more balanced split point.
                               2423                 :  */
                               2424                 : int
 1481 pg                       2425 GIC    10694770 : _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright)
 1481 pg                       2426 ECB             : {
 1481 pg                       2427 GIC    10694770 :     TupleDesc   itupdesc = RelationGetDescr(rel);
 1481 pg                       2428 CBC    10694770 :     int         keysz = IndexRelationGetNumberOfKeyAttributes(rel);
 1481 pg                       2429 ECB             :     int         keepnatts;
                               2430                 : 
 1481 pg                       2431 GIC    10694770 :     keepnatts = 1;
 1481 pg                       2432 CBC    19967380 :     for (int attnum = 1; attnum <= keysz; attnum++)
 1481 pg                       2433 ECB             :     {
                               2434                 :         Datum       datum1,
                               2435                 :                     datum2;
                               2436                 :         bool        isNull1,
                               2437                 :                     isNull2;
                               2438                 :         Form_pg_attribute att;
                               2439                 : 
 1481 pg                       2440 GIC    18295223 :         datum1 = index_getattr(lastleft, attnum, itupdesc, &isNull1);
 1481 pg                       2441 CBC    18295223 :         datum2 = index_getattr(firstright, attnum, itupdesc, &isNull2);
                               2442        18295223 :         att = TupleDescAttr(itupdesc, attnum - 1);
 1481 pg                       2443 ECB             : 
 1481 pg                       2444 GIC    18295223 :         if (isNull1 != isNull2)
 1481 pg                       2445 CBC     9022613 :             break;
 1481 pg                       2446 ECB             : 
 1481 pg                       2447 GIC    18295151 :         if (!isNull1 &&
 1244 pg                       2448 CBC    18292025 :             !datum_image_eq(datum1, datum2, att->attbyval, att->attlen))
 1481                          2449         9022541 :             break;
 1481 pg                       2450 ECB             : 
 1481 pg                       2451 GIC     9272610 :         keepnatts++;
 1481 pg                       2452 ECB             :     }
                               2453                 : 
 1481 pg                       2454 GIC    10694770 :     return keepnatts;
 1816 teodor                   2455 ECB             : }
                               2456                 : 
                               2457                 : /*
                               2458                 :  *  _bt_check_natts() -- Verify tuple has expected number of attributes.
                               2459                 :  *
                               2460                 :  * Returns value indicating if the expected number of attributes were found
                               2461                 :  * for a particular offset on page.  This can be used as a general purpose
                               2462                 :  * sanity check.
                               2463                 :  *
                               2464                 :  * Testing a tuple directly with BTreeTupleGetNAtts() should generally be
                               2465                 :  * preferred to calling here.  That's usually more convenient, and is always
                               2466                 :  * more explicit.  Call here instead when offnum's tuple may be a negative
                               2467                 :  * infinity tuple that uses the pre-v11 on-disk representation, or when a low
                               2468                 :  * context check is appropriate.  This routine is as strict as possible about
                               2469                 :  * what is expected on each version of btree.
                               2470                 :  */
                               2471                 : bool
 1481 pg                       2472 GIC   179023210 : _bt_check_natts(Relation rel, bool heapkeyspace, Page page, OffsetNumber offnum)
 1816 teodor                   2473 ECB             : {
 1809 tgl                      2474 GIC   179023210 :     int16       natts = IndexRelationGetNumberOfAttributes(rel);
 1809 tgl                      2475 CBC   179023210 :     int16       nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
  373 michael                  2476       179023210 :     BTPageOpaque opaque = BTPageGetOpaque(page);
 1809 tgl                      2477 ECB             :     IndexTuple  itup;
                               2478                 :     int         tupnatts;
                               2479                 : 
                               2480                 :     /*
                               2481                 :      * We cannot reliably test a deleted or half-dead page, since they have
                               2482                 :      * dummy high keys
                               2483                 :      */
 1816 teodor                   2484 GIC   179023210 :     if (P_IGNORE(opaque))
 1816 teodor                   2485 LBC           0 :         return true;
 1816 teodor                   2486 EUB             : 
 1816 teodor                   2487 GIC   179023210 :     Assert(offnum >= FirstOffsetNumber &&
 1816 teodor                   2488 ECB             :            offnum <= PageGetMaxOffsetNumber(page));
                               2489                 : 
 1816 teodor                   2490 GIC   179023210 :     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
 1481 pg                       2491       179023210 :     tupnatts = BTreeTupleGetNAtts(itup, rel);
 1816 teodor                   2492 ECB             : 
 1138 pg                       2493                 :     /* !heapkeyspace indexes do not support deduplication */
 1138 pg                       2494 GIC   179023210 :     if (!heapkeyspace && BTreeTupleIsPosting(itup))
 1138 pg                       2495 UBC           0 :         return false;
                               2496                 : 
                               2497                 :     /* Posting list tuples should never have "pivot heap TID" bit set */
 1138 pg                       2498 CBC   179023210 :     if (BTreeTupleIsPosting(itup) &&
 1138 pg                       2499 GBC     2465578 :         (ItemPointerGetOffsetNumberNoCheck(&itup->t_tid) &
                               2500                 :          BT_PIVOT_HEAP_TID_ATTR) != 0)
 1138 pg                       2501 LBC           0 :         return false;
                               2502                 : 
 1138 pg                       2503 ECB             :     /* INCLUDE indexes do not support deduplication */
 1138 pg                       2504 GIC   179023210 :     if (natts != nkeyatts && BTreeTupleIsPosting(itup))
 1138 pg                       2505 UIC           0 :         return false;
                               2506                 : 
 1816 teodor                   2507 GIC   179023210 :     if (P_ISLEAF(opaque))
                               2508                 :     {
 1816 teodor                   2509 CBC   136971736 :         if (offnum >= P_FIRSTDATAKEY(opaque))
 1816 teodor                   2510 EUB             :         {
                               2511                 :             /*
                               2512                 :              * Non-pivot tuple should never be explicitly marked as a pivot
                               2513                 :              * tuple
                               2514                 :              */
 1138 pg                       2515 GIC   127086211 :             if (BTreeTupleIsPivot(itup))
 1481 pg                       2516 UIC           0 :                 return false;
                               2517                 : 
 1816 teodor                   2518 ECB             :             /*
                               2519                 :              * Leaf tuples that are not the page high key (non-pivot tuples)
                               2520                 :              * should never be truncated.  (Note that tupnatts must have been
                               2521                 :              * inferred, even with a posting list tuple, because only pivot
                               2522                 :              * tuples store tupnatts directly.)
                               2523                 :              */
 1481 pg                       2524 GIC   127086211 :             return tupnatts == natts;
                               2525                 :         }
 1816 teodor                   2526 ECB             :         else
                               2527                 :         {
                               2528                 :             /*
                               2529                 :              * Rightmost page doesn't contain a page high key, so tuple was
                               2530                 :              * checked above as ordinary leaf tuple
                               2531                 :              */
 1816 teodor                   2532 GIC     9885525 :             Assert(!P_RIGHTMOST(opaque));
 1816 teodor                   2533 ECB             : 
 1481 pg                       2534 EUB             :             /*
                               2535                 :              * !heapkeyspace high key tuple contains only key attributes. Note
                               2536                 :              * that tupnatts will only have been explicitly represented in
                               2537                 :              * !heapkeyspace indexes that happen to have non-key attributes.
                               2538                 :              */
 1481 pg                       2539 GIC     9885525 :             if (!heapkeyspace)
 1481 pg                       2540 UIC           0 :                 return tupnatts == nkeyatts;
 1481 pg                       2541 ECB             : 
                               2542                 :             /* Use generic heapkeyspace pivot tuple handling */
                               2543                 :         }
                               2544                 :     }
                               2545                 :     else                        /* !P_ISLEAF(opaque) */
                               2546                 :     {
 1816 teodor                   2547 GIC    42051474 :         if (offnum == P_FIRSTDATAKEY(opaque))
                               2548                 :         {
 1816 teodor                   2549 ECB             :             /*
                               2550                 :              * The first tuple on any internal page (possibly the first after
                               2551                 :              * its high key) is its negative infinity tuple.  Negative
                               2552                 :              * infinity tuples are always truncated to zero attributes.  They
                               2553                 :              * are a particular kind of pivot tuple.
                               2554                 :              */
 1481 pg                       2555 GIC     1890395 :             if (heapkeyspace)
                               2556         1890395 :                 return tupnatts == 0;
                               2557                 : 
                               2558                 :             /*
                               2559                 :              * The number of attributes won't be explicitly represented if the
                               2560                 :              * negative infinity tuple was generated during a page split that
                               2561                 :              * occurred with a version of Postgres before v11.  There must be
                               2562                 :              * a problem when there is an explicit representation that is
                               2563                 :              * non-zero, or when there is no explicit representation and the
 1816 teodor                   2564 EUB             :              * tuple is evidently not a pre-pg_upgrade tuple.
                               2565                 :              *
                               2566                 :              * Prior to v11, downlinks always had P_HIKEY as their offset.
                               2567                 :              * Accept that as an alternative indication of a valid
                               2568                 :              * !heapkeyspace negative infinity tuple.
                               2569                 :              */
 1481 pg                       2570 UIC           0 :             return tupnatts == 0 ||
 1138                          2571               0 :                 ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY;
                               2572                 :         }
                               2573                 :         else
                               2574                 :         {
 1816 teodor                   2575 ECB             :             /*
 1481 pg                       2576 EUB             :              * !heapkeyspace downlink tuple with separator key contains only
                               2577                 :              * key attributes.  Note that tupnatts will only have been
                               2578                 :              * explicitly represented in !heapkeyspace indexes that happen to
                               2579                 :              * have non-key attributes.
                               2580                 :              */
 1481 pg                       2581 GIC    40161079 :             if (!heapkeyspace)
 1481 pg                       2582 UIC           0 :                 return tupnatts == nkeyatts;
 1481 pg                       2583 ECB             : 
                               2584                 :             /* Use generic heapkeyspace pivot tuple handling */
                               2585                 :         }
                               2586                 :     }
                               2587                 : 
                               2588                 :     /* Handle heapkeyspace pivot tuples (excluding minus infinity items) */
 1481 pg                       2589 GIC    50046604 :     Assert(heapkeyspace);
 1481 pg                       2590 ECB             : 
 1481 pg                       2591 EUB             :     /*
                               2592                 :      * Explicit representation of the number of attributes is mandatory with
                               2593                 :      * heapkeyspace index pivot tuples, regardless of whether or not there are
 1481 pg                       2594 ECB             :      * non-key attributes.
 1481 pg                       2595 EUB             :      */
 1138 pg                       2596 GIC    50046604 :     if (!BTreeTupleIsPivot(itup))
 1138 pg                       2597 UIC           0 :         return false;
                               2598                 : 
                               2599                 :     /* Pivot tuple should not use posting list representation (redundant) */
 1138 pg                       2600 GIC    50046604 :     if (BTreeTupleIsPosting(itup))
 1481 pg                       2601 LBC           0 :         return false;
 1481 pg                       2602 EUB             : 
                               2603                 :     /*
                               2604                 :      * Heap TID is a tiebreaker key attribute, so it cannot be untruncated
                               2605                 :      * when any other key attribute is truncated
                               2606                 :      */
 1481 pg                       2607 GIC    50046604 :     if (BTreeTupleGetHeapTID(itup) != NULL && tupnatts != nkeyatts)
 1481 pg                       2608 UIC           0 :         return false;
                               2609                 : 
 1481 pg                       2610 ECB             :     /*
                               2611                 :      * Pivot tuple must have at least one untruncated key attribute (minus
                               2612                 :      * infinity pivot tuples are the only exception).  Pivot tuples can never
                               2613                 :      * represent that there is a value present for a key attribute that
                               2614                 :      * exceeds pg_index.indnkeyatts for the index.
                               2615                 :      */
 1481 pg                       2616 GIC    50046604 :     return tupnatts > 0 && tupnatts <= nkeyatts;
                               2617                 : }
                               2618                 : 
                               2619                 : /*
                               2620                 :  *
                               2621                 :  *  _bt_check_third_page() -- check whether tuple fits on a btree page at all.
                               2622                 :  *
                               2623                 :  * We actually need to be able to fit three items on every page, so restrict
                               2624                 :  * any one item to 1/3 the per-page available space.  Note that itemsz should
                               2625                 :  * not include the ItemId overhead.
 1481 pg                       2626 ECB             :  *
                               2627                 :  * It might be useful to apply TOAST methods rather than throw an error here.
                               2628                 :  * Using out of line storage would break assumptions made by suffix truncation
                               2629                 :  * and by contrib/amcheck, though.
                               2630                 :  */
                               2631                 : void
 1481 pg                       2632 CBC         132 : _bt_check_third_page(Relation rel, Relation heap, bool needheaptidspace,
                               2633                 :                      Page page, IndexTuple newtup)
                               2634                 : {
 1481 pg                       2635 ECB             :     Size        itemsz;
 1481 pg                       2636 EUB             :     BTPageOpaque opaque;
                               2637                 : 
 1481 pg                       2638 GIC         132 :     itemsz = MAXALIGN(IndexTupleSize(newtup));
                               2639                 : 
                               2640                 :     /* Double check item size against limit */
                               2641             132 :     if (itemsz <= BTMaxItemSize(page))
 1481 pg                       2642 UIC           0 :         return;
 1481 pg                       2643 ECB             : 
                               2644                 :     /*
                               2645                 :      * Tuple is probably too large to fit on page, but it's possible that the
                               2646                 :      * index uses version 2 or version 3, or that page is an internal page, in
                               2647                 :      * which case a slightly higher limit applies.
                               2648                 :      */
 1481 pg                       2649 GIC         132 :     if (!needheaptidspace && itemsz <= BTMaxItemSizeNoHeapTid(page))
 1481 pg                       2650 GBC         132 :         return;
 1481 pg                       2651 EUB             : 
                               2652                 :     /*
                               2653                 :      * Internal page insertions cannot fail here, because that would mean that
                               2654                 :      * an earlier leaf level insertion that should have failed didn't
                               2655                 :      */
  373 michael                  2656 UIC           0 :     opaque = BTPageGetOpaque(page);
 1481 pg                       2657               0 :     if (!P_ISLEAF(opaque))
                               2658               0 :         elog(ERROR, "cannot insert oversized tuple of size %zu on internal page of index \"%s\"",
                               2659                 :              itemsz, RelationGetRelationName(rel));
                               2660                 : 
                               2661               0 :     ereport(ERROR,
                               2662                 :             (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                               2663                 :              errmsg("index row size %zu exceeds btree version %u maximum %zu for index \"%s\"",
                               2664                 :                     itemsz,
                               2665                 :                     needheaptidspace ? BTREE_VERSION : BTREE_NOVAC_VERSION,
                               2666                 :                     needheaptidspace ? BTMaxItemSize(page) :
                               2667                 :                     BTMaxItemSizeNoHeapTid(page),
                               2668                 :                     RelationGetRelationName(rel)),
                               2669                 :              errdetail("Index row references tuple (%u,%u) in relation \"%s\".",
                               2670                 :                        ItemPointerGetBlockNumber(BTreeTupleGetHeapTID(newtup)),
                               2671                 :                        ItemPointerGetOffsetNumber(BTreeTupleGetHeapTID(newtup)),
                               2672                 :                        RelationGetRelationName(heap)),
                               2673                 :              errhint("Values larger than 1/3 of a buffer page cannot be indexed.\n"
                               2674                 :                      "Consider a function index of an MD5 hash of the value, "
                               2675                 :                      "or use full text indexing."),
                               2676                 :              errtableconstraint(heap, RelationGetRelationName(rel))));
                               2677                 : }
                               2678                 : 
                               2679                 : /*
                               2680                 :  * Are all attributes in rel "equality is image equality" attributes?
                               2681                 :  *
                               2682                 :  * We use each attribute's BTEQUALIMAGE_PROC opclass procedure.  If any
                               2683                 :  * opclass either lacks a BTEQUALIMAGE_PROC procedure or returns false, we
 1138 pg                       2684 ECB             :  * return false; otherwise we return true.
                               2685                 :  *
                               2686                 :  * Returned boolean value is stored in index metapage during index builds.
                               2687                 :  * Deduplication can only be used when we return true.
                               2688                 :  */
                               2689                 : bool
 1138 pg                       2690 CBC       67018 : _bt_allequalimage(Relation rel, bool debugmessage)
 1138 pg                       2691 ECB             : {
 1138 pg                       2692 GIC       67018 :     bool        allequalimage = true;
 1138 pg                       2693 ECB             : 
                               2694                 :     /* INCLUDE indexes can never support deduplication */
 1138 pg                       2695 CBC       67018 :     if (IndexRelationGetNumberOfAttributes(rel) !=
                               2696           67018 :         IndexRelationGetNumberOfKeyAttributes(rel))
                               2697             133 :         return false;
                               2698                 : 
 1138 pg                       2699 GIC      180400 :     for (int i = 0; i < IndexRelationGetNumberOfKeyAttributes(rel); i++)
 1138 pg                       2700 ECB             :     {
 1138 pg                       2701 GIC      113957 :         Oid         opfamily = rel->rd_opfamily[i];
                               2702          113957 :         Oid         opcintype = rel->rd_opcintype[i];
                               2703          113957 :         Oid         collation = rel->rd_indcollation[i];
                               2704                 :         Oid         equalimageproc;
                               2705                 : 
                               2706          113957 :         equalimageproc = get_opfamily_proc(opfamily, opcintype, opcintype,
 1138 pg                       2707 ECB             :                                            BTEQUALIMAGE_PROC);
                               2708                 : 
                               2709                 :         /*
                               2710                 :          * If there is no BTEQUALIMAGE_PROC then deduplication is assumed to
                               2711                 :          * be unsafe.  Otherwise, actually call proc and see what it says.
                               2712                 :          */
 1138 pg                       2713 GIC      113957 :         if (!OidIsValid(equalimageproc) ||
                               2714          113525 :             !DatumGetBool(OidFunctionCall1Coll(equalimageproc, collation,
                               2715                 :                                                ObjectIdGetDatum(opcintype))))
 1138 pg                       2716 ECB             :         {
 1138 pg                       2717 GIC         442 :             allequalimage = false;
 1138 pg                       2718 CBC         442 :             break;
 1138 pg                       2719 ECB             :         }
                               2720                 :     }
                               2721                 : 
 1138 pg                       2722 CBC       66885 :     if (debugmessage)
                               2723                 :     {
 1138 pg                       2724 GIC       63980 :         if (allequalimage)
                               2725           63538 :             elog(DEBUG1, "index \"%s\" can safely use deduplication",
 1138 pg                       2726 ECB             :                  RelationGetRelationName(rel));
                               2727                 :         else
 1138 pg                       2728 GIC         442 :             elog(DEBUG1, "index \"%s\" cannot use deduplication",
                               2729                 :                  RelationGetRelationName(rel));
                               2730                 :     }
                               2731                 : 
                               2732           66885 :     return allequalimage;
                               2733                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a