LCOV - differential code coverage report
Current view: top level - src/backend/access/gin - gindatapage.c (source / functions) Coverage Total Hit UBC CBC
Current: Differential Code Coverage HEAD vs 15 Lines: 80.5 % 687 553 134 553
Current Date: 2023-04-08 15:15:32 Functions: 93.1 % 29 27 2 27
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * gindatapage.c
       4                 :  *    routines for handling GIN posting tree pages.
       5                 :  *
       6                 :  *
       7                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       8                 :  * Portions Copyright (c) 1994, Regents of the University of California
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *          src/backend/access/gin/gindatapage.c
      12                 :  *-------------------------------------------------------------------------
      13                 :  */
      14                 : 
      15                 : #include "postgres.h"
      16                 : 
      17                 : #include "access/gin_private.h"
      18                 : #include "access/ginxlog.h"
      19                 : #include "access/xloginsert.h"
      20                 : #include "lib/ilist.h"
      21                 : #include "miscadmin.h"
      22                 : #include "storage/predicate.h"
      23                 : #include "utils/rel.h"
      24                 : 
      25                 : /*
      26                 :  * Min, Max and Target size of posting lists stored on leaf pages, in bytes.
      27                 :  *
      28                 :  * The code can deal with any size, but random access is more efficient when
      29                 :  * a number of smaller lists are stored, rather than one big list. If a
      30                 :  * posting list would become larger than Max size as a result of insertions,
      31                 :  * it is split into two. If a posting list would be smaller than minimum
      32                 :  * size, it is merged with the next posting list.
      33                 :  */
      34                 : #define GinPostingListSegmentMaxSize 384
      35                 : #define GinPostingListSegmentTargetSize 256
      36                 : #define GinPostingListSegmentMinSize 128
      37                 : 
      38                 : /*
      39                 :  * At least this many items fit in a GinPostingListSegmentMaxSize-bytes
      40                 :  * long segment. This is used when estimating how much space is required
      41                 :  * for N items, at minimum.
      42                 :  */
      43                 : #define MinTuplesPerSegment ((GinPostingListSegmentMaxSize - 2) / 6)
      44                 : 
      45                 : /*
      46                 :  * A working struct for manipulating a posting tree leaf page.
      47                 :  */
      48                 : typedef struct
      49                 : {
      50                 :     dlist_head  segments;       /* a list of leafSegmentInfos */
      51                 : 
      52                 :     /*
      53                 :      * The following fields represent how the segments are split across pages,
      54                 :      * if a page split is required. Filled in by leafRepackItems.
      55                 :      */
      56                 :     dlist_node *lastleft;       /* last segment on left page */
      57                 :     int         lsize;          /* total size on left page */
      58                 :     int         rsize;          /* total size on right page */
      59                 : 
      60                 :     bool        oldformat;      /* page is in pre-9.4 format on disk */
      61                 : 
      62                 :     /*
      63                 :      * If we need WAL data representing the reconstructed leaf page, it's
      64                 :      * stored here by computeLeafRecompressWALData.
      65                 :      */
      66                 :     char       *walinfo;        /* buffer start */
      67                 :     int         walinfolen;     /* and length */
      68                 : } disassembledLeaf;
      69                 : 
      70                 : typedef struct
      71                 : {
      72                 :     dlist_node  node;           /* linked list pointers */
      73                 : 
      74                 :     /*-------------
      75                 :      * 'action' indicates the status of this in-memory segment, compared to
      76                 :      * what's on disk. It is one of the GIN_SEGMENT_* action codes:
      77                 :      *
      78                 :      * UNMODIFIED   no changes
      79                 :      * DELETE       the segment is to be removed. 'seg' and 'items' are
      80                 :      *              ignored
      81                 :      * INSERT       this is a completely new segment
      82                 :      * REPLACE      this replaces an existing segment with new content
      83                 :      * ADDITEMS     like REPLACE, but no items have been removed, and we track
      84                 :      *              in detail what items have been added to this segment, in
      85                 :      *              'modifieditems'
      86                 :      *-------------
      87                 :      */
      88                 :     char        action;
      89                 : 
      90                 :     ItemPointerData *modifieditems;
      91                 :     uint16      nmodifieditems;
      92                 : 
      93                 :     /*
      94                 :      * The following fields represent the items in this segment. If 'items' is
      95                 :      * not NULL, it contains a palloc'd array of the items in this segment. If
      96                 :      * 'seg' is not NULL, it contains the items in an already-compressed
      97                 :      * format. It can point to an on-disk page (!modified), or a palloc'd
      98                 :      * segment in memory. If both are set, they must represent the same items.
      99                 :      */
     100                 :     GinPostingList *seg;
     101                 :     ItemPointer items;
     102                 :     int         nitems;         /* # of items in 'items', if items != NULL */
     103                 : } leafSegmentInfo;
     104                 : 
     105                 : static ItemPointer dataLeafPageGetUncompressed(Page page, int *nitems);
     106                 : static void dataSplitPageInternal(GinBtree btree, Buffer origbuf,
     107                 :                                   GinBtreeStack *stack,
     108                 :                                   void *insertdata, BlockNumber updateblkno,
     109                 :                                   Page *newlpage, Page *newrpage);
     110                 : 
     111                 : static disassembledLeaf *disassembleLeaf(Page page);
     112                 : static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining);
     113                 : static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems,
     114                 :                            int nNewItems);
     115                 : 
     116                 : static void computeLeafRecompressWALData(disassembledLeaf *leaf);
     117                 : static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf);
     118                 : static void dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
     119                 :                                      ItemPointerData lbound, ItemPointerData rbound,
     120                 :                                      Page lpage, Page rpage);
     121                 : 
     122                 : /*
     123                 :  * Read TIDs from leaf data page to single uncompressed array. The TIDs are
     124                 :  * returned in ascending order.
     125                 :  *
     126                 :  * advancePast is a hint, indicating that the caller is only interested in
     127                 :  * TIDs > advancePast. To return all items, use ItemPointerSetMin.
     128                 :  *
     129                 :  * Note: This function can still return items smaller than advancePast that
     130                 :  * are in the same posting list as the items of interest, so the caller must
     131                 :  * still check all the returned items. But passing it allows this function to
     132                 :  * skip whole posting lists.
     133                 :  */
     134                 : ItemPointer
     135 CBC          82 : GinDataLeafPageGetItems(Page page, int *nitems, ItemPointerData advancePast)
     136                 : {
     137                 :     ItemPointer result;
     138                 : 
     139              82 :     if (GinPageIsCompressed(page))
     140                 :     {
     141              82 :         GinPostingList *seg = GinDataLeafPageGetPostingList(page);
     142              82 :         Size        len = GinDataLeafPageGetPostingListSize(page);
     143              82 :         Pointer     endptr = ((Pointer) seg) + len;
     144                 :         GinPostingList *next;
     145                 : 
     146                 :         /* Skip to the segment containing advancePast+1 */
     147              82 :         if (ItemPointerIsValid(&advancePast))
     148                 :         {
     149              47 :             next = GinNextPostingListSegment(seg);
     150              89 :             while ((Pointer) next < endptr &&
     151              39 :                    ginCompareItemPointers(&next->first, &advancePast) <= 0)
     152                 :             {
     153               3 :                 seg = next;
     154               3 :                 next = GinNextPostingListSegment(seg);
     155                 :             }
     156              47 :             len = endptr - (Pointer) seg;
     157                 :         }
     158                 : 
     159              82 :         if (len > 0)
     160              76 :             result = ginPostingListDecodeAllSegments(seg, len, nitems);
     161                 :         else
     162                 :         {
     163               6 :             result = NULL;
     164               6 :             *nitems = 0;
     165                 :         }
     166                 :     }
     167                 :     else
     168                 :     {
     169 UBC           0 :         ItemPointer tmp = dataLeafPageGetUncompressed(page, nitems);
     170                 : 
     171               0 :         result = palloc((*nitems) * sizeof(ItemPointerData));
     172               0 :         memcpy(result, tmp, (*nitems) * sizeof(ItemPointerData));
     173                 :     }
     174                 : 
     175 CBC          82 :     return result;
     176                 : }
     177                 : 
     178                 : /*
     179                 :  * Places all TIDs from leaf data page to bitmap.
     180                 :  */
     181                 : int
     182              15 : GinDataLeafPageGetItemsToTbm(Page page, TIDBitmap *tbm)
     183                 : {
     184                 :     ItemPointer uncompressed;
     185                 :     int         nitems;
     186                 : 
     187              15 :     if (GinPageIsCompressed(page))
     188                 :     {
     189              15 :         GinPostingList *segment = GinDataLeafPageGetPostingList(page);
     190              15 :         Size        len = GinDataLeafPageGetPostingListSize(page);
     191                 : 
     192              15 :         nitems = ginPostingListDecodeAllSegmentsToTbm(segment, len, tbm);
     193                 :     }
     194                 :     else
     195                 :     {
     196 UBC           0 :         uncompressed = dataLeafPageGetUncompressed(page, &nitems);
     197                 : 
     198               0 :         if (nitems > 0)
     199               0 :             tbm_add_tuples(tbm, uncompressed, nitems, false);
     200                 :     }
     201                 : 
     202 CBC          15 :     return nitems;
     203                 : }
     204                 : 
     205                 : /*
     206                 :  * Get pointer to the uncompressed array of items on a pre-9.4 format
     207                 :  * uncompressed leaf page. The number of items in the array is returned in
     208                 :  * *nitems.
     209                 :  */
     210                 : static ItemPointer
     211 UBC           0 : dataLeafPageGetUncompressed(Page page, int *nitems)
     212                 : {
     213                 :     ItemPointer items;
     214                 : 
     215               0 :     Assert(!GinPageIsCompressed(page));
     216                 : 
     217                 :     /*
     218                 :      * In the old pre-9.4 page format, the whole page content is used for
     219                 :      * uncompressed items, and the number of items is stored in 'maxoff'
     220                 :      */
     221               0 :     items = (ItemPointer) GinDataPageGetData(page);
     222               0 :     *nitems = GinPageGetOpaque(page)->maxoff;
     223                 : 
     224               0 :     return items;
     225                 : }
     226                 : 
     227                 : /*
     228                 :  * Check if we should follow the right link to find the item we're searching
     229                 :  * for.
     230                 :  *
     231                 :  * Compares inserting item pointer with the right bound of the current page.
     232                 :  */
     233                 : static bool
     234 CBC       13308 : dataIsMoveRight(GinBtree btree, Page page)
     235                 : {
     236           13308 :     ItemPointer iptr = GinDataPageGetRightBound(page);
     237                 : 
     238           13308 :     if (GinPageRightMost(page))
     239           13308 :         return false;
     240                 : 
     241 UBC           0 :     if (GinPageIsDeleted(page))
     242               0 :         return true;
     243                 : 
     244               0 :     return (ginCompareItemPointers(&btree->itemptr, iptr) > 0);
     245                 : }
     246                 : 
     247                 : /*
     248                 :  * Find correct PostingItem in non-leaf page. It is assumed that this is
     249                 :  * the correct page, and the searched value SHOULD be on the page.
     250                 :  */
     251                 : static BlockNumber
     252 CBC       13346 : dataLocateItem(GinBtree btree, GinBtreeStack *stack)
     253                 : {
     254                 :     OffsetNumber low,
     255                 :                 high,
     256                 :                 maxoff;
     257           13346 :     PostingItem *pitem = NULL;
     258                 :     int         result;
     259           13346 :     Page        page = BufferGetPage(stack->buffer);
     260                 : 
     261           13346 :     Assert(!GinPageIsLeaf(page));
     262           13346 :     Assert(GinPageIsData(page));
     263                 : 
     264           13346 :     if (btree->fullScan)
     265                 :     {
     266              38 :         stack->off = FirstOffsetNumber;
     267              38 :         stack->predictNumber *= GinPageGetOpaque(page)->maxoff;
     268              38 :         return btree->getLeftMostChild(btree, page);
     269                 :     }
     270                 : 
     271           13308 :     low = FirstOffsetNumber;
     272           13308 :     maxoff = high = GinPageGetOpaque(page)->maxoff;
     273           13308 :     Assert(high >= low);
     274                 : 
     275           13308 :     high++;
     276                 : 
     277           39924 :     while (high > low)
     278                 :     {
     279           26616 :         OffsetNumber mid = low + ((high - low) / 2);
     280                 : 
     281           26616 :         pitem = GinDataPageGetPostingItem(page, mid);
     282                 : 
     283           26616 :         if (mid == maxoff)
     284                 :         {
     285                 :             /*
     286                 :              * Right infinity, page already correctly chosen with a help of
     287                 :              * dataIsMoveRight
     288                 :              */
     289           13308 :             result = -1;
     290                 :         }
     291                 :         else
     292                 :         {
     293           13308 :             pitem = GinDataPageGetPostingItem(page, mid);
     294           13308 :             result = ginCompareItemPointers(&btree->itemptr, &(pitem->key));
     295                 :         }
     296                 : 
     297           26616 :         if (result == 0)
     298                 :         {
     299 UBC           0 :             stack->off = mid;
     300               0 :             return PostingItemGetBlockNumber(pitem);
     301                 :         }
     302 CBC       26616 :         else if (result > 0)
     303           13308 :             low = mid + 1;
     304                 :         else
     305           13308 :             high = mid;
     306                 :     }
     307                 : 
     308           13308 :     Assert(high >= FirstOffsetNumber && high <= maxoff);
     309                 : 
     310           13308 :     stack->off = high;
     311           13308 :     pitem = GinDataPageGetPostingItem(page, high);
     312           13308 :     return PostingItemGetBlockNumber(pitem);
     313                 : }
     314                 : 
     315                 : /*
     316                 :  * Find link to blkno on non-leaf page, returns offset of PostingItem
     317                 :  */
     318                 : static OffsetNumber
     319              23 : dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
     320                 : {
     321                 :     OffsetNumber i,
     322              23 :                 maxoff = GinPageGetOpaque(page)->maxoff;
     323                 :     PostingItem *pitem;
     324                 : 
     325              23 :     Assert(!GinPageIsLeaf(page));
     326              23 :     Assert(GinPageIsData(page));
     327                 : 
     328                 :     /* if page isn't changed, we return storedOff */
     329              23 :     if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
     330                 :     {
     331              23 :         pitem = GinDataPageGetPostingItem(page, storedOff);
     332              23 :         if (PostingItemGetBlockNumber(pitem) == blkno)
     333              23 :             return storedOff;
     334                 : 
     335                 :         /*
     336                 :          * we hope, that needed pointer goes to right. It's true if there
     337                 :          * wasn't a deletion
     338                 :          */
     339 UBC           0 :         for (i = storedOff + 1; i <= maxoff; i++)
     340                 :         {
     341               0 :             pitem = GinDataPageGetPostingItem(page, i);
     342               0 :             if (PostingItemGetBlockNumber(pitem) == blkno)
     343               0 :                 return i;
     344                 :         }
     345                 : 
     346               0 :         maxoff = storedOff - 1;
     347                 :     }
     348                 : 
     349                 :     /* last chance */
     350               0 :     for (i = FirstOffsetNumber; i <= maxoff; i++)
     351                 :     {
     352               0 :         pitem = GinDataPageGetPostingItem(page, i);
     353               0 :         if (PostingItemGetBlockNumber(pitem) == blkno)
     354               0 :             return i;
     355                 :     }
     356                 : 
     357               0 :     return InvalidOffsetNumber;
     358                 : }
     359                 : 
     360                 : /*
     361                 :  * Return blkno of leftmost child
     362                 :  */
     363                 : static BlockNumber
     364 CBC          38 : dataGetLeftMostPage(GinBtree btree, Page page)
     365                 : {
     366                 :     PostingItem *pitem;
     367                 : 
     368              38 :     Assert(!GinPageIsLeaf(page));
     369              38 :     Assert(GinPageIsData(page));
     370              38 :     Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber);
     371                 : 
     372              38 :     pitem = GinDataPageGetPostingItem(page, FirstOffsetNumber);
     373              38 :     return PostingItemGetBlockNumber(pitem);
     374                 : }
     375                 : 
     376                 : /*
     377                 :  * Add PostingItem to a non-leaf page.
     378                 :  */
     379                 : void
     380             130 : GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset)
     381                 : {
     382             130 :     OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
     383                 :     char       *ptr;
     384                 : 
     385             130 :     Assert(PostingItemGetBlockNumber(data) != InvalidBlockNumber);
     386             130 :     Assert(!GinPageIsLeaf(page));
     387                 : 
     388             130 :     if (offset == InvalidOffsetNumber)
     389                 :     {
     390             104 :         ptr = (char *) GinDataPageGetPostingItem(page, maxoff + 1);
     391                 :     }
     392                 :     else
     393                 :     {
     394              26 :         ptr = (char *) GinDataPageGetPostingItem(page, offset);
     395              26 :         if (offset != maxoff + 1)
     396              26 :             memmove(ptr + sizeof(PostingItem),
     397                 :                     ptr,
     398              26 :                     (maxoff - offset + 1) * sizeof(PostingItem));
     399                 :     }
     400             130 :     memcpy(ptr, data, sizeof(PostingItem));
     401                 : 
     402             130 :     maxoff++;
     403             130 :     GinPageGetOpaque(page)->maxoff = maxoff;
     404                 : 
     405                 :     /*
     406                 :      * Also set pd_lower to the end of the posting items, to follow the
     407                 :      * "standard" page layout, so that we can squeeze out the unused space
     408                 :      * from full-page images.
     409                 :      */
     410             130 :     GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
     411             130 : }
     412                 : 
     413                 : /*
     414                 :  * Delete posting item from non-leaf page
     415                 :  */
     416                 : void
     417               6 : GinPageDeletePostingItem(Page page, OffsetNumber offset)
     418                 : {
     419               6 :     OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
     420                 : 
     421               6 :     Assert(!GinPageIsLeaf(page));
     422               6 :     Assert(offset >= FirstOffsetNumber && offset <= maxoff);
     423                 : 
     424               6 :     if (offset != maxoff)
     425               6 :         memmove(GinDataPageGetPostingItem(page, offset),
     426               6 :                 GinDataPageGetPostingItem(page, offset + 1),
     427               6 :                 sizeof(PostingItem) * (maxoff - offset));
     428                 : 
     429               6 :     maxoff--;
     430               6 :     GinPageGetOpaque(page)->maxoff = maxoff;
     431                 : 
     432               6 :     GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
     433               6 : }
     434                 : 
     435                 : /*
     436                 :  * Prepare to insert data on a leaf data page.
     437                 :  *
     438                 :  * If it will fit, return GPTP_INSERT after doing whatever setup is needed
     439                 :  * before we enter the insertion critical section.  *ptp_workspace can be
     440                 :  * set to pass information along to the execPlaceToPage function.
     441                 :  *
     442                 :  * If it won't fit, perform a page split and return two temporary page
     443                 :  * images into *newlpage and *newrpage, with result GPTP_SPLIT.
     444                 :  *
     445                 :  * In neither case should the given page buffer be modified here.
     446                 :  */
     447                 : static GinPlaceToPageRC
     448           24772 : dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
     449                 :                          void *insertdata,
     450                 :                          void **ptp_workspace,
     451                 :                          Page *newlpage, Page *newrpage)
     452                 : {
     453           24772 :     GinBtreeDataLeafInsertData *items = insertdata;
     454           24772 :     ItemPointer newItems = &items->items[items->curitem];
     455           24772 :     int         maxitems = items->nitem - items->curitem;
     456           24772 :     Page        page = BufferGetPage(buf);
     457                 :     int         i;
     458                 :     ItemPointerData rbound;
     459                 :     ItemPointerData lbound;
     460                 :     bool        needsplit;
     461                 :     bool        append;
     462                 :     int         segsize;
     463                 :     Size        freespace;
     464                 :     disassembledLeaf *leaf;
     465                 :     leafSegmentInfo *lastleftinfo;
     466                 :     ItemPointerData maxOldItem;
     467                 :     ItemPointerData remaining;
     468                 : 
     469           24772 :     rbound = *GinDataPageGetRightBound(page);
     470                 : 
     471                 :     /*
     472                 :      * Count how many of the new items belong to this page.
     473                 :      */
     474           24772 :     if (!GinPageRightMost(page))
     475                 :     {
     476 UBC           0 :         for (i = 0; i < maxitems; i++)
     477                 :         {
     478               0 :             if (ginCompareItemPointers(&newItems[i], &rbound) > 0)
     479                 :             {
     480                 :                 /*
     481                 :                  * This needs to go to some other location in the tree. (The
     482                 :                  * caller should've chosen the insert location so that at
     483                 :                  * least the first item goes here.)
     484                 :                  */
     485               0 :                 Assert(i > 0);
     486               0 :                 break;
     487                 :             }
     488                 :         }
     489               0 :         maxitems = i;
     490                 :     }
     491                 : 
     492                 :     /* Disassemble the data on the page */
     493 CBC       24772 :     leaf = disassembleLeaf(page);
     494                 : 
     495                 :     /*
     496                 :      * Are we appending to the end of the page? IOW, are all the new items
     497                 :      * larger than any of the existing items.
     498                 :      */
     499           24772 :     if (!dlist_is_empty(&leaf->segments))
     500                 :     {
     501           24772 :         lastleftinfo = dlist_container(leafSegmentInfo, node,
     502                 :                                        dlist_tail_node(&leaf->segments));
     503           24772 :         if (!lastleftinfo->items)
     504           24772 :             lastleftinfo->items = ginPostingListDecode(lastleftinfo->seg,
     505                 :                                                        &lastleftinfo->nitems);
     506           24772 :         maxOldItem = lastleftinfo->items[lastleftinfo->nitems - 1];
     507           24772 :         if (ginCompareItemPointers(&newItems[0], &maxOldItem) >= 0)
     508           24772 :             append = true;
     509                 :         else
     510 UBC           0 :             append = false;
     511                 :     }
     512                 :     else
     513                 :     {
     514               0 :         ItemPointerSetMin(&maxOldItem);
     515               0 :         append = true;
     516                 :     }
     517                 : 
     518                 :     /*
     519                 :      * If we're appending to the end of the page, we will append as many items
     520                 :      * as we can fit (after splitting), and stop when the pages becomes full.
     521                 :      * Otherwise we have to limit the number of new items to insert, because
     522                 :      * once we start packing we can't just stop when we run out of space,
     523                 :      * because we must make sure that all the old items still fit.
     524                 :      */
     525 CBC       24772 :     if (GinPageIsCompressed(page))
     526           24772 :         freespace = GinDataLeafPageGetFreeSpace(page);
     527                 :     else
     528 UBC           0 :         freespace = 0;
     529 CBC       24772 :     if (append)
     530                 :     {
     531                 :         /*
     532                 :          * Even when appending, trying to append more items than will fit is
     533                 :          * not completely free, because we will merge the new items and old
     534                 :          * items into an array below. In the best case, every new item fits in
     535                 :          * a single byte, and we can use all the free space on the old page as
     536                 :          * well as the new page. For simplicity, ignore segment overhead etc.
     537                 :          */
     538           24772 :         maxitems = Min(maxitems, freespace + GinDataPageMaxDataSize);
     539                 :     }
     540                 :     else
     541                 :     {
     542                 :         /*
     543                 :          * Calculate a conservative estimate of how many new items we can fit
     544                 :          * on the two pages after splitting.
     545                 :          *
     546                 :          * We can use any remaining free space on the old page to store full
     547                 :          * segments, as well as the new page. Each full-sized segment can hold
     548                 :          * at least MinTuplesPerSegment items
     549                 :          */
     550                 :         int         nnewsegments;
     551                 : 
     552 UBC           0 :         nnewsegments = freespace / GinPostingListSegmentMaxSize;
     553               0 :         nnewsegments += GinDataPageMaxDataSize / GinPostingListSegmentMaxSize;
     554               0 :         maxitems = Min(maxitems, nnewsegments * MinTuplesPerSegment);
     555                 :     }
     556                 : 
     557                 :     /* Add the new items to the segment list */
     558 CBC       24772 :     if (!addItemsToLeaf(leaf, newItems, maxitems))
     559                 :     {
     560                 :         /* all items were duplicates, we have nothing to do */
     561 UBC           0 :         items->curitem += maxitems;
     562                 : 
     563               0 :         return GPTP_NO_WORK;
     564                 :     }
     565                 : 
     566                 :     /*
     567                 :      * Pack the items back to compressed segments, ready for writing to disk.
     568                 :      */
     569 CBC       24772 :     needsplit = leafRepackItems(leaf, &remaining);
     570                 : 
     571                 :     /*
     572                 :      * Did all the new items fit?
     573                 :      *
     574                 :      * If we're appending, it's OK if they didn't. But as a sanity check,
     575                 :      * verify that all the old items fit.
     576                 :      */
     577           24772 :     if (ItemPointerIsValid(&remaining))
     578                 :     {
     579              20 :         if (!append || ItemPointerCompare(&maxOldItem, &remaining) >= 0)
     580 UBC           0 :             elog(ERROR, "could not split GIN page; all old items didn't fit");
     581                 : 
     582                 :         /* Count how many of the new items did fit. */
     583 CBC      152421 :         for (i = 0; i < maxitems; i++)
     584                 :         {
     585          152421 :             if (ginCompareItemPointers(&newItems[i], &remaining) >= 0)
     586              20 :                 break;
     587                 :         }
     588              20 :         if (i == 0)
     589 UBC           0 :             elog(ERROR, "could not split GIN page; no new items fit");
     590 CBC          20 :         maxitems = i;
     591                 :     }
     592                 : 
     593           24772 :     if (!needsplit)
     594                 :     {
     595                 :         /*
     596                 :          * Great, all the items fit on a single page.  If needed, prepare data
     597                 :          * for a WAL record describing the changes we'll make.
     598                 :          */
     599           24697 :         if (RelationNeedsWAL(btree->index) && !btree->isBuild)
     600           24697 :             computeLeafRecompressWALData(leaf);
     601                 : 
     602                 :         /*
     603                 :          * We're ready to enter the critical section, but
     604                 :          * dataExecPlaceToPageLeaf will need access to the "leaf" data.
     605                 :          */
     606           24697 :         *ptp_workspace = leaf;
     607                 : 
     608           24697 :         if (append)
     609           24697 :             elog(DEBUG2, "appended %d new items to block %u; %d bytes (%d to go)",
     610                 :                  maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize,
     611                 :                  items->nitem - items->curitem - maxitems);
     612                 :         else
     613 UBC           0 :             elog(DEBUG2, "inserted %d new items to block %u; %d bytes (%d to go)",
     614                 :                  maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize,
     615                 :                  items->nitem - items->curitem - maxitems);
     616                 :     }
     617                 :     else
     618                 :     {
     619                 :         /*
     620                 :          * Have to split.
     621                 :          *
     622                 :          * leafRepackItems already divided the segments between the left and
     623                 :          * the right page. It filled the left page as full as possible, and
     624                 :          * put the rest to the right page. When building a new index, that's
     625                 :          * good, because the table is scanned from beginning to end and there
     626                 :          * won't be any more insertions to the left page during the build.
     627                 :          * This packs the index as tight as possible. But otherwise, split
     628                 :          * 50/50, by moving segments from the left page to the right page
     629                 :          * until they're balanced.
     630                 :          *
     631                 :          * As a further heuristic, when appending items to the end of the
     632                 :          * page, try to make the left page 75% full, on the assumption that
     633                 :          * subsequent insertions will probably also go to the end. This packs
     634                 :          * the index somewhat tighter when appending to a table, which is very
     635                 :          * common.
     636                 :          */
     637 CBC          75 :         if (!btree->isBuild)
     638                 :         {
     639             130 :             while (dlist_has_prev(&leaf->segments, leaf->lastleft))
     640                 :             {
     641             130 :                 lastleftinfo = dlist_container(leafSegmentInfo, node, leaf->lastleft);
     642                 : 
     643                 :                 /* ignore deleted segments */
     644             130 :                 if (lastleftinfo->action != GIN_SEGMENT_DELETE)
     645                 :                 {
     646             130 :                     segsize = SizeOfGinPostingList(lastleftinfo->seg);
     647                 : 
     648                 :                     /*
     649                 :                      * Note that we check that the right page doesn't become
     650                 :                      * more full than the left page even when appending. It's
     651                 :                      * possible that we added enough items to make both pages
     652                 :                      * more than 75% full.
     653                 :                      */
     654             130 :                     if ((leaf->lsize - segsize) - (leaf->rsize + segsize) < 0)
     655              24 :                         break;
     656             106 :                     if (append)
     657                 :                     {
     658             106 :                         if ((leaf->lsize - segsize) < (BLCKSZ * 3) / 4)
     659               5 :                             break;
     660                 :                     }
     661                 : 
     662             101 :                     leaf->lsize -= segsize;
     663             101 :                     leaf->rsize += segsize;
     664                 :                 }
     665             101 :                 leaf->lastleft = dlist_prev_node(&leaf->segments, leaf->lastleft);
     666                 :             }
     667                 :         }
     668              75 :         Assert(leaf->lsize <= GinDataPageMaxDataSize);
     669              75 :         Assert(leaf->rsize <= GinDataPageMaxDataSize);
     670                 : 
     671                 :         /*
     672                 :          * Fetch the max item in the left page's last segment; it becomes the
     673                 :          * right bound of the page.
     674                 :          */
     675              75 :         lastleftinfo = dlist_container(leafSegmentInfo, node, leaf->lastleft);
     676              75 :         if (!lastleftinfo->items)
     677              75 :             lastleftinfo->items = ginPostingListDecode(lastleftinfo->seg,
     678                 :                                                        &lastleftinfo->nitems);
     679              75 :         lbound = lastleftinfo->items[lastleftinfo->nitems - 1];
     680                 : 
     681                 :         /*
     682                 :          * Now allocate a couple of temporary page images, and fill them.
     683                 :          */
     684              75 :         *newlpage = palloc(BLCKSZ);
     685              75 :         *newrpage = palloc(BLCKSZ);
     686                 : 
     687              75 :         dataPlaceToPageLeafSplit(leaf, lbound, rbound,
     688                 :                                  *newlpage, *newrpage);
     689                 : 
     690              75 :         Assert(GinPageRightMost(page) ||
     691                 :                ginCompareItemPointers(GinDataPageGetRightBound(*newlpage),
     692                 :                                       GinDataPageGetRightBound(*newrpage)) < 0);
     693                 : 
     694              75 :         if (append)
     695              75 :             elog(DEBUG2, "appended %d items to block %u; split %d/%d (%d to go)",
     696                 :                  maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize, (int) leaf->rsize,
     697                 :                  items->nitem - items->curitem - maxitems);
     698                 :         else
     699 UBC           0 :             elog(DEBUG2, "inserted %d items to block %u; split %d/%d (%d to go)",
     700                 :                  maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize, (int) leaf->rsize,
     701                 :                  items->nitem - items->curitem - maxitems);
     702                 :     }
     703                 : 
     704 CBC       24772 :     items->curitem += maxitems;
     705                 : 
     706           24772 :     return needsplit ? GPTP_SPLIT : GPTP_INSERT;
     707                 : }
     708                 : 
     709                 : /*
     710                 :  * Perform data insertion after beginPlaceToPage has decided it will fit.
     711                 :  *
     712                 :  * This is invoked within a critical section, and XLOG record creation (if
     713                 :  * needed) is already started.  The target buffer is registered in slot 0.
     714                 :  */
     715                 : static void
     716           24697 : dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
     717                 :                         void *insertdata, void *ptp_workspace)
     718                 : {
     719           24697 :     disassembledLeaf *leaf = (disassembledLeaf *) ptp_workspace;
     720                 : 
     721                 :     /* Apply changes to page */
     722           24697 :     dataPlaceToPageLeafRecompress(buf, leaf);
     723                 : 
     724                 :     /* If needed, register WAL data built by computeLeafRecompressWALData */
     725           24697 :     if (RelationNeedsWAL(btree->index) && !btree->isBuild)
     726                 :     {
     727           24697 :         XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
     728                 :     }
     729           24697 : }
     730                 : 
     731                 : /*
     732                 :  * Vacuum a posting tree leaf page.
     733                 :  */
     734                 : void
     735              21 : ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
     736                 : {
     737              21 :     Page        page = BufferGetPage(buffer);
     738                 :     disassembledLeaf *leaf;
     739              21 :     bool        removedsomething = false;
     740                 :     dlist_iter  iter;
     741                 : 
     742              21 :     leaf = disassembleLeaf(page);
     743                 : 
     744                 :     /* Vacuum each segment. */
     745             471 :     dlist_foreach(iter, &leaf->segments)
     746                 :     {
     747             450 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, iter.cur);
     748                 :         int         oldsegsize;
     749                 :         ItemPointer cleaned;
     750                 :         int         ncleaned;
     751                 : 
     752             450 :         if (!seginfo->items)
     753             450 :             seginfo->items = ginPostingListDecode(seginfo->seg,
     754                 :                                                   &seginfo->nitems);
     755             450 :         if (seginfo->seg)
     756             450 :             oldsegsize = SizeOfGinPostingList(seginfo->seg);
     757                 :         else
     758 UBC           0 :             oldsegsize = GinDataPageMaxDataSize;
     759                 : 
     760 CBC         450 :         cleaned = ginVacuumItemPointers(gvs,
     761                 :                                         seginfo->items,
     762                 :                                         seginfo->nitems,
     763                 :                                         &ncleaned);
     764             450 :         pfree(seginfo->items);
     765             450 :         seginfo->items = NULL;
     766             450 :         seginfo->nitems = 0;
     767             450 :         if (cleaned)
     768                 :         {
     769             441 :             if (ncleaned > 0)
     770                 :             {
     771                 :                 int         npacked;
     772                 : 
     773               9 :                 seginfo->seg = ginCompressPostingList(cleaned,
     774                 :                                                       ncleaned,
     775                 :                                                       oldsegsize,
     776                 :                                                       &npacked);
     777                 :                 /* Removing an item never increases the size of the segment */
     778               9 :                 if (npacked != ncleaned)
     779 UBC           0 :                     elog(ERROR, "could not fit vacuumed posting list");
     780 CBC           9 :                 seginfo->action = GIN_SEGMENT_REPLACE;
     781                 :             }
     782                 :             else
     783                 :             {
     784             432 :                 seginfo->seg = NULL;
     785             432 :                 seginfo->items = NULL;
     786             432 :                 seginfo->action = GIN_SEGMENT_DELETE;
     787                 :             }
     788             441 :             seginfo->nitems = ncleaned;
     789                 : 
     790             441 :             removedsomething = true;
     791                 :         }
     792                 :     }
     793                 : 
     794                 :     /*
     795                 :      * If we removed any items, reconstruct the page from the pieces.
     796                 :      *
     797                 :      * We don't try to re-encode the segments here, even though some of them
     798                 :      * might be really small now that we've removed some items from them. It
     799                 :      * seems like a waste of effort, as there isn't really any benefit from
     800                 :      * larger segments per se; larger segments only help to pack more items in
     801                 :      * the same space. We might as well delay doing that until the next
     802                 :      * insertion, which will need to re-encode at least part of the page
     803                 :      * anyway.
     804                 :      *
     805                 :      * Also note if the page was in uncompressed, pre-9.4 format before, it is
     806                 :      * now represented as one huge segment that contains all the items. It
     807                 :      * might make sense to split that, to speed up random access, but we don't
     808                 :      * bother. You'll have to REINDEX anyway if you want the full gain of the
     809                 :      * new tighter index format.
     810                 :      */
     811              21 :     if (removedsomething)
     812                 :     {
     813                 :         bool        modified;
     814                 : 
     815                 :         /*
     816                 :          * Make sure we have a palloc'd copy of all segments, after the first
     817                 :          * segment that is modified. (dataPlaceToPageLeafRecompress requires
     818                 :          * this).
     819                 :          */
     820              21 :         modified = false;
     821             471 :         dlist_foreach(iter, &leaf->segments)
     822                 :         {
     823             450 :             leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
     824                 :                                                        iter.cur);
     825                 : 
     826             450 :             if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
     827             441 :                 modified = true;
     828             450 :             if (modified && seginfo->action != GIN_SEGMENT_DELETE)
     829                 :             {
     830              18 :                 int         segsize = SizeOfGinPostingList(seginfo->seg);
     831              18 :                 GinPostingList *tmp = (GinPostingList *) palloc(segsize);
     832                 : 
     833              18 :                 memcpy(tmp, seginfo->seg, segsize);
     834              18 :                 seginfo->seg = tmp;
     835                 :             }
     836                 :         }
     837                 : 
     838              21 :         if (RelationNeedsWAL(indexrel))
     839               3 :             computeLeafRecompressWALData(leaf);
     840                 : 
     841                 :         /* Apply changes to page */
     842              21 :         START_CRIT_SECTION();
     843                 : 
     844              21 :         dataPlaceToPageLeafRecompress(buffer, leaf);
     845                 : 
     846              21 :         MarkBufferDirty(buffer);
     847                 : 
     848              21 :         if (RelationNeedsWAL(indexrel))
     849                 :         {
     850                 :             XLogRecPtr  recptr;
     851                 : 
     852               3 :             XLogBeginInsert();
     853               3 :             XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
     854               3 :             XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
     855               3 :             recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE);
     856               3 :             PageSetLSN(page, recptr);
     857                 :         }
     858                 : 
     859              21 :         END_CRIT_SECTION();
     860                 :     }
     861              21 : }
     862                 : 
     863                 : /*
     864                 :  * Construct a ginxlogRecompressDataLeaf record representing the changes
     865                 :  * in *leaf.  (Because this requires a palloc, we have to do it before
     866                 :  * we enter the critical section that actually updates the page.)
     867                 :  */
     868                 : static void
     869           24700 : computeLeafRecompressWALData(disassembledLeaf *leaf)
     870                 : {
     871           24700 :     int         nmodified = 0;
     872                 :     char       *walbufbegin;
     873                 :     char       *walbufend;
     874                 :     dlist_iter  iter;
     875                 :     int         segno;
     876                 :     ginxlogRecompressDataLeaf *recompress_xlog;
     877                 : 
     878                 :     /* Count the modified segments */
     879          445907 :     dlist_foreach(iter, &leaf->segments)
     880                 :     {
     881          421207 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
     882                 :                                                    iter.cur);
     883                 : 
     884          421207 :         if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
     885           24733 :             nmodified++;
     886                 :     }
     887                 : 
     888                 :     walbufbegin =
     889           24700 :         palloc(sizeof(ginxlogRecompressDataLeaf) +
     890                 :                BLCKSZ +         /* max size needed to hold the segment data */
     891           24700 :                nmodified * 2    /* (segno + action) per action */
     892                 :         );
     893           24700 :     walbufend = walbufbegin;
     894                 : 
     895           24700 :     recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend;
     896           24700 :     walbufend += sizeof(ginxlogRecompressDataLeaf);
     897                 : 
     898           24700 :     recompress_xlog->nactions = nmodified;
     899                 : 
     900           24700 :     segno = 0;
     901          445907 :     dlist_foreach(iter, &leaf->segments)
     902                 :     {
     903          421207 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
     904                 :                                                    iter.cur);
     905          421207 :         int         segsize = 0;
     906                 :         int         datalen;
     907          421207 :         uint8       action = seginfo->action;
     908                 : 
     909          421207 :         if (action == GIN_SEGMENT_UNMODIFIED)
     910                 :         {
     911          396474 :             segno++;
     912          396474 :             continue;
     913                 :         }
     914                 : 
     915           24733 :         if (action != GIN_SEGMENT_DELETE)
     916           24721 :             segsize = SizeOfGinPostingList(seginfo->seg);
     917                 : 
     918                 :         /*
     919                 :          * If storing the uncompressed list of added item pointers would take
     920                 :          * more space than storing the compressed segment as is, do that
     921                 :          * instead.
     922                 :          */
     923           24733 :         if (action == GIN_SEGMENT_ADDITEMS &&
     924           24580 :             seginfo->nmodifieditems * sizeof(ItemPointerData) > segsize)
     925                 :         {
     926 UBC           0 :             action = GIN_SEGMENT_REPLACE;
     927                 :         }
     928                 : 
     929 CBC       24733 :         *((uint8 *) (walbufend++)) = segno;
     930           24733 :         *(walbufend++) = action;
     931                 : 
     932           24733 :         switch (action)
     933                 :         {
     934              12 :             case GIN_SEGMENT_DELETE:
     935              12 :                 datalen = 0;
     936              12 :                 break;
     937                 : 
     938           24580 :             case GIN_SEGMENT_ADDITEMS:
     939           24580 :                 datalen = seginfo->nmodifieditems * sizeof(ItemPointerData);
     940           24580 :                 memcpy(walbufend, &seginfo->nmodifieditems, sizeof(uint16));
     941           24580 :                 memcpy(walbufend + sizeof(uint16), seginfo->modifieditems, datalen);
     942           24580 :                 datalen += sizeof(uint16);
     943           24580 :                 break;
     944                 : 
     945             141 :             case GIN_SEGMENT_INSERT:
     946                 :             case GIN_SEGMENT_REPLACE:
     947             141 :                 datalen = SHORTALIGN(segsize);
     948             141 :                 memcpy(walbufend, seginfo->seg, segsize);
     949             141 :                 break;
     950                 : 
     951 UBC           0 :             default:
     952               0 :                 elog(ERROR, "unexpected GIN leaf action %d", action);
     953                 :         }
     954 CBC       24733 :         walbufend += datalen;
     955                 : 
     956           24733 :         if (action != GIN_SEGMENT_INSERT)
     957           24601 :             segno++;
     958                 :     }
     959                 : 
     960                 :     /* Pass back the constructed info via *leaf */
     961           24700 :     leaf->walinfo = walbufbegin;
     962           24700 :     leaf->walinfolen = walbufend - walbufbegin;
     963           24700 : }
     964                 : 
     965                 : /*
     966                 :  * Assemble a disassembled posting tree leaf page back to a buffer.
     967                 :  *
     968                 :  * This just updates the target buffer; WAL stuff is caller's responsibility.
     969                 :  *
     970                 :  * NOTE: The segment pointers must not point directly to the same buffer,
     971                 :  * except for segments that have not been modified and whose preceding
     972                 :  * segments have not been modified either.
     973                 :  */
     974                 : static void
     975           24718 : dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
     976                 : {
     977           24718 :     Page        page = BufferGetPage(buf);
     978                 :     char       *ptr;
     979                 :     int         newsize;
     980           24718 :     bool        modified = false;
     981                 :     dlist_iter  iter;
     982                 :     int         segsize;
     983                 : 
     984                 :     /*
     985                 :      * If the page was in pre-9.4 format before, convert the header, and force
     986                 :      * all segments to be copied to the page whether they were modified or
     987                 :      * not.
     988                 :      */
     989           24718 :     if (!GinPageIsCompressed(page))
     990                 :     {
     991 UBC           0 :         Assert(leaf->oldformat);
     992               0 :         GinPageSetCompressed(page);
     993               0 :         GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
     994               0 :         modified = true;
     995                 :     }
     996                 : 
     997 CBC       24718 :     ptr = (char *) GinDataLeafPageGetPostingList(page);
     998           24718 :     newsize = 0;
     999          446348 :     dlist_foreach(iter, &leaf->segments)
    1000                 :     {
    1001          421630 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, iter.cur);
    1002                 : 
    1003          421630 :         if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
    1004           25156 :             modified = true;
    1005                 : 
    1006          421630 :         if (seginfo->action != GIN_SEGMENT_DELETE)
    1007                 :         {
    1008          421198 :             segsize = SizeOfGinPostingList(seginfo->seg);
    1009                 : 
    1010          421198 :             if (modified)
    1011           24733 :                 memcpy(ptr, seginfo->seg, segsize);
    1012                 : 
    1013          421198 :             ptr += segsize;
    1014          421198 :             newsize += segsize;
    1015                 :         }
    1016                 :     }
    1017                 : 
    1018           24718 :     Assert(newsize <= GinDataPageMaxDataSize);
    1019           24718 :     GinDataPageSetDataSize(page, newsize);
    1020           24718 : }
    1021                 : 
    1022                 : /*
    1023                 :  * Like dataPlaceToPageLeafRecompress, but writes the disassembled leaf
    1024                 :  * segments to two pages instead of one.
    1025                 :  *
    1026                 :  * This is different from the non-split cases in that this does not modify
    1027                 :  * the original page directly, but writes to temporary in-memory copies of
    1028                 :  * the new left and right pages.
    1029                 :  */
    1030                 : static void
    1031              75 : dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
    1032                 :                          ItemPointerData lbound, ItemPointerData rbound,
    1033                 :                          Page lpage, Page rpage)
    1034                 : {
    1035                 :     char       *ptr;
    1036                 :     int         segsize;
    1037                 :     int         lsize;
    1038                 :     int         rsize;
    1039                 :     dlist_node *node;
    1040                 :     dlist_node *firstright;
    1041                 :     leafSegmentInfo *seginfo;
    1042                 : 
    1043                 :     /* Initialize temporary pages to hold the new left and right pages */
    1044              75 :     GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
    1045              75 :     GinInitPage(rpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
    1046                 : 
    1047                 :     /*
    1048                 :      * Copy the segments that go to the left page.
    1049                 :      *
    1050                 :      * XXX: We should skip copying the unmodified part of the left page, like
    1051                 :      * we do when recompressing.
    1052                 :      */
    1053              75 :     lsize = 0;
    1054              75 :     ptr = (char *) GinDataLeafPageGetPostingList(lpage);
    1055              75 :     firstright = dlist_next_node(&leaf->segments, leaf->lastleft);
    1056              75 :     for (node = dlist_head_node(&leaf->segments);
    1057            1793 :          node != firstright;
    1058            1718 :          node = dlist_next_node(&leaf->segments, node))
    1059                 :     {
    1060            1718 :         seginfo = dlist_container(leafSegmentInfo, node, node);
    1061                 : 
    1062            1718 :         if (seginfo->action != GIN_SEGMENT_DELETE)
    1063                 :         {
    1064            1718 :             segsize = SizeOfGinPostingList(seginfo->seg);
    1065            1718 :             memcpy(ptr, seginfo->seg, segsize);
    1066            1718 :             ptr += segsize;
    1067            1718 :             lsize += segsize;
    1068                 :         }
    1069                 :     }
    1070              75 :     Assert(lsize == leaf->lsize);
    1071              75 :     GinDataPageSetDataSize(lpage, lsize);
    1072              75 :     *GinDataPageGetRightBound(lpage) = lbound;
    1073                 : 
    1074                 :     /* Copy the segments that go to the right page */
    1075              75 :     ptr = (char *) GinDataLeafPageGetPostingList(rpage);
    1076              75 :     rsize = 0;
    1077              75 :     for (node = firstright;
    1078                 :          ;
    1079             972 :          node = dlist_next_node(&leaf->segments, node))
    1080                 :     {
    1081            1047 :         seginfo = dlist_container(leafSegmentInfo, node, node);
    1082                 : 
    1083            1047 :         if (seginfo->action != GIN_SEGMENT_DELETE)
    1084                 :         {
    1085            1047 :             segsize = SizeOfGinPostingList(seginfo->seg);
    1086            1047 :             memcpy(ptr, seginfo->seg, segsize);
    1087            1047 :             ptr += segsize;
    1088            1047 :             rsize += segsize;
    1089                 :         }
    1090                 : 
    1091            1047 :         if (!dlist_has_next(&leaf->segments, node))
    1092              75 :             break;
    1093                 :     }
    1094              75 :     Assert(rsize == leaf->rsize);
    1095              75 :     GinDataPageSetDataSize(rpage, rsize);
    1096              75 :     *GinDataPageGetRightBound(rpage) = rbound;
    1097              75 : }
    1098                 : 
    1099                 : /*
    1100                 :  * Prepare to insert data on an internal data page.
    1101                 :  *
    1102                 :  * If it will fit, return GPTP_INSERT after doing whatever setup is needed
    1103                 :  * before we enter the insertion critical section.  *ptp_workspace can be
    1104                 :  * set to pass information along to the execPlaceToPage function.
    1105                 :  *
    1106                 :  * If it won't fit, perform a page split and return two temporary page
    1107                 :  * images into *newlpage and *newrpage, with result GPTP_SPLIT.
    1108                 :  *
    1109                 :  * In neither case should the given page buffer be modified here.
    1110                 :  *
    1111                 :  * Note: on insertion to an internal node, in addition to inserting the given
    1112                 :  * item, the downlink of the existing item at stack->off will be updated to
    1113                 :  * point to updateblkno.
    1114                 :  */
    1115                 : static GinPlaceToPageRC
    1116              23 : dataBeginPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
    1117                 :                              void *insertdata, BlockNumber updateblkno,
    1118                 :                              void **ptp_workspace,
    1119                 :                              Page *newlpage, Page *newrpage)
    1120                 : {
    1121              23 :     Page        page = BufferGetPage(buf);
    1122                 : 
    1123                 :     /* If it doesn't fit, deal with split case */
    1124              23 :     if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem))
    1125                 :     {
    1126 UBC           0 :         dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno,
    1127                 :                               newlpage, newrpage);
    1128               0 :         return GPTP_SPLIT;
    1129                 :     }
    1130                 : 
    1131                 :     /* Else, we're ready to proceed with insertion */
    1132 CBC          23 :     return GPTP_INSERT;
    1133                 : }
    1134                 : 
    1135                 : /*
    1136                 :  * Perform data insertion after beginPlaceToPage has decided it will fit.
    1137                 :  *
    1138                 :  * This is invoked within a critical section, and XLOG record creation (if
    1139                 :  * needed) is already started.  The target buffer is registered in slot 0.
    1140                 :  */
    1141                 : static void
    1142              23 : dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
    1143                 :                             void *insertdata, BlockNumber updateblkno,
    1144                 :                             void *ptp_workspace)
    1145                 : {
    1146              23 :     Page        page = BufferGetPage(buf);
    1147              23 :     OffsetNumber off = stack->off;
    1148                 :     PostingItem *pitem;
    1149                 : 
    1150                 :     /* Update existing downlink to point to next page (on internal page) */
    1151              23 :     pitem = GinDataPageGetPostingItem(page, off);
    1152              23 :     PostingItemSetBlockNumber(pitem, updateblkno);
    1153                 : 
    1154                 :     /* Add new item */
    1155              23 :     pitem = (PostingItem *) insertdata;
    1156              23 :     GinDataPageAddPostingItem(page, pitem, off);
    1157                 : 
    1158              23 :     if (RelationNeedsWAL(btree->index) && !btree->isBuild)
    1159                 :     {
    1160                 :         /*
    1161                 :          * This must be static, because it has to survive until XLogInsert,
    1162                 :          * and we can't palloc here.  Ugly, but the XLogInsert infrastructure
    1163                 :          * isn't reentrant anyway.
    1164                 :          */
    1165                 :         static ginxlogInsertDataInternal data;
    1166                 : 
    1167               9 :         data.offset = off;
    1168               9 :         data.newitem = *pitem;
    1169                 : 
    1170               9 :         XLogRegisterBufData(0, (char *) &data,
    1171                 :                             sizeof(ginxlogInsertDataInternal));
    1172                 :     }
    1173              23 : }
    1174                 : 
    1175                 : /*
    1176                 :  * Prepare to insert data on a posting-tree data page.
    1177                 :  *
    1178                 :  * If it will fit, return GPTP_INSERT after doing whatever setup is needed
    1179                 :  * before we enter the insertion critical section.  *ptp_workspace can be
    1180                 :  * set to pass information along to the execPlaceToPage function.
    1181                 :  *
    1182                 :  * If it won't fit, perform a page split and return two temporary page
    1183                 :  * images into *newlpage and *newrpage, with result GPTP_SPLIT.
    1184                 :  *
    1185                 :  * In neither case should the given page buffer be modified here.
    1186                 :  *
    1187                 :  * Note: on insertion to an internal node, in addition to inserting the given
    1188                 :  * item, the downlink of the existing item at stack->off will be updated to
    1189                 :  * point to updateblkno.
    1190                 :  *
    1191                 :  * Calls relevant function for internal or leaf page because they are handled
    1192                 :  * very differently.
    1193                 :  */
    1194                 : static GinPlaceToPageRC
    1195           24795 : dataBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
    1196                 :                      void *insertdata, BlockNumber updateblkno,
    1197                 :                      void **ptp_workspace,
    1198                 :                      Page *newlpage, Page *newrpage)
    1199                 : {
    1200           24795 :     Page        page = BufferGetPage(buf);
    1201                 : 
    1202           24795 :     Assert(GinPageIsData(page));
    1203                 : 
    1204           24795 :     if (GinPageIsLeaf(page))
    1205           24772 :         return dataBeginPlaceToPageLeaf(btree, buf, stack, insertdata,
    1206                 :                                         ptp_workspace,
    1207                 :                                         newlpage, newrpage);
    1208                 :     else
    1209              23 :         return dataBeginPlaceToPageInternal(btree, buf, stack,
    1210                 :                                             insertdata, updateblkno,
    1211                 :                                             ptp_workspace,
    1212                 :                                             newlpage, newrpage);
    1213                 : }
    1214                 : 
    1215                 : /*
    1216                 :  * Perform data insertion after beginPlaceToPage has decided it will fit.
    1217                 :  *
    1218                 :  * This is invoked within a critical section, and XLOG record creation (if
    1219                 :  * needed) is already started.  The target buffer is registered in slot 0.
    1220                 :  *
    1221                 :  * Calls relevant function for internal or leaf page because they are handled
    1222                 :  * very differently.
    1223                 :  */
    1224                 : static void
    1225           24720 : dataExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
    1226                 :                     void *insertdata, BlockNumber updateblkno,
    1227                 :                     void *ptp_workspace)
    1228                 : {
    1229           24720 :     Page        page = BufferGetPage(buf);
    1230                 : 
    1231           24720 :     if (GinPageIsLeaf(page))
    1232           24697 :         dataExecPlaceToPageLeaf(btree, buf, stack, insertdata,
    1233                 :                                 ptp_workspace);
    1234                 :     else
    1235              23 :         dataExecPlaceToPageInternal(btree, buf, stack, insertdata,
    1236                 :                                     updateblkno, ptp_workspace);
    1237           24720 : }
    1238                 : 
    1239                 : /*
    1240                 :  * Split internal page and insert new data.
    1241                 :  *
    1242                 :  * Returns new temp pages to *newlpage and *newrpage.
    1243                 :  * The original buffer is left untouched.
    1244                 :  */
    1245                 : static void
    1246 UBC           0 : dataSplitPageInternal(GinBtree btree, Buffer origbuf,
    1247                 :                       GinBtreeStack *stack,
    1248                 :                       void *insertdata, BlockNumber updateblkno,
    1249                 :                       Page *newlpage, Page *newrpage)
    1250                 : {
    1251               0 :     Page        oldpage = BufferGetPage(origbuf);
    1252               0 :     OffsetNumber off = stack->off;
    1253               0 :     int         nitems = GinPageGetOpaque(oldpage)->maxoff;
    1254                 :     int         nleftitems;
    1255                 :     int         nrightitems;
    1256               0 :     Size        pageSize = PageGetPageSize(oldpage);
    1257               0 :     ItemPointerData oldbound = *GinDataPageGetRightBound(oldpage);
    1258                 :     ItemPointer bound;
    1259                 :     Page        lpage;
    1260                 :     Page        rpage;
    1261                 :     OffsetNumber separator;
    1262                 :     PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
    1263                 : 
    1264               0 :     lpage = PageGetTempPage(oldpage);
    1265               0 :     rpage = PageGetTempPage(oldpage);
    1266               0 :     GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize);
    1267               0 :     GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize);
    1268                 : 
    1269                 :     /*
    1270                 :      * First construct a new list of PostingItems, which includes all the old
    1271                 :      * items, and the new item.
    1272                 :      */
    1273               0 :     memcpy(allitems, GinDataPageGetPostingItem(oldpage, FirstOffsetNumber),
    1274               0 :            (off - 1) * sizeof(PostingItem));
    1275                 : 
    1276               0 :     allitems[off - 1] = *((PostingItem *) insertdata);
    1277               0 :     memcpy(&allitems[off], GinDataPageGetPostingItem(oldpage, off),
    1278               0 :            (nitems - (off - 1)) * sizeof(PostingItem));
    1279               0 :     nitems++;
    1280                 : 
    1281                 :     /* Update existing downlink to point to next page */
    1282               0 :     PostingItemSetBlockNumber(&allitems[off], updateblkno);
    1283                 : 
    1284                 :     /*
    1285                 :      * When creating a new index, fit as many tuples as possible on the left
    1286                 :      * page, on the assumption that the table is scanned from beginning to
    1287                 :      * end. This packs the index as tight as possible.
    1288                 :      */
    1289               0 :     if (btree->isBuild && GinPageRightMost(oldpage))
    1290               0 :         separator = GinNonLeafDataPageGetFreeSpace(rpage) / sizeof(PostingItem);
    1291                 :     else
    1292               0 :         separator = nitems / 2;
    1293               0 :     nleftitems = separator;
    1294               0 :     nrightitems = nitems - separator;
    1295                 : 
    1296               0 :     memcpy(GinDataPageGetPostingItem(lpage, FirstOffsetNumber),
    1297                 :            allitems,
    1298                 :            nleftitems * sizeof(PostingItem));
    1299               0 :     GinPageGetOpaque(lpage)->maxoff = nleftitems;
    1300               0 :     memcpy(GinDataPageGetPostingItem(rpage, FirstOffsetNumber),
    1301               0 :            &allitems[separator],
    1302                 :            nrightitems * sizeof(PostingItem));
    1303               0 :     GinPageGetOpaque(rpage)->maxoff = nrightitems;
    1304                 : 
    1305                 :     /*
    1306                 :      * Also set pd_lower for both pages, like GinDataPageAddPostingItem does.
    1307                 :      */
    1308               0 :     GinDataPageSetDataSize(lpage, nleftitems * sizeof(PostingItem));
    1309               0 :     GinDataPageSetDataSize(rpage, nrightitems * sizeof(PostingItem));
    1310                 : 
    1311                 :     /* set up right bound for left page */
    1312               0 :     bound = GinDataPageGetRightBound(lpage);
    1313               0 :     *bound = GinDataPageGetPostingItem(lpage, nleftitems)->key;
    1314                 : 
    1315                 :     /* set up right bound for right page */
    1316               0 :     *GinDataPageGetRightBound(rpage) = oldbound;
    1317                 : 
    1318                 :     /* return temp pages to caller */
    1319               0 :     *newlpage = lpage;
    1320               0 :     *newrpage = rpage;
    1321               0 : }
    1322                 : 
    1323                 : /*
    1324                 :  * Construct insertion payload for inserting the downlink for given buffer.
    1325                 :  */
    1326                 : static void *
    1327 CBC          23 : dataPrepareDownlink(GinBtree btree, Buffer lbuf)
    1328                 : {
    1329              23 :     PostingItem *pitem = palloc(sizeof(PostingItem));
    1330              23 :     Page        lpage = BufferGetPage(lbuf);
    1331                 : 
    1332              23 :     PostingItemSetBlockNumber(pitem, BufferGetBlockNumber(lbuf));
    1333              23 :     pitem->key = *GinDataPageGetRightBound(lpage);
    1334                 : 
    1335              23 :     return pitem;
    1336                 : }
    1337                 : 
    1338                 : /*
    1339                 :  * Fills new root by right bound values from child.
    1340                 :  * Also called from ginxlog, should not use btree
    1341                 :  */
    1342                 : void
    1343              52 : ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage)
    1344                 : {
    1345                 :     PostingItem li,
    1346                 :                 ri;
    1347                 : 
    1348              52 :     li.key = *GinDataPageGetRightBound(lpage);
    1349              52 :     PostingItemSetBlockNumber(&li, lblkno);
    1350              52 :     GinDataPageAddPostingItem(root, &li, InvalidOffsetNumber);
    1351                 : 
    1352              52 :     ri.key = *GinDataPageGetRightBound(rpage);
    1353              52 :     PostingItemSetBlockNumber(&ri, rblkno);
    1354              52 :     GinDataPageAddPostingItem(root, &ri, InvalidOffsetNumber);
    1355              52 : }
    1356                 : 
    1357                 : 
    1358                 : /*** Functions to work with disassembled leaf pages ***/
    1359                 : 
    1360                 : /*
    1361                 :  * Disassemble page into a disassembledLeaf struct.
    1362                 :  */
    1363                 : static disassembledLeaf *
    1364           24793 : disassembleLeaf(Page page)
    1365                 : {
    1366                 :     disassembledLeaf *leaf;
    1367                 :     GinPostingList *seg;
    1368                 :     Pointer     segbegin;
    1369                 :     Pointer     segend;
    1370                 : 
    1371           24793 :     leaf = palloc0(sizeof(disassembledLeaf));
    1372           24793 :     dlist_init(&leaf->segments);
    1373                 : 
    1374           24793 :     if (GinPageIsCompressed(page))
    1375                 :     {
    1376                 :         /*
    1377                 :          * Create a leafSegmentInfo entry for each segment.
    1378                 :          */
    1379           24793 :         seg = GinDataLeafPageGetPostingList(page);
    1380           24793 :         segbegin = (Pointer) seg;
    1381           24793 :         segend = segbegin + GinDataLeafPageGetPostingListSize(page);
    1382          448115 :         while ((Pointer) seg < segend)
    1383                 :         {
    1384          423322 :             leafSegmentInfo *seginfo = palloc(sizeof(leafSegmentInfo));
    1385                 : 
    1386          423322 :             seginfo->action = GIN_SEGMENT_UNMODIFIED;
    1387          423322 :             seginfo->seg = seg;
    1388          423322 :             seginfo->items = NULL;
    1389          423322 :             seginfo->nitems = 0;
    1390          423322 :             dlist_push_tail(&leaf->segments, &seginfo->node);
    1391                 : 
    1392          423322 :             seg = GinNextPostingListSegment(seg);
    1393                 :         }
    1394           24793 :         leaf->oldformat = false;
    1395                 :     }
    1396                 :     else
    1397                 :     {
    1398                 :         /*
    1399                 :          * A pre-9.4 format uncompressed page is represented by a single
    1400                 :          * segment, with an array of items.  The corner case is uncompressed
    1401                 :          * page containing no items, which is represented as no segments.
    1402                 :          */
    1403                 :         ItemPointer uncompressed;
    1404                 :         int         nuncompressed;
    1405                 :         leafSegmentInfo *seginfo;
    1406                 : 
    1407 UBC           0 :         uncompressed = dataLeafPageGetUncompressed(page, &nuncompressed);
    1408                 : 
    1409               0 :         if (nuncompressed > 0)
    1410                 :         {
    1411               0 :             seginfo = palloc(sizeof(leafSegmentInfo));
    1412                 : 
    1413               0 :             seginfo->action = GIN_SEGMENT_REPLACE;
    1414               0 :             seginfo->seg = NULL;
    1415               0 :             seginfo->items = palloc(nuncompressed * sizeof(ItemPointerData));
    1416               0 :             memcpy(seginfo->items, uncompressed, nuncompressed * sizeof(ItemPointerData));
    1417               0 :             seginfo->nitems = nuncompressed;
    1418                 : 
    1419               0 :             dlist_push_tail(&leaf->segments, &seginfo->node);
    1420                 :         }
    1421                 : 
    1422               0 :         leaf->oldformat = true;
    1423                 :     }
    1424                 : 
    1425 CBC       24793 :     return leaf;
    1426                 : }
    1427                 : 
    1428                 : /*
    1429                 :  * Distribute newItems to the segments.
    1430                 :  *
    1431                 :  * Any segments that acquire new items are decoded, and the new items are
    1432                 :  * merged with the old items.
    1433                 :  *
    1434                 :  * Returns true if any new items were added. False means they were all
    1435                 :  * duplicates of existing items on the page.
    1436                 :  */
    1437                 : static bool
    1438           24772 : addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems, int nNewItems)
    1439                 : {
    1440                 :     dlist_iter  iter;
    1441           24772 :     ItemPointer nextnew = newItems;
    1442           24772 :     int         newleft = nNewItems;
    1443           24772 :     bool        modified = false;
    1444                 :     leafSegmentInfo *newseg;
    1445                 : 
    1446                 :     /*
    1447                 :      * If the page is completely empty, just construct one new segment to hold
    1448                 :      * all the new items.
    1449                 :      */
    1450           24772 :     if (dlist_is_empty(&leaf->segments))
    1451                 :     {
    1452 UBC           0 :         newseg = palloc(sizeof(leafSegmentInfo));
    1453               0 :         newseg->seg = NULL;
    1454               0 :         newseg->items = newItems;
    1455               0 :         newseg->nitems = nNewItems;
    1456               0 :         newseg->action = GIN_SEGMENT_INSERT;
    1457               0 :         dlist_push_tail(&leaf->segments, &newseg->node);
    1458               0 :         return true;
    1459                 :     }
    1460                 : 
    1461 CBC      422872 :     dlist_foreach(iter, &leaf->segments)
    1462                 :     {
    1463          422872 :         leafSegmentInfo *cur = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node, iter.cur);
    1464                 :         int         nthis;
    1465                 :         ItemPointer tmpitems;
    1466                 :         int         ntmpitems;
    1467                 : 
    1468                 :         /*
    1469                 :          * How many of the new items fall into this segment?
    1470                 :          */
    1471          422872 :         if (!dlist_has_next(&leaf->segments, iter.cur))
    1472           24772 :             nthis = newleft;
    1473                 :         else
    1474                 :         {
    1475                 :             leafSegmentInfo *next;
    1476                 :             ItemPointerData next_first;
    1477                 : 
    1478          398100 :             next = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node,
    1479                 :                                                        dlist_next_node(&leaf->segments, iter.cur));
    1480          398100 :             if (next->items)
    1481           24766 :                 next_first = next->items[0];
    1482                 :             else
    1483                 :             {
    1484          373334 :                 Assert(next->seg != NULL);
    1485          373334 :                 next_first = next->seg->first;
    1486                 :             }
    1487                 : 
    1488          398100 :             nthis = 0;
    1489          398100 :             while (nthis < newleft && ginCompareItemPointers(&nextnew[nthis], &next_first) < 0)
    1490 UBC           0 :                 nthis++;
    1491                 :         }
    1492 CBC      422872 :         if (nthis == 0)
    1493          398100 :             continue;
    1494                 : 
    1495                 :         /* Merge the new items with the existing items. */
    1496           24772 :         if (!cur->items)
    1497 UBC           0 :             cur->items = ginPostingListDecode(cur->seg, &cur->nitems);
    1498                 : 
    1499                 :         /*
    1500                 :          * Fast path for the important special case that we're appending to
    1501                 :          * the end of the page: don't let the last segment on the page grow
    1502                 :          * larger than the target, create a new segment before that happens.
    1503                 :          */
    1504 CBC       49544 :         if (!dlist_has_next(&leaf->segments, iter.cur) &&
    1505           24772 :             ginCompareItemPointers(&cur->items[cur->nitems - 1], &nextnew[0]) < 0 &&
    1506           24772 :             cur->seg != NULL &&
    1507           24772 :             SizeOfGinPostingList(cur->seg) >= GinPostingListSegmentTargetSize)
    1508                 :         {
    1509             184 :             newseg = palloc(sizeof(leafSegmentInfo));
    1510             184 :             newseg->seg = NULL;
    1511             184 :             newseg->items = nextnew;
    1512             184 :             newseg->nitems = nthis;
    1513             184 :             newseg->action = GIN_SEGMENT_INSERT;
    1514             184 :             dlist_push_tail(&leaf->segments, &newseg->node);
    1515             184 :             modified = true;
    1516           24772 :             break;
    1517                 :         }
    1518                 : 
    1519           24588 :         tmpitems = ginMergeItemPointers(cur->items, cur->nitems,
    1520                 :                                         nextnew, nthis,
    1521                 :                                         &ntmpitems);
    1522           24588 :         if (ntmpitems != cur->nitems)
    1523                 :         {
    1524                 :             /*
    1525                 :              * If there are no duplicates, track the added items so that we
    1526                 :              * can emit a compact ADDITEMS WAL record later on. (it doesn't
    1527                 :              * seem worth re-checking which items were duplicates, if there
    1528                 :              * were any)
    1529                 :              */
    1530           24588 :             if (ntmpitems == nthis + cur->nitems &&
    1531           24588 :                 cur->action == GIN_SEGMENT_UNMODIFIED)
    1532                 :             {
    1533           24588 :                 cur->action = GIN_SEGMENT_ADDITEMS;
    1534           24588 :                 cur->modifieditems = nextnew;
    1535           24588 :                 cur->nmodifieditems = nthis;
    1536                 :             }
    1537                 :             else
    1538 UBC           0 :                 cur->action = GIN_SEGMENT_REPLACE;
    1539                 : 
    1540 CBC       24588 :             cur->items = tmpitems;
    1541           24588 :             cur->nitems = ntmpitems;
    1542           24588 :             cur->seg = NULL;
    1543           24588 :             modified = true;
    1544                 :         }
    1545                 : 
    1546           24588 :         nextnew += nthis;
    1547           24588 :         newleft -= nthis;
    1548           24588 :         if (newleft == 0)
    1549           24588 :             break;
    1550                 :     }
    1551                 : 
    1552           24772 :     return modified;
    1553                 : }
    1554                 : 
    1555                 : /*
    1556                 :  * Recompresses all segments that have been modified.
    1557                 :  *
    1558                 :  * If not all the items fit on two pages (ie. after split), we store as
    1559                 :  * many items as fit, and set *remaining to the first item that didn't fit.
    1560                 :  * If all items fit, *remaining is set to invalid.
    1561                 :  *
    1562                 :  * Returns true if the page has to be split.
    1563                 :  */
    1564                 : static bool
    1565           24772 : leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
    1566                 : {
    1567           24772 :     int         pgused = 0;
    1568           24772 :     bool        needsplit = false;
    1569                 :     dlist_iter  iter;
    1570                 :     int         segsize;
    1571                 :     leafSegmentInfo *nextseg;
    1572                 :     int         npacked;
    1573                 :     bool        modified;
    1574                 :     dlist_node *cur_node;
    1575                 :     dlist_node *next_node;
    1576                 : 
    1577           24772 :     ItemPointerSetInvalid(remaining);
    1578                 : 
    1579                 :     /*
    1580                 :      * cannot use dlist_foreach_modify here because we insert adjacent items
    1581                 :      * while iterating.
    1582                 :      */
    1583           24772 :     for (cur_node = dlist_head_node(&leaf->segments);
    1584          448717 :          cur_node != NULL;
    1585          423945 :          cur_node = next_node)
    1586                 :     {
    1587          423965 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
    1588                 :                                                    cur_node);
    1589                 : 
    1590          423965 :         if (dlist_has_next(&leaf->segments, cur_node))
    1591          398284 :             next_node = dlist_next_node(&leaf->segments, cur_node);
    1592                 :         else
    1593           25681 :             next_node = NULL;
    1594                 : 
    1595                 :         /* Compress the posting list, if necessary */
    1596          423965 :         if (seginfo->action != GIN_SEGMENT_DELETE)
    1597                 :         {
    1598          423965 :             if (seginfo->seg == NULL)
    1599                 :             {
    1600           25681 :                 if (seginfo->nitems > GinPostingListSegmentMaxSize)
    1601             926 :                     npacked = 0;    /* no chance that it would fit. */
    1602                 :                 else
    1603                 :                 {
    1604           24755 :                     seginfo->seg = ginCompressPostingList(seginfo->items,
    1605                 :                                                           seginfo->nitems,
    1606                 :                                                           GinPostingListSegmentMaxSize,
    1607                 :                                                           &npacked);
    1608                 :                 }
    1609           25681 :                 if (npacked != seginfo->nitems)
    1610                 :                 {
    1611                 :                     /*
    1612                 :                      * Too large. Compress again to the target size, and
    1613                 :                      * create a new segment to represent the remaining items.
    1614                 :                      * The new segment is inserted after this one, so it will
    1615                 :                      * be processed in the next iteration of this loop.
    1616                 :                      */
    1617             929 :                     if (seginfo->seg)
    1618               3 :                         pfree(seginfo->seg);
    1619             929 :                     seginfo->seg = ginCompressPostingList(seginfo->items,
    1620                 :                                                           seginfo->nitems,
    1621                 :                                                           GinPostingListSegmentTargetSize,
    1622                 :                                                           &npacked);
    1623             929 :                     if (seginfo->action != GIN_SEGMENT_INSERT)
    1624               3 :                         seginfo->action = GIN_SEGMENT_REPLACE;
    1625                 : 
    1626             929 :                     nextseg = palloc(sizeof(leafSegmentInfo));
    1627             929 :                     nextseg->action = GIN_SEGMENT_INSERT;
    1628             929 :                     nextseg->seg = NULL;
    1629             929 :                     nextseg->items = &seginfo->items[npacked];
    1630             929 :                     nextseg->nitems = seginfo->nitems - npacked;
    1631             929 :                     next_node = &nextseg->node;
    1632             929 :                     dlist_insert_after(cur_node, next_node);
    1633                 :                 }
    1634                 :             }
    1635                 : 
    1636                 :             /*
    1637                 :              * If the segment is very small, merge it with the next segment.
    1638                 :              */
    1639          423965 :             if (SizeOfGinPostingList(seginfo->seg) < GinPostingListSegmentMinSize && next_node)
    1640                 :             {
    1641                 :                 int         nmerged;
    1642                 : 
    1643 UBC           0 :                 nextseg = dlist_container(leafSegmentInfo, node, next_node);
    1644                 : 
    1645               0 :                 if (seginfo->items == NULL)
    1646               0 :                     seginfo->items = ginPostingListDecode(seginfo->seg,
    1647                 :                                                           &seginfo->nitems);
    1648               0 :                 if (nextseg->items == NULL)
    1649               0 :                     nextseg->items = ginPostingListDecode(nextseg->seg,
    1650                 :                                                           &nextseg->nitems);
    1651               0 :                 nextseg->items =
    1652               0 :                     ginMergeItemPointers(seginfo->items, seginfo->nitems,
    1653               0 :                                          nextseg->items, nextseg->nitems,
    1654                 :                                          &nmerged);
    1655               0 :                 Assert(nmerged == seginfo->nitems + nextseg->nitems);
    1656               0 :                 nextseg->nitems = nmerged;
    1657               0 :                 nextseg->seg = NULL;
    1658                 : 
    1659               0 :                 nextseg->action = GIN_SEGMENT_REPLACE;
    1660               0 :                 nextseg->modifieditems = NULL;
    1661               0 :                 nextseg->nmodifieditems = 0;
    1662                 : 
    1663               0 :                 if (seginfo->action == GIN_SEGMENT_INSERT)
    1664                 :                 {
    1665               0 :                     dlist_delete(cur_node);
    1666               0 :                     continue;
    1667                 :                 }
    1668                 :                 else
    1669                 :                 {
    1670               0 :                     seginfo->action = GIN_SEGMENT_DELETE;
    1671               0 :                     seginfo->seg = NULL;
    1672                 :                 }
    1673                 :             }
    1674                 : 
    1675 CBC      423965 :             seginfo->items = NULL;
    1676          423965 :             seginfo->nitems = 0;
    1677                 :         }
    1678                 : 
    1679          423965 :         if (seginfo->action == GIN_SEGMENT_DELETE)
    1680 UBC           0 :             continue;
    1681                 : 
    1682                 :         /*
    1683                 :          * OK, we now have a compressed version of this segment ready for
    1684                 :          * copying to the page. Did we exceed the size that fits on one page?
    1685                 :          */
    1686 CBC      423965 :         segsize = SizeOfGinPostingList(seginfo->seg);
    1687          423965 :         if (pgused + segsize > GinDataPageMaxDataSize)
    1688                 :         {
    1689              95 :             if (!needsplit)
    1690                 :             {
    1691                 :                 /* switch to right page */
    1692              75 :                 Assert(pgused > 0);
    1693              75 :                 leaf->lastleft = dlist_prev_node(&leaf->segments, cur_node);
    1694              75 :                 needsplit = true;
    1695              75 :                 leaf->lsize = pgused;
    1696              75 :                 pgused = 0;
    1697                 :             }
    1698                 :             else
    1699                 :             {
    1700                 :                 /*
    1701                 :                  * Filled both pages. The last segment we constructed did not
    1702                 :                  * fit.
    1703                 :                  */
    1704              20 :                 *remaining = seginfo->seg->first;
    1705                 : 
    1706                 :                 /*
    1707                 :                  * remove all segments that did not fit from the list.
    1708                 :                  */
    1709              40 :                 while (dlist_has_next(&leaf->segments, cur_node))
    1710              20 :                     dlist_delete(dlist_next_node(&leaf->segments, cur_node));
    1711              20 :                 dlist_delete(cur_node);
    1712              20 :                 break;
    1713                 :             }
    1714                 :         }
    1715                 : 
    1716          423945 :         pgused += segsize;
    1717                 :     }
    1718                 : 
    1719           24772 :     if (!needsplit)
    1720                 :     {
    1721           24697 :         leaf->lsize = pgused;
    1722           24697 :         leaf->rsize = 0;
    1723                 :     }
    1724                 :     else
    1725              75 :         leaf->rsize = pgused;
    1726                 : 
    1727           24772 :     Assert(leaf->lsize <= GinDataPageMaxDataSize);
    1728           24772 :     Assert(leaf->rsize <= GinDataPageMaxDataSize);
    1729                 : 
    1730                 :     /*
    1731                 :      * Make a palloc'd copy of every segment after the first modified one,
    1732                 :      * because as we start copying items to the original page, we might
    1733                 :      * overwrite an existing segment.
    1734                 :      */
    1735           24772 :     modified = false;
    1736          448717 :     dlist_foreach(iter, &leaf->segments)
    1737                 :     {
    1738          423945 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
    1739                 :                                                    iter.cur);
    1740                 : 
    1741          423945 :         if (!modified && seginfo->action != GIN_SEGMENT_UNMODIFIED)
    1742                 :         {
    1743           24772 :             modified = true;
    1744                 :         }
    1745          399173 :         else if (modified && seginfo->action == GIN_SEGMENT_UNMODIFIED)
    1746                 :         {
    1747                 :             GinPostingList *tmp;
    1748                 : 
    1749 UBC           0 :             segsize = SizeOfGinPostingList(seginfo->seg);
    1750               0 :             tmp = palloc(segsize);
    1751               0 :             memcpy(tmp, seginfo->seg, segsize);
    1752               0 :             seginfo->seg = tmp;
    1753                 :         }
    1754                 :     }
    1755                 : 
    1756 CBC       24772 :     return needsplit;
    1757                 : }
    1758                 : 
    1759                 : 
    1760                 : /*** Functions that are exported to the rest of the GIN code ***/
    1761                 : 
    1762                 : /*
    1763                 :  * Creates new posting tree containing the given TIDs. Returns the page
    1764                 :  * number of the root of the new posting tree.
    1765                 :  *
    1766                 :  * items[] must be in sorted order with no duplicates.
    1767                 :  */
    1768                 : BlockNumber
    1769              58 : createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
    1770                 :                   GinStatsData *buildStats, Buffer entrybuffer)
    1771                 : {
    1772                 :     BlockNumber blkno;
    1773                 :     Buffer      buffer;
    1774                 :     Page        tmppage;
    1775                 :     Page        page;
    1776                 :     Pointer     ptr;
    1777                 :     int         nrootitems;
    1778                 :     int         rootsize;
    1779              58 :     bool        is_build = (buildStats != NULL);
    1780                 : 
    1781                 :     /* Construct the new root page in memory first. */
    1782              58 :     tmppage = (Page) palloc(BLCKSZ);
    1783              58 :     GinInitPage(tmppage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
    1784              58 :     GinPageGetOpaque(tmppage)->rightlink = InvalidBlockNumber;
    1785                 : 
    1786                 :     /*
    1787                 :      * Write as many of the items to the root page as fit. In segments of max
    1788                 :      * GinPostingListSegmentMaxSize bytes each.
    1789                 :      */
    1790              58 :     nrootitems = 0;
    1791              58 :     rootsize = 0;
    1792              58 :     ptr = (Pointer) GinDataLeafPageGetPostingList(tmppage);
    1793            1166 :     while (nrootitems < nitems)
    1794                 :     {
    1795                 :         GinPostingList *segment;
    1796                 :         int         npacked;
    1797                 :         int         segsize;
    1798                 : 
    1799            1158 :         segment = ginCompressPostingList(&items[nrootitems],
    1800            1158 :                                          nitems - nrootitems,
    1801                 :                                          GinPostingListSegmentMaxSize,
    1802                 :                                          &npacked);
    1803            1158 :         segsize = SizeOfGinPostingList(segment);
    1804            1158 :         if (rootsize + segsize > GinDataPageMaxDataSize)
    1805              50 :             break;
    1806                 : 
    1807            1108 :         memcpy(ptr, segment, segsize);
    1808            1108 :         ptr += segsize;
    1809            1108 :         rootsize += segsize;
    1810            1108 :         nrootitems += npacked;
    1811            1108 :         pfree(segment);
    1812                 :     }
    1813              58 :     GinDataPageSetDataSize(tmppage, rootsize);
    1814                 : 
    1815                 :     /*
    1816                 :      * All set. Get a new physical page, and copy the in-memory page to it.
    1817                 :      */
    1818              58 :     buffer = GinNewBuffer(index);
    1819              58 :     page = BufferGetPage(buffer);
    1820              58 :     blkno = BufferGetBlockNumber(buffer);
    1821                 : 
    1822                 :     /*
    1823                 :      * Copy any predicate locks from the entry tree leaf (containing posting
    1824                 :      * list) to the posting tree.
    1825                 :      */
    1826              58 :     PredicateLockPageSplit(index, BufferGetBlockNumber(entrybuffer), blkno);
    1827                 : 
    1828              58 :     START_CRIT_SECTION();
    1829                 : 
    1830              58 :     PageRestoreTempPage(tmppage, page);
    1831              58 :     MarkBufferDirty(buffer);
    1832                 : 
    1833              58 :     if (RelationNeedsWAL(index) && !is_build)
    1834                 :     {
    1835                 :         XLogRecPtr  recptr;
    1836                 :         ginxlogCreatePostingTree data;
    1837                 : 
    1838              14 :         data.size = rootsize;
    1839                 : 
    1840              14 :         XLogBeginInsert();
    1841              14 :         XLogRegisterData((char *) &data, sizeof(ginxlogCreatePostingTree));
    1842                 : 
    1843              14 :         XLogRegisterData((char *) GinDataLeafPageGetPostingList(page),
    1844                 :                          rootsize);
    1845              14 :         XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
    1846                 : 
    1847              14 :         recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE);
    1848              14 :         PageSetLSN(page, recptr);
    1849                 :     }
    1850                 : 
    1851              58 :     UnlockReleaseBuffer(buffer);
    1852                 : 
    1853              58 :     END_CRIT_SECTION();
    1854                 : 
    1855                 :     /* During index build, count the newly-added data page */
    1856              58 :     if (buildStats)
    1857              38 :         buildStats->nDataPages++;
    1858                 : 
    1859              58 :     elog(DEBUG2, "created GIN posting tree with %d items", nrootitems);
    1860                 : 
    1861                 :     /*
    1862                 :      * Add any remaining TIDs to the newly-created posting tree.
    1863                 :      */
    1864              58 :     if (nitems > nrootitems)
    1865                 :     {
    1866              50 :         ginInsertItemPointers(index, blkno,
    1867              50 :                               items + nrootitems,
    1868                 :                               nitems - nrootitems,
    1869                 :                               buildStats);
    1870                 :     }
    1871                 : 
    1872              58 :     return blkno;
    1873                 : }
    1874                 : 
    1875                 : static void
    1876           24792 : ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno)
    1877                 : {
    1878           24792 :     memset(btree, 0, sizeof(GinBtreeData));
    1879                 : 
    1880           24792 :     btree->index = index;
    1881           24792 :     btree->rootBlkno = rootBlkno;
    1882                 : 
    1883           24792 :     btree->findChildPage = dataLocateItem;
    1884           24792 :     btree->getLeftMostChild = dataGetLeftMostPage;
    1885           24792 :     btree->isMoveRight = dataIsMoveRight;
    1886           24792 :     btree->findItem = NULL;
    1887           24792 :     btree->findChildPtr = dataFindChildPtr;
    1888           24792 :     btree->beginPlaceToPage = dataBeginPlaceToPage;
    1889           24792 :     btree->execPlaceToPage = dataExecPlaceToPage;
    1890           24792 :     btree->fillRoot = ginDataFillRoot;
    1891           24792 :     btree->prepareDownlink = dataPrepareDownlink;
    1892                 : 
    1893           24792 :     btree->isData = true;
    1894           24792 :     btree->fullScan = false;
    1895           24792 :     btree->isBuild = false;
    1896           24792 : }
    1897                 : 
    1898                 : /*
    1899                 :  * Inserts array of item pointers, may execute several tree scan (very rare)
    1900                 :  */
    1901                 : void
    1902           24754 : ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
    1903                 :                       ItemPointerData *items, uint32 nitem,
    1904                 :                       GinStatsData *buildStats)
    1905                 : {
    1906                 :     GinBtreeData btree;
    1907                 :     GinBtreeDataLeafInsertData insertdata;
    1908                 :     GinBtreeStack *stack;
    1909                 : 
    1910           24754 :     ginPrepareDataScan(&btree, index, rootBlkno);
    1911           24754 :     btree.isBuild = (buildStats != NULL);
    1912           24754 :     insertdata.items = items;
    1913           24754 :     insertdata.nitem = nitem;
    1914           24754 :     insertdata.curitem = 0;
    1915                 : 
    1916           49526 :     while (insertdata.curitem < insertdata.nitem)
    1917                 :     {
    1918                 :         /* search for the leaf page where the first item should go to */
    1919           24774 :         btree.itemptr = insertdata.items[insertdata.curitem];
    1920           24774 :         stack = ginFindLeafPage(&btree, false, true, NULL);
    1921                 : 
    1922           24772 :         ginInsertValue(&btree, stack, &insertdata, buildStats);
    1923                 :     }
    1924           24752 : }
    1925                 : 
    1926                 : /*
    1927                 :  * Starts a new scan on a posting tree.
    1928                 :  */
    1929                 : GinBtreeStack *
    1930              38 : ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno,
    1931                 :                         Snapshot snapshot)
    1932                 : {
    1933                 :     GinBtreeStack *stack;
    1934                 : 
    1935              38 :     ginPrepareDataScan(btree, index, rootBlkno);
    1936                 : 
    1937              38 :     btree->fullScan = true;
    1938                 : 
    1939              38 :     stack = ginFindLeafPage(btree, true, false, snapshot);
    1940                 : 
    1941              38 :     return stack;
    1942                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a