LCOV - differential code coverage report
Current view: top level - src/backend/access/gin - ginpostinglist.c (source / functions) Coverage Total Hit UBC GNC CBC DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 87.3 % 142 124 18 1 123 1
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 9 9 1 8 1
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * ginpostinglist.c
       4                 :  *    routines for dealing with posting lists.
       5                 :  *
       6                 :  *
       7                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       8                 :  * Portions Copyright (c) 1994, Regents of the University of California
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *          src/backend/access/gin/ginpostinglist.c
      12                 :  *-------------------------------------------------------------------------
      13                 :  */
      14                 : 
      15                 : #include "postgres.h"
      16                 : 
      17                 : #include "access/gin_private.h"
      18                 : 
      19                 : #ifdef USE_ASSERT_CHECKING
      20                 : #define CHECK_ENCODING_ROUNDTRIP
      21                 : #endif
      22                 : 
      23                 : /*
      24                 :  * For encoding purposes, item pointers are represented as 64-bit unsigned
      25                 :  * integers. The lowest 11 bits represent the offset number, and the next
      26                 :  * lowest 32 bits are the block number. That leaves 21 bits unused, i.e.
      27                 :  * only 43 low bits are used.
      28                 :  *
      29                 :  * 11 bits is enough for the offset number, because MaxHeapTuplesPerPage <
      30                 :  * 2^11 on all supported block sizes. We are frugal with the bits, because
      31                 :  * smaller integers use fewer bytes in the varbyte encoding, saving disk
      32                 :  * space. (If we get a new table AM in the future that wants to use the full
      33                 :  * range of possible offset numbers, we'll need to change this.)
      34                 :  *
      35                 :  * These 43-bit integers are encoded using varbyte encoding. In each byte,
      36                 :  * the 7 low bits contain data, while the highest bit is a continuation bit.
      37                 :  * When the continuation bit is set, the next byte is part of the same
      38                 :  * integer, otherwise this is the last byte of this integer. 43 bits need
      39                 :  * at most 7 bytes in this encoding:
      40                 :  *
      41                 :  * 0XXXXXXX
      42                 :  * 1XXXXXXX 0XXXXYYY
      43                 :  * 1XXXXXXX 1XXXXYYY 0YYYYYYY
      44                 :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 0YYYYYYY
      45                 :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
      46                 :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
      47                 :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0uuuuuuY
      48                 :  *
      49                 :  * X = bits used for offset number
      50                 :  * Y = bits used for block number
      51                 :  * u = unused bit
      52                 :  *
      53                 :  * The bytes are in stored in little-endian order.
      54                 :  *
      55                 :  * An important property of this encoding is that removing an item from list
      56                 :  * never increases the size of the resulting compressed posting list. Proof:
      57                 :  *
      58                 :  * Removing number is actually replacement of two numbers with their sum. We
      59                 :  * have to prove that varbyte encoding of a sum can't be longer than varbyte
      60                 :  * encoding of its summands. Sum of two numbers is at most one bit wider than
      61                 :  * the larger of the summands. Widening a number by one bit enlarges its length
      62                 :  * in varbyte encoding by at most one byte. Therefore, varbyte encoding of sum
      63                 :  * is at most one byte longer than varbyte encoding of larger summand. Lesser
      64                 :  * summand is at least one byte, so the sum cannot take more space than the
      65                 :  * summands, Q.E.D.
      66                 :  *
      67                 :  * This property greatly simplifies VACUUM, which can assume that posting
      68                 :  * lists always fit on the same page after vacuuming. Note that even though
      69                 :  * that holds for removing items from a posting list, you must also be
      70                 :  * careful to not cause expansion e.g. when merging uncompressed items on the
      71                 :  * page into the compressed lists, when vacuuming.
      72                 :  */
      73                 : 
      74                 : /*
      75                 :  * How many bits do you need to encode offset number? OffsetNumber is a 16-bit
      76                 :  * integer, but you can't fit that many items on a page. 11 ought to be more
      77                 :  * than enough. It's tempting to derive this from MaxHeapTuplesPerPage, and
      78                 :  * use the minimum number of bits, but that would require changing the on-disk
      79                 :  * format if MaxHeapTuplesPerPage changes. Better to leave some slack.
      80                 :  */
      81                 : #define MaxHeapTuplesPerPageBits        11
      82                 : 
      83                 : /* Max. number of bytes needed to encode the largest supported integer. */
      84                 : #define MaxBytesPerInteger              7
      85                 : 
      86                 : static inline uint64
      87 CBC    17310561 : itemptr_to_uint64(const ItemPointer iptr)
      88                 : {
      89                 :     uint64      val;
      90                 : 
      91        17310561 :     Assert(ItemPointerIsValid(iptr));
      92        17310561 :     Assert(GinItemPointerGetOffsetNumber(iptr) < (1 << MaxHeapTuplesPerPageBits));
      93                 : 
      94        17310561 :     val = GinItemPointerGetBlockNumber(iptr);
      95        17310561 :     val <<= MaxHeapTuplesPerPageBits;
      96        17310561 :     val |= GinItemPointerGetOffsetNumber(iptr);
      97                 : 
      98        17310561 :     return val;
      99                 : }
     100                 : 
     101                 : static inline void
     102        33594173 : uint64_to_itemptr(uint64 val, ItemPointer iptr)
     103                 : {
     104        33594173 :     GinItemPointerSetOffsetNumber(iptr, val & ((1 << MaxHeapTuplesPerPageBits) - 1));
     105        33594173 :     val = val >> MaxHeapTuplesPerPageBits;
     106        33594173 :     GinItemPointerSetBlockNumber(iptr, val);
     107                 : 
     108        33594173 :     Assert(ItemPointerIsValid(iptr));
     109        33594173 : }
     110                 : 
     111                 : /*
     112                 :  * Varbyte-encode 'val' into *ptr. *ptr is incremented to next integer.
     113                 :  */
     114                 : static void
     115        16397095 : encode_varbyte(uint64 val, unsigned char **ptr)
     116                 : {
     117        16397095 :     unsigned char *p = *ptr;
     118                 : 
     119        16694602 :     while (val > 0x7F)
     120                 :     {
     121          297507 :         *(p++) = 0x80 | (val & 0x7F);
     122          297507 :         val >>= 7;
     123                 :     }
     124        16397095 :     *(p++) = (unsigned char) val;
     125                 : 
     126        16397095 :     *ptr = p;
     127        16397095 : }
     128                 : 
     129                 : /*
     130                 :  * Decode varbyte-encoded integer at *ptr. *ptr is incremented to next integer.
     131                 :  */
     132                 : static uint64
     133        33594173 : decode_varbyte(unsigned char **ptr)
     134                 : {
     135                 :     uint64      val;
     136        33594173 :     unsigned char *p = *ptr;
     137                 :     uint64      c;
     138                 : 
     139                 :     /* 1st byte */
     140        33594173 :     c = *(p++);
     141        33594173 :     val = c & 0x7F;
     142        33594173 :     if (c & 0x80)
     143                 :     {
     144                 :         /* 2nd byte */
     145          873201 :         c = *(p++);
     146          873201 :         val |= (c & 0x7F) << 7;
     147          873201 :         if (c & 0x80)
     148                 :         {
     149                 :             /* 3rd byte */
     150           90630 :             c = *(p++);
     151           90630 :             val |= (c & 0x7F) << 14;
     152           90630 :             if (c & 0x80)
     153                 :             {
     154                 :                 /* 4th byte */
     155               2 :                 c = *(p++);
     156               2 :                 val |= (c & 0x7F) << 21;
     157               2 :                 if (c & 0x80)
     158                 :                 {
     159                 :                     /* 5th byte */
     160               2 :                     c = *(p++);
     161               2 :                     val |= (c & 0x7F) << 28;
     162               2 :                     if (c & 0x80)
     163                 :                     {
     164                 :                         /* 6th byte */
     165               2 :                         c = *(p++);
     166               2 :                         val |= (c & 0x7F) << 35;
     167               2 :                         if (c & 0x80)
     168                 :                         {
     169                 :                             /* 7th byte, should not have continuation bit */
     170               2 :                             c = *(p++);
     171               2 :                             val |= c << 42;
     172               2 :                             Assert((c & 0x80) == 0);
     173                 :                         }
     174                 :                     }
     175                 :                 }
     176                 :             }
     177                 :         }
     178                 :     }
     179                 : 
     180        33594173 :     *ptr = p;
     181                 : 
     182        33594173 :     return val;
     183                 : }
     184                 : 
     185                 : /*
     186                 :  * Encode a posting list.
     187                 :  *
     188                 :  * The encoded list is returned in a palloc'd struct, which will be at most
     189                 :  * 'maxsize' bytes in size.  The number items in the returned segment is
     190                 :  * returned in *nwritten. If it's not equal to nipd, not all the items fit
     191                 :  * in 'maxsize', and only the first *nwritten were encoded.
     192                 :  *
     193                 :  * The allocated size of the returned struct is short-aligned, and the padding
     194                 :  * byte at the end, if any, is zero.
     195                 :  */
     196                 : GinPostingList *
     197          301120 : ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize,
     198                 :                        int *nwritten)
     199                 : {
     200                 :     uint64      prev;
     201          301120 :     int         totalpacked = 0;
     202                 :     int         maxbytes;
     203                 :     GinPostingList *result;
     204                 :     unsigned char *ptr;
     205                 :     unsigned char *endptr;
     206                 : 
     207          301120 :     maxsize = SHORTALIGN_DOWN(maxsize);
     208                 : 
     209          301120 :     result = palloc(maxsize);
     210                 : 
     211          301120 :     maxbytes = maxsize - offsetof(GinPostingList, bytes);
     212          301120 :     Assert(maxbytes > 0);
     213                 : 
     214                 :     /* Store the first special item */
     215          301120 :     result->first = ipd[0];
     216                 : 
     217          301120 :     prev = itemptr_to_uint64(&result->first);
     218                 : 
     219          301120 :     ptr = result->bytes;
     220          301120 :     endptr = result->bytes + maxbytes;
     221        16696110 :     for (totalpacked = 1; totalpacked < nipd; totalpacked++)
     222                 :     {
     223        16397095 :         uint64      val = itemptr_to_uint64(&ipd[totalpacked]);
     224        16397095 :         uint64      delta = val - prev;
     225                 : 
     226        16397095 :         Assert(val > prev);
     227                 : 
     228        16397095 :         if (endptr - ptr >= MaxBytesPerInteger)
     229        16382642 :             encode_varbyte(delta, &ptr);
     230                 :         else
     231                 :         {
     232                 :             /*
     233                 :              * There are less than 7 bytes left. Have to check if the next
     234                 :              * item fits in that space before writing it out.
     235                 :              */
     236                 :             unsigned char buf[MaxBytesPerInteger];
     237           14453 :             unsigned char *p = buf;
     238                 : 
     239           14453 :             encode_varbyte(delta, &p);
     240           14453 :             if (p - buf > (endptr - ptr))
     241            2105 :                 break;          /* output is full */
     242                 : 
     243           12348 :             memcpy(ptr, buf, p - buf);
     244           12348 :             ptr += (p - buf);
     245                 :         }
     246        16394990 :         prev = val;
     247                 :     }
     248          301120 :     result->nbytes = ptr - result->bytes;
     249                 : 
     250                 :     /*
     251                 :      * If we wrote an odd number of bytes, zero out the padding byte at the
     252                 :      * end.
     253                 :      */
     254          301120 :     if (result->nbytes != SHORTALIGN(result->nbytes))
     255           30366 :         result->bytes[result->nbytes] = 0;
     256                 : 
     257          301120 :     if (nwritten)
     258           30181 :         *nwritten = totalpacked;
     259                 : 
     260          301120 :     Assert(SizeOfGinPostingList(result) <= maxsize);
     261                 : 
     262                 :     /*
     263                 :      * Check that the encoded segment decodes back to the original items.
     264                 :      */
     265                 : #if defined (CHECK_ENCODING_ROUNDTRIP)
     266                 :     {
     267                 :         int         ndecoded;
     268          301120 :         ItemPointer tmp = ginPostingListDecode(result, &ndecoded);
     269                 : 
     270          301120 :         Assert(ndecoded == totalpacked);
     271          301120 :         Assert(memcmp(tmp, ipd, ndecoded * sizeof(ItemPointerData)) == 0);
     272          301120 :         pfree(tmp);
     273                 :     }
     274                 : #endif
     275                 : 
     276          301120 :     return result;
     277                 : }
     278                 : 
     279                 : /*
     280                 :  * Decode a compressed posting list into an array of item pointers.
     281                 :  * The number of items is returned in *ndecoded.
     282                 :  */
     283                 : ItemPointer
     284 GNC      610670 : ginPostingListDecode(GinPostingList *plist, int *ndecoded_out)
     285                 : {
     286 CBC     1221340 :     return ginPostingListDecodeAllSegments(plist,
     287          610670 :                                            SizeOfGinPostingList(plist),
     288                 :                                            ndecoded_out);
     289                 : }
     290                 : 
     291                 : /*
     292                 :  * Decode multiple posting list segments into an array of item pointers.
     293                 :  * The number of items is returned in *ndecoded_out. The segments are stored
     294                 :  * one after each other, with total size 'len' bytes.
     295                 :  */
     296                 : ItemPointer
     297          610761 : ginPostingListDecodeAllSegments(GinPostingList *segment, int len, int *ndecoded_out)
     298                 : {
     299                 :     ItemPointer result;
     300                 :     int         nallocated;
     301                 :     uint64      val;
     302          610761 :     char       *endseg = ((char *) segment) + len;
     303                 :     int         ndecoded;
     304                 :     unsigned char *ptr;
     305                 :     unsigned char *endptr;
     306                 : 
     307                 :     /*
     308                 :      * Guess an initial size of the array.
     309                 :      */
     310          610761 :     nallocated = segment->nbytes * 2 + 1;
     311          610761 :     result = palloc(nallocated * sizeof(ItemPointerData));
     312                 : 
     313          610761 :     ndecoded = 0;
     314         1223107 :     while ((char *) segment < endseg)
     315                 :     {
     316                 :         /* enlarge output array if needed */
     317          612346 :         if (ndecoded >= nallocated)
     318                 :         {
     319 UBC           0 :             nallocated *= 2;
     320               0 :             result = repalloc(result, nallocated * sizeof(ItemPointerData));
     321                 :         }
     322                 : 
     323                 :         /* copy the first item */
     324 CBC      612346 :         Assert(OffsetNumberIsValid(ItemPointerGetOffsetNumber(&segment->first)));
     325          612346 :         Assert(ndecoded == 0 || ginCompareItemPointers(&segment->first, &result[ndecoded - 1]) > 0);
     326          612346 :         result[ndecoded] = segment->first;
     327          612346 :         ndecoded++;
     328                 : 
     329          612346 :         val = itemptr_to_uint64(&segment->first);
     330          612346 :         ptr = segment->bytes;
     331          612346 :         endptr = segment->bytes + segment->nbytes;
     332        34206519 :         while (ptr < endptr)
     333                 :         {
     334                 :             /* enlarge output array if needed */
     335        33594173 :             if (ndecoded >= nallocated)
     336                 :             {
     337             290 :                 nallocated *= 2;
     338             290 :                 result = repalloc(result, nallocated * sizeof(ItemPointerData));
     339                 :             }
     340                 : 
     341        33594173 :             val += decode_varbyte(&ptr);
     342                 : 
     343        33594173 :             uint64_to_itemptr(val, &result[ndecoded]);
     344        33594173 :             ndecoded++;
     345                 :         }
     346          612346 :         segment = GinNextPostingListSegment(segment);
     347                 :     }
     348                 : 
     349          610761 :     if (ndecoded_out)
     350          610761 :         *ndecoded_out = ndecoded;
     351          610761 :     return result;
     352                 : }
     353                 : 
     354                 : /*
     355                 :  * Add all item pointers from a bunch of posting lists to a TIDBitmap.
     356                 :  */
     357                 : int
     358              15 : ginPostingListDecodeAllSegmentsToTbm(GinPostingList *ptr, int len,
     359                 :                                      TIDBitmap *tbm)
     360                 : {
     361                 :     int         ndecoded;
     362                 :     ItemPointer items;
     363                 : 
     364              15 :     items = ginPostingListDecodeAllSegments(ptr, len, &ndecoded);
     365              15 :     tbm_add_tuples(tbm, items, ndecoded, false);
     366              15 :     pfree(items);
     367                 : 
     368              15 :     return ndecoded;
     369                 : }
     370                 : 
     371                 : /*
     372                 :  * Merge two ordered arrays of itempointers, eliminating any duplicates.
     373                 :  *
     374                 :  * Returns a palloc'd array, and *nmerged is set to the number of items in
     375                 :  * the result, after eliminating duplicates.
     376                 :  */
     377                 : ItemPointer
     378           44240 : ginMergeItemPointers(ItemPointerData *a, uint32 na,
     379                 :                      ItemPointerData *b, uint32 nb,
     380                 :                      int *nmerged)
     381                 : {
     382                 :     ItemPointerData *dst;
     383                 : 
     384           44240 :     dst = (ItemPointer) palloc((na + nb) * sizeof(ItemPointerData));
     385                 : 
     386                 :     /*
     387                 :      * If the argument arrays don't overlap, we can just append them to each
     388                 :      * other.
     389                 :      */
     390           44240 :     if (na == 0 || nb == 0 || ginCompareItemPointers(&a[na - 1], &b[0]) < 0)
     391                 :     {
     392           24594 :         memcpy(dst, a, na * sizeof(ItemPointerData));
     393           24594 :         memcpy(&dst[na], b, nb * sizeof(ItemPointerData));
     394           24594 :         *nmerged = na + nb;
     395                 :     }
     396           19646 :     else if (ginCompareItemPointers(&b[nb - 1], &a[0]) < 0)
     397                 :     {
     398           19646 :         memcpy(dst, b, nb * sizeof(ItemPointerData));
     399           19646 :         memcpy(&dst[nb], a, na * sizeof(ItemPointerData));
     400           19646 :         *nmerged = na + nb;
     401                 :     }
     402                 :     else
     403                 :     {
     404 UBC           0 :         ItemPointerData *dptr = dst;
     405               0 :         ItemPointerData *aptr = a;
     406               0 :         ItemPointerData *bptr = b;
     407                 : 
     408               0 :         while (aptr - a < na && bptr - b < nb)
     409                 :         {
     410               0 :             int         cmp = ginCompareItemPointers(aptr, bptr);
     411                 : 
     412               0 :             if (cmp > 0)
     413               0 :                 *dptr++ = *bptr++;
     414               0 :             else if (cmp == 0)
     415                 :             {
     416                 :                 /* only keep one copy of the identical items */
     417               0 :                 *dptr++ = *bptr++;
     418               0 :                 aptr++;
     419                 :             }
     420                 :             else
     421               0 :                 *dptr++ = *aptr++;
     422                 :         }
     423                 : 
     424               0 :         while (aptr - a < na)
     425               0 :             *dptr++ = *aptr++;
     426                 : 
     427               0 :         while (bptr - b < nb)
     428               0 :             *dptr++ = *bptr++;
     429                 : 
     430               0 :         *nmerged = dptr - dst;
     431                 :     }
     432                 : 
     433 CBC       44240 :     return dst;
     434                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a