LCOV - differential code coverage report
Current view: top level - src/backend/access/gin - ginpostinglist.c (source / functions) Coverage Total Hit UBC GNC CBC DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 87.3 % 142 124 18 1 123 1
Current Date: 2023-04-08 17:13:01 Functions: 100.0 % 9 9 1 8 1
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 (180,240] days: 100.0 % 1 1 1
Legend: Lines: hit not hit (240..) days: 87.2 % 141 123 18 123
Function coverage date bins:
(180,240] days: 100.0 % 1 1 1
(240..) days: 100.0 % 8 8 8

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * ginpostinglist.c
                                  4                 :  *    routines for dealing with posting lists.
                                  5                 :  *
                                  6                 :  *
                                  7                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                  8                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                  9                 :  *
                                 10                 :  * IDENTIFICATION
                                 11                 :  *          src/backend/access/gin/ginpostinglist.c
                                 12                 :  *-------------------------------------------------------------------------
                                 13                 :  */
                                 14                 : 
                                 15                 : #include "postgres.h"
                                 16                 : 
                                 17                 : #include "access/gin_private.h"
                                 18                 : 
                                 19                 : #ifdef USE_ASSERT_CHECKING
                                 20                 : #define CHECK_ENCODING_ROUNDTRIP
                                 21                 : #endif
                                 22                 : 
                                 23                 : /*
                                 24                 :  * For encoding purposes, item pointers are represented as 64-bit unsigned
                                 25                 :  * integers. The lowest 11 bits represent the offset number, and the next
                                 26                 :  * lowest 32 bits are the block number. That leaves 21 bits unused, i.e.
                                 27                 :  * only 43 low bits are used.
                                 28                 :  *
                                 29                 :  * 11 bits is enough for the offset number, because MaxHeapTuplesPerPage <
                                 30                 :  * 2^11 on all supported block sizes. We are frugal with the bits, because
                                 31                 :  * smaller integers use fewer bytes in the varbyte encoding, saving disk
                                 32                 :  * space. (If we get a new table AM in the future that wants to use the full
                                 33                 :  * range of possible offset numbers, we'll need to change this.)
                                 34                 :  *
                                 35                 :  * These 43-bit integers are encoded using varbyte encoding. In each byte,
                                 36                 :  * the 7 low bits contain data, while the highest bit is a continuation bit.
                                 37                 :  * When the continuation bit is set, the next byte is part of the same
                                 38                 :  * integer, otherwise this is the last byte of this integer. 43 bits need
                                 39                 :  * at most 7 bytes in this encoding:
                                 40                 :  *
                                 41                 :  * 0XXXXXXX
                                 42                 :  * 1XXXXXXX 0XXXXYYY
                                 43                 :  * 1XXXXXXX 1XXXXYYY 0YYYYYYY
                                 44                 :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 0YYYYYYY
                                 45                 :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
                                 46                 :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
                                 47                 :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0uuuuuuY
                                 48                 :  *
                                 49                 :  * X = bits used for offset number
                                 50                 :  * Y = bits used for block number
                                 51                 :  * u = unused bit
                                 52                 :  *
                                 53                 :  * The bytes are in stored in little-endian order.
                                 54                 :  *
                                 55                 :  * An important property of this encoding is that removing an item from list
                                 56                 :  * never increases the size of the resulting compressed posting list. Proof:
                                 57                 :  *
                                 58                 :  * Removing number is actually replacement of two numbers with their sum. We
                                 59                 :  * have to prove that varbyte encoding of a sum can't be longer than varbyte
                                 60                 :  * encoding of its summands. Sum of two numbers is at most one bit wider than
                                 61                 :  * the larger of the summands. Widening a number by one bit enlarges its length
                                 62                 :  * in varbyte encoding by at most one byte. Therefore, varbyte encoding of sum
                                 63                 :  * is at most one byte longer than varbyte encoding of larger summand. Lesser
                                 64                 :  * summand is at least one byte, so the sum cannot take more space than the
                                 65                 :  * summands, Q.E.D.
                                 66                 :  *
                                 67                 :  * This property greatly simplifies VACUUM, which can assume that posting
                                 68                 :  * lists always fit on the same page after vacuuming. Note that even though
                                 69                 :  * that holds for removing items from a posting list, you must also be
                                 70                 :  * careful to not cause expansion e.g. when merging uncompressed items on the
                                 71                 :  * page into the compressed lists, when vacuuming.
                                 72                 :  */
                                 73                 : 
                                 74                 : /*
                                 75                 :  * How many bits do you need to encode offset number? OffsetNumber is a 16-bit
                                 76                 :  * integer, but you can't fit that many items on a page. 11 ought to be more
                                 77                 :  * than enough. It's tempting to derive this from MaxHeapTuplesPerPage, and
                                 78                 :  * use the minimum number of bits, but that would require changing the on-disk
                                 79                 :  * format if MaxHeapTuplesPerPage changes. Better to leave some slack.
                                 80                 :  */
                                 81                 : #define MaxHeapTuplesPerPageBits        11
                                 82                 : 
                                 83                 : /* Max. number of bytes needed to encode the largest supported integer. */
                                 84                 : #define MaxBytesPerInteger              7
                                 85                 : 
                                 86                 : static inline uint64
 3364 heikki.linnakangas         87 CBC    17310561 : itemptr_to_uint64(const ItemPointer iptr)
                                 88                 : {
                                 89                 :     uint64      val;
                                 90                 : 
                                 91        17310561 :     Assert(ItemPointerIsValid(iptr));
 2203 alvherre                   92        17310561 :     Assert(GinItemPointerGetOffsetNumber(iptr) < (1 << MaxHeapTuplesPerPageBits));
                                 93                 : 
                                 94        17310561 :     val = GinItemPointerGetBlockNumber(iptr);
 3364 heikki.linnakangas         95        17310561 :     val <<= MaxHeapTuplesPerPageBits;
 2203 alvherre                   96        17310561 :     val |= GinItemPointerGetOffsetNumber(iptr);
                                 97                 : 
 3364 heikki.linnakangas         98        17310561 :     return val;
                                 99                 : }
                                100                 : 
                                101                 : static inline void
                                102        33594173 : uint64_to_itemptr(uint64 val, ItemPointer iptr)
                                103                 : {
 2203 alvherre                  104        33594173 :     GinItemPointerSetOffsetNumber(iptr, val & ((1 << MaxHeapTuplesPerPageBits) - 1));
 3364 heikki.linnakangas        105        33594173 :     val = val >> MaxHeapTuplesPerPageBits;
 2203 alvherre                  106        33594173 :     GinItemPointerSetBlockNumber(iptr, val);
                                107                 : 
 3364 heikki.linnakangas        108        33594173 :     Assert(ItemPointerIsValid(iptr));
                                109        33594173 : }
                                110                 : 
                                111                 : /*
                                112                 :  * Varbyte-encode 'val' into *ptr. *ptr is incremented to next integer.
                                113                 :  */
                                114                 : static void
                                115        16397095 : encode_varbyte(uint64 val, unsigned char **ptr)
                                116                 : {
                                117        16397095 :     unsigned char *p = *ptr;
                                118                 : 
                                119        16694602 :     while (val > 0x7F)
                                120                 :     {
                                121          297507 :         *(p++) = 0x80 | (val & 0x7F);
                                122          297507 :         val >>= 7;
                                123                 :     }
                                124        16397095 :     *(p++) = (unsigned char) val;
                                125                 : 
                                126        16397095 :     *ptr = p;
                                127        16397095 : }
                                128                 : 
                                129                 : /*
                                130                 :  * Decode varbyte-encoded integer at *ptr. *ptr is incremented to next integer.
                                131                 :  */
                                132                 : static uint64
                                133        33594173 : decode_varbyte(unsigned char **ptr)
                                134                 : {
                                135                 :     uint64      val;
                                136        33594173 :     unsigned char *p = *ptr;
                                137                 :     uint64      c;
                                138                 : 
                                139                 :     /* 1st byte */
                                140        33594173 :     c = *(p++);
                                141        33594173 :     val = c & 0x7F;
                                142        33594173 :     if (c & 0x80)
                                143                 :     {
                                144                 :         /* 2nd byte */
                                145          873201 :         c = *(p++);
                                146          873201 :         val |= (c & 0x7F) << 7;
                                147          873201 :         if (c & 0x80)
                                148                 :         {
                                149                 :             /* 3rd byte */
                                150           90630 :             c = *(p++);
                                151           90630 :             val |= (c & 0x7F) << 14;
                                152           90630 :             if (c & 0x80)
                                153                 :             {
                                154                 :                 /* 4th byte */
                                155               2 :                 c = *(p++);
                                156               2 :                 val |= (c & 0x7F) << 21;
                                157               2 :                 if (c & 0x80)
                                158                 :                 {
                                159                 :                     /* 5th byte */
                                160               2 :                     c = *(p++);
                                161               2 :                     val |= (c & 0x7F) << 28;
                                162               2 :                     if (c & 0x80)
                                163                 :                     {
                                164                 :                         /* 6th byte */
                                165               2 :                         c = *(p++);
                                166               2 :                         val |= (c & 0x7F) << 35;
                                167               2 :                         if (c & 0x80)
                                168                 :                         {
                                169                 :                             /* 7th byte, should not have continuation bit */
                                170               2 :                             c = *(p++);
                                171               2 :                             val |= c << 42;
 1320                           172               2 :                             Assert((c & 0x80) == 0);
                                173                 :                         }
                                174                 :                     }
                                175                 :                 }
                                176                 :             }
                                177                 :         }
                                178                 :     }
                                179                 : 
 3364                           180        33594173 :     *ptr = p;
                                181                 : 
                                182        33594173 :     return val;
                                183                 : }
                                184                 : 
                                185                 : /*
                                186                 :  * Encode a posting list.
                                187                 :  *
                                188                 :  * The encoded list is returned in a palloc'd struct, which will be at most
                                189                 :  * 'maxsize' bytes in size.  The number items in the returned segment is
                                190                 :  * returned in *nwritten. If it's not equal to nipd, not all the items fit
                                191                 :  * in 'maxsize', and only the first *nwritten were encoded.
                                192                 :  *
                                193                 :  * The allocated size of the returned struct is short-aligned, and the padding
                                194                 :  * byte at the end, if any, is zero.
                                195                 :  */
                                196                 : GinPostingList *
                                197          301120 : ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize,
                                198                 :                        int *nwritten)
                                199                 : {
                                200                 :     uint64      prev;
                                201          301120 :     int         totalpacked = 0;
                                202                 :     int         maxbytes;
                                203                 :     GinPostingList *result;
                                204                 :     unsigned char *ptr;
                                205                 :     unsigned char *endptr;
                                206                 : 
 3289                           207          301120 :     maxsize = SHORTALIGN_DOWN(maxsize);
                                208                 : 
 3364                           209          301120 :     result = palloc(maxsize);
                                210                 : 
                                211          301120 :     maxbytes = maxsize - offsetof(GinPostingList, bytes);
 3289                           212          301120 :     Assert(maxbytes > 0);
                                213                 : 
                                214                 :     /* Store the first special item */
 3364                           215          301120 :     result->first = ipd[0];
                                216                 : 
                                217          301120 :     prev = itemptr_to_uint64(&result->first);
                                218                 : 
                                219          301120 :     ptr = result->bytes;
                                220          301120 :     endptr = result->bytes + maxbytes;
                                221        16696110 :     for (totalpacked = 1; totalpacked < nipd; totalpacked++)
                                222                 :     {
                                223        16397095 :         uint64      val = itemptr_to_uint64(&ipd[totalpacked]);
                                224        16397095 :         uint64      delta = val - prev;
                                225                 : 
 3260 bruce                     226        16397095 :         Assert(val > prev);
                                227                 : 
 1320 heikki.linnakangas        228        16397095 :         if (endptr - ptr >= MaxBytesPerInteger)
 3364                           229        16382642 :             encode_varbyte(delta, &ptr);
                                230                 :         else
                                231                 :         {
                                232                 :             /*
                                233                 :              * There are less than 7 bytes left. Have to check if the next
                                234                 :              * item fits in that space before writing it out.
                                235                 :              */
                                236                 :             unsigned char buf[MaxBytesPerInteger];
                                237           14453 :             unsigned char *p = buf;
                                238                 : 
                                239           14453 :             encode_varbyte(delta, &p);
                                240           14453 :             if (p - buf > (endptr - ptr))
 3260 bruce                     241            2105 :                 break;          /* output is full */
                                242                 : 
 3364 heikki.linnakangas        243           12348 :             memcpy(ptr, buf, p - buf);
                                244           12348 :             ptr += (p - buf);
                                245                 :         }
                                246        16394990 :         prev = val;
                                247                 :     }
                                248          301120 :     result->nbytes = ptr - result->bytes;
                                249                 : 
                                250                 :     /*
                                251                 :      * If we wrote an odd number of bytes, zero out the padding byte at the
                                252                 :      * end.
                                253                 :      */
 3289                           254          301120 :     if (result->nbytes != SHORTALIGN(result->nbytes))
                                255           30366 :         result->bytes[result->nbytes] = 0;
                                256                 : 
 3364                           257          301120 :     if (nwritten)
                                258           30181 :         *nwritten = totalpacked;
                                259                 : 
                                260          301120 :     Assert(SizeOfGinPostingList(result) <= maxsize);
                                261                 : 
                                262                 :     /*
                                263                 :      * Check that the encoded segment decodes back to the original items.
                                264                 :      */
                                265                 : #if defined (CHECK_ENCODING_ROUNDTRIP)
                                266                 :     {
                                267                 :         int         ndecoded;
                                268          301120 :         ItemPointer tmp = ginPostingListDecode(result, &ndecoded);
                                269                 : 
                                270          301120 :         Assert(ndecoded == totalpacked);
 1128 tgl                       271          301120 :         Assert(memcmp(tmp, ipd, ndecoded * sizeof(ItemPointerData)) == 0);
 3364 heikki.linnakangas        272          301120 :         pfree(tmp);
                                273                 :     }
                                274                 : #endif
                                275                 : 
                                276          301120 :     return result;
                                277                 : }
                                278                 : 
                                279                 : /*
                                280                 :  * Decode a compressed posting list into an array of item pointers.
                                281                 :  * The number of items is returned in *ndecoded.
                                282                 :  */
                                283                 : ItemPointer
  202 pg                        284 GNC      610670 : ginPostingListDecode(GinPostingList *plist, int *ndecoded_out)
                                285                 : {
 3364 heikki.linnakangas        286 CBC     1221340 :     return ginPostingListDecodeAllSegments(plist,
                                287          610670 :                                            SizeOfGinPostingList(plist),
                                288                 :                                            ndecoded_out);
                                289                 : }
                                290                 : 
                                291                 : /*
                                292                 :  * Decode multiple posting list segments into an array of item pointers.
                                293                 :  * The number of items is returned in *ndecoded_out. The segments are stored
                                294                 :  * one after each other, with total size 'len' bytes.
                                295                 :  */
                                296                 : ItemPointer
                                297          610761 : ginPostingListDecodeAllSegments(GinPostingList *segment, int len, int *ndecoded_out)
                                298                 : {
                                299                 :     ItemPointer result;
                                300                 :     int         nallocated;
                                301                 :     uint64      val;
                                302          610761 :     char       *endseg = ((char *) segment) + len;
                                303                 :     int         ndecoded;
                                304                 :     unsigned char *ptr;
                                305                 :     unsigned char *endptr;
                                306                 : 
                                307                 :     /*
                                308                 :      * Guess an initial size of the array.
                                309                 :      */
                                310          610761 :     nallocated = segment->nbytes * 2 + 1;
                                311          610761 :     result = palloc(nallocated * sizeof(ItemPointerData));
                                312                 : 
                                313          610761 :     ndecoded = 0;
                                314         1223107 :     while ((char *) segment < endseg)
                                315                 :     {
                                316                 :         /* enlarge output array if needed */
                                317          612346 :         if (ndecoded >= nallocated)
                                318                 :         {
 3364 heikki.linnakangas        319 UBC           0 :             nallocated *= 2;
                                320               0 :             result = repalloc(result, nallocated * sizeof(ItemPointerData));
                                321                 :         }
                                322                 : 
                                323                 :         /* copy the first item */
 3296 heikki.linnakangas        324 CBC      612346 :         Assert(OffsetNumberIsValid(ItemPointerGetOffsetNumber(&segment->first)));
                                325          612346 :         Assert(ndecoded == 0 || ginCompareItemPointers(&segment->first, &result[ndecoded - 1]) > 0);
 3364                           326          612346 :         result[ndecoded] = segment->first;
                                327          612346 :         ndecoded++;
                                328                 : 
                                329          612346 :         val = itemptr_to_uint64(&segment->first);
                                330          612346 :         ptr = segment->bytes;
                                331          612346 :         endptr = segment->bytes + segment->nbytes;
                                332        34206519 :         while (ptr < endptr)
                                333                 :         {
                                334                 :             /* enlarge output array if needed */
                                335        33594173 :             if (ndecoded >= nallocated)
                                336                 :             {
                                337             290 :                 nallocated *= 2;
                                338             290 :                 result = repalloc(result, nallocated * sizeof(ItemPointerData));
                                339                 :             }
                                340                 : 
                                341        33594173 :             val += decode_varbyte(&ptr);
                                342                 : 
                                343        33594173 :             uint64_to_itemptr(val, &result[ndecoded]);
                                344        33594173 :             ndecoded++;
                                345                 :         }
                                346          612346 :         segment = GinNextPostingListSegment(segment);
                                347                 :     }
                                348                 : 
                                349          610761 :     if (ndecoded_out)
                                350          610761 :         *ndecoded_out = ndecoded;
                                351          610761 :     return result;
                                352                 : }
                                353                 : 
                                354                 : /*
                                355                 :  * Add all item pointers from a bunch of posting lists to a TIDBitmap.
                                356                 :  */
                                357                 : int
                                358              15 : ginPostingListDecodeAllSegmentsToTbm(GinPostingList *ptr, int len,
                                359                 :                                      TIDBitmap *tbm)
                                360                 : {
                                361                 :     int         ndecoded;
                                362                 :     ItemPointer items;
                                363                 : 
                                364              15 :     items = ginPostingListDecodeAllSegments(ptr, len, &ndecoded);
                                365              15 :     tbm_add_tuples(tbm, items, ndecoded, false);
                                366              15 :     pfree(items);
                                367                 : 
                                368              15 :     return ndecoded;
                                369                 : }
                                370                 : 
                                371                 : /*
                                372                 :  * Merge two ordered arrays of itempointers, eliminating any duplicates.
                                373                 :  *
                                374                 :  * Returns a palloc'd array, and *nmerged is set to the number of items in
                                375                 :  * the result, after eliminating duplicates.
                                376                 :  */
                                377                 : ItemPointer
 3303                           378           44240 : ginMergeItemPointers(ItemPointerData *a, uint32 na,
                                379                 :                      ItemPointerData *b, uint32 nb,
                                380                 :                      int *nmerged)
                                381                 : {
                                382                 :     ItemPointerData *dst;
                                383                 : 
                                384           44240 :     dst = (ItemPointer) palloc((na + nb) * sizeof(ItemPointerData));
                                385                 : 
                                386                 :     /*
                                387                 :      * If the argument arrays don't overlap, we can just append them to each
                                388                 :      * other.
                                389                 :      */
 3364                           390           44240 :     if (na == 0 || nb == 0 || ginCompareItemPointers(&a[na - 1], &b[0]) < 0)
                                391                 :     {
 3303                           392           24594 :         memcpy(dst, a, na * sizeof(ItemPointerData));
                                393           24594 :         memcpy(&dst[na], b, nb * sizeof(ItemPointerData));
                                394           24594 :         *nmerged = na + nb;
                                395                 :     }
 3364                           396           19646 :     else if (ginCompareItemPointers(&b[nb - 1], &a[0]) < 0)
                                397                 :     {
 3303                           398           19646 :         memcpy(dst, b, nb * sizeof(ItemPointerData));
                                399           19646 :         memcpy(&dst[nb], a, na * sizeof(ItemPointerData));
                                400           19646 :         *nmerged = na + nb;
                                401                 :     }
                                402                 :     else
                                403                 :     {
 3303 heikki.linnakangas        404 UBC           0 :         ItemPointerData *dptr = dst;
                                405               0 :         ItemPointerData *aptr = a;
                                406               0 :         ItemPointerData *bptr = b;
                                407                 : 
 3364                           408               0 :         while (aptr - a < na && bptr - b < nb)
                                409                 :         {
                                410               0 :             int         cmp = ginCompareItemPointers(aptr, bptr);
                                411                 : 
                                412               0 :             if (cmp > 0)
                                413               0 :                 *dptr++ = *bptr++;
                                414               0 :             else if (cmp == 0)
                                415                 :             {
                                416                 :                 /* only keep one copy of the identical items */
                                417               0 :                 *dptr++ = *bptr++;
                                418               0 :                 aptr++;
                                419                 :             }
                                420                 :             else
                                421               0 :                 *dptr++ = *aptr++;
                                422                 :         }
                                423                 : 
                                424               0 :         while (aptr - a < na)
 3441                           425               0 :             *dptr++ = *aptr++;
                                426                 : 
 3364                           427               0 :         while (bptr - b < nb)
                                428               0 :             *dptr++ = *bptr++;
                                429                 : 
 3303                           430               0 :         *nmerged = dptr - dst;
                                431                 :     }
                                432                 : 
 3303 heikki.linnakangas        433 CBC       44240 :     return dst;
                                434                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a