LCOV - differential code coverage report
Current view: top level - src/backend/lib - integerset.c (source / functions) Coverage Total Hit UIC UBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 97.7 % 264 258 2 4 81 177 2 79 2
Current Date: 2023-04-08 17:13:01 Functions: 100.0 % 16 16 7 1 8 7
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 (240..) days: 97.7 % 264 258 2 4 81 177 2 79
Legend: Lines: hit not hit Function coverage date bins:
(240..) days: 69.6 % 23 16 7 1 8 7

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * integerset.c
                                  4                 :  *    Data structure to hold a large set of 64-bit integers efficiently
                                  5                 :  *
                                  6                 :  * IntegerSet provides an in-memory data structure to hold a set of
                                  7                 :  * arbitrary 64-bit integers.  Internally, the values are stored in a
                                  8                 :  * B-tree, with a special packed representation at the leaf level using
                                  9                 :  * the Simple-8b algorithm, which can pack clusters of nearby values
                                 10                 :  * very tightly.
                                 11                 :  *
                                 12                 :  * Memory consumption depends on the number of values stored, but also
                                 13                 :  * on how far the values are from each other.  In the best case, with
                                 14                 :  * long runs of consecutive integers, memory consumption can be as low as
                                 15                 :  * 0.1 bytes per integer.  In the worst case, if integers are more than
                                 16                 :  * 2^32 apart, it uses about 8 bytes per integer.  In typical use, the
                                 17                 :  * consumption per integer is somewhere between those extremes, depending
                                 18                 :  * on the range of integers stored, and how "clustered" they are.
                                 19                 :  *
                                 20                 :  *
                                 21                 :  * Interface
                                 22                 :  * ---------
                                 23                 :  *
                                 24                 :  *  intset_create           - Create a new, empty set
                                 25                 :  *  intset_add_member       - Add an integer to the set
                                 26                 :  *  intset_is_member        - Test if an integer is in the set
                                 27                 :  *  intset_begin_iterate    - Begin iterating through all integers in set
                                 28                 :  *  intset_iterate_next     - Return next set member, if any
                                 29                 :  *
                                 30                 :  * intset_create() creates the set in the current memory context.  Subsequent
                                 31                 :  * operations that add to the data structure will continue to allocate from
                                 32                 :  * that same context, even if it's not current anymore.
                                 33                 :  *
                                 34                 :  * Note that there is no function to free an integer set.  If you need to do
                                 35                 :  * that, create a dedicated memory context to hold it, and destroy the memory
                                 36                 :  * context instead.
                                 37                 :  *
                                 38                 :  *
                                 39                 :  * Limitations
                                 40                 :  * -----------
                                 41                 :  *
                                 42                 :  * - Values must be added in order.  (Random insertions would require
                                 43                 :  *   splitting nodes, which hasn't been implemented.)
                                 44                 :  *
                                 45                 :  * - Values cannot be added while iteration is in progress.
                                 46                 :  *
                                 47                 :  * - No support for removing values.
                                 48                 :  *
                                 49                 :  * None of these limitations are fundamental to the data structure, so they
                                 50                 :  * could be lifted if needed, by writing some new code.  But the current
                                 51                 :  * users of this facility don't need them.
                                 52                 :  *
                                 53                 :  *
                                 54                 :  * References
                                 55                 :  * ----------
                                 56                 :  *
                                 57                 :  * Simple-8b encoding is based on:
                                 58                 :  *
                                 59                 :  * Vo Ngoc Anh, Alistair Moffat, Index compression using 64-bit words,
                                 60                 :  *   Software - Practice & Experience, v.40 n.2, p.131-147, February 2010
                                 61                 :  *   (https://doi.org/10.1002/spe.948)
                                 62                 :  *
                                 63                 :  *
                                 64                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                 65                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                 66                 :  *
                                 67                 :  * IDENTIFICATION
                                 68                 :  *    src/backend/lib/integerset.c
                                 69                 :  *
                                 70                 :  *-------------------------------------------------------------------------
                                 71                 :  */
                                 72                 : #include "postgres.h"
                                 73                 : 
                                 74                 : #include "access/htup_details.h"
                                 75                 : #include "lib/integerset.h"
                                 76                 : #include "port/pg_bitutils.h"
                                 77                 : #include "utils/memutils.h"
                                 78                 : 
                                 79                 : 
                                 80                 : /*
                                 81                 :  * Maximum number of integers that can be encoded in a single Simple-8b
                                 82                 :  * codeword. (Defined here before anything else, so that we can size arrays
                                 83                 :  * using this.)
                                 84                 :  */
                                 85                 : #define SIMPLE8B_MAX_VALUES_PER_CODEWORD 240
                                 86                 : 
                                 87                 : /*
                                 88                 :  * Parameters for shape of the in-memory B-tree.
                                 89                 :  *
                                 90                 :  * These set the size of each internal and leaf node.  They don't necessarily
                                 91                 :  * need to be the same, because the tree is just an in-memory structure.
                                 92                 :  * With the default 64, each node is about 1 kb.
                                 93                 :  *
                                 94                 :  * If you change these, you must recalculate MAX_TREE_LEVELS, too!
                                 95                 :  */
                                 96                 : #define MAX_INTERNAL_ITEMS  64
                                 97                 : #define MAX_LEAF_ITEMS  64
                                 98                 : 
                                 99                 : /*
                                100                 :  * Maximum height of the tree.
                                101                 :  *
                                102                 :  * MAX_TREE_ITEMS is calculated from the "fan-out" of the B-tree.  The
                                103                 :  * theoretical maximum number of items that we can store in a set is 2^64,
                                104                 :  * so MAX_TREE_LEVELS should be set so that:
                                105                 :  *
                                106                 :  *   MAX_LEAF_ITEMS * MAX_INTERNAL_ITEMS ^ (MAX_TREE_LEVELS - 1) >= 2^64.
                                107                 :  *
                                108                 :  * In practice, we'll need far fewer levels, because you will run out of
                                109                 :  * memory long before reaching that number, but let's be conservative.
                                110                 :  */
                                111                 : #define MAX_TREE_LEVELS     11
                                112                 : 
                                113                 : /*
                                114                 :  * Node structures, for the in-memory B-tree.
                                115                 :  *
                                116                 :  * An internal node holds a number of downlink pointers to leaf nodes, or
                                117                 :  * to internal nodes on a lower level.  For each downlink, the key value
                                118                 :  * corresponding to the lower level node is stored in a sorted array.  The
                                119                 :  * stored key values are low keys.  In other words, if the downlink has value
                                120                 :  * X, then all items stored on that child are >= X.
                                121                 :  *
                                122                 :  * Each leaf node holds a number of "items", with a varying number of
                                123                 :  * integers packed into each item.  Each item consists of two 64-bit words:
                                124                 :  * The first word holds the first integer stored in the item, in plain format.
                                125                 :  * The second word contains between 0 and 240 more integers, packed using
                                126                 :  * Simple-8b encoding.  By storing the first integer in plain, unpacked,
                                127                 :  * format, we can use binary search to quickly find an item that holds (or
                                128                 :  * would hold) a particular integer.  And by storing the rest in packed form,
                                129                 :  * we still get pretty good memory density, if there are clusters of integers
                                130                 :  * with similar values.
                                131                 :  *
                                132                 :  * Each leaf node also has a pointer to the next leaf node, so that the leaf
                                133                 :  * nodes can be easily walked from beginning to end when iterating.
                                134                 :  */
                                135                 : typedef struct intset_node intset_node;
                                136                 : typedef struct intset_leaf_node intset_leaf_node;
                                137                 : typedef struct intset_internal_node intset_internal_node;
                                138                 : 
                                139                 : /* Common structure of both leaf and internal nodes. */
                                140                 : struct intset_node
                                141                 : {
                                142                 :     uint16      level;          /* tree level of this node */
                                143                 :     uint16      num_items;      /* number of items in this node */
                                144                 : };
                                145                 : 
                                146                 : /* Internal node */
                                147                 : struct intset_internal_node
                                148                 : {
                                149                 :     /* common header, must match intset_node */
                                150                 :     uint16      level;          /* >= 1 on internal nodes */
                                151                 :     uint16      num_items;
                                152                 : 
                                153                 :     /*
                                154                 :      * 'values' is an array of key values, and 'downlinks' are pointers to
                                155                 :      * lower-level nodes, corresponding to the key values.
                                156                 :      */
                                157                 :     uint64      values[MAX_INTERNAL_ITEMS];
                                158                 :     intset_node *downlinks[MAX_INTERNAL_ITEMS];
                                159                 : };
                                160                 : 
                                161                 : /* Leaf node */
                                162                 : typedef struct
                                163                 : {
                                164                 :     uint64      first;          /* first integer in this item */
                                165                 :     uint64      codeword;       /* simple8b encoded differences from 'first' */
                                166                 : } leaf_item;
                                167                 : 
                                168                 : #define MAX_VALUES_PER_LEAF_ITEM    (1 + SIMPLE8B_MAX_VALUES_PER_CODEWORD)
                                169                 : 
                                170                 : struct intset_leaf_node
                                171                 : {
                                172                 :     /* common header, must match intset_node */
                                173                 :     uint16      level;          /* 0 on leafs */
                                174                 :     uint16      num_items;
                                175                 : 
                                176                 :     intset_leaf_node *next;     /* right sibling, if any */
                                177                 : 
                                178                 :     leaf_item   items[MAX_LEAF_ITEMS];
                                179                 : };
                                180                 : 
                                181                 : /*
                                182                 :  * We buffer insertions in a simple array, before packing and inserting them
                                183                 :  * into the B-tree.  MAX_BUFFERED_VALUES sets the size of the buffer.  The
                                184                 :  * encoder assumes that it is large enough that we can always fill a leaf
                                185                 :  * item with buffered new items.  In other words, MAX_BUFFERED_VALUES must be
                                186                 :  * larger than MAX_VALUES_PER_LEAF_ITEM.  For efficiency, make it much larger.
                                187                 :  */
                                188                 : #define MAX_BUFFERED_VALUES         (MAX_VALUES_PER_LEAF_ITEM * 2)
                                189                 : 
                                190                 : /*
                                191                 :  * IntegerSet is the top-level object representing the set.
                                192                 :  *
                                193                 :  * The integers are stored in an in-memory B-tree structure, plus an array
                                194                 :  * for newly-added integers.  IntegerSet also tracks information about memory
                                195                 :  * usage, as well as the current position when iterating the set with
                                196                 :  * intset_begin_iterate / intset_iterate_next.
                                197                 :  */
                                198                 : struct IntegerSet
                                199                 : {
                                200                 :     /*
                                201                 :      * 'context' is the memory context holding this integer set and all its
                                202                 :      * tree nodes.
                                203                 :      *
                                204                 :      * 'mem_used' tracks the amount of memory used.  We don't do anything with
                                205                 :      * it in integerset.c itself, but the callers can ask for it with
                                206                 :      * intset_memory_usage().
                                207                 :      */
                                208                 :     MemoryContext context;
                                209                 :     uint64      mem_used;
                                210                 : 
                                211                 :     uint64      num_entries;    /* total # of values in the set */
                                212                 :     uint64      highest_value;  /* highest value stored in this set */
                                213                 : 
                                214                 :     /*
                                215                 :      * B-tree to hold the packed values.
                                216                 :      *
                                217                 :      * 'rightmost_nodes' hold pointers to the rightmost node on each level.
                                218                 :      * rightmost_parent[0] is rightmost leaf, rightmost_parent[1] is its
                                219                 :      * parent, and so forth, all the way up to the root. These are needed when
                                220                 :      * adding new values. (Currently, we require that new values are added at
                                221                 :      * the end.)
                                222                 :      */
                                223                 :     int         num_levels;     /* height of the tree */
                                224                 :     intset_node *root;          /* root node */
                                225                 :     intset_node *rightmost_nodes[MAX_TREE_LEVELS];
                                226                 :     intset_leaf_node *leftmost_leaf;    /* leftmost leaf node */
                                227                 : 
                                228                 :     /*
                                229                 :      * Holding area for new items that haven't been inserted to the tree yet.
                                230                 :      */
                                231                 :     uint64      buffered_values[MAX_BUFFERED_VALUES];
                                232                 :     int         num_buffered_values;
                                233                 : 
                                234                 :     /*
                                235                 :      * Iterator support.
                                236                 :      *
                                237                 :      * 'iter_values' is an array of integers ready to be returned to the
                                238                 :      * caller; 'iter_num_values' is the length of that array, and
                                239                 :      * 'iter_valueno' is the next index.  'iter_node' and 'iter_itemno' point
                                240                 :      * to the leaf node, and item within the leaf node, to get the next batch
                                241                 :      * of values from.
                                242                 :      *
                                243                 :      * Normally, 'iter_values' points to 'iter_values_buf', which holds items
                                244                 :      * decoded from a leaf item.  But after we have scanned the whole B-tree,
                                245                 :      * we iterate through all the unbuffered values, too, by pointing
                                246                 :      * iter_values to 'buffered_values'.
                                247                 :      */
                                248                 :     bool        iter_active;    /* is iteration in progress? */
                                249                 : 
                                250                 :     const uint64 *iter_values;
                                251                 :     int         iter_num_values;    /* number of elements in 'iter_values' */
                                252                 :     int         iter_valueno;   /* next index into 'iter_values' */
                                253                 : 
                                254                 :     intset_leaf_node *iter_node;    /* current leaf node */
                                255                 :     int         iter_itemno;    /* next item in 'iter_node' to decode */
                                256                 : 
                                257                 :     uint64      iter_values_buf[MAX_VALUES_PER_LEAF_ITEM];
                                258                 : };
                                259                 : 
                                260                 : /*
                                261                 :  * Prototypes for internal functions.
                                262                 :  */
                                263                 : static void intset_update_upper(IntegerSet *intset, int level,
                                264                 :                                 intset_node *child, uint64 child_key);
                                265                 : static void intset_flush_buffered_values(IntegerSet *intset);
                                266                 : 
                                267                 : static int  intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems,
                                268                 :                                   bool nextkey);
                                269                 : static int  intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems,
                                270                 :                                 bool nextkey);
                                271                 : 
                                272                 : static uint64 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base);
                                273                 : static int  simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base);
                                274                 : static bool simple8b_contains(uint64 codeword, uint64 key, uint64 base);
                                275                 : 
                                276                 : 
                                277                 : /*
                                278                 :  * Create a new, initially empty, integer set.
                                279                 :  *
                                280                 :  * The integer set is created in the current memory context.
                                281                 :  * We will do all subsequent allocations in the same context, too, regardless
                                282                 :  * of which memory context is current when new integers are added to the set.
                                283                 :  */
                                284                 : IntegerSet *
 1479 heikki.linnakangas        285 CBC         104 : intset_create(void)
                                286                 : {
                                287                 :     IntegerSet *intset;
                                288                 : 
                                289             104 :     intset = (IntegerSet *) palloc(sizeof(IntegerSet));
                                290             104 :     intset->context = CurrentMemoryContext;
                                291             104 :     intset->mem_used = GetMemoryChunkSpace(intset);
                                292                 : 
                                293             104 :     intset->num_entries = 0;
                                294             104 :     intset->highest_value = 0;
                                295                 : 
                                296             104 :     intset->num_levels = 0;
                                297             104 :     intset->root = NULL;
                                298             104 :     memset(intset->rightmost_nodes, 0, sizeof(intset->rightmost_nodes));
                                299             104 :     intset->leftmost_leaf = NULL;
                                300                 : 
                                301             104 :     intset->num_buffered_values = 0;
                                302                 : 
 1476 tgl                       303             104 :     intset->iter_active = false;
 1479 heikki.linnakangas        304             104 :     intset->iter_node = NULL;
                                305             104 :     intset->iter_itemno = 0;
                                306             104 :     intset->iter_valueno = 0;
                                307             104 :     intset->iter_num_values = 0;
 1476 tgl                       308             104 :     intset->iter_values = NULL;
                                309                 : 
 1479 heikki.linnakangas        310             104 :     return intset;
                                311                 : }
                                312                 : 
                                313                 : /*
                                314                 :  * Allocate a new node.
                                315                 :  */
                                316                 : static intset_internal_node *
                                317            3377 : intset_new_internal_node(IntegerSet *intset)
                                318                 : {
                                319                 :     intset_internal_node *n;
                                320                 : 
                                321            3377 :     n = (intset_internal_node *) MemoryContextAlloc(intset->context,
                                322                 :                                                     sizeof(intset_internal_node));
                                323            3377 :     intset->mem_used += GetMemoryChunkSpace(n);
                                324                 : 
                                325            3377 :     n->level = 0;                /* caller must set */
                                326            3377 :     n->num_items = 0;
                                327                 : 
                                328            3377 :     return n;
                                329                 : }
                                330                 : 
                                331                 : static intset_leaf_node *
                                332          211678 : intset_new_leaf_node(IntegerSet *intset)
                                333                 : {
                                334                 :     intset_leaf_node *n;
                                335                 : 
                                336          211678 :     n = (intset_leaf_node *) MemoryContextAlloc(intset->context,
                                337                 :                                                 sizeof(intset_leaf_node));
                                338          211678 :     intset->mem_used += GetMemoryChunkSpace(n);
                                339                 : 
                                340          211678 :     n->level = 0;
                                341          211678 :     n->num_items = 0;
                                342          211678 :     n->next = NULL;
                                343                 : 
                                344          211678 :     return n;
                                345                 : }
                                346                 : 
                                347                 : /*
                                348                 :  * Return the number of entries in the integer set.
                                349                 :  */
                                350                 : uint64
                                351              60 : intset_num_entries(IntegerSet *intset)
                                352                 : {
                                353              60 :     return intset->num_entries;
                                354                 : }
                                355                 : 
                                356                 : /*
                                357                 :  * Return the amount of memory used by the integer set.
                                358                 :  */
                                359                 : uint64
                                360               5 : intset_memory_usage(IntegerSet *intset)
                                361                 : {
                                362               5 :     return intset->mem_used;
                                363                 : }
                                364                 : 
                                365                 : /*
                                366                 :  * Add a value to the set.
                                367                 :  *
                                368                 :  * Values must be added in order.
                                369                 :  */
                                370                 : void
                                371       163004065 : intset_add_member(IntegerSet *intset, uint64 x)
                                372                 : {
 1476 tgl                       373       163004065 :     if (intset->iter_active)
 1476 tgl                       374 UBC           0 :         elog(ERROR, "cannot add new values to integer set while iteration is in progress");
                                375                 : 
 1479 heikki.linnakangas        376 CBC   163004065 :     if (x <= intset->highest_value && intset->num_entries > 0)
 1479 heikki.linnakangas        377 UBC           0 :         elog(ERROR, "cannot add value to integer set out of order");
                                378                 : 
 1479 heikki.linnakangas        379 CBC   163004065 :     if (intset->num_buffered_values >= MAX_BUFFERED_VALUES)
                                380                 :     {
                                381                 :         /* Time to flush our buffer */
                                382          567003 :         intset_flush_buffered_values(intset);
                                383          567003 :         Assert(intset->num_buffered_values < MAX_BUFFERED_VALUES);
                                384                 :     }
                                385                 : 
                                386                 :     /* Add it to the buffer of newly-added values */
                                387       163004065 :     intset->buffered_values[intset->num_buffered_values] = x;
                                388       163004065 :     intset->num_buffered_values++;
                                389       163004065 :     intset->num_entries++;
                                390       163004065 :     intset->highest_value = x;
                                391       163004065 : }
                                392                 : 
                                393                 : /*
                                394                 :  * Take a batch of buffered values, and pack them into the B-tree.
                                395                 :  */
                                396                 : static void
                                397          567003 : intset_flush_buffered_values(IntegerSet *intset)
                                398                 : {
                                399          567003 :     uint64     *values = intset->buffered_values;
                                400          567003 :     uint64      num_values = intset->num_buffered_values;
                                401          567003 :     int         num_packed = 0;
                                402                 :     intset_leaf_node *leaf;
                                403                 : 
                                404          567003 :     leaf = (intset_leaf_node *) intset->rightmost_nodes[0];
                                405                 : 
                                406                 :     /*
                                407                 :      * If the tree is completely empty, create the first leaf page, which is
                                408                 :      * also the root.
                                409                 :      */
                                410          567003 :     if (leaf == NULL)
                                411                 :     {
                                412                 :         /*
                                413                 :          * This is the very first item in the set.
                                414                 :          *
                                415                 :          * Allocate root node. It's also a leaf.
                                416                 :          */
                                417              14 :         leaf = intset_new_leaf_node(intset);
                                418                 : 
                                419              14 :         intset->root = (intset_node *) leaf;
                                420              14 :         intset->leftmost_leaf = leaf;
                                421              14 :         intset->rightmost_nodes[0] = (intset_node *) leaf;
                                422              14 :         intset->num_levels = 1;
                                423                 :     }
                                424                 : 
                                425                 :     /*
                                426                 :      * If there are less than MAX_VALUES_PER_LEAF_ITEM values in the buffer,
                                427                 :      * stop.  In most cases, we cannot encode that many values in a single
                                428                 :      * value, but this way, the encoder doesn't have to worry about running
                                429                 :      * out of input.
                                430                 :      */
                                431        14113906 :     while (num_values - num_packed >= MAX_VALUES_PER_LEAF_ITEM)
                                432                 :     {
                                433                 :         leaf_item   item;
                                434                 :         int         num_encoded;
                                435                 : 
                                436                 :         /*
                                437                 :          * Construct the next leaf item, packing as many buffered values as
                                438                 :          * possible.
                                439                 :          */
                                440        13546903 :         item.first = values[num_packed];
                                441        13546903 :         item.codeword = simple8b_encode(&values[num_packed + 1],
                                442                 :                                         &num_encoded,
                                443                 :                                         item.first);
                                444                 : 
                                445                 :         /*
                                446                 :          * Add the item to the node, allocating a new node if the old one is
                                447                 :          * full.
                                448                 :          */
                                449        13546903 :         if (leaf->num_items >= MAX_LEAF_ITEMS)
                                450                 :         {
                                451                 :             /* Allocate new leaf and link it to the tree */
                                452          211664 :             intset_leaf_node *old_leaf = leaf;
                                453                 : 
                                454          211664 :             leaf = intset_new_leaf_node(intset);
                                455          211664 :             old_leaf->next = leaf;
                                456          211664 :             intset->rightmost_nodes[0] = (intset_node *) leaf;
                                457          211664 :             intset_update_upper(intset, 1, (intset_node *) leaf, item.first);
                                458                 :         }
                                459        13546903 :         leaf->items[leaf->num_items++] = item;
                                460                 : 
                                461        13546903 :         num_packed += 1 + num_encoded;
                                462                 :     }
                                463                 : 
                                464                 :     /*
                                465                 :      * Move any remaining buffered values to the beginning of the array.
                                466                 :      */
                                467          567003 :     if (num_packed < intset->num_buffered_values)
                                468                 :     {
                                469          542105 :         memmove(&intset->buffered_values[0],
                                470          542105 :                 &intset->buffered_values[num_packed],
                                471          542105 :                 (intset->num_buffered_values - num_packed) * sizeof(uint64));
                                472                 :     }
                                473          567003 :     intset->num_buffered_values -= num_packed;
                                474          567003 : }
                                475                 : 
                                476                 : /*
                                477                 :  * Insert a downlink into parent node, after creating a new node.
                                478                 :  *
                                479                 :  * Recurses if the parent node is full, too.
                                480                 :  */
                                481                 : static void
                                482          215016 : intset_update_upper(IntegerSet *intset, int level, intset_node *child,
                                483                 :                     uint64 child_key)
                                484                 : {
                                485                 :     intset_internal_node *parent;
                                486                 : 
                                487          215016 :     Assert(level > 0);
                                488                 : 
                                489                 :     /*
                                490                 :      * Create a new root node, if necessary.
                                491                 :      */
                                492          215016 :     if (level >= intset->num_levels)
                                493                 :     {
                                494              25 :         intset_node *oldroot = intset->root;
                                495                 :         uint64      downlink_key;
                                496                 : 
                                497                 :         /* MAX_TREE_LEVELS should be more than enough, this shouldn't happen */
                                498              25 :         if (intset->num_levels == MAX_TREE_LEVELS)
 1479 heikki.linnakangas        499 UBC           0 :             elog(ERROR, "could not expand integer set, maximum number of levels reached");
 1479 heikki.linnakangas        500 CBC          25 :         intset->num_levels++;
                                501                 : 
                                502                 :         /*
                                503                 :          * Get the first value on the old root page, to be used as the
                                504                 :          * downlink.
                                505                 :          */
                                506              25 :         if (intset->root->level == 0)
                                507              10 :             downlink_key = ((intset_leaf_node *) oldroot)->items[0].first;
                                508                 :         else
                                509              15 :             downlink_key = ((intset_internal_node *) oldroot)->values[0];
                                510                 : 
                                511              25 :         parent = intset_new_internal_node(intset);
                                512              25 :         parent->level = level;
                                513              25 :         parent->values[0] = downlink_key;
                                514              25 :         parent->downlinks[0] = oldroot;
                                515              25 :         parent->num_items = 1;
                                516                 : 
                                517              25 :         intset->root = (intset_node *) parent;
                                518              25 :         intset->rightmost_nodes[level] = (intset_node *) parent;
                                519                 :     }
                                520                 : 
                                521                 :     /*
                                522                 :      * Place the downlink on the parent page.
                                523                 :      */
                                524          215016 :     parent = (intset_internal_node *) intset->rightmost_nodes[level];
                                525                 : 
                                526          215016 :     if (parent->num_items < MAX_INTERNAL_ITEMS)
                                527                 :     {
                                528          211664 :         parent->values[parent->num_items] = child_key;
                                529          211664 :         parent->downlinks[parent->num_items] = child;
                                530          211664 :         parent->num_items++;
                                531                 :     }
                                532                 :     else
                                533                 :     {
                                534                 :         /*
                                535                 :          * Doesn't fit.  Allocate new parent, with the downlink as the first
                                536                 :          * item on it, and recursively insert the downlink to the new parent
                                537                 :          * to the grandparent.
                                538                 :          */
                                539            3352 :         parent = intset_new_internal_node(intset);
                                540            3352 :         parent->level = level;
                                541            3352 :         parent->values[0] = child_key;
                                542            3352 :         parent->downlinks[0] = child;
                                543            3352 :         parent->num_items = 1;
                                544                 : 
                                545            3352 :         intset->rightmost_nodes[level] = (intset_node *) parent;
                                546                 : 
                                547            3352 :         intset_update_upper(intset, level + 1, (intset_node *) parent, child_key);
                                548                 :     }
                                549          215016 : }
                                550                 : 
                                551                 : /*
                                552                 :  * Does the set contain the given value?
                                553                 :  */
                                554                 : bool
                                555          903083 : intset_is_member(IntegerSet *intset, uint64 x)
                                556                 : {
                                557                 :     intset_node *node;
                                558                 :     intset_leaf_node *leaf;
                                559                 :     int         level;
                                560                 :     int         itemno;
                                561                 :     leaf_item  *item;
                                562                 : 
                                563                 :     /*
                                564                 :      * The value might be in the buffer of newly-added values.
                                565                 :      */
                                566          903083 :     if (intset->num_buffered_values > 0 && x >= intset->buffered_values[0])
                                567                 :     {
 1479 heikki.linnakangas        568 GIC      100929 :         itemno = intset_binsrch_uint64(x,
                                569          100929 :                                        intset->buffered_values,
 1479 heikki.linnakangas        570 ECB             :                                        intset->num_buffered_values,
                                571                 :                                        false);
 1479 heikki.linnakangas        572 GIC      100929 :         if (itemno >= intset->num_buffered_values)
 1479 heikki.linnakangas        573 CBC       16517 :             return false;
                                574                 :         else
 1476 tgl                       575 GIC       84412 :             return (intset->buffered_values[itemno] == x);
                                576                 :     }
                                577                 : 
                                578                 :     /*
                                579                 :      * Start from the root, and walk down the B-tree to find the right leaf
 1479 heikki.linnakangas        580 ECB             :      * node.
                                581                 :      */
 1479 heikki.linnakangas        582 CBC      802154 :     if (!intset->root)
                                583               8 :         return false;
 1479 heikki.linnakangas        584 GIC      802146 :     node = intset->root;
 1479 heikki.linnakangas        585 CBC     3004148 :     for (level = intset->num_levels - 1; level > 0; level--)
                                586                 :     {
                                587         2202005 :         intset_internal_node *n = (intset_internal_node *) node;
                                588                 : 
                                589         2202005 :         Assert(node->level == level);
 1479 heikki.linnakangas        590 ECB             : 
 1479 heikki.linnakangas        591 CBC     2202005 :         itemno = intset_binsrch_uint64(x, n->values, n->num_items, true);
                                592         2202005 :         if (itemno == 0)
 1479 heikki.linnakangas        593 GIC           3 :             return false;
 1479 heikki.linnakangas        594 CBC     2202002 :         node = n->downlinks[itemno - 1];
 1479 heikki.linnakangas        595 ECB             :     }
 1479 heikki.linnakangas        596 GIC      802143 :     Assert(node->level == 0);
                                597          802143 :     leaf = (intset_leaf_node *) node;
                                598                 : 
                                599                 :     /*
 1476 tgl                       600 ECB             :      * Binary search to find the right item on the leaf page
 1479 heikki.linnakangas        601                 :      */
 1479 heikki.linnakangas        602 CBC      802143 :     itemno = intset_binsrch_leaf(x, leaf->items, leaf->num_items, true);
                                603          802143 :     if (itemno == 0)
 1479 heikki.linnakangas        604 GIC           9 :         return false;
                                605          802134 :     item = &leaf->items[itemno - 1];
 1479 heikki.linnakangas        606 ECB             : 
                                607                 :     /* Is this a match to the first value on the item? */
 1479 heikki.linnakangas        608 CBC      802134 :     if (item->first == x)
 1479 heikki.linnakangas        609 GIC        1636 :         return true;
                                610          800498 :     Assert(x > item->first);
 1479 heikki.linnakangas        611 ECB             : 
                                612                 :     /* Is it in the packed codeword? */
 1479 heikki.linnakangas        613 GIC      800498 :     if (simple8b_contains(item->codeword, x, item->first))
 1479 heikki.linnakangas        614 CBC      150219 :         return true;
                                615                 : 
 1479 heikki.linnakangas        616 GIC      650279 :     return false;
                                617                 : }
                                618                 : 
                                619                 : /*
                                620                 :  * Begin in-order scan through all the values.
                                621                 :  *
                                622                 :  * While the iteration is in-progress, you cannot add new values to the set.
 1479 heikki.linnakangas        623 ECB             :  */
                                624                 : void
 1479 heikki.linnakangas        625 GIC          62 : intset_begin_iterate(IntegerSet *intset)
 1479 heikki.linnakangas        626 ECB             : {
 1476 tgl                       627                 :     /* Note that we allow an iteration to be abandoned midway */
 1476 tgl                       628 CBC          62 :     intset->iter_active = true;
 1479 heikki.linnakangas        629              62 :     intset->iter_node = intset->leftmost_leaf;
                                630              62 :     intset->iter_itemno = 0;
                                631              62 :     intset->iter_valueno = 0;
                                632              62 :     intset->iter_num_values = 0;
 1479 heikki.linnakangas        633 GIC          62 :     intset->iter_values = intset->iter_values_buf;
                                634              62 : }
                                635                 : 
                                636                 : /*
                                637                 :  * Returns the next integer, when iterating.
                                638                 :  *
                                639                 :  * intset_begin_iterate() must be called first.  intset_iterate_next() returns
                                640                 :  * the next value in the set.  Returns true, if there was another value, and
                                641                 :  * stores the value in *next.  Otherwise, returns false.
 1479 heikki.linnakangas        642 ECB             :  */
                                643                 : bool
 1479 heikki.linnakangas        644 CBC   163004049 : intset_iterate_next(IntegerSet *intset, uint64 *next)
                                645                 : {
 1476 tgl                       646 GIC   163004049 :     Assert(intset->iter_active);
                                647                 :     for (;;)
 1479 heikki.linnakangas        648 ECB             :     {
                                649                 :         /* Return next iter_values[] entry if any */
 1479 heikki.linnakangas        650 CBC   176762656 :         if (intset->iter_valueno < intset->iter_num_values)
 1479 heikki.linnakangas        651 ECB             :         {
 1479 heikki.linnakangas        652 GIC   163004032 :             *next = intset->iter_values[intset->iter_valueno++];
                                653       163004032 :             return true;
                                654                 :         }
 1479 heikki.linnakangas        655 ECB             : 
 1476 tgl                       656                 :         /* Decode next item in current leaf node, if any */
 1476 tgl                       657 CBC    13758624 :         if (intset->iter_node &&
 1476 tgl                       658 GIC    13758581 :             intset->iter_itemno < intset->iter_node->num_items)
 1479 heikki.linnakangas        659        13546903 :         {
                                660                 :             leaf_item  *item;
 1479 heikki.linnakangas        661 ECB             :             int         num_decoded;
                                662                 : 
 1479 heikki.linnakangas        663 CBC    13546903 :             item = &intset->iter_node->items[intset->iter_itemno++];
 1479 heikki.linnakangas        664 ECB             : 
 1476 tgl                       665 GIC    13546903 :             intset->iter_values_buf[0] = item->first;
                                666        13546903 :             num_decoded = simple8b_decode(item->codeword,
 1476 tgl                       667 ECB             :                                           &intset->iter_values_buf[1],
                                668                 :                                           item->first);
 1479 heikki.linnakangas        669 CBC    13546903 :             intset->iter_num_values = num_decoded + 1;
 1479 heikki.linnakangas        670 GIC    13546903 :             intset->iter_valueno = 0;
                                671        13546903 :             continue;
                                672                 :         }
 1479 heikki.linnakangas        673 ECB             : 
                                674                 :         /* No more items on this leaf, step to next node */
 1479 heikki.linnakangas        675 CBC      211721 :         if (intset->iter_node)
 1479 heikki.linnakangas        676 ECB             :         {
 1479 heikki.linnakangas        677 CBC      211678 :             intset->iter_node = intset->iter_node->next;
 1479 heikki.linnakangas        678 GIC      211678 :             intset->iter_itemno = 0;
                                679          211678 :             continue;
                                680                 :         }
                                681                 : 
                                682                 :         /*
                                683                 :          * We have reached the end of the B-tree.  But we might still have
 1479 heikki.linnakangas        684 ECB             :          * some integers in the buffer of newly-added values.
                                685                 :          */
 1476 tgl                       686 CBC          43 :         if (intset->iter_values == (const uint64 *) intset->iter_values_buf)
 1479 heikki.linnakangas        687 ECB             :         {
 1479 heikki.linnakangas        688 CBC          26 :             intset->iter_values = intset->buffered_values;
                                689              26 :             intset->iter_num_values = intset->num_buffered_values;
 1476 tgl                       690 GIC          26 :             intset->iter_valueno = 0;
 1479 heikki.linnakangas        691              26 :             continue;
 1479 heikki.linnakangas        692 ECB             :         }
                                693                 : 
 1479 heikki.linnakangas        694 GIC          17 :         break;
                                695                 :     }
 1479 heikki.linnakangas        696 ECB             : 
                                697                 :     /* No more results. */
 1476 tgl                       698 CBC          17 :     intset->iter_active = false;
 1476 tgl                       699 GIC          17 :     *next = 0;                  /* prevent uninitialized-variable warnings */
 1479 heikki.linnakangas        700              17 :     return false;
                                701                 : }
                                702                 : 
                                703                 : /*
                                704                 :  * intset_binsrch_uint64() -- search a sorted array of uint64s
                                705                 :  *
                                706                 :  * Returns the first position with key equal or less than the given key.
                                707                 :  * The returned position would be the "insert" location for the given key,
                                708                 :  * that is, the position where the new key should be inserted to.
                                709                 :  *
                                710                 :  * 'nextkey' affects the behavior on equal keys.  If true, and there is an
                                711                 :  * equal key in the array, this returns the position immediately after the
                                712                 :  * equal key.  If false, this returns the position of the equal key itself.
 1479 heikki.linnakangas        713 ECB             :  */
                                714                 : static int
 1479 heikki.linnakangas        715 GIC     2302934 : intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems, bool nextkey)
                                716                 : {
                                717                 :     int         low,
                                718                 :                 high,
 1479 heikki.linnakangas        719 ECB             :                 mid;
                                720                 : 
 1479 heikki.linnakangas        721 CBC     2302934 :     low = 0;
 1479 heikki.linnakangas        722 GIC     2302934 :     high = arr_elems;
 1479 heikki.linnakangas        723 CBC    13986516 :     while (high > low)
                                724                 :     {
                                725        11683582 :         mid = low + (high - low) / 2;
                                726                 : 
                                727        11683582 :         if (nextkey)
 1479 heikki.linnakangas        728 ECB             :         {
 1479 heikki.linnakangas        729 GIC    11225973 :             if (item >= arr[mid])
 1479 heikki.linnakangas        730 CBC     5562665 :                 low = mid + 1;
                                731                 :             else
 1479 heikki.linnakangas        732 GIC     5663308 :                 high = mid;
                                733                 :         }
 1479 heikki.linnakangas        734 ECB             :         else
                                735                 :         {
 1479 heikki.linnakangas        736 GIC      457609 :             if (item > arr[mid])
 1479 heikki.linnakangas        737 CBC      253247 :                 low = mid + 1;
                                738                 :             else
 1479 heikki.linnakangas        739 GIC      204362 :                 high = mid;
                                740                 :         }
 1479 heikki.linnakangas        741 ECB             :     }
                                742                 : 
 1479 heikki.linnakangas        743 GIC     2302934 :     return low;
                                744                 : }
                                745                 : 
 1479 heikki.linnakangas        746 ECB             : /* same, but for an array of leaf items */
                                747                 : static int
 1479 heikki.linnakangas        748 GIC      802143 : intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
                                749                 : {
                                750                 :     int         low,
                                751                 :                 high,
 1479 heikki.linnakangas        752 ECB             :                 mid;
                                753                 : 
 1479 heikki.linnakangas        754 CBC      802143 :     low = 0;
 1479 heikki.linnakangas        755 GIC      802143 :     high = arr_elems;
 1479 heikki.linnakangas        756 CBC     5626605 :     while (high > low)
                                757                 :     {
                                758         4824462 :         mid = low + (high - low) / 2;
                                759                 : 
                                760         4824462 :         if (nextkey)
 1479 heikki.linnakangas        761 ECB             :         {
 1479 heikki.linnakangas        762 GIC     4824462 :             if (item >= arr[mid].first)
 1479 heikki.linnakangas        763 CBC     2433977 :                 low = mid + 1;
                                764                 :             else
 1479 heikki.linnakangas        765 GIC     2390485 :                 high = mid;
                                766                 :         }
 1479 heikki.linnakangas        767 EUB             :         else
                                768                 :         {
 1479 heikki.linnakangas        769 UIC           0 :             if (item > arr[mid].first)
 1479 heikki.linnakangas        770 UBC           0 :                 low = mid + 1;
                                771                 :             else
 1479 heikki.linnakangas        772 UIC           0 :                 high = mid;
                                773                 :         }
 1479 heikki.linnakangas        774 ECB             :     }
                                775                 : 
 1479 heikki.linnakangas        776 GIC      802143 :     return low;
                                777                 : }
                                778                 : 
                                779                 : /*
                                780                 :  * Simple-8b encoding.
                                781                 :  *
                                782                 :  * The simple-8b algorithm packs between 1 and 240 integers into 64-bit words,
                                783                 :  * called "codewords".  The number of integers packed into a single codeword
                                784                 :  * depends on the integers being packed; small integers are encoded using
                                785                 :  * fewer bits than large integers.  A single codeword can store a single
                                786                 :  * 60-bit integer, or two 30-bit integers, for example.
                                787                 :  *
                                788                 :  * Since we're storing a unique, sorted, set of integers, we actually encode
                                789                 :  * the *differences* between consecutive integers.  That way, clusters of
                                790                 :  * integers that are close to each other are packed efficiently, regardless
                                791                 :  * of their absolute values.
                                792                 :  *
                                793                 :  * In Simple-8b, each codeword consists of a 4-bit selector, which indicates
                                794                 :  * how many integers are encoded in the codeword, and the encoded integers are
                                795                 :  * packed into the remaining 60 bits.  The selector allows for 16 different
                                796                 :  * ways of using the remaining 60 bits, called "modes".  The number of integers
                                797                 :  * packed into a single codeword in each mode is listed in the simple8b_modes
                                798                 :  * table below.  For example, consider the following codeword:
                                799                 :  *
                                800                 :  *      20-bit integer       20-bit integer       20-bit integer
                                801                 :  * 1101 00000000000000010010 01111010000100100000 00000000000000010100
                                802                 :  * ^
                                803                 :  * selector
                                804                 :  *
                                805                 :  * The selector 1101 is 13 in decimal.  From the modes table below, we see
                                806                 :  * that it means that the codeword encodes three 20-bit integers.  In decimal,
                                807                 :  * those integers are 18, 500000 and 20.  Because we encode deltas rather than
                                808                 :  * absolute values, the actual values that they represent are 18, 500018 and
                                809                 :  * 500038.
                                810                 :  *
                                811                 :  * Modes 0 and 1 are a bit special; they encode a run of 240 or 120 zeroes
                                812                 :  * (which means 240 or 120 consecutive integers, since we're encoding the
                                813                 :  * deltas between integers), without using the rest of the codeword bits
                                814                 :  * for anything.
                                815                 :  *
                                816                 :  * Simple-8b cannot encode integers larger than 60 bits.  Values larger than
                                817                 :  * that are always stored in the 'first' field of a leaf item, never in the
                                818                 :  * packed codeword.  If there is a sequence of integers that are more than
                                819                 :  * 2^60 apart, the codeword will go unused on those items.  To represent that,
                                820                 :  * we use a magic EMPTY_CODEWORD codeword value.
                                821                 :  */
                                822                 : static const struct simple8b_mode
                                823                 : {
                                824                 :     uint8       bits_per_int;
                                825                 :     uint8       num_ints;
                                826                 : }           simple8b_modes[17] =
                                827                 : 
                                828                 : {
                                829                 :     {0, 240},                   /* mode  0: 240 zeroes */
                                830                 :     {0, 120},                   /* mode  1: 120 zeroes */
                                831                 :     {1, 60},                    /* mode  2: sixty 1-bit integers */
                                832                 :     {2, 30},                    /* mode  3: thirty 2-bit integers */
                                833                 :     {3, 20},                    /* mode  4: twenty 3-bit integers */
                                834                 :     {4, 15},                    /* mode  5: fifteen 4-bit integers */
                                835                 :     {5, 12},                    /* mode  6: twelve 5-bit integers */
                                836                 :     {6, 10},                    /* mode  7: ten 6-bit integers */
                                837                 :     {7, 8},                     /* mode  8: eight 7-bit integers (four bits
                                838                 :                                  * are wasted) */
                                839                 :     {8, 7},                     /* mode  9: seven 8-bit integers (four bits
                                840                 :                                  * are wasted) */
                                841                 :     {10, 6},                    /* mode 10: six 10-bit integers */
                                842                 :     {12, 5},                    /* mode 11: five 12-bit integers */
                                843                 :     {15, 4},                    /* mode 12: four 15-bit integers */
                                844                 :     {20, 3},                    /* mode 13: three 20-bit integers */
                                845                 :     {30, 2},                    /* mode 14: two 30-bit integers */
                                846                 :     {60, 1},                    /* mode 15: one 60-bit integer */
                                847                 : 
                                848                 :     {0, 0}                      /* sentinel value */
                                849                 : };
                                850                 : 
                                851                 : /*
                                852                 :  * EMPTY_CODEWORD is a special value, used to indicate "no values".
                                853                 :  * It is used if the next value is too large to be encoded with Simple-8b.
                                854                 :  *
                                855                 :  * This value looks like a mode-0 codeword, but we can distinguish it
                                856                 :  * because a regular mode-0 codeword would have zeroes in the unused bits.
                                857                 :  */
                                858                 : #define EMPTY_CODEWORD      UINT64CONST(0x0FFFFFFFFFFFFFFF)
                                859                 : 
                                860                 : /*
                                861                 :  * Encode a number of integers into a Simple-8b codeword.
                                862                 :  *
                                863                 :  * (What we actually encode are deltas between successive integers.
                                864                 :  * "base" is the value before ints[0].)
                                865                 :  *
                                866                 :  * The input array must contain at least SIMPLE8B_MAX_VALUES_PER_CODEWORD
                                867                 :  * elements, ensuring that we can produce a full codeword.
                                868                 :  *
                                869                 :  * Returns the encoded codeword, and sets *num_encoded to the number of
                                870                 :  * input integers that were encoded.  That can be zero, if the first delta
                                871                 :  * is too large to be encoded.
 1479 heikki.linnakangas        872 ECB             :  */
                                873                 : static uint64
 1476 tgl                       874 GIC    13546903 : simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base)
                                875                 : {
                                876                 :     int         selector;
                                877                 :     int         nints;
                                878                 :     int         bits;
                                879                 :     uint64      diff;
                                880                 :     uint64      last_val;
                                881                 :     uint64      codeword;
 1479 heikki.linnakangas        882 ECB             :     int         i;
                                883                 : 
 1479 heikki.linnakangas        884 GIC    13546903 :     Assert(ints[0] > base);
                                885                 : 
                                886                 :     /*
                                887                 :      * Select the "mode" to use for this codeword.
                                888                 :      *
                                889                 :      * In each iteration, check if the next value can be represented in the
                                890                 :      * current mode we're considering.  If it's too large, then step up the
                                891                 :      * mode to a wider one, and repeat.  If it fits, move on to the next
                                892                 :      * integer.  Repeat until the codeword is full, given the current mode.
                                893                 :      *
                                894                 :      * Note that we don't have any way to represent unused slots in the
                                895                 :      * codeword, so we require each codeword to be "full".  It is always
                                896                 :      * possible to produce a full codeword unless the very first delta is too
                                897                 :      * large to be encoded.  For example, if the first delta is small but the
                                898                 :      * second is too large to be encoded, we'll end up using the last "mode",
 1476 tgl                       899 ECB             :      * which has nints == 1.
 1479 heikki.linnakangas        900                 :      */
 1479 heikki.linnakangas        901 CBC    13546903 :     selector = 0;
                                902        13546903 :     nints = simple8b_modes[0].num_ints;
                                903        13546903 :     bits = simple8b_modes[0].bits_per_int;
                                904        13546903 :     diff = ints[0] - base - 1;
 1479 heikki.linnakangas        905 GIC    13546903 :     last_val = ints[0];
 1476 tgl                       906        13546903 :     i = 0;                      /* number of deltas we have accepted */
 1479 heikki.linnakangas        907 ECB             :     for (;;)
                                908                 :     {
 1479 heikki.linnakangas        909 GIC   345945628 :         if (diff >= (UINT64CONST(1) << bits))
 1479 heikki.linnakangas        910 ECB             :         {
                                911                 :             /* too large, step up to next mode */
 1479 heikki.linnakangas        912 CBC   148993038 :             selector++;
 1479 heikki.linnakangas        913 GIC   148993038 :             nints = simple8b_modes[selector].num_ints;
 1479 heikki.linnakangas        914 CBC   148993038 :             bits = simple8b_modes[selector].bits_per_int;
 1476 tgl                       915 ECB             :             /* we might already have accepted enough deltas for this mode */
 1479 heikki.linnakangas        916 GIC   148993038 :             if (i >= nints)
                                917         6499933 :                 break;
                                918                 :         }
                                919                 :         else
 1479 heikki.linnakangas        920 ECB             :         {
 1476 tgl                       921                 :             /* accept this delta; then done if codeword is full */
 1479 heikki.linnakangas        922 CBC   196952590 :             i++;
 1479 heikki.linnakangas        923 GIC   196952590 :             if (i >= nints)
 1479 heikki.linnakangas        924 CBC     7046970 :                 break;
 1476 tgl                       925 ECB             :             /* examine next delta */
 1479 heikki.linnakangas        926 CBC   189905620 :             Assert(ints[i] > last_val);
 1479 heikki.linnakangas        927 GIC   189905620 :             diff = ints[i] - last_val - 1;
                                928       189905620 :             last_val = ints[i];
                                929                 :         }
 1479 heikki.linnakangas        930 ECB             :     }
                                931                 : 
 1479 heikki.linnakangas        932 GIC    13546903 :     if (nints == 0)
                                933                 :     {
                                934                 :         /*
                                935                 :          * The first delta is too large to be encoded with Simple-8b.
                                936                 :          *
                                937                 :          * If there is at least one not-too-large integer in the input, we
                                938                 :          * will encode it using mode 15 (or a more compact mode).  Hence, we
 1476 tgl                       939 ECB             :          * can only get here if the *first* delta is >= 2^60.
      heikki.linnakangas        940                 :          */
 1479 heikki.linnakangas        941 CBC           4 :         Assert(i == 0);
 1479 heikki.linnakangas        942 GIC           4 :         *num_encoded = 0;
                                943               4 :         return EMPTY_CODEWORD;
                                944                 :     }
                                945                 : 
                                946                 :     /*
                                947                 :      * Encode the integers using the selected mode.  Note that we shift them
                                948                 :      * into the codeword in reverse order, so that they will come out in the
 1479 heikki.linnakangas        949 ECB             :      * correct order in the decoder.
                                950                 :      */
 1479 heikki.linnakangas        951 GIC    13546899 :     codeword = 0;
 1479 heikki.linnakangas        952 CBC    13546899 :     if (bits > 0)
                                953                 :     {
 1476                           954       137501045 :         for (i = nints - 1; i > 0; i--)
 1479 heikki.linnakangas        955 ECB             :         {
 1476 heikki.linnakangas        956 CBC   124003945 :             diff = ints[i] - ints[i - 1] - 1;
 1476 heikki.linnakangas        957 GIC   124003945 :             codeword |= diff;
 1479 heikki.linnakangas        958 CBC   124003945 :             codeword <<= bits;
 1479 heikki.linnakangas        959 ECB             :         }
 1476 heikki.linnakangas        960 GIC    13497100 :         diff = ints[0] - base - 1;
                                961        13497100 :         codeword |= diff;
                                962                 :     }
 1479 heikki.linnakangas        963 ECB             : 
                                964                 :     /* add selector to the codeword, and return */
 1476 heikki.linnakangas        965 CBC    13546899 :     codeword |= (uint64) selector << 60;
 1479 heikki.linnakangas        966 ECB             : 
 1479 heikki.linnakangas        967 GIC    13546899 :     *num_encoded = nints;
                                968        13546899 :     return codeword;
                                969                 : }
                                970                 : 
                                971                 : /*
                                972                 :  * Decode a codeword into an array of integers.
                                973                 :  * Returns the number of integers decoded.
 1479 heikki.linnakangas        974 ECB             :  */
                                975                 : static int
 1479 heikki.linnakangas        976 CBC    13546903 : simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
 1479 heikki.linnakangas        977 ECB             : {
 1476 heikki.linnakangas        978 CBC    13546903 :     int         selector = (codeword >> 60);
 1479                           979        13546903 :     int         nints = simple8b_modes[selector].num_ints;
 1476 tgl                       980 GIC    13546903 :     int         bits = simple8b_modes[selector].bits_per_int;
 1479 heikki.linnakangas        981        13546903 :     uint64      mask = (UINT64CONST(1) << bits) - 1;
 1476 tgl                       982 ECB             :     uint64      curr_value;
 1479 heikki.linnakangas        983                 : 
 1479 heikki.linnakangas        984 GIC    13546903 :     if (codeword == EMPTY_CODEWORD)
 1479 heikki.linnakangas        985 CBC           4 :         return 0;
 1479 heikki.linnakangas        986 ECB             : 
 1476 tgl                       987 GIC    13546899 :     curr_value = base;
 1479 heikki.linnakangas        988 CBC   162999704 :     for (int i = 0; i < nints; i++)
                                989                 :     {
                                990       149452805 :         uint64      diff = codeword & mask;
 1479 heikki.linnakangas        991 ECB             : 
 1476 tgl                       992 CBC   149452805 :         curr_value += 1 + diff;
 1476 tgl                       993 GIC   149452805 :         decoded[i] = curr_value;
 1479 heikki.linnakangas        994 CBC   149452805 :         codeword >>= bits;
                                995                 :     }
 1479 heikki.linnakangas        996 GIC    13546899 :     return nints;
                                997                 : }
                                998                 : 
                                999                 : /*
                               1000                 :  * This is very similar to simple8b_decode(), but instead of decoding all
                               1001                 :  * the values to an array, it just checks if the given "key" is part of
                               1002                 :  * the codeword.
 1479 heikki.linnakangas       1003 ECB             :  */
                               1004                 : static bool
 1479 heikki.linnakangas       1005 CBC      800498 : simple8b_contains(uint64 codeword, uint64 key, uint64 base)
 1479 heikki.linnakangas       1006 ECB             : {
 1476 heikki.linnakangas       1007 CBC      800498 :     int         selector = (codeword >> 60);
 1479 heikki.linnakangas       1008 GIC      800498 :     int         nints = simple8b_modes[selector].num_ints;
 1479 heikki.linnakangas       1009 CBC      800498 :     int         bits = simple8b_modes[selector].bits_per_int;
 1479 heikki.linnakangas       1010 ECB             : 
 1479 heikki.linnakangas       1011 GIC      800498 :     if (codeword == EMPTY_CODEWORD)
 1479 heikki.linnakangas       1012 CBC           8 :         return false;
                               1013                 : 
 1479 heikki.linnakangas       1014 GIC      800490 :     if (bits == 0)
 1479 heikki.linnakangas       1015 ECB             :     {
                               1016                 :         /* Special handling for 0-bit cases. */
 1476 tgl                      1017 GIC       99625 :         return (key - base) <= nints;
                               1018                 :     }
 1479 heikki.linnakangas       1019 ECB             :     else
                               1020                 :     {
 1479 heikki.linnakangas       1021 GIC      700865 :         uint64      mask = (UINT64CONST(1) << bits) - 1;
 1476 tgl                      1022 ECB             :         uint64      curr_value;
 1479 heikki.linnakangas       1023                 : 
 1476 tgl                      1024 GIC      700865 :         curr_value = base;
 1479 heikki.linnakangas       1025 CBC     5215588 :         for (int i = 0; i < nints; i++)
                               1026                 :         {
                               1027         4856742 :             uint64      diff = codeword & mask;
                               1028                 : 
 1476 tgl                      1029         4856742 :             curr_value += 1 + diff;
                               1030                 : 
 1479 heikki.linnakangas       1031         4856742 :             if (curr_value >= key)
 1479 heikki.linnakangas       1032 ECB             :             {
 1479 heikki.linnakangas       1033 GIC      342019 :                 if (curr_value == key)
 1479 heikki.linnakangas       1034 CBC       50594 :                     return true;
                               1035                 :                 else
 1479 heikki.linnakangas       1036 GIC      291425 :                     return false;
 1479 heikki.linnakangas       1037 ECB             :             }
                               1038                 : 
 1479 heikki.linnakangas       1039 GIC     4514723 :             codeword >>= bits;
 1479 heikki.linnakangas       1040 ECB             :         }
                               1041                 :     }
 1479 heikki.linnakangas       1042 GIC      358846 :     return false;
                               1043                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a