LCOV - differential code coverage report
Current view: top level - src/backend/lib - dshash.c (source / functions) Coverage Total Hit UBC CBC
Current: Differential Code Coverage HEAD vs 15 Lines: 83.0 % 289 240 49 240
Current Date: 2023-04-08 15:15:32 Functions: 92.6 % 27 25 2 25
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * dshash.c
       4                 :  *    Concurrent hash tables backed by dynamic shared memory areas.
       5                 :  *
       6                 :  * This is an open hashing hash table, with a linked list at each table
       7                 :  * entry.  It supports dynamic resizing, as required to prevent the linked
       8                 :  * lists from growing too long on average.  Currently, only growing is
       9                 :  * supported: the hash table never becomes smaller.
      10                 :  *
      11                 :  * To deal with concurrency, it has a fixed size set of partitions, each of
      12                 :  * which is independently locked.  Each bucket maps to a partition; so insert,
      13                 :  * find and iterate operations normally only acquire one lock.  Therefore,
      14                 :  * good concurrency is achieved whenever such operations don't collide at the
      15                 :  * lock partition level.  However, when a resize operation begins, all
      16                 :  * partition locks must be acquired simultaneously for a brief period.  This
      17                 :  * is only expected to happen a small number of times until a stable size is
      18                 :  * found, since growth is geometric.
      19                 :  *
      20                 :  * Future versions may support iterators and incremental resizing; for now
      21                 :  * the implementation is minimalist.
      22                 :  *
      23                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      24                 :  * Portions Copyright (c) 1994, Regents of the University of California
      25                 :  *
      26                 :  * IDENTIFICATION
      27                 :  *    src/backend/lib/dshash.c
      28                 :  *
      29                 :  *-------------------------------------------------------------------------
      30                 :  */
      31                 : 
      32                 : #include "postgres.h"
      33                 : 
      34                 : #include "common/hashfn.h"
      35                 : #include "lib/dshash.h"
      36                 : #include "storage/ipc.h"
      37                 : #include "storage/lwlock.h"
      38                 : #include "utils/dsa.h"
      39                 : #include "utils/memutils.h"
      40                 : 
      41                 : /*
      42                 :  * An item in the hash table.  This wraps the user's entry object in an
      43                 :  * envelop that holds a pointer back to the bucket and a pointer to the next
      44                 :  * item in the bucket.
      45                 :  */
      46                 : struct dshash_table_item
      47                 : {
      48                 :     /* The next item in the same bucket. */
      49                 :     dsa_pointer next;
      50                 :     /* The hashed key, to avoid having to recompute it. */
      51                 :     dshash_hash hash;
      52                 :     /* The user's entry object follows here.  See ENTRY_FROM_ITEM(item). */
      53                 : };
      54                 : 
      55                 : /*
      56                 :  * The number of partitions for locking purposes.  This is set to match
      57                 :  * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for
      58                 :  * the buffer pool must be good enough for any other purpose.  This could
      59                 :  * become a runtime parameter in future.
      60                 :  */
      61                 : #define DSHASH_NUM_PARTITIONS_LOG2 7
      62                 : #define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)
      63                 : 
      64                 : /* A magic value used to identify our hash tables. */
      65                 : #define DSHASH_MAGIC 0x75ff6a20
      66                 : 
      67                 : /*
      68                 :  * Tracking information for each lock partition.  Initially, each partition
      69                 :  * corresponds to one bucket, but each time the hash table grows, the buckets
      70                 :  * covered by each partition split so the number of buckets covered doubles.
      71                 :  *
      72                 :  * We might want to add padding here so that each partition is on a different
      73                 :  * cache line, but doing so would bloat this structure considerably.
      74                 :  */
      75                 : typedef struct dshash_partition
      76                 : {
      77                 :     LWLock      lock;           /* Protects all buckets in this partition. */
      78                 :     size_t      count;          /* # of items in this partition's buckets */
      79                 : } dshash_partition;
      80                 : 
      81                 : /*
      82                 :  * The head object for a hash table.  This will be stored in dynamic shared
      83                 :  * memory.
      84                 :  */
      85                 : typedef struct dshash_table_control
      86                 : {
      87                 :     dshash_table_handle handle;
      88                 :     uint32      magic;
      89                 :     dshash_partition partitions[DSHASH_NUM_PARTITIONS];
      90                 :     int         lwlock_tranche_id;
      91                 : 
      92                 :     /*
      93                 :      * The following members are written to only when ALL partitions locks are
      94                 :      * held.  They can be read when any one partition lock is held.
      95                 :      */
      96                 : 
      97                 :     /* Number of buckets expressed as power of 2 (8 = 256 buckets). */
      98                 :     size_t      size_log2;      /* log2(number of buckets) */
      99                 :     dsa_pointer buckets;        /* current bucket array */
     100                 : } dshash_table_control;
     101                 : 
     102                 : /*
     103                 :  * Per-backend state for a dynamic hash table.
     104                 :  */
     105                 : struct dshash_table
     106                 : {
     107                 :     dsa_area   *area;           /* Backing dynamic shared memory area. */
     108                 :     dshash_parameters params;   /* Parameters. */
     109                 :     void       *arg;            /* User-supplied data pointer. */
     110                 :     dshash_table_control *control;  /* Control object in DSM. */
     111                 :     dsa_pointer *buckets;       /* Current bucket pointers in DSM. */
     112                 :     size_t      size_log2;      /* log2(number of buckets) */
     113                 : };
     114                 : 
     115                 : /* Given a pointer to an item, find the entry (user data) it holds. */
     116                 : #define ENTRY_FROM_ITEM(item) \
     117                 :     ((char *)(item) + MAXALIGN(sizeof(dshash_table_item)))
     118                 : 
     119                 : /* Given a pointer to an entry, find the item that holds it. */
     120                 : #define ITEM_FROM_ENTRY(entry)                                          \
     121                 :     ((dshash_table_item *)((char *)(entry) -                            \
     122                 :                              MAXALIGN(sizeof(dshash_table_item))))
     123                 : 
     124                 : /* How many resize operations (bucket splits) have there been? */
     125                 : #define NUM_SPLITS(size_log2)                   \
     126                 :     (size_log2 - DSHASH_NUM_PARTITIONS_LOG2)
     127                 : 
     128                 : /* How many buckets are there in a given size? */
     129                 : #define NUM_BUCKETS(size_log2)      \
     130                 :     (((size_t) 1) << (size_log2))
     131                 : 
     132                 : /* How many buckets are there in each partition at a given size? */
     133                 : #define BUCKETS_PER_PARTITION(size_log2)        \
     134                 :     (((size_t) 1) << NUM_SPLITS(size_log2))
     135                 : 
     136                 : /* Max entries before we need to grow.  Half + quarter = 75% load factor. */
     137                 : #define MAX_COUNT_PER_PARTITION(hash_table)             \
     138                 :     (BUCKETS_PER_PARTITION(hash_table->size_log2) / 2 + \
     139                 :      BUCKETS_PER_PARTITION(hash_table->size_log2) / 4)
     140                 : 
     141                 : /* Choose partition based on the highest order bits of the hash. */
     142                 : #define PARTITION_FOR_HASH(hash)                                        \
     143                 :     (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - DSHASH_NUM_PARTITIONS_LOG2))
     144                 : 
     145                 : /*
     146                 :  * Find the bucket index for a given hash and table size.  Each time the table
     147                 :  * doubles in size, the appropriate bucket for a given hash value doubles and
     148                 :  * possibly adds one, depending on the newly revealed bit, so that all buckets
     149                 :  * are split.
     150                 :  */
     151                 : #define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2)     \
     152                 :     (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - (size_log2)))
     153                 : 
     154                 : /* The index of the first bucket in a given partition. */
     155                 : #define BUCKET_INDEX_FOR_PARTITION(partition, size_log2)    \
     156                 :     ((partition) << NUM_SPLITS(size_log2))
     157                 : 
     158                 : /* Choose partition based on bucket index. */
     159                 : #define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2)               \
     160                 :     ((bucket_idx) >> NUM_SPLITS(size_log2))
     161                 : 
     162                 : /* The head of the active bucket for a given hash value (lvalue). */
     163                 : #define BUCKET_FOR_HASH(hash_table, hash)                               \
     164                 :     (hash_table->buckets[                                                \
     165                 :         BUCKET_INDEX_FOR_HASH_AND_SIZE(hash,                            \
     166                 :                                        hash_table->size_log2)])
     167                 : 
     168                 : static void delete_item(dshash_table *hash_table,
     169                 :                         dshash_table_item *item);
     170                 : static void resize(dshash_table *hash_table, size_t new_size_log2);
     171                 : static inline void ensure_valid_bucket_pointers(dshash_table *hash_table);
     172                 : static inline dshash_table_item *find_in_bucket(dshash_table *hash_table,
     173                 :                                                 const void *key,
     174                 :                                                 dsa_pointer item_pointer);
     175                 : static void insert_item_into_bucket(dshash_table *hash_table,
     176                 :                                     dsa_pointer item_pointer,
     177                 :                                     dshash_table_item *item,
     178                 :                                     dsa_pointer *bucket);
     179                 : static dshash_table_item *insert_into_bucket(dshash_table *hash_table,
     180                 :                                              const void *key,
     181                 :                                              dsa_pointer *bucket);
     182                 : static bool delete_key_from_bucket(dshash_table *hash_table,
     183                 :                                    const void *key,
     184                 :                                    dsa_pointer *bucket_head);
     185                 : static bool delete_item_from_bucket(dshash_table *hash_table,
     186                 :                                     dshash_table_item *item,
     187                 :                                     dsa_pointer *bucket_head);
     188                 : static inline dshash_hash hash_key(dshash_table *hash_table, const void *key);
     189                 : static inline bool equal_keys(dshash_table *hash_table,
     190                 :                               const void *a, const void *b);
     191                 : 
     192                 : #define PARTITION_LOCK(hash_table, i)           \
     193                 :     (&(hash_table)->control->partitions[(i)].lock)
     194                 : 
     195                 : #define ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table) \
     196                 :     Assert(!LWLockAnyHeldByMe(&(hash_table)->control->partitions[0].lock, \
     197                 :            DSHASH_NUM_PARTITIONS, sizeof(dshash_partition)))
     198                 : 
     199                 : /*
     200                 :  * Create a new hash table backed by the given dynamic shared area, with the
     201                 :  * given parameters.  The returned object is allocated in backend-local memory
     202                 :  * using the current MemoryContext.  'arg' will be passed through to the
     203                 :  * compare and hash functions.
     204                 :  */
     205                 : dshash_table *
     206 CBC        1974 : dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
     207                 : {
     208                 :     dshash_table *hash_table;
     209                 :     dsa_pointer control;
     210                 : 
     211                 :     /* Allocate the backend-local object representing the hash table. */
     212            1974 :     hash_table = palloc(sizeof(dshash_table));
     213                 : 
     214                 :     /* Allocate the control object in shared memory. */
     215            1974 :     control = dsa_allocate(area, sizeof(dshash_table_control));
     216                 : 
     217                 :     /* Set up the local and shared hash table structs. */
     218            1974 :     hash_table->area = area;
     219            1974 :     hash_table->params = *params;
     220            1974 :     hash_table->arg = arg;
     221            1974 :     hash_table->control = dsa_get_address(area, control);
     222            1974 :     hash_table->control->handle = control;
     223            1974 :     hash_table->control->magic = DSHASH_MAGIC;
     224            1974 :     hash_table->control->lwlock_tranche_id = params->tranche_id;
     225                 : 
     226                 :     /* Set up the array of lock partitions. */
     227                 :     {
     228            1974 :         dshash_partition *partitions = hash_table->control->partitions;
     229            1974 :         int         tranche_id = hash_table->control->lwlock_tranche_id;
     230                 :         int         i;
     231                 : 
     232          254646 :         for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     233                 :         {
     234          252672 :             LWLockInitialize(&partitions[i].lock, tranche_id);
     235          252672 :             partitions[i].count = 0;
     236                 :         }
     237                 :     }
     238                 : 
     239                 :     /*
     240                 :      * Set up the initial array of buckets.  Our initial size is the same as
     241                 :      * the number of partitions.
     242                 :      */
     243            1974 :     hash_table->control->size_log2 = DSHASH_NUM_PARTITIONS_LOG2;
     244            3948 :     hash_table->control->buckets =
     245            1974 :         dsa_allocate_extended(area,
     246                 :                               sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS,
     247                 :                               DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO);
     248            1974 :     if (!DsaPointerIsValid(hash_table->control->buckets))
     249                 :     {
     250 UBC           0 :         dsa_free(area, control);
     251               0 :         ereport(ERROR,
     252                 :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     253                 :                  errmsg("out of memory"),
     254                 :                  errdetail("Failed on DSA request of size %zu.",
     255                 :                            sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS)));
     256                 :     }
     257 CBC        3948 :     hash_table->buckets = dsa_get_address(area,
     258            1974 :                                           hash_table->control->buckets);
     259            1974 :     hash_table->size_log2 = hash_table->control->size_log2;
     260                 : 
     261            1974 :     return hash_table;
     262                 : }
     263                 : 
     264                 : /*
     265                 :  * Attach to an existing hash table using a handle.  The returned object is
     266                 :  * allocated in backend-local memory using the current MemoryContext.  'arg'
     267                 :  * will be passed through to the compare and hash functions.
     268                 :  */
     269                 : dshash_table *
     270           15943 : dshash_attach(dsa_area *area, const dshash_parameters *params,
     271                 :               dshash_table_handle handle, void *arg)
     272                 : {
     273                 :     dshash_table *hash_table;
     274                 :     dsa_pointer control;
     275                 : 
     276                 :     /* Allocate the backend-local object representing the hash table. */
     277           15943 :     hash_table = palloc(sizeof(dshash_table));
     278                 : 
     279                 :     /* Find the control object in shared memory. */
     280           15943 :     control = handle;
     281                 : 
     282                 :     /* Set up the local hash table struct. */
     283           15943 :     hash_table->area = area;
     284           15943 :     hash_table->params = *params;
     285           15943 :     hash_table->arg = arg;
     286           15943 :     hash_table->control = dsa_get_address(area, control);
     287           15943 :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     288                 : 
     289                 :     /*
     290                 :      * These will later be set to the correct values by
     291                 :      * ensure_valid_bucket_pointers(), at which time we'll be holding a
     292                 :      * partition lock for interlocking against concurrent resizing.
     293                 :      */
     294           15943 :     hash_table->buckets = NULL;
     295           15943 :     hash_table->size_log2 = 0;
     296                 : 
     297           15943 :     return hash_table;
     298                 : }
     299                 : 
     300                 : /*
     301                 :  * Detach from a hash table.  This frees backend-local resources associated
     302                 :  * with the hash table, but the hash table will continue to exist until it is
     303                 :  * either explicitly destroyed (by a backend that is still attached to it), or
     304                 :  * the area that backs it is returned to the operating system.
     305                 :  */
     306                 : void
     307           17800 : dshash_detach(dshash_table *hash_table)
     308                 : {
     309           17800 :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     310                 : 
     311                 :     /* The hash table may have been destroyed.  Just free local memory. */
     312           17800 :     pfree(hash_table);
     313           17800 : }
     314                 : 
     315                 : /*
     316                 :  * Destroy a hash table, returning all memory to the area.  The caller must be
     317                 :  * certain that no other backend will attempt to access the hash table before
     318                 :  * calling this function.  Other backend must explicitly call dshash_detach to
     319                 :  * free up backend-local memory associated with the hash table.  The backend
     320                 :  * that calls dshash_destroy must not call dshash_detach.
     321                 :  */
     322                 : void
     323 UBC           0 : dshash_destroy(dshash_table *hash_table)
     324                 : {
     325                 :     size_t      size;
     326                 :     size_t      i;
     327                 : 
     328               0 :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     329               0 :     ensure_valid_bucket_pointers(hash_table);
     330                 : 
     331                 :     /* Free all the entries. */
     332               0 :     size = NUM_BUCKETS(hash_table->size_log2);
     333               0 :     for (i = 0; i < size; ++i)
     334                 :     {
     335               0 :         dsa_pointer item_pointer = hash_table->buckets[i];
     336                 : 
     337               0 :         while (DsaPointerIsValid(item_pointer))
     338                 :         {
     339                 :             dshash_table_item *item;
     340                 :             dsa_pointer next_item_pointer;
     341                 : 
     342               0 :             item = dsa_get_address(hash_table->area, item_pointer);
     343               0 :             next_item_pointer = item->next;
     344               0 :             dsa_free(hash_table->area, item_pointer);
     345               0 :             item_pointer = next_item_pointer;
     346                 :         }
     347                 :     }
     348                 : 
     349                 :     /*
     350                 :      * Vandalize the control block to help catch programming errors where
     351                 :      * other backends access the memory formerly occupied by this hash table.
     352                 :      */
     353               0 :     hash_table->control->magic = 0;
     354                 : 
     355                 :     /* Free the active table and control object. */
     356               0 :     dsa_free(hash_table->area, hash_table->control->buckets);
     357               0 :     dsa_free(hash_table->area, hash_table->control->handle);
     358                 : 
     359               0 :     pfree(hash_table);
     360               0 : }
     361                 : 
     362                 : /*
     363                 :  * Get a handle that can be used by other processes to attach to this hash
     364                 :  * table.
     365                 :  */
     366                 : dshash_table_handle
     367 CBC        1974 : dshash_get_hash_table_handle(dshash_table *hash_table)
     368                 : {
     369            1974 :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     370                 : 
     371            1974 :     return hash_table->control->handle;
     372                 : }
     373                 : 
     374                 : /*
     375                 :  * Look up an entry, given a key.  Returns a pointer to an entry if one can be
     376                 :  * found with the given key.  Returns NULL if the key is not found.  If a
     377                 :  * non-NULL value is returned, the entry is locked and must be released by
     378                 :  * calling dshash_release_lock.  If an error is raised before
     379                 :  * dshash_release_lock is called, the lock will be released automatically, but
     380                 :  * the caller must take care to ensure that the entry is not left corrupted.
     381                 :  * The lock mode is either shared or exclusive depending on 'exclusive'.
     382                 :  *
     383                 :  * The caller must not hold a lock already.
     384                 :  *
     385                 :  * Note that the lock held is in fact an LWLock, so interrupts will be held on
     386                 :  * return from this function, and not resumed until dshash_release_lock is
     387                 :  * called.  It is a very good idea for the caller to release the lock quickly.
     388                 :  */
     389                 : void *
     390          809097 : dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
     391                 : {
     392                 :     dshash_hash hash;
     393                 :     size_t      partition;
     394                 :     dshash_table_item *item;
     395                 : 
     396          809097 :     hash = hash_key(hash_table, key);
     397          809097 :     partition = PARTITION_FOR_HASH(hash);
     398                 : 
     399          809097 :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     400          809097 :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     401                 : 
     402          809097 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition),
     403          809097 :                   exclusive ? LW_EXCLUSIVE : LW_SHARED);
     404          809097 :     ensure_valid_bucket_pointers(hash_table);
     405                 : 
     406                 :     /* Search the active bucket. */
     407          809097 :     item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
     408                 : 
     409          809097 :     if (!item)
     410                 :     {
     411                 :         /* Not found. */
     412          339138 :         LWLockRelease(PARTITION_LOCK(hash_table, partition));
     413          339138 :         return NULL;
     414                 :     }
     415                 :     else
     416                 :     {
     417                 :         /* The caller will free the lock by calling dshash_release_lock. */
     418          469959 :         return ENTRY_FROM_ITEM(item);
     419                 :     }
     420                 : }
     421                 : 
     422                 : /*
     423                 :  * Returns a pointer to an exclusively locked item which must be released with
     424                 :  * dshash_release_lock.  If the key is found in the hash table, 'found' is set
     425                 :  * to true and a pointer to the existing entry is returned.  If the key is not
     426                 :  * found, 'found' is set to false, and a pointer to a newly created entry is
     427                 :  * returned.
     428                 :  *
     429                 :  * Notes above dshash_find() regarding locking and error handling equally
     430                 :  * apply here.
     431                 :  */
     432                 : void *
     433          301269 : dshash_find_or_insert(dshash_table *hash_table,
     434                 :                       const void *key,
     435                 :                       bool *found)
     436                 : {
     437                 :     dshash_hash hash;
     438                 :     size_t      partition_index;
     439                 :     dshash_partition *partition;
     440                 :     dshash_table_item *item;
     441                 : 
     442          301269 :     hash = hash_key(hash_table, key);
     443          301269 :     partition_index = PARTITION_FOR_HASH(hash);
     444          301269 :     partition = &hash_table->control->partitions[partition_index];
     445                 : 
     446          301269 :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     447          301269 :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     448                 : 
     449          301269 : restart:
     450          304933 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition_index),
     451                 :                   LW_EXCLUSIVE);
     452          304933 :     ensure_valid_bucket_pointers(hash_table);
     453                 : 
     454                 :     /* Search the active bucket. */
     455          304933 :     item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
     456                 : 
     457          304933 :     if (item)
     458              18 :         *found = true;
     459                 :     else
     460                 :     {
     461          304915 :         *found = false;
     462                 : 
     463                 :         /* Check if we are getting too full. */
     464          304915 :         if (partition->count > MAX_COUNT_PER_PARTITION(hash_table))
     465                 :         {
     466                 :             /*
     467                 :              * The load factor (= keys / buckets) for all buckets protected by
     468                 :              * this partition is > 0.75.  Presumably the same applies
     469                 :              * generally across the whole hash table (though we don't attempt
     470                 :              * to track that directly to avoid contention on some kind of
     471                 :              * central counter; we just assume that this partition is
     472                 :              * representative).  This is a good time to resize.
     473                 :              *
     474                 :              * Give up our existing lock first, because resizing needs to
     475                 :              * reacquire all the locks in the right order to avoid deadlocks.
     476                 :              */
     477            3664 :             LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
     478            3664 :             resize(hash_table, hash_table->size_log2 + 1);
     479                 : 
     480            3664 :             goto restart;
     481                 :         }
     482                 : 
     483                 :         /* Finally we can try to insert the new item. */
     484          301251 :         item = insert_into_bucket(hash_table, key,
     485          301251 :                                   &BUCKET_FOR_HASH(hash_table, hash));
     486          301251 :         item->hash = hash;
     487                 :         /* Adjust per-lock-partition counter for load factor knowledge. */
     488          301251 :         ++partition->count;
     489                 :     }
     490                 : 
     491                 :     /* The caller must release the lock with dshash_release_lock. */
     492          301269 :     return ENTRY_FROM_ITEM(item);
     493                 : }
     494                 : 
     495                 : /*
     496                 :  * Remove an entry by key.  Returns true if the key was found and the
     497                 :  * corresponding entry was removed.
     498                 :  *
     499                 :  * To delete an entry that you already have a pointer to, see
     500                 :  * dshash_delete_entry.
     501                 :  */
     502                 : bool
     503             108 : dshash_delete_key(dshash_table *hash_table, const void *key)
     504                 : {
     505                 :     dshash_hash hash;
     506                 :     size_t      partition;
     507                 :     bool        found;
     508                 : 
     509             108 :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     510             108 :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     511                 : 
     512             108 :     hash = hash_key(hash_table, key);
     513             108 :     partition = PARTITION_FOR_HASH(hash);
     514                 : 
     515             108 :     LWLockAcquire(PARTITION_LOCK(hash_table, partition), LW_EXCLUSIVE);
     516             108 :     ensure_valid_bucket_pointers(hash_table);
     517                 : 
     518             108 :     if (delete_key_from_bucket(hash_table, key,
     519             108 :                                &BUCKET_FOR_HASH(hash_table, hash)))
     520                 :     {
     521              76 :         Assert(hash_table->control->partitions[partition].count > 0);
     522              76 :         found = true;
     523              76 :         --hash_table->control->partitions[partition].count;
     524                 :     }
     525                 :     else
     526              32 :         found = false;
     527                 : 
     528             108 :     LWLockRelease(PARTITION_LOCK(hash_table, partition));
     529                 : 
     530             108 :     return found;
     531                 : }
     532                 : 
     533                 : /*
     534                 :  * Remove an entry.  The entry must already be exclusively locked, and must
     535                 :  * have been obtained by dshash_find or dshash_find_or_insert.  Note that this
     536                 :  * function releases the lock just like dshash_release_lock.
     537                 :  *
     538                 :  * To delete an entry by key, see dshash_delete_key.
     539                 :  */
     540                 : void
     541           28096 : dshash_delete_entry(dshash_table *hash_table, void *entry)
     542                 : {
     543           28096 :     dshash_table_item *item = ITEM_FROM_ENTRY(entry);
     544           28096 :     size_t      partition = PARTITION_FOR_HASH(item->hash);
     545                 : 
     546           28096 :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     547           28096 :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
     548                 :                                 LW_EXCLUSIVE));
     549                 : 
     550           28096 :     delete_item(hash_table, item);
     551           28096 :     LWLockRelease(PARTITION_LOCK(hash_table, partition));
     552           28096 : }
     553                 : 
     554                 : /*
     555                 :  * Unlock an entry which was locked by dshash_find or dshash_find_or_insert.
     556                 :  */
     557                 : void
     558          743132 : dshash_release_lock(dshash_table *hash_table, void *entry)
     559                 : {
     560          743132 :     dshash_table_item *item = ITEM_FROM_ENTRY(entry);
     561          743132 :     size_t      partition_index = PARTITION_FOR_HASH(item->hash);
     562                 : 
     563          743132 :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     564                 : 
     565          743132 :     LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
     566          743132 : }
     567                 : 
     568                 : /*
     569                 :  * A compare function that forwards to memcmp.
     570                 :  */
     571                 : int
     572             155 : dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
     573                 : {
     574             155 :     return memcmp(a, b, size);
     575                 : }
     576                 : 
     577                 : /*
     578                 :  * A hash function that forwards to tag_hash.
     579                 :  */
     580                 : dshash_hash
     581             403 : dshash_memhash(const void *v, size_t size, void *arg)
     582                 : {
     583             403 :     return tag_hash(v, size);
     584                 : }
     585                 : 
     586                 : /*
     587                 :  * Sequentially scan through dshash table and return all the elements one by
     588                 :  * one, return NULL when all elements have been returned.
     589                 :  *
     590                 :  * dshash_seq_term needs to be called when a scan finished.  The caller may
     591                 :  * delete returned elements midst of a scan by using dshash_delete_current()
     592                 :  * if exclusive = true.
     593                 :  */
     594                 : void
     595            1462 : dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table,
     596                 :                 bool exclusive)
     597                 : {
     598            1462 :     status->hash_table = hash_table;
     599            1462 :     status->curbucket = 0;
     600            1462 :     status->nbuckets = 0;
     601            1462 :     status->curitem = NULL;
     602            1462 :     status->pnextitem = InvalidDsaPointer;
     603            1462 :     status->curpartition = -1;
     604            1462 :     status->exclusive = exclusive;
     605            1462 : }
     606                 : 
     607                 : /*
     608                 :  * Returns the next element.
     609                 :  *
     610                 :  * Returned elements are locked and the caller may not release the lock. It is
     611                 :  * released by future calls to dshash_seq_next() or dshash_seq_term().
     612                 :  */
     613                 : void *
     614          259987 : dshash_seq_next(dshash_seq_status *status)
     615                 : {
     616                 :     dsa_pointer next_item_pointer;
     617                 : 
     618                 :     /*
     619                 :      * Not yet holding any partition locks. Need to determine the size of the
     620                 :      * hash table, it could have been resized since we were looking last.
     621                 :      * Since we iterate in partition order, we can start by unconditionally
     622                 :      * lock partition 0.
     623                 :      *
     624                 :      * Once we hold the lock, no resizing can happen until the scan ends. So
     625                 :      * we don't need to repeatedly call ensure_valid_bucket_pointers().
     626                 :      */
     627          259987 :     if (status->curpartition == -1)
     628                 :     {
     629            1462 :         Assert(status->curbucket == 0);
     630            1462 :         ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(status->hash_table);
     631                 : 
     632            1462 :         status->curpartition = 0;
     633                 : 
     634            1462 :         LWLockAcquire(PARTITION_LOCK(status->hash_table,
     635                 :                                      status->curpartition),
     636            1462 :                       status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
     637                 : 
     638            1462 :         ensure_valid_bucket_pointers(status->hash_table);
     639                 : 
     640            1462 :         status->nbuckets =
     641            1462 :             NUM_BUCKETS(status->hash_table->control->size_log2);
     642            1462 :         next_item_pointer = status->hash_table->buckets[status->curbucket];
     643                 :     }
     644                 :     else
     645          258525 :         next_item_pointer = status->pnextitem;
     646                 : 
     647          259987 :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(status->hash_table,
     648                 :                                                status->curpartition),
     649                 :                                 status->exclusive ? LW_EXCLUSIVE : LW_SHARED));
     650                 : 
     651                 :     /* Move to the next bucket if we finished the current bucket */
     652         1910365 :     while (!DsaPointerIsValid(next_item_pointer))
     653                 :     {
     654                 :         int         next_partition;
     655                 : 
     656         1651840 :         if (++status->curbucket >= status->nbuckets)
     657                 :         {
     658                 :             /* all buckets have been scanned. finish. */
     659            1462 :             return NULL;
     660                 :         }
     661                 : 
     662                 :         /* Check if move to the next partition */
     663         1650378 :         next_partition =
     664         1650378 :             PARTITION_FOR_BUCKET_INDEX(status->curbucket,
     665                 :                                        status->hash_table->size_log2);
     666                 : 
     667         1650378 :         if (status->curpartition != next_partition)
     668                 :         {
     669                 :             /*
     670                 :              * Move to the next partition. Lock the next partition then
     671                 :              * release the current, not in the reverse order to avoid
     672                 :              * concurrent resizing.  Avoid dead lock by taking lock in the
     673                 :              * same order with resize().
     674                 :              */
     675          185674 :             LWLockAcquire(PARTITION_LOCK(status->hash_table,
     676                 :                                          next_partition),
     677          185674 :                           status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
     678          185674 :             LWLockRelease(PARTITION_LOCK(status->hash_table,
     679                 :                                          status->curpartition));
     680          185674 :             status->curpartition = next_partition;
     681                 :         }
     682                 : 
     683         1650378 :         next_item_pointer = status->hash_table->buckets[status->curbucket];
     684                 :     }
     685                 : 
     686          258525 :     status->curitem =
     687          258525 :         dsa_get_address(status->hash_table->area, next_item_pointer);
     688                 : 
     689                 :     /*
     690                 :      * The caller may delete the item. Store the next item in case of
     691                 :      * deletion.
     692                 :      */
     693          258525 :     status->pnextitem = status->curitem->next;
     694                 : 
     695          258525 :     return ENTRY_FROM_ITEM(status->curitem);
     696                 : }
     697                 : 
     698                 : /*
     699                 :  * Terminates the seqscan and release all locks.
     700                 :  *
     701                 :  * Needs to be called after finishing or when exiting a seqscan.
     702                 :  */
     703                 : void
     704            1462 : dshash_seq_term(dshash_seq_status *status)
     705                 : {
     706            1462 :     if (status->curpartition >= 0)
     707            1462 :         LWLockRelease(PARTITION_LOCK(status->hash_table, status->curpartition));
     708            1462 : }
     709                 : 
     710                 : /*
     711                 :  * Remove the current entry of the seq scan.
     712                 :  */
     713                 : void
     714            1061 : dshash_delete_current(dshash_seq_status *status)
     715                 : {
     716            1061 :     dshash_table *hash_table = status->hash_table;
     717            1061 :     dshash_table_item *item = status->curitem;
     718                 :     size_t      partition PG_USED_FOR_ASSERTS_ONLY;
     719                 : 
     720            1061 :     partition = PARTITION_FOR_HASH(item->hash);
     721                 : 
     722            1061 :     Assert(status->exclusive);
     723            1061 :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     724            1061 :     Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
     725                 :                                 LW_EXCLUSIVE));
     726                 : 
     727            1061 :     delete_item(hash_table, item);
     728            1061 : }
     729                 : 
     730                 : /*
     731                 :  * Print debugging information about the internal state of the hash table to
     732                 :  * stderr.  The caller must hold no partition locks.
     733                 :  */
     734                 : void
     735 UBC           0 : dshash_dump(dshash_table *hash_table)
     736                 : {
     737                 :     size_t      i;
     738                 :     size_t      j;
     739                 : 
     740               0 :     Assert(hash_table->control->magic == DSHASH_MAGIC);
     741               0 :     ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);
     742                 : 
     743               0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     744                 :     {
     745               0 :         Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
     746               0 :         LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_SHARED);
     747                 :     }
     748                 : 
     749               0 :     ensure_valid_bucket_pointers(hash_table);
     750                 : 
     751               0 :     fprintf(stderr,
     752               0 :             "hash table size = %zu\n", (size_t) 1 << hash_table->size_log2);
     753               0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     754                 :     {
     755               0 :         dshash_partition *partition = &hash_table->control->partitions[i];
     756               0 :         size_t      begin = BUCKET_INDEX_FOR_PARTITION(i, hash_table->size_log2);
     757               0 :         size_t      end = BUCKET_INDEX_FOR_PARTITION(i + 1, hash_table->size_log2);
     758                 : 
     759               0 :         fprintf(stderr, "  partition %zu\n", i);
     760               0 :         fprintf(stderr,
     761                 :                 "    active buckets (key count = %zu)\n", partition->count);
     762                 : 
     763               0 :         for (j = begin; j < end; ++j)
     764                 :         {
     765               0 :             size_t      count = 0;
     766               0 :             dsa_pointer bucket = hash_table->buckets[j];
     767                 : 
     768               0 :             while (DsaPointerIsValid(bucket))
     769                 :             {
     770                 :                 dshash_table_item *item;
     771                 : 
     772               0 :                 item = dsa_get_address(hash_table->area, bucket);
     773                 : 
     774               0 :                 bucket = item->next;
     775               0 :                 ++count;
     776                 :             }
     777               0 :             fprintf(stderr, "      bucket %zu (key count = %zu)\n", j, count);
     778                 :         }
     779                 :     }
     780                 : 
     781               0 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     782               0 :         LWLockRelease(PARTITION_LOCK(hash_table, i));
     783               0 : }
     784                 : 
     785                 : /*
     786                 :  * Delete a locked item to which we have a pointer.
     787                 :  */
     788                 : static void
     789 CBC       29157 : delete_item(dshash_table *hash_table, dshash_table_item *item)
     790                 : {
     791           29157 :     size_t      hash = item->hash;
     792           29157 :     size_t      partition = PARTITION_FOR_HASH(hash);
     793                 : 
     794           29157 :     Assert(LWLockHeldByMe(PARTITION_LOCK(hash_table, partition)));
     795                 : 
     796           29157 :     if (delete_item_from_bucket(hash_table, item,
     797           29157 :                                 &BUCKET_FOR_HASH(hash_table, hash)))
     798                 :     {
     799           29157 :         Assert(hash_table->control->partitions[partition].count > 0);
     800           29157 :         --hash_table->control->partitions[partition].count;
     801                 :     }
     802                 :     else
     803                 :     {
     804 UBC           0 :         Assert(false);
     805                 :     }
     806 CBC       29157 : }
     807                 : 
     808                 : /*
     809                 :  * Grow the hash table if necessary to the requested number of buckets.  The
     810                 :  * requested size must be double some previously observed size.
     811                 :  *
     812                 :  * Must be called without any partition lock held.
     813                 :  */
     814                 : static void
     815            3664 : resize(dshash_table *hash_table, size_t new_size_log2)
     816                 : {
     817                 :     dsa_pointer old_buckets;
     818                 :     dsa_pointer new_buckets_shared;
     819                 :     dsa_pointer *new_buckets;
     820                 :     size_t      size;
     821            3664 :     size_t      new_size = ((size_t) 1) << new_size_log2;
     822                 :     size_t      i;
     823                 : 
     824                 :     /*
     825                 :      * Acquire the locks for all lock partitions.  This is expensive, but we
     826                 :      * shouldn't have to do it many times.
     827                 :      */
     828          472656 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     829                 :     {
     830          468992 :         Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
     831                 : 
     832          468992 :         LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_EXCLUSIVE);
     833          468992 :         if (i == 0 && hash_table->control->size_log2 >= new_size_log2)
     834                 :         {
     835                 :             /*
     836                 :              * Another backend has already increased the size; we can avoid
     837                 :              * obtaining all the locks and return early.
     838                 :              */
     839 UBC           0 :             LWLockRelease(PARTITION_LOCK(hash_table, 0));
     840               0 :             return;
     841                 :         }
     842                 :     }
     843                 : 
     844 CBC        3664 :     Assert(new_size_log2 == hash_table->control->size_log2 + 1);
     845                 : 
     846                 :     /* Allocate the space for the new table. */
     847            3664 :     new_buckets_shared = dsa_allocate0(hash_table->area,
     848                 :                                        sizeof(dsa_pointer) * new_size);
     849            3664 :     new_buckets = dsa_get_address(hash_table->area, new_buckets_shared);
     850                 : 
     851                 :     /*
     852                 :      * We've allocated the new bucket array; all that remains to do now is to
     853                 :      * reinsert all items, which amounts to adjusting all the pointers.
     854                 :      */
     855            3664 :     size = ((size_t) 1) << hash_table->control->size_log2;
     856         1565904 :     for (i = 0; i < size; ++i)
     857                 :     {
     858         1562240 :         dsa_pointer item_pointer = hash_table->buckets[i];
     859                 : 
     860         1725682 :         while (DsaPointerIsValid(item_pointer))
     861                 :         {
     862                 :             dshash_table_item *item;
     863                 :             dsa_pointer next_item_pointer;
     864                 : 
     865          163442 :             item = dsa_get_address(hash_table->area, item_pointer);
     866          163442 :             next_item_pointer = item->next;
     867          163442 :             insert_item_into_bucket(hash_table, item_pointer, item,
     868          163442 :                                     &new_buckets[BUCKET_INDEX_FOR_HASH_AND_SIZE(item->hash,
     869                 :                                                                                 new_size_log2)]);
     870          163442 :             item_pointer = next_item_pointer;
     871                 :         }
     872                 :     }
     873                 : 
     874                 :     /* Swap the hash table into place and free the old one. */
     875            3664 :     old_buckets = hash_table->control->buckets;
     876            3664 :     hash_table->control->buckets = new_buckets_shared;
     877            3664 :     hash_table->control->size_log2 = new_size_log2;
     878            3664 :     hash_table->buckets = new_buckets;
     879            3664 :     dsa_free(hash_table->area, old_buckets);
     880                 : 
     881                 :     /* Release all the locks. */
     882          472656 :     for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
     883          468992 :         LWLockRelease(PARTITION_LOCK(hash_table, i));
     884                 : }
     885                 : 
     886                 : /*
     887                 :  * Make sure that our backend-local bucket pointers are up to date.  The
     888                 :  * caller must have locked one lock partition, which prevents resize() from
     889                 :  * running concurrently.
     890                 :  */
     891                 : static inline void
     892         1115600 : ensure_valid_bucket_pointers(dshash_table *hash_table)
     893                 : {
     894         1115600 :     if (hash_table->size_log2 != hash_table->control->size_log2)
     895                 :     {
     896           32594 :         hash_table->buckets = dsa_get_address(hash_table->area,
     897           16297 :                                               hash_table->control->buckets);
     898           16297 :         hash_table->size_log2 = hash_table->control->size_log2;
     899                 :     }
     900         1115600 : }
     901                 : 
     902                 : /*
     903                 :  * Scan a locked bucket for a match, using the provided compare function.
     904                 :  */
     905                 : static inline dshash_table_item *
     906         1114030 : find_in_bucket(dshash_table *hash_table, const void *key,
     907                 :                dsa_pointer item_pointer)
     908                 : {
     909         1271555 :     while (DsaPointerIsValid(item_pointer))
     910                 :     {
     911                 :         dshash_table_item *item;
     912                 : 
     913          627502 :         item = dsa_get_address(hash_table->area, item_pointer);
     914          627502 :         if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
     915          469977 :             return item;
     916          157525 :         item_pointer = item->next;
     917                 :     }
     918          644053 :     return NULL;
     919                 : }
     920                 : 
     921                 : /*
     922                 :  * Insert an already-allocated item into a bucket.
     923                 :  */
     924                 : static void
     925          464693 : insert_item_into_bucket(dshash_table *hash_table,
     926                 :                         dsa_pointer item_pointer,
     927                 :                         dshash_table_item *item,
     928                 :                         dsa_pointer *bucket)
     929                 : {
     930          464693 :     Assert(item == dsa_get_address(hash_table->area, item_pointer));
     931                 : 
     932          464693 :     item->next = *bucket;
     933          464693 :     *bucket = item_pointer;
     934          464693 : }
     935                 : 
     936                 : /*
     937                 :  * Allocate space for an entry with the given key and insert it into the
     938                 :  * provided bucket.
     939                 :  */
     940                 : static dshash_table_item *
     941          301251 : insert_into_bucket(dshash_table *hash_table,
     942                 :                    const void *key,
     943                 :                    dsa_pointer *bucket)
     944                 : {
     945                 :     dsa_pointer item_pointer;
     946                 :     dshash_table_item *item;
     947                 : 
     948          301251 :     item_pointer = dsa_allocate(hash_table->area,
     949                 :                                 hash_table->params.entry_size +
     950                 :                                 MAXALIGN(sizeof(dshash_table_item)));
     951          301251 :     item = dsa_get_address(hash_table->area, item_pointer);
     952          301251 :     memcpy(ENTRY_FROM_ITEM(item), key, hash_table->params.key_size);
     953          301251 :     insert_item_into_bucket(hash_table, item_pointer, item, bucket);
     954          301251 :     return item;
     955                 : }
     956                 : 
     957                 : /*
     958                 :  * Search a bucket for a matching key and delete it.
     959                 :  */
     960                 : static bool
     961             108 : delete_key_from_bucket(dshash_table *hash_table,
     962                 :                        const void *key,
     963                 :                        dsa_pointer *bucket_head)
     964                 : {
     965             108 :     while (DsaPointerIsValid(*bucket_head))
     966                 :     {
     967                 :         dshash_table_item *item;
     968                 : 
     969              76 :         item = dsa_get_address(hash_table->area, *bucket_head);
     970                 : 
     971              76 :         if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
     972                 :         {
     973                 :             dsa_pointer next;
     974                 : 
     975              76 :             next = item->next;
     976              76 :             dsa_free(hash_table->area, *bucket_head);
     977              76 :             *bucket_head = next;
     978                 : 
     979              76 :             return true;
     980                 :         }
     981 UBC           0 :         bucket_head = &item->next;
     982                 :     }
     983 CBC          32 :     return false;
     984                 : }
     985                 : 
     986                 : /*
     987                 :  * Delete the specified item from the bucket.
     988                 :  */
     989                 : static bool
     990           29157 : delete_item_from_bucket(dshash_table *hash_table,
     991                 :                         dshash_table_item *item,
     992                 :                         dsa_pointer *bucket_head)
     993                 : {
     994           29331 :     while (DsaPointerIsValid(*bucket_head))
     995                 :     {
     996                 :         dshash_table_item *bucket_item;
     997                 : 
     998           29331 :         bucket_item = dsa_get_address(hash_table->area, *bucket_head);
     999                 : 
    1000           29331 :         if (bucket_item == item)
    1001                 :         {
    1002                 :             dsa_pointer next;
    1003                 : 
    1004           29157 :             next = item->next;
    1005           29157 :             dsa_free(hash_table->area, *bucket_head);
    1006           29157 :             *bucket_head = next;
    1007           29157 :             return true;
    1008                 :         }
    1009             174 :         bucket_head = &bucket_item->next;
    1010                 :     }
    1011 UBC           0 :     return false;
    1012                 : }
    1013                 : 
    1014                 : /*
    1015                 :  * Compute the hash value for a key.
    1016                 :  */
    1017                 : static inline dshash_hash
    1018 CBC     1110474 : hash_key(dshash_table *hash_table, const void *key)
    1019                 : {
    1020         1110474 :     return hash_table->params.hash_function(key,
    1021                 :                                             hash_table->params.key_size,
    1022                 :                                             hash_table->arg);
    1023                 : }
    1024                 : 
    1025                 : /*
    1026                 :  * Check whether two keys compare equal.
    1027                 :  */
    1028                 : static inline bool
    1029          627578 : equal_keys(dshash_table *hash_table, const void *a, const void *b)
    1030                 : {
    1031          627578 :     return hash_table->params.compare_function(a, b,
    1032                 :                                                hash_table->params.key_size,
    1033          627578 :                                                hash_table->arg) == 0;
    1034                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a