LCOV - differential code coverage report

Current view:   top level - src/backend/utils/hash - dynahash.c (source / functions)
Current:        Differential Code Coverage HEAD vs 15
Current Date:   2023-04-08 15:15:32
Baseline:       15
Baseline Date:  2023-04-08 15:09:40

Coverage summary:
  Lines:      80.7 % (421 of 522 hit)   LBC 21  UIC 51  UBC 29  GBC 14  GIC 263  GNC 1  CBC 143  EUB 58  ECB 254  DCB 3
  Functions:  91.4 % (32 of 35 hit)     3  31  1  3  31

Legend: Lines: hit | not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * dynahash.c
       4                 :  *    dynamic chained hash tables
       5                 :  *
       6                 :  * dynahash.c supports both local-to-a-backend hash tables and hash tables in
       7                 :  * shared memory.  For shared hash tables, it is the caller's responsibility
       8                 :  * to provide appropriate access interlocking.  The simplest convention is
       9                 :  * that a single LWLock protects the whole hash table.  Searches (HASH_FIND or
      10                 :  * hash_seq_search) need only shared lock, but any update requires exclusive
      11                 :  * lock.  For heavily-used shared tables, the single-lock approach creates a
      12                 :  * concurrency bottleneck, so we also support "partitioned" locking wherein
      13                 :  * there are multiple LWLocks guarding distinct subsets of the table.  To use
      14                 :  * a hash table in partitioned mode, the HASH_PARTITION flag must be given
      15                 :  * to hash_create.  This prevents any attempt to split buckets on-the-fly.
      16                 :  * Therefore, each hash bucket chain operates independently, and no fields
      17                 :  * of the hash header change after init except nentries and freeList.
      18                 :  * (A partitioned table uses multiple copies of those fields, guarded by
      19                 :  * spinlocks, for additional concurrency.)
      20                 :  * This lets any subset of the hash buckets be treated as a separately
      21                 :  * lockable partition.  We expect callers to use the low-order bits of a
      22                 :  * lookup key's hash value as a partition number --- this will work because
      23                 :  * of the way calc_bucket() maps hash values to bucket numbers.
      24                 :  *
      25                 :  * For hash tables in shared memory, the memory allocator function should
      26                 :  * match malloc's semantics of returning NULL on failure.  For hash tables
      27                 :  * in local memory, we typically use palloc() which will throw error on
      28                 :  * failure.  The code in this file has to cope with both cases.
      29                 :  *
      30                 :  * dynahash.c provides support for these types of lookup keys:
      31                 :  *
      32                 :  * 1. Null-terminated C strings (truncated if necessary to fit in keysize),
      33                 :  * compared as though by strcmp().  This is selected by specifying the
      34                 :  * HASH_STRINGS flag to hash_create.
      35                 :  *
      36                 :  * 2. Arbitrary binary data of size keysize, compared as though by memcmp().
      37                 :  * (Caller must ensure there are no undefined padding bits in the keys!)
      38                 :  * This is selected by specifying the HASH_BLOBS flag to hash_create.
      39                 :  *
      40                 :  * 3. More complex key behavior can be selected by specifying user-supplied
      41                 :  * hashing, comparison, and/or key-copying functions.  At least a hashing
      42                 :  * function must be supplied; comparison defaults to memcmp() and key copying
      43                 :  * to memcpy() when a user-defined hashing function is selected.
      44                 :  *
      45                 :  * Compared to simplehash, dynahash has the following benefits:
      46                 :  *
      47                 :  * - It supports partitioning, which is useful for shared memory access using
      48                 :  *   locks.
      49                 :  * - Shared memory hashes are allocated in a fixed size area at startup and
      50                 :  *   are discoverable by name from other processes.
      51                 :  * - Because entries don't need to be moved in the case of hash conflicts,
      52                 :  *   dynahash has better performance for large entries.
      53                 :  * - Guarantees stable pointers to entries.
      54                 :  *
      55                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      56                 :  * Portions Copyright (c) 1994, Regents of the University of California
      57                 :  *
      58                 :  *
      59                 :  * IDENTIFICATION
      60                 :  *    src/backend/utils/hash/dynahash.c
      61                 :  *
      62                 :  *-------------------------------------------------------------------------
      63                 :  */
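A minimal caller-side sketch of the key conventions above; it is not part of dynahash.c, and DemoEntry, demo_usage and the literal sizes are invented, but the flags and HASHCTL fields are the ones hash_create() handles later in this file.

    #include "postgres.h"
    #include "utils/hsearch.h"

    typedef struct DemoEntry
    {
        Oid         key;            /* dynahash expects the key at the start of the entry */
        int         counter;        /* caller payload */
    } DemoEntry;

    static void
    demo_usage(void)
    {
        HASHCTL     ctl;
        HTAB       *demo;
        DemoEntry  *entry;
        bool        found;
        Oid         some_oid = 42;  /* hypothetical lookup key */

        /* Case 2 above: fixed-size binary key, compared as though by memcmp() */
        ctl.keysize = sizeof(Oid);
        ctl.entrysize = sizeof(DemoEntry);
        ctl.hcxt = CurrentMemoryContext;    /* read only because HASH_CONTEXT is given */
        demo = hash_create("demo table", 128, &ctl,
                           HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);

        /* Case 1 would use HASH_STRINGS instead; case 3 would set ctl.hash and HASH_FUNCTION */

        entry = (DemoEntry *) hash_search(demo, &some_oid, HASH_ENTER, &found);
        if (!found)
            entry->counter = 0;
        entry->counter++;
    }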
      64                 : 
      65                 : /*
      66                 :  * Original comments:
      67                 :  *
      68                 :  * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
      69                 :  * Coded into C, with minor code improvements, and with hsearch(3) interface,
      70                 :  * by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
      71                 :  * also, hcreate/hdestroy routines added to simulate hsearch(3).
      72                 :  *
      73                 :  * These routines simulate hsearch(3) and family, with the important
      74                 :  * difference that the hash table is dynamic - can grow indefinitely
      75                 :  * beyond its original size (as supplied to hcreate()).
      76                 :  *
      77                 :  * Performance appears to be comparable to that of hsearch(3).
      78                 :  * The 'source-code' options referred to in hsearch(3)'s 'man' page
      79                 :  * are not implemented; otherwise functionality is identical.
      80                 :  *
      81                 :  * Compilation controls:
      82                 :  * HASH_DEBUG controls some informative traces, mainly for debugging.
      83                 :  * HASH_STATISTICS causes HashAccesses and HashCollisions to be maintained;
      84                 :  * when combined with HASH_DEBUG, these are displayed by hdestroy().
      85                 :  *
      86                 :  * Problems & fixes to ejp@ausmelb.oz. WARNING: relies on pre-processor
      87                 :  * concatenation property, in probably unnecessary code 'optimization'.
      88                 :  *
      89                 :  * Modified margo@postgres.berkeley.edu February 1990
      90                 :  *      added multiple table interface
      91                 :  * Modified by sullivan@postgres.berkeley.edu April 1990
      92                 :  *      changed ctl structure for shared memory
      93                 :  */
      94                 : 
      95                 : #include "postgres.h"
      96                 : 
      97                 : #include <limits.h>
      98                 : 
      99                 : #include "access/xact.h"
     100                 : #include "common/hashfn.h"
     101                 : #include "port/pg_bitutils.h"
     102                 : #include "storage/shmem.h"
     103                 : #include "storage/spin.h"
     104                 : #include "utils/dynahash.h"
     105                 : #include "utils/memutils.h"
     106                 : 
     107                 : 
     108                 : /*
     109                 :  * Constants
     110                 :  *
     111                 :  * A hash table has a top-level "directory", each of whose entries points
     112                 :  * to a "segment" of ssize bucket headers.  The maximum number of hash
     113                 :  * buckets is thus dsize * ssize (but dsize may be expansible).  Of course,
     114                 :  * the number of records in the table can be larger, but we don't want a
     115                 :  * whole lot of records per bucket or performance goes down.
     116                 :  *
     117                 :  * In a hash table allocated in shared memory, the directory cannot be
     118                 :  * expanded because it must stay at a fixed address.  The directory size
     119                 :  * should be selected using hash_select_dirsize (and you'd better have
     120                 :  * a good idea of the maximum number of entries!).  For non-shared hash
     121                 :  * tables, the initial directory size can be left at the default.
     122                 :  */
     123                 : #define DEF_SEGSIZE            256
     124                 : #define DEF_SEGSIZE_SHIFT      8    /* must be log2(DEF_SEGSIZE) */
     125                 : #define DEF_DIRSIZE            256
     126                 : 
     127                 : /* Number of freelists to be used for a partitioned hash table. */
     128                 : #define NUM_FREELISTS           32
     129                 : 
     130                 : /* A hash bucket is a linked list of HASHELEMENTs */
     131                 : typedef HASHELEMENT *HASHBUCKET;
     132                 : 
     133                 : /* A hash segment is an array of bucket headers */
     134                 : typedef HASHBUCKET *HASHSEGMENT;
     135                 : 
     136                 : /*
     137                 :  * Per-freelist data.
     138                 :  *
     139                 :  * In a partitioned hash table, each freelist is associated with a specific
     140                 :  * set of hashcodes, as determined by the FREELIST_IDX() macro below.
     141                 :  * nentries tracks the number of live hashtable entries having those hashcodes
     142                 :  * (NOT the number of entries in the freelist, as you might expect).
     143                 :  *
     144                 :  * The coverage of a freelist might be more or less than one partition, so it
     145                 :  * needs its own lock rather than relying on caller locking.  Relying on that
     146                 :  * wouldn't work even if the coverage was the same, because of the occasional
     147                 :  * need to "borrow" entries from another freelist; see get_hash_entry().
     148                 :  *
     149                 :  * Using an array of FreeListData instead of separate arrays of mutexes,
     150                 :  * nentries and freeLists helps to reduce sharing of cache lines between
     151                 :  * different mutexes.
     152                 :  */
     153                 : typedef struct
     154                 : {
     155                 :     slock_t     mutex;          /* spinlock for this freelist */
     156                 :     long        nentries;       /* number of entries in associated buckets */
     157                 :     HASHELEMENT *freeList;      /* chain of free elements */
     158                 : } FreeListData;
     159                 : 
     160                 : /*
     161                 :  * Header structure for a hash table --- contains all changeable info
     162                 :  *
     163                 :  * In a shared-memory hash table, the HASHHDR is in shared memory, while
     164                 :  * each backend has a local HTAB struct.  For a non-shared table, there isn't
     165                 :  * any functional difference between HASHHDR and HTAB, but we separate them
     166                 :  * anyway to share code between shared and non-shared tables.
     167                 :  */
     168                 : struct HASHHDR
     169                 : {
     170                 :     /*
     171                 :      * The freelist can become a point of contention in high-concurrency hash
     172                 :      * tables, so we use an array of freelists, each with its own mutex and
     173                 :      * nentries count, instead of just a single one.  Although the freelists
     174                 :      * normally operate independently, we will scavenge entries from freelists
     175                 :      * other than a hashcode's default freelist when necessary.
     176                 :      *
     177                 :      * If the hash table is not partitioned, only freeList[0] is used and its
     178                 :      * spinlock is not used at all; callers' locking is assumed sufficient.
     179                 :      */
     180                 :     FreeListData freeList[NUM_FREELISTS];
     181                 : 
     182                 :     /* These fields can change, but not in a partitioned table */
     183                 :     /* Also, dsize can't change in a shared table, even if unpartitioned */
     184                 :     long        dsize;          /* directory size */
     185                 :     long        nsegs;          /* number of allocated segments (<= dsize) */
     186                 :     uint32      max_bucket;     /* ID of maximum bucket in use */
     187                 :     uint32      high_mask;      /* mask to modulo into entire table */
     188                 :     uint32      low_mask;       /* mask to modulo into lower half of table */
     189                 : 
     190                 :     /* These fields are fixed at hashtable creation */
     191                 :     Size        keysize;        /* hash key length in bytes */
     192                 :     Size        entrysize;      /* total user element size in bytes */
     193                 :     long        num_partitions; /* # partitions (must be power of 2), or 0 */
     194                 :     long        max_dsize;      /* 'dsize' limit if directory is fixed size */
     195                 :     long        ssize;          /* segment size --- must be power of 2 */
     196                 :     int         sshift;         /* segment shift = log2(ssize) */
     197                 :     int         nelem_alloc;    /* number of entries to allocate at once */
     198                 : 
     199                 : #ifdef HASH_STATISTICS
     200                 : 
     201                 :     /*
     202                 :      * Count statistics here.  NB: stats code doesn't bother with mutex, so
     203                 :      * counts could be corrupted a bit in a partitioned table.
     204                 :      */
     205                 :     long        accesses;
     206                 :     long        collisions;
     207                 : #endif
     208                 : };
     209                 : 
     210                 : #define IS_PARTITIONED(hctl)  ((hctl)->num_partitions != 0)
     211                 : 
     212                 : #define FREELIST_IDX(hctl, hashcode) \
     213                 :     (IS_PARTITIONED(hctl) ? (hashcode) % NUM_FREELISTS : 0)
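A worked instance with an illustrative hash value: in a partitioned table, a key hashing to 0x2A0F3B57 uses freelist 0x2A0F3B57 % 32 = 23 (its low five bits, 10111); in an unpartitioned table the expression always yields freeList[0].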
     214                 : 
     215                 : /*
     216                 :  * Top control structure for a hashtable --- in a shared table, each backend
     217                 :  * has its own copy (OK since no fields change at runtime)
     218                 :  */
     219                 : struct HTAB
     220                 : {
     221                 :     HASHHDR    *hctl;           /* => shared control information */
     222                 :     HASHSEGMENT *dir;           /* directory of segment starts */
     223                 :     HashValueFunc hash;         /* hash function */
     224                 :     HashCompareFunc match;      /* key comparison function */
     225                 :     HashCopyFunc keycopy;       /* key copying function */
     226                 :     HashAllocFunc alloc;        /* memory allocator */
     227                 :     MemoryContext hcxt;         /* memory context if default allocator used */
     228                 :     char       *tabname;        /* table name (for error messages) */
     229                 :     bool        isshared;       /* true if table is in shared memory */
     230                 :     bool        isfixed;        /* if true, don't enlarge */
     231                 : 
     232                 :     /* freezing a shared table isn't allowed, so we can keep state here */
     233                 :     bool        frozen;         /* true = no more inserts allowed */
     234                 : 
     235                 :     /* We keep local copies of these fixed values to reduce contention */
     236                 :     Size        keysize;        /* hash key length in bytes */
     237                 :     long        ssize;          /* segment size --- must be power of 2 */
     238                 :     int         sshift;         /* segment shift = log2(ssize) */
     239                 : };
     240                 : 
     241                 : /*
     242                 :  * Key (also entry) part of a HASHELEMENT
     243                 :  */
     244                 : #define ELEMENTKEY(helem)  (((char *)(helem)) + MAXALIGN(sizeof(HASHELEMENT)))
     245                 : 
     246                 : /*
     247                 :  * Obtain element pointer given pointer to key
     248                 :  */
     249                 : #define ELEMENT_FROM_KEY(key)  \
     250                 :     ((HASHELEMENT *) (((char *) (key)) - MAXALIGN(sizeof(HASHELEMENT))))
     251                 : 
     252                 : /*
     253                 :  * Fast MOD arithmetic, assuming that y is a power of 2 !
     254                 :  */
     255                 : #define MOD(x,y)               ((x) & ((y)-1))
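For example (illustrative numbers): MOD(37, 8) expands to 37 & 7 = 5, matching 37 % 8; the shortcut relies on the power-of-2 assumption, since MOD(37, 6) would give 37 & 5 = 5 while 37 % 6 is 1.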
     256                 : 
     257                 : #ifdef HASH_STATISTICS
     258                 : static long hash_accesses,
     259                 :             hash_collisions,
     260                 :             hash_expansions;
     261                 : #endif
     262                 : 
     263                 : /*
     264                 :  * Private function prototypes
     265                 :  */
     266                 : static void *DynaHashAlloc(Size size);
     267                 : static HASHSEGMENT seg_alloc(HTAB *hashp);
     268                 : static bool element_alloc(HTAB *hashp, int nelem, int freelist_idx);
     269                 : static bool dir_realloc(HTAB *hashp);
     270                 : static bool expand_table(HTAB *hashp);
     271                 : static HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx);
     272                 : static void hdefault(HTAB *hashp);
     273                 : static int  choose_nelem_alloc(Size entrysize);
     274                 : static bool init_htab(HTAB *hashp, long nelem);
     275                 : static void hash_corrupted(HTAB *hashp);
     276                 : static long next_pow2_long(long num);
     277                 : static int  next_pow2_int(long num);
     278                 : static void register_seq_scan(HTAB *hashp);
     279                 : static void deregister_seq_scan(HTAB *hashp);
     280                 : static bool has_seq_scans(HTAB *hashp);
     281                 : 
     282                 : 
     283                 : /*
     284                 :  * memory allocation support
     285                 :  */
     286                 : static MemoryContext CurrentDynaHashCxt = NULL;
     287                 : 
     288                 : static void *
     289 CBC     1206562 : DynaHashAlloc(Size size)
     290                 : {
     291         1206562 :     Assert(MemoryContextIsValid(CurrentDynaHashCxt));
     292 GNC     1206562 :     return MemoryContextAllocExtended(CurrentDynaHashCxt, size,
     293                 :                                       MCXT_ALLOC_NO_OOM);
     294                 : }
     295                 : 
     296                 : 
     297                 : /*
     298                 :  * HashCompareFunc for string keys
     299                 :  *
     300                 :  * Because we copy keys with strlcpy(), they will be truncated at keysize-1
     301                 :  * bytes, so we can only compare that many ... hence strncmp is almost but
     302                 :  * not quite the right thing.
     303                 :  */
     304                 : static int
     305 GIC      593000 : string_compare(const char *key1, const char *key2, Size keysize)
     306 ECB             : {
     307 GIC      593000 :     return strncmp(key1, key2, keysize - 1);
     308 ECB             : }
     309                 : 
     310                 : 
     311                 : /************************** CREATE ROUTINES **********************/
     312                 : 
     313                 : /*
     314                 :  * hash_create -- create a new dynamic hash table
     315                 :  *
     316                 :  *  tabname: a name for the table (for debugging purposes)
     317                 :  *  nelem: maximum number of elements expected
     318                 :  *  *info: additional table parameters, as indicated by flags
     319                 :  *  flags: bitmask indicating which parameters to take from *info
     320                 :  *
     321                 :  * The flags value *must* include HASH_ELEM.  (Formerly, this was nominally
     322                 :  * optional, but the default keysize and entrysize values were useless.)
     323                 :  * The flags value must also include exactly one of HASH_STRINGS, HASH_BLOBS,
     324                 :  * or HASH_FUNCTION, to define the key hashing semantics (C strings,
     325                 :  * binary blobs, or custom, respectively).  Callers specifying a custom
     326                 :  * hash function will likely also want to use HASH_COMPARE, and perhaps
     327                 :  * also HASH_KEYCOPY, to control key comparison and copying.
     328                 :  * Another often-used flag is HASH_CONTEXT, to allocate the hash table
     329                 :  * under info->hcxt rather than under TopMemoryContext; the default
     330                 :  * behavior is only suitable for session-lifespan hash tables.
     331                 :  * Other flags bits are special-purpose and seldom used, except for those
     332                 :  * associated with shared-memory hash tables, for which see ShmemInitHash().
     333                 :  *
     334                 :  * Fields in *info are read only when the associated flags bit is set.
     335                 :  * It is not necessary to initialize other fields of *info.
     336                 :  * Neither tabname nor *info need persist after the hash_create() call.
     337                 :  *
     338                 :  * Note: It is deprecated for callers of hash_create() to explicitly specify
     339                 :  * string_hash, tag_hash, uint32_hash, or oid_hash.  Just set HASH_STRINGS or
     340                 :  * HASH_BLOBS.  Use HASH_FUNCTION only when you want something other than
     341                 :  * one of these.
     342                 :  *
     343                 :  * Note: for a shared-memory hashtable, nelem needs to be a pretty good
     344                 :  * estimate, since we can't expand the table on the fly.  But an unshared
     345                 :  * hashtable can be expanded on-the-fly, so it's better for nelem to be
     346                 :  * on the small side and let the table grow if it's exceeded.  An overly
     347                 :  * large nelem will penalize hash_seq_search speed without buying much.
     348                 :  */
     349                 : HTAB *
     350 GIC      227592 : hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
     351 ECB             : {
     352                 :     HTAB       *hashp;
     353                 :     HASHHDR    *hctl;
     354                 : 
     355                 :     /*
     356                 :      * Hash tables now allocate space for key and data, but you have to say
     357                 :      * how much space to allocate.
     358                 :      */
     359 GIC      227592 :     Assert(flags & HASH_ELEM);
     360 CBC      227592 :     Assert(info->keysize > 0);
     361          227592 :     Assert(info->entrysize >= info->keysize);
     362 ECB             : 
     363                 :     /*
     364                 :      * For shared hash tables, we have a local hash header (HTAB struct) that
     365                 :      * we allocate in TopMemoryContext; all else is in shared memory.
     366                 :      *
     367                 :      * For non-shared hash tables, everything including the hash header is in
     368                 :      * a memory context created specially for the hash table --- this makes
     369                 :      * hash_destroy very simple.  The memory context is made a child of either
     370                 :      * a context specified by the caller, or TopMemoryContext if nothing is
     371                 :      * specified.
     372                 :      */
     373 GIC      227592 :     if (flags & HASH_SHARED_MEM)
     374 ECB             :     {
     375                 :         /* Set up to allocate the hash header */
     376 GIC       12785 :         CurrentDynaHashCxt = TopMemoryContext;
     377 ECB             :     }
     378                 :     else
     379                 :     {
     380                 :         /* Create the hash table's private memory context */
     381 GIC      214807 :         if (flags & HASH_CONTEXT)
     382 CBC      128360 :             CurrentDynaHashCxt = info->hcxt;
     383 ECB             :         else
     384 GIC       86447 :             CurrentDynaHashCxt = TopMemoryContext;
     385 CBC      214807 :         CurrentDynaHashCxt = AllocSetContextCreate(CurrentDynaHashCxt,
     386 ECB             :                                                    "dynahash",
     387                 :                                                    ALLOCSET_DEFAULT_SIZES);
     388                 :     }
     389                 : 
     390                 :     /* Initialize the hash header, plus a copy of the table name */
     391 GIC      227592 :     hashp = (HTAB *) DynaHashAlloc(sizeof(HTAB) + strlen(tabname) + 1);
     392 CBC     2958696 :     MemSet(hashp, 0, sizeof(HTAB));
     393 ECB             : 
     394 GIC      227592 :     hashp->tabname = (char *) (hashp + 1);
     395 CBC      227592 :     strcpy(hashp->tabname, tabname);
     396 ECB             : 
     397                 :     /* If we have a private context, label it with hashtable's name */
     398 GIC      227592 :     if (!(flags & HASH_SHARED_MEM))
     399 CBC      214807 :         MemoryContextSetIdentifier(CurrentDynaHashCxt, hashp->tabname);
     400 ECB             : 
     401                 :     /*
     402                 :      * Select the appropriate hash function (see comments at head of file).
     403                 :      */
     404 GIC      227592 :     if (flags & HASH_FUNCTION)
     405 ECB             :     {
     406 GIC       14270 :         Assert(!(flags & (HASH_BLOBS | HASH_STRINGS)));
     407 CBC       14270 :         hashp->hash = info->hash;
     408 ECB             :     }
     409 GIC      213322 :     else if (flags & HASH_BLOBS)
     410 ECB             :     {
     411 GIC      173060 :         Assert(!(flags & HASH_STRINGS));
     412 ECB             :         /* We can optimize hashing for common key sizes */
     413 GIC      173060 :         if (info->keysize == sizeof(uint32))
     414 CBC       77104 :             hashp->hash = uint32_hash;
     415 ECB             :         else
     416 GIC       95956 :             hashp->hash = tag_hash;
     417 ECB             :     }
     418                 :     else
     419                 :     {
     420                 :         /*
     421                 :          * string_hash used to be considered the default hash method, and in a
     422                 :          * non-assert build it effectively still is.  But we now consider it
     423                 :          * an assertion error to not say HASH_STRINGS explicitly.  To help
     424                 :          * catch mistaken usage of HASH_STRINGS, we also insist on a
     425                 :          * reasonably long string length: if the keysize is only 4 or 8 bytes,
     426                 :          * it's almost certainly an integer or pointer not a string.
     427                 :          */
     428 GIC       40262 :         Assert(flags & HASH_STRINGS);
     429 CBC       40262 :         Assert(info->keysize > 8);
     430 ECB             : 
     431 GIC       40262 :         hashp->hash = string_hash;
     432 ECB             :     }
     433                 : 
     434                 :     /*
     435                 :      * If you don't specify a match function, it defaults to string_compare if
     436                 :      * you used string_hash, and to memcmp otherwise.
     437                 :      *
     438                 :      * Note: explicitly specifying string_hash is deprecated, because this
     439                 :      * might not work for callers in loadable modules on some platforms due to
     440                 :      * referencing a trampoline instead of the string_hash function proper.
     441                 :      * Specify HASH_STRINGS instead.
     442                 :      */
     443 GIC      227592 :     if (flags & HASH_COMPARE)
     444 CBC       10618 :         hashp->match = info->match;
     445          216974 :     else if (hashp->hash == string_hash)
     446           40262 :         hashp->match = (HashCompareFunc) string_compare;
     447 ECB             :     else
     448 GIC      176712 :         hashp->match = memcmp;
     449 ECB             : 
     450                 :     /*
     451                 :      * Similarly, the key-copying function defaults to strlcpy or memcpy.
     452                 :      */
     453 GIC      227592 :     if (flags & HASH_KEYCOPY)
     454 LBC           0 :         hashp->keycopy = info->keycopy;
     455 GBC      227592 :     else if (hashp->hash == string_hash)
     456 ECB             :     {
     457                 :         /*
     458                 :          * The signature of keycopy is meant for memcpy(), which returns
     459                 :          * void*, but strlcpy() returns size_t.  Since we never use the return
     460                 :          * value of keycopy, and size_t is pretty much always the same size as
     461                 :          * void *, this should be safe.  The extra cast in the middle is to
     462                 :          * avoid warnings from -Wcast-function-type.
     463                 :          */
     464 GIC       40262 :         hashp->keycopy = (HashCopyFunc) (pg_funcptr_t) strlcpy;
     465 ECB             :     }
     466                 :     else
     467 GIC      187330 :         hashp->keycopy = memcpy;
     468 ECB             : 
     469                 :     /* And select the entry allocation function, too. */
     470 GIC      227592 :     if (flags & HASH_ALLOC)
     471 CBC       12785 :         hashp->alloc = info->alloc;
     472 ECB             :     else
     473 GIC      214807 :         hashp->alloc = DynaHashAlloc;
     474 ECB             : 
     475 GIC      227592 :     if (flags & HASH_SHARED_MEM)
     476 ECB             :     {
     477                 :         /*
     478                 :          * ctl structure and directory are preallocated for shared memory
     479                 :          * tables.  Note that HASH_DIRSIZE and HASH_ALLOC had better be set as
     480                 :          * well.
     481                 :          */
     482 GIC       12785 :         hashp->hctl = info->hctl;
     483 CBC       12785 :         hashp->dir = (HASHSEGMENT *) (((char *) info->hctl) + sizeof(HASHHDR));
     484           12785 :         hashp->hcxt = NULL;
     485           12785 :         hashp->isshared = true;
     486 ECB             : 
     487                 :         /* hash table already exists, we're just attaching to it */
     488 GIC       12785 :         if (flags & HASH_ATTACH)
     489 ECB             :         {
     490                 :             /* make local copies of some heavily-used values */
     491 UIC           0 :             hctl = hashp->hctl;
     492 UBC           0 :             hashp->keysize = hctl->keysize;
     493               0 :             hashp->ssize = hctl->ssize;
     494               0 :             hashp->sshift = hctl->sshift;
     495 EUB             : 
     496 UIC           0 :             return hashp;
     497 EUB             :         }
     498                 :     }
     499                 :     else
     500                 :     {
     501                 :         /* setup hash table defaults */
     502 GIC      214807 :         hashp->hctl = NULL;
     503 CBC      214807 :         hashp->dir = NULL;
     504          214807 :         hashp->hcxt = CurrentDynaHashCxt;
     505          214807 :         hashp->isshared = false;
     506 ECB             :     }
     507                 : 
     508 GIC      227592 :     if (!hashp->hctl)
     509 ECB             :     {
     510 GIC      214807 :         hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR));
     511 CBC      214807 :         if (!hashp->hctl)
     512 LBC           0 :             ereport(ERROR,
     513 EUB             :                     (errcode(ERRCODE_OUT_OF_MEMORY),
     514                 :                      errmsg("out of memory")));
     515                 :     }
     516                 : 
     517 GIC      227592 :     hashp->frozen = false;
     518 ECB             : 
     519 GIC      227592 :     hdefault(hashp);
     520 ECB             : 
     521 GIC      227592 :     hctl = hashp->hctl;
     522 ECB             : 
     523 GIC      227592 :     if (flags & HASH_PARTITION)
     524 ECB             :     {
     525                 :         /* Doesn't make sense to partition a local hash table */
     526 GIC        9130 :         Assert(flags & HASH_SHARED_MEM);
     527 ECB             : 
     528                 :         /*
     529                 :          * The number of partitions had better be a power of 2. Also, it must
     530                 :          * be less than INT_MAX (see init_htab()), so call the int version of
     531                 :          * next_pow2.
     532                 :          */
     533 GIC        9130 :         Assert(info->num_partitions == next_pow2_int(info->num_partitions));
     534 ECB             : 
     535 GIC        9130 :         hctl->num_partitions = info->num_partitions;
     536 ECB             :     }
     537                 : 
     538 GIC      227592 :     if (flags & HASH_SEGMENT)
     539 ECB             :     {
     540 UIC           0 :         hctl->ssize = info->ssize;
     541 UBC           0 :         hctl->sshift = my_log2(info->ssize);
     542 EUB             :         /* ssize had better be a power of 2 */
     543 UIC           0 :         Assert(hctl->ssize == (1L << hctl->sshift));
     544 EUB             :     }
     545                 : 
     546                 :     /*
     547                 :      * SHM hash tables have fixed directory size passed by the caller.
     548                 :      */
     549 GIC      227592 :     if (flags & HASH_DIRSIZE)
     550 ECB             :     {
     551 GIC       12785 :         hctl->max_dsize = info->max_dsize;
     552 CBC       12785 :         hctl->dsize = info->dsize;
     553 ECB             :     }
     554                 : 
     555                 :     /* remember the entry sizes, too */
     556 GIC      227592 :     hctl->keysize = info->keysize;
     557 CBC      227592 :     hctl->entrysize = info->entrysize;
     558 ECB             : 
     559                 :     /* make local copies of heavily-used constant fields */
     560 GIC      227592 :     hashp->keysize = hctl->keysize;
     561 CBC      227592 :     hashp->ssize = hctl->ssize;
     562          227592 :     hashp->sshift = hctl->sshift;
     563 ECB             : 
     564                 :     /* Build the hash directory structure */
     565 GIC      227592 :     if (!init_htab(hashp, nelem))
     566 LBC           0 :         elog(ERROR, "failed to initialize hash table \"%s\"", hashp->tabname);
     567 EUB             : 
     568                 :     /*
     569                 :      * For a shared hash table, preallocate the requested number of elements.
     570                 :      * This reduces problems with run-time out-of-shared-memory conditions.
     571                 :      *
     572                 :      * For a non-shared hash table, preallocate the requested number of
     573                 :      * elements if it's less than our chosen nelem_alloc.  This avoids wasting
     574                 :      * space if the caller correctly estimates a small table size.
     575                 :      */
     576 GIC      227592 :     if ((flags & HASH_SHARED_MEM) ||
     577 CBC      214807 :         nelem < hctl->nelem_alloc)
     578 ECB             :     {
     579                 :         int         i,
     580                 :                     freelist_partitions,
     581                 :                     nelem_alloc,
     582                 :                     nelem_alloc_first;
     583                 : 
     584                 :         /*
     585                 :          * If hash table is partitioned, give each freelist an equal share of
     586                 :          * the initial allocation.  Otherwise only freeList[0] is used.
     587                 :          */
     588 GIC       75292 :         if (IS_PARTITIONED(hashp->hctl))
     589 CBC        9130 :             freelist_partitions = NUM_FREELISTS;
     590 ECB             :         else
     591 GIC       66162 :             freelist_partitions = 1;
     592 ECB             : 
     593 GIC       75292 :         nelem_alloc = nelem / freelist_partitions;
     594 CBC       75292 :         if (nelem_alloc <= 0)
     595 LBC           0 :             nelem_alloc = 1;
     596 EUB             : 
     597                 :         /*
     598                 :          * Make sure we'll allocate all the requested elements; freeList[0]
     599                 :          * gets the excess if the request isn't divisible by NUM_FREELISTS.
     600                 :          */
     601 GIC       75292 :         if (nelem_alloc * freelist_partitions < nelem)
     602 CBC         307 :             nelem_alloc_first =
     603             307 :                 nelem - nelem_alloc * (freelist_partitions - 1);
     604 ECB             :         else
     605 GIC       74985 :             nelem_alloc_first = nelem_alloc;
     606 ECB             : 
     607 GIC      433614 :         for (i = 0; i < freelist_partitions; i++)
     608 ECB             :         {
     609 GIC      358322 :             int         temp = (i == 0) ? nelem_alloc_first : nelem_alloc;
     610 ECB             : 
     611 GIC      358322 :             if (!element_alloc(hashp, temp, i))
     612 LBC           0 :                 ereport(ERROR,
     613 EUB             :                         (errcode(ERRCODE_OUT_OF_MEMORY),
     614                 :                          errmsg("out of memory")));
     615                 :         }
     616                 :     }
     617                 : 
     618 GIC      227592 :     if (flags & HASH_FIXED_SIZE)
     619 CBC        5478 :         hashp->isfixed = true;
     620          227592 :     return hashp;
     621 ECB             : }
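For the shared-memory branch above, callers normally do not call hash_create() directly: ShmemInitHash() (referenced in the header comment of hash_create) is expected to supply the shared-memory-specific flags such as HASH_SHARED_MEM, HASH_ALLOC and HASH_DIRSIZE.  A hedged sketch of creating a partitioned shared table; DemoTag, DemoSharedEntry, DemoSharedHash and the sizes are invented:

    #include "postgres.h"
    #include "storage/shmem.h"
    #include "utils/hsearch.h"

    typedef struct DemoTag
    {
        Oid         dbid;
        Oid         relid;          /* no padding bits, as HASH_BLOBS requires */
    } DemoTag;

    typedef struct DemoSharedEntry
    {
        DemoTag     tag;            /* key comes first in the entry */
        int         refcount;
    } DemoSharedEntry;

    static HTAB *DemoSharedHash;

    static void
    demo_shmem_init(void)
    {
        HASHCTL     info;

        info.keysize = sizeof(DemoTag);
        info.entrysize = sizeof(DemoSharedEntry);
        info.num_partitions = 16;   /* power of 2, read because HASH_PARTITION is set */

        DemoSharedHash = ShmemInitHash("Demo Shared Table",
                                       1024,    /* init_size: needs to be a good estimate */
                                       1024,    /* max_size: a shared table can't expand on the fly */
                                       &info,
                                       HASH_ELEM | HASH_BLOBS | HASH_PARTITION);
    }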
     622                 : 
     623                 : /*
     624                 :  * Set default HASHHDR parameters.
     625                 :  */
     626                 : static void
     627 GIC      227592 : hdefault(HTAB *hashp)
     628 ECB             : {
     629 GIC      227592 :     HASHHDR    *hctl = hashp->hctl;
     630 ECB             : 
     631 GIC    24352344 :     MemSet(hctl, 0, sizeof(HASHHDR));
     632 ECB             : 
     633 GIC      227592 :     hctl->dsize = DEF_DIRSIZE;
     634 CBC      227592 :     hctl->nsegs = 0;
     635 ECB             : 
     636 GIC      227592 :     hctl->num_partitions = 0;    /* not partitioned */
     637 ECB             : 
     638                 :     /* table has no fixed maximum size */
     639 GIC      227592 :     hctl->max_dsize = NO_MAX_DSIZE;
     640 ECB             : 
     641 GIC      227592 :     hctl->ssize = DEF_SEGSIZE;
     642 CBC      227592 :     hctl->sshift = DEF_SEGSIZE_SHIFT;
     643 ECB             : 
     644                 : #ifdef HASH_STATISTICS
     645                 :     hctl->accesses = hctl->collisions = 0;
     646                 : #endif
     647 GIC      227592 : }
     648 ECB             : 
     649                 : /*
     650                 :  * Given the user-specified entry size, choose nelem_alloc, ie, how many
     651                 :  * elements to add to the hash table when we need more.
     652                 :  */
     653                 : static int
     654 GIC      246761 : choose_nelem_alloc(Size entrysize)
     655 ECB             : {
     656                 :     int         nelem_alloc;
     657                 :     Size        elementSize;
     658                 :     Size        allocSize;
     659                 : 
     660                 :     /* Each element has a HASHELEMENT header plus user data. */
     661                 :     /* NB: this had better match element_alloc() */
     662 GIC      246761 :     elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
     663 ECB             : 
     664                 :     /*
     665                 :      * The idea here is to choose nelem_alloc at least 32, but round up so
     666                 :      * that the allocation request will be a power of 2 or just less. This
     667                 :      * makes little difference for hash tables in shared memory, but for hash
     668                 :      * tables managed by palloc, the allocation request will be rounded up to
     669                 :      * a power of 2 anyway.  If we fail to take this into account, we'll waste
     670                 :      * as much as half the allocated space.
     671                 :      */
     672 GIC      246761 :     allocSize = 32 * 4;         /* assume elementSize at least 8 */
     673 ECB             :     do
     674                 :     {
     675 GIC      978326 :         allocSize <<= 1;
     676 CBC      978326 :         nelem_alloc = allocSize / elementSize;
     677          978326 :     } while (nelem_alloc < 32);
     678 ECB             : 
     679 GIC      246761 :     return nelem_alloc;
     680 ECB             : }
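A worked pass through the loop above, assuming a 64-byte entrysize and MAXALIGN(sizeof(HASHELEMENT)) == 16 (typical of 64-bit builds): elementSize is 80, allocSize doubles through 256, 512, 1024, 2048 and 4096, giving nelem_alloc values of 3, 6, 12, 25 and finally 51, the first result of at least 32; the eventual request of 51 * 80 = 4080 bytes sits just under the 4096-byte power of 2, as intended.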
     681                 : 
     682                 : /*
     683                 :  * Compute derived fields of hctl and build the initial directory/segment
     684                 :  * arrays
     685                 :  */
     686                 : static bool
     687 GIC      227592 : init_htab(HTAB *hashp, long nelem)
     688 ECB             : {
     689 GIC      227592 :     HASHHDR    *hctl = hashp->hctl;
     690 ECB             :     HASHSEGMENT *segp;
     691                 :     int         nbuckets;
     692                 :     int         nsegs;
     693                 :     int         i;
     694                 : 
     695                 :     /*
     696                 :      * initialize mutexes if it's a partitioned table
     697                 :      */
     698 GIC      227592 :     if (IS_PARTITIONED(hctl))
     699 CBC      301290 :         for (i = 0; i < NUM_FREELISTS; i++)
     700          292160 :             SpinLockInit(&(hctl->freeList[i].mutex));
     701 ECB             : 
     702                 :     /*
     703                 :      * Allocate space for the next greater power of two number of buckets,
     704                 :      * assuming a desired maximum load factor of 1.
     705                 :      */
     706 GIC      227592 :     nbuckets = next_pow2_int(nelem);
     707 ECB             : 
     708                 :     /*
     709                 :      * In a partitioned table, nbuckets must be at least equal to
     710                 :      * num_partitions; were it less, keys with apparently different partition
     711                 :      * numbers would map to the same bucket, breaking partition independence.
     712                 :      * (Normally nbuckets will be much bigger; this is just a safety check.)
     713                 :      */
     714 GIC      227592 :     while (nbuckets < hctl->num_partitions)
     715 LBC           0 :         nbuckets <<= 1;
     716 EUB             : 
     717 GIC      227592 :     hctl->max_bucket = hctl->low_mask = nbuckets - 1;
     718 CBC      227592 :     hctl->high_mask = (nbuckets << 1) - 1;
     719 ECB             : 
     720                 :     /*
     721                 :      * Figure number of directory segments needed, round up to a power of 2
     722                 :      */
     723 GIC      227592 :     nsegs = (nbuckets - 1) / hctl->ssize + 1;
     724 CBC      227592 :     nsegs = next_pow2_int(nsegs);
     725 ECB             : 
     726                 :     /*
     727                 :      * Make sure directory is big enough. If pre-allocated directory is too
     728                 :      * small, choke (caller screwed up).
     729                 :      */
     730 GIC      227592 :     if (nsegs > hctl->dsize)
     731 ECB             :     {
     732 UIC           0 :         if (!(hashp->dir))
     733 UBC           0 :             hctl->dsize = nsegs;
     734 EUB             :         else
     735 UIC           0 :             return false;
     736 EUB             :     }
     737                 : 
     738                 :     /* Allocate a directory */
     739 GIC      227592 :     if (!(hashp->dir))
     740 ECB             :     {
     741 GIC      214807 :         CurrentDynaHashCxt = hashp->hcxt;
     742 CBC      214807 :         hashp->dir = (HASHSEGMENT *)
     743          214807 :             hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT));
     744          214807 :         if (!hashp->dir)
     745 LBC           0 :             return false;
     746 EUB             :     }
     747                 : 
     748                 :     /* Allocate initial segments */
     749 GIC      892240 :     for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)
     750 ECB             :     {
     751 GIC      664648 :         *segp = seg_alloc(hashp);
     752 CBC      664648 :         if (*segp == NULL)
     753 LBC           0 :             return false;
     754 EUB             :     }
     755                 : 
     756                 :     /* Choose number of entries to allocate at a time */
     757 GIC      227592 :     hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize);
     758 ECB             : 
     759                 : #ifdef HASH_DEBUG
     760                 :     fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n",
     761                 :             "TABLE POINTER   ", hashp,
     762                 :             "DIRECTORY SIZE  ", hctl->dsize,
     763                 :             "SEGMENT SIZE    ", hctl->ssize,
     764                 :             "SEGMENT SHIFT   ", hctl->sshift,
     765                 :             "MAX BUCKET      ", hctl->max_bucket,
     766                 :             "HIGH MASK       ", hctl->high_mask,
     767                 :             "LOW  MASK       ", hctl->low_mask,
     768                 :             "NSEGS           ", hctl->nsegs);
     769                 : #endif
     770 GIC      227592 :     return true;
     771 ECB             : }
     772                 : 
     773                 : /*
     774                 :  * Estimate the space needed for a hashtable containing the given number
     775                 :  * of entries of given size.
     776                 :  * NOTE: this is used to estimate the footprint of hashtables in shared
     777                 :  * memory; therefore it does not count HTAB which is in local memory.
     778                 :  * NB: assumes that all hash structure parameters have default values!
     779                 :  */
     780                 : Size
     781 GIC       19169 : hash_estimate_size(long num_entries, Size entrysize)
     782 ECB             : {
     783                 :     Size        size;
     784                 :     long        nBuckets,
     785                 :                 nSegments,
     786                 :                 nDirEntries,
     787                 :                 nElementAllocs,
     788                 :                 elementSize,
     789                 :                 elementAllocCnt;
     790                 : 
     791                 :     /* estimate number of buckets wanted */
     792 GIC       19169 :     nBuckets = next_pow2_long(num_entries);
     793 ECB             :     /* # of segments needed for nBuckets */
     794 GIC       19169 :     nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
     795 ECB             :     /* directory entries */
     796 GIC       19169 :     nDirEntries = DEF_DIRSIZE;
     797 CBC       19169 :     while (nDirEntries < nSegments)
     798 LBC           0 :         nDirEntries <<= 1;        /* dir_alloc doubles dsize at each call */
     799 EUB             : 
     800                 :     /* fixed control info */
     801 GIC       19169 :     size = MAXALIGN(sizeof(HASHHDR));   /* but not HTAB, per above */
     802 ECB             :     /* directory */
     803 GIC       19169 :     size = add_size(size, mul_size(nDirEntries, sizeof(HASHSEGMENT)));
     804 ECB             :     /* segments */
     805 GIC       19169 :     size = add_size(size, mul_size(nSegments,
     806 ECB             :                                    MAXALIGN(DEF_SEGSIZE * sizeof(HASHBUCKET))));
     807                 :     /* elements --- allocated in groups of choose_nelem_alloc() entries */
     808 GIC       19169 :     elementAllocCnt = choose_nelem_alloc(entrysize);
     809 CBC       19169 :     nElementAllocs = (num_entries - 1) / elementAllocCnt + 1;
     810           19169 :     elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
     811           19169 :     size = add_size(size,
     812 ECB             :                     mul_size(nElementAllocs,
     813                 :                              mul_size(elementAllocCnt, elementSize)));
     814                 : 
     815 GIC       19169 :     return size;
     816 ECB             : }
     817                 : 
     818                 : /*
     819                 :  * Select an appropriate directory size for a hashtable with the given
     820                 :  * maximum number of entries.
     821                 :  * This is only needed for hashtables in shared memory, whose directories
     822                 :  * cannot be expanded dynamically.
     823                 :  * NB: assumes that all hash structure parameters have default values!
     824                 :  *
     825                 :  * XXX this had better agree with the behavior of init_htab()...
     826                 :  */
     827                 : long
     828 GIC       12785 : hash_select_dirsize(long num_entries)
     829 ECB             : {
     830                 :     long        nBuckets,
     831                 :                 nSegments,
     832                 :                 nDirEntries;
     833                 : 
     834                 :     /* estimate number of buckets wanted */
     835 GIC       12785 :     nBuckets = next_pow2_long(num_entries);
     836 ECB             :     /* # of segments needed for nBuckets */
     837 GIC       12785 :     nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
     838 ECB             :     /* directory entries */
     839 GIC       12785 :     nDirEntries = DEF_DIRSIZE;
     840 CBC       12785 :     while (nDirEntries < nSegments)
     841 LBC           0 :         nDirEntries <<= 1;        /* dir_alloc doubles dsize at each call */
     842 EUB             : 
     843 GIC       12785 :     return nDirEntries;
     844 ECB             : }
     845                 : 
     846                 : /*
     847                 :  * Compute the required initial memory allocation for a shared-memory
     848                 :  * hashtable with the given parameters.  We need space for the HASHHDR
     849                 :  * and for the (non expansible) directory.
     850                 :  */
     851                 : Size
     852 GIC       12785 : hash_get_shared_size(HASHCTL *info, int flags)
     853 ECB             : {
     854 GIC       12785 :     Assert(flags & HASH_DIRSIZE);
     855 CBC       12785 :     Assert(info->dsize == info->max_dsize);
     856           12785 :     return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT);
     857 ECB             : }
     858                 : 
     859                 : 
     860                 : /********************** DESTROY ROUTINES ************************/
     861                 : 
     862                 : void
     863 GIC       38331 : hash_destroy(HTAB *hashp)
     864 ECB             : {
     865 GIC       38331 :     if (hashp != NULL)
     866 ECB             :     {
     867                 :         /* allocation method must be one we know how to free, too */
     868 GIC       38331 :         Assert(hashp->alloc == DynaHashAlloc);
     869 ECB             :         /* so this hashtable must have its own context */
     870 GIC       38331 :         Assert(hashp->hcxt != NULL);
     871 ECB             : 
     872 GIC       38331 :         hash_stats("destroy", hashp);
     873 ECB             : 
     874                 :         /*
     875                 :          * Free everything by destroying the hash table's memory context.
     876                 :          */
     877 GIC       38331 :         MemoryContextDelete(hashp->hcxt);
     878 ECB             :     }
     879 GIC       38331 : }
     880 ECB             : 
     881                 : void
     882 GIC       38331 : hash_stats(const char *where, HTAB *hashp)
     883 ECB             : {
     884                 : #ifdef HASH_STATISTICS
     885                 :     fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
     886                 :             where, hashp->hctl->accesses, hashp->hctl->collisions);
     887                 : 
     888                 :     fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %u segmentcount %ld\n",
     889                 :             hash_get_num_entries(hashp), (long) hashp->hctl->keysize,
     890                 :             hashp->hctl->max_bucket, hashp->hctl->nsegs);
     891                 :     fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
     892                 :             where, hash_accesses, hash_collisions);
     893                 :     fprintf(stderr, "hash_stats: total expansions %ld\n",
     894                 :             hash_expansions);
     895                 : #endif
     896 GIC       38331 : }
     897 ECB             : 
     898                 : /*******************************SEARCH ROUTINES *****************************/
     899                 : 
     900                 : 
     901                 : /*
     902                 :  * get_hash_value -- exported routine to calculate a key's hash value
     903                 :  *
     904                 :  * We export this because for partitioned tables, callers need to compute
     905                 :  * the partition number (from the low-order bits of the hash value) before
     906                 :  * searching.
     907                 :  */
     908                 : uint32
     909 GIC    99338887 : get_hash_value(HTAB *hashp, const void *keyPtr)
     910 ECB             : {
     911 GIC    99338887 :     return hashp->hash(keyPtr, hashp->keysize);
     912 ECB             : }
     913                 : 
     914                 : /* Convert a hash value to a bucket number */
     915                 : static inline uint32
     916 GIC   232941995 : calc_bucket(HASHHDR *hctl, uint32 hash_val)
     917 ECB             : {
     918                 :     uint32      bucket;
     919                 : 
     920 GIC   232941995 :     bucket = hash_val & hctl->high_mask;
     921 CBC   232941995 :     if (bucket > hctl->max_bucket)
     922       105550962 :         bucket = bucket & hctl->low_mask;
     923 ECB             : 
     924 GIC   232941995 :     return bucket;
     925 ECB             : }
     926                 : 
     927                 : /*
     928                 :  * hash_search -- look up key in table and perform action
     929                 :  * hash_search_with_hash_value -- same, with key's hash value already computed
     930                 :  *
     931                 :  * action is one of:
     932                 :  *      HASH_FIND: look up key in table
     933                 :  *      HASH_ENTER: look up key in table, creating entry if not present
     934                 :  *      HASH_ENTER_NULL: same, but return NULL if out of memory
     935                 :  *      HASH_REMOVE: look up key in table, remove entry if present
     936                 :  *
     937                 :  * Return value is a pointer to the element found/entered/removed if any,
     938                 :  * or NULL if no match was found.  (NB: in the case of the REMOVE action,
     939                 :  * the result is a dangling pointer that shouldn't be dereferenced!)
     940                 :  *
     941                 :  * HASH_ENTER will normally ereport a generic "out of memory" error if
     942                 :  * it is unable to create a new entry.  The HASH_ENTER_NULL operation is
     943                 :  * the same except it will return NULL if out of memory.
     944                 :  *
     945                 :  * If foundPtr isn't NULL, then *foundPtr is set true if we found an
     946                 :  * existing entry in the table, false otherwise.  This is needed in the
     947                 :  * HASH_ENTER case, but is redundant with the return value otherwise.
     948                 :  *
     949                 :  * For hash_search_with_hash_value, the hashvalue parameter must have been
     950                 :  * calculated with get_hash_value().
     951                 :  */
     952                 : void *
     953 GIC   145036780 : hash_search(HTAB *hashp,
     954                 :             const void *keyPtr,
     955                 :             HASHACTION action,
     956                 :             bool *foundPtr)
     957 ECB             : {
     958 GIC   145036780 :     return hash_search_with_hash_value(hashp,
     959 ECB             :                                        keyPtr,
     960 GIC   145036780 :                                        hashp->hash(keyPtr, hashp->keysize),
     961                 :                                        action,
     962                 :                                        foundPtr);
     963                 : }
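
Continuing the illustrative MyEntry table, a sketch of the three common actions. Note the rules documented above: with HASH_ENTER the caller must initialize the non-key fields itself when *foundPtr comes back false, and the pointer returned by HASH_REMOVE may only be tested against NULL, never dereferenced.

    bool        found;
    uint32      key = 42;
    MyEntry    *entry;

    /* insert-or-find: creates the entry if it is not already present */
    entry = (MyEntry *) hash_search(htab, &key, HASH_ENTER, &found);
    if (!found)
        entry->count = 0;           /* caller fills in the data fields */
    entry->count++;

    /* plain lookup; NULL means no match */
    entry = (MyEntry *) hash_search(htab, &key, HASH_FIND, NULL);

    /* removal; result is usable only as a found/not-found flag */
    if (hash_search(htab, &key, HASH_REMOVE, NULL) == NULL)
        elog(LOG, "key %u was not in the table", key);
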
     964                 : 
     965 ECB             : void *
     966 GIC   232374870 : hash_search_with_hash_value(HTAB *hashp,
     967                 :                             const void *keyPtr,
     968                 :                             uint32 hashvalue,
     969                 :                             HASHACTION action,
     970                 :                             bool *foundPtr)
     971 ECB             : {
     972 CBC   232374870 :     HASHHDR    *hctl = hashp->hctl;
     973 GIC   232374870 :     int         freelist_idx = FREELIST_IDX(hctl, hashvalue);
     974                 :     Size        keysize;
     975                 :     uint32      bucket;
     976                 :     long        segment_num;
     977                 :     long        segment_ndx;
     978                 :     HASHSEGMENT segp;
     979                 :     HASHBUCKET  currBucket;
     980                 :     HASHBUCKET *prevBucketPtr;
     981                 :     HashCompareFunc match;
     982                 : 
     983                 : #ifdef HASH_STATISTICS
     984                 :     hash_accesses++;
     985                 :     hctl->accesses++;
     986                 : #endif
     987                 : 
     988                 :     /*
     989                 :      * If inserting, check if it is time to split a bucket.
     990                 :      *
      991                 :  * NOTE: failure to expand the table is not a fatal error; it just means we
     992                 :      * have to run at higher fill factor than we wanted.  However, if we're
     993                 :      * using the palloc allocator then it will throw error anyway on
     994                 :      * out-of-memory, so we must do this before modifying the table.
     995 ECB             :      */
     996 GIC   232374870 :     if (action == HASH_ENTER || action == HASH_ENTER_NULL)
     997                 :     {
     998                 :         /*
     999                 :          * Can't split if running in partitioned mode, nor if frozen, nor if
    1000                 :          * table is the subject of any active hash_seq_search scans.
    1001 ECB             :          */
    1002 CBC    55090909 :         if (hctl->freeList[0].nentries > (long) hctl->max_bucket &&
    1003          407775 :             !IS_PARTITIONED(hctl) && !hashp->frozen &&
    1004          407775 :             !has_seq_scans(hashp))
    1005 GIC      407775 :             (void) expand_table(hashp);
    1006                 :     }
    1007                 : 
    1008                 :     /*
    1009                 :      * Do the initial lookup
    1010 ECB             :      */
    1011 GIC   232374870 :     bucket = calc_bucket(hctl, hashvalue);
    1012 ECB             : 
    1013 CBC   232374870 :     segment_num = bucket >> hashp->sshift;
    1014 GIC   232374870 :     segment_ndx = MOD(bucket, hashp->ssize);
    1015 ECB             : 
    1016 GIC   232374870 :     segp = hashp->dir[segment_num];
    1017 ECB             : 
    1018 GBC   232374870 :     if (segp == NULL)
    1019 UIC           0 :         hash_corrupted(hashp);
    1020 ECB             : 
    1021 CBC   232374870 :     prevBucketPtr = &segp[segment_ndx];
    1022 GIC   232374870 :     currBucket = *prevBucketPtr;
    1023                 : 
    1024                 :     /*
    1025                 :      * Follow collision chain looking for matching key
    1026 ECB             :      */
    1027 CBC   232374870 :     match = hashp->match;        /* save one fetch in inner loop */
    1028 GIC   232374870 :     keysize = hashp->keysize;    /* ditto */
    1029 ECB             : 
    1030 GIC   276189852 :     while (currBucket != NULL)
    1031 ECB             :     {
    1032 CBC   409068297 :         if (currBucket->hashvalue == hashvalue &&
    1033       182629469 :             match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
    1034       182623846 :             break;
    1035        43814982 :         prevBucketPtr = &(currBucket->link);
    1036 GIC    43814982 :         currBucket = *prevBucketPtr;
    1037                 : #ifdef HASH_STATISTICS
    1038                 :         hash_collisions++;
    1039                 :         hctl->collisions++;
    1040                 : #endif
    1041                 :     }
    1042 ECB             : 
    1043 CBC   232374870 :     if (foundPtr)
    1044 GIC    56278453 :         *foundPtr = (bool) (currBucket != NULL);
    1045                 : 
    1046                 :     /*
    1047                 :      * OK, now what?
    1048 ECB             :      */
    1049 GIC   232374870 :     switch (action)
    1050 ECB             :     {
    1051 CBC   142500382 :         case HASH_FIND:
    1052       142500382 :             if (currBucket != NULL)
    1053       131844261 :                 return (void *) ELEMENTKEY(currBucket);
    1054 GIC    10656121 :             return NULL;
    1055 ECB             : 
    1056 CBC    34783579 :         case HASH_REMOVE:
    1057 GIC    34783579 :             if (currBucket != NULL)
    1058                 :             {
    1059 ECB             :                 /* if partitioned, must lock to touch nentries and freeList */
    1060 CBC    34781505 :                 if (IS_PARTITIONED(hctl))
    1061 GIC     6527866 :                     SpinLockAcquire(&(hctl->freeList[freelist_idx].mutex));
    1062                 : 
    1063 ECB             :                 /* delete the record from the appropriate nentries counter. */
    1064 CBC    34781505 :                 Assert(hctl->freeList[freelist_idx].nentries > 0);
    1065 GIC    34781505 :                 hctl->freeList[freelist_idx].nentries--;
    1066                 : 
    1067 ECB             :                 /* remove record from hash bucket's chain. */
    1068 GIC    34781505 :                 *prevBucketPtr = currBucket->link;
    1069                 : 
    1070 ECB             :                 /* add the record to the appropriate freelist. */
    1071 CBC    34781505 :                 currBucket->link = hctl->freeList[freelist_idx].freeList;
    1072 GIC    34781505 :                 hctl->freeList[freelist_idx].freeList = currBucket;
    1073 ECB             : 
    1074 CBC    34781505 :                 if (IS_PARTITIONED(hctl))
    1075 GIC     6527866 :                     SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
    1076                 : 
    1077                 :                 /*
    1078                 :                  * better hope the caller is synchronizing access to this
    1079                 :                  * element, because someone else is going to reuse it the next
    1080                 :                  * time something is added to the table
    1081 ECB             :                  */
    1082 GIC    34781505 :                 return (void *) ELEMENTKEY(currBucket);
    1083 ECB             :             }
    1084 GIC        2074 :             return NULL;
    1085 ECB             : 
    1086 GIC    55090909 :         case HASH_ENTER:
    1087                 :         case HASH_ENTER_NULL:
    1088 ECB             :             /* Return existing element if found, else create one */
    1089 GBC    55090909 :             if (currBucket != NULL)
    1090 GIC    15998080 :                 return (void *) ELEMENTKEY(currBucket);
    1091                 : 
    1092 ECB             :             /* disallow inserts if frozen */
    1093 CBC    39092829 :             if (hashp->frozen)
    1094 UIC           0 :                 elog(ERROR, "cannot insert into frozen hashtable \"%s\"",
    1095                 :                      hashp->tabname);
    1096 EUB             : 
    1097 GBC    39092829 :             currBucket = get_hash_entry(hashp, freelist_idx);
    1098 GIC    39092829 :             if (currBucket == NULL)
    1099 EUB             :             {
    1100                 :                 /* out of memory */
    1101 UIC           0 :                 if (action == HASH_ENTER_NULL)
    1102               0 :                     return NULL;
    1103                 :                 /* report a generic message */
    1104 UBC           0 :                 if (hashp->isshared)
    1105 UIC           0 :                     ereport(ERROR,
    1106                 :                             (errcode(ERRCODE_OUT_OF_MEMORY),
    1107                 :                              errmsg("out of shared memory")));
    1108                 :                 else
    1109               0 :                     ereport(ERROR,
    1110 ECB             :                             (errcode(ERRCODE_OUT_OF_MEMORY),
    1111                 :                              errmsg("out of memory")));
    1112                 :             }
    1113                 : 
    1114                 :             /* link into hashbucket chain */
    1115 CBC    39092829 :             *prevBucketPtr = currBucket;
    1116 GIC    39092829 :             currBucket->link = NULL;
    1117                 : 
    1118                 :             /* copy key into record */
    1119        39092829 :             currBucket->hashvalue = hashvalue;
    1120        39092829 :             hashp->keycopy(ELEMENTKEY(currBucket), keyPtr, keysize);
    1121                 : 
    1122                 :             /*
    1123                 :              * Caller is expected to fill the data field on return.  DO NOT
    1124 ECB             :              * insert any code that could possibly throw error here, as doing
    1125                 :              * so would leave the table entry incomplete and hence corrupt the
    1126                 :              * caller's data structure.
    1127 EUB             :              */
    1128                 : 
    1129 GIC    39092829 :             return (void *) ELEMENTKEY(currBucket);
    1130                 :     }
    1131                 : 
    1132 UIC           0 :     elog(ERROR, "unrecognized hash action code: %d", (int) action);
    1133                 : 
    1134                 :     return NULL;                /* keep compiler quiet */
    1135                 : }
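
For a partitioned shared table, the get_hash_value comment above describes the intended pattern: hash once, derive the partition from the low-order bits, lock that partition, then search with the precomputed hash. A hedged sketch follows; the lock array, NUM_MY_PARTITIONS (which must be a power of two so that the modulo is a low-order-bit mask), and MyEntry are illustrative assumptions, not part of this file.

    #include "storage/lwlock.h"

    #define NUM_MY_PARTITIONS   16          /* power of two */

    extern LWLock *MyPartitionLocks[NUM_MY_PARTITIONS];    /* maintained elsewhere */

    static int
    lookup_count_partitioned(HTAB *shared_htab, uint32 key)
    {
        uint32      hashvalue = get_hash_value(shared_htab, &key);
        int         partition = hashvalue % NUM_MY_PARTITIONS;  /* low-order bits */
        MyEntry    *entry;
        int         result = -1;

        LWLockAcquire(MyPartitionLocks[partition], LW_SHARED);
        entry = (MyEntry *) hash_search_with_hash_value(shared_htab, &key,
                                                        hashvalue,
                                                        HASH_FIND, NULL);
        if (entry != NULL)
            result = entry->count;      /* copy out while the partition is locked */
        LWLockRelease(MyPartitionLocks[partition]);

        return result;
    }
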
    1136                 : 
    1137                 : /*
    1138                 :  * hash_update_hash_key -- change the hash key of an existing table entry
    1139                 :  *
    1140                 :  * This is equivalent to removing the entry, making a new entry, and copying
    1141                 :  * over its data, except that the entry never goes to the table's freelist.
    1142                 :  * Therefore this cannot suffer an out-of-memory failure, even if there are
    1143                 :  * other processes operating in other partitions of the hashtable.
    1144                 :  *
    1145                 :  * Returns true if successful, false if the requested new hash key is already
    1146                 :  * present.  Throws error if the specified entry pointer isn't actually a
    1147                 :  * table member.
    1148                 :  *
    1149                 :  * NB: currently, there is no special case for old and new hash keys being
    1150                 :  * identical, which means we'll report false for that situation.  This is
    1151                 :  * preferable for existing uses.
    1152 ECB             :  *
    1153                 :  * NB: for a partitioned hashtable, caller must hold lock on both relevant
    1154                 :  * partitions, if the new hash key would belong to a different partition.
    1155                 :  */
    1156                 : bool
    1157 CBC         807 : hash_update_hash_key(HTAB *hashp,
    1158                 :                      void *existingEntry,
    1159                 :                      const void *newKeyPtr)
    1160                 : {
    1161 GIC         807 :     HASHELEMENT *existingElement = ELEMENT_FROM_KEY(existingEntry);
    1162             807 :     HASHHDR    *hctl = hashp->hctl;
    1163                 :     uint32      newhashvalue;
    1164                 :     Size        keysize;
    1165                 :     uint32      bucket;
    1166                 :     uint32      newbucket;
    1167                 :     long        segment_num;
    1168                 :     long        segment_ndx;
    1169                 :     HASHSEGMENT segp;
    1170                 :     HASHBUCKET  currBucket;
    1171                 :     HASHBUCKET *prevBucketPtr;
    1172                 :     HASHBUCKET *oldPrevPtr;
    1173                 :     HashCompareFunc match;
    1174                 : 
    1175                 : #ifdef HASH_STATISTICS
    1176 ECB             :     hash_accesses++;
    1177 EUB             :     hctl->accesses++;
    1178                 : #endif
    1179                 : 
    1180                 :     /* disallow updates if frozen */
    1181 GIC         807 :     if (hashp->frozen)
    1182 UIC           0 :         elog(ERROR, "cannot update in frozen hashtable \"%s\"",
    1183                 :              hashp->tabname);
    1184                 : 
    1185 ECB             :     /*
    1186                 :      * Lookup the existing element using its saved hash value.  We need to do
    1187                 :      * this to be able to unlink it from its hash chain, but as a side benefit
    1188                 :      * we can verify the validity of the passed existingEntry pointer.
    1189                 :      */
    1190 CBC         807 :     bucket = calc_bucket(hctl, existingElement->hashvalue);
    1191                 : 
    1192             807 :     segment_num = bucket >> hashp->sshift;
    1193 GBC         807 :     segment_ndx = MOD(bucket, hashp->ssize);
    1194                 : 
    1195 CBC         807 :     segp = hashp->dir[segment_num];
    1196 ECB             : 
    1197 GIC         807 :     if (segp == NULL)
    1198 LBC           0 :         hash_corrupted(hashp);
    1199                 : 
    1200 CBC         807 :     prevBucketPtr = &segp[segment_ndx];
    1201             807 :     currBucket = *prevBucketPtr;
    1202 ECB             : 
    1203 CBC         834 :     while (currBucket != NULL)
    1204                 :     {
    1205 GIC         834 :         if (currBucket == existingElement)
    1206 CBC         807 :             break;
    1207 GBC          27 :         prevBucketPtr = &(currBucket->link);
    1208 GIC          27 :         currBucket = *prevBucketPtr;
    1209                 :     }
    1210 ECB             : 
    1211 GIC         807 :     if (currBucket == NULL)
    1212 UIC           0 :         elog(ERROR, "hash_update_hash_key argument is not in hashtable \"%s\"",
    1213                 :              hashp->tabname);
    1214                 : 
    1215 GIC         807 :     oldPrevPtr = prevBucketPtr;
    1216 ECB             : 
    1217                 :     /*
    1218                 :      * Now perform the equivalent of a HASH_ENTER operation to locate the hash
    1219                 :      * chain we want to put the entry into.
    1220                 :      */
    1221 CBC         807 :     newhashvalue = hashp->hash(newKeyPtr, hashp->keysize);
    1222                 : 
    1223             807 :     newbucket = calc_bucket(hctl, newhashvalue);
    1224                 : 
    1225             807 :     segment_num = newbucket >> hashp->sshift;
    1226 GBC         807 :     segment_ndx = MOD(newbucket, hashp->ssize);
    1227                 : 
    1228 CBC         807 :     segp = hashp->dir[segment_num];
    1229 ECB             : 
    1230 GIC         807 :     if (segp == NULL)
    1231 UIC           0 :         hash_corrupted(hashp);
    1232                 : 
    1233 GIC         807 :     prevBucketPtr = &segp[segment_ndx];
    1234 CBC         807 :     currBucket = *prevBucketPtr;
    1235 ECB             : 
    1236                 :     /*
    1237                 :      * Follow collision chain looking for matching key
    1238                 :      */
    1239 CBC         807 :     match = hashp->match;        /* save one fetch in inner loop */
    1240 GBC         807 :     keysize = hashp->keysize;    /* ditto */
    1241 EUB             : 
    1242 CBC         916 :     while (currBucket != NULL)
    1243 ECB             :     {
    1244 GIC         109 :         if (currBucket->hashvalue == newhashvalue &&
    1245 UIC           0 :             match(ELEMENTKEY(currBucket), newKeyPtr, keysize) == 0)
    1246               0 :             break;
    1247 GIC         109 :         prevBucketPtr = &(currBucket->link);
    1248             109 :         currBucket = *prevBucketPtr;
    1249                 : #ifdef HASH_STATISTICS
    1250 ECB             :         hash_collisions++;
    1251 EUB             :         hctl->collisions++;
    1252                 : #endif
    1253 ECB             :     }
    1254                 : 
    1255 GIC         807 :     if (currBucket != NULL)
    1256 UIC           0 :         return false;           /* collision with an existing entry */
    1257                 : 
    1258 GIC         807 :     currBucket = existingElement;
    1259                 : 
    1260                 :     /*
    1261                 :      * If old and new hash values belong to the same bucket, we need not
    1262 ECB             :      * change any chain links, and indeed should not since this simplistic
    1263                 :      * update will corrupt the list if currBucket is the last element.  (We
    1264                 :      * cannot fall out earlier, however, since we need to scan the bucket to
    1265                 :      * check for duplicate keys.)
    1266                 :      */
    1267 GIC         807 :     if (bucket != newbucket)
    1268 ECB             :     {
    1269                 :         /* OK to remove record from old hash bucket's chain. */
    1270 GIC         739 :         *oldPrevPtr = currBucket->link;
    1271                 : 
    1272                 :         /* link into new hashbucket chain */
    1273 CBC         739 :         *prevBucketPtr = currBucket;
    1274             739 :         currBucket->link = NULL;
    1275                 :     }
    1276                 : 
    1277                 :     /* copy new key into record */
    1278             807 :     currBucket->hashvalue = newhashvalue;
    1279 GIC         807 :     hashp->keycopy(ELEMENTKEY(currBucket), newKeyPtr, keysize);
    1280                 : 
    1281                 :     /* rest of record is untouched */
    1282                 : 
    1283             807 :     return true;
    1284                 : }
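
A short rekeying sketch, again on the illustrative MyEntry table. Per the comments above, existingEntry must be a pointer previously returned by hash_search on this table, and on a partitioned table both affected partitions must already be locked by the caller.

    uint32      oldkey = 42;
    uint32      newkey = 43;
    MyEntry    *entry;

    entry = (MyEntry *) hash_search(htab, &oldkey, HASH_FIND, NULL);
    if (entry != NULL &&
        !hash_update_hash_key(htab, entry, &newkey))
        elog(LOG, "key %u already present; entry not rekeyed", newkey);
    /* on success the entry keeps its data; only its key (and chain) changed */
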
    1285                 : 
    1286                 : /*
    1287 ECB             :  * Allocate a new hashtable entry if possible; return NULL if out of memory.
    1288                 :  * (Or, if the underlying space allocator throws error for out-of-memory,
    1289                 :  * we won't return at all.)
    1290                 :  */
    1291                 : static HASHBUCKET
    1292 GIC    39092829 : get_hash_entry(HTAB *hashp, int freelist_idx)
    1293                 : {
    1294        39092829 :     HASHHDR    *hctl = hashp->hctl;
    1295 ECB             :     HASHBUCKET  newElement;
    1296                 : 
    1297                 :     for (;;)
    1298                 :     {
    1299                 :         /* if partitioned, must lock to touch nentries and freeList */
    1300 GIC    39314639 :         if (IS_PARTITIONED(hctl))
    1301 CBC     7267667 :             SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
    1302 ECB             : 
    1303                 :         /* try to get an entry from the freelist */
    1304 CBC    39314639 :         newElement = hctl->freeList[freelist_idx].freeList;
    1305 ECB             : 
    1306 GIC    39314639 :         if (newElement != NULL)
    1307        39092829 :             break;
    1308                 : 
    1309          221810 :         if (IS_PARTITIONED(hctl))
    1310            1196 :             SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
    1311                 : 
    1312                 :         /*
    1313                 :          * No free elements in this freelist.  In a partitioned table, there
    1314                 :          * might be entries in other freelists, but to reduce contention we
    1315                 :          * prefer to first try to get another chunk of buckets from the main
    1316                 :          * shmem allocator.  If that fails, though, we *MUST* root through all
    1317                 :          * the other freelists before giving up.  There are multiple callers
    1318                 :          * that assume that they can allocate every element in the initially
    1319 ECB             :          * requested table size, or that deleting an element guarantees they
    1320                 :          * can insert a new element, even if shared memory is entirely full.
    1321                 :          * Failing because the needed element is in a different freelist is
    1322                 :          * not acceptable.
    1323 EUB             :          */
    1324 GBC      221810 :         if (!element_alloc(hashp, hctl->nelem_alloc, freelist_idx))
    1325                 :         {
    1326                 :             int         borrow_from_idx;
    1327 EUB             : 
    1328 UIC           0 :             if (!IS_PARTITIONED(hctl))
    1329               0 :                 return NULL;    /* out of memory */
    1330 EUB             : 
    1331                 :             /* try to borrow element from another freelist */
    1332 UBC           0 :             borrow_from_idx = freelist_idx;
    1333                 :             for (;;)
    1334 EUB             :             {
    1335 UBC           0 :                 borrow_from_idx = (borrow_from_idx + 1) % NUM_FREELISTS;
    1336 UIC           0 :                 if (borrow_from_idx == freelist_idx)
    1337 UBC           0 :                     break;      /* examined all freelists, fail */
    1338                 : 
    1339               0 :                 SpinLockAcquire(&(hctl->freeList[borrow_from_idx].mutex));
    1340               0 :                 newElement = hctl->freeList[borrow_from_idx].freeList;
    1341                 : 
    1342 UIC           0 :                 if (newElement != NULL)
    1343 EUB             :                 {
    1344 UBC           0 :                     hctl->freeList[borrow_from_idx].freeList = newElement->link;
    1345               0 :                     SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));
    1346                 : 
    1347 EUB             :                     /* careful: count the new element in its proper freelist */
    1348 UIC           0 :                     SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
    1349               0 :                     hctl->freeList[freelist_idx].nentries++;
    1350 UBC           0 :                     SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
    1351                 : 
    1352 UIC           0 :                     return newElement;
    1353                 :                 }
    1354 EUB             : 
    1355 UIC           0 :                 SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));
    1356                 :             }
    1357                 : 
    1358                 :             /* no elements available to borrow either, so out of memory */
    1359 LBC           0 :             return NULL;
    1360 ECB             :         }
    1361                 :     }
    1362                 : 
    1363                 :     /* remove entry from freelist, bump nentries */
    1364 GIC    39092829 :     hctl->freeList[freelist_idx].freeList = newElement->link;
    1365 CBC    39092829 :     hctl->freeList[freelist_idx].nentries++;
    1366                 : 
    1367 GIC    39092829 :     if (IS_PARTITIONED(hctl))
    1368         7266471 :         SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
    1369                 : 
    1370        39092829 :     return newElement;
    1371                 : }
    1372 ECB             : 
    1373                 : /*
    1374                 :  * hash_get_num_entries -- get the number of entries in a hashtable
    1375                 :  */
    1376                 : long
    1377 GIC       34950 : hash_get_num_entries(HTAB *hashp)
    1378                 : {
    1379                 :     int         i;
    1380           34950 :     long        sum = hashp->hctl->freeList[0].nentries;
    1381                 : 
    1382 ECB             :     /*
    1383                 :      * We currently don't bother with acquiring the mutexes; it's only
    1384                 :      * sensible to call this function if you've got lock on all partitions of
    1385                 :      * the table.
    1386                 :      */
    1387 GIC       34950 :     if (IS_PARTITIONED(hashp->hctl))
    1388 ECB             :     {
    1389 GIC       72864 :         for (i = 1; i < NUM_FREELISTS; i++)
    1390           70587 :             sum += hashp->hctl->freeList[i].nentries;
    1391                 :     }
    1392                 : 
    1393           34950 :     return sum;
    1394                 : }
    1395                 : 
    1396                 : /*
    1397                 :  * hash_seq_init/_search/_term
     1398                 :  *          Sequentially search through the hash table, returning
     1399                 :  *          its elements one by one; NULL means there are no more.
    1400                 :  *
    1401                 :  * hash_seq_term should be called if and only if the scan is abandoned before
    1402                 :  * completion; if hash_seq_search returns NULL then it has already done the
    1403                 :  * end-of-scan cleanup.
    1404                 :  *
    1405                 :  * NOTE: caller may delete the returned element before continuing the scan.
    1406                 :  * However, deleting any other element while the scan is in progress is
     1407                 :  * UNDEFINED (it might be the one that curEntry is pointing at!).  Also,
    1408                 :  * if elements are added to the table while the scan is in progress, it is
    1409                 :  * unspecified whether they will be visited by the scan or not.
    1410                 :  *
    1411                 :  * NOTE: it is possible to use hash_seq_init/hash_seq_search without any
    1412                 :  * worry about hash_seq_term cleanup, if the hashtable is first locked against
    1413                 :  * further insertions by calling hash_freeze.
    1414                 :  *
    1415                 :  * NOTE: to use this with a partitioned hashtable, caller had better hold
    1416 ECB             :  * at least shared lock on all partitions of the table throughout the scan!
    1417                 :  * We can cope with insertions or deletions by our own backend, but *not*
    1418                 :  * with concurrent insertions or deletions by another.
    1419                 :  */
    1420                 : void
    1421 CBC     3509197 : hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
    1422 ECB             : {
    1423 CBC     3509197 :     status->hashp = hashp;
    1424 GIC     3509197 :     status->curBucket = 0;
    1425         3509197 :     status->curEntry = NULL;
    1426 CBC     3509197 :     if (!hashp->frozen)
    1427 GIC     3509197 :         register_seq_scan(hashp);
    1428         3509197 : }
    1429                 : 
    1430                 : void *
    1431        40463454 : hash_seq_search(HASH_SEQ_STATUS *status)
    1432                 : {
    1433                 :     HTAB       *hashp;
    1434                 :     HASHHDR    *hctl;
    1435                 :     uint32      max_bucket;
    1436                 :     long        ssize;
    1437                 :     long        segment_num;
    1438 ECB             :     long        segment_ndx;
    1439                 :     HASHSEGMENT segp;
    1440                 :     uint32      curBucket;
    1441                 :     HASHELEMENT *curElem;
    1442                 : 
    1443 CBC    40463454 :     if ((curElem = status->curEntry) != NULL)
    1444 ECB             :     {
    1445                 :         /* Continuing scan of curBucket... */
    1446 GIC     8366635 :         status->curEntry = curElem->link;
    1447         8366635 :         if (status->curEntry == NULL)    /* end of this bucket */
    1448         6431228 :             ++status->curBucket;
    1449         8366635 :         return (void *) ELEMENTKEY(curElem);
    1450 ECB             :     }
    1451                 : 
    1452                 :     /*
    1453                 :      * Search for next nonempty bucket starting at curBucket.
    1454                 :      */
    1455 GIC    32096819 :     curBucket = status->curBucket;
    1456 CBC    32096819 :     hashp = status->hashp;
    1457 GIC    32096819 :     hctl = hashp->hctl;
    1458 CBC    32096819 :     ssize = hashp->ssize;
    1459        32096819 :     max_bucket = hctl->max_bucket;
    1460                 : 
    1461 GIC    32096819 :     if (curBucket > max_bucket)
    1462                 :     {
    1463          102550 :         hash_seq_term(status);
    1464          102550 :         return NULL;            /* search is done */
    1465 ECB             :     }
    1466                 : 
    1467                 :     /*
    1468                 :      * first find the right segment in the table directory.
    1469                 :      */
    1470 GIC    31994269 :     segment_num = curBucket >> hashp->sshift;
    1471        31994269 :     segment_ndx = MOD(curBucket, ssize);
    1472                 : 
    1473        31994269 :     segp = hashp->dir[segment_num];
    1474                 : 
    1475                 :     /*
    1476 ECB             :      * Pick up the first item in this bucket's chain.  If chain is not empty
    1477                 :      * we can begin searching it.  Otherwise we have to advance to find the
    1478                 :      * next nonempty bucket.  We try to optimize that case since searching a
    1479                 :      * near-empty hashtable has to iterate this loop a lot.
    1480                 :      */
    1481 CBC   282858170 :     while ((curElem = segp[segment_ndx]) == NULL)
    1482 ECB             :     {
    1483                 :         /* empty bucket, advance to next */
    1484 GIC   254254810 :         if (++curBucket > max_bucket)
    1485 ECB             :         {
    1486 GIC     3390909 :             status->curBucket = curBucket;
    1487 CBC     3390909 :             hash_seq_term(status);
    1488         3390909 :             return NULL;        /* search is done */
    1489 ECB             :         }
    1490 GIC   250863901 :         if (++segment_ndx >= ssize)
    1491                 :         {
    1492          131483 :             segment_num++;
    1493          131483 :             segment_ndx = 0;
    1494 CBC      131483 :             segp = hashp->dir[segment_num];
    1495 ECB             :         }
    1496                 :     }
    1497                 : 
    1498                 :     /* Begin scan of curBucket... */
    1499 GIC    28603360 :     status->curEntry = curElem->link;
    1500        28603360 :     if (status->curEntry == NULL)    /* end of this bucket */
    1501        22172100 :         ++curBucket;
    1502 CBC    28603360 :     status->curBucket = curBucket;
    1503 GIC    28603360 :     return (void *) ELEMENTKEY(curElem);
    1504 ECB             : }
    1505                 : 
    1506                 : void
    1507 GIC     3509187 : hash_seq_term(HASH_SEQ_STATUS *status)
    1508                 : {
    1509         3509187 :     if (!status->hashp->frozen)
    1510         3509187 :         deregister_seq_scan(status->hashp);
    1511         3509187 : }
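
The scan loop that the notes above imply, using the illustrative MyEntry table: let hash_seq_search run to NULL and it cleans up by itself; bail out early and hash_seq_term must be called explicitly. Deleting the element just returned is allowed; deleting any other element mid-scan is not.

    HASH_SEQ_STATUS status;
    MyEntry    *entry;

    hash_seq_init(&status, htab);
    while ((entry = (MyEntry *) hash_seq_search(&status)) != NULL)
    {
        if (entry->count == 0)
        {
            /* removing the just-returned element is explicitly permitted */
            hash_search(htab, &entry->key, HASH_REMOVE, NULL);
        }
        else if (entry->count < 0)
        {
            hash_seq_term(&status);     /* abandoning the scan early */
            break;
        }
    }
    /* if the loop reached NULL, hash_seq_search already did the cleanup */
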
    1512                 : 
    1513                 : /*
    1514                 :  * hash_freeze
    1515                 :  *          Freeze a hashtable against future insertions (deletions are
    1516                 :  *          still allowed)
    1517                 :  *
    1518                 :  * The reason for doing this is that by preventing any more bucket splits,
    1519                 :  * we no longer need to worry about registering hash_seq_search scans,
    1520                 :  * and thus caller need not be careful about ensuring hash_seq_term gets
    1521                 :  * called at the right times.
    1522 EUB             :  *
    1523                 :  * Multiple calls to hash_freeze() are allowed, but you can't freeze a table
    1524                 :  * with active scans (since hash_seq_term would then do the wrong thing).
    1525                 :  */
    1526                 : void
    1527 UBC           0 : hash_freeze(HTAB *hashp)
    1528                 : {
    1529               0 :     if (hashp->isshared)
    1530               0 :         elog(ERROR, "cannot freeze shared hashtable \"%s\"", hashp->tabname);
    1531 UIC           0 :     if (!hashp->frozen && has_seq_scans(hashp))
    1532               0 :         elog(ERROR, "cannot freeze hashtable \"%s\" because it has active scans",
    1533                 :              hashp->tabname);
    1534               0 :     hashp->frozen = true;
    1535               0 : }
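
If a local table will receive no further insertions, freezing it removes the hash_seq_term obligation entirely, as the comment above explains. A brief sketch, continuing the names from the scan example; some_condition() is a hypothetical predicate.

    hash_freeze(htab);          /* local table, no more inserts expected */

    hash_seq_init(&status, htab);
    while ((entry = (MyEntry *) hash_seq_search(&status)) != NULL)
    {
        if (some_condition(entry))
            break;              /* no hash_seq_term needed on a frozen table */
    }
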
    1536                 : 
    1537                 : 
    1538                 : /********************************* UTILITIES ************************/
    1539 ECB             : 
    1540                 : /*
    1541                 :  * Expand the table by adding one more hash bucket.
    1542                 :  */
    1543                 : static bool
    1544 GIC      407775 : expand_table(HTAB *hashp)
    1545                 : {
    1546          407775 :     HASHHDR    *hctl = hashp->hctl;
    1547                 :     HASHSEGMENT old_seg,
    1548                 :                 new_seg;
    1549                 :     long        old_bucket,
    1550                 :                 new_bucket;
    1551                 :     long        new_segnum,
    1552                 :                 new_segndx;
    1553                 :     long        old_segnum,
    1554                 :                 old_segndx;
    1555 ECB             :     HASHBUCKET *oldlink,
    1556                 :                *newlink;
    1557                 :     HASHBUCKET  currElement,
    1558                 :                 nextElement;
    1559                 : 
    1560 GIC      407775 :     Assert(!IS_PARTITIONED(hctl));
    1561 ECB             : 
    1562                 : #ifdef HASH_STATISTICS
    1563                 :     hash_expansions++;
    1564                 : #endif
    1565                 : 
    1566 GIC      407775 :     new_bucket = hctl->max_bucket + 1;
    1567          407775 :     new_segnum = new_bucket >> hashp->sshift;
    1568 CBC      407775 :     new_segndx = MOD(new_bucket, hashp->ssize);
    1569 EUB             : 
    1570 GBC      407775 :     if (new_segnum >= hctl->nsegs)
    1571 ECB             :     {
    1572 EUB             :         /* Allocate new segment if necessary -- could fail if dir full */
    1573 CBC        1447 :         if (new_segnum >= hctl->dsize)
    1574 UIC           0 :             if (!dir_realloc(hashp))
    1575               0 :                 return false;
    1576 GIC        1447 :         if (!(hashp->dir[new_segnum] = seg_alloc(hashp)))
    1577 LBC           0 :             return false;
    1578 GIC        1447 :         hctl->nsegs++;
    1579                 :     }
    1580                 : 
    1581                 :     /* OK, we created a new bucket */
    1582          407775 :     hctl->max_bucket++;
    1583                 : 
    1584                 :     /*
    1585 ECB             :      * *Before* changing masks, find old bucket corresponding to same hash
    1586                 :      * values; values in that bucket may need to be relocated to new bucket.
    1587                 :      * Note that new_bucket is certainly larger than low_mask at this point,
    1588                 :      * so we can skip the first step of the regular hash mask calc.
    1589                 :      */
    1590 CBC      407775 :     old_bucket = (new_bucket & hctl->low_mask);
    1591                 : 
    1592 ECB             :     /*
    1593                 :      * If we crossed a power of 2, readjust masks.
    1594                 :      */
    1595 GIC      407775 :     if ((uint32) new_bucket > hctl->high_mask)
    1596                 :     {
    1597            2635 :         hctl->low_mask = hctl->high_mask;
    1598            2635 :         hctl->high_mask = (uint32) new_bucket | hctl->low_mask;
    1599                 :     }
    1600                 : 
    1601                 :     /*
    1602 ECB             :      * Relocate records to the new bucket.  NOTE: because of the way the hash
    1603                 :      * masking is done in calc_bucket, only one old bucket can need to be
    1604                 :      * split at this point.  With a different way of reducing the hash value,
    1605                 :      * that might not be true!
    1606                 :      */
    1607 GIC      407775 :     old_segnum = old_bucket >> hashp->sshift;
    1608 CBC      407775 :     old_segndx = MOD(old_bucket, hashp->ssize);
    1609 ECB             : 
    1610 GIC      407775 :     old_seg = hashp->dir[old_segnum];
    1611 CBC      407775 :     new_seg = hashp->dir[new_segnum];
    1612 ECB             : 
    1613 CBC      407775 :     oldlink = &old_seg[old_segndx];
    1614 GIC      407775 :     newlink = &new_seg[new_segndx];
    1615 ECB             : 
    1616 CBC      407775 :     for (currElement = *oldlink;
    1617 GIC      973286 :          currElement != NULL;
    1618 CBC      565511 :          currElement = nextElement)
    1619 ECB             :     {
    1620 GIC      565511 :         nextElement = currElement->link;
    1621          565511 :         if ((long) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
    1622                 :         {
    1623 CBC      281169 :             *oldlink = currElement;
    1624          281169 :             oldlink = &currElement->link;
    1625                 :         }
    1626                 :         else
    1627                 :         {
    1628          284342 :             *newlink = currElement;
    1629          284342 :             newlink = &currElement->link;
    1630                 :         }
    1631 ECB             :     }
    1632                 :     /* don't forget to terminate the rebuilt hash chains... */
    1633 GIC      407775 :     *oldlink = NULL;
    1634          407775 :     *newlink = NULL;
    1635                 : 
    1636 GBC      407775 :     return true;
    1637                 : }
    1638                 : 
    1639                 : 
    1640                 : static bool
    1641 UIC           0 : dir_realloc(HTAB *hashp)
    1642                 : {
    1643                 :     HASHSEGMENT *p;
    1644 EUB             :     HASHSEGMENT *old_p;
    1645                 :     long        new_dsize;
    1646                 :     long        old_dirsize;
    1647                 :     long        new_dirsize;
    1648                 : 
    1649 UBC           0 :     if (hashp->hctl->max_dsize != NO_MAX_DSIZE)
    1650               0 :         return false;
    1651                 : 
    1652 EUB             :     /* Reallocate directory */
    1653 UBC           0 :     new_dsize = hashp->hctl->dsize << 1;
    1654               0 :     old_dirsize = hashp->hctl->dsize * sizeof(HASHSEGMENT);
    1655 UIC           0 :     new_dirsize = new_dsize * sizeof(HASHSEGMENT);
    1656 EUB             : 
    1657 UIC           0 :     old_p = hashp->dir;
    1658 UBC           0 :     CurrentDynaHashCxt = hashp->hcxt;
    1659               0 :     p = (HASHSEGMENT *) hashp->alloc((Size) new_dirsize);
    1660 EUB             : 
    1661 UBC           0 :     if (p != NULL)
    1662                 :     {
    1663 UIC           0 :         memcpy(p, old_p, old_dirsize);
    1664 UBC           0 :         MemSet(((char *) p) + old_dirsize, 0, new_dirsize - old_dirsize);
    1665               0 :         hashp->dir = p;
    1666 UIC           0 :         hashp->hctl->dsize = new_dsize;
    1667 EUB             : 
    1668                 :         /* XXX assume the allocator is palloc, so we know how to free */
    1669 UIC           0 :         Assert(hashp->alloc == DynaHashAlloc);
    1670 UBC           0 :         pfree(old_p);
    1671                 : 
    1672 UIC           0 :         return true;
    1673                 :     }
    1674                 : 
    1675 LBC           0 :     return false;
    1676                 : }
    1677                 : 
    1678                 : 
    1679 ECB             : static HASHSEGMENT
    1680 CBC      666095 : seg_alloc(HTAB *hashp)
    1681                 : {
    1682 ECB             :     HASHSEGMENT segp;
    1683 EUB             : 
    1684 GIC      666095 :     CurrentDynaHashCxt = hashp->hcxt;
    1685 CBC      666095 :     segp = (HASHSEGMENT) hashp->alloc(sizeof(HASHBUCKET) * hashp->ssize);
    1686                 : 
    1687          666095 :     if (!segp)
    1688 UIC           0 :         return NULL;
    1689                 : 
    1690 GIC      666095 :     MemSet(segp, 0, sizeof(HASHBUCKET) * hashp->ssize);
    1691                 : 
    1692          666095 :     return segp;
    1693                 : }
    1694 ECB             : 
    1695                 : /*
    1696                 :  * allocate some new elements and link them into the indicated free list
    1697                 :  */
    1698                 : static bool
    1699 GIC      580132 : element_alloc(HTAB *hashp, int nelem, int freelist_idx)
    1700                 : {
    1701          580132 :     HASHHDR    *hctl = hashp->hctl;
    1702                 :     Size        elementSize;
    1703 ECB             :     HASHELEMENT *firstElement;
    1704 EUB             :     HASHELEMENT *tmpElement;
    1705                 :     HASHELEMENT *prevElement;
    1706                 :     int         i;
    1707 ECB             : 
    1708 GIC      580132 :     if (hashp->isfixed)
    1709 LBC           0 :         return false;
    1710 ECB             : 
    1711                 :     /* Each element has a HASHELEMENT header plus user data. */
    1712 CBC      580132 :     elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize);
    1713 EUB             : 
    1714 GIC      580132 :     CurrentDynaHashCxt = hashp->hcxt;
    1715          580132 :     firstElement = (HASHELEMENT *) hashp->alloc(nelem * elementSize);
    1716 ECB             : 
    1717 CBC      580132 :     if (!firstElement)
    1718 LBC           0 :         return false;
    1719                 : 
    1720 ECB             :     /* prepare to link all the new entries into the freelist */
    1721 CBC      580132 :     prevElement = NULL;
    1722          580132 :     tmpElement = firstElement;
    1723 GIC    86955337 :     for (i = 0; i < nelem; i++)
    1724                 :     {
    1725        86375205 :         tmpElement->link = prevElement;
    1726 CBC    86375205 :         prevElement = tmpElement;
    1727        86375205 :         tmpElement = (HASHELEMENT *) (((char *) tmpElement) + elementSize);
    1728                 :     }
    1729                 : 
    1730 ECB             :     /* if partitioned, must lock to touch freeList */
    1731 CBC      580132 :     if (IS_PARTITIONED(hctl))
    1732 GIC      293356 :         SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
    1733 ECB             : 
    1734                 :     /* freelist could be nonempty if two backends did this concurrently */
    1735 GIC      580132 :     firstElement->link = hctl->freeList[freelist_idx].freeList;
    1736 CBC      580132 :     hctl->freeList[freelist_idx].freeList = prevElement;
    1737                 : 
    1738 GIC      580132 :     if (IS_PARTITIONED(hctl))
    1739          293356 :         SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
    1740                 : 
    1741 GBC      580132 :     return true;
    1742                 : }
    1743                 : 
    1744                 : /* complain when we have detected a corrupted hashtable */
    1745                 : static void
    1746 UIC           0 : hash_corrupted(HTAB *hashp)
    1747 EUB             : {
    1748                 :     /*
    1749                 :      * If the corruption is in a shared hashtable, we'd better force a
    1750                 :      * systemwide restart.  Otherwise, just shut down this one backend.
    1751                 :      */
    1752 UIC           0 :     if (hashp->isshared)
    1753               0 :         elog(PANIC, "hash table \"%s\" corrupted", hashp->tabname);
    1754                 :     else
    1755 LBC           0 :         elog(FATAL, "hash table \"%s\" corrupted", hashp->tabname);
    1756                 : }
    1757                 : 
    1758                 : /* calculate ceil(log base 2) of num */
    1759                 : int
    1760 GIC      547029 : my_log2(long num)
    1761 ECB             : {
    1762 EUB             :     /*
    1763                 :      * guard against too-large input, which would be invalid for
    1764                 :      * pg_ceil_log2_*()
    1765                 :      */
    1766 GIC      547029 :     if (num > LONG_MAX / 2)
    1767 LBC           0 :         num = LONG_MAX / 2;
    1768                 : 
    1769                 : #if SIZEOF_LONG < 8
    1770                 :     return pg_ceil_log2_32(num);
    1771                 : #else
    1772 GIC      547029 :     return pg_ceil_log2_64(num);
    1773 ECB             : #endif
    1774                 : }
    1775                 : 
    1776                 : /* calculate first power of 2 >= num, bounded to what will fit in a long */
    1777                 : static long
    1778 GIC       63908 : next_pow2_long(long num)
    1779                 : {
    1780                 :     /* my_log2's internal range check is sufficient */
    1781 CBC       63908 :     return 1L << my_log2(num);
    1782                 : }
    1783 ECB             : 
    1784 EUB             : /* calculate first power of 2 >= num, bounded to what will fit in an int */
    1785 ECB             : static int
    1786 GIC      464314 : next_pow2_int(long num)
    1787                 : {
    1788          464314 :     if (num > INT_MAX / 2)
    1789 UIC           0 :         num = INT_MAX / 2;
    1790 GIC      464314 :     return 1 << my_log2(num);
    1791                 : }
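
A few worked values for the helpers above, stated as hypothetical Asserts (next_pow2_long and next_pow2_int are static, so such checks could only live in this file); assumes a 64-bit long.

    Assert(my_log2(1) == 0);                /* 2^0 = 1 is already >= 1 */
    Assert(my_log2(1000) == 10);            /* 2^10 = 1024 is the first power >= 1000 */
    Assert(next_pow2_long(1000) == 1024L);
    Assert(next_pow2_int(1024) == 1024);    /* exact powers map to themselves */
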
    1792                 : 
    1793                 : 
    1794                 : /************************* SEQ SCAN TRACKING ************************/
    1795                 : 
    1796                 : /*
    1797                 :  * We track active hash_seq_search scans here.  The need for this mechanism
    1798                 :  * comes from the fact that a scan will get confused if a bucket split occurs
    1799                 :  * while it's in progress: it might visit entries twice, or even miss some
    1800                 :  * entirely (if it's partway through the same bucket that splits).  Hence
    1801                 :  * we want to inhibit bucket splits if there are any active scans on the
    1802                 :  * table being inserted into.  This is a fairly rare case in current usage,
    1803                 :  * so just postponing the split until the next insertion seems sufficient.
    1804                 :  *
    1805                 :  * Given present usages of the function, only a few scans are likely to be
    1806                 :  * open concurrently; so a finite-size stack of open scans seems sufficient,
    1807                 :  * and we don't worry that linear search is too slow.  Note that we do
    1808                 :  * allow multiple scans of the same hashtable to be open concurrently.
    1809                 :  *
    1810                 :  * This mechanism can support concurrent scan and insertion in a shared
    1811                 :  * hashtable if it's the same backend doing both.  It would fail otherwise,
    1812                 :  * but locking reasons seem to preclude any such scenario anyway, so we don't
    1813                 :  * worry.
    1814                 :  *
    1815                 :  * This arrangement is reasonably robust if a transient hashtable is deleted
    1816                 :  * without notifying us.  The absolute worst case is we might inhibit splits
    1817                 :  * in another table created later at exactly the same address.  We will give
    1818                 :  * a warning at transaction end for reference leaks, so any bugs leading to
    1819                 :  * lack of notification should be easy to catch.
    1820                 :  */
    1821                 : 
    1822                 : #define MAX_SEQ_SCANS 100
    1823                 : 
    1824                 : static HTAB *seq_scan_tables[MAX_SEQ_SCANS];    /* tables being scanned */
    1825                 : static int  seq_scan_level[MAX_SEQ_SCANS];  /* subtransaction nest level */
    1826 ECB             : static int  num_seq_scans = 0;
    1827                 : 
    1828                 : 
    1829 EUB             : /* Register a table as having an active hash_seq_search scan */
    1830                 : static void
    1831 CBC     3509197 : register_seq_scan(HTAB *hashp)
    1832 ECB             : {
    1833 CBC     3509197 :     if (num_seq_scans >= MAX_SEQ_SCANS)
    1834 LBC           0 :         elog(ERROR, "too many active hash_seq_search scans, cannot start one on \"%s\"",
    1835                 :              hashp->tabname);
    1836 GIC     3509197 :     seq_scan_tables[num_seq_scans] = hashp;
    1837         3509197 :     seq_scan_level[num_seq_scans] = GetCurrentTransactionNestLevel();
    1838 CBC     3509197 :     num_seq_scans++;
    1839 GIC     3509197 : }
    1840                 : 
    1841                 : /* Deregister an active scan */
    1842                 : static void
    1843 CBC     3509187 : deregister_seq_scan(HTAB *hashp)
    1844                 : {
    1845 ECB             :     int         i;
    1846                 : 
    1847                 :     /* Search backward since it's most likely at the stack top */
    1848 CBC     3509187 :     for (i = num_seq_scans - 1; i >= 0; i--)
    1849 ECB             :     {
    1850 CBC     3509187 :         if (seq_scan_tables[i] == hashp)
    1851                 :         {
    1852 GIC     3509187 :             seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
    1853 GBC     3509187 :             seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
    1854 GIC     3509187 :             num_seq_scans--;
    1855         3509187 :             return;
    1856                 :         }
    1857                 :     }
    1858 UIC           0 :     elog(ERROR, "no hash_seq_search scan for hash table \"%s\"",
    1859 ECB             :          hashp->tabname);
    1860                 : }
    1861                 : 
    1862                 : /* Check if a table has any active scan */
    1863                 : static bool
    1864 GIC      407775 : has_seq_scans(HTAB *hashp)
    1865 EUB             : {
    1866                 :     int         i;
    1867                 : 
    1868 CBC      407775 :     for (i = 0; i < num_seq_scans; i++)
    1869                 :     {
    1870 UIC           0 :         if (seq_scan_tables[i] == hashp)
    1871               0 :             return true;
    1872                 :     }
    1873 CBC      407775 :     return false;
    1874                 : }
    1875                 : 
    1876                 : /* Clean up any open scans at end of transaction */
    1877                 : void
    1878 GIC      486642 : AtEOXact_HashTables(bool isCommit)
    1879                 : {
    1880                 :     /*
    1881                 :      * During abort cleanup, open scans are expected; just silently clean 'em
    1882                 :      * out.  An open scan at commit means someone forgot a hash_seq_term()
    1883                 :      * call, so complain.
    1884 ECB             :      *
    1885                 :      * Note: it's tempting to try to print the tabname here, but refrain for
    1886                 :      * fear of touching deallocated memory.  This isn't a user-facing message
    1887                 :      * anyway, so it needn't be pretty.
    1888                 :      */
    1889 GIC      486642 :     if (isCommit)
    1890 EUB             :     {
    1891                 :         int         i;
    1892                 : 
    1893 GIC      466482 :         for (i = 0; i < num_seq_scans; i++)
    1894 ECB             :         {
    1895 LBC           0 :             elog(WARNING, "leaked hash_seq_search scan for hash table %p",
    1896                 :                  seq_scan_tables[i]);
    1897                 :         }
    1898                 :     }
    1899 CBC      486642 :     num_seq_scans = 0;
    1900 GIC      486642 : }
    1901                 : 
    1902                 : /* Clean up any open scans at end of subtransaction */
    1903                 : void
    1904            8815 : AtEOSubXact_HashTables(bool isCommit, int nestDepth)
    1905                 : {
    1906                 :     int         i;
    1907                 : 
    1908 ECB             :     /*
    1909                 :      * Search backward to make cleanup easy.  Note we must check all entries,
    1910 EUB             :      * not only those at the end of the array, because deletion technique
    1911                 :      * doesn't keep them in order.
    1912                 :      */
    1913 GBC        8815 :     for (i = num_seq_scans - 1; i >= 0; i--)
    1914                 :     {
    1915 UBC           0 :         if (seq_scan_level[i] >= nestDepth)
    1916 EUB             :         {
    1917 UBC           0 :             if (isCommit)
    1918 UIC           0 :                 elog(WARNING, "leaked hash_seq_search scan for hash table %p",
    1919                 :                      seq_scan_tables[i]);
    1920 LBC           0 :             seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
    1921 UIC           0 :             seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
    1922               0 :             num_seq_scans--;
    1923                 :         }
    1924                 :     }
    1925 GIC        8815 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a