Age Owner Branch data TLA Line data Source code
1 : : /*
2 : : * simplehash.h
3 : : *
4 : : * When included this file generates a "templated" (by way of macros)
5 : : * open-addressing hash table implementation specialized to user-defined
6 : : * types.
7 : : *
8 : : * It's probably not worthwhile to generate such a specialized implementation
9 : : * for hash tables that aren't performance or space sensitive.
10 : : *
11 : : * Compared to dynahash, simplehash has the following benefits:
12 : : *
13 : : * - Due to the "templated" code generation has known structure sizes and no
14 : : * indirect function calls (which show up substantially in dynahash
15 : : * profiles). These features considerably increase speed for small
16 : : * entries.
17 : : * - Open addressing has better CPU cache behavior than dynahash's chained
18 : : * hashtables.
19 : : * - The generated interface is type-safe and easier to use than dynahash,
20 : : * though at the cost of more complex setup.
21 : : * - Allocates memory in a MemoryContext or another allocator with a
22 : : * malloc/free style interface (which isn't easily usable in a shared
23 : : * memory context)
24 : : * - Does not require the overhead of a separate memory context.
25 : : *
26 : : * Usage notes:
27 : : *
28 : : * To generate a hash-table and associated functions for a use case several
29 : : * macros have to be #define'ed before this file is included. Including
30 : : * the file #undef's all those, so a new hash table can be generated
31 : : * afterwards.
32 : : * The relevant parameters are:
33 : : * - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo'
34 : : * will result in hash table type 'foo_hash' and functions like
35 : : * 'foo_insert'/'foo_lookup' and so forth.
36 : : * - SH_ELEMENT_TYPE - type of the contained elements
37 : : * - SH_KEY_TYPE - type of the hashtable's key
38 : : * - SH_DECLARE - if defined function prototypes and type declarations are
39 : : * generated
40 : : * - SH_DEFINE - if defined function definitions are generated
41 : : * - SH_SCOPE - in which scope (e.g. extern, static inline) do function
42 : : * declarations reside
43 : : * - SH_RAW_ALLOCATOR - if defined, memory contexts are not used; instead,
44 : : * use this to allocate bytes. The allocator must zero the returned space.
45 : : * - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions
46 : : * are defined, so you can supply your own
47 : : * The following parameters are only relevant when SH_DEFINE is defined:
48 : : * - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key
49 : : * - SH_EQUAL(table, a, b) - compare two table keys
50 : : * - SH_HASH_KEY(table, key) - generate hash for the key
51 : : * - SH_STORE_HASH - if defined the hash is stored in the elements
52 : : * - SH_GET_HASH(tb, a) - return the field to store the hash in
53 : : *
54 : : * The element type is required to contain a "status" member that can store
55 : : * the range of values defined in the SH_STATUS enum.
56 : : *
57 : : * While SH_STORE_HASH (and subsequently SH_GET_HASH) are optional, because
58 : : * the hash table implementation needs to compare hashes to move elements
59 : : * (particularly when growing the hash), it's preferable, if possible, to
60 : : * store the element's hash in the element's data type. If the hash is so
61 : : * stored, the hash table will also compare hashes before calling SH_EQUAL
62 : : * when comparing two keys.
63 : : *
64 : : * For convenience the hash table create functions accept a void pointer
65 : : * that will be stored in the hash table type's member private_data. This
66 : : * allows callbacks to reference caller provided data.
67 : : *
68 : : * For examples of usage look at tidbitmap.c (file local definition) and
69 : : * execnodes.h/execGrouping.c (exposed declaration, file local
70 : : * implementation).
71 : : *
72 : : * Hash table design:
73 : : *
74 : : * The hash table design chosen is a variant of linear open-addressing. The
75 : : * reason for doing so is that linear addressing is CPU cache & pipeline
76 : : * friendly. The biggest disadvantage of simple linear addressing schemes
77 : : * are highly variable lookup times due to clustering, and deletions
78 : : * leaving a lot of tombstones around. To address these issues a variant
79 : : * of "robin hood" hashing is employed. Robin hood hashing optimizes
80 : : * chaining lengths by moving elements close to their optimal bucket
81 : : * ("rich" elements), out of the way if a to-be-inserted element is further
82 : : * away from its optimal position (i.e. it's "poor"). While that can make
83 : : * insertions slower, the average lookup performance is a lot better, and
84 : : * higher fill factors can be used in a still performant manner. To avoid
85 : : * tombstones - which normally solve the issue that a deleted node's
86 : : * presence is relevant to determine whether a lookup needs to continue
87 : : * looking or is done - buckets following a deleted element are shifted
88 : : * backwards, unless they're empty or already at their optimal position.
89 : : *
90 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
91 : : * Portions Copyright (c) 1994, Regents of the University of California
92 : : *
93 : : * src/include/lib/simplehash.h
94 : : */
95 : :
96 : : #include "port/pg_bitutils.h"
97 : :
98 : : /* helpers */
99 : : #define SH_MAKE_PREFIX(a) CppConcat(a,_)
100 : : #define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
101 : : #define SH_MAKE_NAME_(a,b) CppConcat(a,b)
102 : :
103 : : /* name macros for: */
104 : :
105 : : /* type declarations */
106 : : #define SH_TYPE SH_MAKE_NAME(hash)
107 : : #define SH_STATUS SH_MAKE_NAME(status)
108 : : #define SH_STATUS_EMPTY SH_MAKE_NAME(SH_EMPTY)
109 : : #define SH_STATUS_IN_USE SH_MAKE_NAME(SH_IN_USE)
110 : : #define SH_ITERATOR SH_MAKE_NAME(iterator)
111 : :
112 : : /* function declarations */
113 : : #define SH_CREATE SH_MAKE_NAME(create)
114 : : #define SH_DESTROY SH_MAKE_NAME(destroy)
115 : : #define SH_RESET SH_MAKE_NAME(reset)
116 : : #define SH_INSERT SH_MAKE_NAME(insert)
117 : : #define SH_INSERT_HASH SH_MAKE_NAME(insert_hash)
118 : : #define SH_DELETE_ITEM SH_MAKE_NAME(delete_item)
119 : : #define SH_DELETE SH_MAKE_NAME(delete)
120 : : #define SH_LOOKUP SH_MAKE_NAME(lookup)
121 : : #define SH_LOOKUP_HASH SH_MAKE_NAME(lookup_hash)
122 : : #define SH_GROW SH_MAKE_NAME(grow)
123 : : #define SH_START_ITERATE SH_MAKE_NAME(start_iterate)
124 : : #define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at)
125 : : #define SH_ITERATE SH_MAKE_NAME(iterate)
126 : : #define SH_ALLOCATE SH_MAKE_NAME(allocate)
127 : : #define SH_FREE SH_MAKE_NAME(free)
128 : : #define SH_STAT SH_MAKE_NAME(stat)
129 : :
130 : : /* internal helper functions (no externally visible prototypes) */
131 : : #define SH_COMPUTE_SIZE SH_MAKE_NAME(compute_size)
132 : : #define SH_UPDATE_PARAMETERS SH_MAKE_NAME(update_parameters)
133 : : #define SH_NEXT SH_MAKE_NAME(next)
134 : : #define SH_PREV SH_MAKE_NAME(prev)
135 : : #define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
136 : : #define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
137 : : #define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
138 : : #define SH_INSERT_HASH_INTERNAL SH_MAKE_NAME(insert_hash_internal)
139 : : #define SH_LOOKUP_HASH_INTERNAL SH_MAKE_NAME(lookup_hash_internal)
140 : :
141 : : /* generate forward declarations necessary to use the hash table */
142 : : #ifdef SH_DECLARE
143 : :
144 : : /* type definitions */
145 : : typedef struct SH_TYPE
146 : : {
147 : : /*
148 : : * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash
149 : : * tables. Note that the maximum number of elements is lower
150 : : * (SH_MAX_FILLFACTOR)
151 : : */
152 : : uint64 size;
153 : :
154 : : /* how many elements have valid contents */
155 : : uint32 members;
156 : :
157 : : /* mask for bucket and size calculations, based on size */
158 : : uint32 sizemask;
159 : :
160 : : /* boundary after which to grow hashtable */
161 : : uint32 grow_threshold;
162 : :
163 : : /* hash buckets */
164 : : SH_ELEMENT_TYPE *data;
165 : :
166 : : #ifndef SH_RAW_ALLOCATOR
167 : : /* memory context to use for allocations */
168 : : MemoryContext ctx;
169 : : #endif
170 : :
171 : : /* user defined data, useful for callbacks */
172 : : void *private_data;
173 : : } SH_TYPE;
174 : :
175 : : typedef enum SH_STATUS
176 : : {
177 : : SH_STATUS_EMPTY = 0x00,
178 : : SH_STATUS_IN_USE = 0x01
179 : : } SH_STATUS;
180 : :
181 : : typedef struct SH_ITERATOR
182 : : {
183 : : uint32 cur; /* current element */
184 : : uint32 end;
185 : : bool done; /* iterator exhausted? */
186 : : } SH_ITERATOR;
187 : :
188 : : /* externally visible function prototypes */
189 : : #ifdef SH_RAW_ALLOCATOR
190 : : /* <prefix>_hash <prefix>_create(uint32 nelements, void *private_data) */
191 : : SH_SCOPE SH_TYPE *SH_CREATE(uint32 nelements, void *private_data);
192 : : #else
193 : : /*
194 : : * <prefix>_hash <prefix>_create(MemoryContext ctx, uint32 nelements,
195 : : * void *private_data)
196 : : */
197 : : SH_SCOPE SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements,
198 : : void *private_data);
199 : : #endif
200 : :
201 : : /* void <prefix>_destroy(<prefix>_hash *tb) */
202 : : SH_SCOPE void SH_DESTROY(SH_TYPE * tb);
203 : :
204 : : /* void <prefix>_reset(<prefix>_hash *tb) */
205 : : SH_SCOPE void SH_RESET(SH_TYPE * tb);
206 : :
207 : : /* void <prefix>_grow(<prefix>_hash *tb, uint64 newsize) */
208 : : SH_SCOPE void SH_GROW(SH_TYPE * tb, uint64 newsize);
209 : :
210 : : /* <element> *<prefix>_insert(<prefix>_hash *tb, <key> key, bool *found) */
211 : : SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);
212 : :
213 : : /*
214 : : * <element> *<prefix>_insert_hash(<prefix>_hash *tb, <key> key, uint32 hash,
215 : : * bool *found)
216 : : */
217 : : SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
218 : : uint32 hash, bool *found);
219 : :
220 : : /* <element> *<prefix>_lookup(<prefix>_hash *tb, <key> key) */
221 : : SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);
222 : :
223 : : /* <element> *<prefix>_lookup_hash(<prefix>_hash *tb, <key> key, uint32 hash) */
224 : : SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
225 : : uint32 hash);
226 : :
227 : : /* void <prefix>_delete_item(<prefix>_hash *tb, <element> *entry) */
228 : : SH_SCOPE void SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry);
229 : :
230 : : /* bool <prefix>_delete(<prefix>_hash *tb, <key> key) */
231 : : SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key);
232 : :
233 : : /* void <prefix>_start_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
234 : : SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
235 : :
236 : : /*
237 : : * void <prefix>_start_iterate_at(<prefix>_hash *tb, <prefix>_iterator *iter,
238 : : * uint32 at)
239 : : */
240 : : SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);
241 : :
242 : : /* <element> *<prefix>_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
243 : : SH_SCOPE SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
244 : :
245 : : /* void <prefix>_stat(<prefix>_hash *tb */
246 : : SH_SCOPE void SH_STAT(SH_TYPE * tb);
247 : :
248 : : #endif /* SH_DECLARE */
249 : :
250 : :
251 : : /* generate implementation of the hash table */
252 : : #ifdef SH_DEFINE
253 : :
254 : : #ifndef SH_RAW_ALLOCATOR
255 : : #include "utils/memutils.h"
256 : : #endif
257 : :
258 : : /* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */
259 : : #define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1)
260 : :
261 : : /* normal fillfactor, unless already close to maximum */
262 : : #ifndef SH_FILLFACTOR
263 : : #define SH_FILLFACTOR (0.9)
264 : : #endif
265 : : /* increase fillfactor if we otherwise would error out */
266 : : #define SH_MAX_FILLFACTOR (0.98)
267 : : /* grow if actual and optimal location bigger than */
268 : : #ifndef SH_GROW_MAX_DIB
269 : : #define SH_GROW_MAX_DIB 25
270 : : #endif
271 : : /* grow if more than elements to move when inserting */
272 : : #ifndef SH_GROW_MAX_MOVE
273 : : #define SH_GROW_MAX_MOVE 150
274 : : #endif
275 : : #ifndef SH_GROW_MIN_FILLFACTOR
276 : : /* but do not grow due to SH_GROW_MAX_* if below */
277 : : #define SH_GROW_MIN_FILLFACTOR 0.1
278 : : #endif
279 : :
280 : : #ifdef SH_STORE_HASH
281 : : #define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
282 : : #else
283 : : #define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey))
284 : : #endif
285 : :
286 : : /*
287 : : * Wrap the following definitions in include guards, to avoid multiple
288 : : * definition errors if this header is included more than once. The rest of
289 : : * the file deliberately has no include guards, because it can be included
290 : : * with different parameters to define functions and types with non-colliding
291 : : * names.
292 : : */
293 : : #ifndef SIMPLEHASH_H
294 : : #define SIMPLEHASH_H
295 : :
296 : : #ifdef FRONTEND
297 : : #define sh_error(...) pg_fatal(__VA_ARGS__)
298 : : #define sh_log(...) pg_log_info(__VA_ARGS__)
299 : : #else
300 : : #define sh_error(...) elog(ERROR, __VA_ARGS__)
301 : : #define sh_log(...) elog(LOG, __VA_ARGS__)
302 : : #endif
303 : :
304 : : #endif
305 : :
306 : : /*
307 : : * Compute allocation size for hashtable. Result can be passed to
308 : : * SH_UPDATE_PARAMETERS.
309 : : */
310 : : static inline uint64
149 jdavis@postgresql.or 311 :GNC 90596 : SH_COMPUTE_SIZE(uint64 newsize)
312 : : {
313 : : uint64 size;
314 : :
315 : : /* supporting zero sized hashes would complicate matters */
2739 andres@anarazel.de 316 :CBC 90596 : size = Max(newsize, 2);
317 : :
318 : : /* round up size to the next power of 2, that's how bucketing works */
1467 drowley@postgresql.o 319 : 90596 : size = pg_nextpower2_64(size);
2739 andres@anarazel.de 320 [ - + ]: 90596 : Assert(size <= SH_MAX_SIZE);
321 : :
322 : : /*
323 : : * Verify that allocation of ->data is possible on this platform, without
324 : : * overflowing Size.
325 : : */
905 tgl@sss.pgh.pa.us 326 [ - + ]: 90596 : if (unlikely((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= SIZE_MAX / 2))
1580 rhaas@postgresql.org 327 [ # # ]:UBC 0 : sh_error("hash table too large");
328 : :
149 jdavis@postgresql.or 329 :GNC 90596 : return size;
330 : : }
331 : :
332 : : /*
333 : : * Update sizing parameters for hashtable. Called when creating and growing
334 : : * the hashtable.
335 : : */
336 : : static inline void
337 : 45298 : SH_UPDATE_PARAMETERS(SH_TYPE * tb, uint64 newsize)
338 : : {
339 : 45298 : uint64 size = SH_COMPUTE_SIZE(newsize);
340 : :
341 : : /* now set size */
2739 andres@anarazel.de 342 :CBC 45298 : tb->size = size;
975 drowley@postgresql.o 343 : 45298 : tb->sizemask = (uint32) (size - 1);
344 : :
345 : : /*
346 : : * Compute the next threshold at which we need to grow the hash table
347 : : * again.
348 : : */
2739 andres@anarazel.de 349 [ - + ]: 45298 : if (tb->size == SH_MAX_SIZE)
2739 andres@anarazel.de 350 :UBC 0 : tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR;
351 : : else
2739 andres@anarazel.de 352 :CBC 45298 : tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR;
353 : 45298 : }
354 : :
355 : : /* return the optimal bucket for the hash */
356 : : static inline uint32
2524 bruce@momjian.us 357 : 19936172 : SH_INITIAL_BUCKET(SH_TYPE * tb, uint32 hash)
358 : : {
2739 andres@anarazel.de 359 : 19936172 : return hash & tb->sizemask;
360 : : }
361 : :
362 : : /* return next bucket after the current, handling wraparound */
363 : : static inline uint32
2524 bruce@momjian.us 364 : 8736523 : SH_NEXT(SH_TYPE * tb, uint32 curelem, uint32 startelem)
365 : : {
2739 andres@anarazel.de 366 : 8736523 : curelem = (curelem + 1) & tb->sizemask;
367 : :
368 [ - + ]: 8736523 : Assert(curelem != startelem);
369 : :
370 : 8736523 : return curelem;
371 : : }
372 : :
373 : : /* return bucket before the current, handling wraparound */
374 : : static inline uint32
2524 bruce@momjian.us 375 : 1550844 : SH_PREV(SH_TYPE * tb, uint32 curelem, uint32 startelem)
376 : : {
2739 andres@anarazel.de 377 : 1550844 : curelem = (curelem - 1) & tb->sizemask;
378 : :
379 [ - + ]: 1550844 : Assert(curelem != startelem);
380 : :
381 : 1550844 : return curelem;
382 : : }
383 : :
384 : : /* return distance between bucket and its optimal position */
385 : : static inline uint32
2524 bruce@momjian.us 386 : 4221276 : SH_DISTANCE_FROM_OPTIMAL(SH_TYPE * tb, uint32 optimal, uint32 bucket)
387 : : {
2739 andres@anarazel.de 388 [ + + ]: 4221276 : if (optimal <= bucket)
389 : 4200309 : return bucket - optimal;
390 : : else
391 : 20967 : return (tb->size + bucket) - optimal;
392 : : }
393 : :
394 : : static inline uint32
2524 bruce@momjian.us 395 : 4878273 : SH_ENTRY_HASH(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
396 : : {
397 : : #ifdef SH_STORE_HASH
2739 andres@anarazel.de 398 : 1879381 : return SH_GET_HASH(tb, entry);
399 : : #else
400 : 2998892 : return SH_HASH_KEY(tb, entry->SH_KEY);
401 : : #endif
402 : : }
403 : :
404 : : /* default memory allocator function */
405 : : static inline void *SH_ALLOCATE(SH_TYPE * type, Size size);
406 : : static inline void SH_FREE(SH_TYPE * type, void *pointer);
407 : :
408 : : #ifndef SH_USE_NONDEFAULT_ALLOCATOR
409 : :
410 : : /* default memory allocator function */
411 : : static inline void *
2524 bruce@momjian.us 412 : 40983 : SH_ALLOCATE(SH_TYPE * type, Size size)
413 : : {
414 : : #ifdef SH_RAW_ALLOCATOR
1580 rhaas@postgresql.org 415 : 294 : return SH_RAW_ALLOCATOR(size);
416 : : #else
2623 417 : 40689 : return MemoryContextAllocExtended(type->ctx, size,
418 : : MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
419 : : #endif
420 : : }
421 : :
422 : : /* default memory free function */
423 : : static inline void
2524 bruce@momjian.us 424 : 17128 : SH_FREE(SH_TYPE * type, void *pointer)
425 : : {
2623 rhaas@postgresql.org 426 : 17128 : pfree(pointer);
427 : 17128 : }
428 : :
429 : : #endif
430 : :
431 : : /*
432 : : * Create a hash table with enough space for `nelements` distinct members.
433 : : * Memory for the hash table is allocated from the passed-in context. If
434 : : * desired, the array of elements can be allocated using a passed-in allocator;
435 : : * this could be useful in order to place the array of elements in a shared
436 : : * memory, or in a context that will outlive the rest of the hash table.
437 : : * Memory other than for the array of elements will still be allocated from
438 : : * the passed-in context.
439 : : */
440 : : #ifdef SH_RAW_ALLOCATOR
441 : : SH_SCOPE SH_TYPE *
1580 442 : 290 : SH_CREATE(uint32 nelements, void *private_data)
443 : : #else
444 : : SH_SCOPE SH_TYPE *
2621 445 : 42905 : SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data)
446 : : #endif
447 : : {
448 : : SH_TYPE *tb;
449 : : uint64 size;
450 : :
451 : : #ifdef SH_RAW_ALLOCATOR
528 tgl@sss.pgh.pa.us 452 : 290 : tb = (SH_TYPE *) SH_RAW_ALLOCATOR(sizeof(SH_TYPE));
453 : : #else
454 : 42905 : tb = (SH_TYPE *) MemoryContextAllocZero(ctx, sizeof(SH_TYPE));
2739 andres@anarazel.de 455 : 42905 : tb->ctx = ctx;
456 : : #endif
2621 rhaas@postgresql.org 457 : 43195 : tb->private_data = private_data;
458 : :
459 : : /* increase nelements by fillfactor, want to store nelements elements */
2739 andres@anarazel.de 460 [ - + ]: 43195 : size = Min((double) SH_MAX_SIZE, ((double) nelements) / SH_FILLFACTOR);
461 : :
149 jdavis@postgresql.or 462 :GNC 43195 : size = SH_COMPUTE_SIZE(size);
463 : :
464 : 43195 : tb->data = (SH_ELEMENT_TYPE *) SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * size);
465 : :
466 : 43195 : SH_UPDATE_PARAMETERS(tb, size);
2739 andres@anarazel.de 467 :CBC 43195 : return tb;
468 : : }
469 : :
470 : : /* destroy a previously created hash table */
471 : : SH_SCOPE void
2524 bruce@momjian.us 472 : 19338 : SH_DESTROY(SH_TYPE * tb)
473 : : {
2623 rhaas@postgresql.org 474 : 19338 : SH_FREE(tb, tb->data);
2739 andres@anarazel.de 475 : 19338 : pfree(tb);
476 : 19338 : }
477 : :
478 : : /* reset the contents of a previously created hash table */
479 : : SH_SCOPE void
1891 480 : 91242 : SH_RESET(SH_TYPE * tb)
481 : : {
482 : 91242 : memset(tb->data, 0, sizeof(SH_ELEMENT_TYPE) * tb->size);
483 : 91242 : tb->members = 0;
484 : 91242 : }
485 : :
486 : : /*
487 : : * Grow a hash table to at least `newsize` buckets.
488 : : *
489 : : * Usually this will automatically be called by insertions/deletions, when
490 : : * necessary. But resizing to the exact input size can be advantageous
491 : : * performance-wise, when known at some point.
492 : : */
493 : : SH_SCOPE void
975 drowley@postgresql.o 494 : 2103 : SH_GROW(SH_TYPE * tb, uint64 newsize)
495 : : {
2739 andres@anarazel.de 496 : 2103 : uint64 oldsize = tb->size;
497 : 2103 : SH_ELEMENT_TYPE *olddata = tb->data;
498 : : SH_ELEMENT_TYPE *newdata;
499 : : uint32 i;
500 : 2103 : uint32 startelem = 0;
501 : : uint32 copyelem;
502 : :
1467 drowley@postgresql.o 503 [ - + ]: 2103 : Assert(oldsize == pg_nextpower2_64(oldsize));
2739 andres@anarazel.de 504 [ - + ]: 2103 : Assert(oldsize != SH_MAX_SIZE);
505 [ - + ]: 2103 : Assert(oldsize < newsize);
506 : :
149 jdavis@postgresql.or 507 :GNC 2103 : newsize = SH_COMPUTE_SIZE(newsize);
508 : :
509 : 2103 : tb->data = (SH_ELEMENT_TYPE *) SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * newsize);
510 : :
511 : : /*
512 : : * Update parameters for new table after allocation succeeds to avoid
513 : : * inconsistent state on OOM.
514 : : */
515 : 2103 : SH_UPDATE_PARAMETERS(tb, newsize);
516 : :
2739 andres@anarazel.de 517 :CBC 2103 : newdata = tb->data;
518 : :
519 : : /*
520 : : * Copy entries from the old data to newdata. We theoretically could use
521 : : * SH_INSERT here, to avoid code duplication, but that's more general than
522 : : * we need. We neither want tb->members increased, nor do we need to do
523 : : * deal with deleted elements, nor do we need to compare keys. So a
524 : : * special-cased implementation is lot faster. As resizing can be time
525 : : * consuming and frequent, that's worthwhile to optimize.
526 : : *
527 : : * To be able to simply move entries over, we have to start not at the
528 : : * first bucket (i.e olddata[0]), but find the first bucket that's either
529 : : * empty, or is occupied by an entry at its optimal position. Such a
530 : : * bucket has to exist in any table with a load factor under 1, as not all
531 : : * buckets are occupied, i.e. there always has to be an empty bucket. By
532 : : * starting at such a bucket we can move the entries to the larger table,
533 : : * without having to deal with conflicts.
534 : : */
535 : :
536 : : /* search for the first element in the hash that's not wrapped around */
537 [ + - ]: 21220 : for (i = 0; i < oldsize; i++)
538 : : {
539 : 21220 : SH_ELEMENT_TYPE *oldentry = &olddata[i];
540 : : uint32 hash;
541 : : uint32 optimal;
542 : :
543 [ + + ]: 21220 : if (oldentry->status != SH_STATUS_IN_USE)
544 : : {
545 : 752 : startelem = i;
546 : 752 : break;
547 : : }
548 : :
549 : 20468 : hash = SH_ENTRY_HASH(tb, oldentry);
550 : 20468 : optimal = SH_INITIAL_BUCKET(tb, hash);
551 : :
552 [ + + ]: 20468 : if (optimal == i)
553 : : {
554 : 1351 : startelem = i;
555 : 1351 : break;
556 : : }
557 : : }
558 : :
559 : : /* and copy all elements in the old table */
560 : 2103 : copyelem = startelem;
561 [ + + ]: 610627 : for (i = 0; i < oldsize; i++)
562 : : {
563 : 608524 : SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];
564 : :
565 [ + + ]: 608524 : if (oldentry->status == SH_STATUS_IN_USE)
566 : : {
567 : : uint32 hash;
568 : : uint32 startelem2;
569 : : uint32 curelem;
570 : : SH_ELEMENT_TYPE *newentry;
571 : :
572 : 535946 : hash = SH_ENTRY_HASH(tb, oldentry);
557 drowley@postgresql.o 573 : 535946 : startelem2 = SH_INITIAL_BUCKET(tb, hash);
574 : 535946 : curelem = startelem2;
575 : :
576 : : /* find empty element to put data into */
577 : : while (true)
578 : : {
2739 andres@anarazel.de 579 : 740262 : newentry = &newdata[curelem];
580 : :
581 [ + + ]: 740262 : if (newentry->status == SH_STATUS_EMPTY)
582 : : {
583 : 535946 : break;
584 : : }
585 : :
557 drowley@postgresql.o 586 : 204316 : curelem = SH_NEXT(tb, curelem, startelem2);
587 : : }
588 : :
589 : : /* copy entry to new slot */
2739 andres@anarazel.de 590 : 535946 : memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE));
591 : : }
592 : :
593 : : /* can't use SH_NEXT here, would use new size */
594 : 608524 : copyelem++;
595 [ + + ]: 608524 : if (copyelem >= oldsize)
596 : : {
597 : 2103 : copyelem = 0;
598 : : }
599 : : }
600 : :
2623 rhaas@postgresql.org 601 : 2103 : SH_FREE(tb, olddata);
2739 andres@anarazel.de 602 : 2103 : }
603 : :
604 : : /*
605 : : * This is a separate static inline function, so it can be reliably be inlined
606 : : * into its wrapper functions even if SH_SCOPE is extern.
607 : : */
608 : : static inline SH_ELEMENT_TYPE *
1718 jdavis@postgresql.or 609 : 9990878 : SH_INSERT_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
610 : : {
611 : : uint32 startelem;
612 : : uint32 curelem;
613 : : SH_ELEMENT_TYPE *data;
614 : : uint32 insertdist;
615 : :
2699 andres@anarazel.de 616 : 87 : restart:
617 : 9990965 : insertdist = 0;
618 : :
619 : : /*
620 : : * We do the grow check even if the key is actually present, to avoid
621 : : * doing the check inside the loop. This also lets us avoid having to
622 : : * re-find our position in the hashtable after resizing.
623 : : *
624 : : * Note that this also reached when resizing the table due to
625 : : * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE.
626 : : */
2739 627 [ + + ]: 9990965 : if (unlikely(tb->members >= tb->grow_threshold))
628 : : {
905 tgl@sss.pgh.pa.us 629 [ - + ]: 2103 : if (unlikely(tb->size == SH_MAX_SIZE))
1580 rhaas@postgresql.org 630 [ # # ]:UBC 0 : sh_error("hash table size exceeded");
631 : :
632 : : /*
633 : : * When optimizing, it can be very useful to print these out.
634 : : */
635 : : /* SH_STAT(tb); */
2739 andres@anarazel.de 636 :CBC 2103 : SH_GROW(tb, tb->size * 2);
637 : : /* SH_STAT(tb); */
638 : : }
639 : :
640 : : /* perform insert, start bucket search at optimal location */
641 : 9990965 : data = tb->data;
642 : 9990965 : startelem = SH_INITIAL_BUCKET(tb, hash);
643 : 9990965 : curelem = startelem;
644 : : while (true)
645 : 3981387 : {
646 : : uint32 curdist;
647 : : uint32 curhash;
648 : : uint32 curoptimal;
649 : 13972352 : SH_ELEMENT_TYPE *entry = &data[curelem];
650 : :
651 : : /* any empty bucket can directly be used */
652 [ + + ]: 13972352 : if (entry->status == SH_STATUS_EMPTY)
653 : : {
654 : 2045667 : tb->members++;
655 : 2045667 : entry->SH_KEY = key;
656 : : #ifdef SH_STORE_HASH
657 : 1031027 : SH_GET_HASH(tb, entry) = hash;
658 : : #endif
659 : 2045667 : entry->status = SH_STATUS_IN_USE;
660 : 2045667 : *found = false;
661 : 2045667 : return entry;
662 : : }
663 : :
664 : : /*
665 : : * If the bucket is not empty, we either found a match (in which case
666 : : * we're done), or we have to decide whether to skip over or move the
667 : : * colliding entry. When the colliding element's distance to its
668 : : * optimal position is smaller than the to-be-inserted entry's, we
669 : : * shift the colliding entry (and its followers) forward by one.
670 : : */
671 : :
672 [ + + + - : 11926685 : if (SH_COMPARE_KEYS(tb, hash, key, entry))
+ - ][ + +
+ - + - ]
[ + + + + ]
673 : : {
674 [ - + ]: 7705409 : Assert(entry->status == SH_STATUS_IN_USE);
675 : 7705409 : *found = true;
676 : 7705409 : return entry;
677 : : }
678 : :
679 : 4221276 : curhash = SH_ENTRY_HASH(tb, entry);
680 : 4221276 : curoptimal = SH_INITIAL_BUCKET(tb, curhash);
681 : 4221276 : curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
682 : :
683 [ + + ]: 4221276 : if (insertdist > curdist)
684 : : {
685 : 239889 : SH_ELEMENT_TYPE *lastentry = entry;
686 : 239889 : uint32 emptyelem = curelem;
687 : : uint32 moveelem;
2699 688 : 239889 : int32 emptydist = 0;
689 : :
690 : : /* find next empty bucket */
691 : : while (true)
2739 692 : 1324092 : {
693 : : SH_ELEMENT_TYPE *emptyentry;
694 : :
695 : 1563981 : emptyelem = SH_NEXT(tb, emptyelem, startelem);
696 : 1563981 : emptyentry = &data[emptyelem];
697 : :
698 [ + + ]: 1563981 : if (emptyentry->status == SH_STATUS_EMPTY)
699 : : {
700 : 239802 : lastentry = emptyentry;
701 : 239802 : break;
702 : : }
703 : :
704 : : /*
705 : : * To avoid negative consequences from overly imbalanced
706 : : * hashtables, grow the hashtable if collisions would require
707 : : * us to move a lot of entries. The most likely cause of such
708 : : * imbalance is filling a (currently) small table, from a
709 : : * currently big one, in hash-table order. Don't grow if the
710 : : * hashtable would be too empty, to prevent quick space
711 : : * explosion for some weird edge cases.
712 : : */
2267 713 [ + + ]: 1324179 : if (unlikely(++emptydist > SH_GROW_MAX_MOVE) &&
714 [ + - ]: 87 : ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
715 : : {
2699 716 : 87 : tb->grow_threshold = 0;
717 : 87 : goto restart;
718 : : }
719 : : }
720 : :
721 : : /* shift forward, starting at last occupied element */
722 : :
723 : : /*
724 : : * TODO: This could be optimized to be one memcpy in many cases,
725 : : * excepting wrapping around at the end of ->data. Hasn't shown up
726 : : * in profiles so far though.
727 : : */
2739 728 : 239802 : moveelem = emptyelem;
729 [ + + ]: 1790646 : while (moveelem != curelem)
730 : : {
731 : : SH_ELEMENT_TYPE *moveentry;
732 : :
733 : 1550844 : moveelem = SH_PREV(tb, moveelem, startelem);
734 : 1550844 : moveentry = &data[moveelem];
735 : :
736 : 1550844 : memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE));
737 : 1550844 : lastentry = moveentry;
738 : : }
739 : :
740 : : /* and fill the now empty spot */
741 : 239802 : tb->members++;
742 : :
743 : 239802 : entry->SH_KEY = key;
744 : : #ifdef SH_STORE_HASH
745 : 126279 : SH_GET_HASH(tb, entry) = hash;
746 : : #endif
747 : 239802 : entry->status = SH_STATUS_IN_USE;
748 : 239802 : *found = false;
749 : 239802 : return entry;
750 : : }
751 : :
752 : 3981387 : curelem = SH_NEXT(tb, curelem, startelem);
753 : 3981387 : insertdist++;
754 : :
755 : : /*
756 : : * To avoid negative consequences from overly imbalanced hashtables,
757 : : * grow the hashtable if collisions lead to large runs. The most
758 : : * likely cause of such imbalance is filling a (currently) small
759 : : * table, from a currently big one, in hash-table order. Don't grow
760 : : * if the hashtable would be too empty, to prevent quick space
761 : : * explosion for some weird edge cases.
762 : : */
2267 763 [ - + ]: 3981387 : if (unlikely(insertdist > SH_GROW_MAX_DIB) &&
2267 andres@anarazel.de 764 [ # # ]:UBC 0 : ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
765 : : {
2699 766 : 0 : tb->grow_threshold = 0;
767 : 0 : goto restart;
768 : : }
769 : : }
770 : : }
771 : :
772 : : /*
773 : : * Insert the key into the hash-table, set *found to true if the key already
774 : : * exists, false otherwise. Returns the hash-table entry in either case.
775 : : */
776 : : SH_SCOPE SH_ELEMENT_TYPE *
1718 jdavis@postgresql.or 777 :CBC 6934206 : SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
778 : : {
1431 tgl@sss.pgh.pa.us 779 : 6934206 : uint32 hash = SH_HASH_KEY(tb, key);
780 : :
1718 jdavis@postgresql.or 781 : 6934206 : return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
782 : : }
783 : :
784 : : /*
785 : : * Insert the key into the hash-table using an already-calculated hash. Set
786 : : * *found to true if the key already exists, false otherwise. Returns the
787 : : * hash-table entry in either case.
788 : : */
789 : : SH_SCOPE SH_ELEMENT_TYPE *
790 : 3056672 : SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
791 : : {
792 : 3056672 : return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
793 : : }
794 : :
795 : : /*
796 : : * This is a separate static inline function, so it can be reliably be inlined
797 : : * into its wrapper functions even if SH_SCOPE is extern.
798 : : */
799 : : static inline SH_ELEMENT_TYPE *
800 : 4154799 : SH_LOOKUP_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
801 : : {
2739 andres@anarazel.de 802 : 4154799 : const uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
803 : 4154799 : uint32 curelem = startelem;
804 : :
805 : : while (true)
806 : 1820800 : {
807 : 5975599 : SH_ELEMENT_TYPE *entry = &tb->data[curelem];
808 : :
809 [ + + ]: 5975599 : if (entry->status == SH_STATUS_EMPTY)
810 : : {
811 : 1377754 : return NULL;
812 : : }
813 : :
814 [ - + ]: 4597845 : Assert(entry->status == SH_STATUS_IN_USE);
815 : :
816 [ + + + - : 4597845 : if (SH_COMPARE_KEYS(tb, hash, key, entry))
+ - ][ + +
+ - + - ]
[ + + + - ]
817 : 2777045 : return entry;
818 : :
819 : : /*
820 : : * TODO: we could stop search based on distance. If the current
821 : : * buckets's distance-from-optimal is smaller than what we've skipped
822 : : * already, the entry doesn't exist. Probably only do so if
823 : : * SH_STORE_HASH is defined, to avoid re-computing hashes?
824 : : */
825 : :
826 : 1820800 : curelem = SH_NEXT(tb, curelem, startelem);
827 : : }
828 : : }
829 : :
830 : : /*
831 : : * Lookup entry in hash table. Returns NULL if key not present.
832 : : */
833 : : SH_SCOPE SH_ELEMENT_TYPE *
1718 jdavis@postgresql.or 834 : 3537308 : SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key)
835 : : {
1431 tgl@sss.pgh.pa.us 836 : 3537308 : uint32 hash = SH_HASH_KEY(tb, key);
837 : :
1718 jdavis@postgresql.or 838 : 3537308 : return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
839 : : }
840 : :
841 : : /*
842 : : * Lookup entry in hash table using an already-calculated hash.
843 : : *
844 : : * Returns NULL if key not present.
845 : : */
846 : : SH_SCOPE SH_ELEMENT_TYPE *
847 : 617491 : SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
848 : : {
849 : 617491 : return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
850 : : }
851 : :
852 : : /*
853 : : * Delete entry from hash table by key. Returns whether to-be-deleted key was
854 : : * present.
855 : : */
856 : : SH_SCOPE bool
2524 bruce@momjian.us 857 : 912135 : SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
858 : : {
2739 andres@anarazel.de 859 : 912135 : uint32 hash = SH_HASH_KEY(tb, key);
860 : 912135 : uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
861 : 912135 : uint32 curelem = startelem;
862 : :
863 : : while (true)
864 : 256326 : {
865 : 1168461 : SH_ELEMENT_TYPE *entry = &tb->data[curelem];
866 : :
867 [ + + ]: 1168461 : if (entry->status == SH_STATUS_EMPTY)
868 : 72821 : return false;
869 : :
870 [ + - + + ]: 2177885 : if (entry->status == SH_STATUS_IN_USE &&
[ + - + + ]
871 [ + + ]: 1095640 : SH_COMPARE_KEYS(tb, hash, key, entry))
[ # # # # ]
872 : : {
873 : 839314 : SH_ELEMENT_TYPE *lastentry = entry;
874 : :
875 : 839314 : tb->members--;
876 : :
877 : : /*
878 : : * Backward shift following elements till either an empty element
879 : : * or an element at its optimal position is encountered.
880 : : *
881 : : * While that sounds expensive, the average chain length is short,
882 : : * and deletions would otherwise require tombstones.
883 : : */
884 : : while (true)
885 : 66817 : {
886 : : SH_ELEMENT_TYPE *curentry;
887 : : uint32 curhash;
888 : : uint32 curoptimal;
889 : :
890 : 906131 : curelem = SH_NEXT(tb, curelem, startelem);
891 : 906131 : curentry = &tb->data[curelem];
892 : :
893 [ + + ]: 906131 : if (curentry->status != SH_STATUS_IN_USE)
894 : : {
895 : 809709 : lastentry->status = SH_STATUS_EMPTY;
896 : 809709 : break;
897 : : }
898 : :
899 : 96422 : curhash = SH_ENTRY_HASH(tb, curentry);
900 : 96422 : curoptimal = SH_INITIAL_BUCKET(tb, curhash);
901 : :
902 : : /* current is at optimal position, done */
903 [ + + ]: 96422 : if (curoptimal == curelem)
904 : : {
905 : 29605 : lastentry->status = SH_STATUS_EMPTY;
906 : 29605 : break;
907 : : }
908 : :
909 : : /* shift */
910 : 66817 : memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
911 : :
912 : 66817 : lastentry = curentry;
913 : : }
914 : :
915 : 839314 : return true;
916 : : }
917 : :
918 : : /* TODO: return false; if distance too big */
919 : :
920 : 256326 : curelem = SH_NEXT(tb, curelem, startelem);
921 : : }
922 : : }
923 : :
924 : : /*
925 : : * Delete entry from hash table by entry pointer
926 : : */
927 : : SH_SCOPE void
1111 drowley@postgresql.o 928 : 1194 : SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
929 : : {
930 : 1194 : SH_ELEMENT_TYPE *lastentry = entry;
931 : 1194 : uint32 hash = SH_ENTRY_HASH(tb, entry);
932 : 1194 : uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
933 : : uint32 curelem;
934 : :
935 : : /* Calculate the index of 'entry' */
936 : 1194 : curelem = entry - &tb->data[0];
937 : :
938 : 1194 : tb->members--;
939 : :
940 : : /*
941 : : * Backward shift following elements till either an empty element or an
942 : : * element at its optimal position is encountered.
943 : : *
944 : : * While that sounds expensive, the average chain length is short, and
945 : : * deletions would otherwise require tombstones.
946 : : */
947 : : while (true)
948 : 2388 : {
949 : : SH_ELEMENT_TYPE *curentry;
950 : : uint32 curhash;
951 : : uint32 curoptimal;
952 : :
953 : 3582 : curelem = SH_NEXT(tb, curelem, startelem);
954 : 3582 : curentry = &tb->data[curelem];
955 : :
956 [ + + ]: 3582 : if (curentry->status != SH_STATUS_IN_USE)
957 : : {
958 : 615 : lastentry->status = SH_STATUS_EMPTY;
959 : 615 : break;
960 : : }
961 : :
962 : 2967 : curhash = SH_ENTRY_HASH(tb, curentry);
963 : 2967 : curoptimal = SH_INITIAL_BUCKET(tb, curhash);
964 : :
965 : : /* current is at optimal position, done */
966 [ + + ]: 2967 : if (curoptimal == curelem)
967 : : {
968 : 579 : lastentry->status = SH_STATUS_EMPTY;
969 : 579 : break;
970 : : }
971 : :
972 : : /* shift */
973 : 2388 : memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
974 : :
975 : 2388 : lastentry = curentry;
976 : : }
977 : 1194 : }
978 : :
979 : : /*
980 : : * Initialize iterator.
981 : : */
982 : : SH_SCOPE void
2524 bruce@momjian.us 983 : 92422 : SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
984 : : {
2739 andres@anarazel.de 985 : 92422 : uint64 startelem = PG_UINT64_MAX;
986 : :
987 : : /*
988 : : * Search for the first empty element. As deletions during iterations are
989 : : * supported, we want to start/end at an element that cannot be affected
990 : : * by elements being shifted.
991 : : */
283 992 [ + - ]: 105383 : for (uint32 i = 0; i < tb->size; i++)
993 : : {
2739 994 : 105383 : SH_ELEMENT_TYPE *entry = &tb->data[i];
995 : :
996 [ + + ]: 105383 : if (entry->status != SH_STATUS_IN_USE)
997 : : {
998 : 92422 : startelem = i;
999 : 92422 : break;
1000 : : }
1001 : : }
1002 : :
1003 : : /* we should have found an empty element */
1004 [ - + ]: 92422 : Assert(startelem < SH_MAX_SIZE);
1005 : :
1006 : : /*
1007 : : * Iterate backwards, that allows the current element to be deleted, even
1008 : : * if there are backward shifts
1009 : : */
1010 : 92422 : iter->cur = startelem;
1011 : 92422 : iter->end = iter->cur;
1012 : 92422 : iter->done = false;
1013 : 92422 : }
1014 : :
1015 : : /*
1016 : : * Initialize iterator to a specific bucket. That's really only useful for
1017 : : * cases where callers are partially iterating over the hashspace, and that
1018 : : * iteration deletes and inserts elements based on visited entries. Doing that
1019 : : * repeatedly could lead to an unbalanced keyspace when always starting at the
1020 : : * same position.
1021 : : */
1022 : : SH_SCOPE void
2524 bruce@momjian.us 1023 : 12 : SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at)
1024 : : {
1025 : : /*
1026 : : * Iterate backwards, that allows the current element to be deleted, even
1027 : : * if there are backward shifts.
1028 : : */
2489 tgl@sss.pgh.pa.us 1029 : 12 : iter->cur = at & tb->sizemask; /* ensure at is within a valid range */
2739 andres@anarazel.de 1030 : 12 : iter->end = iter->cur;
1031 : 12 : iter->done = false;
1032 : 12 : }
1033 : :
1034 : : /*
1035 : : * Iterate over all entries in the hash-table. Return the next occupied entry,
1036 : : * or NULL if done.
1037 : : *
1038 : : * During iteration the current entry in the hash table may be deleted,
1039 : : * without leading to elements being skipped or returned twice. Additionally
1040 : : * the rest of the table may be modified (i.e. there can be insertions or
1041 : : * deletions), but if so, there's neither a guarantee that all nodes are
1042 : : * visited at least once, nor a guarantee that a node is visited at most once.
1043 : : */
1044 : : SH_SCOPE SH_ELEMENT_TYPE *
2524 bruce@momjian.us 1045 : 1982236 : SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
1046 : : {
2739 andres@anarazel.de 1047 [ + + ]: 10591451 : while (!iter->done)
1048 : : {
1049 : : SH_ELEMENT_TYPE *elem;
1050 : :
1051 : 10499088 : elem = &tb->data[iter->cur];
1052 : :
1053 : : /* next element in backward direction */
1054 : 10499088 : iter->cur = (iter->cur - 1) & tb->sizemask;
1055 : :
1056 [ + + ]: 10499088 : if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask))
1057 : 92363 : iter->done = true;
1058 [ + + ]: 10499088 : if (elem->status == SH_STATUS_IN_USE)
1059 : : {
1060 : 1889873 : return elem;
1061 : : }
1062 : : }
1063 : :
1064 : 92363 : return NULL;
1065 : : }
1066 : :
1067 : : /*
1068 : : * Report some statistics about the state of the hashtable. For
1069 : : * debugging/profiling purposes only.
1070 : : */
1071 : : SH_SCOPE void
2524 bruce@momjian.us 1072 :UBC 0 : SH_STAT(SH_TYPE * tb)
1073 : : {
2739 andres@anarazel.de 1074 : 0 : uint32 max_chain_length = 0;
1075 : 0 : uint32 total_chain_length = 0;
1076 : : double avg_chain_length;
1077 : : double fillfactor;
1078 : : uint32 i;
1079 : :
528 tgl@sss.pgh.pa.us 1080 : 0 : uint32 *collisions = (uint32 *) palloc0(tb->size * sizeof(uint32));
2739 andres@anarazel.de 1081 : 0 : uint32 total_collisions = 0;
1082 : 0 : uint32 max_collisions = 0;
1083 : : double avg_collisions;
1084 : :
1085 [ # # ]: 0 : for (i = 0; i < tb->size; i++)
1086 : : {
1087 : : uint32 hash;
1088 : : uint32 optimal;
1089 : : uint32 dist;
1090 : : SH_ELEMENT_TYPE *elem;
1091 : :
1092 : 0 : elem = &tb->data[i];
1093 : :
1094 [ # # ]: 0 : if (elem->status != SH_STATUS_IN_USE)
1095 : 0 : continue;
1096 : :
1097 : 0 : hash = SH_ENTRY_HASH(tb, elem);
1098 : 0 : optimal = SH_INITIAL_BUCKET(tb, hash);
1099 : 0 : dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);
1100 : :
1101 [ # # ]: 0 : if (dist > max_chain_length)
1102 : 0 : max_chain_length = dist;
1103 : 0 : total_chain_length += dist;
1104 : :
1105 : 0 : collisions[optimal]++;
1106 : : }
1107 : :
1108 [ # # ]: 0 : for (i = 0; i < tb->size; i++)
1109 : : {
1110 : 0 : uint32 curcoll = collisions[i];
1111 : :
1112 [ # # ]: 0 : if (curcoll == 0)
1113 : 0 : continue;
1114 : :
1115 : : /* single contained element is not a collision */
1116 : 0 : curcoll--;
1117 : 0 : total_collisions += curcoll;
1118 [ # # ]: 0 : if (curcoll > max_collisions)
1119 : 0 : max_collisions = curcoll;
1120 : : }
1121 : :
1122 : : /* large enough to be worth freeing, even if just used for debugging */
7 1123 : 0 : pfree(collisions);
1124 : :
2739 1125 [ # # ]: 0 : if (tb->members > 0)
1126 : : {
1127 : 0 : fillfactor = tb->members / ((double) tb->size);
1128 : 0 : avg_chain_length = ((double) total_chain_length) / tb->members;
1129 : 0 : avg_collisions = ((double) total_collisions) / tb->members;
1130 : : }
1131 : : else
1132 : : {
1133 : 0 : fillfactor = 0;
1134 : 0 : avg_chain_length = 0;
1135 : 0 : avg_collisions = 0;
1136 : : }
1137 : :
928 peter@eisentraut.org 1138 [ # # ]: 0 : sh_log("size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, total_collisions: %u, max_collisions: %u, avg_collisions: %f",
1139 : : tb->size, tb->members, fillfactor, total_chain_length, max_chain_length, avg_chain_length,
1140 : : total_collisions, max_collisions, avg_collisions);
2739 andres@anarazel.de 1141 : 0 : }
1142 : :
1143 : : #endif /* SH_DEFINE */
1144 : :
1145 : :
1146 : : /* undefine external parameters, so next hash table can be defined */
1147 : : #undef SH_PREFIX
1148 : : #undef SH_KEY_TYPE
1149 : : #undef SH_KEY
1150 : : #undef SH_ELEMENT_TYPE
1151 : : #undef SH_HASH_KEY
1152 : : #undef SH_SCOPE
1153 : : #undef SH_DECLARE
1154 : : #undef SH_DEFINE
1155 : : #undef SH_GET_HASH
1156 : : #undef SH_STORE_HASH
1157 : : #undef SH_USE_NONDEFAULT_ALLOCATOR
1158 : : #undef SH_EQUAL
1159 : :
1160 : : /* undefine locally declared macros */
1161 : : #undef SH_MAKE_PREFIX
1162 : : #undef SH_MAKE_NAME
1163 : : #undef SH_MAKE_NAME_
1164 : : #undef SH_FILLFACTOR
1165 : : #undef SH_MAX_FILLFACTOR
1166 : : #undef SH_GROW_MAX_DIB
1167 : : #undef SH_GROW_MAX_MOVE
1168 : : #undef SH_GROW_MIN_FILLFACTOR
1169 : : #undef SH_MAX_SIZE
1170 : :
1171 : : /* types */
1172 : : #undef SH_TYPE
1173 : : #undef SH_STATUS
1174 : : #undef SH_STATUS_EMPTY
1175 : : #undef SH_STATUS_IN_USE
1176 : : #undef SH_ITERATOR
1177 : :
1178 : : /* external function names */
1179 : : #undef SH_CREATE
1180 : : #undef SH_DESTROY
1181 : : #undef SH_RESET
1182 : : #undef SH_INSERT
1183 : : #undef SH_INSERT_HASH
1184 : : #undef SH_DELETE_ITEM
1185 : : #undef SH_DELETE
1186 : : #undef SH_LOOKUP
1187 : : #undef SH_LOOKUP_HASH
1188 : : #undef SH_GROW
1189 : : #undef SH_START_ITERATE
1190 : : #undef SH_START_ITERATE_AT
1191 : : #undef SH_ITERATE
1192 : : #undef SH_ALLOCATE
1193 : : #undef SH_FREE
1194 : : #undef SH_STAT
1195 : :
1196 : : /* internal function names */
1197 : : #undef SH_COMPUTE_SIZE
1198 : : #undef SH_UPDATE_PARAMETERS
1199 : : #undef SH_COMPARE_KEYS
1200 : : #undef SH_INITIAL_BUCKET
1201 : : #undef SH_NEXT
1202 : : #undef SH_PREV
1203 : : #undef SH_DISTANCE_FROM_OPTIMAL
1204 : : #undef SH_ENTRY_HASH
1205 : : #undef SH_INSERT_HASH_INTERNAL
1206 : : #undef SH_LOOKUP_HASH_INTERNAL
|