Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * execGrouping.c
4 : * executor utility routines for grouping, hashing, and aggregation
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/execGrouping.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/parallel.h"
18 : #include "common/hashfn.h"
19 : #include "executor/executor.h"
20 : #include "miscadmin.h"
21 : #include "utils/lsyscache.h"
22 : #include "utils/memutils.h"
23 :
24 : static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
25 : static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
26 : const MinimalTuple tuple);
27 : static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
28 : TupleTableSlot *slot,
29 : bool *isnew, uint32 hash);
30 :
31 : /*
32 : * Define parameters for tuple hash table code generation. The interface is
33 : * *also* declared in execnodes.h (to generate the types, which are externally
34 : * visible).
35 : */
36 : #define SH_PREFIX tuplehash
37 : #define SH_ELEMENT_TYPE TupleHashEntryData
38 : #define SH_KEY_TYPE MinimalTuple
39 : #define SH_KEY firstTuple
40 : #define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key)
41 : #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
42 : #define SH_SCOPE extern
43 : #define SH_STORE_HASH
44 : #define SH_GET_HASH(tb, a) a->hash
45 : #define SH_DEFINE
46 : #include "lib/simplehash.h"
47 :
48 :
49 : /*****************************************************************************
50 : * Utility routines for grouping tuples together
51 : *****************************************************************************/
52 :
53 : /*
54 : * execTuplesMatchPrepare
55 : * Build expression that can be evaluated using ExecQual(), returning
56 : * whether an ExprContext's inner/outer tuples are NOT DISTINCT
57 : */
58 : ExprState *
1879 andres 59 CBC 3450 : execTuplesMatchPrepare(TupleDesc desc,
60 : int numCols,
61 : const AttrNumber *keyColIdx,
62 : const Oid *eqOperators,
63 : const Oid *collations,
64 : PlanState *parent)
65 : {
66 3450 : Oid *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
67 : int i;
68 : ExprState *expr;
69 :
70 3450 : if (numCols == 0)
71 36 : return NULL;
72 :
73 : /* lookup equality functions */
7394 tgl 74 7629 : for (i = 0; i < numCols; i++)
1879 andres 75 4215 : eqFunctions[i] = get_opcode(eqOperators[i]);
76 :
77 : /* build actual expression */
1606 78 3414 : expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
79 : numCols, keyColIdx, eqFunctions, collations,
80 : parent);
81 :
1879 82 3414 : return expr;
83 : }
84 :
85 : /*
86 : * execTuplesHashPrepare
87 : * Look up the equality and hashing functions needed for a TupleHashTable.
88 : *
89 : * This is similar to execTuplesMatchPrepare, but we also need to find the
90 : * hash functions associated with the equality operators. *eqFunctions and
91 : * *hashFunctions receive the palloc'd result arrays.
92 : *
93 : * Note: we expect that the given operators are not cross-type comparisons.
94 : */
95 : void
5933 tgl 96 4565 : execTuplesHashPrepare(int numCols,
97 : const Oid *eqOperators,
98 : Oid **eqFuncOids,
99 : FmgrInfo **hashFunctions)
100 : {
101 : int i;
102 :
1879 andres 103 4565 : *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
5933 tgl 104 4565 : *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
105 :
7231 106 13558 : for (i = 0; i < numCols; i++)
107 : {
5933 108 8993 : Oid eq_opr = eqOperators[i];
109 : Oid eq_function;
110 : Oid left_hash_function;
111 : Oid right_hash_function;
112 :
113 8993 : eq_function = get_opcode(eq_opr);
5913 114 8993 : if (!get_op_hash_functions(eq_opr,
115 : &left_hash_function, &right_hash_function))
7202 tgl 116 UBC 0 : elog(ERROR, "could not find hash function for hash operator %u",
117 : eq_opr);
118 : /* We're not supporting cross-type cases here */
5913 tgl 119 CBC 8993 : Assert(left_hash_function == right_hash_function);
1879 andres 120 8993 : (*eqFuncOids)[i] = eq_function;
5913 tgl 121 8993 : fmgr_info(right_hash_function, &(*hashFunctions)[i]);
122 : }
7394 123 4565 : }
124 :
125 :
126 : /*****************************************************************************
127 : * Utility routines for all-in-memory hash tables
128 : *
129 : * These routines build hash tables for grouping tuples together (eg, for
130 : * hash aggregation). There is one entry for each not-distinct set of tuples
131 : * presented.
132 : *****************************************************************************/
133 :
134 : /*
135 : * Construct an empty TupleHashTable
136 : *
137 : * numCols, keyColIdx: identify the tuple fields to use as lookup key
138 : * eqfunctions: equality comparison functions to use
139 : * hashfunctions: datatype-specific hashing functions to use
140 : * nbuckets: initial estimate of hashtable size
141 : * additionalsize: size of data stored in ->additional
142 : * metacxt: memory context for long-lived allocation, but not per-entry data
143 : * tablecxt: memory context in which to store table entries
144 : * tempcxt: short-lived context for evaluation hash and comparison functions
145 : *
146 : * The function arrays may be made with execTuplesHashPrepare(). Note they
147 : * are not cross-type functions, but expect to see the table datatype(s)
148 : * on both sides.
149 : *
150 : * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
151 : * storage that will live as long as the hashtable does.
152 : */
153 : TupleHashTable
1520 andres 154 4378 : BuildTupleHashTableExt(PlanState *parent,
155 : TupleDesc inputDesc,
156 : int numCols, AttrNumber *keyColIdx,
157 : const Oid *eqfuncoids,
158 : FmgrInfo *hashfunctions,
159 : Oid *collations,
160 : long nbuckets, Size additionalsize,
161 : MemoryContext metacxt,
162 : MemoryContext tablecxt,
163 : MemoryContext tempcxt,
164 : bool use_variable_hash_iv)
165 : {
166 : TupleHashTable hashtable;
2368 167 4378 : Size entrysize = sizeof(TupleHashEntryData) + additionalsize;
168 : Size hash_mem_limit;
169 : MemoryContext oldcontext;
170 : bool allow_jit;
171 :
7394 tgl 172 4378 : Assert(nbuckets > 0);
173 :
174 : /* Limit initial table size request to not more than hash_mem */
623 175 4378 : hash_mem_limit = get_hash_memory_limit() / entrysize;
176 4378 : if (nbuckets > hash_mem_limit)
177 12 : nbuckets = hash_mem_limit;
178 :
1520 andres 179 4378 : oldcontext = MemoryContextSwitchTo(metacxt);
180 :
181 4378 : hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
182 :
7394 tgl 183 4378 : hashtable->numCols = numCols;
184 4378 : hashtable->keyColIdx = keyColIdx;
5906 185 4378 : hashtable->tab_hash_funcs = hashfunctions;
1479 peter 186 4378 : hashtable->tab_collations = collations;
7394 tgl 187 4378 : hashtable->tablecxt = tablecxt;
188 4378 : hashtable->tempcxt = tempcxt;
189 4378 : hashtable->entrysize = entrysize;
6385 bruce 190 4378 : hashtable->tableslot = NULL; /* will be made on first lookup */
6598 tgl 191 4378 : hashtable->inputslot = NULL;
5906 192 4378 : hashtable->in_hash_funcs = NULL;
1879 andres 193 4378 : hashtable->cur_eq_func = NULL;
194 :
195 : /*
196 : * If parallelism is in use, even if the leader backend is performing the
197 : * scan itself, we don't want to create the hashtable exactly the same way
198 : * in all workers. As hashtables are iterated over in keyspace-order,
199 : * doing so in all processes in the same way is likely to lead to
200 : * "unbalanced" hashtables when the table size initially is
201 : * underestimated.
202 : */
2305 rhaas 203 4378 : if (use_variable_hash_iv)
1896 andres 204 339 : hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
205 : else
2305 rhaas 206 4039 : hashtable->hash_iv = 0;
207 :
1520 andres 208 4378 : hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
209 :
210 : /*
211 : * We copy the input tuple descriptor just for safety --- we assume all
212 : * input tuples will have equivalent descriptors.
213 : */
1606 214 4378 : hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
215 : &TTSOpsMinimalTuple);
216 :
217 : /*
218 : * If the old reset interface is used (i.e. BuildTupleHashTable, rather
219 : * than BuildTupleHashTableExt), allowing JIT would lead to the generated
220 : * functions to a) live longer than the query b) be re-generated each time
221 : * the table is being reset. Therefore prevent JIT from being used in that
222 : * case, by not providing a parent node (which prevents accessing the
223 : * JitContext in the EState).
224 : */
1288 225 4378 : allow_jit = metacxt != tablecxt;
226 :
227 : /* build comparator for all columns */
228 : /* XXX: should we support non-minimal tuples for the inputslot? */
1879 229 4378 : hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
230 : &TTSOpsMinimalTuple, &TTSOpsMinimalTuple,
231 : numCols,
232 : keyColIdx, eqfuncoids, collations,
233 : allow_jit ? parent : NULL);
234 :
235 : /*
236 : * While not pretty, it's ok to not shut down this context, but instead
237 : * rely on the containing memory context being reset, as
238 : * ExecBuildGroupingEqual() only builds a very simple expression calling
239 : * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
240 : */
1520 241 4378 : hashtable->exprcontext = CreateStandaloneExprContext();
242 :
243 4378 : MemoryContextSwitchTo(oldcontext);
244 :
7394 tgl 245 4378 : return hashtable;
246 : }
247 :
248 : /*
249 : * BuildTupleHashTable is a backwards-compatibility wrapper for
250 : * BuildTupleHashTableExt(), that allocates the hashtable's metadata in
251 : * tablecxt. Note that hashtables created this way cannot be reset leak-free
252 : * with ResetTupleHashTable().
253 : */
254 : TupleHashTable
1520 andres 255 UBC 0 : BuildTupleHashTable(PlanState *parent,
256 : TupleDesc inputDesc,
257 : int numCols, AttrNumber *keyColIdx,
258 : const Oid *eqfuncoids,
259 : FmgrInfo *hashfunctions,
260 : Oid *collations,
261 : long nbuckets, Size additionalsize,
262 : MemoryContext tablecxt,
263 : MemoryContext tempcxt,
264 : bool use_variable_hash_iv)
265 : {
266 0 : return BuildTupleHashTableExt(parent,
267 : inputDesc,
268 : numCols, keyColIdx,
269 : eqfuncoids,
270 : hashfunctions,
271 : collations,
272 : nbuckets, additionalsize,
273 : tablecxt,
274 : tablecxt,
275 : tempcxt,
276 : use_variable_hash_iv);
277 : }
278 :
279 : /*
280 : * Reset contents of the hashtable to be empty, preserving all the non-content
281 : * state. Note that the tablecxt passed to BuildTupleHashTableExt() should
282 : * also be reset, otherwise there will be leaks.
283 : */
284 : void
1520 andres 285 CBC 129117 : ResetTupleHashTable(TupleHashTable hashtable)
286 : {
287 129117 : tuplehash_reset(hashtable->hashtab);
288 129117 : }
289 :
290 : /*
291 : * Find or create a hashtable entry for the tuple group containing the
292 : * given tuple. The tuple must be the same type as the hashtable entries.
293 : *
294 : * If isnew is NULL, we do not create new entries; we return NULL if no
295 : * match is found.
296 : *
297 : * If hash is not NULL, we set it to the calculated hash value. This allows
298 : * callers access to the hash value even if no entry is returned.
299 : *
300 : * If isnew isn't NULL, then a new entry is created if no existing entry
301 : * matches. On return, *isnew is true if the entry is newly created,
302 : * false if it existed already. ->additional_data in the new entry has
303 : * been zeroed.
304 : */
305 : TupleHashEntry
7394 tgl 306 3233629 : LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
307 : bool *isnew, uint32 *hash)
308 : {
309 : TupleHashEntry entry;
310 : MemoryContext oldContext;
311 : uint32 local_hash;
312 :
313 : /* Need to run the hash functions in short-lived context */
314 3233629 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
315 :
316 : /* set up data needed by hash and match functions */
6598 317 3233629 : hashtable->inputslot = slot;
5906 318 3233629 : hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
1879 andres 319 3233629 : hashtable->cur_eq_func = hashtable->tab_eq_func;
320 :
987 jdavis 321 3233629 : local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
322 3233626 : entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
323 :
324 3233626 : if (hash != NULL)
325 2761321 : *hash = local_hash;
326 :
327 3233626 : Assert(entry == NULL || entry->hash == local_hash);
328 :
1158 329 3233626 : MemoryContextSwitchTo(oldContext);
330 :
331 3233626 : return entry;
332 : }
333 :
334 : /*
335 : * Compute the hash value for a tuple
336 : */
337 : uint32
1154 jdavis 338 UBC 0 : TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
339 : {
340 : MemoryContext oldContext;
341 : uint32 hash;
342 :
343 0 : hashtable->inputslot = slot;
344 0 : hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
345 :
346 : /* Need to run the hash functions in short-lived context */
347 0 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
348 :
349 0 : hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
350 :
351 0 : MemoryContextSwitchTo(oldContext);
352 :
353 0 : return hash;
354 : }
355 :
356 : /*
357 : * A variant of LookupTupleHashEntry for callers that have already computed
358 : * the hash value.
359 : */
360 : TupleHashEntry
1158 jdavis 361 CBC 337596 : LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
362 : bool *isnew, uint32 hash)
363 : {
364 : TupleHashEntry entry;
365 : MemoryContext oldContext;
366 :
367 : /* Need to run the hash functions in short-lived context */
368 337596 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
369 :
370 : /* set up data needed by hash and match functions */
371 337596 : hashtable->inputslot = slot;
372 337596 : hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
373 337596 : hashtable->cur_eq_func = hashtable->tab_eq_func;
374 :
375 337596 : entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
987 376 337596 : Assert(entry == NULL || entry->hash == hash);
377 :
7173 tgl 378 337596 : MemoryContextSwitchTo(oldContext);
379 :
380 337596 : return entry;
381 : }
382 :
383 : /*
384 : * Search for a hashtable entry matching the given tuple. No entry is
385 : * created if there's not a match. This is similar to the non-creating
386 : * case of LookupTupleHashEntry, except that it supports cross-type
387 : * comparisons, in which the given tuple is not of the same type as the
388 : * table entries. The caller must provide the hash functions to use for
389 : * the input tuple, as well as the equality functions, since these may be
390 : * different from the table's internal functions.
391 : */
392 : TupleHashEntry
5906 393 397070 : FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
394 : ExprState *eqcomp,
395 : FmgrInfo *hashfunctions)
396 : {
397 : TupleHashEntry entry;
398 : MemoryContext oldContext;
399 : MinimalTuple key;
400 :
401 : /* Need to run the hash functions in short-lived context */
402 397070 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
403 :
404 : /* Set up data needed by hash and match functions */
405 397070 : hashtable->inputslot = slot;
406 397070 : hashtable->in_hash_funcs = hashfunctions;
1879 andres 407 397070 : hashtable->cur_eq_func = eqcomp;
408 :
409 : /* Search the hash table */
2368 410 397070 : key = NULL; /* flag to reference inputslot */
411 397070 : entry = tuplehash_lookup(hashtable->hashtab, key);
5906 tgl 412 397070 : MemoryContextSwitchTo(oldContext);
413 :
414 397070 : return entry;
415 : }
416 :
417 : /*
418 : * If tuple is NULL, use the input slot instead. This convention avoids the
419 : * need to materialize virtual input tuples unless they actually need to get
420 : * copied into the table.
421 : *
422 : * Also, the caller must select an appropriate memory context for running
423 : * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
424 : */
425 : static uint32
1154 jdavis 426 3630699 : TupleHashTableHash_internal(struct tuplehash_hash *tb,
427 : const MinimalTuple tuple)
428 : {
2356 peter_e 429 3630699 : TupleHashTable hashtable = (TupleHashTable) tb->private_data;
7173 tgl 430 3630699 : int numCols = hashtable->numCols;
431 3630699 : AttrNumber *keyColIdx = hashtable->keyColIdx;
2305 rhaas 432 3630699 : uint32 hashkey = hashtable->hash_iv;
433 : TupleTableSlot *slot;
434 : FmgrInfo *hashfunctions;
435 : int i;
436 :
6598 tgl 437 3630699 : if (tuple == NULL)
438 : {
439 : /* Process the current input tuple for the table */
440 3630699 : slot = hashtable->inputslot;
5906 441 3630699 : hashfunctions = hashtable->in_hash_funcs;
442 : }
443 : else
444 : {
445 : /*
446 : * Process a tuple already stored in the table.
447 : *
448 : * (this case never actually occurs due to the way simplehash.h is
449 : * used, as the hash-value is stored in the entries)
450 : */
6598 tgl 451 UBC 0 : slot = hashtable->tableslot;
6129 452 0 : ExecStoreMinimalTuple(tuple, slot, false);
5906 453 0 : hashfunctions = hashtable->tab_hash_funcs;
454 : }
455 :
7394 tgl 456 CBC 9382450 : for (i = 0; i < numCols; i++)
457 : {
458 5751754 : AttrNumber att = keyColIdx[i];
459 : Datum attr;
460 : bool isNull;
461 :
462 : /* combine successive hashkeys by rotating */
413 john.naylor 463 5751754 : hashkey = pg_rotate_left32(hashkey, 1);
464 :
6598 tgl 465 5751754 : attr = slot_getattr(slot, att, &isNull);
466 :
7231 467 5751754 : if (!isNull) /* treat nulls as having hash key 0 */
468 : {
469 : uint32 hkey;
470 :
1479 peter 471 5642068 : hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
472 5642071 : hashtable->tab_collations[i],
473 : attr));
7231 tgl 474 5642068 : hashkey ^= hkey;
475 : }
476 : }
477 :
478 : /*
479 : * The way hashes are combined above, among each other and with the IV,
480 : * doesn't lead to good bit perturbation. As the IV's goal is to lead to
481 : * achieve that, perform a round of hashing of the combined hash -
482 : * resulting in near perfect perturbation.
483 : */
1896 andres 484 3630696 : return murmurhash32(hashkey);
485 : }
486 :
487 : /*
488 : * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
489 : * so that we can avoid switching the memory context multiple times for
490 : * LookupTupleHashEntry.
491 : *
492 : * NB: This function may or may not change the memory context. Caller is
493 : * expected to change it back.
494 : */
495 : static inline TupleHashEntry
1158 jdavis 496 3571222 : LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
497 : bool *isnew, uint32 hash)
498 : {
499 : TupleHashEntryData *entry;
500 : bool found;
501 : MinimalTuple key;
502 :
503 3571222 : key = NULL; /* flag to reference inputslot */
504 :
505 3571222 : if (isnew)
506 : {
507 2953945 : entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);
508 :
509 2953945 : if (found)
510 : {
511 : /* found pre-existing entry */
512 2443289 : *isnew = false;
513 : }
514 : else
515 : {
516 : /* created new entry */
517 510656 : *isnew = true;
518 : /* zero caller data */
519 510656 : entry->additional = NULL;
520 510656 : MemoryContextSwitchTo(hashtable->tablecxt);
521 : /* Copy the first tuple into the table context */
522 510656 : entry->firstTuple = ExecCopySlotMinimalTuple(slot);
523 : }
524 : }
525 : else
526 : {
527 617277 : entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);
528 : }
529 :
530 3571222 : return entry;
531 : }
532 :
533 : /*
534 : * See whether two tuples (presumably of the same hash value) match
535 : */
536 : static int
2368 andres 537 2742227 : TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
538 : {
539 : TupleTableSlot *slot1;
540 : TupleTableSlot *slot2;
2356 peter_e 541 2742227 : TupleHashTable hashtable = (TupleHashTable) tb->private_data;
1879 andres 542 2742227 : ExprContext *econtext = hashtable->exprcontext;
543 :
544 : /*
545 : * We assume that simplehash.h will only ever call us with the first
546 : * argument being an actual table entry, and the second argument being
547 : * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
548 : * could be supported too, but is not currently required.
549 : */
6598 tgl 550 2742227 : Assert(tuple1 != NULL);
551 2742227 : slot1 = hashtable->tableslot;
6129 552 2742227 : ExecStoreMinimalTuple(tuple1, slot1, false);
6598 553 2742227 : Assert(tuple2 == NULL);
554 2742227 : slot2 = hashtable->inputslot;
555 :
556 : /* For crosstype comparisons, the inputslot must be first */
1879 andres 557 2742227 : econtext->ecxt_innertuple = slot2;
558 2742227 : econtext->ecxt_outertuple = slot1;
559 2742227 : return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
560 : }
|