Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * hashfunc.c
4 : * Support functions for hash access method.
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/hash/hashfunc.c
12 : *
13 : * NOTES
14 : * These functions are stored in pg_amproc. For each operator class
15 : * defined for hash indexes, they compute the hash value of the argument.
16 : *
17 : * Additional hash functions appear in /utils/adt/ files for various
18 : * specialized datatypes.
19 : *
20 : * It is expected that every bit of a hash function's 32-bit result is
21 : * as random as every other; failure to ensure this is likely to lead
22 : * to poor performance of hash joins, for example. In most cases a hash
23 : * function should use hash_any() or its variant hash_uint32().
24 : *-------------------------------------------------------------------------
25 : */
26 :
27 : #include "postgres.h"
28 :
29 : #include "access/hash.h"
30 : #include "catalog/pg_collation.h"
31 : #include "common/hashfn.h"
32 : #include "utils/builtins.h"
33 : #include "utils/float.h"
34 : #include "utils/pg_locale.h"
35 : #include "varatt.h"
36 :
37 : /*
38 : * Datatype-specific hash functions.
39 : *
40 : * These support both hash indexes and hash joins.
41 : *
42 : * NOTE: some of these are also used by catcache operations, without
43 : * any direct connection to hash indexes. Also, the common hash_any
44 : * routine is also used by dynahash tables.
45 : */
46 :
47 : /* Note: this is used for both "char" and boolean datatypes */
48 : Datum
8329 tgl 49 GIC 235744 : hashchar(PG_FUNCTION_ARGS)
8329 tgl 50 ECB : {
5791 tgl 51 GIC 235744 : return hash_uint32((int32) PG_GETARG_CHAR(0));
8329 tgl 52 ECB : }
53 :
54 : Datum
2047 rhaas 55 GIC 33 : hashcharextended(PG_FUNCTION_ARGS)
2047 rhaas 56 ECB : {
2047 rhaas 57 GIC 33 : return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
2047 rhaas 58 ECB : }
59 :
60 : Datum
8343 tgl 61 GIC 229705 : hashint2(PG_FUNCTION_ARGS)
9770 scrappy 62 ECB : {
5791 tgl 63 GIC 229705 : return hash_uint32((int32) PG_GETARG_INT16(0));
9770 scrappy 64 ECB : }
65 :
66 : Datum
2047 rhaas 67 GIC 24 : hashint2extended(PG_FUNCTION_ARGS)
2047 rhaas 68 ECB : {
2047 rhaas 69 GIC 24 : return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
2047 rhaas 70 ECB : }
71 :
72 : Datum
8343 tgl 73 GIC 12937844 : hashint4(PG_FUNCTION_ARGS)
9770 scrappy 74 ECB : {
5791 tgl 75 GIC 12937844 : return hash_uint32(PG_GETARG_INT32(0));
9770 scrappy 76 ECB : }
77 :
78 : Datum
2047 rhaas 79 GIC 103557 : hashint4extended(PG_FUNCTION_ARGS)
2047 rhaas 80 ECB : {
2047 rhaas 81 GIC 103557 : return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
2047 rhaas 82 ECB : }
83 :
84 : Datum
8343 tgl 85 GIC 307240 : hashint8(PG_FUNCTION_ARGS)
8792 bruce 86 ECB : {
87 : /*
88 : * The idea here is to produce a hash value compatible with the values
89 : * produced by hashint4 and hashint2 for logically equal inputs; this is
90 : * necessary to support cross-type hash joins across these input types.
91 : * Since all three types are signed, we can xor the high half of the int8
92 : * value if the sign is positive, or the complement of the high half when
93 : * the sign is negative.
94 : */
6874 tgl 95 GIC 307240 : int64 val = PG_GETARG_INT64(0);
6874 tgl 96 CBC 307240 : uint32 lohalf = (uint32) val;
97 307240 : uint32 hihalf = (uint32) (val >> 32);
6874 tgl 98 ECB :
6874 tgl 99 GIC 307240 : lohalf ^= (val >= 0) ? hihalf : ~hihalf;
6874 tgl 100 ECB :
5791 tgl 101 GIC 307240 : return hash_uint32(lohalf);
8329 tgl 102 ECB : }
103 :
104 : Datum
2047 rhaas 105 GIC 186 : hashint8extended(PG_FUNCTION_ARGS)
2047 rhaas 106 ECB : {
107 : /* Same approach as hashint8 */
2047 rhaas 108 GIC 186 : int64 val = PG_GETARG_INT64(0);
2047 rhaas 109 CBC 186 : uint32 lohalf = (uint32) val;
110 186 : uint32 hihalf = (uint32) (val >> 32);
2047 rhaas 111 ECB :
2047 rhaas 112 GIC 186 : lohalf ^= (val >= 0) ? hihalf : ~hihalf;
2047 rhaas 113 ECB :
2047 rhaas 114 GIC 186 : return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
2047 rhaas 115 ECB : }
116 :
117 : Datum
8329 tgl 118 GIC 7662999 : hashoid(PG_FUNCTION_ARGS)
8329 tgl 119 ECB : {
5791 tgl 120 GIC 7662999 : return hash_uint32((uint32) PG_GETARG_OID(0));
8792 bruce 121 ECB : }
122 :
123 : Datum
2047 rhaas 124 GIC 36 : hashoidextended(PG_FUNCTION_ARGS)
2047 rhaas 125 ECB : {
2047 rhaas 126 GIC 36 : return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
2047 rhaas 127 ECB : }
128 :
129 : Datum
5851 tgl 130 GIC 1571 : hashenum(PG_FUNCTION_ARGS)
5851 tgl 131 ECB : {
5791 tgl 132 GIC 1571 : return hash_uint32((uint32) PG_GETARG_OID(0));
5851 tgl 133 ECB : }
134 :
135 : Datum
2047 rhaas 136 GIC 3018 : hashenumextended(PG_FUNCTION_ARGS)
2047 rhaas 137 ECB : {
2047 rhaas 138 GIC 3018 : return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
2047 rhaas 139 ECB : }
140 :
141 : Datum
8343 tgl 142 GIC 21155 : hashfloat4(PG_FUNCTION_ARGS)
9770 scrappy 143 ECB : {
8343 tgl 144 GIC 21155 : float4 key = PG_GETARG_FLOAT4(0);
5951 tgl 145 ECB : float8 key8;
146 :
147 : /*
148 : * On IEEE-float machines, minus zero and zero have different bit patterns
149 : * but should compare as equal. We must ensure that they have the same
150 : * hash value, which is most reliably done this way:
151 : */
7231 tgl 152 GIC 21155 : if (key == (float4) 0)
7231 tgl 153 CBC 12 : PG_RETURN_UINT32(0);
7231 tgl 154 ECB :
155 : /*
156 : * To support cross-type hashing of float8 and float4, we want to return
157 : * the same hash value hashfloat8 would produce for an equal float8 value.
158 : * So, widen the value to float8 and hash that. (We must do this rather
159 : * than have hashfloat8 try to narrow its value to float4; that could fail
160 : * on overflow.)
161 : */
5951 tgl 162 GIC 21143 : key8 = key;
5951 tgl 163 ECB :
164 : /*
165 : * Similarly, NaNs can have different bit patterns but they should all
166 : * compare as equal. For backwards-compatibility reasons we force them to
167 : * have the hash value of a standard float8 NaN. (You'd think we could
168 : * replace key with a float4 NaN and then widen it; but on some old
169 : * platforms, that way produces a different bit pattern.)
170 : */
582 tgl 171 GIC 21143 : if (isnan(key8))
582 tgl 172 CBC 9 : key8 = get_float8_nan();
582 tgl 173 ECB :
5951 tgl 174 GIC 21143 : return hash_any((unsigned char *) &key8, sizeof(key8));
9345 bruce 175 ECB : }
176 :
177 : Datum
2047 rhaas 178 GIC 36 : hashfloat4extended(PG_FUNCTION_ARGS)
2047 rhaas 179 ECB : {
2047 rhaas 180 GIC 36 : float4 key = PG_GETARG_FLOAT4(0);
2047 rhaas 181 CBC 36 : uint64 seed = PG_GETARG_INT64(1);
2047 rhaas 182 ECB : float8 key8;
183 :
184 : /* Same approach as hashfloat4 */
2047 rhaas 185 GIC 36 : if (key == (float4) 0)
2047 rhaas 186 CBC 6 : PG_RETURN_UINT64(seed);
187 30 : key8 = key;
582 tgl 188 30 : if (isnan(key8))
582 tgl 189 LBC 0 : key8 = get_float8_nan();
2047 rhaas 190 EUB :
2047 rhaas 191 GIC 30 : return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
2047 rhaas 192 ECB : }
193 :
194 : Datum
8343 tgl 195 GIC 61783 : hashfloat8(PG_FUNCTION_ARGS)
9770 scrappy 196 ECB : {
8343 tgl 197 GIC 61783 : float8 key = PG_GETARG_FLOAT8(0);
9770 scrappy 198 ECB :
199 : /*
200 : * On IEEE-float machines, minus zero and zero have different bit patterns
201 : * but should compare as equal. We must ensure that they have the same
202 : * hash value, which is most reliably done this way:
203 : */
7231 tgl 204 GIC 61783 : if (key == (float8) 0)
7231 tgl 205 CBC 355 : PG_RETURN_UINT32(0);
7231 tgl 206 ECB :
207 : /*
208 : * Similarly, NaNs can have different bit patterns but they should all
209 : * compare as equal. For backwards-compatibility reasons we force them to
210 : * have the hash value of a standard NaN.
211 : */
584 tgl 212 GIC 61428 : if (isnan(key))
584 tgl 213 CBC 9 : key = get_float8_nan();
584 tgl 214 ECB :
7701 tgl 215 GIC 61428 : return hash_any((unsigned char *) &key, sizeof(key));
9770 scrappy 216 ECB : }
217 :
218 : Datum
2047 rhaas 219 GIC 36 : hashfloat8extended(PG_FUNCTION_ARGS)
2047 rhaas 220 ECB : {
2047 rhaas 221 GIC 36 : float8 key = PG_GETARG_FLOAT8(0);
2047 rhaas 222 CBC 36 : uint64 seed = PG_GETARG_INT64(1);
2047 rhaas 223 ECB :
224 : /* Same approach as hashfloat8 */
2047 rhaas 225 GIC 36 : if (key == (float8) 0)
2047 rhaas 226 CBC 6 : PG_RETURN_UINT64(seed);
584 tgl 227 30 : if (isnan(key))
584 tgl 228 LBC 0 : key = get_float8_nan();
2047 rhaas 229 EUB :
2047 rhaas 230 GIC 30 : return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
2047 rhaas 231 ECB : }
232 :
233 : Datum
8343 tgl 234 GIC 460534 : hashoidvector(PG_FUNCTION_ARGS)
8999 bruce 235 ECB : {
6585 tgl 236 GIC 460534 : oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
8999 bruce 237 ECB :
6585 tgl 238 GIC 460534 : return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
8448 tgl 239 ECB : }
240 :
241 : Datum
2047 rhaas 242 GIC 30 : hashoidvectorextended(PG_FUNCTION_ARGS)
2047 rhaas 243 ECB : {
2047 rhaas 244 GIC 30 : oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
2047 rhaas 245 ECB :
2047 rhaas 246 GIC 60 : return hash_any_extended((unsigned char *) key->values,
2047 rhaas 247 CBC 30 : key->dim1 * sizeof(Oid),
248 30 : PG_GETARG_INT64(1));
2047 rhaas 249 ECB : }
250 :
251 : Datum
8329 tgl 252 GIC 249601 : hashname(PG_FUNCTION_ARGS)
9770 scrappy 253 ECB : {
8053 bruce 254 GIC 249601 : char *key = NameStr(*PG_GETARG_NAME(0));
9770 scrappy 255 ECB :
2295 tgl 256 GIC 249601 : return hash_any((unsigned char *) key, strlen(key));
9770 scrappy 257 ECB : }
258 :
259 : Datum
2047 rhaas 260 GIC 30 : hashnameextended(PG_FUNCTION_ARGS)
2047 rhaas 261 ECB : {
2047 rhaas 262 GIC 30 : char *key = NameStr(*PG_GETARG_NAME(0));
2047 rhaas 263 ECB :
2047 rhaas 264 GIC 30 : return hash_any_extended((unsigned char *) key, strlen(key),
2047 rhaas 265 CBC 30 : PG_GETARG_INT64(1));
2047 rhaas 266 ECB : }
267 :
268 : Datum
7231 tgl 269 GIC 1007924 : hashtext(PG_FUNCTION_ARGS)
7231 tgl 270 ECB : {
5679 tgl 271 GIC 1007924 : text *key = PG_GETARG_TEXT_PP(0);
1479 peter 272 CBC 1007924 : Oid collid = PG_GET_COLLATION();
1418 tgl 273 1007924 : pg_locale_t mylocale = 0;
7231 tgl 274 ECB : Datum result;
275 :
1479 peter 276 GIC 1007924 : if (!collid)
1479 peter 277 CBC 3 : ereport(ERROR,
1479 peter 278 ECB : (errcode(ERRCODE_INDETERMINATE_COLLATION),
279 : errmsg("could not determine which collation to use for string hashing"),
280 : errhint("Use the COLLATE clause to set the collation explicitly.")));
281 :
444 peter 282 GIC 1007921 : if (!lc_collate_is_c(collid))
1479 peter 283 CBC 730129 : mylocale = pg_newlocale_from_collation(collid);
1479 peter 284 ECB :
45 jdavis 285 GNC 1007921 : if (pg_locale_deterministic(mylocale))
1479 peter 286 ECB : {
1479 peter 287 GIC 1007804 : result = hash_any((unsigned char *) VARDATA_ANY(key),
1479 peter 288 CBC 1007804 : VARSIZE_ANY_EXHDR(key));
1479 peter 289 ECB : }
290 : else
291 : {
292 : Size bsize, rsize;
293 : char *buf;
45 jdavis 294 GNC 117 : const char *keydata = VARDATA_ANY(key);
295 117 : size_t keylen = VARSIZE_ANY_EXHDR(key);
45 jdavis 296 ECB :
297 :
45 jdavis 298 GNC 117 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
299 117 : buf = palloc(bsize + 1);
300 :
301 117 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
302 117 : if (rsize != bsize)
45 jdavis 303 UNC 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
304 :
305 : /*
306 : * In principle, there's no reason to include the terminating NUL
307 : * character in the hash, but it was done before and the behavior
308 : * must be preserved.
309 : */
45 jdavis 310 GNC 117 : result = hash_any((uint8_t *) buf, bsize + 1);
311 :
312 117 : pfree(buf);
1479 peter 313 ECB : }
314 :
7231 tgl 315 : /* Avoid leaking memory for toasted inputs */
7231 tgl 316 GIC 1007921 : PG_FREE_IF_COPY(key, 0);
317 :
318 1007921 : return result;
7231 tgl 319 ECB : }
320 :
2047 rhaas 321 : Datum
2047 rhaas 322 CBC 2034 : hashtextextended(PG_FUNCTION_ARGS)
2047 rhaas 323 ECB : {
2047 rhaas 324 GIC 2034 : text *key = PG_GETARG_TEXT_PP(0);
1479 peter 325 2034 : Oid collid = PG_GET_COLLATION();
1418 tgl 326 CBC 2034 : pg_locale_t mylocale = 0;
2047 rhaas 327 EUB : Datum result;
328 :
1479 peter 329 GIC 2034 : if (!collid)
1479 peter 330 UIC 0 : ereport(ERROR,
331 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
1479 peter 332 ECB : errmsg("could not determine which collation to use for string hashing"),
333 : errhint("Use the COLLATE clause to set the collation explicitly.")));
334 :
444 peter 335 CBC 2034 : if (!lc_collate_is_c(collid))
1479 peter 336 GIC 2034 : mylocale = pg_newlocale_from_collation(collid);
1479 peter 337 ECB :
45 jdavis 338 GNC 2034 : if (pg_locale_deterministic(mylocale))
1479 peter 339 ECB : {
1479 peter 340 GIC 2022 : result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
341 2022 : VARSIZE_ANY_EXHDR(key),
342 2022 : PG_GETARG_INT64(1));
343 : }
344 : else
1479 peter 345 ECB : {
346 : Size bsize, rsize;
347 : char *buf;
45 jdavis 348 GNC 12 : const char *keydata = VARDATA_ANY(key);
349 12 : size_t keylen = VARSIZE_ANY_EXHDR(key);
45 jdavis 350 EUB :
45 jdavis 351 GNC 12 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
352 12 : buf = palloc(bsize + 1);
353 :
354 12 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
355 12 : if (rsize != bsize)
45 jdavis 356 UNC 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
357 :
358 : /*
359 : * In principle, there's no reason to include the terminating NUL
360 : * character in the hash, but it was done before and the behavior
361 : * must be preserved.
362 : */
45 jdavis 363 GNC 12 : result = hash_any_extended((uint8_t *) buf, bsize + 1,
364 12 : PG_GETARG_INT64(1));
365 :
366 12 : pfree(buf);
367 : }
368 :
2047 rhaas 369 GIC 2034 : PG_FREE_IF_COPY(key, 0);
370 :
371 2034 : return result;
2047 rhaas 372 ECB : }
373 :
8329 tgl 374 : /*
375 : * hashvarlena() can be used for any varlena datatype in which there are
376 : * no non-significant bits, ie, distinct bitpatterns never compare as equal.
377 : */
8343 378 : Datum
8329 tgl 379 GIC 3069 : hashvarlena(PG_FUNCTION_ARGS)
380 : {
5679 tgl 381 CBC 3069 : struct varlena *key = PG_GETARG_VARLENA_PP(0);
382 : Datum result;
9345 bruce 383 ECB :
5679 tgl 384 GIC 3069 : result = hash_any((unsigned char *) VARDATA_ANY(key),
385 3069 : VARSIZE_ANY_EXHDR(key));
386 :
8157 tgl 387 EUB : /* Avoid leaking memory for toasted inputs */
8157 tgl 388 GIC 3069 : PG_FREE_IF_COPY(key, 0);
8157 tgl 389 EUB :
8157 tgl 390 GIC 3069 : return result;
391 : }
9770 scrappy 392 EUB :
2047 rhaas 393 : Datum
2047 rhaas 394 UBC 0 : hashvarlenaextended(PG_FUNCTION_ARGS)
395 : {
396 0 : struct varlena *key = PG_GETARG_VARLENA_PP(0);
397 : Datum result;
2047 rhaas 398 EUB :
2047 rhaas 399 UIC 0 : result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
400 0 : VARSIZE_ANY_EXHDR(key),
401 0 : PG_GETARG_INT64(1));
402 :
403 0 : PG_FREE_IF_COPY(key, 0);
404 :
405 0 : return result;
406 : }
|