LCOV - differential code coverage report
Current view: top level - src/backend/access/hash - hashfunc.c (source / functions) Coverage Total Hit UNC LBC UIC UBC GIC GNC CBC EUB ECB DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 91.3 % 138 126 2 2 6 2 80 19 27 9 79 1 18
Current Date: 2023-04-08 15:15:32 Functions: 95.8 % 24 23 1 23 1 23
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * hashfunc.c
       4                 :  *    Support functions for hash access method.
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *    src/backend/access/hash/hashfunc.c
      12                 :  *
      13                 :  * NOTES
      14                 :  *    These functions are stored in pg_amproc.  For each operator class
      15                 :  *    defined for hash indexes, they compute the hash value of the argument.
      16                 :  *
      17                 :  *    Additional hash functions appear in /utils/adt/ files for various
      18                 :  *    specialized datatypes.
      19                 :  *
      20                 :  *    It is expected that every bit of a hash function's 32-bit result is
      21                 :  *    as random as every other; failure to ensure this is likely to lead
      22                 :  *    to poor performance of hash joins, for example.  In most cases a hash
      23                 :  *    function should use hash_any() or its variant hash_uint32().
      24                 :  *-------------------------------------------------------------------------
      25                 :  */
      26                 : 
      27                 : #include "postgres.h"
      28                 : 
      29                 : #include "access/hash.h"
      30                 : #include "catalog/pg_collation.h"
      31                 : #include "common/hashfn.h"
      32                 : #include "utils/builtins.h"
      33                 : #include "utils/float.h"
      34                 : #include "utils/pg_locale.h"
      35                 : #include "varatt.h"
      36                 : 
      37                 : /*
      38                 :  * Datatype-specific hash functions.
      39                 :  *
      40                 :  * These support both hash indexes and hash joins.
      41                 :  *
      42                 :  * NOTE: some of these are also used by catcache operations, without
      43                 :  * any direct connection to hash indexes.  Also, the common hash_any
      44                 :  * routine is also used by dynahash tables.
      45                 :  */
      46                 : 
      47                 : /* Note: this is used for both "char" and boolean datatypes */
      48                 : Datum
      49 GIC      235744 : hashchar(PG_FUNCTION_ARGS)
      50 ECB             : {
      51 GIC      235744 :     return hash_uint32((int32) PG_GETARG_CHAR(0));
      52 ECB             : }
      53                 : 
      54                 : Datum
      55 GIC          33 : hashcharextended(PG_FUNCTION_ARGS)
      56 ECB             : {
      57 GIC          33 :     return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
      58 ECB             : }
      59                 : 
      60                 : Datum
      61 GIC      229705 : hashint2(PG_FUNCTION_ARGS)
      62 ECB             : {
      63 GIC      229705 :     return hash_uint32((int32) PG_GETARG_INT16(0));
      64 ECB             : }
      65                 : 
      66                 : Datum
      67 GIC          24 : hashint2extended(PG_FUNCTION_ARGS)
      68 ECB             : {
      69 GIC          24 :     return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
      70 ECB             : }
      71                 : 
      72                 : Datum
      73 GIC    12937844 : hashint4(PG_FUNCTION_ARGS)
      74 ECB             : {
      75 GIC    12937844 :     return hash_uint32(PG_GETARG_INT32(0));
      76 ECB             : }
      77                 : 
      78                 : Datum
      79 GIC      103557 : hashint4extended(PG_FUNCTION_ARGS)
      80 ECB             : {
      81 GIC      103557 :     return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
      82 ECB             : }
      83                 : 
      84                 : Datum
      85 GIC      307240 : hashint8(PG_FUNCTION_ARGS)
      86 ECB             : {
      87                 :     /*
      88                 :      * The idea here is to produce a hash value compatible with the values
      89                 :      * produced by hashint4 and hashint2 for logically equal inputs; this is
      90                 :      * necessary to support cross-type hash joins across these input types.
      91                 :      * Since all three types are signed, we can xor the high half of the int8
      92                 :      * value if the sign is positive, or the complement of the high half when
      93                 :      * the sign is negative.
      94                 :      */
      95 GIC      307240 :     int64       val = PG_GETARG_INT64(0);
      96 CBC      307240 :     uint32      lohalf = (uint32) val;
      97          307240 :     uint32      hihalf = (uint32) (val >> 32);
      98 ECB             : 
      99 GIC      307240 :     lohalf ^= (val >= 0) ? hihalf : ~hihalf;
     100 ECB             : 
     101 GIC      307240 :     return hash_uint32(lohalf);
     102 ECB             : }
     103                 : 
     104                 : Datum
     105 GIC         186 : hashint8extended(PG_FUNCTION_ARGS)
     106 ECB             : {
     107                 :     /* Same approach as hashint8 */
     108 GIC         186 :     int64       val = PG_GETARG_INT64(0);
     109 CBC         186 :     uint32      lohalf = (uint32) val;
     110             186 :     uint32      hihalf = (uint32) (val >> 32);
     111 ECB             : 
     112 GIC         186 :     lohalf ^= (val >= 0) ? hihalf : ~hihalf;
     113 ECB             : 
     114 GIC         186 :     return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
     115 ECB             : }
     116                 : 
     117                 : Datum
     118 GIC     7662999 : hashoid(PG_FUNCTION_ARGS)
     119 ECB             : {
     120 GIC     7662999 :     return hash_uint32((uint32) PG_GETARG_OID(0));
     121 ECB             : }
     122                 : 
     123                 : Datum
     124 GIC          36 : hashoidextended(PG_FUNCTION_ARGS)
     125 ECB             : {
     126 GIC          36 :     return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
     127 ECB             : }
     128                 : 
     129                 : Datum
     130 GIC        1571 : hashenum(PG_FUNCTION_ARGS)
     131 ECB             : {
     132 GIC        1571 :     return hash_uint32((uint32) PG_GETARG_OID(0));
     133 ECB             : }
     134                 : 
     135                 : Datum
     136 GIC        3018 : hashenumextended(PG_FUNCTION_ARGS)
     137 ECB             : {
     138 GIC        3018 :     return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
     139 ECB             : }
     140                 : 
     141                 : Datum
     142 GIC       21155 : hashfloat4(PG_FUNCTION_ARGS)
     143 ECB             : {
     144 GIC       21155 :     float4      key = PG_GETARG_FLOAT4(0);
     145 ECB             :     float8      key8;
     146                 : 
     147                 :     /*
     148                 :      * On IEEE-float machines, minus zero and zero have different bit patterns
     149                 :      * but should compare as equal.  We must ensure that they have the same
     150                 :      * hash value, which is most reliably done this way:
     151                 :      */
     152 GIC       21155 :     if (key == (float4) 0)
     153 CBC          12 :         PG_RETURN_UINT32(0);
     154 ECB             : 
     155                 :     /*
     156                 :      * To support cross-type hashing of float8 and float4, we want to return
     157                 :      * the same hash value hashfloat8 would produce for an equal float8 value.
     158                 :      * So, widen the value to float8 and hash that.  (We must do this rather
     159                 :      * than have hashfloat8 try to narrow its value to float4; that could fail
     160                 :      * on overflow.)
     161                 :      */
     162 GIC       21143 :     key8 = key;
     163 ECB             : 
     164                 :     /*
     165                 :      * Similarly, NaNs can have different bit patterns but they should all
     166                 :      * compare as equal.  For backwards-compatibility reasons we force them to
     167                 :      * have the hash value of a standard float8 NaN.  (You'd think we could
     168                 :      * replace key with a float4 NaN and then widen it; but on some old
     169                 :      * platforms, that way produces a different bit pattern.)
     170                 :      */
     171 GIC       21143 :     if (isnan(key8))
     172 CBC           9 :         key8 = get_float8_nan();
     173 ECB             : 
     174 GIC       21143 :     return hash_any((unsigned char *) &key8, sizeof(key8));
     175 ECB             : }
     176                 : 
     177                 : Datum
     178 GIC          36 : hashfloat4extended(PG_FUNCTION_ARGS)
     179 ECB             : {
     180 GIC          36 :     float4      key = PG_GETARG_FLOAT4(0);
     181 CBC          36 :     uint64      seed = PG_GETARG_INT64(1);
     182 ECB             :     float8      key8;
     183                 : 
     184                 :     /* Same approach as hashfloat4 */
     185 GIC          36 :     if (key == (float4) 0)
     186 CBC           6 :         PG_RETURN_UINT64(seed);
     187              30 :     key8 = key;
     188              30 :     if (isnan(key8))
     189 LBC           0 :         key8 = get_float8_nan();
     190 EUB             : 
     191 GIC          30 :     return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
     192 ECB             : }
     193                 : 
     194                 : Datum
     195 GIC       61783 : hashfloat8(PG_FUNCTION_ARGS)
     196 ECB             : {
     197 GIC       61783 :     float8      key = PG_GETARG_FLOAT8(0);
     198 ECB             : 
     199                 :     /*
     200                 :      * On IEEE-float machines, minus zero and zero have different bit patterns
     201                 :      * but should compare as equal.  We must ensure that they have the same
     202                 :      * hash value, which is most reliably done this way:
     203                 :      */
     204 GIC       61783 :     if (key == (float8) 0)
     205 CBC         355 :         PG_RETURN_UINT32(0);
     206 ECB             : 
     207                 :     /*
     208                 :      * Similarly, NaNs can have different bit patterns but they should all
     209                 :      * compare as equal.  For backwards-compatibility reasons we force them to
     210                 :      * have the hash value of a standard NaN.
     211                 :      */
     212 GIC       61428 :     if (isnan(key))
     213 CBC           9 :         key = get_float8_nan();
     214 ECB             : 
     215 GIC       61428 :     return hash_any((unsigned char *) &key, sizeof(key));
     216 ECB             : }
     217                 : 
     218                 : Datum
     219 GIC          36 : hashfloat8extended(PG_FUNCTION_ARGS)
     220 ECB             : {
     221 GIC          36 :     float8      key = PG_GETARG_FLOAT8(0);
     222 CBC          36 :     uint64      seed = PG_GETARG_INT64(1);
     223 ECB             : 
     224                 :     /* Same approach as hashfloat8 */
     225 GIC          36 :     if (key == (float8) 0)
     226 CBC           6 :         PG_RETURN_UINT64(seed);
     227              30 :     if (isnan(key))
     228 LBC           0 :         key = get_float8_nan();
     229 EUB             : 
     230 GIC          30 :     return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
     231 ECB             : }
     232                 : 
     233                 : Datum
     234 GIC      460534 : hashoidvector(PG_FUNCTION_ARGS)
     235 ECB             : {
     236 GIC      460534 :     oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
     237 ECB             : 
     238 GIC      460534 :     return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
     239 ECB             : }
     240                 : 
     241                 : Datum
     242 GIC          30 : hashoidvectorextended(PG_FUNCTION_ARGS)
     243 ECB             : {
     244 GIC          30 :     oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
     245 ECB             : 
     246 GIC          60 :     return hash_any_extended((unsigned char *) key->values,
     247 CBC          30 :                              key->dim1 * sizeof(Oid),
     248              30 :                              PG_GETARG_INT64(1));
     249 ECB             : }
     250                 : 
     251                 : Datum
     252 GIC      249601 : hashname(PG_FUNCTION_ARGS)
     253 ECB             : {
     254 GIC      249601 :     char       *key = NameStr(*PG_GETARG_NAME(0));
     255 ECB             : 
     256 GIC      249601 :     return hash_any((unsigned char *) key, strlen(key));
     257 ECB             : }
     258                 : 
     259                 : Datum
     260 GIC          30 : hashnameextended(PG_FUNCTION_ARGS)
     261 ECB             : {
     262 GIC          30 :     char       *key = NameStr(*PG_GETARG_NAME(0));
     263 ECB             : 
     264 GIC          30 :     return hash_any_extended((unsigned char *) key, strlen(key),
     265 CBC          30 :                              PG_GETARG_INT64(1));
     266 ECB             : }
     267                 : 
     268                 : Datum
     269 GIC     1007924 : hashtext(PG_FUNCTION_ARGS)
     270 ECB             : {
     271 GIC     1007924 :     text       *key = PG_GETARG_TEXT_PP(0);
     272 CBC     1007924 :     Oid         collid = PG_GET_COLLATION();
     273         1007924 :     pg_locale_t mylocale = 0;
     274 ECB             :     Datum       result;
     275                 : 
     276 GIC     1007924 :     if (!collid)
     277 CBC           3 :         ereport(ERROR,
     278 ECB             :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     279                 :                  errmsg("could not determine which collation to use for string hashing"),
     280                 :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     281                 : 
     282 GIC     1007921 :     if (!lc_collate_is_c(collid))
     283 CBC      730129 :         mylocale = pg_newlocale_from_collation(collid);
     284 ECB             : 
     285 GNC     1007921 :     if (pg_locale_deterministic(mylocale))
     286 ECB             :     {
     287 GIC     1007804 :         result = hash_any((unsigned char *) VARDATA_ANY(key),
     288 CBC     1007804 :                           VARSIZE_ANY_EXHDR(key));
     289 ECB             :     }
     290                 :     else
     291                 :     {
     292                 :         Size        bsize, rsize;
     293                 :         char       *buf;
     294 GNC         117 :         const char *keydata = VARDATA_ANY(key);
     295             117 :         size_t      keylen = VARSIZE_ANY_EXHDR(key);
     296 ECB             : 
     297                 : 
     298 GNC         117 :         bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
     299             117 :         buf = palloc(bsize + 1);
     300                 : 
     301             117 :         rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
     302             117 :         if (rsize != bsize)
     303 UNC           0 :             elog(ERROR, "pg_strnxfrm() returned unexpected result");
     304                 : 
     305                 :         /*
     306                 :          * In principle, there's no reason to include the terminating NUL
     307                 :          * character in the hash, but it was done before and the behavior
     308                 :          * must be preserved.
     309                 :          */
     310 GNC         117 :         result = hash_any((uint8_t *) buf, bsize + 1);
     311                 : 
     312             117 :         pfree(buf);
     313 ECB             :     }
     314                 : 
     315                 :     /* Avoid leaking memory for toasted inputs */
     316 GIC     1007921 :     PG_FREE_IF_COPY(key, 0);
     317                 : 
     318         1007921 :     return result;
     319 ECB             : }
     320                 : 
     321                 : Datum
     322 CBC        2034 : hashtextextended(PG_FUNCTION_ARGS)
     323 ECB             : {
     324 GIC        2034 :     text       *key = PG_GETARG_TEXT_PP(0);
     325            2034 :     Oid         collid = PG_GET_COLLATION();
     326 CBC        2034 :     pg_locale_t mylocale = 0;
     327 EUB             :     Datum       result;
     328                 : 
     329 GIC        2034 :     if (!collid)
     330 UIC           0 :         ereport(ERROR,
     331                 :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     332 ECB             :                  errmsg("could not determine which collation to use for string hashing"),
     333                 :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     334                 : 
     335 CBC        2034 :     if (!lc_collate_is_c(collid))
     336 GIC        2034 :         mylocale = pg_newlocale_from_collation(collid);
     337 ECB             : 
     338 GNC        2034 :     if (pg_locale_deterministic(mylocale))
     339 ECB             :     {
     340 GIC        2022 :         result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
     341            2022 :                                    VARSIZE_ANY_EXHDR(key),
     342            2022 :                                    PG_GETARG_INT64(1));
     343                 :     }
     344                 :     else
     345 ECB             :     {
     346                 :         Size        bsize, rsize;
     347                 :         char       *buf;
     348 GNC          12 :         const char *keydata = VARDATA_ANY(key);
     349              12 :         size_t      keylen = VARSIZE_ANY_EXHDR(key);
     350 EUB             : 
     351 GNC          12 :         bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
     352              12 :         buf = palloc(bsize + 1);
     353                 : 
     354              12 :         rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
     355              12 :         if (rsize != bsize)
     356 UNC           0 :             elog(ERROR, "pg_strnxfrm() returned unexpected result");
     357                 : 
     358                 :         /*
     359                 :          * In principle, there's no reason to include the terminating NUL
     360                 :          * character in the hash, but it was done before and the behavior
     361                 :          * must be preserved.
     362                 :          */
     363 GNC          12 :         result = hash_any_extended((uint8_t *) buf, bsize + 1,
     364              12 :                                    PG_GETARG_INT64(1));
     365                 : 
     366              12 :         pfree(buf);
     367                 :     }
     368                 : 
     369 GIC        2034 :     PG_FREE_IF_COPY(key, 0);
     370                 : 
     371            2034 :     return result;
     372 ECB             : }
     373                 : 
     374                 : /*
     375                 :  * hashvarlena() can be used for any varlena datatype in which there are
     376                 :  * no non-significant bits, ie, distinct bitpatterns never compare as equal.
     377                 :  */
     378                 : Datum
     379 GIC        3069 : hashvarlena(PG_FUNCTION_ARGS)
     380                 : {
     381 CBC        3069 :     struct varlena *key = PG_GETARG_VARLENA_PP(0);
     382                 :     Datum       result;
     383 ECB             : 
     384 GIC        3069 :     result = hash_any((unsigned char *) VARDATA_ANY(key),
     385            3069 :                       VARSIZE_ANY_EXHDR(key));
     386                 : 
     387 EUB             :     /* Avoid leaking memory for toasted inputs */
     388 GIC        3069 :     PG_FREE_IF_COPY(key, 0);
     389 EUB             : 
     390 GIC        3069 :     return result;
     391                 : }
     392 EUB             : 
     393                 : Datum
     394 UBC           0 : hashvarlenaextended(PG_FUNCTION_ARGS)
     395                 : {
     396               0 :     struct varlena *key = PG_GETARG_VARLENA_PP(0);
     397                 :     Datum       result;
     398 EUB             : 
     399 UIC           0 :     result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
     400               0 :                                VARSIZE_ANY_EXHDR(key),
     401               0 :                                PG_GETARG_INT64(1));
     402                 : 
     403               0 :     PG_FREE_IF_COPY(key, 0);
     404                 : 
     405               0 :     return result;
     406                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a