LCOV - differential code coverage report
Current view: top level - src/backend/utils/adt - uuid.c (source / functions) Coverage Total Hit UNC UBC GNC CBC DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 88.2 % 144 127 1 16 3 124 1 3
Current Date: 2023-04-08 15:15:32 Functions: 90.0 % 20 18 1 1 2 16 1
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * uuid.c
       4                 :  *    Functions for the built-in type "uuid".
       5                 :  *
       6                 :  * Copyright (c) 2007-2023, PostgreSQL Global Development Group
       7                 :  *
       8                 :  * IDENTIFICATION
       9                 :  *    src/backend/utils/adt/uuid.c
      10                 :  *
      11                 :  *-------------------------------------------------------------------------
      12                 :  */
      13                 : 
      14                 : #include "postgres.h"
      15                 : 
      16                 : #include "common/hashfn.h"
      17                 : #include "lib/hyperloglog.h"
      18                 : #include "libpq/pqformat.h"
      19                 : #include "port/pg_bswap.h"
      20                 : #include "utils/builtins.h"
      21                 : #include "utils/guc.h"
      22                 : #include "utils/sortsupport.h"
      23                 : #include "utils/uuid.h"
      24                 : 
      25                 : /* sortsupport for uuid */
      26                 : typedef struct
      27                 : {
      28                 :     int64       input_count;    /* number of non-null values seen */
      29                 :     bool        estimating;     /* true if estimating cardinality */
      30                 : 
      31                 :     hyperLogLogState abbr_card; /* cardinality estimator */
      32                 : } uuid_sortsupport_state;
      33                 : 
      34                 : static void string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext);
      35                 : static int  uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
      36                 : static int  uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
      37                 : static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
      38                 : static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
      39                 : 
      40                 : Datum
      41 CBC      286918 : uuid_in(PG_FUNCTION_ARGS)
      42                 : {
      43          286918 :     char       *uuid_str = PG_GETARG_CSTRING(0);
      44                 :     pg_uuid_t  *uuid;
      45                 : 
      46          286918 :     uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
      47 GNC      286918 :     string_to_uuid(uuid_str, uuid, fcinfo->context);
      48 CBC      286900 :     PG_RETURN_UUID_P(uuid);
      49                 : }
      50                 : 
      51                 : Datum
      52            2369 : uuid_out(PG_FUNCTION_ARGS)
      53                 : {
      54            2369 :     pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
      55                 :     static const char hex_chars[] = "0123456789abcdef";
      56                 :     StringInfoData buf;
      57                 :     int         i;
      58                 : 
      59            2369 :     initStringInfo(&buf);
      60           40273 :     for (i = 0; i < UUID_LEN; i++)
      61                 :     {
      62                 :         int         hi;
      63                 :         int         lo;
      64                 : 
      65                 :         /*
      66                 :          * We print uuid values as a string of 8, 4, 4, 4, and then 12
      67                 :          * hexadecimal characters, with each group is separated by a hyphen
      68                 :          * ("-"). Therefore, add the hyphens at the appropriate places here.
      69                 :          */
      70           37904 :         if (i == 4 || i == 6 || i == 8 || i == 10)
      71            9476 :             appendStringInfoChar(&buf, '-');
      72                 : 
      73           37904 :         hi = uuid->data[i] >> 4;
      74           37904 :         lo = uuid->data[i] & 0x0F;
      75                 : 
      76           37904 :         appendStringInfoChar(&buf, hex_chars[hi]);
      77           37904 :         appendStringInfoChar(&buf, hex_chars[lo]);
      78                 :     }
      79                 : 
      80            2369 :     PG_RETURN_CSTRING(buf.data);
      81                 : }
      82                 : 
      83                 : /*
      84                 :  * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
      85                 :  * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
      86                 :  * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
      87                 :  * digits, is the only one used for output.)
      88                 :  */
      89                 : static void
      90 GNC      286918 : string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext)
      91                 : {
      92 CBC      286918 :     const char *src = source;
      93          286918 :     bool        braces = false;
      94                 :     int         i;
      95                 : 
      96          286918 :     if (src[0] == '{')
      97                 :     {
      98              12 :         src++;
      99              12 :         braces = true;
     100                 :     }
     101                 : 
     102         4877399 :     for (i = 0; i < UUID_LEN; i++)
     103                 :     {
     104                 :         char        str_buf[3];
     105                 : 
     106         4590499 :         if (src[0] == '\0' || src[1] == '\0')
     107              18 :             goto syntax_error;
     108         4590493 :         memcpy(str_buf, src, 2);
     109         4590493 :         if (!isxdigit((unsigned char) str_buf[0]) ||
     110         4590487 :             !isxdigit((unsigned char) str_buf[1]))
     111              12 :             goto syntax_error;
     112                 : 
     113         4590481 :         str_buf[2] = '\0';
     114         4590481 :         uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
     115         4590481 :         src += 2;
     116         4590481 :         if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
     117          967576 :             src++;
     118                 :     }
     119                 : 
     120          286900 :     if (braces)
     121                 :     {
     122               9 :         if (*src != '}')
     123               3 :             goto syntax_error;
     124               6 :         src++;
     125                 :     }
     126                 : 
     127          286897 :     if (*src != '\0')
     128               3 :         goto syntax_error;
     129                 : 
     130          286894 :     return;
     131                 : 
     132              24 : syntax_error:
     133 GNC          24 :     ereturn(escontext,,
     134                 :             (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     135                 :              errmsg("invalid input syntax for type %s: \"%s\"",
     136                 :                     "uuid", source)));
     137                 : }
     138                 : 
     139                 : Datum
     140 UBC           0 : uuid_recv(PG_FUNCTION_ARGS)
     141                 : {
     142               0 :     StringInfo  buffer = (StringInfo) PG_GETARG_POINTER(0);
     143                 :     pg_uuid_t  *uuid;
     144                 : 
     145               0 :     uuid = (pg_uuid_t *) palloc(UUID_LEN);
     146               0 :     memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
     147               0 :     PG_RETURN_POINTER(uuid);
     148                 : }
     149                 : 
     150                 : Datum
     151               0 : uuid_send(PG_FUNCTION_ARGS)
     152                 : {
     153               0 :     pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
     154                 :     StringInfoData buffer;
     155                 : 
     156               0 :     pq_begintypsend(&buffer);
     157 UNC           0 :     pq_sendbytes(&buffer, uuid->data, UUID_LEN);
     158 UBC           0 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
     159                 : }
     160                 : 
     161                 : /* internal uuid compare function */
     162                 : static int
     163 CBC    20874563 : uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
     164                 : {
     165        20874563 :     return memcmp(arg1->data, arg2->data, UUID_LEN);
     166                 : }
     167                 : 
     168                 : Datum
     169           41961 : uuid_lt(PG_FUNCTION_ARGS)
     170                 : {
     171           41961 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     172           41961 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     173                 : 
     174           41961 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
     175                 : }
     176                 : 
     177                 : Datum
     178            3024 : uuid_le(PG_FUNCTION_ARGS)
     179                 : {
     180            3024 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     181            3024 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     182                 : 
     183            3024 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
     184                 : }
     185                 : 
     186                 : Datum
     187           82120 : uuid_eq(PG_FUNCTION_ARGS)
     188                 : {
     189           82120 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     190           82120 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     191                 : 
     192           82120 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
     193                 : }
     194                 : 
     195                 : Datum
     196            2364 : uuid_ge(PG_FUNCTION_ARGS)
     197                 : {
     198            2364 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     199            2364 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     200                 : 
     201            2364 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
     202                 : }
     203                 : 
     204                 : Datum
     205            2841 : uuid_gt(PG_FUNCTION_ARGS)
     206                 : {
     207            2841 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     208            2841 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     209                 : 
     210            2841 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
     211                 : }
     212                 : 
     213                 : Datum
     214              10 : uuid_ne(PG_FUNCTION_ARGS)
     215                 : {
     216              10 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     217              10 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     218                 : 
     219              10 :     PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
     220                 : }
     221                 : 
     222                 : /* handler for btree index operator */
     223                 : Datum
     224            4638 : uuid_cmp(PG_FUNCTION_ARGS)
     225                 : {
     226            4638 :     pg_uuid_t  *arg1 = PG_GETARG_UUID_P(0);
     227            4638 :     pg_uuid_t  *arg2 = PG_GETARG_UUID_P(1);
     228                 : 
     229            4638 :     PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
     230                 : }
     231                 : 
     232                 : /*
     233                 :  * Sort support strategy routine
     234                 :  */
     235                 : Datum
     236             175 : uuid_sortsupport(PG_FUNCTION_ARGS)
     237                 : {
     238             175 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
     239                 : 
     240             175 :     ssup->comparator = uuid_fast_cmp;
     241             175 :     ssup->ssup_extra = NULL;
     242                 : 
     243             175 :     if (ssup->abbreviate)
     244                 :     {
     245                 :         uuid_sortsupport_state *uss;
     246                 :         MemoryContext oldcontext;
     247                 : 
     248             139 :         oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
     249                 : 
     250             139 :         uss = palloc(sizeof(uuid_sortsupport_state));
     251             139 :         uss->input_count = 0;
     252             139 :         uss->estimating = true;
     253             139 :         initHyperLogLog(&uss->abbr_card, 10);
     254                 : 
     255             139 :         ssup->ssup_extra = uss;
     256                 : 
     257             139 :         ssup->comparator = ssup_datum_unsigned_cmp;
     258             139 :         ssup->abbrev_converter = uuid_abbrev_convert;
     259             139 :         ssup->abbrev_abort = uuid_abbrev_abort;
     260             139 :         ssup->abbrev_full_comparator = uuid_fast_cmp;
     261                 : 
     262             139 :         MemoryContextSwitchTo(oldcontext);
     263                 :     }
     264                 : 
     265             175 :     PG_RETURN_VOID();
     266                 : }
     267                 : 
     268                 : /*
     269                 :  * SortSupport comparison func
     270                 :  */
     271                 : static int
     272        20737605 : uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
     273                 : {
     274        20737605 :     pg_uuid_t  *arg1 = DatumGetUUIDP(x);
     275        20737605 :     pg_uuid_t  *arg2 = DatumGetUUIDP(y);
     276                 : 
     277        20737605 :     return uuid_internal_cmp(arg1, arg2);
     278                 : }
     279                 : 
     280                 : /*
     281                 :  * Callback for estimating effectiveness of abbreviated key optimization.
     282                 :  *
     283                 :  * We pay no attention to the cardinality of the non-abbreviated data, because
     284                 :  * there is no equality fast-path within authoritative uuid comparator.
     285                 :  */
     286                 : static bool
     287            1149 : uuid_abbrev_abort(int memtupcount, SortSupport ssup)
     288                 : {
     289            1149 :     uuid_sortsupport_state *uss = ssup->ssup_extra;
     290                 :     double      abbr_card;
     291                 : 
     292            1149 :     if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
     293            1053 :         return false;
     294                 : 
     295              96 :     abbr_card = estimateHyperLogLog(&uss->abbr_card);
     296                 : 
     297                 :     /*
     298                 :      * If we have >100k distinct values, then even if we were sorting many
     299                 :      * billion rows we'd likely still break even, and the penalty of undoing
     300                 :      * that many rows of abbrevs would probably not be worth it.  Stop even
     301                 :      * counting at that point.
     302                 :      */
     303              96 :     if (abbr_card > 100000.0)
     304                 :     {
     305                 : #ifdef TRACE_SORT
     306 UBC           0 :         if (trace_sort)
     307               0 :             elog(LOG,
     308                 :                  "uuid_abbrev: estimation ends at cardinality %f"
     309                 :                  " after " INT64_FORMAT " values (%d rows)",
     310                 :                  abbr_card, uss->input_count, memtupcount);
     311                 : #endif
     312               0 :         uss->estimating = false;
     313               0 :         return false;
     314                 :     }
     315                 : 
     316                 :     /*
     317                 :      * Target minimum cardinality is 1 per ~2k of non-null inputs.  0.5 row
     318                 :      * fudge factor allows us to abort earlier on genuinely pathological data
     319                 :      * where we've had exactly one abbreviated value in the first 2k
     320                 :      * (non-null) rows.
     321                 :      */
     322 CBC          96 :     if (abbr_card < uss->input_count / 2000.0 + 0.5)
     323                 :     {
     324                 : #ifdef TRACE_SORT
     325              48 :         if (trace_sort)
     326 UBC           0 :             elog(LOG,
     327                 :                  "uuid_abbrev: aborting abbreviation at cardinality %f"
     328                 :                  " below threshold %f after " INT64_FORMAT " values (%d rows)",
     329                 :                  abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
     330                 :                  memtupcount);
     331                 : #endif
     332 CBC          48 :         return true;
     333                 :     }
     334                 : 
     335                 : #ifdef TRACE_SORT
     336              48 :     if (trace_sort)
     337 UBC           0 :         elog(LOG,
     338                 :              "uuid_abbrev: cardinality %f after " INT64_FORMAT
     339                 :              " values (%d rows)", abbr_card, uss->input_count, memtupcount);
     340                 : #endif
     341                 : 
     342 CBC          48 :     return false;
     343                 : }
     344                 : 
     345                 : /*
     346                 :  * Conversion routine for sortsupport.  Converts original uuid representation
     347                 :  * to abbreviated key representation.  Our encoding strategy is simple -- pack
     348                 :  * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
     349                 :  * machines, the bytes are stored in reverse order), and treat it as an
     350                 :  * unsigned integer.
     351                 :  */
     352                 : static Datum
     353         1692045 : uuid_abbrev_convert(Datum original, SortSupport ssup)
     354                 : {
     355         1692045 :     uuid_sortsupport_state *uss = ssup->ssup_extra;
     356         1692045 :     pg_uuid_t  *authoritative = DatumGetUUIDP(original);
     357                 :     Datum       res;
     358                 : 
     359         1692045 :     memcpy(&res, authoritative->data, sizeof(Datum));
     360         1692045 :     uss->input_count += 1;
     361                 : 
     362         1692045 :     if (uss->estimating)
     363                 :     {
     364                 :         uint32      tmp;
     365                 : 
     366                 : #if SIZEOF_DATUM == 8
     367         1692045 :         tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
     368                 : #else                           /* SIZEOF_DATUM != 8 */
     369                 :         tmp = (uint32) res;
     370                 : #endif
     371                 : 
     372         1692045 :         addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
     373                 :     }
     374                 : 
     375                 :     /*
     376                 :      * Byteswap on little-endian machines.
     377                 :      *
     378                 :      * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
     379                 :      * 3-way comparator) works correctly on all platforms.  If we didn't do
     380                 :      * this, the comparator would have to call memcmp() with a pair of
     381                 :      * pointers to the first byte of each abbreviated key, which is slower.
     382                 :      */
     383         1692045 :     res = DatumBigEndianToNative(res);
     384                 : 
     385         1692045 :     return res;
     386                 : }
     387                 : 
     388                 : /* hash index support */
     389                 : Datum
     390            1194 : uuid_hash(PG_FUNCTION_ARGS)
     391                 : {
     392            1194 :     pg_uuid_t  *key = PG_GETARG_UUID_P(0);
     393                 : 
     394            1194 :     return hash_any(key->data, UUID_LEN);
     395                 : }
     396                 : 
     397                 : Datum
     398              30 : uuid_hash_extended(PG_FUNCTION_ARGS)
     399                 : {
     400              30 :     pg_uuid_t  *key = PG_GETARG_UUID_P(0);
     401                 : 
     402              30 :     return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
     403                 : }
     404                 : 
     405                 : Datum
     406               6 : gen_random_uuid(PG_FUNCTION_ARGS)
     407                 : {
     408               6 :     pg_uuid_t  *uuid = palloc(UUID_LEN);
     409                 : 
     410               6 :     if (!pg_strong_random(uuid, UUID_LEN))
     411 UBC           0 :         ereport(ERROR,
     412                 :                 (errcode(ERRCODE_INTERNAL_ERROR),
     413                 :                  errmsg("could not generate random values")));
     414                 : 
     415                 :     /*
     416                 :      * Set magic numbers for a "version 4" (pseudorandom) UUID, see
     417                 :      * http://tools.ietf.org/html/rfc4122#section-4.4
     418                 :      */
     419 CBC           6 :     uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40;    /* time_hi_and_version */
     420               6 :     uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80;    /* clock_seq_hi_and_reserved */
     421                 : 
     422               6 :     PG_RETURN_UUID_P(uuid);
     423                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a