Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * uuid.c
4 : * Functions for the built-in type "uuid".
5 : *
6 : * Copyright (c) 2007-2023, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/adt/uuid.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 :
14 : #include "postgres.h"
15 :
16 : #include "common/hashfn.h"
17 : #include "lib/hyperloglog.h"
18 : #include "libpq/pqformat.h"
19 : #include "port/pg_bswap.h"
20 : #include "utils/builtins.h"
21 : #include "utils/guc.h"
22 : #include "utils/sortsupport.h"
23 : #include "utils/uuid.h"
24 :
25 : /* sortsupport for uuid */
26 : typedef struct
27 : {
28 : int64 input_count; /* number of non-null values seen */
29 : bool estimating; /* true if estimating cardinality */
30 :
31 : hyperLogLogState abbr_card; /* cardinality estimator */
32 : } uuid_sortsupport_state;
33 :
34 : static void string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext);
35 : static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
36 : static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
37 : static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
38 : static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
39 :
40 : Datum
5915 neilc 41 CBC 286918 : uuid_in(PG_FUNCTION_ARGS)
42 : {
5624 bruce 43 286918 : char *uuid_str = PG_GETARG_CSTRING(0);
44 : pg_uuid_t *uuid;
45 :
5915 neilc 46 286918 : uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
116 tgl 47 GNC 286918 : string_to_uuid(uuid_str, uuid, fcinfo->context);
5915 neilc 48 CBC 286900 : PG_RETURN_UUID_P(uuid);
49 : }
50 :
51 : Datum
52 2369 : uuid_out(PG_FUNCTION_ARGS)
53 : {
5624 bruce 54 2369 : pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
55 : static const char hex_chars[] = "0123456789abcdef";
56 : StringInfoData buf;
57 : int i;
58 :
5912 neilc 59 2369 : initStringInfo(&buf);
60 40273 : for (i = 0; i < UUID_LEN; i++)
61 : {
62 : int hi;
63 : int lo;
64 :
65 : /*
66 : * We print uuid values as a string of 8, 4, 4, 4, and then 12
67 : * hexadecimal characters, with each group is separated by a hyphen
68 : * ("-"). Therefore, add the hyphens at the appropriate places here.
69 : */
70 37904 : if (i == 4 || i == 6 || i == 8 || i == 10)
71 9476 : appendStringInfoChar(&buf, '-');
72 :
73 37904 : hi = uuid->data[i] >> 4;
74 37904 : lo = uuid->data[i] & 0x0F;
75 :
76 37904 : appendStringInfoChar(&buf, hex_chars[hi]);
77 37904 : appendStringInfoChar(&buf, hex_chars[lo]);
78 : }
79 :
80 2369 : PG_RETURN_CSTRING(buf.data);
81 : }
82 :
83 : /*
84 : * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
85 : * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
86 : * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
87 : * digits, is the only one used for output.)
88 : */
89 : static void
116 tgl 90 GNC 286918 : string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext)
91 : {
5270 peter_e 92 CBC 286918 : const char *src = source;
tgl 93 286918 : bool braces = false;
94 : int i;
95 :
peter_e 96 286918 : if (src[0] == '{')
97 : {
tgl 98 12 : src++;
99 12 : braces = true;
100 : }
101 :
5912 neilc 102 4877399 : for (i = 0; i < UUID_LEN; i++)
103 : {
104 : char str_buf[3];
105 :
5270 peter_e 106 4590499 : if (src[0] == '\0' || src[1] == '\0')
107 18 : goto syntax_error;
108 4590493 : memcpy(str_buf, src, 2);
5912 neilc 109 4590493 : if (!isxdigit((unsigned char) str_buf[0]) ||
110 4590487 : !isxdigit((unsigned char) str_buf[1]))
111 12 : goto syntax_error;
112 :
113 4590481 : str_buf[2] = '\0';
114 4590481 : uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
5270 peter_e 115 4590481 : src += 2;
116 4590481 : if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
117 967576 : src++;
118 : }
119 :
120 286900 : if (braces)
121 : {
tgl 122 9 : if (*src != '}')
peter_e 123 3 : goto syntax_error;
tgl 124 6 : src++;
125 : }
126 :
peter_e 127 286897 : if (*src != '\0')
128 3 : goto syntax_error;
129 :
5912 neilc 130 286894 : return;
131 :
132 24 : syntax_error:
116 tgl 133 GNC 24 : ereturn(escontext,,
134 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
135 : errmsg("invalid input syntax for type %s: \"%s\"",
136 : "uuid", source)));
137 : }
138 :
139 : Datum
5915 neilc 140 UBC 0 : uuid_recv(PG_FUNCTION_ARGS)
141 : {
5624 bruce 142 0 : StringInfo buffer = (StringInfo) PG_GETARG_POINTER(0);
143 : pg_uuid_t *uuid;
144 :
5915 neilc 145 0 : uuid = (pg_uuid_t *) palloc(UUID_LEN);
146 0 : memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
147 0 : PG_RETURN_POINTER(uuid);
148 : }
149 :
150 : Datum
151 0 : uuid_send(PG_FUNCTION_ARGS)
152 : {
5624 bruce 153 0 : pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
154 : StringInfoData buffer;
155 :
5915 neilc 156 0 : pq_begintypsend(&buffer);
54 peter 157 UNC 0 : pq_sendbytes(&buffer, uuid->data, UUID_LEN);
5915 neilc 158 UBC 0 : PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
159 : }
160 :
161 : /* internal uuid compare function */
162 : static int
5624 bruce 163 CBC 20874563 : uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
164 : {
5915 neilc 165 20874563 : return memcmp(arg1->data, arg2->data, UUID_LEN);
166 : }
167 :
168 : Datum
169 41961 : uuid_lt(PG_FUNCTION_ARGS)
170 : {
5624 bruce 171 41961 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
172 41961 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
173 :
5915 neilc 174 41961 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
175 : }
176 :
177 : Datum
178 3024 : uuid_le(PG_FUNCTION_ARGS)
179 : {
5624 bruce 180 3024 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
181 3024 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
182 :
5915 neilc 183 3024 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
184 : }
185 :
186 : Datum
187 82120 : uuid_eq(PG_FUNCTION_ARGS)
188 : {
5624 bruce 189 82120 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
190 82120 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
191 :
5915 neilc 192 82120 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
193 : }
194 :
195 : Datum
196 2364 : uuid_ge(PG_FUNCTION_ARGS)
197 : {
5624 bruce 198 2364 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
199 2364 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
200 :
5915 neilc 201 2364 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
202 : }
203 :
204 : Datum
205 2841 : uuid_gt(PG_FUNCTION_ARGS)
206 : {
5624 bruce 207 2841 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
208 2841 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
209 :
5915 neilc 210 2841 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
211 : }
212 :
213 : Datum
214 10 : uuid_ne(PG_FUNCTION_ARGS)
215 : {
5624 bruce 216 10 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
217 10 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
218 :
5915 neilc 219 10 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
220 : }
221 :
222 : /* handler for btree index operator */
223 : Datum
224 4638 : uuid_cmp(PG_FUNCTION_ARGS)
225 : {
5624 bruce 226 4638 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
227 4638 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
228 :
5915 neilc 229 4638 : PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
230 : }
231 :
232 : /*
233 : * Sort support strategy routine
234 : */
235 : Datum
2711 rhaas 236 175 : uuid_sortsupport(PG_FUNCTION_ARGS)
237 : {
2495 238 175 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
239 :
2711 240 175 : ssup->comparator = uuid_fast_cmp;
241 175 : ssup->ssup_extra = NULL;
242 :
243 175 : if (ssup->abbreviate)
244 : {
245 : uuid_sortsupport_state *uss;
246 : MemoryContext oldcontext;
247 :
248 139 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
249 :
250 139 : uss = palloc(sizeof(uuid_sortsupport_state));
251 139 : uss->input_count = 0;
252 139 : uss->estimating = true;
253 139 : initHyperLogLog(&uss->abbr_card, 10);
254 :
255 139 : ssup->ssup_extra = uss;
256 :
372 john.naylor 257 139 : ssup->comparator = ssup_datum_unsigned_cmp;
2711 rhaas 258 139 : ssup->abbrev_converter = uuid_abbrev_convert;
259 139 : ssup->abbrev_abort = uuid_abbrev_abort;
260 139 : ssup->abbrev_full_comparator = uuid_fast_cmp;
261 :
262 139 : MemoryContextSwitchTo(oldcontext);
263 : }
264 :
265 175 : PG_RETURN_VOID();
266 : }
267 :
268 : /*
269 : * SortSupport comparison func
270 : */
271 : static int
272 20737605 : uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
273 : {
274 20737605 : pg_uuid_t *arg1 = DatumGetUUIDP(x);
275 20737605 : pg_uuid_t *arg2 = DatumGetUUIDP(y);
276 :
277 20737605 : return uuid_internal_cmp(arg1, arg2);
278 : }
279 :
280 : /*
281 : * Callback for estimating effectiveness of abbreviated key optimization.
282 : *
283 : * We pay no attention to the cardinality of the non-abbreviated data, because
284 : * there is no equality fast-path within authoritative uuid comparator.
285 : */
286 : static bool
287 1149 : uuid_abbrev_abort(int memtupcount, SortSupport ssup)
288 : {
2495 289 1149 : uuid_sortsupport_state *uss = ssup->ssup_extra;
290 : double abbr_card;
291 :
2711 292 1149 : if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
293 1053 : return false;
294 :
295 96 : abbr_card = estimateHyperLogLog(&uss->abbr_card);
296 :
297 : /*
298 : * If we have >100k distinct values, then even if we were sorting many
299 : * billion rows we'd likely still break even, and the penalty of undoing
300 : * that many rows of abbrevs would probably not be worth it. Stop even
301 : * counting at that point.
302 : */
303 96 : if (abbr_card > 100000.0)
304 : {
305 : #ifdef TRACE_SORT
2711 rhaas 306 UBC 0 : if (trace_sort)
307 0 : elog(LOG,
308 : "uuid_abbrev: estimation ends at cardinality %f"
309 : " after " INT64_FORMAT " values (%d rows)",
310 : abbr_card, uss->input_count, memtupcount);
311 : #endif
312 0 : uss->estimating = false;
313 0 : return false;
314 : }
315 :
316 : /*
317 : * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
318 : * fudge factor allows us to abort earlier on genuinely pathological data
319 : * where we've had exactly one abbreviated value in the first 2k
320 : * (non-null) rows.
321 : */
2711 rhaas 322 CBC 96 : if (abbr_card < uss->input_count / 2000.0 + 0.5)
323 : {
324 : #ifdef TRACE_SORT
325 48 : if (trace_sort)
2711 rhaas 326 UBC 0 : elog(LOG,
327 : "uuid_abbrev: aborting abbreviation at cardinality %f"
328 : " below threshold %f after " INT64_FORMAT " values (%d rows)",
329 : abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
330 : memtupcount);
331 : #endif
2711 rhaas 332 CBC 48 : return true;
333 : }
334 :
335 : #ifdef TRACE_SORT
336 48 : if (trace_sort)
2711 rhaas 337 UBC 0 : elog(LOG,
338 : "uuid_abbrev: cardinality %f after " INT64_FORMAT
339 : " values (%d rows)", abbr_card, uss->input_count, memtupcount);
340 : #endif
341 :
2711 rhaas 342 CBC 48 : return false;
343 : }
344 :
345 : /*
346 : * Conversion routine for sortsupport. Converts original uuid representation
347 : * to abbreviated key representation. Our encoding strategy is simple -- pack
348 : * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
349 : * machines, the bytes are stored in reverse order), and treat it as an
350 : * unsigned integer.
351 : */
352 : static Datum
353 1692045 : uuid_abbrev_convert(Datum original, SortSupport ssup)
354 : {
2495 355 1692045 : uuid_sortsupport_state *uss = ssup->ssup_extra;
356 1692045 : pg_uuid_t *authoritative = DatumGetUUIDP(original);
357 : Datum res;
358 :
2711 359 1692045 : memcpy(&res, authoritative->data, sizeof(Datum));
360 1692045 : uss->input_count += 1;
361 :
362 1692045 : if (uss->estimating)
363 : {
364 : uint32 tmp;
365 :
366 : #if SIZEOF_DATUM == 8
367 1692045 : tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
368 : #else /* SIZEOF_DATUM != 8 */
369 : tmp = (uint32) res;
370 : #endif
371 :
372 1692045 : addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
373 : }
374 :
375 : /*
376 : * Byteswap on little-endian machines.
377 : *
378 : * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
379 : * 3-way comparator) works correctly on all platforms. If we didn't do
380 : * this, the comparator would have to call memcmp() with a pair of
381 : * pointers to the first byte of each abbreviated key, which is slower.
382 : */
383 1692045 : res = DatumBigEndianToNative(res);
384 :
385 1692045 : return res;
386 : }
387 :
388 : /* hash index support */
389 : Datum
5915 neilc 390 1194 : uuid_hash(PG_FUNCTION_ARGS)
391 : {
5624 bruce 392 1194 : pg_uuid_t *key = PG_GETARG_UUID_P(0);
393 :
5912 neilc 394 1194 : return hash_any(key->data, UUID_LEN);
395 : }
396 :
397 : Datum
2047 rhaas 398 30 : uuid_hash_extended(PG_FUNCTION_ARGS)
399 : {
400 30 : pg_uuid_t *key = PG_GETARG_UUID_P(0);
401 :
402 30 : return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
403 : }
404 :
405 : Datum
1365 peter 406 6 : gen_random_uuid(PG_FUNCTION_ARGS)
407 : {
408 6 : pg_uuid_t *uuid = palloc(UUID_LEN);
409 :
410 6 : if (!pg_strong_random(uuid, UUID_LEN))
1365 peter 411 UBC 0 : ereport(ERROR,
412 : (errcode(ERRCODE_INTERNAL_ERROR),
413 : errmsg("could not generate random values")));
414 :
415 : /*
416 : * Set magic numbers for a "version 4" (pseudorandom) UUID, see
417 : * http://tools.ietf.org/html/rfc4122#section-4.4
418 : */
1365 peter 419 CBC 6 : uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40; /* time_hi_and_version */
420 6 : uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80; /* clock_seq_hi_and_reserved */
421 :
422 6 : PG_RETURN_UUID_P(uuid);
423 : }
|