Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * varchar.c
4 : * Functions for the built-in types char(n) and varchar(n).
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/utils/adt/varchar.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/detoast.h"
18 : #include "access/htup_details.h"
19 : #include "catalog/pg_collation.h"
20 : #include "catalog/pg_type.h"
21 : #include "common/hashfn.h"
22 : #include "libpq/pqformat.h"
23 : #include "mb/pg_wchar.h"
24 : #include "nodes/nodeFuncs.h"
25 : #include "nodes/supportnodes.h"
26 : #include "utils/array.h"
27 : #include "utils/builtins.h"
28 : #include "utils/lsyscache.h"
29 : #include "utils/pg_locale.h"
30 : #include "utils/varlena.h"
31 :
32 : /* common code for bpchartypmodin and varchartypmodin */
33 : static int32
5944 tgl 34 GIC 1644 : anychar_typmodin(ArrayType *ta, const char *typename)
5944 tgl 35 ECB : {
36 : int32 typmod;
37 : int32 *tl;
38 : int n;
39 :
5777 tgl 40 GIC 1644 : tl = ArrayGetIntegerTypmods(ta, &n);
5944 tgl 41 ECB :
42 : /*
43 : * we're not too tense about good error message here because grammar
44 : * shouldn't allow wrong number of modifiers for CHAR
45 : */
5944 tgl 46 GIC 1644 : if (n != 1)
5944 tgl 47 LBC 0 : ereport(ERROR,
5944 tgl 48 EUB : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
49 : errmsg("invalid type modifier")));
50 :
5944 tgl 51 GIC 1644 : if (*tl < 1)
5944 tgl 52 LBC 0 : ereport(ERROR,
5944 tgl 53 EUB : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
54 : errmsg("length for type %s must be at least 1", typename)));
5944 tgl 55 GIC 1644 : if (*tl > MaxAttrSize)
5944 tgl 56 LBC 0 : ereport(ERROR,
5944 tgl 57 EUB : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
58 : errmsg("length for type %s cannot exceed %d",
59 : typename, MaxAttrSize)));
60 :
61 : /*
62 : * For largely historical reasons, the typmod is VARHDRSZ plus the number
63 : * of characters; there is enough client-side code that knows about that
64 : * that we'd better not change it.
65 : */
5944 tgl 66 GIC 1644 : typmod = VARHDRSZ + *tl;
5944 tgl 67 ECB :
5944 tgl 68 GIC 1644 : return typmod;
5944 tgl 69 ECB : }
70 :
71 : /* common code for bpchartypmodout and varchartypmodout */
72 : static char *
5944 tgl 73 GIC 476 : anychar_typmodout(int32 typmod)
5944 tgl 74 ECB : {
5624 bruce 75 GIC 476 : char *res = (char *) palloc(64);
5944 tgl 76 ECB :
5944 tgl 77 GIC 476 : if (typmod > VARHDRSZ)
5944 tgl 78 CBC 476 : snprintf(res, 64, "(%d)", (int) (typmod - VARHDRSZ));
5944 tgl 79 ECB : else
5944 tgl 80 UIC 0 : *res = '\0';
5944 tgl 81 EUB :
5944 tgl 82 GIC 476 : return res;
5944 tgl 83 ECB : }
84 :
85 :
86 : /*
87 : * CHAR() and VARCHAR() types are part of the SQL standard. CHAR()
88 : * is for blank-padded string whose length is specified in CREATE TABLE.
89 : * VARCHAR is for storing string whose length is at most the length specified
90 : * at CREATE TABLE time.
91 : *
92 : * It's hard to implement these types because we cannot figure out
93 : * the length of the type from the type itself. I changed (hopefully all) the
94 : * fmgr calls that invoke input functions of a data type to supply the
95 : * length also. (eg. in INSERTs, we have the tupleDescriptor which contains
96 : * the length of the attributes and hence the exact length of the char() or
97 : * varchar(). We pass this to bpcharin() or varcharin().) In the case where
98 : * we cannot determine the length, we pass in -1 instead and the input
99 : * converter does not enforce any length check.
100 : *
101 : * We actually implement this as a varlena so that we don't have to pass in
102 : * the length for the comparison functions. (The difference between these
103 : * types and "text" is that we truncate and possibly blank-pad the string
104 : * at insertion time.)
105 : *
106 : * - ay 6/95
107 : */
108 :
109 :
110 : /*****************************************************************************
111 : * bpchar - char() *
112 : *****************************************************************************/
113 :
114 : /*
115 : * bpchar_input -- common guts of bpcharin and bpcharrecv
116 : *
117 : * s is the input text of length len (may not be null-terminated)
118 : * atttypmod is the typmod value to apply
119 : *
120 : * Note that atttypmod is measured in characters, which
121 : * is not necessarily the same as the number of bytes.
122 : *
123 : * If the input string is too long, raise an error, unless the extra
124 : * characters are spaces, in which case they're truncated. (per SQL)
125 : *
126 : * If escontext points to an ErrorSaveContext node, that is filled instead
127 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
128 : * to detect errors.
129 : */
130 : static BpChar *
116 tgl 131 GNC 211497 : bpchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext)
132 : {
133 : BpChar *result;
134 : char *r;
135 : size_t maxlen;
9345 bruce 136 ECB :
137 : /* If typmod is -1 (or invalid), use the actual string length */
7982 tgl 138 GIC 211497 : if (atttypmod < (int32) VARHDRSZ)
6524 139 4253 : maxlen = len;
140 : else
141 : {
142 : size_t charlen; /* number of CHARACTERS in the input */
7836 bruce 143 ECB :
6524 tgl 144 CBC 207244 : maxlen = atttypmod - VARHDRSZ;
6482 tgl 145 GIC 207244 : charlen = pg_mbstrlen_with_len(s, len);
6524 146 207244 : if (charlen > maxlen)
147 : {
148 : /* Verify that extra characters are spaces, and clip them off */
6524 tgl 149 CBC 21 : size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
6482 tgl 150 ECB : size_t j;
6524 151 :
152 : /*
153 : * at this point, len is the actual BYTE length of the input
154 : * string, maxlen is the max number of CHARACTERS allowed for this
155 : * bpchar type, mbmaxlen is the length in BYTES of those chars.
156 : */
6482 tgl 157 GIC 27 : for (j = mbmaxlen; j < len; j++)
158 : {
159 24 : if (s[j] != ' ')
116 tgl 160 GNC 18 : ereturn(escontext, NULL,
161 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
6482 tgl 162 ECB : errmsg("value too long for type character(%d)",
163 : (int) maxlen)));
164 : }
6524 165 :
166 : /*
167 : * Now we set maxlen to the necessary byte length, not the number
168 : * of CHARACTERS!
169 : */
6482 tgl 170 GIC 3 : maxlen = len = mbmaxlen;
171 : }
172 : else
173 : {
174 : /*
6385 bruce 175 ECB : * Now we set maxlen to the necessary byte length, not the number
176 : * of CHARACTERS!
177 : */
6524 tgl 178 GIC 207223 : maxlen = len + (maxlen - charlen);
179 : }
180 : }
181 :
6482 182 211479 : result = (BpChar *) palloc(maxlen + VARHDRSZ);
5885 tgl 183 CBC 211479 : SET_VARSIZE(result, maxlen + VARHDRSZ);
9223 bruce 184 GIC 211479 : r = VARDATA(result);
6524 tgl 185 211479 : memcpy(r, s, len);
186 :
9345 bruce 187 ECB : /* blank pad the string if necessary */
6524 tgl 188 CBC 211479 : if (maxlen > len)
189 201092 : memset(r + len, ' ', maxlen - len);
8335 tgl 190 ECB :
6482 tgl 191 GIC 211479 : return result;
192 : }
6482 tgl 193 ECB :
194 : /*
195 : * Convert a C string to CHARACTER internal representation. atttypmod
196 : * is the declared length of the type plus VARHDRSZ.
197 : */
198 : Datum
6482 tgl 199 GIC 211497 : bpcharin(PG_FUNCTION_ARGS)
200 : {
201 211497 : char *s = PG_GETARG_CSTRING(0);
202 : #ifdef NOT_USED
6482 tgl 203 ECB : Oid typelem = PG_GETARG_OID(1);
204 : #endif
6482 tgl 205 CBC 211497 : int32 atttypmod = PG_GETARG_INT32(2);
206 : BpChar *result;
207 :
116 tgl 208 GNC 211497 : result = bpchar_input(s, strlen(s), atttypmod, fcinfo->context);
8335 tgl 209 CBC 211485 : PG_RETURN_BPCHAR_P(result);
210 : }
211 :
7993 peter_e 212 ECB :
213 : /*
214 : * Convert a CHARACTER value to a C string.
215 : *
216 : * Uses the text conversion functions, which is only appropriate if BpChar
217 : * and text are equivalent types.
218 : */
219 : Datum
8335 tgl 220 GIC 23070 : bpcharout(PG_FUNCTION_ARGS)
221 : {
5493 222 23070 : Datum txt = PG_GETARG_DATUM(0);
223 :
5493 tgl 224 CBC 23070 : PG_RETURN_CSTRING(TextDatumGetCString(txt));
225 : }
9770 scrappy 226 ECB :
227 : /*
7272 tgl 228 : * bpcharrecv - converts external binary format to bpchar
229 : */
230 : Datum
7272 tgl 231 UIC 0 : bpcharrecv(PG_FUNCTION_ARGS)
232 : {
6482 233 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
6482 tgl 234 EUB : #ifdef NOT_USED
235 : Oid typelem = PG_GETARG_OID(1);
236 : #endif
6482 tgl 237 UIC 0 : int32 atttypmod = PG_GETARG_INT32(2);
238 : BpChar *result;
239 : char *str;
6482 tgl 240 EUB : int nbytes;
241 :
6482 tgl 242 UIC 0 : str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
116 tgl 243 UNC 0 : result = bpchar_input(str, nbytes, atttypmod, NULL);
6482 tgl 244 UIC 0 : pfree(str);
6482 tgl 245 UBC 0 : PG_RETURN_BPCHAR_P(result);
7272 tgl 246 EUB : }
247 :
248 : /*
249 : * bpcharsend - converts bpchar to binary format
250 : */
251 : Datum
7272 tgl 252 GIC 1 : bpcharsend(PG_FUNCTION_ARGS)
253 : {
254 : /* Exactly the same as textsend, so share code */
7272 tgl 255 CBC 1 : return textsend(fcinfo);
256 : }
257 :
7993 peter_e 258 ECB :
259 : /*
260 : * Converts a CHARACTER type to the specified size.
261 : *
262 : * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
263 : * isExplicit is true if this is for an explicit cast to char(N).
264 : *
265 : * Truncation rules: for an explicit cast, silently truncate to the given
266 : * length; for an implicit cast, raise error unless extra characters are
267 : * all spaces. (This is sort-of per SQL: the spec would actually have us
268 : * raise a "completion condition" for the explicit cast case, but Postgres
269 : * hasn't got such a concept.)
270 : */
271 : Datum
8335 tgl 272 GIC 5978 : bpchar(PG_FUNCTION_ARGS)
273 : {
5847 274 5978 : BpChar *source = PG_GETARG_BPCHAR_PP(0);
7993 peter_e 275 CBC 5978 : int32 maxlen = PG_GETARG_INT32(1);
7508 tgl 276 GIC 5978 : bool isExplicit = PG_GETARG_BOOL(2);
8335 tgl 277 ECB : BpChar *result;
7993 peter_e 278 : int32 len;
279 : char *r;
280 : char *s;
281 : int i;
282 : int charlen; /* number of characters in the input string +
283 : * VARHDRSZ */
284 :
285 : /* No work if typmod is invalid */
6524 tgl 286 GIC 5978 : if (maxlen < (int32) VARHDRSZ)
6524 tgl 287 UIC 0 : PG_RETURN_BPCHAR_P(source);
288 :
5847 tgl 289 CBC 5978 : maxlen -= VARHDRSZ;
7528 ishii 290 EUB :
5847 tgl 291 GIC 5978 : len = VARSIZE_ANY_EXHDR(source);
5847 tgl 292 CBC 5978 : s = VARDATA_ANY(source);
293 :
294 5978 : charlen = pg_mbstrlen_with_len(s, len);
7528 ishii 295 ECB :
296 : /* No work if supplied data matches typmod already */
6524 tgl 297 CBC 5978 : if (charlen == maxlen)
7993 peter_e 298 GIC 2778 : PG_RETURN_BPCHAR_P(source);
299 :
7938 ishii 300 CBC 3200 : if (charlen > maxlen)
7993 peter_e 301 ECB : {
302 : /* Verify that extra characters are spaces, and clip them off */
303 : size_t maxmblen;
304 :
5847 tgl 305 GIC 18 : maxmblen = pg_mbcharcliplen(s, len, maxlen);
306 :
7508 307 18 : if (!isExplicit)
7508 tgl 308 ECB : {
5847 tgl 309 GIC 42 : for (i = maxmblen; i < len; i++)
5847 tgl 310 CBC 36 : if (s[i] != ' ')
7196 tgl 311 GIC 9 : ereport(ERROR,
6385 bruce 312 ECB : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
313 : errmsg("value too long for type character(%d)",
5847 tgl 314 : maxlen)));
315 : }
316 :
7993 peter_e 317 GIC 9 : len = maxmblen;
318 :
319 : /*
5624 bruce 320 ECB : * At this point, maxlen is the necessary byte length, not the number
321 : * of CHARACTERS!
322 : */
7938 ishii 323 GIC 9 : maxlen = len;
324 : }
325 : else
7938 ishii 326 ECB : {
327 : /*
328 : * At this point, maxlen is the necessary byte length, not the number
329 : * of CHARACTERS!
330 : */
7938 ishii 331 GIC 3182 : maxlen = len + (maxlen - charlen);
332 : }
333 :
5847 tgl 334 CBC 3191 : Assert(maxlen >= len);
335 :
5624 bruce 336 GIC 3191 : result = palloc(maxlen + VARHDRSZ);
5624 bruce 337 CBC 3191 : SET_VARSIZE(result, maxlen + VARHDRSZ);
7993 peter_e 338 GIC 3191 : r = VARDATA(result);
7993 peter_e 339 ECB :
5847 tgl 340 CBC 3191 : memcpy(r, s, len);
9101 lockhart 341 ECB :
342 : /* blank pad the string if necessary */
6524 tgl 343 CBC 3191 : if (maxlen > len)
5847 tgl 344 GIC 3182 : memset(r + len, ' ', maxlen - len);
345 :
8335 tgl 346 CBC 3191 : PG_RETURN_BPCHAR_P(result);
8335 tgl 347 ECB : }
348 :
7993 peter_e 349 :
350 : /* char_bpchar()
351 : * Convert char to bpchar(1).
352 : */
353 : Datum
8343 tgl 354 UIC 0 : char_bpchar(PG_FUNCTION_ARGS)
355 : {
356 0 : char c = PG_GETARG_CHAR(0);
8335 tgl 357 EUB : BpChar *result;
358 :
8335 tgl 359 UBC 0 : result = (BpChar *) palloc(VARHDRSZ + 1);
360 :
5885 tgl 361 UIC 0 : SET_VARSIZE(result, VARHDRSZ + 1);
8343 tgl 362 UBC 0 : *(VARDATA(result)) = c;
363 :
364 0 : PG_RETURN_BPCHAR_P(result);
8343 tgl 365 EUB : }
366 :
9101 lockhart 367 :
368 : /* bpchar_name()
369 : * Converts a bpchar() type to a NameData type.
370 : */
371 : Datum
8289 tgl 372 UIC 0 : bpchar_name(PG_FUNCTION_ARGS)
373 : {
5847 374 0 : BpChar *s = PG_GETARG_BPCHAR_PP(0);
5624 bruce 375 EUB : char *s_data;
376 : Name result;
9081 lockhart 377 : int len;
378 :
5847 tgl 379 UIC 0 : len = VARSIZE_ANY_EXHDR(s);
380 0 : s_data = VARDATA_ANY(s);
381 :
3971 tgl 382 EUB : /* Truncate oversize input */
8311 tgl 383 UBC 0 : if (len >= NAMEDATALEN)
3971 tgl 384 UIC 0 : len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1);
385 :
8289 tgl 386 EUB : /* Remove trailing blanks */
8986 bruce 387 UBC 0 : while (len > 0)
388 : {
5847 tgl 389 UIC 0 : if (s_data[len - 1] != ' ')
8986 bruce 390 UBC 0 : break;
9081 lockhart 391 UIC 0 : len--;
9081 lockhart 392 EUB : }
393 :
3971 tgl 394 : /* We use palloc0 here to ensure result is zero-padded */
3971 tgl 395 UIC 0 : result = (Name) palloc0(NAMEDATALEN);
5847 396 0 : memcpy(NameStr(*result), s_data, len);
397 :
8289 tgl 398 UBC 0 : PG_RETURN_NAME(result);
8289 tgl 399 EUB : }
400 :
9081 lockhart 401 : /* name_bpchar()
402 : * Converts a NameData type to a bpchar type.
403 : *
404 : * Uses the text conversion functions, which is only appropriate if BpChar
405 : * and text are equivalent types.
406 : */
407 : Datum
8289 tgl 408 GIC 3 : name_bpchar(PG_FUNCTION_ARGS)
409 : {
410 3 : Name s = PG_GETARG_NAME(0);
8289 tgl 411 ECB : BpChar *result;
412 :
5493 tgl 413 CBC 3 : result = (BpChar *) cstring_to_text(NameStr(*s));
8289 tgl 414 GIC 3 : PG_RETURN_BPCHAR_P(result);
415 : }
9081 lockhart 416 ECB :
5944 tgl 417 : Datum
5944 tgl 418 GIC 990 : bpchartypmodin(PG_FUNCTION_ARGS)
419 : {
5624 bruce 420 990 : ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
5944 tgl 421 ECB :
5944 tgl 422 GIC 990 : PG_RETURN_INT32(anychar_typmodin(ta, "char"));
5944 tgl 423 ECB : }
424 :
425 : Datum
5944 tgl 426 GIC 389 : bpchartypmodout(PG_FUNCTION_ARGS)
427 : {
5624 bruce 428 389 : int32 typmod = PG_GETARG_INT32(0);
5944 tgl 429 ECB :
5944 tgl 430 GIC 389 : PG_RETURN_CSTRING(anychar_typmodout(typmod));
5944 tgl 431 ECB : }
432 :
9081 lockhart 433 :
434 : /*****************************************************************************
435 : * varchar - varchar(n)
436 : *
437 : * Note: varchar piggybacks on type text for most operations, and so has no
438 : * C-coded functions except for I/O and typmod checking.
439 : *****************************************************************************/
440 :
441 : /*
442 : * varchar_input -- common guts of varcharin and varcharrecv
443 : *
444 : * s is the input text of length len (may not be null-terminated)
445 : * atttypmod is the typmod value to apply
446 : *
447 : * Note that atttypmod is measured in characters, which
448 : * is not necessarily the same as the number of bytes.
449 : *
450 : * If the input string is too long, raise an error, unless the extra
451 : * characters are spaces, in which case they're truncated. (per SQL)
452 : *
453 : * If escontext points to an ErrorSaveContext node, that is filled instead
454 : * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
455 : * to detect errors.
456 : */
457 : static VarChar *
116 tgl 458 GNC 1474338 : varchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext)
459 : {
460 : VarChar *result;
461 : size_t maxlen;
7836 bruce 462 ECB :
7993 peter_e 463 GIC 1474338 : maxlen = atttypmod - VARHDRSZ;
464 :
465 1474338 : if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
466 : {
7982 tgl 467 ECB : /* Verify that extra characters are spaces, and clip them off */
7836 bruce 468 GIC 15 : size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
6482 tgl 469 ECB : size_t j;
470 :
6482 tgl 471 GIC 21 : for (j = mbmaxlen; j < len; j++)
6482 tgl 472 ECB : {
6482 tgl 473 GIC 18 : if (s[j] != ' ')
116 tgl 474 GNC 12 : ereturn(escontext, NULL,
6482 tgl 475 ECB : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
476 : errmsg("value too long for type character varying(%d)",
2118 477 : (int) maxlen)));
6482 478 : }
479 :
6482 tgl 480 GIC 3 : len = mbmaxlen;
481 : }
482 :
483 : /*
484 : * We can use cstring_to_text_with_len because VarChar and text are
485 : * binary-compatible types.
486 : */
5493 487 1474326 : result = (VarChar *) cstring_to_text_with_len(s, len);
6482 tgl 488 CBC 1474326 : return result;
489 : }
490 :
491 : /*
492 : * Convert a C string to VARCHAR internal representation. atttypmod
493 : * is the declared length of the type plus VARHDRSZ.
494 : */
6482 tgl 495 ECB : Datum
6482 tgl 496 CBC 1474337 : varcharin(PG_FUNCTION_ARGS)
497 : {
6482 tgl 498 GIC 1474337 : char *s = PG_GETARG_CSTRING(0);
499 : #ifdef NOT_USED
500 : Oid typelem = PG_GETARG_OID(1);
501 : #endif
502 1474337 : int32 atttypmod = PG_GETARG_INT32(2);
6482 tgl 503 ECB : VarChar *result;
504 :
116 tgl 505 GNC 1474337 : result = varchar_input(s, strlen(s), atttypmod, fcinfo->context);
8335 tgl 506 GIC 1474331 : PG_RETURN_VARCHAR_P(result);
507 : }
508 :
7993 peter_e 509 ECB :
510 : /*
511 : * Convert a VARCHAR value to a C string.
5493 tgl 512 : *
513 : * Uses the text to C string conversion function, which is only appropriate
514 : * if VarChar and text are equivalent types.
515 : */
516 : Datum
8335 tgl 517 GIC 129426 : varcharout(PG_FUNCTION_ARGS)
518 : {
5493 519 129426 : Datum txt = PG_GETARG_DATUM(0);
520 :
521 129426 : PG_RETURN_CSTRING(TextDatumGetCString(txt));
522 : }
523 :
7272 tgl 524 ECB : /*
525 : * varcharrecv - converts external binary format to varchar
526 : */
527 : Datum
7272 tgl 528 CBC 1 : varcharrecv(PG_FUNCTION_ARGS)
529 : {
6482 tgl 530 GIC 1 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
531 : #ifdef NOT_USED
532 : Oid typelem = PG_GETARG_OID(1);
533 : #endif
6482 tgl 534 CBC 1 : int32 atttypmod = PG_GETARG_INT32(2);
535 : VarChar *result;
6482 tgl 536 ECB : char *str;
537 : int nbytes;
538 :
6482 tgl 539 GIC 1 : str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
116 tgl 540 GNC 1 : result = varchar_input(str, nbytes, atttypmod, NULL);
6482 tgl 541 GIC 1 : pfree(str);
542 1 : PG_RETURN_VARCHAR_P(result);
543 : }
544 :
7272 tgl 545 ECB : /*
546 : * varcharsend - converts varchar to binary format
547 : */
548 : Datum
7272 tgl 549 GIC 31669 : varcharsend(PG_FUNCTION_ARGS)
550 : {
551 : /* Exactly the same as textsend, so share code */
552 31669 : return textsend(fcinfo);
553 : }
554 :
7993 peter_e 555 ECB :
556 : /*
557 : * varchar_support()
1520 tgl 558 : *
559 : * Planner support function for the varchar() length coercion function.
560 : *
561 : * Currently, the only interesting thing we can do is flatten calls that set
562 : * the new maximum length >= the previous maximum length. We can ignore the
563 : * isExplicit argument, since that only affects truncation cases.
564 : */
565 : Datum
1520 tgl 566 GIC 1061 : varchar_support(PG_FUNCTION_ARGS)
567 : {
568 1061 : Node *rawreq = (Node *) PG_GETARG_POINTER(0);
4310 rhaas 569 1061 : Node *ret = NULL;
570 :
1520 tgl 571 1061 : if (IsA(rawreq, SupportRequestSimplify))
1520 tgl 572 ECB : {
1520 tgl 573 GIC 439 : SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
1520 tgl 574 CBC 439 : FuncExpr *expr = req->fcall;
1520 tgl 575 ECB : Node *typmod;
576 :
1520 tgl 577 CBC 439 : Assert(list_length(expr->args) >= 2);
578 :
579 439 : typmod = (Node *) lsecond(expr->args);
1520 tgl 580 ECB :
1058 tgl 581 GIC 439 : if (IsA(typmod, Const) && !((Const *) typmod)->constisnull)
582 : {
1520 tgl 583 CBC 439 : Node *source = (Node *) linitial(expr->args);
1520 tgl 584 GIC 439 : int32 old_typmod = exprTypmod(source);
1520 tgl 585 CBC 439 : int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
1520 tgl 586 GIC 439 : int32 old_max = old_typmod - VARHDRSZ;
1520 tgl 587 CBC 439 : int32 new_max = new_typmod - VARHDRSZ;
588 :
589 439 : if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max))
590 15 : ret = relabel_to_typmod(source, new_typmod);
1520 tgl 591 ECB : }
4310 rhaas 592 : }
593 :
4310 rhaas 594 GIC 1061 : PG_RETURN_POINTER(ret);
4310 rhaas 595 ECB : }
596 :
597 : /*
598 : * Converts a VARCHAR type to the specified size.
599 : *
7508 tgl 600 : * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
601 : * isExplicit is true if this is for an explicit cast to varchar(N).
602 : *
603 : * Truncation rules: for an explicit cast, silently truncate to the given
604 : * length; for an implicit cast, raise error unless extra characters are
605 : * all spaces. (This is sort-of per SQL: the spec would actually have us
606 : * raise a "completion condition" for the explicit cast case, but Postgres
607 : * hasn't got such a concept.)
608 : */
609 : Datum
8335 tgl 610 GIC 13390 : varchar(PG_FUNCTION_ARGS)
611 : {
5847 612 13390 : VarChar *source = PG_GETARG_VARCHAR_PP(0);
613 13390 : int32 typmod = PG_GETARG_INT32(1);
7508 614 13390 : bool isExplicit = PG_GETARG_BOOL(2);
615 : int32 len,
5624 bruce 616 ECB : maxlen;
617 : size_t maxmblen;
7993 peter_e 618 : int i;
5624 bruce 619 : char *s_data;
5847 tgl 620 :
5847 tgl 621 GIC 13390 : len = VARSIZE_ANY_EXHDR(source);
622 13390 : s_data = VARDATA_ANY(source);
623 13390 : maxlen = typmod - VARHDRSZ;
624 :
625 : /* No work if typmod is invalid or supplied data fits it already */
626 13390 : if (maxlen < 0 || len <= maxlen)
7993 peter_e 627 CBC 13348 : PG_RETURN_VARCHAR_P(source);
9101 lockhart 628 ECB :
7993 peter_e 629 : /* only reach here if string is too long... */
630 :
631 : /* truncate multibyte string preserving multibyte boundary */
5847 tgl 632 CBC 42 : maxmblen = pg_mbcharcliplen(s_data, len, maxlen);
7993 peter_e 633 ECB :
7508 tgl 634 GIC 42 : if (!isExplicit)
635 : {
5847 636 57 : for (i = maxmblen; i < len; i++)
637 51 : if (s_data[i] != ' ')
7196 tgl 638 CBC 21 : ereport(ERROR,
639 : (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
2118 tgl 640 ECB : errmsg("value too long for type character varying(%d)",
641 : maxlen)));
7993 peter_e 642 : }
9101 lockhart 643 :
5050 bruce 644 CBC 21 : PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(s_data,
645 : maxmblen));
646 : }
647 :
648 : Datum
5944 tgl 649 GIC 654 : varchartypmodin(PG_FUNCTION_ARGS)
5944 tgl 650 ECB : {
5624 bruce 651 GIC 654 : ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
652 :
5944 tgl 653 654 : PG_RETURN_INT32(anychar_typmodin(ta, "varchar"));
654 : }
5944 tgl 655 ECB :
656 : Datum
5944 tgl 657 CBC 87 : varchartypmodout(PG_FUNCTION_ARGS)
658 : {
5624 bruce 659 87 : int32 typmod = PG_GETARG_INT32(0);
660 :
5944 tgl 661 GIC 87 : PG_RETURN_CSTRING(anychar_typmodout(typmod));
662 : }
5944 tgl 663 ECB :
664 :
7993 peter_e 665 : /*****************************************************************************
666 : * Exported functions
667 : *****************************************************************************/
668 :
669 : /* "True" length (not counting trailing blanks) of a BpChar */
670 : static inline int
8289 tgl 671 GIC 134105 : bcTruelen(BpChar *arg)
672 : {
2622 rhaas 673 134105 : return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg));
674 : }
675 :
/*
 * bpchartruelen -- length of the first len bytes of s once trailing
 * spaces are ignored.
 *
 * Returns the number of leading bytes left after the trailing run of ' '
 * bytes is removed; if len is not positive, len itself is returned.
 */
int
bpchartruelen(char *s, int len)
{
	int			truelen = len;

	/*
	 * This scan relies on ' ' being a singleton unit in every supported
	 * multibyte server encoding, so looking at raw bytes is safe.
	 */
	while (truelen > 0 && s[truelen - 1] == ' ')
		truelen--;

	return truelen;
}
692 :
8289 tgl 693 : Datum
8289 tgl 694 CBC 9 : bpcharlen(PG_FUNCTION_ARGS)
695 : {
5847 696 9 : BpChar *arg = PG_GETARG_BPCHAR_PP(0);
697 : int len;
698 :
699 : /* get number of bytes, ignoring trailing spaces */
7007 700 9 : len = bcTruelen(arg);
701 :
7007 tgl 702 ECB : /* in multibyte encoding, convert to number of characters */
7007 tgl 703 GIC 9 : if (pg_database_encoding_max_length() != 1)
5847 704 9 : len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len);
705 :
7007 tgl 706 CBC 9 : PG_RETURN_INT32(len);
707 : }
708 :
8289 tgl 709 ECB : Datum
8289 tgl 710 LBC 0 : bpcharoctetlen(PG_FUNCTION_ARGS)
711 : {
5624 bruce 712 0 : Datum arg = PG_GETARG_DATUM(0);
713 :
714 : /* We need not detoast the input at all */
5847 tgl 715 UIC 0 : PG_RETURN_INT32(toast_raw_datum_size(arg) - VARHDRSZ);
9222 bruce 716 EUB : }
717 :
8289 tgl 718 :
719 : /*****************************************************************************
720 : * Comparison Functions used for bpchar
721 : *
722 : * Note: btree indexes need these routines not to leak memory; therefore,
723 : * be careful to free working copies of toasted datums. Most places don't
724 : * need to be so careful.
725 : *****************************************************************************/
726 :
727 : static void
1479 peter 728 GIC 12871 : check_collation_set(Oid collid)
729 : {
730 12871 : if (!OidIsValid(collid))
731 : {
732 : /*
733 : * This typically means that the parser could not resolve a conflict
1479 peter 734 ECB : * of implicit collations, so report it that way.
735 : */
1479 peter 736 LBC 0 : ereport(ERROR,
737 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
738 : errmsg("could not determine which collation to use for string comparison"),
739 : errhint("Use the COLLATE clause to set the collation explicitly.")));
740 : }
1479 peter 741 GIC 12871 : }
1479 peter 742 EUB :
743 : Datum
8289 tgl 744 GIC 9667 : bpchareq(PG_FUNCTION_ARGS)
745 : {
5847 746 9667 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
5847 tgl 747 CBC 9667 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
748 : int len1,
749 : len2;
8289 tgl 750 ECB : bool result;
1479 peter 751 GIC 9667 : Oid collid = PG_GET_COLLATION();
444 peter 752 CBC 9667 : bool locale_is_c = false;
332 tgl 753 9667 : pg_locale_t mylocale = 0;
754 :
1479 peter 755 GIC 9667 : check_collation_set(collid);
756 :
9345 bruce 757 CBC 9667 : len1 = bcTruelen(arg1);
758 9667 : len2 = bcTruelen(arg2);
9770 scrappy 759 ECB :
444 peter 760 GIC 9667 : if (lc_collate_is_c(collid))
444 peter 761 CBC 24 : locale_is_c = true;
762 : else
763 9643 : mylocale = pg_newlocale_from_collation(collid);
444 peter 764 ECB :
45 jdavis 765 GNC 9667 : if (locale_is_c || pg_locale_deterministic(mylocale))
1479 peter 766 ECB : {
767 : /*
768 : * Since we only care about equality or not-equality, we can avoid all
1418 tgl 769 : * the expense of strcoll() here, and just do bitwise comparison.
770 : */
1479 peter 771 CBC 9595 : if (len1 != len2)
1479 peter 772 GIC 1245 : result = false;
773 : else
774 8350 : result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
775 : }
776 : else
1479 peter 777 ECB : {
1479 peter 778 CBC 72 : result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
779 : collid) == 0);
1479 peter 780 ECB : }
781 :
8289 tgl 782 GIC 9667 : PG_FREE_IF_COPY(arg1, 0);
783 9667 : PG_FREE_IF_COPY(arg2, 1);
8289 tgl 784 ECB :
8289 tgl 785 GIC 9667 : PG_RETURN_BOOL(result);
786 : }
787 :
8289 tgl 788 ECB : Datum
8289 tgl 789 CBC 3204 : bpcharne(PG_FUNCTION_ARGS)
790 : {
5847 791 3204 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
5847 tgl 792 GIC 3204 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
793 : int len1,
794 : len2;
8289 tgl 795 ECB : bool result;
1479 peter 796 GIC 3204 : Oid collid = PG_GET_COLLATION();
444 peter 797 CBC 3204 : bool locale_is_c = false;
332 tgl 798 3204 : pg_locale_t mylocale = 0;
799 :
1243 tgl 800 GIC 3204 : check_collation_set(collid);
801 :
9345 bruce 802 CBC 3204 : len1 = bcTruelen(arg1);
803 3204 : len2 = bcTruelen(arg2);
9770 scrappy 804 ECB :
444 peter 805 GIC 3204 : if (lc_collate_is_c(collid))
444 peter 806 LBC 0 : locale_is_c = true;
807 : else
444 peter 808 CBC 3204 : mylocale = pg_newlocale_from_collation(collid);
444 peter 809 ECB :
45 jdavis 810 GNC 3204 : if (locale_is_c || pg_locale_deterministic(mylocale))
1479 peter 811 ECB : {
1479 peter 812 EUB : /*
813 : * Since we only care about equality or not-equality, we can avoid all
1418 tgl 814 ECB : * the expense of strcoll() here, and just do bitwise comparison.
815 : */
1479 peter 816 CBC 3192 : if (len1 != len2)
1479 peter 817 GIC 1011 : result = true;
818 : else
819 2181 : result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
820 : }
821 : else
1479 peter 822 ECB : {
1479 peter 823 CBC 12 : result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
824 : collid) != 0);
1479 peter 825 ECB : }
826 :
8289 tgl 827 GIC 3204 : PG_FREE_IF_COPY(arg1, 0);
828 3204 : PG_FREE_IF_COPY(arg2, 1);
9770 scrappy 829 ECB :
8289 tgl 830 GIC 3204 : PG_RETURN_BOOL(result);
831 : }
832 :
8289 tgl 833 ECB : Datum
8289 tgl 834 CBC 3012 : bpcharlt(PG_FUNCTION_ARGS)
835 : {
5847 836 3012 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
5847 tgl 837 GIC 3012 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
838 : int len1,
839 : len2;
9344 bruce 840 ECB : int cmp;
841 :
9345 bruce 842 CBC 3012 : len1 = bcTruelen(arg1);
843 3012 : len2 = bcTruelen(arg2);
844 :
4443 peter_e 845 GIC 3012 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
846 : PG_GET_COLLATION());
847 :
8289 tgl 848 CBC 3012 : PG_FREE_IF_COPY(arg1, 0);
849 3012 : PG_FREE_IF_COPY(arg2, 1);
850 :
851 3012 : PG_RETURN_BOOL(cmp < 0);
852 : }
853 :
8289 tgl 854 ECB : Datum
8289 tgl 855 CBC 2774 : bpcharle(PG_FUNCTION_ARGS)
856 : {
5847 857 2774 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
5847 tgl 858 GIC 2774 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
859 : int len1,
860 : len2;
9344 bruce 861 ECB : int cmp;
862 :
9345 bruce 863 CBC 2774 : len1 = bcTruelen(arg1);
864 2774 : len2 = bcTruelen(arg2);
865 :
4443 peter_e 866 GIC 2774 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
867 : PG_GET_COLLATION());
868 :
8289 tgl 869 CBC 2774 : PG_FREE_IF_COPY(arg1, 0);
870 2774 : PG_FREE_IF_COPY(arg2, 1);
871 :
872 2774 : PG_RETURN_BOOL(cmp <= 0);
873 : }
874 :
8289 tgl 875 ECB : Datum
8289 tgl 876 CBC 3125 : bpchargt(PG_FUNCTION_ARGS)
877 : {
5847 878 3125 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
5847 tgl 879 GIC 3125 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
880 : int len1,
881 : len2;
9344 bruce 882 ECB : int cmp;
883 :
9345 bruce 884 CBC 3125 : len1 = bcTruelen(arg1);
885 3125 : len2 = bcTruelen(arg2);
886 :
4443 peter_e 887 GIC 3125 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
888 : PG_GET_COLLATION());
889 :
8289 tgl 890 CBC 3125 : PG_FREE_IF_COPY(arg1, 0);
891 3125 : PG_FREE_IF_COPY(arg2, 1);
892 :
893 3125 : PG_RETURN_BOOL(cmp > 0);
894 : }
895 :
8289 tgl 896 ECB : Datum
8289 tgl 897 CBC 2858 : bpcharge(PG_FUNCTION_ARGS)
898 : {
5847 899 2858 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
5847 tgl 900 GIC 2858 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
901 : int len1,
902 : len2;
9344 bruce 903 ECB : int cmp;
904 :
9345 bruce 905 CBC 2858 : len1 = bcTruelen(arg1);
906 2858 : len2 = bcTruelen(arg2);
907 :
4443 peter_e 908 GIC 2858 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
909 : PG_GET_COLLATION());
910 :
8289 tgl 911 CBC 2858 : PG_FREE_IF_COPY(arg1, 0);
912 2858 : PG_FREE_IF_COPY(arg2, 1);
913 :
914 2858 : PG_RETURN_BOOL(cmp >= 0);
915 : }
916 :
8289 tgl 917 ECB : Datum
8289 tgl 918 CBC 41226 : bpcharcmp(PG_FUNCTION_ARGS)
919 : {
5847 920 41226 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
5847 tgl 921 GIC 41226 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
922 : int len1,
923 : len2;
9344 bruce 924 ECB : int cmp;
925 :
9345 bruce 926 CBC 41226 : len1 = bcTruelen(arg1);
927 41226 : len2 = bcTruelen(arg2);
928 :
4443 peter_e 929 GIC 41226 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
930 : PG_GET_COLLATION());
931 :
8289 tgl 932 CBC 41226 : PG_FREE_IF_COPY(arg1, 0);
933 41226 : PG_FREE_IF_COPY(arg2, 1);
934 :
935 41226 : PG_RETURN_INT32(cmp);
936 : }
937 :
2622 rhaas 938 ECB : Datum
2622 rhaas 939 CBC 389 : bpchar_sortsupport(PG_FUNCTION_ARGS)
940 : {
941 389 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
2622 rhaas 942 GIC 389 : Oid collid = ssup->ssup_collation;
943 : MemoryContext oldcontext;
944 :
2622 rhaas 945 CBC 389 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
946 :
2622 rhaas 947 ECB : /* Use generic string SortSupport */
1572 tgl 948 CBC 389 : varstr_sortsupport(ssup, BPCHAROID, collid);
949 :
2622 rhaas 950 GIC 389 : MemoryContextSwitchTo(oldcontext);
2622 rhaas 951 ECB :
2622 rhaas 952 GIC 389 : PG_RETURN_VOID();
953 : }
2622 rhaas 954 ECB :
955 : Datum
6571 tgl 956 LBC 0 : bpchar_larger(PG_FUNCTION_ARGS)
957 : {
5847 958 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
5847 tgl 959 UIC 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
960 : int len1,
961 : len2;
6571 tgl 962 EUB : int cmp;
963 :
6571 tgl 964 UBC 0 : len1 = bcTruelen(arg1);
965 0 : len2 = bcTruelen(arg2);
966 :
4443 peter_e 967 UIC 0 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
968 : PG_GET_COLLATION());
969 :
6571 tgl 970 UBC 0 : PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2);
6571 tgl 971 EUB : }
972 :
973 : Datum
6571 tgl 974 UIC 0 : bpchar_smaller(PG_FUNCTION_ARGS)
975 : {
5847 tgl 976 UBC 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
5847 tgl 977 UIC 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
978 : int len1,
979 : len2;
6571 tgl 980 EUB : int cmp;
981 :
6571 tgl 982 UBC 0 : len1 = bcTruelen(arg1);
983 0 : len2 = bcTruelen(arg2);
984 :
4443 peter_e 985 UIC 0 : cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
986 : PG_GET_COLLATION());
987 :
6571 tgl 988 UBC 0 : PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2);
6571 tgl 989 EUB : }
990 :
8289 991 :
992 : /*
993 : * bpchar needs a specialized hash function because we want to ignore
7258 994 : * trailing blanks in comparisons.
995 : */
996 : Datum
8289 tgl 997 GIC 2244 : hashbpchar(PG_FUNCTION_ARGS)
998 : {
5847 999 2244 : BpChar *key = PG_GETARG_BPCHAR_PP(0);
1479 peter 1000 2244 : Oid collid = PG_GET_COLLATION();
1001 : char *keydata;
1002 : int keylen;
1418 tgl 1003 CBC 2244 : pg_locale_t mylocale = 0;
1004 : Datum result;
8289 tgl 1005 ECB :
1479 peter 1006 CBC 2244 : if (!collid)
1479 peter 1007 UIC 0 : ereport(ERROR,
1008 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
1479 peter 1009 ECB : errmsg("could not determine which collation to use for string hashing"),
1010 : errhint("Use the COLLATE clause to set the collation explicitly.")));
1011 :
5847 tgl 1012 CBC 2244 : keydata = VARDATA_ANY(key);
8289 tgl 1013 GBC 2244 : keylen = bcTruelen(key);
1014 :
444 peter 1015 GIC 2244 : if (!lc_collate_is_c(collid))
1479 1016 2244 : mylocale = pg_newlocale_from_collation(collid);
1017 :
45 jdavis 1018 GNC 2244 : if (pg_locale_deterministic(mylocale))
1479 peter 1019 ECB : {
1479 peter 1020 GIC 2130 : result = hash_any((unsigned char *) keydata, keylen);
1479 peter 1021 ECB : }
1022 : else
1023 : {
1024 : Size bsize, rsize;
1025 : char *buf;
1026 :
45 jdavis 1027 GNC 114 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
1028 114 : buf = palloc(bsize + 1);
1479 peter 1029 ECB :
45 jdavis 1030 GNC 114 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
1031 114 : if (rsize != bsize)
45 jdavis 1032 UNC 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
1033 :
1034 : /*
1035 : * In principle, there's no reason to include the terminating NUL
1036 : * character in the hash, but it was done before and the behavior
1037 : * must be preserved.
1038 : */
45 jdavis 1039 GNC 114 : result = hash_any((uint8_t *) buf, bsize + 1);
1040 :
1041 114 : pfree(buf);
1042 : }
1043 :
8157 tgl 1044 ECB : /* Avoid leaking memory for toasted inputs */
8157 tgl 1045 GIC 2244 : PG_FREE_IF_COPY(key, 0);
8157 tgl 1046 ECB :
8157 tgl 1047 GIC 2244 : return result;
1048 : }
1049 :
2047 rhaas 1050 ECB : Datum
2047 rhaas 1051 GIC 42 : hashbpcharextended(PG_FUNCTION_ARGS)
2047 rhaas 1052 ECB : {
2047 rhaas 1053 CBC 42 : BpChar *key = PG_GETARG_BPCHAR_PP(0);
1479 peter 1054 GIC 42 : Oid collid = PG_GET_COLLATION();
1055 : char *keydata;
2047 rhaas 1056 ECB : int keylen;
1418 tgl 1057 GIC 42 : pg_locale_t mylocale = 0;
1058 : Datum result;
2047 rhaas 1059 ECB :
1479 peter 1060 GBC 42 : if (!collid)
1479 peter 1061 UIC 0 : ereport(ERROR,
1062 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
1063 : errmsg("could not determine which collation to use for string hashing"),
1064 : errhint("Use the COLLATE clause to set the collation explicitly.")));
1479 peter 1065 ECB :
2047 rhaas 1066 CBC 42 : keydata = VARDATA_ANY(key);
2047 rhaas 1067 GIC 42 : keylen = bcTruelen(key);
2047 rhaas 1068 ECB :
444 peter 1069 CBC 42 : if (!lc_collate_is_c(collid))
1479 peter 1070 GIC 42 : mylocale = pg_newlocale_from_collation(collid);
1479 peter 1071 ECB :
45 jdavis 1072 GNC 42 : if (pg_locale_deterministic(mylocale))
1479 peter 1073 ECB : {
1479 peter 1074 CBC 36 : result = hash_any_extended((unsigned char *) keydata, keylen,
1479 peter 1075 GIC 36 : PG_GETARG_INT64(1));
1076 : }
1077 : else
1078 : {
1079 : Size bsize, rsize;
1080 : char *buf;
1479 peter 1081 EUB :
45 jdavis 1082 GNC 6 : bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
1083 6 : buf = palloc(bsize + 1);
1084 :
1085 6 : rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
1086 6 : if (rsize != bsize)
45 jdavis 1087 UNC 0 : elog(ERROR, "pg_strnxfrm() returned unexpected result");
1088 :
1089 : /*
1090 : * In principle, there's no reason to include the terminating NUL
1091 : * character in the hash, but it was done before and the behavior
1092 : * must be preserved.
1093 : */
45 jdavis 1094 GNC 6 : result = hash_any_extended((uint8_t *) buf, bsize + 1,
1095 6 : PG_GETARG_INT64(1));
1096 :
1097 6 : pfree(buf);
1098 : }
1099 :
2047 rhaas 1100 GIC 42 : PG_FREE_IF_COPY(key, 0);
1101 :
1102 42 : return result;
1103 : }
1104 :
1105 : /*
1106 : * The following operators support character-by-character comparison
5430 tgl 1107 ECB : * of bpchar datums, to allow building indexes suitable for LIKE clauses.
1108 : * Note that the regular bpchareq/bpcharne comparison operators, and
1109 : * regular support functions 1 and 2 with "C" collation are assumed to be
1110 : * compatible with these!
1111 : */
1112 :
1113 : static int
1296 tgl 1114 CBC 39 : internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
1115 : {
5430 tgl 1116 ECB : int result;
1117 : int len1,
1118 : len2;
1119 :
5430 tgl 1120 GBC 39 : len1 = bcTruelen(arg1);
5430 tgl 1121 CBC 39 : len2 = bcTruelen(arg2);
5430 tgl 1122 EUB :
4492 rhaas 1123 GIC 39 : result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
5430 tgl 1124 CBC 39 : if (result != 0)
5430 tgl 1125 GIC 24 : return result;
1126 15 : else if (len1 < len2)
5430 tgl 1127 UIC 0 : return -1;
5430 tgl 1128 GIC 15 : else if (len1 > len2)
5430 tgl 1129 UBC 0 : return 1;
1130 : else
5430 tgl 1131 GBC 15 : return 0;
5430 tgl 1132 EUB : }
1133 :
1134 :
1135 : Datum
5430 tgl 1136 UIC 0 : bpchar_pattern_lt(PG_FUNCTION_ARGS)
5430 tgl 1137 EUB : {
5430 tgl 1138 UBC 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
5430 tgl 1139 UIC 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
5430 tgl 1140 EUB : int result;
1141 :
1296 tgl 1142 UIC 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1143 :
5430 1144 0 : PG_FREE_IF_COPY(arg1, 0);
5430 tgl 1145 UBC 0 : PG_FREE_IF_COPY(arg2, 1);
1146 :
1147 0 : PG_RETURN_BOOL(result < 0);
5430 tgl 1148 EUB : }
1149 :
1150 :
1151 : Datum
5430 tgl 1152 UIC 0 : bpchar_pattern_le(PG_FUNCTION_ARGS)
5430 tgl 1153 EUB : {
5430 tgl 1154 UBC 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
5430 tgl 1155 UIC 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
5430 tgl 1156 EUB : int result;
1157 :
1296 tgl 1158 UIC 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1159 :
5430 1160 0 : PG_FREE_IF_COPY(arg1, 0);
5430 tgl 1161 UBC 0 : PG_FREE_IF_COPY(arg2, 1);
1162 :
1163 0 : PG_RETURN_BOOL(result <= 0);
5430 tgl 1164 EUB : }
1165 :
1166 :
1167 : Datum
5430 tgl 1168 UIC 0 : bpchar_pattern_ge(PG_FUNCTION_ARGS)
5430 tgl 1169 EUB : {
5430 tgl 1170 UBC 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
5430 tgl 1171 UIC 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
5430 tgl 1172 EUB : int result;
1173 :
1296 tgl 1174 UIC 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1175 :
5430 1176 0 : PG_FREE_IF_COPY(arg1, 0);
5430 tgl 1177 UBC 0 : PG_FREE_IF_COPY(arg2, 1);
1178 :
1179 0 : PG_RETURN_BOOL(result >= 0);
5430 tgl 1180 EUB : }
1181 :
1182 :
1183 : Datum
5430 tgl 1184 UIC 0 : bpchar_pattern_gt(PG_FUNCTION_ARGS)
5430 tgl 1185 EUB : {
5430 tgl 1186 UBC 0 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
5430 tgl 1187 UIC 0 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
5430 tgl 1188 EUB : int result;
1189 :
1296 tgl 1190 UIC 0 : result = internal_bpchar_pattern_compare(arg1, arg2);
1191 :
5430 1192 0 : PG_FREE_IF_COPY(arg1, 0);
5430 tgl 1193 LBC 0 : PG_FREE_IF_COPY(arg2, 1);
1194 :
1195 0 : PG_RETURN_BOOL(result > 0);
5430 tgl 1196 ECB : }
1197 :
1198 :
1199 : Datum
5430 tgl 1200 GIC 39 : btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
5430 tgl 1201 ECB : {
5430 tgl 1202 CBC 39 : BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
5430 tgl 1203 GIC 39 : BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
5430 tgl 1204 ECB : int result;
1205 :
1296 tgl 1206 GIC 39 : result = internal_bpchar_pattern_compare(arg1, arg2);
1207 :
5430 1208 39 : PG_FREE_IF_COPY(arg1, 0);
5430 tgl 1209 CBC 39 : PG_FREE_IF_COPY(arg2, 1);
1210 :
1211 39 : PG_RETURN_INT32(result);
1212 : }
1213 :
2622 rhaas 1214 ECB :
1215 : Datum
2622 rhaas 1216 GIC 6 : btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
2622 rhaas 1217 ECB : {
2622 rhaas 1218 GIC 6 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
2622 rhaas 1219 ECB : MemoryContext oldcontext;
1220 :
2622 rhaas 1221 CBC 6 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1222 :
1223 : /* Use generic string SortSupport, forcing "C" collation */
1572 tgl 1224 GIC 6 : varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID);
1225 :
2622 rhaas 1226 6 : MemoryContextSwitchTo(oldcontext);
1227 :
1228 6 : PG_RETURN_VOID();
1229 : }
|